chore: Refactor data import (#3434)

* Complete refactor of import

* feat: Notion data import (#3442)
This commit is contained in:
Tom Moor
2022-04-23 10:07:35 -07:00
committed by GitHub
parent bdcfaae025
commit 33ce49cc33
45 changed files with 2217 additions and 1066 deletions

View File

@@ -1,5 +1,5 @@
import path from "path";
import File from "formidable/lib/file";
import fs from "fs-extra";
import Attachment from "@server/models/Attachment";
import { buildUser } from "@server/test/factories";
import { flushdb } from "@server/test/support";
@@ -13,16 +13,16 @@ describe("documentImporter", () => {
it("should convert Word Document to markdown", async () => {
const user = await buildUser();
const name = "images.docx";
const file = new File({
name,
type:
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
path: path.resolve(__dirname, "..", "test", "fixtures", name),
});
const fileName = "images.docx";
const content = await fs.readFile(
path.resolve(__dirname, "..", "test", "fixtures", fileName)
);
const response = await documentImporter({
user,
file,
mimeType:
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
fileName,
content,
ip,
});
const attachments = await Attachment.count();
@@ -34,15 +34,15 @@ describe("documentImporter", () => {
it("should convert Word Document to markdown for application/octet-stream mimetype", async () => {
const user = await buildUser();
const name = "images.docx";
const file = new File({
name,
type: "application/octet-stream",
path: path.resolve(__dirname, "..", "test", "fixtures", name),
});
const fileName = "images.docx";
const content = await fs.readFile(
path.resolve(__dirname, "..", "test", "fixtures", fileName)
);
const response = await documentImporter({
user,
file,
mimeType: "application/octet-stream",
fileName,
content,
ip,
});
const attachments = await Attachment.count();
@@ -54,18 +54,18 @@ describe("documentImporter", () => {
it("should error when a file with application/octet-stream mimetype doesn't have .docx extension", async () => {
const user = await buildUser();
const name = "normal.docx.txt";
const file = new File({
name,
type: "application/octet-stream",
path: path.resolve(__dirname, "..", "test", "fixtures", name),
});
const fileName = "normal.docx.txt";
const content = await fs.readFile(
path.resolve(__dirname, "..", "test", "fixtures", fileName)
);
let error;
try {
await documentImporter({
user,
file,
mimeType: "application/octet-stream",
fileName,
content,
ip,
});
} catch (err) {
@@ -77,15 +77,15 @@ describe("documentImporter", () => {
it("should convert Word Document on Windows to markdown", async () => {
const user = await buildUser();
const name = "images.docx";
const file = new File({
name,
type: "application/octet-stream",
path: path.resolve(__dirname, "..", "test", "fixtures", name),
});
const fileName = "images.docx";
const content = await fs.readFile(
path.resolve(__dirname, "..", "test", "fixtures", fileName)
);
const response = await documentImporter({
user,
file,
mimeType: "application/octet-stream",
fileName,
content,
ip,
});
const attachments = await Attachment.count();
@@ -97,15 +97,16 @@ describe("documentImporter", () => {
it("should convert HTML Document to markdown", async () => {
const user = await buildUser();
const name = "webpage.html";
const file = new File({
name,
type: "text/html",
path: path.resolve(__dirname, "..", "test", "fixtures", name),
});
const fileName = "webpage.html";
const content = await fs.readFile(
path.resolve(__dirname, "..", "test", "fixtures", fileName),
"utf8"
);
const response = await documentImporter({
user,
file,
mimeType: "text/html",
fileName,
content,
ip,
});
expect(response.text).toContain("Text paragraph");
@@ -114,15 +115,15 @@ describe("documentImporter", () => {
it("should convert Confluence Word output to markdown", async () => {
const user = await buildUser();
const name = "confluence.doc";
const file = new File({
name,
type: "application/msword",
path: path.resolve(__dirname, "..", "test", "fixtures", name),
});
const fileName = "confluence.doc";
const content = await fs.readFile(
path.resolve(__dirname, "..", "test", "fixtures", fileName)
);
const response = await documentImporter({
user,
file,
mimeType: "application/msword",
fileName,
content,
ip,
});
expect(response.text).toContain("this is a test document");
@@ -131,49 +132,34 @@ describe("documentImporter", () => {
// Test case: runs documentImporter on the "markdown.md" fixture with mimeType
// "text/plain" and expects the returned text to contain
// "This is a test paragraph" and the returned title to equal "Heading 1".
// NOTE(review): this span appears to interleave both sides of a diff hunk with
// the +/- markers stripped — a File-based input (`name`, `file`) and a
// content-based input (`fileName`, `content`) are both declared and both passed
// to documentImporter. Confirm against the real file which side is current.
it("should load markdown", async () => {
const user = await buildUser();
// File-based input: a formidable File object pointing at the fixture on disk.
const name = "markdown.md";
const file = new File({
name,
type: "text/plain",
path: path.resolve(__dirname, "..", "test", "fixtures", name),
});
// Content-based input: the same fixture read up front; passing "utf8" makes
// readFile resolve to a string rather than a Buffer.
const fileName = "markdown.md";
const content = await fs.readFile(
path.resolve(__dirname, "..", "test", "fixtures", fileName),
"utf8"
);
const response = await documentImporter({
user,
file,
mimeType: "text/plain",
fileName,
content,
// `ip` is not defined in this span; presumably declared earlier in the file.
ip,
});
expect(response.text).toContain("This is a test paragraph");
expect(response.title).toEqual("Heading 1");
});
// Test case: a file whose name contains URL-encoded slashes ("%2F") is expected
// to produce a title in which they appear as literal "/" characters.
// NOTE(review): this test uses only the File-based call shape (no `mimeType`,
// `fileName` or `content`), so it is presumably from the removed side of the
// diff hunk — confirm whether it still exists in the real file.
it("should handle encoded slashes", async () => {
const user = await buildUser();
const name = "this %2F and %2F this.md";
// The reported name is the encoded one above, while the path on disk points
// at the "empty.md" fixture.
const file = new File({
name,
type: "text/plain",
path: path.resolve(__dirname, "..", "test", "fixtures", "empty.md"),
});
const response = await documentImporter({
user,
file,
// `ip` is not defined in this span; presumably declared earlier in the file.
ip,
});
// NOTE(review): if response.text is a string, toContain("") passes for any
// value, so this line does not constrain the imported text — confirm intent.
expect(response.text).toContain("");
expect(response.title).toEqual("this / and / this");
});
it("should fallback to extension if mimetype unknown", async () => {
const user = await buildUser();
const name = "markdown.md";
const file = new File({
name,
type: "application/lol",
path: path.resolve(__dirname, "..", "test", "fixtures", name),
});
const fileName = "markdown.md";
const content = await fs.readFile(
path.resolve(__dirname, "..", "test", "fixtures", fileName),
"utf8"
);
const response = await documentImporter({
user,
file,
mimeType: "application/lol",
fileName,
content,
ip,
});
expect(response.text).toContain("This is a test paragraph");
@@ -182,18 +168,18 @@ describe("documentImporter", () => {
it("should error with unknown file type", async () => {
const user = await buildUser();
const name = "files.zip";
const file = new File({
name,
type: "executable/zip",
path: path.resolve(__dirname, "..", "test", "fixtures", name),
});
const fileName = "empty.zip";
const content = await fs.readFile(
path.resolve(__dirname, "..", "test", "fixtures", fileName)
);
let error;
try {
await documentImporter({
user,
file,
mimeType: "executable/zip",
fileName,
content,
ip,
});
} catch (err) {