chore: Refactor data import (#3434)
* Complete refactor of import * feat: Notion data import (#3442)
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
import path from "path";
|
||||
import File from "formidable/lib/file";
|
||||
import fs from "fs-extra";
|
||||
import Attachment from "@server/models/Attachment";
|
||||
import { buildUser } from "@server/test/factories";
|
||||
import { flushdb } from "@server/test/support";
|
||||
@@ -13,16 +13,16 @@ describe("documentImporter", () => {
|
||||
|
||||
it("should convert Word Document to markdown", async () => {
|
||||
const user = await buildUser();
|
||||
const name = "images.docx";
|
||||
const file = new File({
|
||||
name,
|
||||
type:
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
path: path.resolve(__dirname, "..", "test", "fixtures", name),
|
||||
});
|
||||
const fileName = "images.docx";
|
||||
const content = await fs.readFile(
|
||||
path.resolve(__dirname, "..", "test", "fixtures", fileName)
|
||||
);
|
||||
const response = await documentImporter({
|
||||
user,
|
||||
file,
|
||||
mimeType:
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
fileName,
|
||||
content,
|
||||
ip,
|
||||
});
|
||||
const attachments = await Attachment.count();
|
||||
@@ -34,15 +34,15 @@ describe("documentImporter", () => {
|
||||
|
||||
it("should convert Word Document to markdown for application/octet-stream mimetype", async () => {
|
||||
const user = await buildUser();
|
||||
const name = "images.docx";
|
||||
const file = new File({
|
||||
name,
|
||||
type: "application/octet-stream",
|
||||
path: path.resolve(__dirname, "..", "test", "fixtures", name),
|
||||
});
|
||||
const fileName = "images.docx";
|
||||
const content = await fs.readFile(
|
||||
path.resolve(__dirname, "..", "test", "fixtures", fileName)
|
||||
);
|
||||
const response = await documentImporter({
|
||||
user,
|
||||
file,
|
||||
mimeType: "application/octet-stream",
|
||||
fileName,
|
||||
content,
|
||||
ip,
|
||||
});
|
||||
const attachments = await Attachment.count();
|
||||
@@ -54,18 +54,18 @@ describe("documentImporter", () => {
|
||||
|
||||
it("should error when a file with application/octet-stream mimetype doesn't have .docx extension", async () => {
|
||||
const user = await buildUser();
|
||||
const name = "normal.docx.txt";
|
||||
const file = new File({
|
||||
name,
|
||||
type: "application/octet-stream",
|
||||
path: path.resolve(__dirname, "..", "test", "fixtures", name),
|
||||
});
|
||||
const fileName = "normal.docx.txt";
|
||||
const content = await fs.readFile(
|
||||
path.resolve(__dirname, "..", "test", "fixtures", fileName)
|
||||
);
|
||||
let error;
|
||||
|
||||
try {
|
||||
await documentImporter({
|
||||
user,
|
||||
file,
|
||||
mimeType: "application/octet-stream",
|
||||
fileName,
|
||||
content,
|
||||
ip,
|
||||
});
|
||||
} catch (err) {
|
||||
@@ -77,15 +77,15 @@ describe("documentImporter", () => {
|
||||
|
||||
it("should convert Word Document on Windows to markdown", async () => {
|
||||
const user = await buildUser();
|
||||
const name = "images.docx";
|
||||
const file = new File({
|
||||
name,
|
||||
type: "application/octet-stream",
|
||||
path: path.resolve(__dirname, "..", "test", "fixtures", name),
|
||||
});
|
||||
const fileName = "images.docx";
|
||||
const content = await fs.readFile(
|
||||
path.resolve(__dirname, "..", "test", "fixtures", fileName)
|
||||
);
|
||||
const response = await documentImporter({
|
||||
user,
|
||||
file,
|
||||
mimeType: "application/octet-stream",
|
||||
fileName,
|
||||
content,
|
||||
ip,
|
||||
});
|
||||
const attachments = await Attachment.count();
|
||||
@@ -97,15 +97,16 @@ describe("documentImporter", () => {
|
||||
|
||||
it("should convert HTML Document to markdown", async () => {
|
||||
const user = await buildUser();
|
||||
const name = "webpage.html";
|
||||
const file = new File({
|
||||
name,
|
||||
type: "text/html",
|
||||
path: path.resolve(__dirname, "..", "test", "fixtures", name),
|
||||
});
|
||||
const fileName = "webpage.html";
|
||||
const content = await fs.readFile(
|
||||
path.resolve(__dirname, "..", "test", "fixtures", fileName),
|
||||
"utf8"
|
||||
);
|
||||
const response = await documentImporter({
|
||||
user,
|
||||
file,
|
||||
mimeType: "text/html",
|
||||
fileName,
|
||||
content,
|
||||
ip,
|
||||
});
|
||||
expect(response.text).toContain("Text paragraph");
|
||||
@@ -114,15 +115,15 @@ describe("documentImporter", () => {
|
||||
|
||||
it("should convert Confluence Word output to markdown", async () => {
|
||||
const user = await buildUser();
|
||||
const name = "confluence.doc";
|
||||
const file = new File({
|
||||
name,
|
||||
type: "application/msword",
|
||||
path: path.resolve(__dirname, "..", "test", "fixtures", name),
|
||||
});
|
||||
const fileName = "confluence.doc";
|
||||
const content = await fs.readFile(
|
||||
path.resolve(__dirname, "..", "test", "fixtures", fileName)
|
||||
);
|
||||
const response = await documentImporter({
|
||||
user,
|
||||
file,
|
||||
mimeType: "application/msword",
|
||||
fileName,
|
||||
content,
|
||||
ip,
|
||||
});
|
||||
expect(response.text).toContain("this is a test document");
|
||||
@@ -131,49 +132,34 @@ describe("documentImporter", () => {
|
||||
|
||||
it("should load markdown", async () => {
|
||||
const user = await buildUser();
|
||||
const name = "markdown.md";
|
||||
const file = new File({
|
||||
name,
|
||||
type: "text/plain",
|
||||
path: path.resolve(__dirname, "..", "test", "fixtures", name),
|
||||
});
|
||||
const fileName = "markdown.md";
|
||||
const content = await fs.readFile(
|
||||
path.resolve(__dirname, "..", "test", "fixtures", fileName),
|
||||
"utf8"
|
||||
);
|
||||
const response = await documentImporter({
|
||||
user,
|
||||
file,
|
||||
mimeType: "text/plain",
|
||||
fileName,
|
||||
content,
|
||||
ip,
|
||||
});
|
||||
expect(response.text).toContain("This is a test paragraph");
|
||||
expect(response.title).toEqual("Heading 1");
|
||||
});
|
||||
|
||||
it("should handle encoded slashes", async () => {
|
||||
const user = await buildUser();
|
||||
const name = "this %2F and %2F this.md";
|
||||
const file = new File({
|
||||
name,
|
||||
type: "text/plain",
|
||||
path: path.resolve(__dirname, "..", "test", "fixtures", "empty.md"),
|
||||
});
|
||||
const response = await documentImporter({
|
||||
user,
|
||||
file,
|
||||
ip,
|
||||
});
|
||||
expect(response.text).toContain("");
|
||||
expect(response.title).toEqual("this / and / this");
|
||||
});
|
||||
|
||||
it("should fallback to extension if mimetype unknown", async () => {
|
||||
const user = await buildUser();
|
||||
const name = "markdown.md";
|
||||
const file = new File({
|
||||
name,
|
||||
type: "application/lol",
|
||||
path: path.resolve(__dirname, "..", "test", "fixtures", name),
|
||||
});
|
||||
const fileName = "markdown.md";
|
||||
const content = await fs.readFile(
|
||||
path.resolve(__dirname, "..", "test", "fixtures", fileName),
|
||||
"utf8"
|
||||
);
|
||||
const response = await documentImporter({
|
||||
user,
|
||||
file,
|
||||
mimeType: "application/lol",
|
||||
fileName,
|
||||
content,
|
||||
ip,
|
||||
});
|
||||
expect(response.text).toContain("This is a test paragraph");
|
||||
@@ -182,18 +168,18 @@ describe("documentImporter", () => {
|
||||
|
||||
it("should error with unknown file type", async () => {
|
||||
const user = await buildUser();
|
||||
const name = "files.zip";
|
||||
const file = new File({
|
||||
name,
|
||||
type: "executable/zip",
|
||||
path: path.resolve(__dirname, "..", "test", "fixtures", name),
|
||||
});
|
||||
const fileName = "empty.zip";
|
||||
const content = await fs.readFile(
|
||||
path.resolve(__dirname, "..", "test", "fixtures", fileName)
|
||||
);
|
||||
let error;
|
||||
|
||||
try {
|
||||
await documentImporter({
|
||||
user,
|
||||
file,
|
||||
mimeType: "executable/zip",
|
||||
fileName,
|
||||
content,
|
||||
ip,
|
||||
});
|
||||
} catch (err) {
|
||||
|
||||
Reference in New Issue
Block a user