Rearchitect import (#6141)
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import JSZip from "jszip";
|
||||
import path from "path";
|
||||
import fs from "fs-extra";
|
||||
import escapeRegExp from "lodash/escapeRegExp";
|
||||
import find from "lodash/find";
|
||||
import mime from "mime-types";
|
||||
@@ -13,18 +14,19 @@ import {
|
||||
DocumentJSONExport,
|
||||
JSONExportMetadata,
|
||||
} from "@server/types";
|
||||
import ZipHelper, { FileTreeNode } from "@server/utils/ZipHelper";
|
||||
import ImportHelper, { FileTreeNode } from "@server/utils/ImportHelper";
|
||||
import ImportTask, { StructuredImportData } from "./ImportTask";
|
||||
|
||||
export default class ImportJSONTask extends ImportTask {
|
||||
public async parseData(
|
||||
buffer: Buffer,
|
||||
fileOperation: FileOperation
|
||||
dirPath: string,
|
||||
_: FileOperation
|
||||
): Promise<StructuredImportData> {
|
||||
const zip = await JSZip.loadAsync(buffer);
|
||||
const tree = ZipHelper.toFileTree(zip);
|
||||
|
||||
return this.parseFileTree({ fileOperation, zip, tree });
|
||||
const tree = await ImportHelper.toFileTree(dirPath);
|
||||
if (!tree) {
|
||||
throw new Error("Could not find valid content in zip file");
|
||||
}
|
||||
return this.parseFileTree(tree.children);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -34,14 +36,10 @@ export default class ImportJSONTask extends ImportTask {
|
||||
* @param tree An array of FileTreeNode representing root files in the zip
|
||||
* @returns A StructuredImportData object
|
||||
*/
|
||||
private async parseFileTree({
|
||||
zip,
|
||||
tree,
|
||||
}: {
|
||||
zip: JSZip;
|
||||
fileOperation: FileOperation;
|
||||
tree: FileTreeNode[];
|
||||
}): Promise<StructuredImportData> {
|
||||
private async parseFileTree(
|
||||
tree: FileTreeNode[]
|
||||
): Promise<StructuredImportData> {
|
||||
let rootPath = "";
|
||||
const output: StructuredImportData = {
|
||||
collections: [],
|
||||
documents: [],
|
||||
@@ -51,10 +49,16 @@ export default class ImportJSONTask extends ImportTask {
|
||||
// Load metadata
|
||||
let metadata: JSONExportMetadata | undefined = undefined;
|
||||
for (const node of tree) {
|
||||
if (node.path === "metadata.json") {
|
||||
const zipObject = zip.files["metadata.json"];
|
||||
metadata = JSON.parse(await zipObject.async("string"));
|
||||
if (!rootPath) {
|
||||
rootPath = path.dirname(node.path);
|
||||
}
|
||||
if (node.path === "metadata.json") {
|
||||
metadata = JSON.parse(await fs.readFile(node.path, "utf8"));
|
||||
}
|
||||
}
|
||||
|
||||
if (!rootPath) {
|
||||
throw new Error("Could not find root path");
|
||||
}
|
||||
|
||||
Logger.debug("task", "Importing JSON metadata", { metadata });
|
||||
@@ -93,13 +97,12 @@ export default class ImportJSONTask extends ImportTask {
|
||||
}) {
|
||||
Object.values(attachments).forEach((node) => {
|
||||
const id = uuidv4();
|
||||
const zipObject = zip.files[node.key];
|
||||
const mimeType = mime.lookup(node.key) || "application/octet-stream";
|
||||
|
||||
output.attachments.push({
|
||||
id,
|
||||
name: node.name,
|
||||
buffer: () => zipObject.async("nodebuffer"),
|
||||
buffer: () => fs.readFile(path.join(rootPath, node.key)),
|
||||
mimeType,
|
||||
path: node.key,
|
||||
externalId: node.id,
|
||||
@@ -109,17 +112,12 @@ export default class ImportJSONTask extends ImportTask {
|
||||
|
||||
// All nodes in the root level should be collections as JSON + metadata
|
||||
for (const node of tree) {
|
||||
if (
|
||||
node.path.endsWith("/") ||
|
||||
node.path === ".DS_Store" ||
|
||||
node.path === "metadata.json"
|
||||
) {
|
||||
if (node.children.length > 0 || node.path.endsWith("metadata.json")) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const zipObject = zip.files[node.path];
|
||||
const item: CollectionJSONExport = JSON.parse(
|
||||
await zipObject.async("string")
|
||||
await fs.readFile(node.path, "utf8")
|
||||
);
|
||||
|
||||
const collectionId = uuidv4();
|
||||
|
||||
@@ -77,8 +77,8 @@ describe("ImportMarkdownZipTask", () => {
|
||||
error = err;
|
||||
}
|
||||
|
||||
expect(error && error.message).toBe(
|
||||
"Uploaded file does not contain any valid documents"
|
||||
expect(error && error.message).toContain(
|
||||
"Uploaded file does not contain any valid collections"
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,40 +1,38 @@
|
||||
import JSZip from "jszip";
|
||||
import fs from "fs-extra";
|
||||
import escapeRegExp from "lodash/escapeRegExp";
|
||||
import mime from "mime-types";
|
||||
import { v4 as uuidv4 } from "uuid";
|
||||
import documentImporter from "@server/commands/documentImporter";
|
||||
import Logger from "@server/logging/Logger";
|
||||
import { FileOperation, User } from "@server/models";
|
||||
import ZipHelper, { FileTreeNode } from "@server/utils/ZipHelper";
|
||||
import ImportHelper, { FileTreeNode } from "@server/utils/ImportHelper";
|
||||
import ImportTask, { StructuredImportData } from "./ImportTask";
|
||||
|
||||
export default class ImportMarkdownZipTask extends ImportTask {
|
||||
public async parseData(
|
||||
stream: NodeJS.ReadableStream,
|
||||
dirPath: string,
|
||||
fileOperation: FileOperation
|
||||
): Promise<StructuredImportData> {
|
||||
const zip = await JSZip.loadAsync(stream);
|
||||
const tree = ZipHelper.toFileTree(zip);
|
||||
const tree = await ImportHelper.toFileTree(dirPath);
|
||||
if (!tree) {
|
||||
throw new Error("Could not find valid content in zip file");
|
||||
}
|
||||
|
||||
return this.parseFileTree({ fileOperation, zip, tree });
|
||||
return this.parseFileTree(fileOperation, tree.children);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts the file structure from zipAsFileTree into documents,
|
||||
* collections, and attachments.
|
||||
*
|
||||
* @param fileOperation The file operation
|
||||
* @param tree An array of FileTreeNode representing root files in the zip
|
||||
* @returns A StructuredImportData object
|
||||
*/
|
||||
private async parseFileTree({
|
||||
zip,
|
||||
tree,
|
||||
fileOperation,
|
||||
}: {
|
||||
zip: JSZip;
|
||||
fileOperation: FileOperation;
|
||||
tree: FileTreeNode[];
|
||||
}): Promise<StructuredImportData> {
|
||||
private async parseFileTree(
|
||||
fileOperation: FileOperation,
|
||||
tree: FileTreeNode[]
|
||||
): Promise<StructuredImportData> {
|
||||
const user = await User.findByPk(fileOperation.userId, {
|
||||
rejectOnEmpty: true,
|
||||
});
|
||||
@@ -59,14 +57,6 @@ export default class ImportMarkdownZipTask extends ImportTask {
|
||||
return parseNodeChildren(child.children, collectionId);
|
||||
}
|
||||
|
||||
const zipObject = zip.files[child.path];
|
||||
if (!zipObject) {
|
||||
Logger.info("task", "Zip file referenced path that doesn't exist", {
|
||||
path: child.path,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
const id = uuidv4();
|
||||
|
||||
// this is an attachment
|
||||
@@ -76,7 +66,7 @@ export default class ImportMarkdownZipTask extends ImportTask {
|
||||
name: child.name,
|
||||
path: child.path,
|
||||
mimeType: mime.lookup(child.path) || "application/octet-stream",
|
||||
buffer: () => zipObject.async("nodebuffer"),
|
||||
buffer: () => fs.readFile(child.path),
|
||||
});
|
||||
return;
|
||||
}
|
||||
@@ -84,29 +74,14 @@ export default class ImportMarkdownZipTask extends ImportTask {
|
||||
const { title, emoji, text } = await documentImporter({
|
||||
mimeType: "text/markdown",
|
||||
fileName: child.name,
|
||||
content: await zipObject.async("string"),
|
||||
content:
|
||||
child.children.length > 0
|
||||
? ""
|
||||
: await fs.readFile(child.path, "utf8"),
|
||||
user,
|
||||
ip: user.lastActiveIp || undefined,
|
||||
});
|
||||
|
||||
let metadata;
|
||||
try {
|
||||
metadata = zipObject.comment ? JSON.parse(zipObject.comment) : {};
|
||||
} catch (err) {
|
||||
Logger.debug(
|
||||
"task",
|
||||
`ZIP comment found for ${child.name}, but could not be parsed as metadata: ${zipObject.comment}`
|
||||
);
|
||||
}
|
||||
|
||||
const createdAt = metadata.createdAt
|
||||
? new Date(metadata.createdAt)
|
||||
: zipObject.date;
|
||||
|
||||
const updatedAt = metadata.updatedAt
|
||||
? new Date(metadata.updatedAt)
|
||||
: zipObject.date;
|
||||
|
||||
const existingDocumentIndex = output.documents.findIndex(
|
||||
(doc) =>
|
||||
doc.title === title &&
|
||||
@@ -134,8 +109,6 @@ export default class ImportMarkdownZipTask extends ImportTask {
|
||||
title,
|
||||
emoji,
|
||||
text,
|
||||
updatedAt,
|
||||
createdAt,
|
||||
collectionId,
|
||||
parentDocumentId,
|
||||
path: child.path,
|
||||
@@ -150,7 +123,7 @@ export default class ImportMarkdownZipTask extends ImportTask {
|
||||
|
||||
// All nodes in the root level should be collections
|
||||
for (const node of tree) {
|
||||
if (node.path.endsWith("/")) {
|
||||
if (node.children.length > 0) {
|
||||
const collectionId = uuidv4();
|
||||
output.collections.push({
|
||||
id: collectionId,
|
||||
|
||||
@@ -37,7 +37,9 @@ describe("ImportNotionTask", () => {
|
||||
// Check that the image url was replaced in the text with a redirect
|
||||
const attachments = Array.from(response.attachments.values());
|
||||
const documents = Array.from(response.documents.values());
|
||||
expect(documents[2].text).toContain(attachments[0].redirectUrl);
|
||||
expect(documents.map((d) => d.text).join("")).toContain(
|
||||
attachments[0].redirectUrl
|
||||
);
|
||||
});
|
||||
|
||||
it("should import successfully from a HTML export", async () => {
|
||||
@@ -76,6 +78,8 @@ describe("ImportNotionTask", () => {
|
||||
);
|
||||
|
||||
const documents = Array.from(response.documents.values());
|
||||
expect(documents[1].text).toContain(attachment?.redirectUrl);
|
||||
expect(documents.map((d) => d.text).join("")).toContain(
|
||||
attachment?.redirectUrl
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import path from "path";
|
||||
import JSZip from "jszip";
|
||||
import fs from "fs-extra";
|
||||
import compact from "lodash/compact";
|
||||
import escapeRegExp from "lodash/escapeRegExp";
|
||||
import mime from "mime-types";
|
||||
@@ -7,35 +7,33 @@ import { v4 as uuidv4 } from "uuid";
|
||||
import documentImporter from "@server/commands/documentImporter";
|
||||
import Logger from "@server/logging/Logger";
|
||||
import { FileOperation, User } from "@server/models";
|
||||
import ZipHelper, { FileTreeNode } from "@server/utils/ZipHelper";
|
||||
import ImportHelper, { FileTreeNode } from "@server/utils/ImportHelper";
|
||||
import ImportTask, { StructuredImportData } from "./ImportTask";
|
||||
|
||||
export default class ImportNotionTask extends ImportTask {
|
||||
public async parseData(
|
||||
stream: NodeJS.ReadableStream,
|
||||
dirPath: string,
|
||||
fileOperation: FileOperation
|
||||
): Promise<StructuredImportData> {
|
||||
const zip = await JSZip.loadAsync(stream);
|
||||
const tree = ZipHelper.toFileTree(zip);
|
||||
return this.parseFileTree({ fileOperation, zip, tree });
|
||||
const tree = await ImportHelper.toFileTree(dirPath);
|
||||
if (!tree) {
|
||||
throw new Error("Could not find valid content in zip file");
|
||||
}
|
||||
return this.parseFileTree(fileOperation, tree.children);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts the file structure from zipAsFileTree into documents,
|
||||
* collections, and attachments.
|
||||
*
|
||||
* @param fileOperation The file operation
|
||||
* @param tree An array of FileTreeNode representing root files in the zip
|
||||
* @returns A StructuredImportData object
|
||||
*/
|
||||
private async parseFileTree({
|
||||
zip,
|
||||
tree,
|
||||
fileOperation,
|
||||
}: {
|
||||
zip: JSZip;
|
||||
fileOperation: FileOperation;
|
||||
tree: FileTreeNode[];
|
||||
}): Promise<StructuredImportData> {
|
||||
private async parseFileTree(
|
||||
fileOperation: FileOperation,
|
||||
tree: FileTreeNode[]
|
||||
): Promise<StructuredImportData> {
|
||||
const user = await User.findByPk(fileOperation.userId, {
|
||||
rejectOnEmpty: true,
|
||||
});
|
||||
@@ -58,7 +56,6 @@ export default class ImportNotionTask extends ImportTask {
|
||||
return;
|
||||
}
|
||||
|
||||
const zipObject = zip.files[child.path];
|
||||
const id = uuidv4();
|
||||
const match = child.title.match(this.NotionUUIDRegex);
|
||||
const name = child.title.replace(this.NotionUUIDRegex, "");
|
||||
@@ -78,7 +75,7 @@ export default class ImportNotionTask extends ImportTask {
|
||||
name: child.name,
|
||||
path: child.path,
|
||||
mimeType,
|
||||
buffer: () => zipObject.async("nodebuffer"),
|
||||
buffer: () => fs.readFile(child.path),
|
||||
externalId,
|
||||
});
|
||||
return;
|
||||
@@ -89,7 +86,10 @@ export default class ImportNotionTask extends ImportTask {
|
||||
const { title, emoji, text } = await documentImporter({
|
||||
mimeType: mimeType || "text/markdown",
|
||||
fileName: name,
|
||||
content: zipObject ? await zipObject.async("string") : "",
|
||||
content:
|
||||
child.children.length > 0
|
||||
? ""
|
||||
: await fs.readFile(child.path, "utf8"),
|
||||
user,
|
||||
ip: user.lastActiveIp || undefined,
|
||||
});
|
||||
@@ -205,11 +205,10 @@ export default class ImportNotionTask extends ImportTask {
|
||||
mimeType === "text/plain" ||
|
||||
mimeType === "text/html"
|
||||
) {
|
||||
const zipObject = zip.files[node.path];
|
||||
const { text } = await documentImporter({
|
||||
mimeType,
|
||||
fileName: name,
|
||||
content: await zipObject.async("string"),
|
||||
content: await fs.readFile(node.path, "utf8"),
|
||||
user,
|
||||
ip: user.lastActiveIp || undefined,
|
||||
});
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
import path from "path";
|
||||
import { rm } from "fs-extra";
|
||||
import truncate from "lodash/truncate";
|
||||
import tmp from "tmp";
|
||||
import unzipper from "unzipper";
|
||||
import {
|
||||
AttachmentPreset,
|
||||
CollectionPermission,
|
||||
@@ -10,6 +13,7 @@ import { CollectionValidation } from "@shared/validations";
|
||||
import attachmentCreator from "@server/commands/attachmentCreator";
|
||||
import documentCreator from "@server/commands/documentCreator";
|
||||
import { serializer } from "@server/editor";
|
||||
import env from "@server/env";
|
||||
import { InternalError, ValidationError } from "@server/errors";
|
||||
import Logger from "@server/logging/Logger";
|
||||
import {
|
||||
@@ -98,19 +102,22 @@ export default abstract class ImportTask extends BaseTask<Props> {
|
||||
* @param props The props
|
||||
*/
|
||||
public async perform({ fileOperationId }: Props) {
|
||||
let dirPath;
|
||||
const fileOperation = await FileOperation.findByPk(fileOperationId, {
|
||||
rejectOnEmpty: true,
|
||||
});
|
||||
|
||||
try {
|
||||
Logger.info("task", `ImportTask fetching data for ${fileOperationId}`);
|
||||
const data = await this.fetchData(fileOperation);
|
||||
if (!data) {
|
||||
dirPath = await this.fetchAndExtractData(fileOperation);
|
||||
if (!dirPath) {
|
||||
throw InternalError("Failed to fetch data for import from storage.");
|
||||
}
|
||||
|
||||
Logger.info("task", `ImportTask parsing data for ${fileOperationId}`);
|
||||
const parsed = await this.parseData(data, fileOperation);
|
||||
Logger.info("task", `ImportTask parsing data for ${fileOperationId}`, {
|
||||
dirPath,
|
||||
});
|
||||
const parsed = await this.parseData(dirPath, fileOperation);
|
||||
|
||||
if (parsed.collections.length === 0) {
|
||||
throw ValidationError(
|
||||
@@ -152,6 +159,10 @@ export default abstract class ImportTask extends BaseTask<Props> {
|
||||
error
|
||||
);
|
||||
throw error;
|
||||
} finally {
|
||||
if (dirPath) {
|
||||
await this.cleanupExtractedData(dirPath, fileOperation);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -179,38 +190,70 @@ export default abstract class ImportTask extends BaseTask<Props> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch the remote data associated with the file operation as a Buffer.
|
||||
* Fetch the remote data associated with the file operation into a temporary disk location.
|
||||
*
|
||||
* @param fileOperation The FileOperation to fetch data for
|
||||
* @returns A promise that resolves to the data as a buffer.
|
||||
* @returns A promise that resolves to the temporary file path.
|
||||
*/
|
||||
protected async fetchData(fileOperation: FileOperation): Promise<Buffer> {
|
||||
protected async fetchAndExtractData(
|
||||
fileOperation: FileOperation
|
||||
): Promise<string> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const bufs: Buffer[] = [];
|
||||
const stream = fileOperation.stream;
|
||||
if (!stream) {
|
||||
return reject(new Error("No stream available"));
|
||||
}
|
||||
|
||||
stream.on("data", function (d) {
|
||||
bufs.push(d);
|
||||
});
|
||||
stream.on("error", reject);
|
||||
stream.on("end", () => {
|
||||
resolve(Buffer.concat(bufs));
|
||||
tmp.dir((err, path) => {
|
||||
if (err) {
|
||||
return reject(err);
|
||||
}
|
||||
|
||||
const dest = unzipper
|
||||
.Extract({ path, verbose: env.isDevelopment })
|
||||
.on("error", reject)
|
||||
.on("close", () => resolve(path));
|
||||
|
||||
stream
|
||||
.on("error", (err) => {
|
||||
dest.end();
|
||||
reject(err);
|
||||
})
|
||||
.pipe(dest);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the data loaded from fetchData into a consistent structured format
|
||||
* Cleanup the temporary directory where the data was fetched and extracted.
|
||||
*
|
||||
* @param dirPath The temporary directory path where the data was fetched
|
||||
* @param fileOperation The associated FileOperation
|
||||
*/
|
||||
protected async cleanupExtractedData(
|
||||
dirPath: string,
|
||||
fileOperation: FileOperation
|
||||
) {
|
||||
try {
|
||||
await rm(dirPath, { recursive: true, force: true });
|
||||
} catch (error) {
|
||||
Logger.error(
|
||||
`ImportTask failed to cleanup extracted data for ${fileOperation.id}`,
|
||||
error
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the data loaded from fetchAndExtractData into a consistent structured format
|
||||
* that represents collections, documents, and the relationships between them.
|
||||
*
|
||||
* @param data The data loaded from fetchData
|
||||
* @param dirPath The temporary directory path where the data was fetched
|
||||
* @param fileOperation The FileOperation to parse data for
|
||||
* @returns A promise that resolves to the structured data
|
||||
*/
|
||||
protected abstract parseData(
|
||||
data: Buffer | NodeJS.ReadableStream,
|
||||
dirPath: string,
|
||||
fileOperation: FileOperation
|
||||
): Promise<StructuredImportData>;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user