import fs from "fs-extra";
import escapeRegExp from "lodash/escapeRegExp";
import mime from "mime-types";
import { v4 as uuidv4 } from "uuid";
import documentImporter from "@server/commands/documentImporter";
import Logger from "@server/logging/Logger";
import { FileOperation, User } from "@server/models";
import { Buckets } from "@server/models/helpers/AttachmentHelper";
import ImportHelper, { FileTreeNode } from "@server/utils/ImportHelper";
import ImportTask, { StructuredImportData } from "./ImportTask";

export default class ImportMarkdownZipTask extends ImportTask {
  /**
   * Converts the unzipped directory on disk into structured import data.
   *
   * @param dirPath The path to the unzipped content on disk
   * @param fileOperation The file operation
   * @returns A StructuredImportData object
   */
  public async parseData(
    dirPath: string,
    fileOperation: FileOperation
  ): Promise<StructuredImportData> {
    const tree = await ImportHelper.toFileTree(dirPath);
    if (!tree) {
      throw new Error("Could not find valid content in zip file");
    }

    return this.parseFileTree(fileOperation, tree.children);
  }

  /**
   * Converts the file structure from ImportHelper.toFileTree into documents,
   * collections, and attachments.
   *
   * @param fileOperation The file operation
   * @param tree An array of FileTreeNode representing root files in the zip
   * @returns A StructuredImportData object
   */
  private async parseFileTree(
    fileOperation: FileOperation,
    tree: FileTreeNode[]
  ): Promise<StructuredImportData> {
    const user = await User.findByPk(fileOperation.userId, {
      rejectOnEmpty: true,
    });
    const output: StructuredImportData = {
      collections: [],
      documents: [],
      attachments: [],
    };

    async function parseNodeChildren(
      children: FileTreeNode[],
      collectionId: string,
      parentDocumentId?: string
    ): Promise<void> {
      await Promise.all(
        children.map(async (child) => {
          // Special case for folders of attachments: recurse into them
          // without creating a document for the folder itself.
          if (
            child.name === Buckets.uploads ||
            child.name === Buckets.public ||
            (child.children.length > 0 &&
              (child.path.includes(`/${Buckets.public}/`) ||
                child.path.includes(`/${Buckets.uploads}/`)))
          ) {
            return parseNodeChildren(child.children, collectionId);
          }

          const id = uuidv4();

          // A leaf node inside an uploads or public folder is an attachment
          if (
            child.children.length === 0 &&
            (child.path.includes(`/${Buckets.uploads}/`) ||
              child.path.includes(`/${Buckets.public}/`))
          ) {
            output.attachments.push({
              id,
              name: child.name,
              path: child.path,
              mimeType: mime.lookup(child.path) || "application/octet-stream",
              buffer: () => fs.readFile(child.path),
            });
            return;
          }

          // Every other node is a document. A folder produces an empty
          // parent document; a file's content is read from disk.
          const { title, icon, text } = await documentImporter({
            mimeType: "text/markdown",
            fileName: child.name,
            content:
              child.children.length > 0
                ? ""
                : await fs.readFile(child.path, "utf8"),
            user,
            ip: user.lastActiveIp || undefined,
          });

          const existingDocumentIndex = output.documents.findIndex(
            (doc) =>
              doc.title === title &&
              doc.collectionId === collectionId &&
              doc.parentDocumentId === parentDocumentId
          );

          const existingDocument = output.documents[existingDocumentIndex];

          // When a file and a folder share the same name, combine the two
          // into a single document with nested children.
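          // e.g. a zip containing both "Guide.md" and a "Guide/" folder
          // produces a single "Guide" document whose children come from the
          // folder's contents.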
          if (existingDocument) {
            // Only overwrite the text if the placeholder created for the
            // folder is still empty.
            if (existingDocument.text === "") {
              output.documents[existingDocumentIndex].text = text;
            }

            await parseNodeChildren(
              child.children,
              collectionId,
              existingDocument.id
            );
          } else {
            output.documents.push({
              id,
              title,
              emoji: icon,
              icon,
              text,
              collectionId,
              parentDocumentId,
              path: child.path,
              mimeType: "text/markdown",
            });

            await parseNodeChildren(child.children, collectionId, id);
          }
        })
      );
    }

    // All nodes at the root level should be collections; loose files at the
    // root cannot be imported and are logged.
    for (const node of tree) {
      if (node.children.length > 0) {
        const collectionId = uuidv4();
        output.collections.push({
          id: collectionId,
          name: node.title,
        });
        await parseNodeChildren(node.children, collectionId);
      } else {
        Logger.debug("task", `Unhandled file in zip: ${node.path}`, {
          fileOperationId: fileOperation.id,
        });
      }
    }

    // Check all of the attachments we've created against urls in the text
    // and replace them with attachment redirect urls before continuing.
    for (const document of output.documents) {
      for (const attachment of output.attachments) {
        const encodedPath = encodeURI(attachment.path);

        // Pull the collection and subdirectory out of the path name; upload
        // folders in an export are relative to the document itself.
        const normalizedAttachmentPath = encodedPath
          .replace(
            new RegExp(`(.*)/${Buckets.uploads}/`),
            `${Buckets.uploads}/`
          )
          .replace(new RegExp(`(.*)/${Buckets.public}/`), `${Buckets.public}/`);

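        // e.g. "My%20Collection/My%20Doc/uploads/image.png" is normalized to
        // "uploads/image.png", matching the relative links that markdown
        // exports use.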
        const reference = `<<${attachment.id}>>`;
        document.text = document.text
          .replace(new RegExp(escapeRegExp(encodedPath), "g"), reference)
          .replace(
            new RegExp(`/?${escapeRegExp(normalizedAttachmentPath)}`, "g"),
            reference
          );
      }
    }

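    // At this point each document's text contains "<<attachment-id>>"
    // placeholder tokens instead of file paths; these are assumed to be
    // resolved to real attachment urls when the import data is persisted.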
    return output;
  }
}
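
// Usage sketch (illustrative only; the surrounding task framework is an
// assumption): the base ImportTask is expected to unzip the uploaded file
// to a temporary directory and hand the path to parseData, e.g.
//
//   const task = new ImportMarkdownZipTask();
//   const data = await task.parseData("/tmp/extracted", fileOperation);
//   // data.collections, data.documents, and data.attachments are then
//   // persisted by the caller.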