fix: Exports generate invalid internal links (#4639)
* refactoring * Refactoring continues * Refactor export, fix internal links in exported docs * fix: Dupe document name detection * sigh
This commit is contained in:
195
server/queues/tasks/ExportDocumentTreeTask.ts
Normal file
195
server/queues/tasks/ExportDocumentTreeTask.ts
Normal file
@@ -0,0 +1,195 @@
|
||||
import path from "path";
|
||||
import JSZip from "jszip";
|
||||
import { FileOperationFormat } from "@shared/types";
|
||||
import Logger from "@server/logging/Logger";
|
||||
import { Collection } from "@server/models";
|
||||
import Attachment from "@server/models/Attachment";
|
||||
import Document from "@server/models/Document";
|
||||
import DocumentHelper from "@server/models/helpers/DocumentHelper";
|
||||
import ZipHelper from "@server/utils/ZipHelper";
|
||||
import { serializeFilename } from "@server/utils/fs";
|
||||
import parseAttachmentIds from "@server/utils/parseAttachmentIds";
|
||||
import { getFileByKey } from "@server/utils/s3";
|
||||
import { NavigationNode } from "~/types";
|
||||
import ExportTask from "./ExportTask";
|
||||
|
||||
/**
 * Base class for export tasks that write collections of documents, and the
 * attachments those documents reference, into a zip archive.
 */
export default abstract class ExportDocumentTreeTask extends ExportTask {
  /**
   * Adds a single document, together with any attachments it references, to
   * the given zip instance. Attachment urls and internal document links in
   * the exported text are rewritten to paths inside the zip.
   *
   * @param zip The JSZip instance to add files to
   * @param pathInZip The path in the zip to add the document to
   * @param documentId The document ID to export
   * @param format The format to export in
   * @param pathMap Map of document urls to their path in the zip, used to
   * rewrite internal links
   */
  protected async addDocumentToArchive({
    zip,
    pathInZip,
    documentId,
    format = FileOperationFormat.MarkdownZip,
    pathMap,
  }: {
    zip: JSZip;
    pathInZip: string;
    documentId: string;
    format: FileOperationFormat;
    pathMap: Map<string, string>;
  }) {
    const document = await Document.findByPk(documentId);
    if (!document) {
      return;
    }

    let text =
      format === FileOperationFormat.HTMLZip
        ? await DocumentHelper.toHTML(document, { centered: true })
        : await DocumentHelper.toMarkdown(document);

    const attachments = await Attachment.findAll({
      where: {
        teamId: document.teamId,
        id: parseAttachmentIds(document.text),
      },
    });

    // Attachments are stored next to the document so that the rewritten,
    // relative reference in the text resolves from the document's location.
    const dir = path.dirname(pathInZip);

    // Add any referenced attachments to the zip file and replace the
    // reference in the document with the path to the attachment in the zip
    await Promise.all(
      attachments.map(async (attachment) => {
        try {
          const img = await getFileByKey(attachment.key);

          if (img) {
            zip.file(path.join(dir, attachment.key), img as Blob, {
              createFolders: true,
            });
          }
        } catch (err) {
          Logger.error(
            `Failed to add attachment to archive: ${attachment.key}`,
            err
          );
        }

        // split/join rewrites every occurrence of the url and, unlike
        // String.replace with a string replacement, never interprets "$"
        // sequences that may appear in the attachment key.
        text = text
          .split(attachment.redirectUrl)
          .join(encodeURI(attachment.key));
      })
    );

    // Replace any internal links with relative paths to the document in the
    // zip. A replacer function is used so each link is rewritten in place and
    // "$" characters in file names are inserted literally.
    text = text.replace(
      /\/doc\/(?:[0-9a-zA-Z-_~]*-)?([a-zA-Z0-9]{10,15})/g,
      (matchedLink) => {
        const matchedDocPath = pathMap.get(matchedLink);
        if (!matchedDocPath) {
          return matchedLink;
        }

        // path.relative treats pathInZip (a file) as if it were a directory,
        // so the result carries one "../" too many. Dropping the leading
        // character turns "../x" into "./x". An empty result (a link to the
        // document itself) is left untouched.
        const relativePath = path.relative(pathInZip, matchedDocPath);
        return relativePath.startsWith(".")
          ? encodeURI(relativePath.substring(1))
          : matchedLink;
      }
    );

    // Finally, add the document to the zip file
    zip.file(pathInZip, text, {
      date: document.updatedAt,
      createFolders: true,
      comment: JSON.stringify({
        createdAt: document.createdAt,
        updatedAt: document.updatedAt,
      }),
    });
  }

  /**
   * Exports the documents and attachments in the given collections to a zip file
   * and returns the path to the zip file in tmp.
   *
   * @param zip The JSZip instance to add files to
   * @param collections The collections to export
   * @param format The format to export in
   *
   * @returns The path to the zip file in tmp.
   */
  protected async addCollectionsToArchive(
    zip: JSZip,
    collections: Collection[],
    format: FileOperationFormat
  ) {
    const pathMap = this.createPathMap(collections, format);

    // Documents are added one at a time, in path map insertion order.
    for (const [url, pathInZip] of pathMap) {
      // The map is keyed by document url; the remainder after "/doc/" is
      // what gets passed to Document.findByPk.
      const documentId = url.replace("/doc/", "");

      await this.addDocumentToArchive({
        zip,
        pathInZip,
        documentId,
        format,
        pathMap,
      });
    }

    return ZipHelper.toTmpFile(zip);
  }

  /**
   * Generates a map of document urls to their path in the zip file.
   *
   * @param collections The collections whose document structure is mapped
   * @param format The export format, determines the file extension
   * @returns Map of document url to path in the zip
   */
  private createPathMap(
    collections: Collection[],
    format: FileOperationFormat
  ) {
    const map = new Map<string, string>();
    const usedPaths = new Set<string>();

    for (const collection of collections) {
      if (collection.documentStructure) {
        this.addDocumentTreeToPathMap(
          map,
          collection.documentStructure,
          serializeFilename(collection.name),
          format,
          usedPaths
        );
      }
    }

    return map;
  }

  /**
   * Recursively assigns every node in the tree a unique file path below
   * `root` and records it in the map under the node's url.
   *
   * @param map The map of document url to path in zip, mutated in place
   * @param nodes The navigation nodes to add
   * @param root The directory in the zip that the nodes are placed in
   * @param format The export format, determines the file extension
   * @param usedPaths Paths already handed out; defaults to the map's values
   */
  private addDocumentTreeToPathMap(
    map: Map<string, string>,
    nodes: NavigationNode[],
    root: string,
    format: FileOperationFormat,
    usedPaths: Set<string> = new Set(map.values())
  ) {
    const extension = format === FileOperationFormat.HTMLZip ? "html" : "md";

    for (const node of nodes) {
      const title = serializeFilename(node.title) || "Untitled";

      // Ensure the document is given a unique path in zip, even if it has
      // the same title as another document in the same collection.
      let i = 0;
      let baseName = title;
      let filePath = path.join(root, `${baseName}.${extension}`);
      while (usedPaths.has(filePath)) {
        baseName = `${title} (${++i})`;
        filePath = path.join(root, `${baseName}.${extension}`);
      }

      usedPaths.add(filePath);
      map.set(node.url, filePath);

      if (node.children?.length) {
        // Children live in a folder named after the (de-duplicated) file so
        // that two documents sharing a title keep separate child folders.
        this.addDocumentTreeToPathMap(
          map,
          node.children,
          path.join(root, baseName),
          format,
          usedPaths
        );
      }
    }
  }
}
|
||||
@@ -1,10 +1,16 @@
|
||||
import JSZip from "jszip";
|
||||
import { FileOperationFormat } from "@shared/types";
|
||||
import { Collection } from "@server/models";
|
||||
import { archiveCollections } from "@server/utils/zip";
|
||||
import ExportTask from "./ExportTask";
|
||||
import ExportDocumentTreeTask from "./ExportDocumentTreeTask";
|
||||
|
||||
export default class ExportHTMLZipTask extends ExportTask {
|
||||
export default class ExportHTMLZipTask extends ExportDocumentTreeTask {
|
||||
public async export(collections: Collection[]) {
|
||||
return await archiveCollections(collections, FileOperationFormat.HTMLZip);
|
||||
const zip = new JSZip();
|
||||
|
||||
return await this.addCollectionsToArchive(
|
||||
zip,
|
||||
collections,
|
||||
FileOperationFormat.HTMLZip
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,11 +1,14 @@
|
||||
import JSZip from "jszip";
|
||||
import { FileOperationFormat } from "@shared/types";
|
||||
import { Collection } from "@server/models";
|
||||
import { archiveCollections } from "@server/utils/zip";
|
||||
import ExportTask from "./ExportTask";
|
||||
import ExportDocumentTreeTask from "./ExportDocumentTreeTask";
|
||||
|
||||
export default class ExportMarkdownZipTask extends ExportTask {
|
||||
export default class ExportMarkdownZipTask extends ExportDocumentTreeTask {
|
||||
public async export(collections: Collection[]) {
|
||||
return await archiveCollections(
|
||||
const zip = new JSZip();
|
||||
|
||||
return await this.addCollectionsToArchive(
|
||||
zip,
|
||||
collections,
|
||||
FileOperationFormat.MarkdownZip
|
||||
);
|
||||
|
||||
@@ -5,7 +5,7 @@ import { v4 as uuidv4 } from "uuid";
|
||||
import documentImporter from "@server/commands/documentImporter";
|
||||
import Logger from "@server/logging/Logger";
|
||||
import { FileOperation, User } from "@server/models";
|
||||
import { zipAsFileTree, FileTreeNode } from "@server/utils/zip";
|
||||
import ZipHelper, { FileTreeNode } from "@server/utils/ZipHelper";
|
||||
import ImportTask, { StructuredImportData } from "./ImportTask";
|
||||
|
||||
export default class ImportMarkdownZipTask extends ImportTask {
|
||||
@@ -14,7 +14,7 @@ export default class ImportMarkdownZipTask extends ImportTask {
|
||||
fileOperation: FileOperation
|
||||
): Promise<StructuredImportData> {
|
||||
const zip = await JSZip.loadAsync(buffer);
|
||||
const tree = zipAsFileTree(zip);
|
||||
const tree = ZipHelper.toFileTree(zip);
|
||||
|
||||
return this.parseFileTree({ fileOperation, zip, tree });
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ import { v4 as uuidv4 } from "uuid";
|
||||
import documentImporter from "@server/commands/documentImporter";
|
||||
import Logger from "@server/logging/Logger";
|
||||
import { FileOperation, User } from "@server/models";
|
||||
import { zipAsFileTree, FileTreeNode } from "@server/utils/zip";
|
||||
import ZipHelper, { FileTreeNode } from "@server/utils/ZipHelper";
|
||||
import ImportTask, { StructuredImportData } from "./ImportTask";
|
||||
|
||||
export default class ImportNotionTask extends ImportTask {
|
||||
@@ -15,7 +15,7 @@ export default class ImportNotionTask extends ImportTask {
|
||||
fileOperation: FileOperation
|
||||
): Promise<StructuredImportData> {
|
||||
const zip = await JSZip.loadAsync(buffer);
|
||||
const tree = zipAsFileTree(zip);
|
||||
const tree = ZipHelper.toFileTree(zip);
|
||||
return this.parseFileTree({ fileOperation, zip, tree });
|
||||
}
|
||||
|
||||
|
||||
115
server/utils/ZipHelper.ts
Normal file
115
server/utils/ZipHelper.ts
Normal file
@@ -0,0 +1,115 @@
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
import JSZip from "jszip";
|
||||
import { find } from "lodash";
|
||||
import tmp from "tmp";
|
||||
import { ValidationError } from "@server/errors";
|
||||
import { trace } from "@server/logging/tracing";
|
||||
import { deserializeFilename } from "./fs";
|
||||
|
||||
export type FileTreeNode = {
|
||||
/** The title, extracted from the file name */
|
||||
title: string;
|
||||
/** The file name including extension */
|
||||
name: string;
|
||||
/** Full path to the file within the zip file */
|
||||
path: string;
|
||||
/** Any nested children */
|
||||
children: FileTreeNode[];
|
||||
};
|
||||
|
||||
@trace()
export default class ZipHelper {
  /**
   * Converts the flat structure returned by JSZIP into a nested file structure
   * for easier processing.
   *
   * Entries below "__MACOSX" and ".DS_Store" files are ignored.
   *
   * @param zip The JSZip instance
   * @param maxFiles The maximum number of files to unzip (Prevent zip bombs)
   * @returns The root level nodes of the file tree
   * @throws ValidationError if the zip contains more than maxFiles entries
   */
  public static toFileTree(
    zip: JSZip,
    /** The maximum number of files to unzip */
    maxFiles = 10000
  ) {
    let fileCount = 0;
    const paths = Object.keys(zip.files).map((filePath) => {
      if (++fileCount > maxFiles) {
        throw ValidationError("Too many files in zip");
      }

      return `/${filePath}`;
    });
    const tree: FileTreeNode[] = [];

    for (const filePath of paths) {
      if (filePath.startsWith("/__MACOSX")) {
        continue;
      }

      const pathParts = filePath.split("/");

      // Remove first blank element from the parts array.
      pathParts.shift();

      let currentLevel = tree; // initialize currentLevel to root

      for (const name of pathParts) {
        // check to see if the path already exists.
        const existingPath = find(currentLevel, {
          name,
        });

        if (existingPath) {
          // The path to this item was already in the tree, so don't add again.
          // Set the current level to this path's children
          currentLevel = existingPath.children;
        } else if (name.endsWith(".DS_Store") || !name) {
          // Skip OS metadata files and the empty trailing part produced by
          // directory entries ending in "/".
          continue;
        } else {
          const newPart: FileTreeNode = {
            name,
            path: filePath.replace(/^\//, ""),
            title: deserializeFilename(path.parse(path.basename(name)).name),
            children: [],
          };

          currentLevel.push(newPart);
          currentLevel = newPart.children;
        }
      }
    }

    return tree;
  }

  /**
   * Write a zip file to a temporary disk location
   *
   * @param zip JSZip object
   * @returns pathname of the temporary file where the zip was written to disk
   */
  public static async toTmpFile(zip: JSZip): Promise<string> {
    return new Promise((resolve, reject) => {
      tmp.file(
        {
          prefix: "export-",
          postfix: ".zip",
        },
        (err, tmpPath) => {
          if (err) {
            return reject(err);
          }

          const destination = fs.createWriteStream(tmpPath);

          zip
            .generateNodeStream({
              type: "nodebuffer",
              streamFiles: true,
            })
            // pipe() does not forward errors from the source stream, so a
            // failure while generating the zip must be handled here or the
            // promise would never settle.
            .on("error", (error) => {
              destination.destroy();
              reject(error);
            })
            .pipe(destination)
            .on("finish", () => resolve(tmpPath))
            .on("error", reject);
        }
      );
    });
  }
}
|
||||
@@ -1,266 +0,0 @@
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
import JSZip, { JSZipObject } from "jszip";
|
||||
import { find } from "lodash";
|
||||
import tmp from "tmp";
|
||||
import { FileOperationFormat } from "@shared/types";
|
||||
import { ValidationError } from "@server/errors";
|
||||
import Logger from "@server/logging/Logger";
|
||||
import Attachment from "@server/models/Attachment";
|
||||
import Collection from "@server/models/Collection";
|
||||
import Document from "@server/models/Document";
|
||||
import DocumentHelper from "@server/models/helpers/DocumentHelper";
|
||||
import { NavigationNode } from "~/types";
|
||||
import { deserializeFilename, serializeFilename } from "./fs";
|
||||
import parseAttachmentIds from "./parseAttachmentIds";
|
||||
import { getFileByKey } from "./s3";
|
||||
|
||||
type ItemType = "collection" | "document" | "attachment";
|
||||
|
||||
export type Item = {
|
||||
path: string;
|
||||
dir: string;
|
||||
name: string;
|
||||
depth: number;
|
||||
metadata: Record<string, any>;
|
||||
type: ItemType;
|
||||
item: JSZipObject;
|
||||
};
|
||||
|
||||
export type FileTreeNode = {
|
||||
/** The title, extracted from the file name */
|
||||
title: string;
|
||||
/** The file name including extension */
|
||||
name: string;
|
||||
/** The full path to within the zip file */
|
||||
path: string;
|
||||
/** The nested children */
|
||||
children: FileTreeNode[];
|
||||
};
|
||||
|
||||
/**
 * Recursively adds the given documents, the attachments they reference and
 * their child documents to the zip (or zip folder).
 *
 * @param zip JSZip object to add to
 * @param documents The navigation nodes of the documents to add
 * @param format The format to export in
 */
async function addDocumentTreeToArchive(
  zip: JSZip,
  documents: NavigationNode[],
  format = FileOperationFormat.MarkdownZip
) {
  const extension = format === FileOperationFormat.HTMLZip ? "html" : "md";

  for (const doc of documents) {
    const document = await Document.findByPk(doc.id);

    if (!document) {
      continue;
    }

    let text =
      format === FileOperationFormat.HTMLZip
        ? await DocumentHelper.toHTML(document, { centered: true })
        : await DocumentHelper.toMarkdown(document);
    const attachments = await Attachment.findAll({
      where: {
        teamId: document.teamId,
        id: parseAttachmentIds(document.text),
      },
    });

    for (const attachment of attachments) {
      await addImageToArchive(zip, attachment.key);
      // split/join rewrites every occurrence of the url and never interprets
      // "$" sequences in the key as replacement patterns.
      text = text
        .split(attachment.redirectUrl)
        .join(encodeURI(attachment.key));
    }

    const title = serializeFilename(document.title) || "Untitled";

    // The returned name may carry a " (n)" suffix when the title was taken.
    const fileName = safeAddFileToArchive(zip, `${title}.${extension}`, text, {
      date: document.updatedAt,
      comment: JSON.stringify({
        createdAt: document.createdAt,
        updatedAt: document.updatedAt,
      }),
    });

    if (doc.children && doc.children.length) {
      // Children go in a folder named after the file, minus its extension.
      const folder = zip.folder(path.parse(fileName).name);

      if (folder) {
        await addDocumentTreeToArchive(folder, doc.children, format);
      }
    }
  }
}
|
||||
|
||||
/**
 * Fetches a file from remote storage by key and stores it in the zip under
 * that same key. Failures are logged and otherwise ignored.
 *
 * @param zip JSZip object to add to
 * @param key path to file in S3 storage
 */
async function addImageToArchive(zip: JSZip, key: string) {
  try {
    const contents = await getFileByKey(key);
    const fileOptions = { createFolders: true };

    // @ts-expect-error Blob
    zip.file(key, contents, fileOptions);
  } catch (err) {
    Logger.error("Error loading image attachment from S3", err, { key });
  }
}
|
||||
|
||||
/**
 * Adds content to a zip file, if the given filename already exists in the
 * same directory of the zip then a number is appended to the filename.
 *
 * Only files that are direct children of the zip's current root are
 * considered when looking for duplicates; files with the same name in nested
 * or unrelated folders do not cause a rename.
 *
 * @param zip JSZip object to add to
 * @param key filename with extension
 * @param content the content to add
 * @param options options for added content
 * @returns The new title
 */
function safeAddFileToArchive(
  zip: JSZip,
  key: string,
  content: string | Uint8Array | ArrayBuffer | Blob,
  options: JSZip.JSZipFileOptions
) {
  // @ts-expect-error root exists
  const root = zip.root;

  // Filenames in the directory already, with any " (n)" suffix removed so
  // that earlier de-duplicated copies count towards the same name.
  const keysInDirectory = Object.keys(zip.files)
    .filter((k) => k.startsWith(root))
    .map((k) => k.slice(root.length))
    // Anything still containing a separator is a folder or a nested file.
    .filter((k) => k.length > 0 && !k.includes("/"))
    .map((k) => path.basename(k).replace(/\s\((\d+)\)\./, "."));

  // The number of duplicate filenames
  const existingKeysCount = keysInDirectory.filter((t) => t === key).length;
  const filename = path.parse(key).name;
  const extension = path.extname(key);

  // Construct the new de-duplicated filename (if any)
  const safeKey =
    existingKeysCount > 0
      ? `${filename} (${existingKeysCount})${extension}`
      : key;

  zip.file(safeKey, content, options);
  return safeKey;
}
|
||||
|
||||
/**
 * Write a zip file to a temporary disk location
 *
 * @param zip JSZip object
 * @returns pathname of the temporary file where the zip was written to disk
 */
async function archiveToPath(zip: JSZip): Promise<string> {
  return new Promise((resolve, reject) => {
    tmp.file(
      {
        prefix: "export-",
        postfix: ".zip",
      },
      (err, tmpPath) => {
        if (err) {
          return reject(err);
        }

        const destination = fs.createWriteStream(tmpPath);

        zip
          .generateNodeStream({
            type: "nodebuffer",
            streamFiles: true,
          })
          // pipe() does not forward errors from the source stream, so a
          // failure while generating the zip must be handled here or the
          // promise would never settle.
          .on("error", (error) => {
            destination.destroy();
            reject(error);
          })
          .pipe(destination)
          .on("finish", () => resolve(tmpPath))
          .on("error", reject);
      }
    );
  });
}
|
||||
|
||||
/**
 * Builds a zip containing one folder per collection, filled with that
 * collection's document tree, and writes it to a temporary file.
 *
 * @param collections The collections to export
 * @param format The format to export in
 * @returns pathname of the temporary file where the zip was written to disk
 */
export async function archiveCollections(
  collections: Collection[],
  format: FileOperationFormat
) {
  const archive = new JSZip();

  for (const collection of collections) {
    // Collections without a document structure are left out of the archive.
    if (!collection.documentStructure) {
      continue;
    }

    const collectionFolder = archive.folder(serializeFilename(collection.name));
    if (!collectionFolder) {
      continue;
    }

    await addDocumentTreeToArchive(
      collectionFolder,
      collection.documentStructure,
      format
    );
  }

  return archiveToPath(archive);
}
|
||||
|
||||
/**
 * Turns the flat list of entries held by a JSZip instance into a nested tree
 * of nodes. Entries below "__MACOSX" and ".DS_Store" files are left out.
 *
 * @param zip The JSZip instance
 * @param maxFiles The maximum number of files to unzip
 * @returns The root level nodes of the file tree
 */
export function zipAsFileTree(
  zip: JSZip,
  /** The maximum number of files to unzip */
  maxFiles = 10000
) {
  let seen = 0;
  const entryPaths = Object.keys(zip.files).map((entry) => {
    seen += 1;
    if (seen > maxFiles) {
      throw ValidationError("Too many files in zip");
    }

    return `/${entry}`;
  });
  const tree: FileTreeNode[] = [];

  for (const entryPath of entryPaths) {
    if (entryPath.startsWith("/__MACOSX")) {
      continue;
    }

    // Drop the blank first element produced by the leading slash.
    const segments = entryPath.split("/").slice(1);

    // Walk down from the root, creating nodes as needed.
    let level = tree;

    for (const name of segments) {
      const known = find(level, { name });

      if (known) {
        // Already in the tree, descend into its children.
        level = known.children;
        continue;
      }

      if (name.endsWith(".DS_Store") || !name) {
        continue;
      }

      const node: FileTreeNode = {
        name,
        path: entryPath.replace(/^\//, ""),
        title: deserializeFilename(path.parse(path.basename(name)).name),
        children: [],
      };

      level.push(node);
      level = node.children;
    }
  }

  return tree;
}
|
||||
Reference in New Issue
Block a user