feat: Add import/export of documents as JSON (#4621)

* feat: Add export of documents as JSON

* Rename, add structured collection description

* stash

* ui

* Add entity creation data to JSON archive

* Import JSON UI plumbing

* stash

* Messy, but working

* tsc

* tsc
This commit is contained in:
Tom Moor
2023-01-29 10:24:44 -08:00
committed by GitHub
parent 85ca25371c
commit d02d3cb55d
23 changed files with 649 additions and 119 deletions

View File

@@ -4,8 +4,10 @@ import DocumentHelper from "@server/models/helpers/DocumentHelper";
type Props = {
id?: string;
urlId?: string;
title: string;
text: string;
text?: string;
state?: Buffer;
publish?: boolean;
collectionId?: string | null;
parentDocumentId?: string | null;
@@ -19,13 +21,15 @@ type Props = {
editorVersion?: string;
source?: "import";
ip?: string;
transaction: Transaction;
transaction?: Transaction;
};
export default async function documentCreator({
title = "",
text = "",
state,
id,
urlId,
publish,
collectionId,
parentDocumentId,
@@ -43,9 +47,24 @@ export default async function documentCreator({
transaction,
}: Props): Promise<Document> {
const templateId = templateDocument ? templateDocument.id : undefined;
if (urlId) {
const existing = await Document.unscoped().findOne({
attributes: ["id"],
transaction,
where: {
urlId,
},
});
if (existing) {
urlId = undefined;
}
}
const document = await Document.create(
{
id,
urlId,
parentDocumentId,
editorVersion,
collectionId,
@@ -63,8 +82,10 @@ export default async function documentCreator({
? DocumentHelper.replaceTemplateVariables(templateDocument.title, user)
: title,
text: templateDocument ? templateDocument.text : text,
state,
},
{
silent: !!createdAt,
transaction,
}
);

View File

@@ -21,12 +21,12 @@ import {
Length as SimpleLength,
} from "sequelize-typescript";
import isUUID from "validator/lib/isUUID";
import type { CollectionSort } from "@shared/types";
import { CollectionPermission, NavigationNode } from "@shared/types";
import { sortNavigationNodes } from "@shared/utils/collections";
import { SLUG_URL_REGEX } from "@shared/utils/urlHelpers";
import { CollectionValidation } from "@shared/validations";
import slugify from "@server/utils/slugify";
import type { CollectionSort } from "~/types";
import CollectionGroup from "./CollectionGroup";
import CollectionUser from "./CollectionUser";
import Document from "./Document";
@@ -153,7 +153,7 @@ class Collection extends ParanoidModel {
msg: `description must be ${CollectionValidation.maxDescriptionLength} characters or less`,
})
@Column
description: string;
description: string | null;
@Length({
max: 50,

View File

@@ -3,7 +3,9 @@ import { FileOperationFormat, FileOperationType } from "@shared/types";
import { FileOperation } from "@server/models";
import { Event as TEvent, FileOperationEvent } from "@server/types";
import ExportHTMLZipTask from "../tasks/ExportHTMLZipTask";
import ExportJSONTask from "../tasks/ExportJSONTask";
import ExportMarkdownZipTask from "../tasks/ExportMarkdownZipTask";
import ImportJSONTask from "../tasks/ImportJSONTask";
import ImportMarkdownZipTask from "../tasks/ImportMarkdownZipTask";
import ImportNotionTask from "../tasks/ImportNotionTask";
import BaseProcessor from "./BaseProcessor";
@@ -32,6 +34,11 @@ export default class FileOperationsProcessor extends BaseProcessor {
fileOperationId: event.modelId,
});
break;
case FileOperationFormat.JSON:
await ImportJSONTask.schedule({
fileOperationId: event.modelId,
});
break;
default:
}
}
@@ -48,6 +55,11 @@ export default class FileOperationsProcessor extends BaseProcessor {
fileOperationId: event.modelId,
});
break;
case FileOperationFormat.JSON:
await ExportJSONTask.schedule({
fileOperationId: event.modelId,
});
break;
default:
}
}

View File

@@ -0,0 +1,143 @@
import JSZip from "jszip";
import { omit } from "lodash";
import { NavigationNode } from "@shared/types";
import { parser } from "@server/editor";
import env from "@server/env";
import Logger from "@server/logging/Logger";
import {
Attachment,
Collection,
Document,
FileOperation,
} from "@server/models";
import DocumentHelper from "@server/models/helpers/DocumentHelper";
import { presentAttachment, presentCollection } from "@server/presenters";
import { CollectionJSONExport, JSONExportMetadata } from "@server/types";
import ZipHelper from "@server/utils/ZipHelper";
import { serializeFilename } from "@server/utils/fs";
import parseAttachmentIds from "@server/utils/parseAttachmentIds";
import { getFileByKey } from "@server/utils/s3";
import packageJson from "../../../package.json";
import ExportTask from "./ExportTask";
/**
 * Task that exports a set of collections to a zip archive in Outline's JSON
 * format. Each collection becomes `<collection-name>.json`; referenced
 * attachments are stored alongside under their storage keys, and a
 * `metadata.json` describes the export itself.
 */
export default class ExportJSONTask extends ExportTask {
/**
 * Build the zip archive for the export and return a path to a temp file.
 *
 * @param collections The collections to include in the archive
 * @param fileOperation The FileOperation record driving this export
 * @returns A promise resolving to the temporary file path of the zip
 */
public async export(collections: Collection[], fileOperation: FileOperation) {
const zip = new JSZip();
// serial to avoid overloading, slow and steady wins the race
for (const collection of collections) {
await this.addCollectionToArchive(zip, collection);
}
await this.addMetadataToArchive(zip, fileOperation);
return ZipHelper.toTmpFile(zip);
}
// Writes `metadata.json` into the archive root, recording the export format
// version, app version, timestamp, and the exporting user's id/email.
private async addMetadataToArchive(zip: JSZip, fileOperation: FileOperation) {
const user = await fileOperation.$get("user");
const metadata: JSONExportMetadata = {
exportVersion: 1,
version: packageJson.version,
createdAt: new Date().toISOString(),
createdById: fileOperation.userId,
// user may have been deleted since the operation was created
createdByEmail: user?.email ?? null,
};
zip.file(
`metadata.json`,
// pretty-print in development for easier debugging, compact otherwise
env.ENVIRONMENT === "development"
? JSON.stringify(metadata, null, 2)
: JSON.stringify(metadata)
);
}
// Serializes one collection — its description, document tree, and any
// attachments referenced from document text — into the archive.
private async addCollectionToArchive(zip: JSZip, collection: Collection) {
const output: CollectionJSONExport = {
collection: {
// url and documents are presentation-only; documentStructure below
// carries the tree instead
...omit(presentCollection(collection), ["url", "documents"]),
// description is stored as markdown; export it as a prosemirror doc
description: collection.description
? parser.parse(collection.description)
: null,
documentStructure: collection.documentStructure,
},
documents: {},
attachments: {},
};
// Walks the collection's navigation tree depth-first, loading each
// document (with collaborative state) and its attachments.
async function addDocumentTree(nodes: NavigationNode[]) {
for (const node of nodes) {
const document = await Document.findByPk(node.id, {
includeState: true,
});
// document may have been deleted since the structure was saved
if (!document) {
continue;
}
// attachments are discovered by scanning the document text for ids
const attachments = await Attachment.findAll({
where: {
teamId: document.teamId,
id: parseAttachmentIds(document.text),
},
});
await Promise.all(
attachments.map(async (attachment) => {
try {
const stream = getFileByKey(attachment.key);
if (stream) {
// store the file under its storage key so import can match it
zip.file(attachment.key, stream, {
createFolders: true,
});
}
output.attachments[attachment.id] = {
...omit(presentAttachment(attachment), "url"),
key: attachment.key,
};
} catch (err) {
// best-effort: a missing file should not abort the whole export
Logger.error(
`Failed to add attachment to archive: ${attachment.key}`,
err
);
}
})
);
output.documents[document.id] = {
id: document.id,
urlId: document.urlId,
title: document.title,
// full document content as a prosemirror JSON node
data: DocumentHelper.toProsemirror(document),
createdById: document.createdById,
createdByEmail: document.createdBy.email,
createdAt: document.createdAt.toISOString(),
updatedAt: document.updatedAt.toISOString(),
publishedAt: document.publishedAt
? document.publishedAt.toISOString()
: null,
fullWidth: document.fullWidth,
template: document.template,
parentDocumentId: document.parentDocumentId,
};
if (node.children?.length > 0) {
await addDocumentTree(node.children);
}
}
}
if (collection.documentStructure) {
await addDocumentTree(collection.documentStructure);
}
// one JSON file per collection, named after the collection
zip.file(
`${serializeFilename(collection.name)}.json`,
env.ENVIRONMENT === "development"
? JSON.stringify(output, null, 2)
: JSON.stringify(output)
);
}
}

View File

@@ -47,7 +47,7 @@ export default abstract class ExportTask extends BaseTask<Props> {
state: FileOperationState.Creating,
});
const filePath = await this.export(collections);
const filePath = await this.export(collections, fileOperation);
Logger.info("task", `ExportTask uploading data for ${fileOperationId}`);
@@ -98,7 +98,10 @@ export default abstract class ExportTask extends BaseTask<Props> {
* @param collections The collections to export
* @returns A promise that resolves to a temporary file path
*/
protected abstract export(collections: Collection[]): Promise<string>;
protected abstract export(
collections: Collection[],
fileOperation: FileOperation
): Promise<string>;
/**
* Update the state of the underlying FileOperation in the database and send

View File

@@ -0,0 +1,171 @@
import JSZip from "jszip";
import { escapeRegExp, find } from "lodash";
import mime from "mime-types";
import { Node } from "prosemirror-model";
import { v4 as uuidv4 } from "uuid";
import { schema, serializer } from "@server/editor";
import Logger from "@server/logging/Logger";
import { FileOperation } from "@server/models";
import {
AttachmentJSONExport,
CollectionJSONExport,
DocumentJSONExport,
JSONExportMetadata,
} from "@server/types";
import ZipHelper, { FileTreeNode } from "@server/utils/ZipHelper";
import ImportTask, { StructuredImportData } from "./ImportTask";
/**
 * Task that parses a zip archive in Outline's JSON export format (one JSON
 * file per collection plus `metadata.json` and attachment files) into the
 * StructuredImportData consumed by the base ImportTask for persistence.
 */
export default class ImportJSONTask extends ImportTask {
/**
 * Load the uploaded zip from a buffer and convert it to structured data.
 *
 * @param buffer The raw zip file contents
 * @param fileOperation The FileOperation record driving this import
 * @returns A promise resolving to StructuredImportData
 */
public async parseData(
buffer: Buffer,
fileOperation: FileOperation
): Promise<StructuredImportData> {
const zip = await JSZip.loadAsync(buffer);
const tree = ZipHelper.toFileTree(zip);
return this.parseFileTree({ fileOperation, zip, tree });
}
/**
* Converts the file structure from zipAsFileTree into documents,
* collections, and attachments.
*
* @param tree An array of FileTreeNode representing root files in the zip
* @returns A StructuredImportData object
*/
private async parseFileTree({
zip,
tree,
}: {
zip: JSZip;
// NOTE(review): fileOperation is accepted but unused here — confirm intended
fileOperation: FileOperation;
tree: FileTreeNode[];
}): Promise<StructuredImportData> {
const output: StructuredImportData = {
collections: [],
documents: [],
attachments: [],
};
// Load metadata
let metadata: JSONExportMetadata | undefined = undefined;
for (const node of tree) {
if (node.path === "metadata.json") {
const zipObject = zip.files["metadata.json"];
metadata = JSON.parse(await zipObject.async("string"));
}
}
// metadata is currently only logged; exportVersion is not yet checked
Logger.debug("task", "Importing JSON metadata", { metadata });
// Maps exported documents into the output with fresh ids, keeping the
// original id as sourceId so references can be remapped.
// NOTE(review): the `async` on the forEach callback is spurious (nothing is
// awaited inside, and forEach ignores returned promises) — the body runs
// synchronously in practice.
function mapDocuments(
documents: { [id: string]: DocumentJSONExport },
collectionId: string
) {
Object.values(documents).forEach(async (node) => {
const id = uuidv4();
output.documents.push({
...node,
path: "",
// TODO: This is kind of temporary, we can import the document
// structure directly in the future.
text: serializer.serialize(Node.fromJSON(schema, node.data)),
createdAt: node.createdAt ? new Date(node.createdAt) : undefined,
updatedAt: node.updatedAt ? new Date(node.updatedAt) : undefined,
publishedAt: node.publishedAt ? new Date(node.publishedAt) : null,
collectionId,
sourceId: node.id,
// NOTE(review): this lookup only finds parents already pushed, so it
// relies on parents preceding children in `documents` — confirm the
// export's insertion order guarantees this
parentDocumentId: node.parentDocumentId
? find(
output.documents,
(d) => d.sourceId === node.parentDocumentId
)?.id
: null,
id,
});
});
}
// Maps exported attachments into the output; file contents are read lazily
// via the buffer() thunk rather than decompressed up front.
// NOTE(review): as above, the `async` forEach callback is spurious.
async function mapAttachments(attachments: {
[id: string]: AttachmentJSONExport;
}) {
Object.values(attachments).forEach(async (node) => {
const id = uuidv4();
const zipObject = zip.files[node.key];
const mimeType = mime.lookup(node.key) || "application/octet-stream";
output.attachments.push({
id,
name: node.name,
buffer: () => zipObject.async("nodebuffer"),
mimeType,
path: node.key,
sourceId: node.id,
});
});
}
// All nodes in the root level should be collections as JSON + metadata
for (const node of tree) {
// skip directories, macOS cruft, and the metadata file handled above
if (
node.path.endsWith("/") ||
node.path === ".DS_Store" ||
node.path === "metadata.json"
) {
continue;
}
const zipObject = zip.files[node.path];
const item: CollectionJSONExport = JSON.parse(
await zipObject.async("string")
);
const collectionId = uuidv4();
output.collections.push({
...item.collection,
// descriptions exported as prosemirror JSON are serialized back to
// markdown; string descriptions pass through unchanged
description:
item.collection.description &&
typeof item.collection.description === "object"
? serializer.serialize(
Node.fromJSON(schema, item.collection.description)
)
: item.collection.description,
id: collectionId,
sourceId: item.collection.id,
});
if (Object.values(item.documents).length) {
await mapDocuments(item.documents, collectionId);
}
if (Object.values(item.attachments).length) {
await mapAttachments(item.attachments);
}
}
// Check all of the attachments we've created against urls in the text
// and replace them out with attachment redirect urls before continuing.
for (const document of output.documents) {
for (const attachment of output.attachments) {
const encodedPath = encodeURI(attachment.path);
// Pull the collection and subdirectory out of the path name, upload
// folders in an export are relative to the document itself
const normalizedAttachmentPath = encodedPath.replace(
/(.*)uploads\//,
"uploads/"
);
// `<<id>>` placeholders are resolved to real urls later by ImportTask
const reference = `<<${attachment.id}>>`;
document.text = document.text
.replace(new RegExp(escapeRegExp(encodedPath), "g"), reference)
.replace(
new RegExp(`/?${escapeRegExp(normalizedAttachmentPath)}`, "g"),
reference
);
}
}
return output;
}
}

View File

@@ -35,7 +35,9 @@ export default class ImportMarkdownZipTask extends ImportTask {
fileOperation: FileOperation;
tree: FileTreeNode[];
}): Promise<StructuredImportData> {
const user = await User.findByPk(fileOperation.userId);
const user = await User.findByPk(fileOperation.userId, {
rejectOnEmpty: true,
});
const output: StructuredImportData = {
collections: [],
documents: [],
@@ -47,10 +49,6 @@ export default class ImportMarkdownZipTask extends ImportTask {
collectionId: string,
parentDocumentId?: string
): Promise<void> {
if (!user) {
throw new Error("User not found");
}
await Promise.all(
children.map(async (child) => {
// special case for folders of attachments

View File

@@ -35,10 +35,9 @@ export default class ImportNotionTask extends ImportTask {
fileOperation: FileOperation;
tree: FileTreeNode[];
}): Promise<StructuredImportData> {
const user = await User.findByPk(fileOperation.userId);
if (!user) {
throw new Error("User not found");
}
const user = await User.findByPk(fileOperation.userId, {
rejectOnEmpty: true,
});
const output: StructuredImportData = {
collections: [],
@@ -51,10 +50,6 @@ export default class ImportNotionTask extends ImportTask {
collectionId: string,
parentDocumentId?: string
): Promise<void> => {
if (!user) {
throw new Error("User not found");
}
await Promise.all(
children.map(async (child) => {
// Ignore the CSV's for databases upfront
@@ -245,7 +240,7 @@ export default class ImportNotionTask extends ImportTask {
}
for (const collection of output.collections) {
if (collection.description) {
if (typeof collection.description === "string") {
collection.description = replaceInternalLinksAndImages(
collection.description
);

View File

@@ -1,10 +1,15 @@
import { S3 } from "aws-sdk";
import { truncate } from "lodash";
import { CollectionPermission, FileOperationState } from "@shared/types";
import {
CollectionPermission,
CollectionSort,
FileOperationState,
} from "@shared/types";
import { CollectionValidation } from "@shared/validations";
import attachmentCreator from "@server/commands/attachmentCreator";
import documentCreator from "@server/commands/documentCreator";
import { sequelize } from "@server/database/sequelize";
import { serializer } from "@server/editor";
import { InternalError, ValidationError } from "@server/errors";
import Logger from "@server/logging/Logger";
import {
@@ -27,6 +32,11 @@ type Props = {
export type StructuredImportData = {
collections: {
id: string;
urlId?: string;
color?: string;
icon?: string | null;
sort?: CollectionSort;
permission?: CollectionPermission | null;
name: string;
/**
* The collection description. To reference an attachment or image use the
@@ -37,12 +47,13 @@ export type StructuredImportData = {
* link to the document as part of persistData once the document url is
* generated.
*/
description?: string;
description?: string | Record<string, any> | null;
/** Optional id from import source, useful for mapping */
sourceId?: string;
}[];
documents: {
id: string;
urlId?: string;
title: string;
/**
* The document text. To reference an attachment or image use the special
@@ -54,10 +65,14 @@ export type StructuredImportData = {
* is generated.
*/
text: string;
data?: Record<string, any>;
collectionId: string;
updatedAt?: Date;
createdAt?: Date;
parentDocumentId?: string;
publishedAt?: Date | null;
parentDocumentId?: string | null;
createdById?: string;
createdByEmail?: string | null;
path: string;
/** Optional id from import source, useful for mapping */
sourceId?: string;
@@ -96,7 +111,7 @@ export default abstract class ImportTask extends BaseTask<Props> {
if (parsed.collections.length === 0) {
throw ValidationError(
"Uploaded file does not contain any collections. The root of the zip file must contain folders representing collections."
"Uploaded file does not contain any valid collections. It may be corrupt, the wrong type, or version."
);
}
@@ -246,6 +261,12 @@ export default abstract class ImportTask extends BaseTask<Props> {
Logger.debug("task", `ImportTask persisting collection ${item.id}`);
let description = item.description;
// Description can be markdown text or a Prosemirror object if coming
// from JSON format. In that case we need to serialize to Markdown.
if (description instanceof Object) {
description = serializer.serialize(description);
}
if (description) {
// Check all of the attachments we've created against urls in the text
// and replace them out with attachment redirect urls before saving.
@@ -272,6 +293,21 @@ export default abstract class ImportTask extends BaseTask<Props> {
}
}
const options: { urlId?: string } = {};
if (item.urlId) {
const existing = await Collection.unscoped().findOne({
attributes: ["id"],
transaction,
where: {
urlId: item.urlId,
},
});
if (!existing) {
options.urlId = item.urlId;
}
}
// check if collection with name exists
const response = await Collection.findOrCreate({
where: {
@@ -279,10 +315,13 @@ export default abstract class ImportTask extends BaseTask<Props> {
name: item.name,
},
defaults: {
...options,
id: item.id,
description: truncate(description, {
length: CollectionValidation.maxDescriptionLength,
}),
description: description
? truncate(description, {
length: CollectionValidation.maxDescriptionLength,
})
: null,
createdById: fileOperation.userId,
permission: CollectionPermission.ReadWrite,
importId: fileOperation.id,
@@ -300,12 +339,16 @@ export default abstract class ImportTask extends BaseTask<Props> {
const name = `${item.name} (Imported)`;
collection = await Collection.create(
{
...options,
id: item.id,
description,
color: item.color,
icon: item.icon,
sort: item.sort,
teamId: fileOperation.teamId,
createdById: fileOperation.userId,
name,
permission: CollectionPermission.ReadWrite,
permission: item.permission ?? CollectionPermission.ReadWrite,
importId: fileOperation.id,
},
{ transaction }
@@ -360,7 +403,23 @@ export default abstract class ImportTask extends BaseTask<Props> {
);
}
const options: { urlId?: string } = {};
if (item.urlId) {
const existing = await Document.unscoped().findOne({
attributes: ["id"],
transaction,
where: {
urlId: item.urlId,
},
});
if (!existing) {
options.urlId = item.urlId;
}
}
const document = await documentCreator({
...options,
source: "import",
id: item.id,
title: item.title,

View File

@@ -2,7 +2,12 @@ import { ParameterizedContext, DefaultContext } from "koa";
import { IRouterParamContext } from "koa-router";
import { Transaction } from "sequelize/types";
import { z } from "zod";
import { Client } from "@shared/types";
import {
CollectionSort,
NavigationNode,
Client,
CollectionPermission,
} from "@shared/types";
import BaseSchema from "@server/routes/api/BaseSchema";
import { AccountProvisionerResult } from "./commands/accountProvisioner";
import { FileOperation, Team, User } from "./models";
@@ -343,3 +348,60 @@ export type Event =
export type NotificationMetadata = {
notificationId?: string;
};
/** Metadata stored as `metadata.json` in the root of a JSON export archive. */
export type JSONExportMetadata = {
/* The version of the export, allows updated structure in the future. */
exportVersion: number;
/* The version of the application that created the export. */
version: string;
/* The date the export was created. */
createdAt: string;
/* The ID of the user that created the export. */
createdById: string;
/* The email of the user that created the export. */
createdByEmail: string | null;
};
/** A single document as serialized into a JSON export archive. */
export type DocumentJSONExport = {
id: string;
urlId: string;
title: string;
/* The document content as a ProseMirror document node. */
data: Record<string, any>;
createdById: string;
createdByEmail: string | null;
/* ISO 8601 timestamps. */
createdAt: string;
updatedAt: string;
publishedAt: string | null;
fullWidth: boolean;
template: boolean;
/* Null when the document sits at the root of the collection. */
parentDocumentId: string | null;
};
/** A single attachment as serialized into a JSON export archive. */
export type AttachmentJSONExport = {
id: string;
documentId: string | null;
contentType: string;
name: string;
size: number;
/* Storage key; also the path of the file inside the archive. */
key: string;
};
/** The shape of one `<collection-name>.json` file in an export archive. */
export type CollectionJSONExport = {
collection: {
id: string;
urlId: string;
name: string;
/* Collection description as a ProseMirror document node, if any. */
description: Record<string, any> | null;
permission?: CollectionPermission | null;
color: string;
icon?: string | null;
sort: CollectionSort;
/* The navigation tree of documents within the collection. */
documentStructure: NavigationNode[] | null;
};
/* Documents keyed by their original id. */
documents: {
[id: string]: DocumentJSONExport;
};
/* Attachments keyed by their original id. */
attachments: {
[id: string]: AttachmentJSONExport;
};
};