feat: Bulk HTML export (#4620)

* wip

* Working bulk html export

* Refactor

* test

* test
This commit is contained in:
Tom Moor
2022-12-30 17:42:20 +00:00
committed by GitHub
parent 1b8dd9399c
commit f3469d25fe
32 changed files with 485 additions and 258 deletions

View File

@@ -1,7 +1,7 @@
import { Transaction } from "sequelize";
import { v4 as uuidv4 } from "uuid";
import { Attachment, Event, User } from "@server/models";
import { uploadToS3FromBuffer } from "@server/utils/s3";
import { uploadToS3 } from "@server/utils/s3";
export default async function attachmentCreator({
id,
@@ -24,7 +24,13 @@ export default async function attachmentCreator({
}) {
const key = `uploads/${user.id}/${uuidv4()}/${name}`;
const acl = process.env.AWS_S3_ACL || "private";
const url = await uploadToS3FromBuffer(buffer, type, key, acl);
const url = await uploadToS3({
body: buffer,
contentType: type,
contentLength: buffer.length,
key,
acl,
});
const attachment = await Attachment.create(
{
id,

View File

@@ -1,33 +1,37 @@
import { Transaction } from "sequelize";
import { APM } from "@server/logging/tracing";
import { Collection, Event, Team, User, FileOperation } from "@server/models";
import {
FileOperationFormat,
FileOperationType,
FileOperationState,
FileOperationFormat,
} from "@server/models/FileOperation";
} from "@shared/types";
import { APM } from "@server/logging/tracing";
import { Collection, Event, Team, User, FileOperation } from "@server/models";
import { getAWSKeyForFileOp } from "@server/utils/s3";
/** Arguments accepted by collectionExporter. */
type Props = {
/** Collection to export; when omitted the team name is used for the file key instead. */
collection?: Collection;
/** Team the export belongs to. */
team: Team;
/** User requesting the export; their teamId is used to build the file key. */
user: User;
/** Archive format to produce; collectionExporter defaults this to MarkdownZip. */
format?: FileOperationFormat;
/** IP address of the request — NOTE(review): appears to be recorded on the event, confirm. */
ip: string;
/** Sequelize transaction — presumably the writes run inside it; confirm against the create calls. */
transaction: Transaction;
};
async function collectionExporter({
collection,
team,
user,
format = FileOperationFormat.MarkdownZip,
ip,
transaction,
}: {
collection?: Collection;
team: Team;
user: User;
ip: string;
transaction: Transaction;
}) {
}: Props) {
const collectionId = collection?.id;
const key = getAWSKeyForFileOp(user.teamId, collection?.name || team.name);
const fileOperation = await FileOperation.create(
{
type: FileOperationType.Export,
state: FileOperationState.Creating,
format: FileOperationFormat.MarkdownZip,
format,
key,
url: null,
size: 0,
@@ -49,7 +53,8 @@ async function collectionExporter({
collectionId,
ip,
data: {
type: FileOperationType.Import,
type: FileOperationType.Export,
format,
},
},
{

View File

@@ -8,6 +8,11 @@ import {
Table,
DataType,
} from "sequelize-typescript";
import {
FileOperationFormat,
FileOperationState,
FileOperationType,
} from "@shared/types";
import { deleteFromS3, getFileByKey } from "@server/utils/s3";
import Collection from "./Collection";
import Team from "./Team";
@@ -15,24 +20,6 @@ import User from "./User";
import IdModel from "./base/IdModel";
import Fix from "./decorators/Fix";
/** Direction of a file operation: data coming in or going out. */
export enum FileOperationType {
Import = "import",
Export = "export",
}
/** File format handled by a file operation. */
export enum FileOperationFormat {
/** Zip archive of Markdown documents. */
MarkdownZip = "outline-markdown",
/** Notion format — NOTE(review): presumably import-only, confirm. */
Notion = "notion",
}
/** Lifecycle state of a file operation. */
export enum FileOperationState {
/** Initial state; export tasks also set it while building the archive. */
Creating = "creating",
/** Set by export tasks while the archive is being uploaded. */
Uploading = "uploading",
/** Set by export tasks once the upload finished and size/url were stored. */
Complete = "complete",
/** Set by export tasks when processing failed. */
Error = "error",
/** NOTE(review): presumably set by the expired-operations cleanup task — confirm. */
Expired = "expired",
}
@DefaultScope(() => ({
include: [
{

View File

@@ -1,10 +1,8 @@
import invariant from "invariant";
import { FileOperationFormat, FileOperationType } from "@shared/types";
import { FileOperation } from "@server/models";
import {
FileOperationFormat,
FileOperationType,
} from "@server/models/FileOperation";
import { Event as TEvent, FileOperationEvent } from "@server/types";
import ExportHTMLZipTask from "../tasks/ExportHTMLZipTask";
import ExportMarkdownZipTask from "../tasks/ExportMarkdownZipTask";
import ImportMarkdownZipTask from "../tasks/ImportMarkdownZipTask";
import ImportNotionTask from "../tasks/ImportNotionTask";
@@ -40,6 +38,11 @@ export default class FileOperationsProcessor extends BaseProcessor {
if (fileOperation.type === FileOperationType.Export) {
switch (fileOperation.format) {
case FileOperationFormat.HTMLZip:
await ExportHTMLZipTask.schedule({
fileOperationId: event.modelId,
});
break;
case FileOperationFormat.MarkdownZip:
await ExportMarkdownZipTask.schedule({
fileOperationId: event.modelId,

View File

@@ -1,9 +1,6 @@
import { subDays } from "date-fns";
import { FileOperationState, FileOperationType } from "@shared/types";
import { FileOperation } from "@server/models";
import {
FileOperationState,
FileOperationType,
} from "@server/models/FileOperation";
import { buildFileOperation } from "@server/test/factories";
import { setupTestDatabase } from "@server/test/support";
import CleanupExpiredFileOperationsTask from "./CleanupExpiredFileOperationsTask";

View File

@@ -1,8 +1,8 @@
import { subDays } from "date-fns";
import { Op } from "sequelize";
import { FileOperationState } from "@shared/types";
import Logger from "@server/logging/Logger";
import { FileOperation } from "@server/models";
import { FileOperationState } from "@server/models/FileOperation";
import BaseTask, { TaskPriority } from "./BaseTask";
type Props = {

View File

@@ -0,0 +1,10 @@
import { FileOperationFormat } from "@shared/types";
import { Collection } from "@server/models";
import { archiveCollections } from "@server/utils/zip";
import ExportTask from "./ExportTask";
/**
 * Export task that packages the given collections as a zip archive using the
 * HTML format.
 */
export default class ExportHTMLZipTask extends ExportTask {
  /**
   * Archives the collections in the HTMLZip format.
   *
   * @param collections The collections to export
   * @returns A promise that resolves to the value returned by archiveCollections
   */
  public async export(collections: Collection[]) {
    const format = FileOperationFormat.HTMLZip;
    const archivePath = await archiveCollections(collections, format);
    return archivePath;
  }
}

View File

@@ -1,130 +1,13 @@
import fs from "fs";
import { truncate } from "lodash";
import ExportFailureEmail from "@server/emails/templates/ExportFailureEmail";
import ExportSuccessEmail from "@server/emails/templates/ExportSuccessEmail";
import Logger from "@server/logging/Logger";
import { Collection, Event, FileOperation, Team, User } from "@server/models";
import { FileOperationState } from "@server/models/FileOperation";
import fileOperationPresenter from "@server/presenters/fileOperation";
import { uploadToS3FromBuffer } from "@server/utils/s3";
import { FileOperationFormat } from "@shared/types";
import { Collection } from "@server/models";
import { archiveCollections } from "@server/utils/zip";
import BaseTask, { TaskPriority } from "./BaseTask";
import ExportTask from "./ExportTask";
/** Arguments accepted by the export task. */
type Props = {
/** Primary key of the FileOperation row the task should process. */
fileOperationId: string;
};
export default class ExportMarkdownZipTask extends BaseTask<Props> {
/**
* Runs the export task.
*
* @param props The props
*/
public async perform({ fileOperationId }: Props) {
const fileOperation = await FileOperation.findByPk(fileOperationId, {
rejectOnEmpty: true,
});
const [team, user] = await Promise.all([
Team.findByPk(fileOperation.teamId, { rejectOnEmpty: true }),
User.findByPk(fileOperation.userId, { rejectOnEmpty: true }),
]);
const collectionIds = fileOperation.collectionId
? [fileOperation.collectionId]
: await user.collectionIds();
const collections = await Collection.findAll({
where: {
id: collectionIds,
},
});
try {
Logger.info("task", `ExportTask processing data for ${fileOperationId}`);
await this.updateFileOperation(fileOperation, {
state: FileOperationState.Creating,
});
const filePath = await archiveCollections(collections);
Logger.info("task", `ExportTask uploading data for ${fileOperationId}`);
await this.updateFileOperation(fileOperation, {
state: FileOperationState.Uploading,
});
const fileBuffer = await fs.promises.readFile(filePath);
const stat = await fs.promises.stat(filePath);
const url = await uploadToS3FromBuffer(
fileBuffer,
"application/zip",
fileOperation.key,
"private"
);
await this.updateFileOperation(fileOperation, {
size: stat.size,
state: FileOperationState.Complete,
url,
});
await ExportSuccessEmail.schedule({
to: user.email,
userId: user.id,
id: fileOperation.id,
teamUrl: team.url,
teamId: team.id,
});
} catch (error) {
await this.updateFileOperation(fileOperation, {
state: FileOperationState.Error,
error,
});
await ExportFailureEmail.schedule({
to: user.email,
userId: user.id,
teamUrl: team.url,
teamId: team.id,
});
throw error;
}
}
/**
* Update the state of the underlying FileOperation in the database and send
* an event to the client.
*
* @param fileOperation The FileOperation to update
*/
private async updateFileOperation(
fileOperation: FileOperation,
options: Partial<FileOperation> & { error?: Error }
) {
await fileOperation.update({
...options,
error: options.error
? truncate(options.error.message, { length: 255 })
: undefined,
});
await Event.schedule({
name: "fileOperations.update",
modelId: fileOperation.id,
teamId: fileOperation.teamId,
actorId: fileOperation.userId,
data: fileOperationPresenter(fileOperation),
});
}
/**
* Job options such as priority and retry strategy, as defined by Bull.
*/
public get options() {
return {
priority: TaskPriority.Background,
attempts: 1,
};
export default class ExportMarkdownZipTask extends ExportTask {
public async export(collections: Collection[]) {
return await archiveCollections(
collections,
FileOperationFormat.MarkdownZip
);
}
}

View File

@@ -0,0 +1,138 @@
import fs from "fs";
import { truncate } from "lodash";
import { FileOperationState } from "@shared/types";
import ExportFailureEmail from "@server/emails/templates/ExportFailureEmail";
import ExportSuccessEmail from "@server/emails/templates/ExportSuccessEmail";
import Logger from "@server/logging/Logger";
import { Collection, Event, FileOperation, Team, User } from "@server/models";
import fileOperationPresenter from "@server/presenters/fileOperation";
import { uploadToS3 } from "@server/utils/s3";
import BaseTask, { TaskPriority } from "./BaseTask";
/** Arguments accepted by export tasks. */
type Props = {
/** Primary key of the FileOperation row the task should process. */
fileOperationId: string;
};
/**
 * Shared implementation for export tasks. Loads the file operation together
 * with its team, user and collections, asks the subclass to build the archive
 * via `export`, uploads the result to S3 and emails the user the outcome.
 */
export default abstract class ExportTask extends BaseTask<Props> {
  /**
   * Transforms the data to be exported, uploads, and notifies user.
   *
   * @param props The props
   * @throws Rethrows any error raised while exporting or uploading, after the
   * file operation has been marked as errored and the failure email scheduled.
   */
  public async perform({ fileOperationId }: Props) {
    Logger.info("task", `ExportTask fetching data for ${fileOperationId}`);
    const fileOperation = await FileOperation.findByPk(fileOperationId, {
      rejectOnEmpty: true,
    });

    const [team, user] = await Promise.all([
      Team.findByPk(fileOperation.teamId, { rejectOnEmpty: true }),
      User.findByPk(fileOperation.userId, { rejectOnEmpty: true }),
    ]);

    // A file operation scoped to one collection exports only that collection,
    // otherwise everything returned by user.collectionIds() is exported.
    const collectionIds = fileOperation.collectionId
      ? [fileOperation.collectionId]
      : await user.collectionIds();

    const collections = await Collection.findAll({
      where: {
        id: collectionIds,
      },
    });

    // Tracked outside the try block so the file can be removed in `finally`.
    let filePath: string | undefined;

    try {
      Logger.info("task", `ExportTask processing data for ${fileOperationId}`);
      await this.updateFileOperation(fileOperation, {
        state: FileOperationState.Creating,
      });

      filePath = await this.export(collections);

      Logger.info("task", `ExportTask uploading data for ${fileOperationId}`);
      await this.updateFileOperation(fileOperation, {
        state: FileOperationState.Uploading,
      });

      // The archive is streamed from disk rather than read into memory, so the
      // content length has to be supplied explicitly from the file's size.
      const stat = await fs.promises.stat(filePath);
      const url = await uploadToS3({
        body: fs.createReadStream(filePath),
        contentLength: stat.size,
        contentType: "application/zip",
        key: fileOperation.key,
        acl: "private",
      });

      await this.updateFileOperation(fileOperation, {
        size: stat.size,
        state: FileOperationState.Complete,
        url,
      });

      await ExportSuccessEmail.schedule({
        to: user.email,
        userId: user.id,
        id: fileOperation.id,
        teamUrl: team.url,
        teamId: team.id,
      });
    } catch (error) {
      await this.updateFileOperation(fileOperation, {
        state: FileOperationState.Error,
        error,
      });
      await ExportFailureEmail.schedule({
        to: user.email,
        userId: user.id,
        teamUrl: team.url,
        teamId: team.id,
      });
      throw error;
    } finally {
      if (filePath) {
        // Best-effort removal of the temporary archive so exports do not
        // accumulate on disk; a failure here must not mask the export outcome.
        await fs.promises.unlink(filePath).catch(() => {
          Logger.info(
            "task",
            `ExportTask failed to remove temporary file for ${fileOperationId}`
          );
        });
      }
    }
  }

  /**
   * Transform the data in all of the passed collections into a single archive
   * file on disk.
   *
   * @param collections The collections to export
   * @returns A promise that resolves to a temporary file path
   */
  protected abstract export(collections: Collection[]): Promise<string>;

  /**
   * Update the state of the underlying FileOperation in the database and send
   * an event to the client.
   *
   * @param fileOperation The FileOperation to update
   * @param options The attributes to update, plus an optional error whose
   * message is stored truncated to 255 characters
   */
  private async updateFileOperation(
    fileOperation: FileOperation,
    options: Partial<FileOperation> & { error?: Error }
  ) {
    await fileOperation.update({
      ...options,
      error: options.error
        ? truncate(options.error.message, { length: 255 })
        : undefined,
    });
    await Event.schedule({
      name: "fileOperations.update",
      modelId: fileOperation.id,
      teamId: fileOperation.teamId,
      actorId: fileOperation.userId,
      data: fileOperationPresenter(fileOperation),
    });
  }

  /**
   * Job options such as priority and retry strategy, as defined by Bull.
   */
  public get options() {
    return {
      priority: TaskPriority.Background,
      attempts: 1,
    };
  }
}

View File

@@ -1,6 +1,6 @@
import { S3 } from "aws-sdk";
import { truncate } from "lodash";
import { CollectionPermission } from "@shared/types";
import { CollectionPermission, FileOperationState } from "@shared/types";
import { CollectionValidation } from "@shared/validations";
import attachmentCreator from "@server/commands/attachmentCreator";
import documentCreator from "@server/commands/documentCreator";
@@ -15,7 +15,6 @@ import {
FileOperation,
Attachment,
} from "@server/models";
import { FileOperationState } from "@server/models/FileOperation";
import BaseTask, { TaskPriority } from "./BaseTask";
type Props = {

View File

@@ -3,7 +3,12 @@ import invariant from "invariant";
import Router from "koa-router";
import { Sequelize, Op, WhereOptions } from "sequelize";
import { randomElement } from "@shared/random";
import { CollectionPermission } from "@shared/types";
import {
CollectionPermission,
FileOperationFormat,
FileOperationState,
FileOperationType,
} from "@shared/types";
import { colorPalette } from "@shared/utils/collections";
import { RateLimiterStrategy } from "@server/RateLimiter";
import collectionExporter from "@server/commands/collectionExporter";
@@ -27,11 +32,6 @@ import {
Attachment,
FileOperation,
} from "@server/models";
import {
FileOperationFormat,
FileOperationState,
FileOperationType,
} from "@server/models/FileOperation";
import { authorize } from "@server/policies";
import {
presentCollection,
@@ -576,16 +576,20 @@ router.post(
router.post(
"collections.export_all",
auth(),
rateLimiter(RateLimiterStrategy.TenPerHour),
rateLimiter(RateLimiterStrategy.FivePerHour),
async (ctx) => {
const { format = FileOperationFormat.MarkdownZip } = ctx.request.body;
const { user } = ctx.state;
const team = await Team.findByPk(user.teamId);
authorize(user, "createExport", team);
assertIn(format, Object.values(FileOperationFormat), "Invalid format");
const fileOperation = await sequelize.transaction(async (transaction) => {
return collectionExporter({
user,
team,
format,
ip: ctx.request.ip,
transaction,
});

View File

@@ -1,8 +1,5 @@
import { FileOperationState, FileOperationType } from "@shared/types";
import { Collection, User, Event, FileOperation } from "@server/models";
import {
FileOperationState,
FileOperationType,
} from "@server/models/FileOperation";
import {
buildAdmin,
buildCollection,

View File

@@ -1,10 +1,10 @@
import Router from "koa-router";
import { WhereOptions } from "sequelize/types";
import { FileOperationType } from "@shared/types";
import fileOperationDeleter from "@server/commands/fileOperationDeleter";
import { ValidationError } from "@server/errors";
import auth from "@server/middlewares/authentication";
import { FileOperation, Team } from "@server/models";
import { FileOperationType } from "@server/models/FileOperation";
import { authorize } from "@server/policies";
import { presentFileOperation } from "@server/presenters";
import { ContextWithState } from "@server/types";

View File

@@ -1,6 +1,10 @@
import { isNull } from "lodash";
import { v4 as uuidv4 } from "uuid";
import { CollectionPermission } from "@shared/types";
import {
CollectionPermission,
FileOperationState,
FileOperationType,
} from "@shared/types";
import {
Share,
Team,
@@ -21,10 +25,6 @@ import {
ApiKey,
Subscription,
} from "@server/models";
import {
FileOperationState,
FileOperationType,
} from "@server/models/FileOperation";
let count = 1;

View File

@@ -1,4 +1,4 @@
export const uploadToS3FromBuffer = jest.fn().mockReturnValue("/endpoint/key");
export const uploadToS3 = jest.fn().mockReturnValue("/endpoint/key");
export const publicS3Endpoint = jest.fn().mockReturnValue("http://mock");

View File

@@ -85,21 +85,28 @@ export const publicS3Endpoint = (isServerUpload?: boolean) => {
}${AWS_S3_UPLOAD_BUCKET_NAME}`;
};
export const uploadToS3FromBuffer = async (
buffer: Buffer,
contentType: string,
key: string,
acl: string
) => {
export const uploadToS3 = async ({
body,
contentLength,
contentType,
key,
acl,
}: {
body: S3.Body;
contentLength: number;
contentType: string;
key: string;
acl: string;
}) => {
await s3
.putObject({
ACL: acl,
Bucket: AWS_S3_UPLOAD_BUCKET_NAME,
Key: key,
ContentType: contentType,
ContentLength: buffer.length,
ContentLength: contentLength,
ContentDisposition: "attachment",
Body: buffer,
Body: body,
})
.promise();
const endpoint = publicS3Endpoint(true);

View File

@@ -3,6 +3,7 @@ import path from "path";
import JSZip, { JSZipObject } from "jszip";
import { find } from "lodash";
import tmp from "tmp";
import { FileOperationFormat } from "@shared/types";
import { ValidationError } from "@server/errors";
import Logger from "@server/logging/Logger";
import Attachment from "@server/models/Attachment";
@@ -26,9 +27,21 @@ export type Item = {
item: JSZipObject;
};
/** A node in a nested tree of files; each node holds its own child nodes. */
export type FileTreeNode = {
/** The title, extracted from the file name */
title: string;
/** The file name including extension */
name: string;
/** The full path within the zip file */
path: string;
/** The nested children */
children: FileTreeNode[];
};
async function addDocumentTreeToArchive(
zip: JSZip,
documents: NavigationNode[]
documents: NavigationNode[],
format = FileOperationFormat.MarkdownZip
) {
for (const doc of documents) {
const document = await Document.findByPk(doc.id);
@@ -37,7 +50,10 @@ async function addDocumentTreeToArchive(
continue;
}
let text = DocumentHelper.toMarkdown(document);
let text =
format === FileOperationFormat.HTMLZip
? await DocumentHelper.toHTML(document)
: await DocumentHelper.toMarkdown(document);
const attachments = await Attachment.findAll({
where: {
teamId: document.teamId,
@@ -52,7 +68,9 @@ async function addDocumentTreeToArchive(
let title = serializeFilename(document.title) || "Untitled";
title = safeAddFileToArchive(zip, `${title}.md`, text, {
const extension = format === FileOperationFormat.HTMLZip ? "html" : "md";
title = safeAddFileToArchive(zip, `${title}.${extension}`, text, {
date: document.updatedAt,
comment: JSON.stringify({
createdAt: document.createdAt,
@@ -161,7 +179,10 @@ async function archiveToPath(zip: JSZip): Promise<string> {
});
}
export async function archiveCollections(collections: Collection[]) {
export async function archiveCollections(
collections: Collection[],
format: FileOperationFormat
) {
const zip = new JSZip();
for (const collection of collections) {
@@ -169,7 +190,11 @@ export async function archiveCollections(collections: Collection[]) {
const folder = zip.folder(serializeFilename(collection.name));
if (folder) {
await addDocumentTreeToArchive(folder, collection.documentStructure);
await addDocumentTreeToArchive(
folder,
collection.documentStructure,
format
);
}
}
}
@@ -177,17 +202,6 @@ export async function archiveCollections(collections: Collection[]) {
return archiveToPath(zip);
}
/** A node in a nested tree of files; each node holds its own child nodes. */
export type FileTreeNode = {
/** The title, extracted from the file name */
title: string;
/** The file name including extension */
name: string;
/** The full path within the zip file */
path: string;
/** The nested children */
children: FileTreeNode[];
};
/**
* Converts the flat structure returned by JSZIP into a nested file structure
* for easier processing.