chore: Refactor data import (#3434)

* Complete refactor of import

* feat: Notion data import (#3442)
This commit is contained in:
Tom Moor
2022-04-23 10:07:35 -07:00
committed by GitHub
parent bdcfaae025
commit 33ce49cc33
45 changed files with 2217 additions and 1066 deletions

View File

@@ -9,7 +9,7 @@ import { sequelize } from "@server/database/sequelize";
import Logger from "@server/logging/logger";
import { APM } from "@server/logging/tracing";
import Document from "@server/models/Document";
import documentUpdater from "../commands/documentUpdater";
import documentCollaborativeUpdater from "../commands/documentCollaborativeUpdater";
import markdownToYDoc from "./utils/markdownToYDoc";
@APM.trace({
@@ -71,7 +71,7 @@ export default class PersistenceExtension implements Extension {
Logger.info("database", `Persisting ${documentId}`);
try {
await documentUpdater({
await documentCollaborativeUpdater({
documentId,
ydoc: document,
userId: context.user?.id,

View File

@@ -1,44 +1,60 @@
import { Transaction } from "sequelize";
import { v4 as uuidv4 } from "uuid";
import { Attachment, Event, User } from "@server/models";
import { uploadToS3FromBuffer } from "@server/utils/s3";
export default async function attachmentCreator({
id,
name,
type,
buffer,
user,
source,
ip,
transaction,
}: {
id?: string;
name: string;
type: string;
buffer: Buffer;
user: User;
source?: "import";
ip: string;
ip?: string;
transaction?: Transaction;
}) {
const key = `uploads/${user.id}/${uuidv4()}/${name}`;
const acl = process.env.AWS_S3_ACL || "private";
const url = await uploadToS3FromBuffer(buffer, type, key, acl);
const attachment = await Attachment.create({
key,
acl,
url,
size: buffer.length,
contentType: type,
teamId: user.teamId,
userId: user.id,
});
await Event.create({
name: "attachments.create",
data: {
name,
source,
const attachment = await Attachment.create(
{
id,
key,
acl,
url,
size: buffer.length,
contentType: type,
teamId: user.teamId,
userId: user.id,
},
modelId: attachment.id,
teamId: user.teamId,
actorId: user.id,
ip,
});
{
transaction,
}
);
await Event.create(
{
name: "attachments.create",
data: {
name,
source,
},
modelId: attachment.id,
teamId: user.teamId,
actorId: user.id,
ip,
},
{
transaction,
}
);
return attachment;
}

View File

@@ -1,85 +0,0 @@
import path from "path";
import File from "formidable/lib/file";
import { Attachment, Document, Collection } from "@server/models";
import { buildUser } from "@server/test/factories";
import { flushdb } from "@server/test/support";
import collectionImporter from "./collectionImporter";
jest.mock("../utils/s3");
beforeEach(() => flushdb());
describe("collectionImporter", () => {
  const ip = "127.0.0.1";

  // Wraps a zip fixture from the test fixtures directory in a formidable File.
  const zipFixture = (name: string) =>
    new File({
      name,
      type: "application/zip",
      path: path.resolve(__dirname, "..", "test", "fixtures", name),
    });

  // Runs an import of the named fixture as a fresh user and hands back
  // whatever was thrown, or undefined when the import succeeded.
  const importAndCaptureError = async (name: string) => {
    const user = await buildUser();
    const file = zipFixture(name);

    try {
      await collectionImporter({
        type: "outline",
        user,
        file,
        ip,
      });
    } catch (err) {
      return err;
    }
    return undefined;
  };

  it("should import documents in outline format", async () => {
    const user = await buildUser();
    const file = zipFixture("outline.zip");

    const response = await collectionImporter({
      type: "outline",
      user,
      file,
      ip,
    });

    // the returned records and the persisted rows should agree
    expect(response.collections.length).toEqual(1);
    expect(response.documents.length).toEqual(8);
    expect(response.attachments.length).toEqual(6);
    expect(await Collection.count()).toEqual(1);
    expect(await Document.count()).toEqual(8);
    expect(await Attachment.count()).toEqual(6);
  });

  it("should throw an error with corrupt zip", async () => {
    const error = await importAndCaptureError("corrupt.zip");
    expect(error && error.message).toBeTruthy();
  });

  it("should throw an error with empty zip", async () => {
    const error = await importAndCaptureError("empty.zip");
    expect(error && error.message).toBe(
      "Uploaded file does not contain importable documents"
    );
  });
});

View File

@@ -1,206 +0,0 @@
import fs from "fs";
import os from "os";
import path from "path";
import File from "formidable/lib/file";
import invariant from "invariant";
import { values, keys } from "lodash";
import { v4 as uuidv4 } from "uuid";
import Logger from "@server/logging/logger";
import { APM } from "@server/logging/tracing";
import { Attachment, Event, Document, Collection, User } from "@server/models";
import { parseOutlineExport, Item } from "@server/utils/zip";
import { FileImportError } from "../errors";
import attachmentCreator from "./attachmentCreator";
import documentCreator from "./documentCreator";
import documentImporter from "./documentImporter";
type FileWithPath = File & {
path: string;
};
/**
 * Replaces every occurrence of the literal string `search` in `text`.
 * `String.prototype.replace` with a string pattern only touches the first
 * match, which is not enough when a document references a file repeatedly.
 */
function replaceEvery(
  text: string,
  search: string,
  replacement: string
): string {
  // splitting on an empty string would explode the text into characters
  return search ? text.split(search).join(replacement) : text;
}

/**
 * Imports an Outline export zip. Collections, documents and attachments found
 * in the archive are created in order, then document text is rewritten so
 * that references to files inside the zip point at the uploaded attachments.
 *
 * @param file The uploaded zip, read from `file.path`
 * @param type The import format
 * @param user The user performing the import, collections are created in
 * their team
 * @param ip The IP address recorded on created events
 * @returns The documents, collections and attachments that were created
 * @throws FileImportError if the zip cannot be parsed or it contains no
 * documents
 */
async function collectionImporter({
  file,
  type,
  user,
  ip,
}: {
  file: FileWithPath;
  user: User;
  type: "outline";
  ip: string;
}) {
  // load the zip structure into memory
  const zipData = await fs.promises.readFile(file.path);
  let items: Item[];

  try {
    items = await parseOutlineExport(zipData);
  } catch (err) {
    throw FileImportError(err instanceof Error ? err.message : String(err));
  }

  if (!items.some((item) => item.type === "document")) {
    throw FileImportError(
      "Uploaded file does not contain importable documents"
    );
  }

  // store progress and pointers
  const collections: Record<string, Collection> = {};
  const documents: Record<string, Document> = {};
  const attachments: Record<string, Attachment> = {};

  for (const item of items) {
    if (item.type === "collection") {
      // check if collection with name exists
      const response = await Collection.findOrCreate({
        where: {
          teamId: user.teamId,
          name: item.name,
        },
        defaults: {
          createdById: user.id,
          permission: "read_write",
        },
      });
      let collection = response[0];
      const isCreated = response[1];

      // create new collection if name already exists, yes it's possible that
      // there is also a "Name (Imported)" but this is a case not worth dealing
      // with right now
      if (!isCreated) {
        const name = `${item.name} (Imported)`;
        collection = await Collection.create({
          teamId: user.teamId,
          createdById: user.id,
          name,
          permission: "read_write",
        });
        await Event.create({
          name: "collections.create",
          collectionId: collection.id,
          teamId: collection.teamId,
          actorId: user.id,
          data: {
            name,
          },
          ip,
        });
      }

      collections[item.path] = collection;
      continue;
    }

    if (item.type === "document") {
      const collectionDir = item.dir.split("/")[0];
      const collection = collections[collectionDir];
      invariant(collection, `Collection must exist for document ${item.dir}`);

      // we have a document, documentImporter reads from disk so the content is
      // staged in a temporary file
      const content = await item.item.async("string");
      const name = path.basename(item.name);
      const tmpFilePath = path.join(os.tmpdir(), `upload-${uuidv4()}`);
      await fs.promises.writeFile(tmpFilePath, content);

      let text: string;
      let title: string;

      try {
        const tmpFile = new File({
          name,
          type: "text/markdown",
          path: tmpFilePath,
        });
        ({ text, title } = await documentImporter({
          file: tmpFile,
          user,
          ip,
        }));
      } finally {
        // always remove the staged file, even when the conversion fails
        await fs.promises.unlink(tmpFilePath);
      }

      // must be a nested document, find and reference the parent document
      let parentDocumentId;

      if (item.depth > 1) {
        const parentDocument =
          documents[`${item.dir}.md`] || documents[item.dir];
        invariant(parentDocument, `Document must exist for parent ${item.dir}`);
        parentDocumentId = parentDocument.id;
      }

      const document = await documentCreator({
        source: "import",
        title,
        text,
        publish: true,
        collectionId: collection.id,
        createdAt: item.metadata.createdAt
          ? new Date(item.metadata.createdAt)
          : item.item.date,
        updatedAt: item.item.date,
        parentDocumentId,
        user,
        ip,
      });
      documents[item.path] = document;
      continue;
    }

    if (item.type === "attachment") {
      const buffer = await item.item.async("nodebuffer");
      // NOTE(review): `type` is the import format ("outline") rather than the
      // MIME type of the file, yet it is passed as the attachment type –
      // confirm whether a real content type should be derived from the name.
      const attachment = await attachmentCreator({
        source: "import",
        name: item.name,
        type,
        buffer,
        user,
        ip,
      });
      attachments[item.path] = attachment;
      continue;
    }

    Logger.info("commands", `Skipped importing ${item.path}`);
  }

  // All collections, documents, and attachments have been created - time to
  // update the documents to point to newly uploaded attachments where possible
  for (const document of values(documents)) {
    let text = document.text;

    for (const attachmentPath of keys(attachments)) {
      const { redirectUrl } = attachments[attachmentPath];

      // pull the collection and subdirectory out of the path name, upload folders
      // in an Outline export are relative to the document itself
      const normalizedAttachmentPath = attachmentPath.replace(
        /(.*)uploads\//,
        "uploads/"
      );

      // longest form first, otherwise the bare relative path would match
      // inside the others and leave their prefix behind in the text
      const references = [
        attachmentPath,
        `/${normalizedAttachmentPath}`,
        normalizedAttachmentPath,
      ];

      for (const reference of references) {
        text = replaceEvery(text, reference, redirectUrl);
      }
    }

    document.text = text;

    // does nothing if the document text is unchanged
    await document.save({
      fields: ["text"],
    });
  }

  // reload collections to get document mapping
  for (const collection of values(collections)) {
    await collection.reload();
  }

  return {
    documents: values(documents),
    collections: values(collections),
    attachments: values(attachments),
  };
}
export default APM.traceFunction({
serviceName: "command",
spanName: "collectionImporter",
})(collectionImporter);

View File

@@ -0,0 +1,70 @@
import { yDocToProsemirrorJSON } from "@getoutline/y-prosemirror";
import invariant from "invariant";
import { uniq } from "lodash";
import { Node } from "prosemirror-model";
import * as Y from "yjs";
import { schema, serializer } from "@server/editor";
import { Document, Event } from "@server/models";
/**
 * Persists the current state of a collaborative Y.Doc onto its document row:
 * the serialized text, the encoded state and the merged list of collaborator
 * ids. A "documents.update" event is scheduled when the text has changed.
 *
 * @param documentId The id of the document to persist to
 * @param ydoc The collaborative document to read state from
 * @param userId The id of the user responsible for the change, if known
 */
export default async function documentCollaborativeUpdater({
  documentId,
  ydoc,
  userId,
}: {
  documentId: string;
  ydoc: Y.Doc;
  userId?: string;
}) {
  const existing = await Document.scope("withState").findByPk(documentId);
  invariant(existing, "document not found");

  const encodedState = Y.encodeStateAsUpdate(ydoc);
  const prosemirrorJSON = yDocToProsemirrorJSON(ydoc, "default");
  const text = serializer.serialize(
    Node.fromJSON(schema, prosemirrorJSON),
    undefined
  );
  const textChanged = existing.text !== text;

  // same text and state has been stored before, there is nothing to write
  if (!textChanged && existing.state) {
    return;
  }

  // extract collaborators from doc user data and merge with those already
  // recorded on the document
  const permanentUserData = new Y.PermanentUserData(ydoc);
  const collaboratorIds = uniq([
    ...Array.from(permanentUserData.clients.values()),
    ...existing.collaboratorIds,
  ]);

  // modification metadata only moves forward when the text actually changed
  const updatedAt = textChanged ? new Date() : existing.updatedAt;
  const lastModifiedById =
    textChanged && userId ? userId : existing.lastModifiedById;

  await Document.scope(["withDrafts", "withState"]).update(
    {
      text,
      state: Buffer.from(encodedState),
      updatedAt,
      lastModifiedById,
      collaboratorIds,
    },
    {
      silent: true,
      hooks: false,
      where: {
        id: documentId,
      },
    }
  );

  if (!textChanged) {
    return;
  }

  await Event.schedule({
    name: "documents.update",
    documentId: existing.id,
    collectionId: existing.collectionId,
    teamId: existing.teamId,
    actorId: userId,
    data: {
      multiplayer: true,
      title: existing.title,
    },
  });
}

View File

@@ -1,9 +1,11 @@
import invariant from "invariant";
import { Transaction } from "sequelize";
import { Document, Event, User } from "@server/models";
export default async function documentCreator({
title = "",
text = "",
id,
publish,
collectionId,
parentDocumentId,
@@ -14,15 +16,19 @@ export default async function documentCreator({
template,
user,
editorVersion,
publishedAt,
source,
ip,
transaction,
}: {
id?: string;
title: string;
text: string;
publish?: boolean;
collectionId: string;
parentDocumentId?: string;
templateDocument?: Document | null;
publishedAt?: Date;
template?: boolean;
createdAt?: Date;
updatedAt?: Date;
@@ -30,42 +36,35 @@ export default async function documentCreator({
user: User;
editorVersion?: string;
source?: "import";
ip: string;
ip?: string;
transaction: Transaction;
}): Promise<Document> {
const templateId = templateDocument ? templateDocument.id : undefined;
const document = await Document.create({
parentDocumentId,
editorVersion,
collectionId,
teamId: user.teamId,
userId: user.id,
createdAt,
updatedAt,
lastModifiedById: user.id,
createdById: user.id,
template,
templateId,
title: templateDocument ? templateDocument.title : title,
text: templateDocument ? templateDocument.text : text,
});
await Event.create({
name: "documents.create",
documentId: document.id,
collectionId: document.collectionId,
teamId: document.teamId,
actorId: user.id,
data: {
source,
title: document.title,
const document = await Document.create(
{
id,
parentDocumentId,
editorVersion,
collectionId,
teamId: user.teamId,
userId: user.id,
createdAt,
updatedAt,
lastModifiedById: user.id,
createdById: user.id,
template,
templateId,
publishedAt,
title: templateDocument ? templateDocument.title : title,
text: templateDocument ? templateDocument.text : text,
},
ip,
});
if (publish) {
await document.publish(user.id);
await Event.create({
name: "documents.publish",
{
transaction,
}
);
await Event.create(
{
name: "documents.create",
documentId: document.id,
collectionId: document.collectionId,
teamId: document.teamId,
@@ -73,9 +72,34 @@ export default async function documentCreator({
data: {
source,
title: document.title,
templateId,
},
ip,
});
},
{
transaction,
}
);
if (publish) {
await document.publish(user.id, { transaction });
await Event.create(
{
name: "documents.publish",
documentId: document.id,
collectionId: document.collectionId,
teamId: document.teamId,
actorId: user.id,
data: {
source,
title: document.title,
},
ip,
},
{
transaction,
}
);
}
// reload to get all of the data needed to present (user, collection etc)
@@ -86,6 +110,7 @@ export default async function documentCreator({
id: document.id,
publishedAt: document.publishedAt,
},
transaction,
});
invariant(doc, "Document must exist");

View File

@@ -1,5 +1,5 @@
import path from "path";
import File from "formidable/lib/file";
import fs from "fs-extra";
import Attachment from "@server/models/Attachment";
import { buildUser } from "@server/test/factories";
import { flushdb } from "@server/test/support";
@@ -13,16 +13,16 @@ describe("documentImporter", () => {
it("should convert Word Document to markdown", async () => {
const user = await buildUser();
const name = "images.docx";
const file = new File({
name,
type:
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
path: path.resolve(__dirname, "..", "test", "fixtures", name),
});
const fileName = "images.docx";
const content = await fs.readFile(
path.resolve(__dirname, "..", "test", "fixtures", fileName)
);
const response = await documentImporter({
user,
file,
mimeType:
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
fileName,
content,
ip,
});
const attachments = await Attachment.count();
@@ -34,15 +34,15 @@ describe("documentImporter", () => {
it("should convert Word Document to markdown for application/octet-stream mimetype", async () => {
const user = await buildUser();
const name = "images.docx";
const file = new File({
name,
type: "application/octet-stream",
path: path.resolve(__dirname, "..", "test", "fixtures", name),
});
const fileName = "images.docx";
const content = await fs.readFile(
path.resolve(__dirname, "..", "test", "fixtures", fileName)
);
const response = await documentImporter({
user,
file,
mimeType: "application/octet-stream",
fileName,
content,
ip,
});
const attachments = await Attachment.count();
@@ -54,18 +54,18 @@ describe("documentImporter", () => {
it("should error when a file with application/octet-stream mimetype doesn't have .docx extension", async () => {
const user = await buildUser();
const name = "normal.docx.txt";
const file = new File({
name,
type: "application/octet-stream",
path: path.resolve(__dirname, "..", "test", "fixtures", name),
});
const fileName = "normal.docx.txt";
const content = await fs.readFile(
path.resolve(__dirname, "..", "test", "fixtures", fileName)
);
let error;
try {
await documentImporter({
user,
file,
mimeType: "application/octet-stream",
fileName,
content,
ip,
});
} catch (err) {
@@ -77,15 +77,15 @@ describe("documentImporter", () => {
it("should convert Word Document on Windows to markdown", async () => {
const user = await buildUser();
const name = "images.docx";
const file = new File({
name,
type: "application/octet-stream",
path: path.resolve(__dirname, "..", "test", "fixtures", name),
});
const fileName = "images.docx";
const content = await fs.readFile(
path.resolve(__dirname, "..", "test", "fixtures", fileName)
);
const response = await documentImporter({
user,
file,
mimeType: "application/octet-stream",
fileName,
content,
ip,
});
const attachments = await Attachment.count();
@@ -97,15 +97,16 @@ describe("documentImporter", () => {
it("should convert HTML Document to markdown", async () => {
const user = await buildUser();
const name = "webpage.html";
const file = new File({
name,
type: "text/html",
path: path.resolve(__dirname, "..", "test", "fixtures", name),
});
const fileName = "webpage.html";
const content = await fs.readFile(
path.resolve(__dirname, "..", "test", "fixtures", fileName),
"utf8"
);
const response = await documentImporter({
user,
file,
mimeType: "text/html",
fileName,
content,
ip,
});
expect(response.text).toContain("Text paragraph");
@@ -114,15 +115,15 @@ describe("documentImporter", () => {
it("should convert Confluence Word output to markdown", async () => {
const user = await buildUser();
const name = "confluence.doc";
const file = new File({
name,
type: "application/msword",
path: path.resolve(__dirname, "..", "test", "fixtures", name),
});
const fileName = "confluence.doc";
const content = await fs.readFile(
path.resolve(__dirname, "..", "test", "fixtures", fileName)
);
const response = await documentImporter({
user,
file,
mimeType: "application/msword",
fileName,
content,
ip,
});
expect(response.text).toContain("this is a test document");
@@ -131,49 +132,34 @@ describe("documentImporter", () => {
it("should load markdown", async () => {
const user = await buildUser();
const name = "markdown.md";
const file = new File({
name,
type: "text/plain",
path: path.resolve(__dirname, "..", "test", "fixtures", name),
});
const fileName = "markdown.md";
const content = await fs.readFile(
path.resolve(__dirname, "..", "test", "fixtures", fileName),
"utf8"
);
const response = await documentImporter({
user,
file,
mimeType: "text/plain",
fileName,
content,
ip,
});
expect(response.text).toContain("This is a test paragraph");
expect(response.title).toEqual("Heading 1");
});
it("should handle encoded slashes", async () => {
const user = await buildUser();
const name = "this %2F and %2F this.md";
const file = new File({
name,
type: "text/plain",
path: path.resolve(__dirname, "..", "test", "fixtures", "empty.md"),
});
const response = await documentImporter({
user,
file,
ip,
});
expect(response.text).toContain("");
expect(response.title).toEqual("this / and / this");
});
it("should fallback to extension if mimetype unknown", async () => {
const user = await buildUser();
const name = "markdown.md";
const file = new File({
name,
type: "application/lol",
path: path.resolve(__dirname, "..", "test", "fixtures", name),
});
const fileName = "markdown.md";
const content = await fs.readFile(
path.resolve(__dirname, "..", "test", "fixtures", fileName),
"utf8"
);
const response = await documentImporter({
user,
file,
mimeType: "application/lol",
fileName,
content,
ip,
});
expect(response.text).toContain("This is a test paragraph");
@@ -182,18 +168,18 @@ describe("documentImporter", () => {
it("should error with unknown file type", async () => {
const user = await buildUser();
const name = "files.zip";
const file = new File({
name,
type: "executable/zip",
path: path.resolve(__dirname, "..", "test", "fixtures", name),
});
const fileName = "empty.zip";
const content = await fs.readFile(
path.resolve(__dirname, "..", "test", "fixtures", fileName)
);
let error;
try {
await documentImporter({
user,
file,
mimeType: "executable/zip",
fileName,
content,
ip,
});
} catch (err) {

View File

@@ -1,9 +1,10 @@
import fs from "fs";
import path from "path";
import emojiRegex from "emoji-regex";
import { strikethrough, tables } from "joplin-turndown-plugin-gfm";
import { truncate } from "lodash";
import mammoth from "mammoth";
import quotedPrintable from "quoted-printable";
import { Transaction } from "sequelize";
import TurndownService from "turndown";
import utf8 from "utf8";
import { MAX_TITLE_LENGTH } from "@shared/constants";
@@ -21,7 +22,7 @@ const turndownService = new TurndownService({
hr: "---",
bulletListMarker: "-",
headingStyle: "atx",
});
}).remove(["script", "style", "title", "head"]);
// Use the GitHub-flavored markdown plugin to parse
// strikethoughs and tables
@@ -37,7 +38,7 @@ turndownService
interface ImportableFile {
type: string;
getMarkdown: (file: any) => Promise<string>;
getMarkdown: (content: Buffer | string) => Promise<string>;
}
const importMapping: ImportableFile[] = [
@@ -68,26 +69,34 @@ const importMapping: ImportableFile[] = [
},
];
// @ts-expect-error ts-migrate(7006) FIXME: Parameter 'file' implicitly has an 'any' type.
async function fileToMarkdown(file): Promise<string> {
return fs.promises.readFile(file.path, "utf8");
/**
 * Passes markdown content through untouched, decoding it as UTF-8 first when
 * it arrives as a Buffer.
 *
 * @param content The markdown as a string or a Buffer
 * @returns The markdown as a string
 */
async function fileToMarkdown(content: Buffer | string): Promise<string> {
  return content instanceof Buffer ? content.toString("utf8") : content;
}
// @ts-expect-error ts-migrate(7006) FIXME: Parameter 'file' implicitly has an 'any' type.
async function docxToMarkdown(file): Promise<string> {
const { value } = await mammoth.convertToHtml(file);
return turndownService.turndown(value);
/**
 * Converts the binary content of a Word document to markdown by first
 * converting it to HTML with mammoth and then running it through turndown.
 *
 * @param content The document content, must be a Buffer
 * @returns The converted markdown
 * @throws Error if the content is not a Buffer
 */
async function docxToMarkdown(content: Buffer | string): Promise<string> {
  if (!(content instanceof Buffer)) {
    throw new Error("docxToMarkdown: content must be a Buffer");
  }

  const converted = await mammoth.convertToHtml({ buffer: content });
  return turndownService.turndown(converted.value);
}
// @ts-expect-error ts-migrate(7006) FIXME: Parameter 'file' implicitly has an 'any' type.
async function htmlToMarkdown(file): Promise<string> {
const value = await fs.promises.readFile(file.path, "utf8");
return turndownService.turndown(value);
/**
 * Converts HTML to markdown with turndown, decoding the content as UTF-8
 * first when it arrives as a Buffer.
 *
 * @param content The HTML as a string or a Buffer
 * @returns The converted markdown
 */
async function htmlToMarkdown(content: Buffer | string): Promise<string> {
  const html = content instanceof Buffer ? content.toString("utf8") : content;
  return turndownService.turndown(html);
}
// @ts-expect-error ts-migrate(7006) FIXME: Parameter 'file' implicitly has an 'any' type.
async function confluenceToMarkdown(file): Promise<string> {
let value = await fs.promises.readFile(file.path, "utf8");
async function confluenceToMarkdown(value: Buffer | string): Promise<string> {
if (value instanceof Buffer) {
value = value.toString("utf8");
}
// We're only supporting the ridiculous output from Confluence here, regular
// Word documents should call into the docxToMarkdown importer.
@@ -143,22 +152,28 @@ async function confluenceToMarkdown(file): Promise<string> {
}
async function documentImporter({
file,
mimeType,
fileName,
content,
user,
ip,
transaction,
}: {
user: User;
file: File;
ip: string;
mimeType: string;
fileName: string;
content: Buffer | string;
ip?: string;
transaction?: Transaction;
}): Promise<{
text: string;
title: string;
}> {
const fileInfo = importMapping.filter((item) => {
if (item.type === file.type) {
if (item.type === mimeType) {
if (
file.type === "application/octet-stream" &&
path.extname(file.name) !== ".docx"
mimeType === "application/octet-stream" &&
path.extname(fileName) !== ".docx"
) {
return false;
}
@@ -166,7 +181,7 @@ async function documentImporter({
return true;
}
if (item.type === "text/markdown" && path.extname(file.name) === ".md") {
if (item.type === "text/markdown" && path.extname(fileName) === ".md") {
return true;
}
@@ -174,20 +189,35 @@ async function documentImporter({
})[0];
if (!fileInfo) {
throw InvalidRequestError(`File type ${file.type} not supported`);
throw InvalidRequestError(`File type ${mimeType} not supported`);
}
let title = deserializeFilename(file.name.replace(/\.[^/.]+$/, ""));
let text = await fileInfo.getMarkdown(file);
let title = deserializeFilename(fileName.replace(/\.[^/.]+$/, ""));
let text = await fileInfo.getMarkdown(content);
text = text.trim();
// find and extract first emoji, in the case of some imports it can be outside
// of the title, at the top of the document.
const regex = emojiRegex();
const matches = regex.exec(text);
const firstEmoji = matches ? matches[0] : undefined;
if (firstEmoji && text.startsWith(firstEmoji)) {
text = text.replace(firstEmoji, "").trim();
}
// If the first line of the imported text looks like a markdown heading
// then we can use this as the document title
if (text.trim().startsWith("# ")) {
if (text.startsWith("# ")) {
const result = parseTitle(text);
title = result.title;
text = text.replace(`# ${title}\n`, "");
}
// If we parsed an emoji from _above_ the title then add it back at prefixing
if (firstEmoji) {
title = `${firstEmoji} ${title}`;
}
// find data urls, convert to blobs, upload and write attachments
const images = parseImages(text);
const dataURIs = images.filter((href) => href.startsWith("data:"));
@@ -201,6 +231,7 @@ async function documentImporter({
buffer,
user,
ip,
transaction,
});
text = text.replace(uri, attachment.redirectUrl);
}

View File

@@ -1,70 +1,128 @@
import { yDocToProsemirrorJSON } from "@getoutline/y-prosemirror";
import invariant from "invariant";
import { uniq } from "lodash";
import { Node } from "prosemirror-model";
import * as Y from "yjs";
import { schema, serializer } from "@server/editor";
import { Document, Event } from "@server/models";
import { Transaction } from "sequelize";
import { Event, Document, User } from "@server/models";
type Props = {
/** The user updating the document */
user: User;
/** The existing document */
document: Document;
/** The new title */
title?: string;
/** The new text content */
text?: string;
/** The version of the client editor that was used */
editorVersion?: string;
/** The ID of the template that was used */
templateId?: string;
/** If the document should be displayed full-width on the screen */
fullWidth?: boolean;
/** Whether the text be appended to the end instead of replace */
append?: boolean;
/** Whether the document should be published to the collection */
publish?: boolean;
/** The IP address of the user creating the document */
ip: string;
/** The database transaction to run within */
transaction: Transaction;
};
/**
* This command updates document properties. To update collaborative text state
* use documentCollaborativeUpdater.
*
* @param Props The properties of the document to update
* @returns Document The updated document
*/
export default async function documentUpdater({
documentId,
ydoc,
userId,
}: {
documentId: string;
ydoc: Y.Doc;
userId?: string;
}) {
const document = await Document.scope("withState").findByPk(documentId);
invariant(document, "document not found");
user,
document,
title,
text,
editorVersion,
templateId,
fullWidth,
append,
publish,
transaction,
ip,
}: Props): Promise<Document> {
const previousTitle = document.title;
const state = Y.encodeStateAsUpdate(ydoc);
const node = Node.fromJSON(schema, yDocToProsemirrorJSON(ydoc, "default"));
const text = serializer.serialize(node, undefined);
const isUnchanged = document.text === text;
const hasMultiplayerState = !!document.state;
if (isUnchanged && hasMultiplayerState) {
return;
if (title !== undefined) {
document.title = title;
}
// extract collaborators from doc user data
const pud = new Y.PermanentUserData(ydoc);
const pudIds = Array.from(pud.clients.values());
const existingIds = document.collaboratorIds;
const collaboratorIds = uniq([...pudIds, ...existingIds]);
await Document.scope(["withDrafts", "withState"]).update(
{
text,
state: Buffer.from(state),
updatedAt: isUnchanged ? document.updatedAt : new Date(),
lastModifiedById:
isUnchanged || !userId ? document.lastModifiedById : userId,
collaboratorIds,
},
{
silent: true,
hooks: false,
where: {
id: documentId,
},
if (editorVersion) {
document.editorVersion = editorVersion;
}
if (templateId) {
document.templateId = templateId;
}
if (fullWidth !== undefined) {
document.fullWidth = fullWidth;
}
if (!user.team?.collaborativeEditing) {
if (append) {
document.text += text;
} else if (text !== undefined) {
document.text = text;
}
);
if (isUnchanged) {
return;
}
await Event.schedule({
name: "documents.update",
documentId: document.id,
collectionId: document.collectionId,
teamId: document.teamId,
actorId: userId,
data: {
multiplayer: true,
title: document.title,
},
});
document.lastModifiedById = user.id;
const changed = document.changed();
if (publish) {
await document.publish(user.id, { transaction });
} else {
await document.save({ transaction });
}
if (publish) {
await Event.create(
{
name: "documents.publish",
documentId: document.id,
collectionId: document.collectionId,
teamId: document.teamId,
actorId: user.id,
data: {
title: document.title,
},
ip,
},
{ transaction }
);
} else if (changed) {
await Event.create(
{
name: "documents.update",
documentId: document.id,
collectionId: document.collectionId,
teamId: document.teamId,
actorId: user.id,
data: {
title: document.title,
},
ip,
},
{ transaction }
);
}
if (document.title !== previousTitle) {
Event.schedule({
name: "documents.title_change",
documentId: document.id,
collectionId: document.collectionId,
teamId: document.teamId,
actorId: user.id,
data: {
previousTitle,
title: document.title,
},
ip,
});
}
return document;
}

View File

@@ -0,0 +1,14 @@
'use strict';
module.exports = {
up: async (queryInterface, Sequelize) => {
await queryInterface.addColumn("file_operations", "format", {
type: Sequelize.STRING,
defaultValue: "outline-markdown",
allowNull: false
});
},
down: async (queryInterface) => {
await queryInterface.removeColumn("file_operations", "format");
},
};

View File

@@ -520,7 +520,7 @@ class Collection extends ParanoidModel {
*/
updateDocument = async function (
updatedDocument: Document,
options?: { transaction: Transaction }
options?: { transaction?: Transaction | null }
) {
if (!this.documentStructure) {
return;

View File

@@ -9,6 +9,7 @@ import {
FindOptions,
ScopeOptions,
WhereOptions,
SaveOptions,
} from "sequelize";
import {
ForeignKey,
@@ -238,7 +239,10 @@ class Document extends ParanoidModel {
// hooks
@BeforeSave
static async updateTitleInCollectionStructure(model: Document) {
static async updateTitleInCollectionStructure(
model: Document,
{ transaction }: SaveOptions<Document>
) {
// templates, drafts, and archived documents don't appear in the structure
// and so never need to be updated when the title changes
if (
@@ -250,18 +254,16 @@ class Document extends ParanoidModel {
return;
}
return this.sequelize!.transaction(async (transaction: Transaction) => {
const collection = await Collection.findByPk(model.collectionId, {
transaction,
lock: transaction.LOCK.UPDATE,
});
if (!collection) {
return;
}
await collection.updateDocument(model, { transaction });
model.collection = collection;
const collection = await Collection.findByPk(model.collectionId, {
transaction,
lock: Transaction.LOCK.UPDATE,
});
if (!collection) {
return;
}
await collection.updateDocument(model, { transaction });
model.collection = collection;
}
@AfterCreate
@@ -801,30 +803,28 @@ class Document extends ParanoidModel {
return this.save(options);
};
publish = async (userId: string) => {
publish = async (userId: string, { transaction }: SaveOptions<Document>) => {
// If the document is already published then calling publish should act like
// a regular save
if (this.publishedAt) {
return this.save();
return this.save({ transaction });
}
await this.sequelize.transaction(async (transaction: Transaction) => {
if (!this.template) {
const collection = await Collection.findByPk(this.collectionId, {
transaction,
lock: transaction.LOCK.UPDATE,
});
if (!this.template) {
const collection = await Collection.findByPk(this.collectionId, {
transaction,
lock: Transaction.LOCK.UPDATE,
});
if (collection) {
await collection.addDocumentToStructure(this, 0, { transaction });
this.collection = collection;
}
if (collection) {
await collection.addDocumentToStructure(this, 0, { transaction });
this.collection = collection;
}
});
}
this.lastModifiedById = userId;
this.publishedAt = new Date();
return this.save();
return this.save({ transaction });
};
unpublish = async (userId: string) => {

View File

@@ -1,3 +1,4 @@
import { SaveOptions } from "sequelize";
import {
ForeignKey,
AfterSave,
@@ -45,8 +46,12 @@ class Event extends BaseModel {
}
@AfterSave
static async enqueue(model: Event) {
globalEventQueue.add(model);
static async enqueue(model: Event, options: SaveOptions<Event>) {
if (options.transaction) {
options.transaction.afterCommit(() => void globalEventQueue.add(model));
return;
}
void globalEventQueue.add(model);
}
// associations

View File

@@ -7,13 +7,31 @@ import {
Table,
DataType,
} from "sequelize-typescript";
import { deleteFromS3 } from "@server/utils/s3";
import { deleteFromS3, getFileByKey } from "@server/utils/s3";
import Collection from "./Collection";
import Team from "./Team";
import User from "./User";
import BaseModel from "./base/BaseModel";
import Fix from "./decorators/Fix";
/** Whether a file operation is an import or an export. */
export enum FileOperationType {
Import = "import",
Export = "export",
}
/**
 * The source format of an imported file. The file operations processor uses
 * this value to choose which import task is scheduled.
 */
export enum FileOperationFormat {
MarkdownZip = "outline-markdown",
Notion = "notion",
}
/** The lifecycle states that can be stored on a file operation. */
export enum FileOperationState {
Creating = "creating",
Uploading = "uploading",
Complete = "complete",
Error = "error",
Expired = "expired",
}
@DefaultScope(() => ({
include: [
{
@@ -32,12 +50,15 @@ import Fix from "./decorators/Fix";
@Fix
class FileOperation extends BaseModel {
@Column(DataType.ENUM("import", "export"))
type: "import" | "export";
type: FileOperationType;
@Column(DataType.STRING)
format: FileOperationFormat;
@Column(
DataType.ENUM("creating", "uploading", "complete", "error", "expired")
)
state: "creating" | "uploading" | "complete" | "error" | "expired";
state: FileOperationState;
@Column
key: string;
@@ -57,6 +78,10 @@ class FileOperation extends BaseModel {
await this.save();
};
/**
 * The stored file associated with this operation, looked up by its `key`.
 * NOTE(review): presumably resolves to the file contents as a Buffer, since
 * the import task awaits it before parsing; confirm against getFileByKey.
 */
get buffer() {
return getFileByKey(this.key);
}
// hooks
@BeforeDestroy

View File

@@ -173,45 +173,55 @@ class Team extends ParanoidModel {
return subdomain;
};
provisionFirstCollection = async function (userId: string) {
const collection = await Collection.create({
name: "Welcome",
description:
"This collection is a quick guide to what Outline is all about. Feel free to delete this collection once your team is up to speed with the basics!",
teamId: this.id,
createdById: userId,
sort: Collection.DEFAULT_SORT,
permission: "read_write",
});
// For the first collection we go ahead and create some intitial documents to get
// the team started. You can edit these in /server/onboarding/x.md
const onboardingDocs = [
"Integrations & API",
"Our Editor",
"Getting Started",
"What is Outline",
];
for (const title of onboardingDocs) {
const text = await readFile(
path.join(process.cwd(), "server", "onboarding", `${title}.md`),
"utf8"
provisionFirstCollection = async (userId: string) => {
await this.sequelize!.transaction(async (transaction) => {
const collection = await Collection.create(
{
name: "Welcome",
description:
"This collection is a quick guide to what Outline is all about. Feel free to delete this collection once your team is up to speed with the basics!",
teamId: this.id,
createdById: userId,
sort: Collection.DEFAULT_SORT,
permission: "read_write",
},
{
transaction,
}
);
const document = await Document.create({
version: 2,
isWelcome: true,
parentDocumentId: null,
collectionId: collection.id,
teamId: collection.teamId,
userId: collection.createdById,
lastModifiedById: collection.createdById,
createdById: collection.createdById,
title,
text,
});
await document.publish(collection.createdById);
}
// For the first collection we go ahead and create some intitial documents to get
// the team started. You can edit these in /server/onboarding/x.md
const onboardingDocs = [
"Integrations & API",
"Our Editor",
"Getting Started",
"What is Outline",
];
for (const title of onboardingDocs) {
const text = await readFile(
path.join(process.cwd(), "server", "onboarding", `${title}.md`),
"utf8"
);
const document = await Document.create(
{
version: 2,
isWelcome: true,
parentDocumentId: null,
collectionId: collection.id,
teamId: collection.teamId,
userId: collection.createdById,
lastModifiedById: collection.createdById,
createdById: collection.createdById,
title,
text,
},
{ transaction }
);
await document.publish(collection.createdById, { transaction });
}
});
};
collectionIds = async function (paranoid = true) {

View File

@@ -4,6 +4,7 @@ import ExportFailureEmail from "@server/emails/templates/ExportFailureEmail";
import ExportSuccessEmail from "@server/emails/templates/ExportSuccessEmail";
import Logger from "@server/logging/logger";
import { FileOperation, Collection, Event, Team, User } from "@server/models";
import { FileOperationState } from "@server/models/FileOperation";
import { Event as TEvent } from "@server/types";
import { uploadToS3FromBuffer } from "@server/utils/s3";
import { archiveCollections } from "@server/utils/zip";
@@ -41,7 +42,7 @@ export default class ExportsProcessor extends BaseProcessor {
});
this.updateFileOperation(fileOperation, actorId, teamId, {
state: "creating",
state: FileOperationState.Creating,
});
// heavy lifting of creating the zip file
Logger.info(
@@ -50,7 +51,7 @@ export default class ExportsProcessor extends BaseProcessor {
);
const filePath = await archiveCollections(collections);
let url;
let state: any = "creating";
let state = FileOperationState.Creating;
try {
// @ts-expect-error ts-migrate(2769) FIXME: No overload matches this call.
@@ -58,7 +59,7 @@ export default class ExportsProcessor extends BaseProcessor {
// @ts-expect-error ts-migrate(2769) FIXME: No overload matches this call.
const stat = await fs.promises.stat(filePath);
this.updateFileOperation(fileOperation, actorId, teamId, {
state: "uploading",
state: FileOperationState.Uploading,
size: stat.size,
});
Logger.info(
@@ -75,12 +76,12 @@ export default class ExportsProcessor extends BaseProcessor {
"processor",
`Upload complete for file operation ${fileOperation.id}`
);
state = "complete";
state = FileOperationState.Complete;
} catch (error) {
Logger.error("Error exporting collection data", error, {
fileOperationId: fileOperation.id,
});
state = "error";
state = FileOperationState.Error;
url = undefined;
} finally {
this.updateFileOperation(fileOperation, actorId, teamId, {
@@ -88,7 +89,7 @@ export default class ExportsProcessor extends BaseProcessor {
url,
});
if (state === "error") {
if (state === FileOperationState.Error) {
await ExportFailureEmail.schedule({
to: user.email,
teamUrl: team.url,

View File

@@ -0,0 +1,40 @@
import invariant from "invariant";
import { FileOperation } from "@server/models";
import {
FileOperationFormat,
FileOperationType,
} from "@server/models/FileOperation";
import { Event as TEvent, FileOperationEvent } from "@server/types";
import ImportMarkdownZipTask from "../tasks/ImportMarkdownZipTask";
import ImportNotionTask from "../tasks/ImportNotionTask";
import BaseProcessor from "./BaseProcessor";
/**
 * Reacts to newly created file operations and hands each import over to the
 * task that understands its format.
 */
export default class FileOperationsProcessor extends BaseProcessor {
  static applicableEvents: TEvent["name"][] = ["fileOperations.create"];

  /**
   * Schedules the import task matching the file operation's format.
   *
   * @param event The file operation event to process
   */
  async perform(event: FileOperationEvent) {
    if (event.name !== "fileOperations.create") {
      return;
    }

    const fileOperation = await FileOperation.findByPk(event.modelId);
    invariant(fileOperation, "fileOperation not found");

    // Only imports are mapped to tasks here, other types are ignored.
    if (fileOperation.type !== FileOperationType.Import) {
      return;
    }

    const { format } = fileOperation;

    if (format === FileOperationFormat.MarkdownZip) {
      await ImportMarkdownZipTask.schedule({
        fileOperationId: event.modelId,
      });
    } else if (format === FileOperationFormat.Notion) {
      await ImportNotionTask.schedule({
        fileOperationId: event.modelId,
      });
    }
  }
}

View File

@@ -1,79 +0,0 @@
import fs from "fs";
import os from "os";
import File from "formidable/lib/file";
import invariant from "invariant";
import collectionImporter from "@server/commands/collectionImporter";
import { Event, FileOperation, Attachment, User } from "@server/models";
import { Event as TEvent } from "@server/types";
import BaseProcessor from "./BaseProcessor";
/**
 * Handles "collections.import" events by running the collection importer
 * against the uploaded attachment and tracking progress in a FileOperation.
 */
export default class ImportsProcessor extends BaseProcessor {
  static applicableEvents: TEvent["name"][] = ["collections.import"];

  /**
   * Imports the attachment referenced by the event and records the outcome.
   *
   * @param event The event to process, anything other than
   * "collections.import" is ignored
   */
  async perform(event: TEvent) {
    if (event.name !== "collections.import") {
      return;
    }

    let outcome, failure;
    const { type } = event.data;

    const attachment = await Attachment.findByPk(event.modelId);
    invariant(attachment, "attachment not found");

    const user = await User.findByPk(event.actorId);
    invariant(user, "user not found");

    const fileOperation = await FileOperation.create({
      type: "import",
      state: "creating",
      size: attachment.size,
      key: attachment.key,
      userId: user.id,
      teamId: user.teamId,
    });

    await Event.schedule({
      name: "fileOperations.create",
      modelId: fileOperation.id,
      teamId: user.teamId,
      actorId: user.id,
    });

    try {
      // The importer works from a file on disk, so the attachment contents
      // are written to a temporary location first.
      const contents = await attachment.buffer;
      const tmpFilePath = `${os.tmpdir()}/upload-${event.modelId}`;
      await fs.promises.writeFile(tmpFilePath, contents as Uint8Array);

      const file = new File({
        name: attachment.name,
        type: attachment.contentType,
        path: tmpFilePath,
      });

      await collectionImporter({
        file,
        user,
        type,
        ip: event.ip,
      });
      await attachment.destroy();

      outcome = "complete";
    } catch (err) {
      outcome = "error";
      failure = err.message;
    } finally {
      // Whatever happened, persist the final state and notify listeners.
      await fileOperation.update({ state: outcome, error: failure });
      await Event.schedule({
        name: "fileOperations.update",
        modelId: fileOperation.id,
        teamId: user.teamId,
        actorId: user.id,
      });
    }
  }
}

View File

@@ -33,7 +33,7 @@ export default abstract class BaseTask<T> {
* @param props Properties to be used by the task
* @returns A promise that resolves once the task has completed.
*/
public abstract perform(props: T): Promise<void>;
public abstract perform(props: T): Promise<any>;
/**
* Job options such as priority and retry strategy, as defined by Bull.

View File

@@ -1,5 +1,9 @@
import { subDays } from "date-fns";
import { FileOperation } from "@server/models";
import {
FileOperationState,
FileOperationType,
} from "@server/models/FileOperation";
import { buildFileOperation } from "@server/test/factories";
import { flushdb } from "@server/test/support";
import CleanupExpiredFileOperationsTask from "./CleanupExpiredFileOperationsTask";
@@ -9,13 +13,13 @@ beforeEach(() => flushdb());
describe("CleanupExpiredFileOperationsTask", () => {
it("should expire exports older than 30 days ago", async () => {
await buildFileOperation({
type: "export",
state: "complete",
type: FileOperationType.Export,
state: FileOperationState.Complete,
createdAt: subDays(new Date(), 30),
});
await buildFileOperation({
type: "export",
state: "complete",
type: FileOperationType.Export,
state: FileOperationState.Complete,
});
/* This is a test helper that creates a new task and runs it. */
@@ -24,8 +28,8 @@ describe("CleanupExpiredFileOperationsTask", () => {
const data = await FileOperation.count({
where: {
type: "export",
state: "expired",
type: FileOperationType.Export,
state: FileOperationState.Expired,
},
});
expect(data).toEqual(1);
@@ -33,13 +37,13 @@ describe("CleanupExpiredFileOperationsTask", () => {
it("should not expire exports made less than 30 days ago", async () => {
await buildFileOperation({
type: "export",
state: "complete",
type: FileOperationType.Export,
state: FileOperationState.Complete,
createdAt: subDays(new Date(), 29),
});
await buildFileOperation({
type: "export",
state: "complete",
type: FileOperationType.Export,
state: FileOperationState.Complete,
});
const task = new CleanupExpiredFileOperationsTask();
@@ -47,8 +51,8 @@ describe("CleanupExpiredFileOperationsTask", () => {
const data = await FileOperation.count({
where: {
type: "export",
state: "expired",
type: FileOperationType.Export,
state: FileOperationState.Expired,
},
});
expect(data).toEqual(0);

View File

@@ -2,6 +2,10 @@ import { subDays } from "date-fns";
import { Op } from "sequelize";
import Logger from "@server/logging/logger";
import { FileOperation } from "@server/models";
import {
FileOperationState,
FileOperationType,
} from "@server/models/FileOperation";
import BaseTask, { TaskPriority } from "./BaseTask";
type Props = {
@@ -13,12 +17,12 @@ export default class CleanupExpiredFileOperationsTask extends BaseTask<Props> {
Logger.info("task", `Expiring export file operations older than 30 days…`);
const fileOperations = await FileOperation.unscoped().findAll({
where: {
type: "export",
type: FileOperationType.Export,
createdAt: {
[Op.lt]: subDays(new Date(), 30),
},
state: {
[Op.ne]: "expired",
[Op.ne]: FileOperationState.Expired,
},
},
limit,

View File

@@ -0,0 +1,87 @@
import fs from "fs";
import path from "path";
import { FileOperation } from "@server/models";
import { buildFileOperation } from "@server/test/factories";
import { flushdb } from "@server/test/support";
import ImportMarkdownZipTask from "./ImportMarkdownZipTask";
beforeEach(() => flushdb());
describe("ImportMarkdownZipTask", () => {
  /**
   * Builds a file operation whose buffer is read from the named fixture and
   * makes FileOperation.findByPk resolve to it.
   */
  const stubFileOperation = async (fixture: string) => {
    const fileOperation = await buildFileOperation();
    Object.defineProperty(fileOperation, "buffer", {
      get: () =>
        fs.readFileSync(
          path.resolve(__dirname, "..", "..", "test", "fixtures", fixture)
        ),
    });
    jest.spyOn(FileOperation, "findByPk").mockResolvedValue(fileOperation);
    return fileOperation;
  };

  /**
   * Runs the task and hands back whatever it threw, or undefined if it
   * completed without throwing.
   */
  const performAndCaptureError = async (fileOperationId: string) => {
    let caught;
    try {
      const task = new ImportMarkdownZipTask();
      await task.perform({ fileOperationId });
    } catch (err) {
      caught = err;
    }
    return caught;
  };

  it("should import the documents, attachments", async () => {
    const fileOperation = await stubFileOperation("outline.zip");

    const task = new ImportMarkdownZipTask();
    const response = await task.perform({
      fileOperationId: fileOperation.id,
    });

    expect(response.collections.size).toEqual(1);
    expect(response.documents.size).toEqual(8);
    expect(response.attachments.size).toEqual(6);
  });

  it("should throw an error with corrupt zip", async () => {
    const fileOperation = await stubFileOperation("corrupt.zip");

    const error = await performAndCaptureError(fileOperation.id);

    expect(error && error.message).toBeTruthy();
  });

  it("should throw an error with empty collection in zip", async () => {
    const fileOperation = await stubFileOperation("empty.zip");

    const error = await performAndCaptureError(fileOperation.id);

    expect(error && error.message).toBe(
      "Uploaded file does not contain any valid documents"
    );
  });
});

View File

@@ -0,0 +1,171 @@
import JSZip from "jszip";
import mime from "mime-types";
import { v4 as uuidv4 } from "uuid";
import documentImporter from "@server/commands/documentImporter";
import Logger from "@server/logging/logger";
import { FileOperation, User } from "@server/models";
import { zipAsFileTree, FileTreeNode } from "@server/utils/zip";
import ImportTask, { StructuredImportData } from "./ImportTask";
/**
 * Import task for zip files of Markdown documents, where each root folder
 * becomes a collection and "uploads" folders hold attachments.
 */
export default class ImportMarkdownZipTask extends ImportTask {
  /**
   * Loads the zip file and converts its contents into the structured import
   * format.
   *
   * @param buffer The contents of the uploaded zip file
   * @param fileOperation The FileOperation the upload belongs to
   * @returns A StructuredImportData object
   */
  public async parseData(
    buffer: Buffer,
    fileOperation: FileOperation
  ): Promise<StructuredImportData> {
    const zip = await JSZip.loadAsync(buffer);
    const tree = zipAsFileTree(zip);

    return this.parseFileTree({ fileOperation, zip, tree });
  }

  /**
   * Converts the file structure from zipAsFileTree into documents,
   * collections, and attachments.
   *
   * @param zip The loaded zip file that the tree was built from
   * @param tree An array of FileTreeNode representing root files in the zip
   * @param fileOperation The FileOperation being processed
   * @returns A StructuredImportData object
   * @throws Error if the user that owns the file operation cannot be found
   * while there are files to parse
   */
  private async parseFileTree({
    zip,
    tree,
    fileOperation,
  }: {
    zip: JSZip;
    fileOperation: FileOperation;
    tree: FileTreeNode[];
  }): Promise<StructuredImportData> {
    const user = await User.findByPk(fileOperation.userId);
    const output: StructuredImportData = {
      collections: [],
      documents: [],
      attachments: [],
    };

    // Replaces every occurrence of a literal string. File paths frequently
    // contain characters such as "." "(" or "+" that are special in a regular
    // expression, so they must not be used to build a RegExp unescaped.
    const replaceAllLiteral = (
      text: string,
      search: string,
      replacement: string
    ) => text.split(search).join(replacement);

    async function parseNodeChildren(
      children: FileTreeNode[],
      collectionId: string,
      parentDocumentId?: string
    ): Promise<void> {
      if (!user) {
        throw new Error("User not found");
      }

      await Promise.all(
        children.map(async (child) => {
          // special case for folders of attachments
          if (
            child.name === "uploads" ||
            (child.children.length > 0 && child.path.includes("/uploads/"))
          ) {
            return parseNodeChildren(child.children, collectionId);
          }

          const zipObject = zip.files[child.path];
          const id = uuidv4();

          // this is an attachment
          if (child.path.includes("/uploads/") && child.children.length === 0) {
            output.attachments.push({
              id,
              name: child.name,
              path: child.path,
              mimeType: mime.lookup(child.path) || "application/octet-stream",
              buffer: await zipObject.async("nodebuffer"),
            });
            return;
          }

          const { title, text } = await documentImporter({
            mimeType: "text/markdown",
            fileName: child.name,
            content: await zipObject.async("string"),
            user,
            ip: user.lastActiveIp || undefined,
          });

          // Metadata is optional. Default to an empty object so that a missing
          // or unparseable comment falls back to the dates of the zip entry
          // instead of throwing when the properties are read below.
          let metadata: { createdAt?: string; updatedAt?: string } = {};
          try {
            if (zipObject.comment) {
              metadata = JSON.parse(zipObject.comment) ?? {};
            }
          } catch (err) {
            Logger.debug(
              "task",
              `ZIP comment found for ${child.name}, but could not be parsed as metadata: ${zipObject.comment}`
            );
          }

          const createdAt = metadata.createdAt
            ? new Date(metadata.createdAt)
            : zipObject.date;

          const updatedAt = metadata.updatedAt
            ? new Date(metadata.updatedAt)
            : zipObject.date;

          const existingEmptyDocument = output.documents.find(
            (doc) =>
              doc.title === title &&
              doc.collectionId === collectionId &&
              doc.parentDocumentId === parentDocumentId &&
              doc.text === ""
          );

          // When there is a file and a folder with the same name this handles
          // the case by combining the two into one document with nested children
          let documentId = id;

          if (existingEmptyDocument) {
            existingEmptyDocument.text = text;
            // Children must hang off the document that is actually stored,
            // the freshly generated id is discarded when merging.
            documentId = existingEmptyDocument.id;
          } else {
            output.documents.push({
              id,
              title,
              text,
              updatedAt,
              createdAt,
              collectionId,
              parentDocumentId,
              path: child.path,
            });
          }

          await parseNodeChildren(child.children, collectionId, documentId);
        })
      );
    }

    // All nodes in the root level should be collections
    for (const node of tree) {
      if (node.path.endsWith("/")) {
        const collectionId = uuidv4();
        output.collections.push({
          id: collectionId,
          name: node.title,
        });
        await parseNodeChildren(node.children, collectionId);
      } else {
        Logger.debug("task", `Unhandled file in zip: ${node.path}`, {
          fileOperationId: fileOperation.id,
        });
      }
    }

    // Work out the strings to look for once per attachment rather than once
    // per document and attachment pair.
    const attachmentReferences = output.attachments.map((attachment) => {
      // Pull the collection and subdirectory out of the path name, upload
      // folders in an export are relative to the document itself
      const normalizedAttachmentPath = attachment.path.replace(
        /(.*)uploads\//,
        "uploads/"
      );

      return {
        reference: `<<${attachment.id}>>`,
        // Order matters: the variant with a leading slash has to be replaced
        // before the bare variant, otherwise the bare replacement consumes
        // the path and leaves a stray "/" in front of the reference.
        searches: [
          attachment.path,
          `/${normalizedAttachmentPath}`,
          normalizedAttachmentPath,
        ],
      };
    });

    // Check all of the attachments we've created against urls in the text
    // and replace them out with attachment redirect urls before continuing.
    for (const document of output.documents) {
      for (const { reference, searches } of attachmentReferences) {
        for (const search of searches) {
          document.text = replaceAllLiteral(document.text, search, reference);
        }
      }
    }

    return output;
  }
}

View File

@@ -0,0 +1,80 @@
import fs from "fs";
import path from "path";
import { FileOperation } from "@server/models";
import { buildFileOperation } from "@server/test/factories";
import { flushdb } from "@server/test/support";
import ImportNotionTask from "./ImportNotionTask";
beforeEach(() => flushdb());
describe("ImportNotionTask", () => {
  /**
   * Runs the Notion import against the named fixture, with
   * FileOperation.findByPk resolving to a file operation whose buffer is the
   * fixture contents.
   */
  const importFixture = async (fixture: string) => {
    const fileOperation = await buildFileOperation();
    Object.defineProperty(fileOperation, "buffer", {
      get: () =>
        fs.readFileSync(
          path.resolve(__dirname, "..", "..", "test", "fixtures", fixture)
        ),
    });
    jest.spyOn(FileOperation, "findByPk").mockResolvedValue(fileOperation);

    const task = new ImportNotionTask();
    return task.perform({
      fileOperationId: fileOperation.id,
    });
  };

  it("should import successfully from a Markdown export", async () => {
    const response = await importFixture("notion-markdown.zip");

    expect(response.collections.size).toEqual(2);
    expect(response.documents.size).toEqual(6);
    expect(response.attachments.size).toEqual(1);

    // Check that the image url was replaced in the text with a redirect
    const attachments = Array.from(response.attachments.values());
    const documents = Array.from(response.documents.values());
    expect(documents[2].text).toContain(attachments[0].redirectUrl);
  });

  it("should import successfully from a HTML export", async () => {
    const response = await importFixture("notion-html.zip");

    expect(response.collections.size).toEqual(2);
    expect(response.documents.size).toEqual(6);
    expect(response.attachments.size).toEqual(4);

    // Check that the image url was replaced in the text with a redirect
    const attachments = Array.from(response.attachments.values());
    const documents = Array.from(response.documents.values());
    expect(documents[1].text).toContain(attachments[1].redirectUrl);
  });
});

View File

@@ -0,0 +1,301 @@
import path from "path";
import JSZip from "jszip";
import { compact } from "lodash";
import mime from "mime-types";
import { v4 as uuidv4 } from "uuid";
import documentImporter from "@server/commands/documentImporter";
import Logger from "@server/logging/logger";
import { FileOperation, User } from "@server/models";
import { zipAsFileTree, FileTreeNode } from "@server/utils/zip";
import ImportTask, { StructuredImportData } from "./ImportTask";
/**
 * Import task for zip files exported from Notion, in either Markdown or HTML
 * format.
 */
export default class ImportNotionTask extends ImportTask {
  /**
   * Loads the zip file and converts its contents into the structured import
   * format.
   *
   * @param buffer The contents of the uploaded zip file
   * @param fileOperation The FileOperation the upload belongs to
   * @returns A StructuredImportData object
   */
  public async parseData(
    buffer: Buffer,
    fileOperation: FileOperation
  ): Promise<StructuredImportData> {
    const zip = await JSZip.loadAsync(buffer);
    const tree = zipAsFileTree(zip);
    return this.parseFileTree({ fileOperation, zip, tree });
  }

  /**
   * Converts the file structure from zipAsFileTree into documents,
   * collections, and attachments.
   *
   * @param zip The loaded zip file that the tree was built from
   * @param tree An array of FileTreeNode representing root files in the zip
   * @param fileOperation The FileOperation being processed
   * @returns A StructuredImportData object
   * @throws Error if the user that owns the file operation cannot be found
   */
  private async parseFileTree({
    zip,
    tree,
    fileOperation,
  }: {
    zip: JSZip;
    fileOperation: FileOperation;
    tree: FileTreeNode[];
  }): Promise<StructuredImportData> {
    const user = await User.findByPk(fileOperation.userId);
    if (!user) {
      throw new Error("User not found");
    }

    const output: StructuredImportData = {
      collections: [],
      documents: [],
      attachments: [],
    };

    const parseNodeChildren = async (
      children: FileTreeNode[],
      collectionId: string,
      parentDocumentId?: string
    ): Promise<void> => {
      await Promise.all(
        children.map(async (child) => {
          // Ignore the CSV's for databases upfront
          if (child.path.endsWith(".csv")) {
            return;
          }

          const zipObject = zip.files[child.path];
          const id = uuidv4();
          const match = child.title.match(this.NotionUUIDRegex);
          const name = child.title.replace(this.NotionUUIDRegex, "");
          const sourceId = match ? match[0].trim() : undefined;

          // If it's not a text file we're going to treat it as an attachment.
          const mimeType = mime.lookup(child.name);
          const isDocument =
            mimeType === "text/markdown" ||
            mimeType === "text/plain" ||
            mimeType === "text/html";

          // If it's not a document and not a folder, treat it as an attachment
          if (!isDocument && mimeType) {
            output.attachments.push({
              id,
              name: child.name,
              path: child.path,
              mimeType,
              buffer: await zipObject.async("nodebuffer"),
              sourceId,
            });
            return;
          }

          Logger.debug("task", `Processing ${name} as ${mimeType}`);

          const { title, text } = await documentImporter({
            mimeType: mimeType || "text/markdown",
            fileName: name,
            content: await zipObject.async("string"),
            user,
            ip: user.lastActiveIp || undefined,
          });

          // Only look for a previously parsed document when this entry has a
          // sourceId. Comparing two missing ids would otherwise treat every
          // entry without a Notion id as the same document and drop it.
          const existingDocument = sourceId
            ? output.documents.find((doc) => doc.sourceId === sourceId)
            : undefined;

          // If there is an existing document with the same sourceId that means
          // we've already parsed either a folder or a file referencing the same
          // document, as such we should merge.
          if (existingDocument) {
            if (existingDocument.text === "") {
              existingDocument.text = text;
            }

            await parseNodeChildren(
              child.children,
              collectionId,
              existingDocument.id
            );
          } else {
            output.documents.push({
              id,
              title,
              text,
              collectionId,
              parentDocumentId,
              path: child.path,
              sourceId,
            });
            await parseNodeChildren(child.children, collectionId, id);
          }
        })
      );
    };

    const replaceInternalLinksAndImages = (text: string) => {
      // Find if there are any images in this document
      const imagesInText = this.parseImages(text);

      for (const image of imagesInText) {
        const name = path.basename(image.src);
        const attachment = output.attachments.find((att) => att.name === name);

        if (!attachment) {
          Logger.info(
            "task",
            `Could not find referenced attachment with name ${name} and src ${image.src}`
          );
        } else {
          // Replace every occurrence as a literal string. The src is a file
          // path, so characters like "." or "(" must not be interpreted as
          // regular expression syntax.
          text = text.split(image.src).join(`<<${attachment.id}>>`);
        }
      }

      // With Notion's HTML import, images sometimes come wrapped in anchor tags
      // This isn't supported in Outline's editor, so we need to strip them.
      text = text.replace(/\[!\[([^[]+)]/g, "![]");

      // Find if there are any links in this document pointing to other documents
      const internalLinksInText = this.parseInternalLinks(text);

      // For each link update to the standardized format of <<documentId>>
      // instead of a relative or absolute URL within the original zip file.
      for (const link of internalLinksInText) {
        const doc = output.documents.find(
          (doc) => doc.sourceId === link.sourceId
        );

        if (!doc) {
          Logger.info(
            "task",
            `Could not find referenced document with sourceId ${link.sourceId}`
          );
        } else {
          text = text.replace(link.href, `<<${doc.id}>>`);
        }
      }

      return text;
    };

    // All nodes in the root level should become collections
    for (const node of tree) {
      const match = node.title.match(this.NotionUUIDRegex);
      const name = node.title.replace(this.NotionUUIDRegex, "");
      const sourceId = match ? match[0].trim() : undefined;
      const mimeType = mime.lookup(node.name);

      // As with documents, root nodes without a sourceId are never merged,
      // each of them becomes its own collection.
      const existingCollection = sourceId
        ? output.collections.find(
            (collection) => collection.sourceId === sourceId
          )
        : undefined;
      const collectionId = existingCollection?.id || uuidv4();
      let description;

      // Root level docs become the descriptions of collections
      if (
        mimeType === "text/markdown" ||
        mimeType === "text/plain" ||
        mimeType === "text/html"
      ) {
        const zipObject = zip.files[node.path];
        const { text } = await documentImporter({
          mimeType,
          fileName: name,
          content: await zipObject.async("string"),
          user,
          ip: user.lastActiveIp || undefined,
        });

        description = text;
      } else if (node.children.length > 0) {
        await parseNodeChildren(node.children, collectionId);
      } else {
        Logger.debug("task", `Unhandled file in zip: ${node.path}`, {
          fileOperationId: fileOperation.id,
        });
        continue;
      }

      if (existingCollection) {
        if (description) {
          existingCollection.description = description;
        }
      } else {
        output.collections.push({
          id: collectionId,
          name,
          description,
          sourceId,
        });
      }
    }

    for (const document of output.documents) {
      document.text = replaceInternalLinksAndImages(document.text);
    }

    for (const collection of output.collections) {
      if (collection.description) {
        collection.description = replaceInternalLinksAndImages(
          collection.description
        );
      }
    }

    return output;
  }

  /**
   * Extracts internal links from a markdown document, taking into account the
   * sourceId of the document, which is part of the link title.
   *
   * @param text The markdown text to parse
   * @returns An array of internal links
   */
  private parseInternalLinks(
    text: string
  ): { title: string; href: string; sourceId: string }[] {
    return compact(
      [...text.matchAll(this.NotionLinkRegex)].map((match) => ({
        title: match[1],
        href: match[2],
        sourceId: match[3],
      }))
    );
  }

  /**
   * Extracts images from the markdown document
   *
   * @param text The markdown text to parse
   * @returns An array of images with their alt text and src
   */
  private parseImages(text: string): { alt: string; src: string }[] {
    return compact(
      [...text.matchAll(this.ImageRegex)].map((match) => ({
        alt: match[1],
        src: match[2],
      }))
    );
  }

  /**
   * Regex to find markdown images of all types
   */
  private ImageRegex = /!\[(?<alt>[^\][]*?)]\((?<filename>[^\][]*?)(?=“|\))“?(?<title>[^\][”]+)?”?\)/g;

  /**
   * Regex to find markdown links containing ID's that look like UUID's with the
   * "-"'s removed, Notion's sourceId format.
   */
  private NotionLinkRegex = /\[([^[]+)]\((.*?([0-9a-fA-F]{32})\..*?)\)/g;

  /**
   * Regex to find Notion document UUID's in the title of a document.
   */
  private NotionUUIDRegex = /\s([0-9a-fA-F]{8}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{12}|[0-9a-fA-F]{32})$/;
}

View File

@@ -0,0 +1,379 @@
import invariant from "invariant";
import attachmentCreator from "@server/commands/attachmentCreator";
import documentCreator from "@server/commands/documentCreator";
import { sequelize } from "@server/database/sequelize";
import { ValidationError } from "@server/errors";
import logger from "@server/logging/logger";
import {
User,
Event,
Document,
Collection,
FileOperation,
Attachment,
} from "@server/models";
import { FileOperationState } from "@server/models/FileOperation";
import BaseTask, { TaskPriority } from "./BaseTask";
type Props = {
fileOperationId: string;
};
/**
 * Standardized format for data importing, to be used by all import tasks.
 */
export type StructuredImportData = {
/** The collections to create. */
collections: {
/** Id generated during parsing, referenced by documents via collectionId */
id: string;
/** The name of the collection */
name: string;
/**
* The collection description. To reference an attachment or image use the
* special formatting <<attachmentId>>. It will be replaced with a reference
* to the actual attachment as part of persistData.
*
* To reference a document use <<documentId>>, it will be replaced with a
* link to the document as part of persistData once the document url is
* generated.
*/
description?: string;
/** Optional id from import source, useful for mapping */
sourceId?: string;
}[];
/** The documents to create. */
documents: {
/** Id generated during parsing, referenced by children via parentDocumentId */
id: string;
/** The title of the document */
title: string;
/**
* The document text. To reference an attachment or image use the special
* formatting <<attachmentId>>. It will be replaced with a reference to the
* actual attachment as part of persistData.
*
* To reference another document use <<documentId>>, it will be replaced
* with a link to the document as part of persistData once the document url
* is generated.
*/
text: string;
/** The id of the collection, in this structure, the document belongs to */
collectionId: string;
updatedAt?: Date;
createdAt?: Date;
/** The id of the parent document in this structure, if nested */
parentDocumentId?: string;
/** The path of the source file within the imported data */
path: string;
/** Optional id from import source, useful for mapping */
sourceId?: string;
}[];
/** The attachments to create. */
attachments: {
/** Id generated during parsing, used in <<attachmentId>> references */
id: string;
/** The file name of the attachment */
name: string;
/** The path of the source file within the imported data */
path: string;
/** The mime type of the attachment contents */
mimeType: string;
/** The raw contents of the attachment */
buffer: Buffer;
/** Optional id from import source, useful for mapping */
sourceId?: string;
}[];
};
export default abstract class ImportTask extends BaseTask<Props> {
/**
* Runs the import task: fetches the data for the file operation, parses it
* into the structured format, validates that it contains at least one
* collection and one document, and then persists it. The file operation is
* marked as complete on success, or as errored with the error message on
* any failure.
*
* @param props The props, containing the id of the FileOperation to import
* @returns The result of persistData
* @throws The error that caused the import to fail, after it has been
* recorded on the file operation
*/
public async perform({ fileOperationId }: Props) {
const fileOperation = await FileOperation.findByPk(fileOperationId);
invariant(fileOperation, "fileOperation not found");
try {
logger.info("task", `ImportTask fetching data for ${fileOperationId}`);
const data = await this.fetchData(fileOperation);
logger.info("task", `ImportTask parsing data for ${fileOperationId}`);
const parsed = await this.parseData(data, fileOperation);
if (parsed.collections.length === 0) {
throw ValidationError(
"Uploaded file does not contain any collections. The root of the zip file must contain folders representing collections."
);
}
if (parsed.documents.length === 0) {
throw ValidationError(
"Uploaded file does not contain any valid documents"
);
}
let result;
// Persistence failures are logged in full here and then rethrown as a
// generic error, so it is the generic message that the outer catch stores
// on the file operation.
try {
logger.info(
"task",
`ImportTask persisting data for ${fileOperationId}`
);
result = await this.persistData(parsed, fileOperation);
} catch (error) {
logger.error(
`ImportTask failed to persist data for ${fileOperationId}`,
error
);
throw new Error("Sorry, an internal error occurred during import");
}
await this.updateFileOperation(
fileOperation,
FileOperationState.Complete
);
return result;
} catch (error) {
// Record the failure on the file operation before propagating the error
// to the caller.
await this.updateFileOperation(
fileOperation,
FileOperationState.Error,
error
);
throw error;
}
}
/**
 * Stores a new state, and the message of any error, on the FileOperation and
 * then schedules a "fileOperations.update" event for it.
 *
 * @param fileOperation The FileOperation to update
 * @param state The state to store on the FileOperation
 * @param error An optional error, its message is stored alongside the state
 */
private async updateFileOperation(
  fileOperation: FileOperation,
  state: FileOperationState,
  error?: Error
) {
  const changes = { state, error: error?.message };
  await fileOperation.update(changes);

  const { id: modelId, teamId, userId: actorId } = fileOperation;
  await Event.schedule({
    name: "fileOperations.update",
    modelId,
    teamId,
    actorId,
  });
}
/**
* Fetch the remote data needed for the import, by default this will download
* any file associated with the FileOperation, save it to a temporary file,
* and return the path.
*
* @param fileOperation The FileOperation to fetch data for
* @returns string
*/
protected async fetchData(fileOperation: FileOperation) {
return fileOperation.buffer;
}
/**
* Parse the data loaded from fetchData into a consistent structured format
* that represents collections, documents, and the relationships between them.
*
* @param data The data loaded from fetchData
* @returns A promise that resolves to the structured data
*/
protected abstract parseData(
data: any,
fileOperation: FileOperation
): Promise<StructuredImportData>;
/**
* Persist the data that was already fetched and parsed into the consistent
* structured data.
*
* @param props The props
*/
protected async persistData(
data: StructuredImportData,
fileOperation: FileOperation
): Promise<{
collections: Map<string, Collection>;
documents: Map<string, Document>;
attachments: Map<string, Attachment>;
}> {
const collections = new Map<string, Collection>();
const documents = new Map<string, Document>();
const attachments = new Map<string, Attachment>();
return sequelize.transaction(async (transaction) => {
const user = await User.findByPk(fileOperation.userId, {
transaction,
});
invariant(user, "User not found");
const ip = user.lastActiveIp || undefined;
// Attachments
for (const item of data.attachments) {
const attachment = await attachmentCreator({
source: "import",
id: item.id,
name: item.name,
type: item.mimeType,
buffer: item.buffer,
user,
ip,
transaction,
});
attachments.set(item.id, attachment);
}
// Collections
for (const item of data.collections) {
let description = item.description;
if (description) {
// Check all of the attachments we've created against urls in the text
// and replace them out with attachment redirect urls before saving.
for (const aitem of data.attachments) {
const attachment = attachments.get(aitem.id);
if (!attachment) {
continue;
}
description = description.replace(
new RegExp(`<<${attachment.id}>>`, "g"),
attachment.redirectUrl
);
}
// Check all of the document we've created against urls in the text
// and replace them out with a valid internal link. Because we are doing
// this before saving, we can't use the document slug, but we can take
// advantage of the fact that the document id will redirect in the client
for (const ditem of data.documents) {
description = description.replace(
new RegExp(`<<${ditem.id}>>`, "g"),
`/doc/${ditem.id}`
);
}
}
// check if collection with name exists
const response = await Collection.findOrCreate({
where: {
teamId: fileOperation.teamId,
name: item.name,
},
defaults: {
id: item.id,
description,
createdById: fileOperation.userId,
permission: "read_write",
},
transaction,
});
let collection = response[0];
const isCreated = response[1];
// create new collection if name already exists, yes it's possible that
// there is also a "Name (Imported)" but this is a case not worth dealing
// with right now
if (!isCreated) {
const name = `${item.name} (Imported)`;
collection = await Collection.create(
{
id: item.id,
description,
teamId: fileOperation.teamId,
createdById: fileOperation.userId,
name,
permission: "read_write",
},
{ transaction }
);
}
await Event.create(
{
name: "collections.create",
collectionId: collection.id,
teamId: collection.teamId,
actorId: fileOperation.userId,
data: {
name: collection.name,
},
ip,
},
{
transaction,
}
);
collections.set(item.id, collection);
}
// Documents
for (const item of data.documents) {
let text = item.text;
// Check all of the attachments we've created against urls in the text
// and replace them out with attachment redirect urls before saving.
for (const aitem of data.attachments) {
const attachment = attachments.get(aitem.id);
if (!attachment) {
continue;
}
text = text.replace(
new RegExp(`<<${attachment.id}>>`, "g"),
attachment.redirectUrl
);
}
// Check all of the document we've created against urls in the text
// and replace them out with a valid internal link. Because we are doing
// this before saving, we can't use the document slug, but we can take
// advantage of the fact that the document id will redirect in the client
for (const ditem of data.documents) {
text = text.replace(
new RegExp(`<<${ditem.id}>>`, "g"),
`/doc/${ditem.id}`
);
}
const document = await documentCreator({
source: "import",
id: item.id,
title: item.title,
text,
collectionId: item.collectionId,
createdAt: item.createdAt,
updatedAt: item.updatedAt ?? item.createdAt,
publishedAt: item.updatedAt ?? item.createdAt ?? new Date(),
parentDocumentId: item.parentDocumentId,
user,
ip,
transaction,
});
documents.set(item.id, document);
const collection = collections.get(item.collectionId);
if (collection) {
await collection.addDocumentToStructure(document, 0, { transaction });
}
}
// Return value is only used for testing
return {
collections,
documents,
attachments,
};
});
}
/**
* Optional hook to remove any temporary files that were created
*/
protected async cleanupData() {
// noop
}
/**
* Job options such as priority and retry strategy, as defined by Bull.
*/
public get options() {
return {
priority: TaskPriority.Low,
attempts: 1,
};
}
}

View File

@@ -4,8 +4,10 @@ import Router from "koa-router";
import { Sequelize, Op, WhereOptions } from "sequelize";
import collectionExporter from "@server/commands/collectionExporter";
import teamUpdater from "@server/commands/teamUpdater";
import { sequelize } from "@server/database/sequelize";
import { ValidationError } from "@server/errors";
import auth from "@server/middlewares/authentication";
import {
Collection,
CollectionUser,
@@ -15,7 +17,13 @@ import {
User,
Group,
Attachment,
FileOperation,
} from "@server/models";
import {
FileOperationFormat,
FileOperationState,
FileOperationType,
} from "@server/models/FileOperation";
import { authorize } from "@server/policies";
import {
presentCollection,
@@ -134,22 +142,47 @@ router.post("collections.info", auth(), async (ctx) => {
});
router.post("collections.import", auth(), async (ctx) => {
const { type, attachmentId } = ctx.body;
assertIn(type, ["outline"], "type must be one of 'outline'");
const { attachmentId, format = FileOperationFormat.MarkdownZip } = ctx.body;
assertUuid(attachmentId, "attachmentId is required");
const { user } = ctx.state;
authorize(user, "importCollection", user.team);
const attachment = await Attachment.findByPk(attachmentId);
authorize(user, "read", attachment);
await Event.create({
name: "collections.import",
modelId: attachmentId,
teamId: user.teamId,
actorId: user.id,
data: {
type,
},
ip: ctx.request.ip,
assertIn(format, Object.values(FileOperationFormat), "Invalid format");
await sequelize.transaction(async (transaction) => {
const fileOperation = await FileOperation.create(
{
type: FileOperationType.Import,
state: FileOperationState.Creating,
format,
size: attachment.size,
key: attachment.key,
userId: user.id,
teamId: user.teamId,
},
{
transaction,
}
);
await Event.create(
{
name: "fileOperations.create",
teamId: user.teamId,
actorId: user.id,
modelId: fileOperation.id,
data: {
type: FileOperationType.Import,
},
},
{
transaction,
}
);
});
ctx.body = {

View File

@@ -1,3 +1,4 @@
import fs from "fs-extra";
import invariant from "invariant";
import Router from "koa-router";
import { Op, ScopeOptions, WhereOptions } from "sequelize";
@@ -6,6 +7,7 @@ import documentCreator from "@server/commands/documentCreator";
import documentImporter from "@server/commands/documentImporter";
import documentMover from "@server/commands/documentMover";
import documentPermanentDeleter from "@server/commands/documentPermanentDeleter";
import documentUpdater from "@server/commands/documentUpdater";
import { sequelize } from "@server/database/sequelize";
import {
NotFoundError,
@@ -999,8 +1001,6 @@ router.post("documents.update", auth(), async (ctx) => {
text,
fullWidth,
publish,
autosave,
done,
lastRevision,
templateId,
append,
@@ -1012,91 +1012,37 @@ router.post("documents.update", auth(), async (ctx) => {
}
const { user } = ctx.state;
const document = await Document.findByPk(id, {
userId: user.id,
});
authorize(user, "update", document);
let collection: Collection | null | undefined;
if (lastRevision && lastRevision !== document.revisionCount) {
throw InvalidRequestError("Document has changed since last revision");
}
const document = await sequelize.transaction(async (transaction) => {
const document = await Document.findByPk(id, {
userId: user.id,
transaction,
});
authorize(user, "update", document);
const previousTitle = document.title;
collection = document.collection;
// Update document
if (title !== undefined) {
document.title = title;
}
if (editorVersion) {
document.editorVersion = editorVersion;
}
if (templateId) {
document.templateId = templateId;
}
if (fullWidth !== undefined) {
document.fullWidth = fullWidth;
}
if (!user.team?.collaborativeEditing) {
if (append) {
document.text += text;
} else if (text !== undefined) {
document.text = text;
if (lastRevision && lastRevision !== document.revisionCount) {
throw InvalidRequestError("Document has changed since last revision");
}
}
document.lastModifiedById = user.id;
const { collection } = document;
const changed = document.changed();
if (publish) {
await document.publish(user.id);
} else {
await document.save();
}
if (publish) {
await Event.create({
name: "documents.publish",
documentId: document.id,
collectionId: document.collectionId,
teamId: document.teamId,
actorId: user.id,
data: {
title: document.title,
},
return documentUpdater({
document,
user,
title,
text,
fullWidth,
publish,
append,
templateId,
editorVersion,
transaction,
ip: ctx.request.ip,
});
} else if (changed) {
await Event.create({
name: "documents.update",
documentId: document.id,
collectionId: document.collectionId,
teamId: document.teamId,
actorId: user.id,
data: {
autosave,
done,
title: document.title,
},
ip: ctx.request.ip,
});
}
});
if (document.title !== previousTitle) {
Event.schedule({
name: "documents.title_change",
documentId: document.id,
collectionId: document.collectionId,
teamId: document.teamId,
actorId: user.id,
data: {
previousTitle,
title: document.title,
},
ip: ctx.request.ip,
});
}
invariant(collection, "collection not found");
document.updatedBy = user;
document.collection = collection;
@@ -1342,22 +1288,31 @@ router.post("documents.import", auth(), async (ctx) => {
});
}
const { text, title } = await documentImporter({
user,
file,
ip: ctx.request.ip,
});
const document = await documentCreator({
source: "import",
title,
text,
publish,
collectionId,
parentDocumentId,
index,
user,
ip: ctx.request.ip,
const content = await fs.readFile(file.path, "utf8");
const document = await sequelize.transaction(async (transaction) => {
const { text, title } = await documentImporter({
user,
fileName: file.name,
mimeType: file.type,
content,
ip: ctx.request.ip,
transaction,
});
return documentCreator({
source: "import",
title,
text,
publish,
collectionId,
parentDocumentId,
index,
user,
ip: ctx.request.ip,
transaction,
});
});
document.collection = collection;
return (ctx.body = {
@@ -1414,7 +1369,7 @@ router.post("documents.create", auth(), async (ctx) => {
});
}
let templateDocument;
let templateDocument: Document | null | undefined;
if (templateId) {
templateDocument = await Document.findByPk(templateId, {
@@ -1423,19 +1378,23 @@ router.post("documents.create", auth(), async (ctx) => {
authorize(user, "read", templateDocument);
}
const document = await documentCreator({
title,
text,
publish,
collectionId,
parentDocumentId,
templateDocument,
template,
index,
user,
editorVersion,
ip: ctx.request.ip,
const document = await sequelize.transaction(async (transaction) => {
return documentCreator({
title,
text,
publish,
collectionId,
parentDocumentId,
templateDocument,
template,
index,
user,
editorVersion,
ip: ctx.request.ip,
transaction,
});
});
document.collection = collection;
return (ctx.body = {

View File

@@ -1,5 +1,9 @@
import TestServer from "fetch-test-server";
import { Collection, User, Event, FileOperation } from "@server/models";
import {
FileOperationState,
FileOperationType,
} from "@server/models/FileOperation";
import webService from "@server/services/web";
import {
buildAdmin,
@@ -23,7 +27,7 @@ describe("#fileOperations.info", () => {
teamId: team.id,
});
const exportData = await buildFileOperation({
type: "export",
type: FileOperationType.Export,
teamId: team.id,
userId: admin.id,
});
@@ -31,7 +35,7 @@ describe("#fileOperations.info", () => {
body: {
id: exportData.id,
token: admin.getJwtToken(),
type: "export",
type: FileOperationType.Export,
},
});
const body = await res.json();
@@ -49,7 +53,7 @@ describe("#fileOperations.info", () => {
teamId: team.id,
});
const exportData = await buildFileOperation({
type: "export",
type: FileOperationType.Export,
teamId: team.id,
userId: admin.id,
});
@@ -57,7 +61,7 @@ describe("#fileOperations.info", () => {
body: {
id: exportData.id,
token: user.getJwtToken(),
type: "export",
type: FileOperationType.Export,
},
});
expect(res.status).toEqual(403);
@@ -71,14 +75,14 @@ describe("#fileOperations.list", () => {
teamId: team.id,
});
const exportData = await buildFileOperation({
type: "export",
type: FileOperationType.Export,
teamId: team.id,
userId: admin.id,
});
const res = await server.post("/api/fileOperations.list", {
body: {
token: admin.getJwtToken(),
type: "export",
type: FileOperationType.Export,
},
});
const body = await res.json();
@@ -100,7 +104,7 @@ describe("#fileOperations.list", () => {
teamId: team.id,
});
const exportData = await buildFileOperation({
type: "export",
type: FileOperationType.Export,
teamId: team.id,
userId: admin.id,
collectionId: collection.id,
@@ -108,7 +112,7 @@ describe("#fileOperations.list", () => {
const res = await server.post("/api/fileOperations.list", {
body: {
token: admin.getJwtToken(),
type: "export",
type: FileOperationType.Export,
},
});
const body = await res.json();
@@ -131,7 +135,7 @@ describe("#fileOperations.list", () => {
teamId: team.id,
});
const exportData = await buildFileOperation({
type: "export",
type: FileOperationType.Export,
teamId: team.id,
userId: admin.id,
collectionId: collection.id,
@@ -142,7 +146,7 @@ describe("#fileOperations.list", () => {
const res = await server.post("/api/fileOperations.list", {
body: {
token: admin.getJwtToken(),
type: "export",
type: FileOperationType.Export,
},
});
const body = await res.json();
@@ -168,7 +172,7 @@ describe("#fileOperations.list", () => {
teamId: team.id,
});
const exportData = await buildFileOperation({
type: "export",
type: FileOperationType.Export,
teamId: team.id,
userId: admin.id,
collectionId: collection.id,
@@ -179,7 +183,7 @@ describe("#fileOperations.list", () => {
const res = await server.post("/api/fileOperations.list", {
body: {
token: admin2.getJwtToken(),
type: "export",
type: FileOperationType.Export,
},
});
const body = await res.json();
@@ -197,7 +201,7 @@ describe("#fileOperations.list", () => {
const res = await server.post("/api/fileOperations.list", {
body: {
token: user.getJwtToken(),
type: "export",
type: FileOperationType.Export,
},
});
expect(res.status).toEqual(403);
@@ -211,7 +215,7 @@ describe("#fileOperations.redirect", () => {
teamId: team.id,
});
const exportData = await buildFileOperation({
type: "export",
type: FileOperationType.Export,
teamId: team.id,
userId: admin.id,
});
@@ -234,7 +238,7 @@ describe("#fileOperations.info", () => {
teamId: team.id,
});
const exportData = await buildFileOperation({
type: "export",
type: FileOperationType.Export,
teamId: team.id,
userId: admin.id,
});
@@ -259,7 +263,7 @@ describe("#fileOperations.info", () => {
teamId: team.id,
});
const exportData = await buildFileOperation({
type: "export",
type: FileOperationType.Export,
teamId: team.id,
userId: admin.id,
});
@@ -280,10 +284,10 @@ describe("#fileOperations.delete", () => {
teamId: team.id,
});
const exportData = await buildFileOperation({
type: "export",
type: FileOperationType.Export,
teamId: team.id,
userId: admin.id,
state: "complete",
state: FileOperationState.Complete,
});
const deleteResponse = await server.post("/api/fileOperations.delete", {
body: {

Binary file not shown.

BIN
server/test/fixtures/notion-html.zip vendored Normal file

Binary file not shown.

BIN
server/test/fixtures/notion-markdown.zip vendored Normal file

Binary file not shown.

View File

@@ -16,83 +16,96 @@ export function flushdb() {
}
export const seed = async () => {
const team = await Team.create(
{
name: "Team",
collaborativeEditing: false,
authenticationProviders: [
{
name: "slack",
providerId: uuidv4(),
},
],
},
{
include: "authenticationProviders",
}
);
const authenticationProvider = team.authenticationProviders[0];
const admin = await User.create(
{
email: "admin@example.com",
username: "admin",
name: "Admin User",
teamId: team.id,
isAdmin: true,
createdAt: new Date("2018-01-01T00:00:00.000Z"),
authentications: [
{
authenticationProviderId: authenticationProvider.id,
providerId: uuidv4(),
},
],
},
{
include: "authentications",
}
);
const user = await User.create(
{
id: "46fde1d4-0050-428f-9f0b-0bf77f4bdf61",
email: "user1@example.com",
name: "User 1",
teamId: team.id,
createdAt: new Date("2018-01-02T00:00:00.000Z"),
authentications: [
{
authenticationProviderId: authenticationProvider.id,
providerId: uuidv4(),
},
],
},
{
include: "authentications",
}
);
const collection = await Collection.create({
name: "Collection",
urlId: "collection",
teamId: team.id,
createdById: user.id,
permission: "read_write",
return sequelize.transaction(async (transaction) => {
const team = await Team.create(
{
name: "Team",
collaborativeEditing: false,
authenticationProviders: [
{
name: "slack",
providerId: uuidv4(),
},
],
},
{
transaction,
include: "authenticationProviders",
}
);
const authenticationProvider = team.authenticationProviders[0];
const admin = await User.create(
{
email: "admin@example.com",
username: "admin",
name: "Admin User",
teamId: team.id,
isAdmin: true,
createdAt: new Date("2018-01-01T00:00:00.000Z"),
authentications: [
{
authenticationProviderId: authenticationProvider.id,
providerId: uuidv4(),
},
],
},
{
transaction,
include: "authentications",
}
);
const user = await User.create(
{
id: "46fde1d4-0050-428f-9f0b-0bf77f4bdf61",
email: "user1@example.com",
name: "User 1",
teamId: team.id,
createdAt: new Date("2018-01-02T00:00:00.000Z"),
authentications: [
{
authenticationProviderId: authenticationProvider.id,
providerId: uuidv4(),
},
],
},
{
transaction,
include: "authentications",
}
);
const collection = await Collection.create(
{
name: "Collection",
urlId: "collection",
teamId: team.id,
createdById: user.id,
permission: "read_write",
},
{
transaction,
}
);
const document = await Document.create(
{
parentDocumentId: null,
collectionId: collection.id,
teamId: team.id,
userId: collection.createdById,
lastModifiedById: collection.createdById,
createdById: collection.createdById,
title: "First ever document",
text: "# Much test support",
},
{ transaction }
);
await document.publish(collection.createdById, { transaction });
await collection.reload({ transaction });
return {
user,
admin,
collection,
document,
team,
};
});
const document = await Document.create({
parentDocumentId: null,
collectionId: collection.id,
teamId: team.id,
userId: collection.createdById,
lastModifiedById: collection.createdById,
createdById: collection.createdById,
title: "First ever document",
text: "# Much test support",
});
await document.publish(collection.createdById);
await collection.reload();
return {
user,
admin,
collection,
document,
team,
};
};

View File

@@ -104,17 +104,6 @@ export type RevisionEvent = {
teamId: string;
};
export type CollectionImportEvent = {
name: "collections.import";
modelId: string;
teamId: string;
actorId: string;
data: {
type: "outline";
};
ip: string;
};
export type CollectionExportEvent = {
name: "collections.export";
teamId: string;
@@ -268,7 +257,6 @@ export type Event =
| PinEvent
| StarEvent
| CollectionEvent
| CollectionImportEvent
| CollectionExportAllEvent
| FileOperationEvent
| IntegrationEvent

View File

@@ -1,6 +1,7 @@
import fs from "fs";
import path from "path";
import JSZip, { JSZipObject } from "jszip";
import { find } from "lodash";
import tmp from "tmp";
import Logger from "@server/logging/logger";
import Attachment from "@server/models/Attachment";
@@ -174,77 +175,65 @@ export async function archiveCollections(collections: Collection[]) {
return archiveToPath(zip);
}
export async function parseOutlineExport(
input: File | Buffer
): Promise<Item[]> {
const zip = await JSZip.loadAsync(input);
// this is so we can use async / await a little easier
const items: Item[] = [];
export type FileTreeNode = {
/** The title, extracted from the file name */
title: string;
/** The file name including extension */
name: string;
/** The full path to within the zip file */
path: string;
/** The nested children */
children: FileTreeNode[];
};
for (const rawPath in zip.files) {
const item = zip.files[rawPath];
/**
* Converts the flat structure returned by JSZIP into a nested file structure
* for easier processing.
*
* @param zip The JSZip instance whose file entries should be converted
* @returns An array of root level FileTreeNodes, each with its nested children
*/
export function zipAsFileTree(zip: JSZip) {
const paths = Object.keys(zip.files).map((filePath) => `/${filePath}`);
const tree: FileTreeNode[] = [];
if (!item) {
throw new Error(
`No item at ${rawPath} in zip file. This zip file might be corrupt.`
);
paths.forEach(function (filePath) {
if (filePath.startsWith("/__MACOSX")) {
return;
}
const itemPath = rawPath.replace(/\/$/, "");
const dir = path.dirname(itemPath);
const name = path.basename(item.name);
const depth = itemPath.split("/").length - 1;
const pathParts = filePath.split("/");
// known skippable items
if (itemPath.startsWith("__MACOSX") || itemPath.endsWith(".DS_Store")) {
continue;
}
// Remove first blank element from the parts array.
pathParts.shift();
// attempt to parse extra metadata from zip comment
let metadata = {};
let currentLevel = tree; // initialize currentLevel to root
try {
metadata = item.comment ? JSON.parse(item.comment) : {};
} catch (err) {
console.log(
`ZIP comment found for ${item.name}, but could not be parsed as metadata: ${item.comment}`
);
}
pathParts.forEach(function (name) {
// check to see if the path already exists.
const existingPath = find(currentLevel, {
name,
});
if (depth === 0 && !item.dir) {
throw new Error(
"Root of zip file must only contain folders representing collections"
);
}
if (existingPath) {
// The path to this item was already in the tree, so don't add again.
// Set the current level to this path's children
currentLevel = existingPath.children;
} else if (name.endsWith(".DS_Store") || !name) {
return;
} else {
const newPart = {
name,
path: filePath.replace(/^\//, ""),
title: path.parse(path.basename(name)).name,
children: [],
};
let type: ItemType | undefined;
if (depth === 0 && item.dir && name) {
type = "collection";
}
if (depth > 0 && !item.dir && item.name.endsWith(".md")) {
type = "document";
}
if (depth > 0 && !item.dir && itemPath.includes("uploads")) {
type = "attachment";
}
if (!type) {
continue;
}
items.push({
path: itemPath,
dir,
name,
depth,
type,
metadata,
item,
currentLevel.push(newPart);
currentLevel = newPart.children;
}
});
}
});
return items;
return tree;
}