chore: Refactor data import (#3434)

* Complete refactor of import
* feat: Notion data import (#3442)
server/commands/attachmentCreator.ts
@@ -1,44 +1,60 @@
+import { Transaction } from "sequelize";
 import { v4 as uuidv4 } from "uuid";
 import { Attachment, Event, User } from "@server/models";
 import { uploadToS3FromBuffer } from "@server/utils/s3";
 
 export default async function attachmentCreator({
+  id,
   name,
   type,
   buffer,
   user,
   source,
   ip,
+  transaction,
 }: {
+  id?: string;
   name: string;
   type: string;
   buffer: Buffer;
   user: User;
   source?: "import";
-  ip: string;
+  ip?: string;
+  transaction?: Transaction;
 }) {
   const key = `uploads/${user.id}/${uuidv4()}/${name}`;
   const acl = process.env.AWS_S3_ACL || "private";
   const url = await uploadToS3FromBuffer(buffer, type, key, acl);
-  const attachment = await Attachment.create({
-    key,
-    acl,
-    url,
-    size: buffer.length,
-    contentType: type,
-    teamId: user.teamId,
-    userId: user.id,
-  });
-  await Event.create({
-    name: "attachments.create",
-    data: {
-      name,
-      source,
-    },
-    modelId: attachment.id,
-    teamId: user.teamId,
-    actorId: user.id,
-    ip,
-  });
+  const attachment = await Attachment.create(
+    {
+      id,
+      key,
+      acl,
+      url,
+      size: buffer.length,
+      contentType: type,
+      teamId: user.teamId,
+      userId: user.id,
+    },
+    {
+      transaction,
+    }
+  );
+  await Event.create(
+    {
+      name: "attachments.create",
+      data: {
+        name,
+        source,
+      },
+      modelId: attachment.id,
+      teamId: user.teamId,
+      actorId: user.id,
+      ip,
+    },
+    {
+      transaction,
+    }
+  );
   return attachment;
 }
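With the optional transaction threaded through, a caller can make the Attachment row and its audit Event commit or roll back together (the S3 upload itself is not transactional). A minimal sketch of the new calling convention; the "@server/database/sequelize" import path and the importLogo wrapper are assumptions, not part of this diff.

import { User } from "@server/models";
import { sequelize } from "@server/database/sequelize"; // assumed path
import attachmentCreator from "./attachmentCreator";

async function importLogo(user: User, buffer: Buffer) {
  // Both the Attachment row and its "attachments.create" event share one
  // transaction, so a failure on either leaves no partial records behind.
  return sequelize.transaction(async (transaction) =>
    attachmentCreator({
      name: "logo.png",
      type: "image/png",
      buffer,
      user,
      source: "import",
      transaction,
    })
  );
}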
server/commands/collectionImporter.test.ts (deleted)
@@ -1,85 +0,0 @@
-import path from "path";
-import File from "formidable/lib/file";
-import { Attachment, Document, Collection } from "@server/models";
-import { buildUser } from "@server/test/factories";
-import { flushdb } from "@server/test/support";
-import collectionImporter from "./collectionImporter";
-
-jest.mock("../utils/s3");
-beforeEach(() => flushdb());
-
-describe("collectionImporter", () => {
-  const ip = "127.0.0.1";
-
-  it("should import documents in outline format", async () => {
-    const user = await buildUser();
-    const name = "outline.zip";
-    const file = new File({
-      name,
-      type: "application/zip",
-      path: path.resolve(__dirname, "..", "test", "fixtures", name),
-    });
-    const response = await collectionImporter({
-      type: "outline",
-      user,
-      file,
-      ip,
-    });
-    expect(response.collections.length).toEqual(1);
-    expect(response.documents.length).toEqual(8);
-    expect(response.attachments.length).toEqual(6);
-    expect(await Collection.count()).toEqual(1);
-    expect(await Document.count()).toEqual(8);
-    expect(await Attachment.count()).toEqual(6);
-  });
-
-  it("should throw an error with corrupt zip", async () => {
-    const user = await buildUser();
-    const name = "corrupt.zip";
-    const file = new File({
-      name,
-      type: "application/zip",
-      path: path.resolve(__dirname, "..", "test", "fixtures", name),
-    });
-    let error;
-
-    try {
-      await collectionImporter({
-        type: "outline",
-        user,
-        file,
-        ip,
-      });
-    } catch (err) {
-      error = err;
-    }
-
-    expect(error && error.message).toBeTruthy();
-  });
-
-  it("should throw an error with empty zip", async () => {
-    const user = await buildUser();
-    const name = "empty.zip";
-    const file = new File({
-      name,
-      type: "application/zip",
-      path: path.resolve(__dirname, "..", "test", "fixtures", name),
-    });
-    let error;
-
-    try {
-      await collectionImporter({
-        type: "outline",
-        user,
-        file,
-        ip,
-      });
-    } catch (err) {
-      error = err;
-    }
-
-    expect(error && error.message).toBe(
-      "Uploaded file does not contain importable documents"
-    );
-  });
-});
server/commands/collectionImporter.ts (deleted)
@@ -1,206 +0,0 @@
-import fs from "fs";
-import os from "os";
-import path from "path";
-import File from "formidable/lib/file";
-import invariant from "invariant";
-import { values, keys } from "lodash";
-import { v4 as uuidv4 } from "uuid";
-import Logger from "@server/logging/logger";
-import { APM } from "@server/logging/tracing";
-import { Attachment, Event, Document, Collection, User } from "@server/models";
-import { parseOutlineExport, Item } from "@server/utils/zip";
-import { FileImportError } from "../errors";
-import attachmentCreator from "./attachmentCreator";
-import documentCreator from "./documentCreator";
-import documentImporter from "./documentImporter";
-
-type FileWithPath = File & {
-  path: string;
-};
-
-async function collectionImporter({
-  file,
-  type,
-  user,
-  ip,
-}: {
-  file: FileWithPath;
-  user: User;
-  type: "outline";
-  ip: string;
-}) {
-  // load the zip structure into memory
-  const zipData = await fs.promises.readFile(file.path);
-  let items: Item[];
-
-  try {
-    items = await parseOutlineExport(zipData);
-  } catch (err) {
-    throw FileImportError(err.message);
-  }
-
-  if (!items.filter((item) => item.type === "document").length) {
-    throw FileImportError(
-      "Uploaded file does not contain importable documents"
-    );
-  }
-
-  // store progress and pointers
-  const collections: Record<string, Collection> = {};
-  const documents: Record<string, Document> = {};
-  const attachments: Record<string, Attachment> = {};
-
-  for (const item of items) {
-    if (item.type === "collection") {
-      // check if collection with name exists
-      const response = await Collection.findOrCreate({
-        where: {
-          teamId: user.teamId,
-          name: item.name,
-        },
-        defaults: {
-          createdById: user.id,
-          permission: "read_write",
-        },
-      });
-
-      let collection = response[0];
-      const isCreated = response[1];
-
-      // create new collection if name already exists, yes it's possible that
-      // there is also a "Name (Imported)" but this is a case not worth dealing
-      // with right now
-      if (!isCreated) {
-        const name = `${item.name} (Imported)`;
-        collection = await Collection.create({
-          teamId: user.teamId,
-          createdById: user.id,
-          name,
-          permission: "read_write",
-        });
-        await Event.create({
-          name: "collections.create",
-          collectionId: collection.id,
-          teamId: collection.teamId,
-          actorId: user.id,
-          data: {
-            name,
-          },
-          ip,
-        });
-      }
-
-      collections[item.path] = collection;
-      continue;
-    }
-
-    if (item.type === "document") {
-      const collectionDir = item.dir.split("/")[0];
-      const collection = collections[collectionDir];
-      invariant(collection, `Collection must exist for document ${item.dir}`);
-
-      // we have a document
-      const content = await item.item.async("string");
-      const name = path.basename(item.name);
-      const tmpDir = os.tmpdir();
-      const tmpFilePath = `${tmpDir}/upload-${uuidv4()}`;
-      await fs.promises.writeFile(tmpFilePath, content);
-
-      const file = new File({
-        name,
-        type: "text/markdown",
-        path: tmpFilePath,
-      });
-      const { text, title } = await documentImporter({
-        file,
-        user,
-        ip,
-      });
-
-      await fs.promises.unlink(tmpFilePath);
-
-      // must be a nested document, find and reference the parent document
-      let parentDocumentId;
-
-      if (item.depth > 1) {
-        const parentDocument =
-          documents[`${item.dir}.md`] || documents[item.dir];
-        invariant(parentDocument, `Document must exist for parent ${item.dir}`);
-        parentDocumentId = parentDocument.id;
-      }
-
-      const document = await documentCreator({
-        source: "import",
-        title,
-        text,
-        publish: true,
-        collectionId: collection.id,
-        createdAt: item.metadata.createdAt
-          ? new Date(item.metadata.createdAt)
-          : item.item.date,
-        updatedAt: item.item.date,
-        parentDocumentId,
-        user,
-        ip,
-      });
-      documents[item.path] = document;
-      continue;
-    }
-
-    if (item.type === "attachment") {
-      const buffer = await item.item.async("nodebuffer");
-      const attachment = await attachmentCreator({
-        source: "import",
-        name: item.name,
-        type,
-        buffer,
-        user,
-        ip,
-      });
-      attachments[item.path] = attachment;
-      continue;
-    }
-
-    Logger.info("commands", `Skipped importing ${item.path}`);
-  }
-
-  // All collections, documents, and attachments have been created - time to
-  // update the documents to point to newly uploaded attachments where possible
-  for (const attachmentPath of keys(attachments)) {
-    const attachment = attachments[attachmentPath];
-
-    for (const document of values(documents)) {
-      // pull the collection and subdirectory out of the path name, upload folders
-      // in an Outline export are relative to the document itself
-      const normalizedAttachmentPath = attachmentPath.replace(
-        /(.*)uploads\//,
-        "uploads/"
-      );
-
-      document.text = document.text
-        .replace(attachmentPath, attachment.redirectUrl)
-        .replace(normalizedAttachmentPath, attachment.redirectUrl)
-        .replace(`/${normalizedAttachmentPath}`, attachment.redirectUrl);
-
-      // does nothing if the document text is unchanged
-      await document.save({
-        fields: ["text"],
-      });
-    }
-  }
-
-  // reload collections to get document mapping
-  for (const collection of values(collections)) {
-    await collection.reload();
-  }
-
-  return {
-    documents: values(documents),
-    collections: values(collections),
-    attachments: values(attachments),
-  };
-}
-
-export default APM.traceFunction({
-  serviceName: "command",
-  spanName: "collectionImporter",
-})(collectionImporter);
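The deleted importer walked the Item objects produced by parseOutlineExport. For orientation, here is an illustrative reconstruction of that shape, based solely on the fields the code above reads; the real definition lives in @server/utils/zip and may differ.

// Illustrative only: reconstructed from the fields the deleted importer
// reads (type, name, path, dir, depth, metadata, item). Not the actual
// type exported by @server/utils/zip.
type ZipEntry = {
  // JSZip-style entry: contents are read on demand in the requested encoding
  async(encoding: "string"): Promise<string>;
  async(encoding: "nodebuffer"): Promise<Buffer>;
  date: Date;
};

type Item = {
  type: string; // "collection" | "document" | "attachment" are handled; anything else is skipped
  name: string; // base name, e.g. "Welcome.md"
  path: string; // full path inside the zip
  dir: string; // containing directory, e.g. "Collection/Nested"
  depth: number; // nesting level; > 1 marks a nested document
  metadata: { createdAt?: string };
  item: ZipEntry;
};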
server/commands/documentCollaborativeUpdater.ts (new file, 70 lines)
@@ -0,0 +1,70 @@
+import { yDocToProsemirrorJSON } from "@getoutline/y-prosemirror";
+import invariant from "invariant";
+import { uniq } from "lodash";
+import { Node } from "prosemirror-model";
+import * as Y from "yjs";
+import { schema, serializer } from "@server/editor";
+import { Document, Event } from "@server/models";
+
+export default async function documentCollaborativeUpdater({
+  documentId,
+  ydoc,
+  userId,
+}: {
+  documentId: string;
+  ydoc: Y.Doc;
+  userId?: string;
+}) {
+  const document = await Document.scope("withState").findByPk(documentId);
+  invariant(document, "document not found");
+
+  const state = Y.encodeStateAsUpdate(ydoc);
+  const node = Node.fromJSON(schema, yDocToProsemirrorJSON(ydoc, "default"));
+  const text = serializer.serialize(node, undefined);
+  const isUnchanged = document.text === text;
+  const hasMultiplayerState = !!document.state;
+
+  if (isUnchanged && hasMultiplayerState) {
+    return;
+  }
+
+  // extract collaborators from doc user data
+  const pud = new Y.PermanentUserData(ydoc);
+  const pudIds = Array.from(pud.clients.values());
+  const existingIds = document.collaboratorIds;
+  const collaboratorIds = uniq([...pudIds, ...existingIds]);
+
+  await Document.scope(["withDrafts", "withState"]).update(
+    {
+      text,
+      state: Buffer.from(state),
+      updatedAt: isUnchanged ? document.updatedAt : new Date(),
+      lastModifiedById:
+        isUnchanged || !userId ? document.lastModifiedById : userId,
+      collaboratorIds,
+    },
+    {
+      silent: true,
+      hooks: false,
+      where: {
+        id: documentId,
+      },
+    }
+  );
+
+  if (isUnchanged) {
+    return;
+  }
+
+  await Event.schedule({
+    name: "documents.update",
+    documentId: document.id,
+    collectionId: document.collectionId,
+    teamId: document.teamId,
+    actorId: userId,
+    data: {
+      multiplayer: true,
+      title: document.title,
+    },
+  });
+}
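For reference, the Y.Doc-to-markdown conversion at the heart of this new command, isolated into a helper. This restates lines from the file above rather than introducing anything new; in practice the Y.Doc comes from the collaboration backend, since an empty document may not satisfy the schema's content requirements.

import { yDocToProsemirrorJSON } from "@getoutline/y-prosemirror";
import { Node } from "prosemirror-model";
import * as Y from "yjs";
import { schema, serializer } from "@server/editor";

// Convert collaborative state to markdown: the "default" fragment of the
// Y.Doc becomes ProseMirror JSON, is hydrated against the server schema,
// then serialized to markdown text for persistence and search.
function ydocToMarkdown(ydoc: Y.Doc): string {
  const node = Node.fromJSON(schema, yDocToProsemirrorJSON(ydoc, "default"));
  return serializer.serialize(node, undefined);
}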
server/commands/documentCreator.ts
@@ -1,9 +1,11 @@
 import invariant from "invariant";
+import { Transaction } from "sequelize";
 import { Document, Event, User } from "@server/models";
 
 export default async function documentCreator({
   title = "",
   text = "",
+  id,
   publish,
   collectionId,
   parentDocumentId,
@@ -14,15 +16,19 @@ export default async function documentCreator({
   template,
   user,
   editorVersion,
+  publishedAt,
   source,
   ip,
+  transaction,
 }: {
+  id?: string;
   title: string;
   text: string;
   publish?: boolean;
   collectionId: string;
   parentDocumentId?: string;
   templateDocument?: Document | null;
+  publishedAt?: Date;
   template?: boolean;
   createdAt?: Date;
   updatedAt?: Date;
@@ -30,42 +36,35 @@
   user: User;
   editorVersion?: string;
   source?: "import";
-  ip: string;
+  ip?: string;
+  transaction: Transaction;
 }): Promise<Document> {
   const templateId = templateDocument ? templateDocument.id : undefined;
-  const document = await Document.create({
-    parentDocumentId,
-    editorVersion,
-    collectionId,
-    teamId: user.teamId,
-    userId: user.id,
-    createdAt,
-    updatedAt,
-    lastModifiedById: user.id,
-    createdById: user.id,
-    template,
-    templateId,
-    title: templateDocument ? templateDocument.title : title,
-    text: templateDocument ? templateDocument.text : text,
-  });
-  await Event.create({
-    name: "documents.create",
-    documentId: document.id,
-    collectionId: document.collectionId,
-    teamId: document.teamId,
-    actorId: user.id,
-    data: {
-      source,
-      title: document.title,
-      templateId,
-    },
-    ip,
-  });
+  const document = await Document.create(
+    {
+      id,
+      parentDocumentId,
+      editorVersion,
+      collectionId,
+      teamId: user.teamId,
+      userId: user.id,
+      createdAt,
+      updatedAt,
+      lastModifiedById: user.id,
+      createdById: user.id,
+      template,
+      templateId,
+      publishedAt,
+      title: templateDocument ? templateDocument.title : title,
+      text: templateDocument ? templateDocument.text : text,
+    },
+    {
+      transaction,
+    }
+  );
+  await Event.create(
+    {
+      name: "documents.create",
+      documentId: document.id,
+      collectionId: document.collectionId,
+      teamId: document.teamId,
@@ -73,9 +72,34 @@ export default async function documentCreator({
+      actorId: user.id,
+      data: {
+        source,
+        title: document.title,
+        templateId,
+      },
+      ip,
+    },
+    {
+      transaction,
+    }
+  );
 
   if (publish) {
-    await document.publish(user.id);
-    await Event.create({
-      name: "documents.publish",
+    await document.publish(user.id, { transaction });
+    await Event.create(
+      {
+        name: "documents.publish",
+        documentId: document.id,
+        collectionId: document.collectionId,
+        teamId: document.teamId,
+        actorId: user.id,
+        data: {
+          source,
+          title: document.title,
+        },
+        ip,
+      },
+      {
+        transaction,
+      }
+    );
   }
 
   // reload to get all of the data needed to present (user, collection etc)
@@ -86,6 +110,7 @@ export default async function documentCreator({
       id: document.id,
       publishedAt: document.publishedAt,
     },
+    transaction,
   });
   invariant(doc, "Document must exist");
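documentCreator now requires the caller's transaction and accepts a pre-assigned id, which is useful when an import wants to rewrite links between documents before they are inserted. A hedged sketch of the new calling convention; as above, the "@server/database/sequelize" import path and the importDocument wrapper are assumptions.

import { v4 as uuidv4 } from "uuid";
import { User } from "@server/models";
import { sequelize } from "@server/database/sequelize"; // assumed path
import documentCreator from "./documentCreator";

async function importDocument(user: User, collectionId: string, text: string) {
  // Generating the id up front lets the caller reference the document
  // (e.g. in cross-links) before the row exists.
  const id = uuidv4();
  return sequelize.transaction(async (transaction) =>
    documentCreator({
      id,
      title: "Imported",
      text,
      publish: true,
      collectionId,
      source: "import",
      user,
      transaction,
    })
  );
}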
server/commands/documentImporter.test.ts
@@ -1,5 +1,5 @@
 import path from "path";
-import File from "formidable/lib/file";
+import fs from "fs-extra";
 import Attachment from "@server/models/Attachment";
 import { buildUser } from "@server/test/factories";
 import { flushdb } from "@server/test/support";
@@ -13,16 +13,16 @@ describe("documentImporter", () => {
 
   it("should convert Word Document to markdown", async () => {
     const user = await buildUser();
-    const name = "images.docx";
-    const file = new File({
-      name,
-      type:
-        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-      path: path.resolve(__dirname, "..", "test", "fixtures", name),
-    });
+    const fileName = "images.docx";
+    const content = await fs.readFile(
+      path.resolve(__dirname, "..", "test", "fixtures", fileName)
+    );
     const response = await documentImporter({
       user,
-      file,
+      mimeType:
+        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+      fileName,
+      content,
       ip,
     });
     const attachments = await Attachment.count();
@@ -34,15 +34,15 @@ describe("documentImporter", () => {
 
   it("should convert Word Document to markdown for application/octet-stream mimetype", async () => {
     const user = await buildUser();
-    const name = "images.docx";
-    const file = new File({
-      name,
-      type: "application/octet-stream",
-      path: path.resolve(__dirname, "..", "test", "fixtures", name),
-    });
+    const fileName = "images.docx";
+    const content = await fs.readFile(
+      path.resolve(__dirname, "..", "test", "fixtures", fileName)
+    );
     const response = await documentImporter({
       user,
-      file,
+      mimeType: "application/octet-stream",
+      fileName,
+      content,
       ip,
     });
     const attachments = await Attachment.count();
@@ -54,18 +54,18 @@ describe("documentImporter", () => {
 
   it("should error when a file with application/octet-stream mimetype doesn't have .docx extension", async () => {
     const user = await buildUser();
-    const name = "normal.docx.txt";
-    const file = new File({
-      name,
-      type: "application/octet-stream",
-      path: path.resolve(__dirname, "..", "test", "fixtures", name),
-    });
+    const fileName = "normal.docx.txt";
+    const content = await fs.readFile(
+      path.resolve(__dirname, "..", "test", "fixtures", fileName)
+    );
     let error;
 
     try {
       await documentImporter({
         user,
-        file,
+        mimeType: "application/octet-stream",
+        fileName,
+        content,
         ip,
       });
     } catch (err) {
@@ -77,15 +77,15 @@ describe("documentImporter", () => {
 
   it("should convert Word Document on Windows to markdown", async () => {
     const user = await buildUser();
-    const name = "images.docx";
-    const file = new File({
-      name,
-      type: "application/octet-stream",
-      path: path.resolve(__dirname, "..", "test", "fixtures", name),
-    });
+    const fileName = "images.docx";
+    const content = await fs.readFile(
+      path.resolve(__dirname, "..", "test", "fixtures", fileName)
+    );
     const response = await documentImporter({
       user,
-      file,
+      mimeType: "application/octet-stream",
+      fileName,
+      content,
       ip,
     });
     const attachments = await Attachment.count();
@@ -97,15 +97,16 @@ describe("documentImporter", () => {
 
   it("should convert HTML Document to markdown", async () => {
     const user = await buildUser();
-    const name = "webpage.html";
-    const file = new File({
-      name,
-      type: "text/html",
-      path: path.resolve(__dirname, "..", "test", "fixtures", name),
-    });
+    const fileName = "webpage.html";
+    const content = await fs.readFile(
+      path.resolve(__dirname, "..", "test", "fixtures", fileName),
+      "utf8"
+    );
     const response = await documentImporter({
       user,
-      file,
+      mimeType: "text/html",
+      fileName,
+      content,
       ip,
     });
     expect(response.text).toContain("Text paragraph");
@@ -114,15 +115,15 @@ describe("documentImporter", () => {
 
   it("should convert Confluence Word output to markdown", async () => {
     const user = await buildUser();
-    const name = "confluence.doc";
-    const file = new File({
-      name,
-      type: "application/msword",
-      path: path.resolve(__dirname, "..", "test", "fixtures", name),
-    });
+    const fileName = "confluence.doc";
+    const content = await fs.readFile(
+      path.resolve(__dirname, "..", "test", "fixtures", fileName)
+    );
     const response = await documentImporter({
       user,
-      file,
+      mimeType: "application/msword",
+      fileName,
+      content,
       ip,
     });
     expect(response.text).toContain("this is a test document");
@@ -131,49 +132,34 @@ describe("documentImporter", () => {
 
   it("should load markdown", async () => {
     const user = await buildUser();
-    const name = "markdown.md";
-    const file = new File({
-      name,
-      type: "text/plain",
-      path: path.resolve(__dirname, "..", "test", "fixtures", name),
-    });
+    const fileName = "markdown.md";
+    const content = await fs.readFile(
+      path.resolve(__dirname, "..", "test", "fixtures", fileName),
+      "utf8"
+    );
     const response = await documentImporter({
       user,
-      file,
+      mimeType: "text/plain",
+      fileName,
+      content,
       ip,
     });
     expect(response.text).toContain("This is a test paragraph");
     expect(response.title).toEqual("Heading 1");
   });
 
-  it("should handle encoded slashes", async () => {
-    const user = await buildUser();
-    const name = "this %2F and %2F this.md";
-    const file = new File({
-      name,
-      type: "text/plain",
-      path: path.resolve(__dirname, "..", "test", "fixtures", "empty.md"),
-    });
-    const response = await documentImporter({
-      user,
-      file,
-      ip,
-    });
-    expect(response.text).toContain("");
-    expect(response.title).toEqual("this / and / this");
-  });
-
   it("should fallback to extension if mimetype unknown", async () => {
     const user = await buildUser();
-    const name = "markdown.md";
-    const file = new File({
-      name,
-      type: "application/lol",
-      path: path.resolve(__dirname, "..", "test", "fixtures", name),
-    });
+    const fileName = "markdown.md";
+    const content = await fs.readFile(
+      path.resolve(__dirname, "..", "test", "fixtures", fileName),
+      "utf8"
+    );
     const response = await documentImporter({
       user,
-      file,
+      mimeType: "application/lol",
+      fileName,
+      content,
       ip,
     });
     expect(response.text).toContain("This is a test paragraph");
@@ -182,18 +168,18 @@ describe("documentImporter", () => {
 
   it("should error with unknown file type", async () => {
     const user = await buildUser();
-    const name = "files.zip";
-    const file = new File({
-      name,
-      type: "executable/zip",
-      path: path.resolve(__dirname, "..", "test", "fixtures", name),
-    });
+    const fileName = "empty.zip";
+    const content = await fs.readFile(
+      path.resolve(__dirname, "..", "test", "fixtures", fileName)
+    );
     let error;
 
     try {
       await documentImporter({
         user,
-        file,
+        mimeType: "executable/zip",
+        fileName,
+        content,
         ip,
       });
     } catch (err) {
server/commands/documentImporter.ts
@@ -1,9 +1,10 @@
-import fs from "fs";
 import path from "path";
 import emojiRegex from "emoji-regex";
 import { strikethrough, tables } from "joplin-turndown-plugin-gfm";
 import { truncate } from "lodash";
 import mammoth from "mammoth";
 import quotedPrintable from "quoted-printable";
+import { Transaction } from "sequelize";
 import TurndownService from "turndown";
 import utf8 from "utf8";
 import { MAX_TITLE_LENGTH } from "@shared/constants";
@@ -21,7 +22,7 @@
   hr: "---",
   bulletListMarker: "-",
   headingStyle: "atx",
-});
+}).remove(["script", "style", "title", "head"]);
 
 // Use the GitHub-flavored markdown plugin to parse
 // strikethroughs and tables
@@ -37,7 +38,7 @@
 interface ImportableFile {
   type: string;
-  getMarkdown: (file: any) => Promise<string>;
+  getMarkdown: (content: Buffer | string) => Promise<string>;
 }
 
 const importMapping: ImportableFile[] = [
@@ -68,26 +69,34 @@
   },
 ];
 
-// @ts-expect-error ts-migrate(7006) FIXME: Parameter 'file' implicitly has an 'any' type.
-async function fileToMarkdown(file): Promise<string> {
-  return fs.promises.readFile(file.path, "utf8");
+async function fileToMarkdown(content: Buffer | string): Promise<string> {
+  if (content instanceof Buffer) {
+    content = content.toString("utf8");
+  }
+  return content;
 }
 
-// @ts-expect-error ts-migrate(7006) FIXME: Parameter 'file' implicitly has an 'any' type.
-async function docxToMarkdown(file): Promise<string> {
-  const { value } = await mammoth.convertToHtml(file);
-  return turndownService.turndown(value);
+async function docxToMarkdown(content: Buffer | string): Promise<string> {
+  if (content instanceof Buffer) {
+    const { value: html } = await mammoth.convertToHtml({ buffer: content });
+    return turndownService.turndown(html);
+  }
+
+  throw new Error("docxToMarkdown: content must be a Buffer");
 }
 
-// @ts-expect-error ts-migrate(7006) FIXME: Parameter 'file' implicitly has an 'any' type.
-async function htmlToMarkdown(file): Promise<string> {
-  const value = await fs.promises.readFile(file.path, "utf8");
-  return turndownService.turndown(value);
+async function htmlToMarkdown(content: Buffer | string): Promise<string> {
+  if (content instanceof Buffer) {
+    content = content.toString("utf8");
+  }
+
+  return turndownService.turndown(content);
 }
 
-// @ts-expect-error ts-migrate(7006) FIXME: Parameter 'file' implicitly has an 'any' type.
-async function confluenceToMarkdown(file): Promise<string> {
-  let value = await fs.promises.readFile(file.path, "utf8");
+async function confluenceToMarkdown(value: Buffer | string): Promise<string> {
+  if (value instanceof Buffer) {
+    value = value.toString("utf8");
+  }
 
   // We're only supporting the ridiculous output from Confluence here, regular
   // Word documents should call into the docxToMarkdown importer.
@@ -143,22 +152,28 @@
 }
 
 async function documentImporter({
-  file,
+  mimeType,
+  fileName,
+  content,
   user,
   ip,
+  transaction,
 }: {
   user: User;
-  file: File;
-  ip: string;
+  mimeType: string;
+  fileName: string;
+  content: Buffer | string;
+  ip?: string;
+  transaction?: Transaction;
 }): Promise<{
   text: string;
   title: string;
 }> {
   const fileInfo = importMapping.filter((item) => {
-    if (item.type === file.type) {
+    if (item.type === mimeType) {
       if (
-        file.type === "application/octet-stream" &&
-        path.extname(file.name) !== ".docx"
+        mimeType === "application/octet-stream" &&
+        path.extname(fileName) !== ".docx"
       ) {
         return false;
       }
@@ -166,7 +181,7 @@ async function documentImporter({
       return true;
     }
 
-    if (item.type === "text/markdown" && path.extname(file.name) === ".md") {
+    if (item.type === "text/markdown" && path.extname(fileName) === ".md") {
       return true;
     }
 
@@ -174,20 +189,35 @@ async function documentImporter({
   })[0];
 
   if (!fileInfo) {
-    throw InvalidRequestError(`File type ${file.type} not supported`);
+    throw InvalidRequestError(`File type ${mimeType} not supported`);
   }
 
-  let title = deserializeFilename(file.name.replace(/\.[^/.]+$/, ""));
-  let text = await fileInfo.getMarkdown(file);
+  let title = deserializeFilename(fileName.replace(/\.[^/.]+$/, ""));
+  let text = await fileInfo.getMarkdown(content);
+  text = text.trim();
+
+  // find and extract first emoji, in the case of some imports it can be outside
+  // of the title, at the top of the document.
+  const regex = emojiRegex();
+  const matches = regex.exec(text);
+  const firstEmoji = matches ? matches[0] : undefined;
+  if (firstEmoji && text.startsWith(firstEmoji)) {
+    text = text.replace(firstEmoji, "").trim();
+  }
 
   // If the first line of the imported text looks like a markdown heading
   // then we can use this as the document title
-  if (text.trim().startsWith("# ")) {
+  if (text.startsWith("# ")) {
     const result = parseTitle(text);
     title = result.title;
     text = text.replace(`# ${title}\n`, "");
   }
 
+  // If we parsed an emoji from _above_ the title then add it back as a prefix
+  if (firstEmoji) {
+    title = `${firstEmoji} ${title}`;
+  }
+
   // find data urls, convert to blobs, upload and write attachments
   const images = parseImages(text);
   const dataURIs = images.filter((href) => href.startsWith("data:"));
@@ -201,6 +231,7 @@ async function documentImporter({
       buffer,
       user,
       ip,
+      transaction,
     });
     text = text.replace(uri, attachment.redirectUrl);
   }
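The tests above show the shape of the new API: no formidable file handle, just mimeType, fileName, and the raw content the caller has already read. A sketch mirroring the HTML fixture test; importFixture is a hypothetical wrapper, not part of this diff.

import fs from "fs-extra";
import path from "path";
import { User } from "@server/models";
import documentImporter from "./documentImporter";

async function importFixture(user: User) {
  const fileName = "webpage.html";
  // The caller now owns I/O: read the bytes however is convenient and pass
  // them in as Buffer | string.
  const content = await fs.readFile(
    path.resolve(__dirname, "..", "test", "fixtures", fileName),
    "utf8"
  );
  const { title, text } = await documentImporter({
    user,
    mimeType: "text/html",
    fileName,
    content,
  });
  return { title, text };
}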
server/commands/documentUpdater.ts
@@ -1,70 +1,128 @@
-import { yDocToProsemirrorJSON } from "@getoutline/y-prosemirror";
-import invariant from "invariant";
-import { uniq } from "lodash";
-import { Node } from "prosemirror-model";
-import * as Y from "yjs";
-import { schema, serializer } from "@server/editor";
-import { Document, Event } from "@server/models";
+import { Transaction } from "sequelize";
+import { Event, Document, User } from "@server/models";
 
+type Props = {
+  /** The user updating the document */
+  user: User;
+  /** The existing document */
+  document: Document;
+  /** The new title */
+  title?: string;
+  /** The new text content */
+  text?: string;
+  /** The version of the client editor that was used */
+  editorVersion?: string;
+  /** The ID of the template that was used */
+  templateId?: string;
+  /** If the document should be displayed full-width on the screen */
+  fullWidth?: boolean;
+  /** Whether the text should be appended to the end instead of replacing it */
+  append?: boolean;
+  /** Whether the document should be published to the collection */
+  publish?: boolean;
+  /** The IP address of the user creating the document */
+  ip: string;
+  /** The database transaction to run within */
+  transaction: Transaction;
+};
+
+/**
+ * This command updates document properties. To update collaborative text state
+ * use documentCollaborativeUpdater.
+ *
+ * @param Props The properties of the document to update
+ * @returns Document The updated document
+ */
 export default async function documentUpdater({
-  documentId,
-  ydoc,
-  userId,
-}: {
-  documentId: string;
-  ydoc: Y.Doc;
-  userId?: string;
-}) {
-  const document = await Document.scope("withState").findByPk(documentId);
-  invariant(document, "document not found");
-
-  const state = Y.encodeStateAsUpdate(ydoc);
-  const node = Node.fromJSON(schema, yDocToProsemirrorJSON(ydoc, "default"));
-  const text = serializer.serialize(node, undefined);
-  const isUnchanged = document.text === text;
-  const hasMultiplayerState = !!document.state;
-
-  if (isUnchanged && hasMultiplayerState) {
-    return;
-  }
-
-  // extract collaborators from doc user data
-  const pud = new Y.PermanentUserData(ydoc);
-  const pudIds = Array.from(pud.clients.values());
-  const existingIds = document.collaboratorIds;
-  const collaboratorIds = uniq([...pudIds, ...existingIds]);
-
-  await Document.scope(["withDrafts", "withState"]).update(
-    {
-      text,
-      state: Buffer.from(state),
-      updatedAt: isUnchanged ? document.updatedAt : new Date(),
-      lastModifiedById:
-        isUnchanged || !userId ? document.lastModifiedById : userId,
-      collaboratorIds,
-    },
-    {
-      silent: true,
-      hooks: false,
-      where: {
-        id: documentId,
-      },
-    }
-  );
-
-  if (isUnchanged) {
-    return;
-  }
-
-  await Event.schedule({
-    name: "documents.update",
-    documentId: document.id,
-    collectionId: document.collectionId,
-    teamId: document.teamId,
-    actorId: userId,
-    data: {
-      multiplayer: true,
-      title: document.title,
-    },
-  });
-}
+  user,
+  document,
+  title,
+  text,
+  editorVersion,
+  templateId,
+  fullWidth,
+  append,
+  publish,
+  transaction,
+  ip,
+}: Props): Promise<Document> {
+  const previousTitle = document.title;
+
+  if (title !== undefined) {
+    document.title = title;
+  }
+  if (editorVersion) {
+    document.editorVersion = editorVersion;
+  }
+  if (templateId) {
+    document.templateId = templateId;
+  }
+  if (fullWidth !== undefined) {
+    document.fullWidth = fullWidth;
+  }
+  if (!user.team?.collaborativeEditing) {
+    if (append) {
+      document.text += text;
+    } else if (text !== undefined) {
+      document.text = text;
+    }
+  }
+
+  document.lastModifiedById = user.id;
+  const changed = document.changed();
+
+  if (publish) {
+    await document.publish(user.id, { transaction });
+  } else {
+    await document.save({ transaction });
+  }
+
+  if (publish) {
+    await Event.create(
+      {
+        name: "documents.publish",
+        documentId: document.id,
+        collectionId: document.collectionId,
+        teamId: document.teamId,
+        actorId: user.id,
+        data: {
+          title: document.title,
+        },
+        ip,
+      },
+      { transaction }
+    );
+  } else if (changed) {
+    await Event.create(
+      {
+        name: "documents.update",
+        documentId: document.id,
+        collectionId: document.collectionId,
+        teamId: document.teamId,
+        actorId: user.id,
+        data: {
+          title: document.title,
+        },
+        ip,
+      },
+      { transaction }
+    );
+  }
+
+  if (document.title !== previousTitle) {
+    Event.schedule({
+      name: "documents.title_change",
+      documentId: document.id,
+      collectionId: document.collectionId,
+      teamId: document.teamId,
+      actorId: user.id,
+      data: {
+        previousTitle,
+        title: document.title,
+      },
+      ip,
+    });
+  }
+
+  return document;
+}
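From a caller's perspective the rewritten command looks like this: property changes, the save, and the audit Event all share one caller-provided transaction, while the old Y.Doc-based path now lives in documentCollaborativeUpdater. A sketch only; renameDocument and the "@server/database/sequelize" import path are assumptions.

import { Document, User } from "@server/models";
import { sequelize } from "@server/database/sequelize"; // assumed path
import documentUpdater from "./documentUpdater";

async function renameDocument(user: User, document: Document, title: string) {
  // The title update, save, and "documents.update" event commit together;
  // the follow-up "documents.title_change" event is scheduled asynchronously.
  return sequelize.transaction(async (transaction) =>
    documentUpdater({
      user,
      document,
      title,
      ip: "127.0.0.1", // hypothetical request IP
      transaction,
    })
  );
}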