diff --git a/server/commands/documentImporter.ts b/server/commands/documentImporter.ts index a4dd45706..46ba48b46 100644 --- a/server/commands/documentImporter.ts +++ b/server/commands/documentImporter.ts @@ -1,5 +1,6 @@ import path from "path"; import emojiRegex from "emoji-regex"; +import escapeRegExp from "lodash/escapeRegExp"; import truncate from "lodash/truncate"; import mammoth from "mammoth"; import quotedPrintable from "quoted-printable"; @@ -131,6 +132,15 @@ async function confluenceToMarkdown(value: Buffer | string): Promise<string> { return html.replace(/<br>/g, " \\n "); } +type Props = { + user: User; + mimeType: string; + fileName: string; + content: Buffer | string; + ip?: string; + transaction?: Transaction; +}; + async function documentImporter({ mimeType, fileName, @@ -138,14 +148,8 @@ async function documentImporter({ user, ip, transaction, -}: { - user: User; - mimeType: string; - fileName: string; - content: Buffer | string; - ip?: string; - transaction?: Transaction; -}): Promise<{ +}: Props): Promise<{ + emoji?: string; text: string; title: string; state: Buffer; @@ -177,27 +181,22 @@ async function documentImporter({ let text = await fileInfo.getMarkdown(content); text = text.trim(); - // find and extract first emoji, in the case of some imports it can be outside - // of the title, at the top of the document. + // find and extract emoji near the beginning of the document. const regex = emojiRegex(); - const matches = regex.exec(text); - const firstEmoji = matches ? matches[0] : undefined; - const textStartsWithEmoji = firstEmoji && text.startsWith(firstEmoji); - if (textStartsWithEmoji) { - text = text.replace(firstEmoji, "").trim(); + const matches = regex.exec(text.slice(0, 10)); + const emoji = matches ? matches[0] : undefined; + if (emoji) { + text = text.replace(emoji, ""); } // If the first line of the imported text looks like a markdown heading - // then we can use this as the document title + // then we can use this as the document title rather than the file name. if (text.startsWith("# ")) { const result = parseTitle(text); title = result.title; - text = text.replace(`# ${title}`, "").trimStart(); - } - - // If we parsed an emoji from _above_ the title then add it back at prefixing - if (textStartsWithEmoji) { - title = `${firstEmoji} ${title}`; + text = text + .replace(new RegExp(`#\\s+${escapeRegExp(title)}`), "") + .trimStart(); } // Replace any <br> generated by the turndown plugin with escaped newlines @@ -227,6 +226,7 @@ async function documentImporter({ text, state, title, + emoji, }; } diff --git a/server/queues/tasks/ImportMarkdownZipTask.ts b/server/queues/tasks/ImportMarkdownZipTask.ts index dfa725bd2..5828c1179 100644 --- a/server/queues/tasks/ImportMarkdownZipTask.ts +++ b/server/queues/tasks/ImportMarkdownZipTask.ts @@ -81,7 +81,7 @@ export default class ImportMarkdownZipTask extends ImportTask { return; } - const { title, text } = await documentImporter({ + const { title, emoji, text } = await documentImporter({ mimeType: "text/markdown", fileName: child.name, content: await zipObject.async("string"), @@ -123,6 +123,7 @@ export default class ImportMarkdownZipTask extends ImportTask { output.documents.push({ id, title, + emoji, text, updatedAt, createdAt, diff --git a/server/queues/tasks/ImportNotionTask.ts b/server/queues/tasks/ImportNotionTask.ts index d90f668af..8bd28991b 100644 --- a/server/queues/tasks/ImportNotionTask.ts +++ b/server/queues/tasks/ImportNotionTask.ts @@ -86,7 +86,7 @@ export default class ImportNotionTask extends ImportTask { Logger.debug("task", `Processing ${name} as ${mimeType}`); - const { title, text } = await documentImporter({ + const { title, emoji, text } = await documentImporter({ mimeType: mimeType || "text/markdown", fileName: name, content: zipObject ? await zipObject.async("string") : "", @@ -117,6 +117,7 @@ export default class ImportNotionTask extends ImportTask { output.documents.push({ id, title, + emoji, text, collectionId, parentDocumentId, diff --git a/server/queues/tasks/ImportTask.ts b/server/queues/tasks/ImportTask.ts index ab4fe7522..b5c186d0c 100644 --- a/server/queues/tasks/ImportTask.ts +++ b/server/queues/tasks/ImportTask.ts @@ -55,6 +55,7 @@ export type StructuredImportData = { id: string; urlId?: string; title: string; + emoji?: string; /** * The document text. To reference an attachment or image use the special * formatting <>. 
It will be replaced with a reference to the diff --git a/server/routes/api/documents/documents.ts b/server/routes/api/documents/documents.ts index e2ce59799..6ccd2017e 100644 --- a/server/routes/api/documents/documents.ts +++ b/server/routes/api/documents/documents.ts @@ -1249,7 +1249,7 @@ router.post( } const content = await fs.readFile(file.filepath); - const { text, state, title } = await documentImporter({ + const { text, state, title, emoji } = await documentImporter({ user, fileName: file.originalFilename ?? file.newFilename, mimeType: file.mimetype ?? "", @@ -1261,6 +1261,7 @@ router.post( const document = await documentCreator({ source: "import", title, + emoji, text, state, publish,