@@ -1,5 +1,6 @@
|
||||
import path from "path";
|
||||
import emojiRegex from "emoji-regex";
|
||||
import escapeRegExp from "lodash/escapeRegExp";
|
||||
import truncate from "lodash/truncate";
|
||||
import mammoth from "mammoth";
|
||||
import quotedPrintable from "quoted-printable";
|
||||
@@ -131,6 +132,15 @@ async function confluenceToMarkdown(value: Buffer | string): Promise<string> {
|
||||
return html.replace(/<br>/g, " \\n ");
|
||||
}
|
||||
|
||||
/**
 * Named argument object accepted by `documentImporter`.
 */
type Props = {
|
||||
/** The user associated with the import. NOTE(review): how it is used is not visible here — confirm in the function body. */
user: User;
|
||||
/** MIME type of the incoming file; callers pass values such as "text/markdown". */
mimeType: string;
|
||||
/** Name of the file being imported, as supplied by the caller. */
fileName: string;
|
||||
/** Raw file contents; handed to the selected converter's `getMarkdown`. */
content: Buffer | string;
|
||||
/** Optional IP address. NOTE(review): presumably the requester's address — confirm usage. */
ip?: string;
|
||||
/** Optional transaction. NOTE(review): presumably a database transaction to run within — confirm usage. */
transaction?: Transaction;
|
||||
};
|
||||
|
||||
async function documentImporter({
|
||||
mimeType,
|
||||
fileName,
|
||||
@@ -138,14 +148,8 @@ async function documentImporter({
|
||||
user,
|
||||
ip,
|
||||
transaction,
|
||||
}: {
|
||||
user: User;
|
||||
mimeType: string;
|
||||
fileName: string;
|
||||
content: Buffer | string;
|
||||
ip?: string;
|
||||
transaction?: Transaction;
|
||||
}): Promise<{
|
||||
}: Props): Promise<{
|
||||
emoji?: string;
|
||||
text: string;
|
||||
title: string;
|
||||
state: Buffer;
|
||||
@@ -177,27 +181,22 @@ async function documentImporter({
|
||||
let text = await fileInfo.getMarkdown(content);
|
||||
text = text.trim();
|
||||
|
||||
// find and extract first emoji, in the case of some imports it can be outside
|
||||
// of the title, at the top of the document.
|
||||
// find and extract emoji near the beginning of the document.
|
||||
const regex = emojiRegex();
|
||||
const matches = regex.exec(text);
|
||||
const firstEmoji = matches ? matches[0] : undefined;
|
||||
const textStartsWithEmoji = firstEmoji && text.startsWith(firstEmoji);
|
||||
if (textStartsWithEmoji) {
|
||||
text = text.replace(firstEmoji, "").trim();
|
||||
const matches = regex.exec(text.slice(0, 10));
|
||||
const emoji = matches ? matches[0] : undefined;
|
||||
if (emoji) {
|
||||
text = text.replace(emoji, "");
|
||||
}
|
||||
|
||||
// If the first line of the imported text looks like a markdown heading
|
||||
// then we can use this as the document title
|
||||
// then we can use this as the document title rather than the file name.
|
||||
if (text.startsWith("# ")) {
|
||||
const result = parseTitle(text);
|
||||
title = result.title;
|
||||
text = text.replace(`# ${title}`, "").trimStart();
|
||||
}
|
||||
|
||||
// If we parsed an emoji from _above_ the title then add it back at prefixing
|
||||
if (textStartsWithEmoji) {
|
||||
title = `${firstEmoji} ${title}`;
|
||||
text = text
|
||||
.replace(new RegExp(`#\\s+${escapeRegExp(title)}`), "")
|
||||
.trimStart();
|
||||
}
|
||||
|
||||
// Replace any <br> generated by the turndown plugin with escaped newlines
|
||||
@@ -227,6 +226,7 @@ async function documentImporter({
|
||||
text,
|
||||
state,
|
||||
title,
|
||||
emoji,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -81,7 +81,7 @@ export default class ImportMarkdownZipTask extends ImportTask {
|
||||
return;
|
||||
}
|
||||
|
||||
const { title, text } = await documentImporter({
|
||||
const { title, emoji, text } = await documentImporter({
|
||||
mimeType: "text/markdown",
|
||||
fileName: child.name,
|
||||
content: await zipObject.async("string"),
|
||||
@@ -123,6 +123,7 @@ export default class ImportMarkdownZipTask extends ImportTask {
|
||||
output.documents.push({
|
||||
id,
|
||||
title,
|
||||
emoji,
|
||||
text,
|
||||
updatedAt,
|
||||
createdAt,
|
||||
|
||||
@@ -86,7 +86,7 @@ export default class ImportNotionTask extends ImportTask {
|
||||
|
||||
Logger.debug("task", `Processing ${name} as ${mimeType}`);
|
||||
|
||||
const { title, text } = await documentImporter({
|
||||
const { title, emoji, text } = await documentImporter({
|
||||
mimeType: mimeType || "text/markdown",
|
||||
fileName: name,
|
||||
content: zipObject ? await zipObject.async("string") : "",
|
||||
@@ -117,6 +117,7 @@ export default class ImportNotionTask extends ImportTask {
|
||||
output.documents.push({
|
||||
id,
|
||||
title,
|
||||
emoji,
|
||||
text,
|
||||
collectionId,
|
||||
parentDocumentId,
|
||||
|
||||
@@ -55,6 +55,7 @@ export type StructuredImportData = {
|
||||
id: string;
|
||||
urlId?: string;
|
||||
title: string;
|
||||
emoji?: string;
|
||||
/**
|
||||
* The document text. To reference an attachment or image use the special
|
||||
* formatting <<attachmentId>>. It will be replaced with a reference to the
|
||||
|
||||
@@ -1249,7 +1249,7 @@ router.post(
|
||||
}
|
||||
|
||||
const content = await fs.readFile(file.filepath);
|
||||
const { text, state, title } = await documentImporter({
|
||||
const { text, state, title, emoji } = await documentImporter({
|
||||
user,
|
||||
fileName: file.originalFilename ?? file.newFilename,
|
||||
mimeType: file.mimetype ?? "",
|
||||
@@ -1261,6 +1261,7 @@ router.post(
|
||||
const document = await documentCreator({
|
||||
source: "import",
|
||||
title,
|
||||
emoji,
|
||||
text,
|
||||
state,
|
||||
publish,
|
||||
|
||||
Reference in New Issue
Block a user