chore: Update documentImporter with changes from enterprise, improved Confluence compat

This commit is contained in:
Tom Moor
2022-06-02 21:42:32 +02:00
parent 9113989635
commit 68dd76cfa3
8 changed files with 144 additions and 31 deletions

View File

@@ -1,11 +1,9 @@
import path from "path";
import emojiRegex from "emoji-regex";
import { strikethrough, tables } from "joplin-turndown-plugin-gfm";
import { truncate } from "lodash";
import mammoth from "mammoth";
import quotedPrintable from "quoted-printable";
import { Transaction } from "sequelize";
import TurndownService from "turndown";
import utf8 from "utf8";
import { MAX_TITLE_LENGTH } from "@shared/constants";
import parseTitle from "@shared/utils/parseTitle";
@@ -13,28 +11,10 @@ import { APM } from "@server/logging/tracing";
import { User } from "@server/models";
import dataURItoBuffer from "@server/utils/dataURItoBuffer";
import parseImages from "@server/utils/parseImages";
import turndownService from "@server/utils/turndown";
import { FileImportError, InvalidRequestError } from "../errors";
import attachmentCreator from "./attachmentCreator";
// Module-level Turndown instance used to convert HTML into Markdown.
// Option reference: https://github.com/domchristie/turndown#options
const turndownService = new TurndownService({
hr: "---",
bulletListMarker: "-",
headingStyle: "atx",
}).remove(["script", "style", "title", "head"]);
// Use the GitHub-flavored markdown plugin to parse
// strikethroughs and tables
turndownService
.use(strikethrough)
.use(tables)
.addRule("breaks", {
filter: ["br"],
// Every <br> element is replaced with a plain newline character.
replacement: function () {
return "\n";
},
});
interface ImportableFile {
type: string;
getMarkdown: (content: Buffer | string) => Promise<string>;
@@ -200,7 +180,8 @@ async function documentImporter({
const regex = emojiRegex();
const matches = regex.exec(text);
const firstEmoji = matches ? matches[0] : undefined;
if (firstEmoji && text.startsWith(firstEmoji)) {
const textStartsWithEmoji = firstEmoji && text.startsWith(firstEmoji);
if (textStartsWithEmoji) {
text = text.replace(firstEmoji, "").trim();
}
@@ -213,10 +194,14 @@ async function documentImporter({
}
// If we parsed an emoji from _above_ the title then add it back as a prefix
if (firstEmoji) {
if (textStartsWithEmoji) {
title = `${firstEmoji} ${title}`;
}
// Replace any <br> generated by the turndown plugin with escaped newlines
// to match our hardbreak parser.
text = text.replace(/<br>/gi, "\\n");
// find data urls, convert to blobs, upload and write attachments
const images = parseImages(text);
const dataURIs = images.filter((href) => href.startsWith("data:"));