chore: Update documentImporter with changes from enterprise, improved Confluence compat
This commit is contained in:
@@ -1,11 +1,9 @@
|
||||
import path from "path";
|
||||
import emojiRegex from "emoji-regex";
|
||||
import { strikethrough, tables } from "joplin-turndown-plugin-gfm";
|
||||
import { truncate } from "lodash";
|
||||
import mammoth from "mammoth";
|
||||
import quotedPrintable from "quoted-printable";
|
||||
import { Transaction } from "sequelize";
|
||||
import TurndownService from "turndown";
|
||||
import utf8 from "utf8";
|
||||
import { MAX_TITLE_LENGTH } from "@shared/constants";
|
||||
import parseTitle from "@shared/utils/parseTitle";
|
||||
@@ -13,28 +11,10 @@ import { APM } from "@server/logging/tracing";
|
||||
import { User } from "@server/models";
|
||||
import dataURItoBuffer from "@server/utils/dataURItoBuffer";
|
||||
import parseImages from "@server/utils/parseImages";
|
||||
import turndownService from "@server/utils/turndown";
|
||||
import { FileImportError, InvalidRequestError } from "../errors";
|
||||
import attachmentCreator from "./attachmentCreator";
|
||||
|
||||
// https://github.com/domchristie/turndown#options
|
||||
const turndownService = new TurndownService({
|
||||
hr: "---",
|
||||
bulletListMarker: "-",
|
||||
headingStyle: "atx",
|
||||
}).remove(["script", "style", "title", "head"]);
|
||||
|
||||
// Use the GitHub-flavored markdown plugin to parse
|
||||
// strikethroughs and tables
|
||||
turndownService
|
||||
.use(strikethrough)
|
||||
.use(tables)
|
||||
.addRule("breaks", {
|
||||
filter: ["br"],
|
||||
replacement: function () {
|
||||
return "\n";
|
||||
},
|
||||
});
|
||||
|
||||
interface ImportableFile {
|
||||
type: string;
|
||||
getMarkdown: (content: Buffer | string) => Promise<string>;
|
||||
@@ -200,7 +180,8 @@ async function documentImporter({
|
||||
const regex = emojiRegex();
|
||||
const matches = regex.exec(text);
|
||||
const firstEmoji = matches ? matches[0] : undefined;
|
||||
if (firstEmoji && text.startsWith(firstEmoji)) {
|
||||
const textStartsWithEmoji = firstEmoji && text.startsWith(firstEmoji);
|
||||
if (textStartsWithEmoji) {
|
||||
text = text.replace(firstEmoji, "").trim();
|
||||
}
|
||||
|
||||
@@ -213,10 +194,14 @@ async function documentImporter({
|
||||
}
|
||||
|
||||
// If we parsed an emoji from _above_ the title then add it back as a prefix
|
||||
if (firstEmoji) {
|
||||
if (textStartsWithEmoji) {
|
||||
title = `${firstEmoji} ${title}`;
|
||||
}
|
||||
|
||||
// Replace any <br> generated by the turndown plugin with escaped newlines
|
||||
// to match our hardbreak parser.
|
||||
text = text.replace(/<br>/gi, "\\n");
|
||||
|
||||
// find data urls, convert to blobs, upload and write attachments
|
||||
const images = parseImages(text);
|
||||
const dataURIs = images.filter((href) => href.startsWith("data:"));
|
||||
|
||||
Reference in New Issue
Block a user