chore: Update documentImporter with changes from enterprise, improved Confluence compat
This commit is contained in:
@@ -56,6 +56,7 @@
|
||||
"@getoutline/y-prosemirror": "^1.0.18",
|
||||
"@hocuspocus/provider": "^1.0.0-alpha.36",
|
||||
"@hocuspocus/server": "^1.0.0-alpha.102",
|
||||
"@joplin/turndown-plugin-gfm": "^1.0.44",
|
||||
"@outlinewiki/koa-passport": "^4.1.4",
|
||||
"@outlinewiki/passport-azure-ad-oauth2": "^0.1.0",
|
||||
"@renderlesskit/react": "^0.6.0",
|
||||
@@ -100,7 +101,6 @@
|
||||
"invariant": "^2.2.4",
|
||||
"ioredis": "^4.28.0",
|
||||
"is-printable-key-event": "^1.0.0",
|
||||
"joplin-turndown-plugin-gfm": "^1.0.12",
|
||||
"json-loader": "0.5.4",
|
||||
"jsonwebtoken": "^8.5.0",
|
||||
"jszip": "^3.7.1",
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
import path from "path";
|
||||
import emojiRegex from "emoji-regex";
|
||||
import { strikethrough, tables } from "joplin-turndown-plugin-gfm";
|
||||
import { truncate } from "lodash";
|
||||
import mammoth from "mammoth";
|
||||
import quotedPrintable from "quoted-printable";
|
||||
import { Transaction } from "sequelize";
|
||||
import TurndownService from "turndown";
|
||||
import utf8 from "utf8";
|
||||
import { MAX_TITLE_LENGTH } from "@shared/constants";
|
||||
import parseTitle from "@shared/utils/parseTitle";
|
||||
@@ -13,28 +11,10 @@ import { APM } from "@server/logging/tracing";
|
||||
import { User } from "@server/models";
|
||||
import dataURItoBuffer from "@server/utils/dataURItoBuffer";
|
||||
import parseImages from "@server/utils/parseImages";
|
||||
import turndownService from "@server/utils/turndown";
|
||||
import { FileImportError, InvalidRequestError } from "../errors";
|
||||
import attachmentCreator from "./attachmentCreator";
|
||||
|
||||
// https://github.com/domchristie/turndown#options
|
||||
const turndownService = new TurndownService({
|
||||
hr: "---",
|
||||
bulletListMarker: "-",
|
||||
headingStyle: "atx",
|
||||
}).remove(["script", "style", "title", "head"]);
|
||||
|
||||
// Use the GitHub-flavored markdown plugin to parse
|
||||
// strikethroughs and tables
|
||||
turndownService
|
||||
.use(strikethrough)
|
||||
.use(tables)
|
||||
.addRule("breaks", {
|
||||
filter: ["br"],
|
||||
replacement: function () {
|
||||
return "\n";
|
||||
},
|
||||
});
|
||||
|
||||
interface ImportableFile {
|
||||
type: string;
|
||||
getMarkdown: (content: Buffer | string) => Promise<string>;
|
||||
@@ -200,7 +180,8 @@ async function documentImporter({
|
||||
const regex = emojiRegex();
|
||||
const matches = regex.exec(text);
|
||||
const firstEmoji = matches ? matches[0] : undefined;
|
||||
if (firstEmoji && text.startsWith(firstEmoji)) {
|
||||
const textStartsWithEmoji = firstEmoji && text.startsWith(firstEmoji);
|
||||
if (textStartsWithEmoji) {
|
||||
text = text.replace(firstEmoji, "").trim();
|
||||
}
|
||||
|
||||
@@ -213,10 +194,14 @@ async function documentImporter({
|
||||
}
|
||||
|
||||
// If we parsed an emoji from _above_ the title then add it back as a prefix
|
||||
if (firstEmoji) {
|
||||
if (textStartsWithEmoji) {
|
||||
title = `${firstEmoji} ${title}`;
|
||||
}
|
||||
|
||||
// Replace any <br> generated by the turndown plugin with escaped newlines
|
||||
// to match our hardbreak parser.
|
||||
text = text.replace(/<br>/gi, "\\n");
|
||||
|
||||
// find data urls, convert to blobs, upload and write attachments
|
||||
const images = parseImages(text);
|
||||
const dataURIs = images.filter((href) => href.startsWith("data:"));
|
||||
|
||||
5
server/typings/index.d.ts
vendored
5
server/typings/index.d.ts
vendored
@@ -12,10 +12,11 @@ declare module "oy-vey";
|
||||
|
||||
declare module "fetch-test-server";
|
||||
|
||||
declare module "joplin-turndown-plugin-gfm" {
|
||||
declare module "@joplin/turndown-plugin-gfm" {
|
||||
import { Plugin } from "turndown";
|
||||
|
||||
export const strikethrough: Plugin;
|
||||
|
||||
export const tables: Plugin;
|
||||
export const taskListItems: Plugin;
|
||||
export const gfm: Plugin;
|
||||
}
|
||||
|
||||
15
server/utils/turndown/breaks.ts
Normal file
15
server/utils/turndown/breaks.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
import TurndownService from "turndown";
|
||||
|
||||
/**
 * Turndown plugin that registers a rule named "breaks", which renders every
 * <br> element as a single newline character.
 *
 * @param turndownService The TurndownService instance to register the rule on.
 */
export default function breaks(turndownService: TurndownService) {
  const lineBreak = "\n";

  turndownService.addRule("breaks", {
    filter: ["br"],
    replacement: () => lineBreak,
  });
}
|
||||
57
server/utils/turndown/confluence-code-block.ts
Normal file
57
server/utils/turndown/confluence-code-block.ts
Normal file
@@ -0,0 +1,57 @@
|
||||
import { repeat } from "lodash";
|
||||
import TurndownService from "turndown";
|
||||
|
||||
const highlightRegExp = /brush: ([a-z0-9]+);/;

/**
 * A turndown plugin that converts a Confluence syntax-highlighted code block
 * (a DIV whose first child is a PRE with the class "syntaxhighlighter-pre")
 * into a fenced markdown code block.
 *
 * The language is read from the "brush: <name>;" entry of the PRE's
 * "data-syntaxhighlighter-params" attribute and is left empty when absent.
 *
 * @param turndownService The TurndownService instance.
 */
export default function confluenceCodeBlock(turndownService: TurndownService) {
  turndownService.addRule("fencedConfluenceHighlightedCodeBlock", {
    filter(node) {
      if (node.nodeName !== "DIV") {
        return false;
      }

      const pre = node.firstChild;
      return (
        pre?.nodeName === "PRE" &&
        // @ts-expect-error className exists
        pre.className === "syntaxhighlighter-pre"
      );
    },

    replacement(_content, node) {
      const pre = node.firstChild;
      const params =
        // @ts-expect-error getAttribute exists
        pre?.getAttribute("data-syntaxhighlighter-params") ?? "";
      const brush = params.match(highlightRegExp);
      const language = brush ? brush[1] : "";
      const code = pre?.textContent ?? "";

      // The fence must be longer than any run of three or more backticks that
      // starts a line inside the code, otherwise that line would close the
      // block early.
      const fenceChar = "`";
      const fenceRuns: string[] =
        code.match(new RegExp(`^${fenceChar}{3,}`, "gm")) ?? [];
      const fenceSize = fenceRuns.reduce(
        (size, run) => (run.length >= size ? run.length + 1 : size),
        3
      );
      const fence = repeat(fenceChar, fenceSize);

      // Drop a single trailing newline so the closing fence directly follows
      // the last line of code.
      const body = code.replace(/\n$/, "");

      return `\n\n${fence}${language}\n${body}\n${fence}\n\n`;
    },
  });
}
|
||||
25
server/utils/turndown/confluence-task-list.ts
Normal file
25
server/utils/turndown/confluence-task-list.ts
Normal file
@@ -0,0 +1,25 @@
|
||||
import TurndownService from "turndown";
|
||||
|
||||
/**
 * A turndown plugin that converts the items of a Confluence task list into
 * markdown checkbox items.
 *
 * An item is handled when it is an LI whose parent is a UL with a class name
 * containing "inline-task-list". It is rendered as checked only when the
 * item's class name is exactly "checked".
 *
 * @param turndownService The TurndownService instance.
 */
export default function confluenceTaskList(turndownService: TurndownService) {
  turndownService.addRule("confluenceTaskList", {
    filter(node) {
      const list = node.parentNode;

      return (
        node.nodeName === "LI" &&
        list?.nodeName === "UL" &&
        // @ts-expect-error className exists
        list?.className.includes("inline-task-list")
      );
    },

    replacement(content, node) {
      // @ts-expect-error className exists
      const isChecked = node.className === "checked";
      const marker = isChecked ? "- [x]" : "- [ ]";

      return `${marker} ${content} \n`;
    },
  });
}
|
||||
30
server/utils/turndown/index.ts
Normal file
30
server/utils/turndown/index.ts
Normal file
@@ -0,0 +1,30 @@
|
||||
import { gfm } from "@joplin/turndown-plugin-gfm";
|
||||
import TurndownService from "turndown";
|
||||
import breaks from "./breaks";
|
||||
import confluenceCodeBlock from "./confluence-code-block";
|
||||
import confluenceTaskList from "./confluence-task-list";
|
||||
|
||||
/**
 * Shared Turndown instance. Turndown converts HTML to Markdown and is used in
 * the importer code.
 *
 * For options, see: https://github.com/domchristie/turndown#options
 */
const service = new TurndownService({
  hr: "---",
  bulletListMarker: "-",
  headingStyle: "atx",
  codeBlockStyle: "fenced",
  // A blank P node is replaced with two newlines, a backslash and a newline;
  // any other blank node is replaced with an empty string.
  blankReplacement: (_content, node) =>
    node.nodeName === "P" ? "\n\n\\\n" : "",
});

// Drop non-content elements, then register the plugins in the same order as
// before: GFM first, followed by the Confluence rules and the <br> rule.
service
  .remove(["script", "style", "title", "head"])
  .use(gfm)
  .use(confluenceTaskList)
  .use(confluenceCodeBlock)
  .use(breaks);

export default service;
|
||||
10
yarn.lock
10
yarn.lock
@@ -1813,6 +1813,11 @@
|
||||
"@babel/runtime" "^7.7.2"
|
||||
regenerator-runtime "^0.13.3"
|
||||
|
||||
"@joplin/turndown-plugin-gfm@^1.0.44":
|
||||
version "1.0.44"
|
||||
resolved "https://registry.yarnpkg.com/@joplin/turndown-plugin-gfm/-/turndown-plugin-gfm-1.0.44.tgz#028e4c56bf58e57a4d0d923bb7d10a21c30d5c7d"
|
||||
integrity sha512-lpVI/fpj0CKzWzpsOxsmqwjWlIrw+IZlIEz3h8Vqoviz8dCYbqSSY/4VxpiUDmBpxX/3Xk73R5BfzqiAHBmYqA==
|
||||
|
||||
"@lifeomic/attempt@^3.0.2":
|
||||
version "3.0.3"
|
||||
resolved "https://registry.yarnpkg.com/@lifeomic/attempt/-/attempt-3.0.3.tgz#e742a5b85eb673e2f1746b0f39cb932cbc6145bb"
|
||||
@@ -9509,11 +9514,6 @@ jmespath@0.15.0:
|
||||
resolved "https://registry.yarnpkg.com/jmespath/-/jmespath-0.15.0.tgz#a3f222a9aae9f966f5d27c796510e28091764217"
|
||||
integrity sha1-o/Iiqarp+Wb10nx5ZRDigJF2Qhc=
|
||||
|
||||
joplin-turndown-plugin-gfm@^1.0.12:
|
||||
version "1.0.12"
|
||||
resolved "https://registry.yarnpkg.com/joplin-turndown-plugin-gfm/-/joplin-turndown-plugin-gfm-1.0.12.tgz#f0774183177895c6fedeec951053cab6046dede8"
|
||||
integrity sha512-qL4+1iycQjZ1fs8zk3jSRk7cg3ROBUHk7GKtiLAQLFzLPKErnILUvz5DLszSQvz3s1sTjPbywLDISVUtBY6HaA==
|
||||
|
||||
jpeg-js@0.4.2:
|
||||
version "0.4.2"
|
||||
resolved "https://registry.yarnpkg.com/jpeg-js/-/jpeg-js-0.4.2.tgz#8b345b1ae4abde64c2da2fe67ea216a114ac279d"
|
||||
|
||||
Reference in New Issue
Block a user