fix: Paragraphs in table cells skipped in import
Port HTML importer rules from enterprise fork
This commit is contained in:
@@ -1,57 +0,0 @@
|
|||||||
import repeat from "lodash/repeat";
|
|
||||||
import TurndownService from "turndown";
|
|
||||||
|
|
||||||
const highlightRegExp = /brush: ([a-z0-9]+);/;
|
|
||||||
|
|
||||||
/**
 * Turndown plugin that renders a Confluence syntax-highlighter block as a
 * fenced Markdown code block.
 *
 * The rule matches a DIV whose first child is a PRE carrying the class
 * "syntaxhighlighter-pre". The language is read from the `brush: <lang>;`
 * entry of the PRE's `data-syntaxhighlighter-params` attribute.
 *
 * @param turndownService The TurndownService instance to register the rule on.
 */
export default function confluenceCodeBlock(turndownService: TurndownService) {
  const fenceChar = "`";

  turndownService.addRule("fencedConfluenceHighlightedCodeBlock", {
    filter(node) {
      if (node.nodeName !== "DIV") {
        return false;
      }
      const pre = node.firstChild as HTMLElement | null;
      return (
        pre?.nodeName === "PRE" && pre.className === "syntaxhighlighter-pre"
      );
    },

    replacement(content, node) {
      const pre = node.firstChild as HTMLElement | null;

      // Language name, or an empty string when no brush is declared.
      const params = pre?.getAttribute("data-syntaxhighlighter-params") ?? "";
      const brush = params.match(highlightRegExp);
      const language = brush ? brush[1] : "";

      const code = pre?.textContent ?? "";

      // The fence must be longer than any run of backticks that starts a
      // line inside the code, otherwise the block would close early.
      const fenceInCodeRegex = new RegExp("^" + fenceChar + "{3,}", "gm");
      let fenceSize = 3;
      for (const run of code.matchAll(fenceInCodeRegex)) {
        fenceSize = Math.max(fenceSize, run[0].length + 1);
      }
      const fence = repeat(fenceChar, fenceSize);

      // Drop a single trailing newline so the closing fence sits directly
      // below the last line of code.
      const body = code.replace(/\n$/, "");

      return `\n\n${fence}${language}\n${body}\n${fence}\n\n`;
    },
  });
}
|
|
||||||
@@ -1,23 +0,0 @@
|
|||||||
import TurndownService from "turndown";
|
|
||||||
|
|
||||||
/**
 * Turndown plugin that renders Confluence inline task list items as
 * Markdown checkboxes.
 *
 * The rule matches LI elements whose parent is a UL with a class name
 * containing "inline-task-list". An item whose class is exactly "checked"
 * becomes `- [x]`, any other item becomes `- [ ]`.
 *
 * @param turndownService The TurndownService instance to register the rule on.
 */
export default function confluenceTaskList(turndownService: TurndownService) {
  turndownService.addRule("confluenceTaskList", {
    filter(node) {
      if (node.nodeName !== "LI") {
        return false;
      }
      const list = node.parentElement;
      return (
        list?.nodeName === "UL" && list.className.includes("inline-task-list")
      );
    },

    replacement(content, node) {
      // Nodes without a className are passed through untouched.
      if (!("className" in node)) {
        return content;
      }
      const checkbox = node.className === "checked" ? "- [x]" : "- [ ]";
      return `${checkbox} ${content} \n`;
    },
  });
}
|
|
||||||
21
server/utils/turndown/emptyParagraph.ts
Normal file
21
server/utils/turndown/emptyParagraph.ts
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
import TurndownService from "turndown";
|
||||||
|
|
||||||
|
/**
 * A turndown plugin for converting paragraphs with only breaks to newlines.
 *
 * A paragraph is treated as empty when its single child element is a BR and
 * it holds no text other than whitespace. Such a paragraph is emitted as a
 * blank line followed by a Markdown hard break.
 *
 * @param turndownService The TurndownService instance.
 */
export default function emptyParagraphs(turndownService: TurndownService) {
  turndownService.addRule("emptyParagraphs", {
    filter(node) {
      return (
        node.nodeName === "P" &&
        node.children.length === 1 &&
        node.children[0].nodeName === "BR" &&
        // `children` only lists element nodes, so a paragraph such as
        // `<p>Hello<br></p>` also has exactly one BR child. Without this
        // check its text would be replaced by a bare hard break.
        (node.textContent ?? "").trim() === ""
      );
    },
    replacement() {
      return "\n\n\\\n";
    },
  });
}
|
||||||
@@ -1,11 +1,12 @@
|
|||||||
import { gfm } from "@joplin/turndown-plugin-gfm";
|
import { gfm } from "@joplin/turndown-plugin-gfm";
|
||||||
import TurndownService from "turndown";
|
import TurndownService from "turndown";
|
||||||
import breaks from "./breaks";
|
import breaks from "./breaks";
|
||||||
import confluenceCodeBlock from "./confluence-code-block";
|
import emptyLists from "./emptyLists";
|
||||||
import confluenceTaskList from "./confluence-task-list";
|
import emptyParagraphs from "./emptyParagraph";
|
||||||
import emptyLists from "./empty-lists";
|
|
||||||
import frames from "./frames";
|
import frames from "./frames";
|
||||||
import images from "./images";
|
import images from "./images";
|
||||||
|
import sanitizeTables from "./sanitizeTables";
|
||||||
|
import underlines from "./underlines";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Turndown converts HTML to Markdown and is used in the importer code.
|
* Turndown converts HTML to Markdown and is used in the importer code.
|
||||||
@@ -26,9 +27,10 @@ const service = new TurndownService({
|
|||||||
})
|
})
|
||||||
.remove(["script", "style", "title", "head"])
|
.remove(["script", "style", "title", "head"])
|
||||||
.use(gfm)
|
.use(gfm)
|
||||||
|
.use(emptyParagraphs)
|
||||||
|
.use(sanitizeTables)
|
||||||
|
.use(underlines)
|
||||||
.use(frames)
|
.use(frames)
|
||||||
.use(confluenceTaskList)
|
|
||||||
.use(confluenceCodeBlock)
|
|
||||||
.use(images)
|
.use(images)
|
||||||
.use(breaks)
|
.use(breaks)
|
||||||
.use(emptyLists);
|
.use(emptyLists);
|
||||||
|
|||||||
43
server/utils/turndown/sanitizeTables.ts
Normal file
43
server/utils/turndown/sanitizeTables.ts
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
import TurndownService from "turndown";
|
||||||
|
|
||||||
|
/**
 * Turndown plugin that flattens block-level nodes found inside tables.
 *
 * Two rules are registered:
 * - "headingsInTables": H1–H6 inside a table become bold text.
 * - "paragraphsInCells": P inside a table is reduced to its trimmed content.
 *
 * @param turndownService The TurndownService instance to register the rules on.
 */
export default function sanitizeTables(turndownService: TurndownService) {
  /**
   * Reports whether the node, or the closest element above it, has an
   * ancestor (or is itself an element) matching the selector.
   */
  const hasMatchingAncestor = (start: HTMLElement, selector: string) => {
    let element: HTMLElement | null = start;

    // Climb until an element node (nodeType 1) is reached.
    while (element !== null && element.nodeType !== 1) {
      element = (element.parentElement ||
        element.parentNode) as HTMLElement | null;
    }

    if (element === null || element.nodeType !== 1) {
      return false;
    }
    return element.closest(selector) !== null;
  };

  const headingName = /^H[1-6]$/;

  turndownService.addRule("headingsInTables", {
    filter: (node) =>
      headingName.test(node.nodeName) && hasMatchingAncestor(node, "table"),
    replacement: (content) => "**" + content.trim() + "**",
  });

  turndownService.addRule("paragraphsInCells", {
    filter: (node) =>
      node.nodeName === "P" && hasMatchingAncestor(node, "table"),
    replacement: (content) => content.trim(),
  });
}
|
||||||
15
server/utils/turndown/underlines.ts
Normal file
15
server/utils/turndown/underlines.ts
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
import TurndownService from "turndown";
|
||||||
|
|
||||||
|
/**
 * Turndown plugin that renders `<u>` elements as double-underscore
 * wrapped text.
 *
 * @param turndownService The TurndownService instance to register the rule on.
 */
export default function underlines(turndownService: TurndownService) {
  const marker = "__";

  turndownService.addRule("underlines", {
    filter: ["u"],
    // Surrounding whitespace is trimmed so the markers hug the text.
    replacement: (content) => marker + content.trim() + marker,
  });
}
|
||||||
Reference in New Issue
Block a user