diff --git a/server/utils/turndown/emptyParagraph.ts b/server/utils/turndown/emptyParagraph.ts
index 1b2017876..449f64814 100644
--- a/server/utils/turndown/emptyParagraph.ts
+++ b/server/utils/turndown/emptyParagraph.ts
@@ -11,6 +11,7 @@ export default function emptyParagraphs(turndownService: TurndownService) {
       return (
         node.nodeName === "P" &&
         node.children.length === 1 &&
+        node.textContent?.trim() === "" &&
         node.children[0].nodeName === "BR"
       );
     },
diff --git a/server/utils/turndown/images.ts b/server/utils/turndown/images.ts
index 3676d03d7..b59b99eed 100644
--- a/server/utils/turndown/images.ts
+++ b/server/utils/turndown/images.ts
@@ -7,20 +7,56 @@ import TurndownService from "turndown";
  */
 export default function images(turndownService: TurndownService) {
   turndownService.addRule("image", {
-    filter: "img",
+    filter(node) {
+      return node.nodeName === "IMG" && !node?.className.includes("emoticon");
+    },
     replacement(content, node) {
       if (!("className" in node)) {
         return content;
       }
       const alt = cleanAttribute(node.getAttribute("alt") || "");
-      const src = (node.getAttribute("src") || "").replace(/\n+/g, "");
+      const src = cleanAttribute(node.getAttribute("src") || "");
       const title = cleanAttribute(node.getAttribute("title") || "");
-      const titlePart = title ? ' "' + title + '"' : "";
-      return src ? "![" + alt + "]" + "(" + src + titlePart + ")" : "";
+
+      // Remove icons in issue keys as they will not resolve correctly and mess
+      // up the layout.
+      if (
+        node.className === "icon" &&
+        node.parentElement?.className.includes("jira-issue-key")
+      ) {
+        return "";
+      }
+
+      // Respect embedded Confluence image size. `size` defaults to an empty
+      // string so that interpolating it below never yields "undefined".
+      let size = "";
+      const naturalWidth = node.getAttribute("data-width");
+      const naturalHeight = node.getAttribute("data-height");
+      const width = node.getAttribute("width");
+
+      if (naturalWidth && naturalHeight && width) {
+        const targetWidth = parseInt(width, 10);
+        const ratio = parseInt(naturalWidth, 10) / targetWidth;
+        const targetHeight = Math.round(parseInt(naturalHeight, 10) / ratio);
+
+        // Only emit a size when both dimensions are positive, finite numbers;
+        // otherwise "NaN" or "Infinity" would leak into the Markdown.
+        if (
+          targetWidth > 0 &&
+          targetHeight > 0 &&
+          Number.isFinite(targetHeight)
+        ) {
+          size = ` =${targetWidth}x${targetHeight}`;
+        }
+      }
+
+      const titlePart = title || size ? ` "${title}${size}"` : "";
+
+      return src ? `![${alt}](${src}${titlePart})` : "";
     },
   });
 }
 
 function cleanAttribute(attribute: string) {
-  return attribute ? attribute.replace(/(\n+\s*)+/g, "\n") : "";
+  return (attribute ? attribute.replace(/\n+/g, "") : "").trim();
 }
diff --git a/server/utils/turndown/index.ts b/server/utils/turndown/index.ts
index 4d3596368..120024c6f 100644
--- a/server/utils/turndown/index.ts
+++ b/server/utils/turndown/index.ts
@@ -2,9 +2,10 @@ import { gfm } from "@joplin/turndown-plugin-gfm";
 import TurndownService from "turndown";
 import breaks from "./breaks";
 import emptyLists from "./emptyLists";
-import emptyParagraphs from "./emptyParagraph";
+import emptyParagraph from "./emptyParagraph";
 import frames from "./frames";
 import images from "./images";
+import sanitizeLists from "./sanitizeLists";
 import sanitizeTables from "./sanitizeTables";
 import underlines from "./underlines";
 
@@ -18,17 +19,14 @@ const service = new TurndownService({
   bulletListMarker: "-",
   headingStyle: "atx",
   codeBlockStyle: "fenced",
-  blankReplacement: (content, node) => {
-    if (node.nodeName === "P") {
-      return "\n\n\\\n";
-    }
-    return "";
-  },
+  blankReplacement: (content, node) =>
+    node.nodeName === "P" ? "\n\n\\\n" : "",
 })
   .remove(["script", "style", "title", "head"])
   .use(gfm)
-  .use(emptyParagraphs)
+  .use(emptyParagraph)
   .use(sanitizeTables)
+  .use(sanitizeLists)
   .use(underlines)
   .use(frames)
   .use(images)
diff --git a/server/utils/turndown/sanitizeLists.ts b/server/utils/turndown/sanitizeLists.ts
new file mode 100644
index 000000000..d9dcc43f8
--- /dev/null
+++ b/server/utils/turndown/sanitizeLists.ts
@@ -0,0 +1,61 @@
+import TurndownService from "turndown";
+
+/**
+ * A turndown plugin for removing incompatible nodes from lists.
+ *
+ * @param turndownService The TurndownService instance.
+ */
+export default function sanitizeLists(turndownService: TurndownService) {
+  /**
+   * Checks whether the closest element to a node matches the selector or has
+   * an ancestor that does.
+   *
+   * @param node The node to start from.
+   * @param selector The selector handed to `Element.closest`.
+   * @returns true when a matching element is found, false otherwise.
+   */
+  function inHtmlContext(node: HTMLElement, selector: string) {
+    let currentNode = node;
+    // start at the closest element
+    while (currentNode !== null && currentNode.nodeType !== 1) {
+      currentNode = (currentNode.parentElement ||
+        currentNode.parentNode) as HTMLElement;
+    }
+    return (
+      currentNode !== null &&
+      currentNode.nodeType === 1 &&
+      currentNode.closest(selector) !== null
+    );
+  }
+
+  // Headings nested in list items are emitted as bold text instead.
+  turndownService.addRule("headingsInLists", {
+    filter(node) {
+      return (
+        ["H1", "H2", "H3", "H4", "H5", "H6"].includes(node.nodeName) &&
+        inHtmlContext(node, "LI")
+      );
+    },
+    replacement(content, node, options) {
+      if (!content.trim()) {
+        return "";
+      }
+      return options.strongDelimiter + content + options.strongDelimiter;
+    },
+  });
+
+  // Bold markup nested in headings is unwrapped to its plain content.
+  turndownService.addRule("strongInHeadings", {
+    filter(node) {
+      return (
+        (node.nodeName === "STRONG" || node.nodeName === "B") &&
+        ["H1", "H2", "H3", "H4", "H5", "H6"].some((tag) =>
+          inHtmlContext(node, tag)
+        )
+      );
+    },
+    replacement(content) {
+      return content;
+    },
+  });
+}