Port HTML import improvements from enterprise codebase

This commit is contained in:
Tom Moor
2023-10-28 19:09:53 -04:00
parent 846fb122cd
commit 89f3d47327
4 changed files with 87 additions and 13 deletions

View File

@@ -11,6 +11,7 @@ export default function emptyParagraphs(turndownService: TurndownService) {
return (
node.nodeName === "P" &&
node.children.length === 1 &&
node.textContent?.trim() === "" &&
node.children[0].nodeName === "BR"
);
},

View File

@@ -7,20 +7,44 @@ import TurndownService from "turndown";
*/
export default function images(turndownService: TurndownService) {
  turndownService.addRule("image", {
    // Match every <img> except those marked as emoticons.
    filter(node) {
      return node.nodeName === "IMG" && !node.className.includes("emoticon");
    },

    /**
     * Converts a matched image element to Markdown image syntax.
     *
     * @param content The already-converted inner content of the node.
     * @param node The matched node.
     * @returns `![alt](src "title =WxH")`, or an empty string when the image
     * has no `src` or is a Jira issue-key icon.
     */
    replacement(content, node) {
      if (!("className" in node)) {
        return content;
      }

      const alt = cleanAttribute(node.getAttribute("alt") || "");
      const src = cleanAttribute(node.getAttribute("src") || "");
      const title = cleanAttribute(node.getAttribute("title") || "");

      // Remove icons in issue keys as they will not resolve correctly and mess
      // up the layout.
      if (
        node.className === "icon" &&
        node.parentElement?.className.includes("jira-issue-key")
      ) {
        return "";
      }

      // Respect embedded Confluence image size. `size` defaults to an empty
      // string so that it never leaks the text "undefined" into the title.
      let size = "";
      const naturalWidth = parseInt(node.getAttribute("data-width") ?? "", 10);
      const naturalHeight = parseInt(
        node.getAttribute("data-height") ?? "",
        10
      );
      const width = parseInt(node.getAttribute("width") ?? "", 10);

      // Only emit a size when all three values are positive numbers; this
      // avoids NaN output and division by zero.
      if (naturalWidth > 0 && naturalHeight > 0 && width > 0) {
        // Scale the height by the same factor as the width, rounded to a
        // whole pixel.
        const height = Math.round((naturalHeight * width) / naturalWidth);
        size = ` =${width}x${height}`;
      }

      const titlePart = title || size ? ` "${title}${size}"` : "";
      return src ? `![${alt}](${src}${titlePart})` : "";
    },
  });
}
/**
 * Normalizes an HTML attribute value for use inside Markdown image syntax by
 * removing all newlines and trimming surrounding whitespace.
 *
 * @param attribute The raw attribute value.
 * @returns The value on a single line with no leading or trailing whitespace;
 * an empty string when the input is empty.
 */
function cleanAttribute(attribute: string): string {
  return (attribute ? attribute.replace(/\n+/g, "") : "").trim();
}

View File

@@ -2,9 +2,10 @@ import { gfm } from "@joplin/turndown-plugin-gfm";
import TurndownService from "turndown";
import breaks from "./breaks";
import emptyLists from "./emptyLists";
import emptyParagraphs from "./emptyParagraph";
import emptyParagraph from "./emptyParagraph";
import frames from "./frames";
import images from "./images";
import sanitizeLists from "./sanitizeLists";
import sanitizeTables from "./sanitizeTables";
import underlines from "./underlines";
@@ -18,17 +19,14 @@ const service = new TurndownService({
bulletListMarker: "-",
headingStyle: "atx",
codeBlockStyle: "fenced",
blankReplacement: (content, node) => {
if (node.nodeName === "P") {
return "\n\n\\\n";
}
return "";
},
blankReplacement: (content, node) =>
node.nodeName === "P" ? "\n\n\\\n" : "",
})
.remove(["script", "style", "title", "head"])
.use(gfm)
.use(emptyParagraphs)
.use(emptyParagraph)
.use(sanitizeTables)
.use(sanitizeLists)
.use(underlines)
.use(frames)
.use(images)

View File

@@ -0,0 +1,51 @@
import TurndownService from "turndown";
/**
 * Turndown plugin that rewrites nodes which do not convert cleanly when they
 * appear in list items or headings:
 *
 * - headings nested inside a list item are emitted as strong text
 * - strong/bold elements nested inside a heading are emitted as plain content
 *
 * @param turndownService The TurndownService instance to register rules on.
 */
export default function sanitizeLists(turndownService: TurndownService) {
  const ELEMENT_NODE = 1;
  const headingTags = ["H1", "H2", "H3", "H4", "H5", "H6"];

  /**
   * Reports whether the node, or the nearest element above it, has itself or
   * an ancestor matching the selector.
   */
  const inHtmlContext = (node: HTMLElement, selector: string) => {
    let element = node;

    // Climb until an element node is reached, or the chain runs out.
    for (; element !== null && element.nodeType !== ELEMENT_NODE; ) {
      element = (element.parentElement || element.parentNode) as HTMLElement;
    }

    if (element === null) {
      return false;
    }
    return element.closest(selector) !== null;
  };

  turndownService.addRule("headingsInLists", {
    filter(node) {
      if (!headingTags.includes(node.nodeName)) {
        return false;
      }
      return inHtmlContext(node, "LI");
    },
    replacement(content, node, options) {
      // Whitespace-only headings produce no output at all.
      return content.trim()
        ? `${options.strongDelimiter}${content}${options.strongDelimiter}`
        : "";
    },
  });

  turndownService.addRule("strongInHeadings", {
    filter(node) {
      const isBold = node.nodeName === "STRONG" || node.nodeName === "B";
      if (!isBold) {
        return false;
      }
      for (const tag of headingTags) {
        if (inHtmlContext(node, tag)) {
          return true;
        }
      }
      return false;
    },
    replacement(content) {
      return content;
    },
  });
}