fix: Remove empty top-level list items in imported HTML content

This commit is contained in:
Tom Moor
2023-05-25 21:34:26 -04:00
parent e9ec31e5b8
commit be3bcebf6b
5 changed files with 39 additions and 15 deletions

View File

@@ -56,7 +56,10 @@ async function fileToMarkdown(content: Buffer | string): Promise<string> {
async function docxToMarkdown(content: Buffer | string): Promise<string> { async function docxToMarkdown(content: Buffer | string): Promise<string> {
if (content instanceof Buffer) { if (content instanceof Buffer) {
const { value: html } = await mammoth.convertToHtml({ buffer: content }); const { value: html } = await mammoth.convertToHtml({
buffer: content,
});
return turndownService.turndown(html); return turndownService.turndown(html);
} }

View File

@@ -10,16 +10,14 @@ export default function confluenceTaskList(turndownService: TurndownService) {
filter(node) { filter(node) {
return ( return (
node.nodeName === "LI" && node.nodeName === "LI" &&
node.parentNode?.nodeName === "UL" && node.parentElement?.nodeName === "UL" &&
// @ts-expect-error className exists node.parentElement?.className.includes("inline-task-list")
node.parentNode?.className.includes("inline-task-list")
); );
}, },
replacement(content, node) { replacement(content, node) {
return ( return "className" in node
// @ts-expect-error className exists ? (node.className === "checked" ? "- [x]" : "- [ ]") + ` ${content} \n`
(node.className === "checked" ? "- [x]" : "- [ ]") + ` ${content} \n` : content;
);
}, },
}); });
} }

View File

@@ -0,0 +1,22 @@
import TurndownService from "turndown";
/**
 * Turndown plugin that unwraps list items which exist only to hold a nested
 * list.
 *
 * Registers a rule named "empty-lists". The rule matches an `LI` node that has
 * exactly one child node, where that child is an `OL` or `UL`, and emits the
 * already-converted content of the item unchanged instead of rendering an
 * extra list marker around it.
 *
 * @param turndownService The TurndownService instance to register the rule on.
 */
export default function emptyLists(turndownService: TurndownService) {
  const unwrapListOnlyItem = {
    filter: (node) => {
      // Only list items with a single child node are candidates.
      if (node.nodeName !== "LI" || node.childNodes.length !== 1) {
        return false;
      }

      // The lone child must itself be a list for the item to count as empty.
      const onlyChildName = node.firstChild?.nodeName;
      return onlyChildName === "OL" || onlyChildName === "UL";
    },
    // Pass the nested list's markdown straight through, dropping the wrapper.
    replacement: (content) => content,
  };

  turndownService.addRule("empty-lists", unwrapListOnlyItem);
}

View File

@@ -8,14 +8,13 @@ import TurndownService from "turndown";
export default function images(turndownService: TurndownService) { export default function images(turndownService: TurndownService) {
turndownService.addRule("image", { turndownService.addRule("image", {
filter: "img", filter: "img",
replacement(content, node) { replacement(content, node) {
// @ts-expect-error getAttribute exists if (!("className" in node)) {
const alt = cleanAttribute(node.getAttribute("alt")); return content;
// @ts-expect-error getAttribute exists }
const alt = cleanAttribute(node.getAttribute("alt") || "");
const src = (node.getAttribute("src") || "").replace(/\n+/g, ""); const src = (node.getAttribute("src") || "").replace(/\n+/g, "");
// @ts-expect-error getAttribute exists const title = cleanAttribute(node.getAttribute("title") || "");
const title = cleanAttribute(node.getAttribute("title"));
const titlePart = title ? ' "' + title + '"' : ""; const titlePart = title ? ' "' + title + '"' : "";
return src ? "![" + alt + "]" + "(" + src + titlePart + ")" : ""; return src ? "![" + alt + "]" + "(" + src + titlePart + ")" : "";
}, },

View File

@@ -3,6 +3,7 @@ import TurndownService from "turndown";
import breaks from "./breaks"; import breaks from "./breaks";
import confluenceCodeBlock from "./confluence-code-block"; import confluenceCodeBlock from "./confluence-code-block";
import confluenceTaskList from "./confluence-task-list"; import confluenceTaskList from "./confluence-task-list";
import emptyLists from "./empty-lists";
import images from "./images"; import images from "./images";
/** /**
@@ -27,6 +28,7 @@ const service = new TurndownService({
.use(confluenceTaskList) .use(confluenceTaskList)
.use(confluenceCodeBlock) .use(confluenceCodeBlock)
.use(images) .use(images)
.use(breaks); .use(breaks)
.use(emptyLists);
export default service; export default service;