fix: Remove empty top-level list items in imported HTML content

This commit is contained in:
Tom Moor
2023-05-25 21:34:26 -04:00
parent e9ec31e5b8
commit be3bcebf6b
5 changed files with 39 additions and 15 deletions

View File

@@ -56,7 +56,10 @@ async function fileToMarkdown(content: Buffer | string): Promise<string> {
async function docxToMarkdown(content: Buffer | string): Promise<string> {
if (content instanceof Buffer) {
const { value: html } = await mammoth.convertToHtml({ buffer: content });
const { value: html } = await mammoth.convertToHtml({
buffer: content,
});
return turndownService.turndown(html);
}

View File

@@ -10,16 +10,14 @@ export default function confluenceTaskList(turndownService: TurndownService) {
filter(node) {
return (
node.nodeName === "LI" &&
node.parentNode?.nodeName === "UL" &&
// @ts-expect-error className exists
node.parentNode?.className.includes("inline-task-list")
node.parentElement?.nodeName === "UL" &&
node.parentElement?.className.includes("inline-task-list")
);
},
replacement(content, node) {
return (
// @ts-expect-error className exists
(node.className === "checked" ? "- [x]" : "- [ ]") + ` ${content} \n`
);
return "className" in node
? (node.className === "checked" ? "- [x]" : "- [ ]") + ` ${content} \n`
: content;
},
});
}

View File

@@ -0,0 +1,22 @@
import TurndownService from "turndown";
/**
 * A turndown plugin that unwraps list items which act purely as a wrapper
 * around a nested list.
 *
 * Registers an "empty-lists" rule matching any `LI` node with exactly one
 * child node, where that child is an `OL` or `UL`. For a matching node the
 * rule outputs the content it was given unchanged, so no extra list marker is
 * emitted for the wrapping item itself.
 *
 * @param turndownService The TurndownService instance.
 */
export default function emptyLists(turndownService: TurndownService) {
  turndownService.addRule("empty-lists", {
    filter(node) {
      // Only list items holding a single child node are candidates.
      if (node.nodeName !== "LI" || node.childNodes.length !== 1) {
        return false;
      }

      // That single child must itself be an ordered or unordered list.
      const onlyChildName = node.firstChild?.nodeName;
      return onlyChildName === "OL" || onlyChildName === "UL";
    },
    // Pass the content through as-is rather than wrapping it in a list item.
    replacement: (content) => content,
  });
}

View File

@@ -8,14 +8,13 @@ import TurndownService from "turndown";
export default function images(turndownService: TurndownService) {
turndownService.addRule("image", {
filter: "img",
replacement(content, node) {
// @ts-expect-error getAttribute exists
const alt = cleanAttribute(node.getAttribute("alt"));
// @ts-expect-error getAttribute exists
if (!("className" in node)) {
return content;
}
const alt = cleanAttribute(node.getAttribute("alt") || "");
const src = (node.getAttribute("src") || "").replace(/\n+/g, "");
// @ts-expect-error getAttribute exists
const title = cleanAttribute(node.getAttribute("title"));
const title = cleanAttribute(node.getAttribute("title") || "");
const titlePart = title ? ' "' + title + '"' : "";
return src ? "![" + alt + "]" + "(" + src + titlePart + ")" : "";
},

View File

@@ -3,6 +3,7 @@ import TurndownService from "turndown";
import breaks from "./breaks";
import confluenceCodeBlock from "./confluence-code-block";
import confluenceTaskList from "./confluence-task-list";
import emptyLists from "./empty-lists";
import images from "./images";
/**
@@ -27,6 +28,7 @@ const service = new TurndownService({
.use(confluenceTaskList)
.use(confluenceCodeBlock)
.use(images)
.use(breaks);
.use(breaks)
.use(emptyLists);
export default service;