From be3bcebf6bc8a5909b38abfff12be2a4a975592d Mon Sep 17 00:00:00 2001 From: Tom Moor Date: Thu, 25 May 2023 21:34:26 -0400 Subject: [PATCH] fix: Remove empty top-level list items in imported HTML content --- server/commands/documentImporter.ts | 5 ++++- server/utils/turndown/confluence-task-list.ts | 12 +++++----- server/utils/turndown/empty-lists.ts | 22 +++++++++++++++++++ server/utils/turndown/images.ts | 11 +++++----- server/utils/turndown/index.ts | 4 +++- 5 files changed, 39 insertions(+), 15 deletions(-) create mode 100644 server/utils/turndown/empty-lists.ts diff --git a/server/commands/documentImporter.ts b/server/commands/documentImporter.ts index e2fbfcabc..4fbc18f35 100644 --- a/server/commands/documentImporter.ts +++ b/server/commands/documentImporter.ts @@ -56,7 +56,10 @@ async function fileToMarkdown(content: Buffer | string): Promise { async function docxToMarkdown(content: Buffer | string): Promise { if (content instanceof Buffer) { - const { value: html } = await mammoth.convertToHtml({ buffer: content }); + const { value: html } = await mammoth.convertToHtml({ + buffer: content, + }); + return turndownService.turndown(html); } diff --git a/server/utils/turndown/confluence-task-list.ts b/server/utils/turndown/confluence-task-list.ts index 7b5d512ef..8ecaac15a 100644 --- a/server/utils/turndown/confluence-task-list.ts +++ b/server/utils/turndown/confluence-task-list.ts @@ -10,16 +10,14 @@ export default function confluenceTaskList(turndownService: TurndownService) { filter(node) { return ( node.nodeName === "LI" && - node.parentNode?.nodeName === "UL" && - // @ts-expect-error className exists - node.parentNode?.className.includes("inline-task-list") + node.parentElement?.nodeName === "UL" && + node.parentElement?.className.includes("inline-task-list") ); }, replacement(content, node) { - return ( - // @ts-expect-error className exists - (node.className === "checked" ? "- [x]" : "- [ ]") + ` ${content} \n` - ); + return "className" in node + ? (node.className === "checked" ? "- [x]" : "- [ ]") + ` ${content} \n` + : content; }, }); } diff --git a/server/utils/turndown/empty-lists.ts b/server/utils/turndown/empty-lists.ts new file mode 100644 index 000000000..6bceebb8d --- /dev/null +++ b/server/utils/turndown/empty-lists.ts @@ -0,0 +1,22 @@ +import TurndownService from "turndown"; + +/** + * A turndown plugin for unwrapping top-level empty list items. + * + * @param turndownService The TurndownService instance. + */ +export default function emptyLists(turndownService: TurndownService) { + turndownService.addRule("empty-lists", { + filter(node) { + return ( + node.nodeName === "LI" && + node.childNodes.length === 1 && + (node.firstChild?.nodeName === "OL" || + node.firstChild?.nodeName === "UL") + ); + }, + replacement(content) { + return content; + }, + }); +} diff --git a/server/utils/turndown/images.ts b/server/utils/turndown/images.ts index 2fd4eb027..3676d03d7 100644 --- a/server/utils/turndown/images.ts +++ b/server/utils/turndown/images.ts @@ -8,14 +8,13 @@ import TurndownService from "turndown"; export default function images(turndownService: TurndownService) { turndownService.addRule("image", { filter: "img", - replacement(content, node) { - // @ts-expect-error getAttribute exists - const alt = cleanAttribute(node.getAttribute("alt")); - // @ts-expect-error getAttribute exists + if (!("className" in node)) { + return content; + } + const alt = cleanAttribute(node.getAttribute("alt") || ""); const src = (node.getAttribute("src") || "").replace(/\n+/g, ""); - // @ts-expect-error getAttribute exists - const title = cleanAttribute(node.getAttribute("title")); + const title = cleanAttribute(node.getAttribute("title") || ""); const titlePart = title ? ' "' + title + '"' : ""; return src ? "![" + alt + "]" + "(" + src + titlePart + ")" : ""; }, diff --git a/server/utils/turndown/index.ts b/server/utils/turndown/index.ts index 7b17746af..03eaab61d 100644 --- a/server/utils/turndown/index.ts +++ b/server/utils/turndown/index.ts @@ -3,6 +3,7 @@ import TurndownService from "turndown"; import breaks from "./breaks"; import confluenceCodeBlock from "./confluence-code-block"; import confluenceTaskList from "./confluence-task-list"; +import emptyLists from "./empty-lists"; import images from "./images"; /** @@ -27,6 +28,7 @@ const service = new TurndownService({ .use(confluenceTaskList) .use(confluenceCodeBlock) .use(images) - .use(breaks); + .use(breaks) + .use(emptyLists); export default service;