fix: Remove empty top-level list items in imported HTML content
This commit is contained in:
@@ -56,7 +56,10 @@ async function fileToMarkdown(content: Buffer | string): Promise<string> {
|
|||||||
|
|
||||||
async function docxToMarkdown(content: Buffer | string): Promise<string> {
|
async function docxToMarkdown(content: Buffer | string): Promise<string> {
|
||||||
if (content instanceof Buffer) {
|
if (content instanceof Buffer) {
|
||||||
const { value: html } = await mammoth.convertToHtml({ buffer: content });
|
const { value: html } = await mammoth.convertToHtml({
|
||||||
|
buffer: content,
|
||||||
|
});
|
||||||
|
|
||||||
return turndownService.turndown(html);
|
return turndownService.turndown(html);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -10,16 +10,14 @@ export default function confluenceTaskList(turndownService: TurndownService) {
|
|||||||
filter(node) {
|
filter(node) {
|
||||||
return (
|
return (
|
||||||
node.nodeName === "LI" &&
|
node.nodeName === "LI" &&
|
||||||
node.parentNode?.nodeName === "UL" &&
|
node.parentElement?.nodeName === "UL" &&
|
||||||
// @ts-expect-error className exists
|
node.parentElement?.className.includes("inline-task-list")
|
||||||
node.parentNode?.className.includes("inline-task-list")
|
|
||||||
);
|
);
|
||||||
},
|
},
|
||||||
replacement(content, node) {
|
replacement(content, node) {
|
||||||
return (
|
return "className" in node
|
||||||
// @ts-expect-error className exists
|
? (node.className === "checked" ? "- [x]" : "- [ ]") + ` ${content} \n`
|
||||||
(node.className === "checked" ? "- [x]" : "- [ ]") + ` ${content} \n`
|
: content;
|
||||||
);
|
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
22
server/utils/turndown/empty-lists.ts
Normal file
22
server/utils/turndown/empty-lists.ts
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
import TurndownService from "turndown";
|
||||||
|
|
||||||
|
/**
 * A turndown plugin that unwraps list items which contain nothing but a
 * nested list.
 *
 * The rule matches any `<li>` whose single child node is an `<ol>` or `<ul>`
 * (the nesting depth of the `<li>` itself is not inspected) and replaces it
 * with the converted content of that nested list only.
 *
 * @param turndownService The TurndownService instance to register the rule on.
 */
export default function emptyLists(turndownService: TurndownService) {
  turndownService.addRule("empty-lists", {
    // Match an <li> that has exactly one child node, and that child is a list.
    filter(node) {
      return (
        node.nodeName === "LI" &&
        node.childNodes.length === 1 &&
        (node.firstChild?.nodeName === "OL" ||
          node.firstChild?.nodeName === "UL")
      );
    },
    // Pass the nested list's converted content through unchanged; nothing is
    // added here for the wrapping item itself.
    replacement(content) {
      return content;
    },
  });
}
|
||||||
@@ -8,14 +8,13 @@ import TurndownService from "turndown";
|
|||||||
export default function images(turndownService: TurndownService) {
|
export default function images(turndownService: TurndownService) {
|
||||||
turndownService.addRule("image", {
|
turndownService.addRule("image", {
|
||||||
filter: "img",
|
filter: "img",
|
||||||
|
|
||||||
replacement(content, node) {
|
replacement(content, node) {
|
||||||
// @ts-expect-error getAttribute exists
|
if (!("className" in node)) {
|
||||||
const alt = cleanAttribute(node.getAttribute("alt"));
|
return content;
|
||||||
// @ts-expect-error getAttribute exists
|
}
|
||||||
|
const alt = cleanAttribute(node.getAttribute("alt") || "");
|
||||||
const src = (node.getAttribute("src") || "").replace(/\n+/g, "");
|
const src = (node.getAttribute("src") || "").replace(/\n+/g, "");
|
||||||
// @ts-expect-error getAttribute exists
|
const title = cleanAttribute(node.getAttribute("title") || "");
|
||||||
const title = cleanAttribute(node.getAttribute("title"));
|
|
||||||
const titlePart = title ? ' "' + title + '"' : "";
|
const titlePart = title ? ' "' + title + '"' : "";
|
||||||
return src ? "![" + alt + "]" + "(" + src + titlePart + ")" : "";
|
return src ? "![" + alt + "]" + "(" + src + titlePart + ")" : "";
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import TurndownService from "turndown";
|
|||||||
import breaks from "./breaks";
|
import breaks from "./breaks";
|
||||||
import confluenceCodeBlock from "./confluence-code-block";
|
import confluenceCodeBlock from "./confluence-code-block";
|
||||||
import confluenceTaskList from "./confluence-task-list";
|
import confluenceTaskList from "./confluence-task-list";
|
||||||
|
import emptyLists from "./empty-lists";
|
||||||
import images from "./images";
|
import images from "./images";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -27,6 +28,7 @@ const service = new TurndownService({
|
|||||||
.use(confluenceTaskList)
|
.use(confluenceTaskList)
|
||||||
.use(confluenceCodeBlock)
|
.use(confluenceCodeBlock)
|
||||||
.use(images)
|
.use(images)
|
||||||
.use(breaks);
|
.use(breaks)
|
||||||
|
.use(emptyLists);
|
||||||
|
|
||||||
export default service;
|
export default service;
|
||||||
|
|||||||
Reference in New Issue
Block a user