diff --git a/server/utils/turndown/index.ts b/server/utils/turndown/index.ts index 990a17454..649129d5b 100644 --- a/server/utils/turndown/index.ts +++ b/server/utils/turndown/index.ts @@ -9,6 +9,7 @@ import inlineLink from "./inlineLink"; import sanitizeLists from "./sanitizeLists"; import sanitizeTables from "./sanitizeTables"; import underlines from "./underlines"; +import { inHtmlContext } from "./utils"; /** * Turndown converts HTML to Markdown and is used in the importer code. @@ -20,8 +21,10 @@ const service = new TurndownService({ bulletListMarker: "-", headingStyle: "atx", codeBlockStyle: "fenced", - blankReplacement: (content, node) => - node.nodeName === "P" ? "\n\n\\\n" : "", + blankReplacement: (_, node) => + node.nodeName === "P" && !inHtmlContext(node as HTMLElement, "td, th") + ? "\n\n\\\n" + : "", }) .remove(["script", "style", "title", "head"]) .use(gfm) diff --git a/server/utils/turndown/sanitizeLists.ts b/server/utils/turndown/sanitizeLists.ts index d9dcc43f8..e68a3bdfd 100644 --- a/server/utils/turndown/sanitizeLists.ts +++ b/server/utils/turndown/sanitizeLists.ts @@ -1,4 +1,5 @@ import TurndownService from "turndown"; +import { inHtmlContext } from "./utils"; /** * A turndown plugin for removing incompatible nodes from lists. @@ -6,20 +7,6 @@ import TurndownService from "turndown"; * @param turndownService The TurndownService instance. */ export default function sanitizeLists(turndownService: TurndownService) { - function inHtmlContext(node: HTMLElement, selector: string) { - let currentNode = node; - // start at the closest element - while (currentNode !== null && currentNode.nodeType !== 1) { - currentNode = (currentNode.parentElement || - currentNode.parentNode) as HTMLElement; - } - return ( - currentNode !== null && - currentNode.nodeType === 1 && - currentNode.closest(selector) !== null - ); - } - turndownService.addRule("headingsInLists", { filter(node) { return ( diff --git a/server/utils/turndown/sanitizeTables.ts b/server/utils/turndown/sanitizeTables.ts index 70c86bf8d..8210a473a 100644 --- a/server/utils/turndown/sanitizeTables.ts +++ b/server/utils/turndown/sanitizeTables.ts @@ -1,4 +1,5 @@ import TurndownService from "turndown"; +import { inHtmlContext } from "./utils"; /** * A turndown plugin for removing incompatible nodes from tables. @@ -6,20 +7,6 @@ import TurndownService from "turndown"; * @param turndownService The TurndownService instance. */ export default function sanitizeTables(turndownService: TurndownService) { - function inHtmlContext(node: HTMLElement, selector: string) { - let currentNode = node; - // start at the closest element - while (currentNode !== null && currentNode.nodeType !== 1) { - currentNode = (currentNode.parentElement || - currentNode.parentNode) as HTMLElement; - } - return ( - currentNode !== null && - currentNode.nodeType === 1 && - currentNode.closest(selector) !== null - ); - } - turndownService.addRule("headingsInTables", { filter(node) { return ( diff --git a/server/utils/turndown/utils.ts b/server/utils/turndown/utils.ts new file mode 100644 index 000000000..01deec9db --- /dev/null +++ b/server/utils/turndown/utils.ts @@ -0,0 +1,13 @@ +export function inHtmlContext(node: HTMLElement, selector: string) { + let currentNode = node; + // start at the closest element + while (currentNode !== null && currentNode.nodeType !== 1) { + currentNode = (currentNode.parentElement || + currentNode.parentNode) as HTMLElement; + } + return ( + currentNode !== null && + currentNode.nodeType === 1 && + currentNode.closest(selector) !== null + ); +}