fix: HTML table fails to import with empty cells

This commit is contained in:
Tom Moor
2024-02-17 22:44:18 -05:00
parent 71d41378db
commit 4e7ea0b7f1
4 changed files with 20 additions and 30 deletions

View File

@@ -9,6 +9,7 @@ import inlineLink from "./inlineLink";
import sanitizeLists from "./sanitizeLists"; import sanitizeLists from "./sanitizeLists";
import sanitizeTables from "./sanitizeTables"; import sanitizeTables from "./sanitizeTables";
import underlines from "./underlines"; import underlines from "./underlines";
import { inHtmlContext } from "./utils";
/** /**
* Turndown converts HTML to Markdown and is used in the importer code. * Turndown converts HTML to Markdown and is used in the importer code.
@@ -20,8 +21,10 @@ const service = new TurndownService({
bulletListMarker: "-", bulletListMarker: "-",
headingStyle: "atx", headingStyle: "atx",
codeBlockStyle: "fenced", codeBlockStyle: "fenced",
blankReplacement: (content, node) => blankReplacement: (_, node) =>
node.nodeName === "P" ? "\n\n\\\n" : "", node.nodeName === "P" && !inHtmlContext(node as HTMLElement, "td, th")
? "\n\n\\\n"
: "",
}) })
.remove(["script", "style", "title", "head"]) .remove(["script", "style", "title", "head"])
.use(gfm) .use(gfm)

View File

@@ -1,4 +1,5 @@
import TurndownService from "turndown"; import TurndownService from "turndown";
import { inHtmlContext } from "./utils";
/** /**
* A turndown plugin for removing incompatible nodes from lists. * A turndown plugin for removing incompatible nodes from lists.
@@ -6,20 +7,6 @@ import TurndownService from "turndown";
* @param turndownService The TurndownService instance. * @param turndownService The TurndownService instance.
*/ */
export default function sanitizeLists(turndownService: TurndownService) { export default function sanitizeLists(turndownService: TurndownService) {
function inHtmlContext(node: HTMLElement, selector: string) {
let currentNode = node;
// start at the closest element
while (currentNode !== null && currentNode.nodeType !== 1) {
currentNode = (currentNode.parentElement ||
currentNode.parentNode) as HTMLElement;
}
return (
currentNode !== null &&
currentNode.nodeType === 1 &&
currentNode.closest(selector) !== null
);
}
turndownService.addRule("headingsInLists", { turndownService.addRule("headingsInLists", {
filter(node) { filter(node) {
return ( return (

View File

@@ -1,4 +1,5 @@
import TurndownService from "turndown"; import TurndownService from "turndown";
import { inHtmlContext } from "./utils";
/** /**
* A turndown plugin for removing incompatible nodes from tables. * A turndown plugin for removing incompatible nodes from tables.
@@ -6,20 +7,6 @@ import TurndownService from "turndown";
* @param turndownService The TurndownService instance. * @param turndownService The TurndownService instance.
*/ */
export default function sanitizeTables(turndownService: TurndownService) { export default function sanitizeTables(turndownService: TurndownService) {
function inHtmlContext(node: HTMLElement, selector: string) {
let currentNode = node;
// start at the closest element
while (currentNode !== null && currentNode.nodeType !== 1) {
currentNode = (currentNode.parentElement ||
currentNode.parentNode) as HTMLElement;
}
return (
currentNode !== null &&
currentNode.nodeType === 1 &&
currentNode.closest(selector) !== null
);
}
turndownService.addRule("headingsInTables", { turndownService.addRule("headingsInTables", {
filter(node) { filter(node) {
return ( return (

View File

@@ -0,0 +1,13 @@
/**
 * Checks whether a node is located within (or is itself) an element that
 * matches the given CSS selector.
 *
 * Nodes that are not elements (for example text nodes) are first resolved to
 * their nearest ancestor, since only elements can be queried with `closest`.
 *
 * @param node The node to start the lookup from.
 * @param selector The CSS selector handed to `closest`.
 * @returns true when the resolved element, or one of its ancestors, matches
 * the selector; false when there is no match or no element could be reached.
 */
export function inHtmlContext(node: HTMLElement, selector: string) {
  const ELEMENT_NODE = 1;
  let element = node;

  // Climb until an element is reached or the top of the tree is passed.
  for (;;) {
    if (element === null) {
      return false;
    }
    if (element.nodeType === ELEMENT_NODE) {
      break;
    }
    element = (element.parentElement || element.parentNode) as HTMLElement;
  }

  return element.closest(selector) !== null;
}