fix: HTML table fails to import with empty cells

This commit is contained in:
Tom Moor
2024-02-17 22:44:18 -05:00
parent 71d41378db
commit 4e7ea0b7f1
4 changed files with 20 additions and 30 deletions

View File

@@ -9,6 +9,7 @@ import inlineLink from "./inlineLink";
import sanitizeLists from "./sanitizeLists";
import sanitizeTables from "./sanitizeTables";
import underlines from "./underlines";
import { inHtmlContext } from "./utils";
/**
* Turndown converts HTML to Markdown and is used in the importer code.
@@ -20,8 +21,10 @@ const service = new TurndownService({
bulletListMarker: "-",
headingStyle: "atx",
codeBlockStyle: "fenced",
blankReplacement: (content, node) =>
node.nodeName === "P" ? "\n\n\\\n" : "",
blankReplacement: (_, node) =>
node.nodeName === "P" && !inHtmlContext(node as HTMLElement, "td, th")
? "\n\n\\\n"
: "",
})
.remove(["script", "style", "title", "head"])
.use(gfm)

View File

@@ -1,4 +1,5 @@
import TurndownService from "turndown";
import { inHtmlContext } from "./utils";
/**
* A turndown plugin for removing incompatible nodes from lists.
@@ -6,20 +7,6 @@ import TurndownService from "turndown";
* @param turndownService The TurndownService instance.
*/
export default function sanitizeLists(turndownService: TurndownService) {
/**
 * Checks whether a node is located within an element matching a selector.
 *
 * @param node The node to test; a non-element node is first resolved to its
 * nearest element by walking up the parent chain.
 * @param selector The CSS selector passed to `closest`.
 * @returns true when an element was reached and `closest` found a match.
 */
function inHtmlContext(node: HTMLElement, selector: string) {
let candidate = node;

// Walk upwards until an element node (nodeType 1) is reached or the
// parent chain ends in null.
while (!(candidate === null || candidate.nodeType === 1)) {
candidate = (candidate.parentElement ||
candidate.parentNode) as HTMLElement;
}

if (candidate === null || candidate.nodeType !== 1) {
return false;
}
return candidate.closest(selector) !== null;
}
turndownService.addRule("headingsInLists", {
filter(node) {
return (

View File

@@ -1,4 +1,5 @@
import TurndownService from "turndown";
import { inHtmlContext } from "./utils";
/**
* A turndown plugin for removing incompatible nodes from tables.
@@ -6,20 +7,6 @@ import TurndownService from "turndown";
* @param turndownService The TurndownService instance.
*/
export default function sanitizeTables(turndownService: TurndownService) {
/**
 * Reports whether the given node lives inside an element that matches the
 * supplied selector.
 *
 * @param node The starting node; if it is not an element, its parents are
 * followed until one is found.
 * @param selector The CSS selector handed to `closest`.
 * @returns false when no element could be reached or nothing matched,
 * otherwise true.
 */
function inHtmlContext(node: HTMLElement, selector: string) {
const ELEMENT_NODE = 1;
let cursor = node;

// Resolve the starting point to the nearest element node.
for (; cursor !== null && cursor.nodeType !== ELEMENT_NODE; ) {
const parent = cursor.parentElement || cursor.parentNode;
cursor = parent as HTMLElement;
}

const reachedElement = cursor !== null && cursor.nodeType === ELEMENT_NODE;
return reachedElement && cursor.closest(selector) !== null;
}
turndownService.addRule("headingsInTables", {
filter(node) {
return (

View File

@@ -0,0 +1,13 @@
/**
 * Determines whether a node is, or is nested inside, an element that matches
 * the given CSS selector.
 *
 * @param node The node to start from. Non-element nodes (e.g. text nodes) are
 * resolved to their nearest element by following `parentElement`, falling
 * back to `parentNode`.
 * @param selector The CSS selector passed to `closest`.
 * @returns true if an element was reached and `closest` returned a match,
 * otherwise false.
 */
export function inHtmlContext(node: HTMLElement, selector: string) {
  const ELEMENT_NODE = 1;
  let element = node;

  // Climb until we land on an element node or run out of parents.
  for (; element !== null && element.nodeType !== ELEMENT_NODE; ) {
    element = (element.parentElement || element.parentNode) as HTMLElement;
  }

  // No element reachable from the starting node: it cannot be in any context.
  if (element === null || element.nodeType !== ELEMENT_NODE) {
    return false;
  }

  return element.closest(selector) !== null;
}