338 lines
8.1 KiB
TypeScript
338 lines
8.1 KiB
TypeScript
// Based on https://www.npmjs.com/package/joplin-turndown-plugin-gfm
|
|
import type TurndownService from "turndown";
|
|
import { inHtmlContext } from "./utils";
|
|
|
|
const rules: Record<string, TurndownService.Rule> = {};
|
|
const alignMap = { left: ":---", right: "---:", center: ":---:" };
|
|
|
|
// Note use of WeakMap to enable garbage collection
|
|
const tableShouldBeSkippedCache = new WeakMap<HTMLTableElement, boolean>();
|
|
|
|
function getAlignment(node: HTMLElement) {
|
|
return node
|
|
? (node.getAttribute("align") || node.style.textAlign || "").toLowerCase()
|
|
: "";
|
|
}
|
|
|
|
function getBorder(alignment: keyof typeof alignMap) {
|
|
return alignment ? alignMap[alignment] : "---";
|
|
}
|
|
|
|
function getColumnAlignment(
|
|
table: HTMLTableElement | null,
|
|
columnIndex: number
|
|
) {
|
|
const votes = {
|
|
left: 0,
|
|
right: 0,
|
|
center: 0,
|
|
"": 0,
|
|
};
|
|
|
|
let align: keyof typeof alignMap = "left";
|
|
if (!table) {
|
|
return align;
|
|
}
|
|
|
|
// Reference is important as .rows is an expensive getter.
|
|
const rows = table.rows;
|
|
|
|
for (let i = 0; i < rows.length; ++i) {
|
|
const row = rows[i];
|
|
if (columnIndex < row.childNodes.length) {
|
|
const cellAlignment = getAlignment(
|
|
row.childNodes[columnIndex] as HTMLElement
|
|
);
|
|
++votes[cellAlignment];
|
|
|
|
if (
|
|
votes[cellAlignment] > votes[align] &&
|
|
Object.keys(alignMap).includes(cellAlignment)
|
|
) {
|
|
align = cellAlignment as keyof typeof alignMap;
|
|
}
|
|
}
|
|
}
|
|
|
|
return align;
|
|
}
|
|
|
|
rules.tableCell = {
|
|
filter: ["th", "td"],
|
|
replacement(content, node: HTMLTableCellElement) {
|
|
if (tableShouldBeSkipped(nodeParentTable(node))) {
|
|
return content;
|
|
}
|
|
return cell(content, node);
|
|
},
|
|
};
|
|
|
|
rules.tableRow = {
|
|
filter: "tr",
|
|
replacement(content, node: HTMLTableRowElement) {
|
|
const parentTable = nodeParentTable(node);
|
|
if (tableShouldBeSkipped(parentTable)) {
|
|
return content;
|
|
}
|
|
|
|
let borderCells = "";
|
|
|
|
if (isHeadingRow(node)) {
|
|
const colCount = tableColCount(parentTable);
|
|
for (let i = 0; i < colCount; i++) {
|
|
const childNode =
|
|
i < node.childNodes.length ? node.childNodes[i] : null;
|
|
const border = getBorder(getColumnAlignment(parentTable, i));
|
|
borderCells += cell(border, childNode, i);
|
|
}
|
|
}
|
|
return "\n" + content + (borderCells ? "\n" + borderCells : "");
|
|
},
|
|
};
|
|
|
|
rules.table = {
|
|
// Only convert tables that can result in valid Markdown
|
|
// Other tables are kept as HTML using `keep` (see below).
|
|
filter(node) {
|
|
return node.nodeName === "TABLE" && !tableShouldBeHtml(node);
|
|
},
|
|
|
|
replacement(content, node: HTMLTableElement) {
|
|
if (tableShouldBeSkipped(node)) {
|
|
return content;
|
|
}
|
|
|
|
// Ensure there are no blank lines
|
|
content = content.replace(/\n+/g, "\n");
|
|
|
|
// If table has no heading, add an empty one so as to get a valid Markdown table
|
|
const secondLineParts = content.trim().split("\n");
|
|
let secondLine = "";
|
|
if (secondLineParts.length >= 2) {
|
|
secondLine = secondLineParts[1];
|
|
}
|
|
const secondLineIsDivider = /\| :?---/.test(secondLine);
|
|
|
|
const columnCount = tableColCount(node);
|
|
let emptyHeader = "";
|
|
if (columnCount && !secondLineIsDivider) {
|
|
emptyHeader = "|" + " |".repeat(columnCount) + "\n" + "|";
|
|
for (let columnIndex = 0; columnIndex < columnCount; ++columnIndex) {
|
|
emptyHeader +=
|
|
" " + getBorder(getColumnAlignment(node, columnIndex)) + " |";
|
|
}
|
|
}
|
|
|
|
return "\n\n" + emptyHeader + content + "\n\n";
|
|
},
|
|
};
|
|
|
|
rules.tableSection = {
|
|
filter: ["thead", "tbody", "tfoot"],
|
|
replacement(content) {
|
|
return content;
|
|
},
|
|
};
|
|
|
|
/**
|
|
* A tr is a heading row if the parent is a THEAD or its the first child of the TABLE or the first
|
|
* TBODY (possibly following a blank THEAD) and every cell is a TH.
|
|
*
|
|
* @param tr The tr node to check
|
|
* @returns Whether the tr is a heading row
|
|
*/
|
|
function isHeadingRow(tr: Node) {
|
|
const parentNode = tr.parentNode;
|
|
if (!parentNode) {
|
|
return false;
|
|
}
|
|
|
|
return (
|
|
parentNode.nodeName === "THEAD" ||
|
|
(parentNode.firstChild === tr &&
|
|
(parentNode.nodeName === "TABLE" || isFirstTbody(parentNode)) &&
|
|
Array.from(tr.childNodes).every((n) => n.nodeName === "TH"))
|
|
);
|
|
}
|
|
|
|
function isFirstTbody(element: Node) {
|
|
const previousSibling = element?.previousSibling;
|
|
if (!previousSibling) {
|
|
return false;
|
|
}
|
|
|
|
return (
|
|
element.nodeName === "TBODY" &&
|
|
(!previousSibling ||
|
|
(previousSibling.nodeName === "THEAD" &&
|
|
/^\s*$/i.test(previousSibling.textContent ?? "")))
|
|
);
|
|
}
|
|
|
|
function cell(
|
|
content: string,
|
|
node: ChildNode | null = null,
|
|
index: number | null = null
|
|
) {
|
|
if (index === null && node) {
|
|
index = Array.from(node?.parentNode?.childNodes ?? []).indexOf(node);
|
|
}
|
|
let prefix = " ";
|
|
if (index === 0) {
|
|
prefix = "| ";
|
|
}
|
|
let filteredContent = content
|
|
.trim()
|
|
.replace(/\n\r/g, "<br>")
|
|
.replace(/\n/g, "<br>");
|
|
filteredContent = filteredContent.replace(/\|+/g, "\\|");
|
|
while (filteredContent.length < 3) {
|
|
filteredContent += " ";
|
|
}
|
|
if (node) {
|
|
filteredContent = handleColSpan(filteredContent, node, " ");
|
|
}
|
|
return prefix + filteredContent + " |";
|
|
}
|
|
|
|
function nodeContainsTable(node: Node) {
|
|
if (!node?.childNodes) {
|
|
return false;
|
|
}
|
|
|
|
for (let i = 0; i < node.childNodes.length; i++) {
|
|
const child = node.childNodes[i];
|
|
if (child.nodeName === "TABLE") {
|
|
return true;
|
|
}
|
|
if (nodeContainsTable(child)) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
const nodeContains = (node: HTMLElement, types: string | string[]) => {
|
|
if (!node?.childNodes) {
|
|
return false;
|
|
}
|
|
|
|
for (let i = 0; i < node.childNodes.length; i++) {
|
|
const child = node.childNodes[i] as HTMLElement;
|
|
if (types === "code" && inHtmlContext(child, "CODE")) {
|
|
return true;
|
|
}
|
|
if (types.includes(child.nodeName)) {
|
|
return true;
|
|
}
|
|
if (nodeContains(child, types)) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
};
|
|
|
|
const tableShouldBeHtml = (tableNode: HTMLElement) =>
|
|
nodeContains(tableNode, ["code", "table"]);
|
|
|
|
// Various conditions under which a table should be skipped - i.e. each cell
|
|
// will be rendered one after the other as if they were paragraphs.
|
|
function tableShouldBeSkipped(tableNode: HTMLTableElement | null) {
|
|
if (!tableNode) {
|
|
return true;
|
|
}
|
|
|
|
const cached = tableShouldBeSkippedCache.get(tableNode);
|
|
if (cached !== undefined) {
|
|
return cached;
|
|
}
|
|
|
|
const process = () => {
|
|
if (!tableNode) {
|
|
return true;
|
|
}
|
|
|
|
// Reference is important as .rows is an expensive getter.
|
|
const rows = tableNode.rows;
|
|
|
|
if (!rows) {
|
|
return true;
|
|
}
|
|
if (rows.length === 1 && rows[0].childNodes.length <= 1) {
|
|
return true;
|
|
}
|
|
if (nodeContainsTable(tableNode)) {
|
|
return true;
|
|
}
|
|
return false;
|
|
};
|
|
|
|
const result = process();
|
|
tableShouldBeSkippedCache.set(tableNode, result);
|
|
return result;
|
|
}
|
|
|
|
function nodeParentTable(
|
|
node: HTMLTableCellElement | HTMLTableRowElement
|
|
): HTMLTableElement | null {
|
|
let parent = node.parentNode;
|
|
if (!parent) {
|
|
return null;
|
|
}
|
|
|
|
while (parent.nodeName !== "TABLE") {
|
|
parent = parent.parentNode;
|
|
if (!parent) {
|
|
return null;
|
|
}
|
|
}
|
|
|
|
return parent as HTMLTableElement;
|
|
}
|
|
|
|
function handleColSpan(content: string, node: ChildNode, emptyChar: string) {
|
|
if (!node) {
|
|
return content;
|
|
}
|
|
|
|
const colspan = Number((node as HTMLElement).getAttribute("colspan") || 1);
|
|
for (let i = 1; i < colspan; i++) {
|
|
content += " | " + emptyChar.repeat(3);
|
|
}
|
|
return content;
|
|
}
|
|
|
|
function tableColCount(node: HTMLTableElement | null) {
|
|
if (!node) {
|
|
return 0;
|
|
}
|
|
|
|
let maxColCount = 0;
|
|
|
|
// Reference is important as .rows is an expensive getter.
|
|
const rows = node.rows;
|
|
|
|
for (let i = 0; i < rows.length; i++) {
|
|
const row = rows[i];
|
|
const colCount = row.childNodes.length;
|
|
if (colCount > maxColCount) {
|
|
maxColCount = colCount;
|
|
}
|
|
}
|
|
return maxColCount;
|
|
}
|
|
|
|
export default function tables(turndownService: TurndownService) {
|
|
turndownService.keep(function (node) {
|
|
if (node.nodeName === "TABLE" && tableShouldBeHtml(node)) {
|
|
return true;
|
|
}
|
|
return false;
|
|
});
|
|
|
|
for (const key in rules) {
|
|
turndownService.addRule(key, rules[key]);
|
|
}
|
|
}
|