feat: Add HTML export option (#4056)

* tidy

* Add title to HTML export

* fix: Add compatibility for documents without collab state

* Add HTML download option to UI

* docs

* fix nodes that required document to render

* Refactor to allow for styling of HTML export

* div>article for easier programmatic content extraction
This commit is contained in:
Tom Moor
2022-09-07 13:34:39 +02:00
committed by GitHub
parent eb5126335c
commit e8a6de3f18
30 changed files with 1756 additions and 1790 deletions

View File

@@ -13,6 +13,7 @@ type Props = {
id?: string;
shareId?: string;
user?: User;
includeState?: boolean;
};
type Result = {
@@ -25,6 +26,7 @@ export default async function loadDocument({
id,
shareId,
user,
includeState,
}: Props): Promise<Result> {
let document;
let collection;
@@ -156,6 +158,7 @@ export default async function loadDocument({
document = await Document.findByPk(id as string, {
userId: user ? user.id : undefined,
paranoid: false,
includeState,
});
if (!document) {

View File

@@ -1,5 +1,6 @@
import { Transaction } from "sequelize";
import { Event, Document, User } from "@server/models";
import DocumentHelper from "@server/models/helpers/DocumentHelper";
type Props = {
/** The user updating the document */
@@ -62,7 +63,7 @@ export default async function documentUpdater({
}
if (text !== undefined) {
if (user.team?.collaborativeEditing) {
document.updateFromMarkdown(text, append);
document = DocumentHelper.applyMarkdownToDocument(document, text, append);
} else if (append) {
document.text += text;
} else {

View File

@@ -1,129 +0,0 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
exports[`renders blockquote 1`] = `
"<blockquote>
<p>blockquote</p>
</blockquote>"
`;
exports[`renders bold marks 1`] = `"<p>this is <strong>bold</strong> text</p>"`;
exports[`renders bullet list 1`] = `
"<ul>
<li>item one</li>
<li>item two
<ul>
<li>nested item</li>
</ul>
</li>
</ul>"
`;
exports[`renders checkbox list 1`] = `
"<ul>
<li class=\\"checkbox-list-item\\"><span class=\\"checkbox \\">[ ]</span>unchecked</li>
<li class=\\"checkbox-list-item\\"><span class=\\"checkbox checked\\">[x]</span>checked</li>
</ul>"
`;
exports[`renders code block 1`] = `
"<pre><code>this is indented code
</code></pre>"
`;
exports[`renders code fence 1`] = `
"<pre><code class=\\"language-javascript\\">this is code
</code></pre>"
`;
exports[`renders code marks 1`] = `"<p>this is <code>inline code</code> text</p>"`;
exports[`renders headings 1`] = `
"<h1>Heading 1</h1>
<h2>Heading 2</h2>
<h3>Heading 3</h3>
<h4>Heading 4</h4>"
`;
exports[`renders highlight marks 1`] = `"<p>this is <span class=\\"highlight\\">highlighted</span> text</p>"`;
exports[`renders horizontal rule 1`] = `"<hr>"`;
exports[`renders image 1`] = `"<p><img src=\\"https://lorempixel.com/200/200\\" alt=\\"caption\\"></p>"`;
exports[`renders image with alignment 1`] = `"<p><img src=\\"https://lorempixel.com/200/200\\" alt=\\"caption\\" title=\\"left-40\\"></p>"`;
exports[`renders info notice 1`] = `
"<div class=\\"notice notice-info\\">
<p>content of notice</p>
</div>"
`;
exports[`renders italic marks 1`] = `"<p>this is <em>italic</em> text</p>"`;
exports[`renders italic marks 2`] = `"<p>this is <em>also italic</em> text</p>"`;
exports[`renders link marks 1`] = `"<p>this is <a href=\\"https://www.example.com\\">linked</a> text</p>"`;
exports[`renders ordered list 1`] = `
"<ol>
<li>item one</li>
<li>item two</li>
</ol>"
`;
exports[`renders ordered list 2`] = `
"<ol>
<li>item one</li>
<li>item two</li>
</ol>"
`;
exports[`renders plain text as paragraph 1`] = `"<p>plain text</p>"`;
exports[`renders table 1`] = `
"<table>
<tr>
<th>
<p>heading</p></th>
<th style=\\"text-align:center\\">
<p>centered</p></th>
<th style=\\"text-align:right\\">
<p>right aligned</p></th>
</tr>
<tr>
<td>
<p></p></td>
<td style=\\"text-align:center\\">
<p>center</p></td>
<td style=\\"text-align:right\\">
<p></p></td>
</tr>
<tr>
<td>
<p></p></td>
<td style=\\"text-align:center\\">
<p></p></td>
<td style=\\"text-align:right\\">
<p>bottom r</p></td>
</tr>
</table>"
`;
exports[`renders template placeholder marks 1`] = `"<p>this is <span class=\\"placeholder\\">a placeholder</span></p>"`;
exports[`renders tip notice 1`] = `
"<div class=\\"notice notice-tip\\">
<p>content of notice</p>
</div>"
`;
exports[`renders underline marks 1`] = `"<p>this is <underline>underlined</underline> text</p>"`;
exports[`renders underline marks 2`] = `"<p>this is <s>strikethrough</s> text</p>"`;
exports[`renders warning notice 1`] = `
"<div class=\\"notice notice-warning\\">
<p>content of notice</p>
</div>"
`;

View File

@@ -1,7 +1,6 @@
import { Schema } from "prosemirror-model";
import ExtensionManager from "@shared/editor/lib/ExtensionManager";
import fullPackage from "@shared/editor/packages/full";
import render from "./renderToHtml";
const extensions = new ExtensionManager(fullPackage);
@@ -16,6 +15,3 @@ export const parser = extensions.parser({
});
export const serializer = extensions.serializer();
export const renderToHtml = (markdown: string): string =>
render(markdown, extensions.rulePlugins);

View File

@@ -1,154 +0,0 @@
import renderToHtml from "./renderToHtml";
test("renders an empty string", () => {
expect(renderToHtml("")).toBe("");
});
test("renders plain text as paragraph", () => {
expect(renderToHtml("plain text")).toMatchSnapshot();
});
test("renders blockquote", () => {
expect(renderToHtml("> blockquote")).toMatchSnapshot();
});
test("renders code block", () => {
expect(
renderToHtml(`
this is indented code
`)
).toMatchSnapshot();
});
test("renders code fence", () => {
expect(
renderToHtml(`\`\`\`javascript
this is code
\`\`\``)
).toMatchSnapshot();
});
test("renders checkbox list", () => {
expect(
renderToHtml(`- [ ] unchecked
- [x] checked`)
).toMatchSnapshot();
});
test("renders bullet list", () => {
expect(
renderToHtml(`- item one
- item two
- nested item`)
).toMatchSnapshot();
});
test("renders info notice", () => {
expect(
renderToHtml(`:::info
content of notice
:::`)
).toMatchSnapshot();
});
test("renders warning notice", () => {
expect(
renderToHtml(`:::warning
content of notice
:::`)
).toMatchSnapshot();
});
test("renders tip notice", () => {
expect(
renderToHtml(`:::tip
content of notice
:::`)
).toMatchSnapshot();
});
test("renders headings", () => {
expect(
renderToHtml(`# Heading 1
## Heading 2
### Heading 3
#### Heading 4`)
).toMatchSnapshot();
});
test("renders horizontal rule", () => {
expect(renderToHtml(`---`)).toMatchSnapshot();
});
test("renders image", () => {
expect(
renderToHtml(`![caption](https://lorempixel.com/200/200)`)
).toMatchSnapshot();
});
test("renders image with alignment", () => {
expect(
renderToHtml(`![caption](https://lorempixel.com/200/200 "left-40")`)
).toMatchSnapshot();
});
test("renders table", () => {
expect(
renderToHtml(`
| heading | centered | right aligned |
|---------|:--------:|--------------:|
| | center | |
| | | bottom r |
`)
).toMatchSnapshot();
});
test("renders bold marks", () => {
expect(renderToHtml(`this is **bold** text`)).toMatchSnapshot();
});
test("renders code marks", () => {
expect(renderToHtml(`this is \`inline code\` text`)).toMatchSnapshot();
});
test("renders highlight marks", () => {
expect(renderToHtml(`this is ==highlighted== text`)).toMatchSnapshot();
});
test("renders italic marks", () => {
expect(renderToHtml(`this is *italic* text`)).toMatchSnapshot();
expect(renderToHtml(`this is _also italic_ text`)).toMatchSnapshot();
});
test("renders template placeholder marks", () => {
expect(renderToHtml(`this is !!a placeholder!!`)).toMatchSnapshot();
});
test("renders underline marks", () => {
expect(renderToHtml(`this is __underlined__ text`)).toMatchSnapshot();
});
test("renders link marks", () => {
expect(
renderToHtml(`this is [linked](https://www.example.com) text`)
).toMatchSnapshot();
});
test("renders underline marks", () => {
expect(renderToHtml(`this is ~~strikethrough~~ text`)).toMatchSnapshot();
});
test("renders ordered list", () => {
expect(
renderToHtml(`1. item one
1. item two`)
).toMatchSnapshot();
expect(
renderToHtml(`1. item one
2. item two`)
).toMatchSnapshot();
});

View File

@@ -1,31 +0,0 @@
import { PluginSimple } from "markdown-it";
import createMarkdown from "@shared/editor/lib/markdown/rules";
import attachmentsRule from "@shared/editor/rules/attachments";
import breakRule from "@shared/editor/rules/breaks";
import checkboxRule from "@shared/editor/rules/checkboxes";
import embedsRule from "@shared/editor/rules/embeds";
import emojiRule from "@shared/editor/rules/emoji";
import markRule from "@shared/editor/rules/mark";
import noticesRule from "@shared/editor/rules/notices";
import tablesRule from "@shared/editor/rules/tables";
import underlinesRule from "@shared/editor/rules/underlines";
// Rule plugins used by renderToHtml when the caller does not pass its own
// list. Each entry comes from @shared/editor/rules; the two markRule entries
// map the "==" delimiter to the highlight mark and "!!" to the placeholder mark.
const defaultRules = [
embedsRule([]),
breakRule,
checkboxRule,
markRule({ delim: "==", mark: "highlight" }),
markRule({ delim: "!!", mark: "placeholder" }),
underlinesRule,
tablesRule,
noticesRule,
attachmentsRule,
emojiRule,
];
/**
 * Renders a Markdown string to HTML using the renderer built by
 * createMarkdown with the supplied rule plugins.
 *
 * @param markdown The Markdown source to render
 * @param rulePlugins Rule plugins to install, defaults to defaultRules
 * @returns The rendered HTML with leading and trailing whitespace trimmed
 */
export default function renderToHtml(
  markdown: string,
  rulePlugins: PluginSimple[] = defaultRules
): string {
  const renderer = createMarkdown({ plugins: rulePlugins });
  const rendered = renderer.render(markdown);
  return rendered.trim();
}

View File

@@ -1,4 +1,3 @@
import { updateYFragment } from "@getoutline/y-prosemirror";
import removeMarkdown from "@tommoor/remove-markdown";
import invariant from "invariant";
import { compact, find, map, uniq } from "lodash";
@@ -34,14 +33,12 @@ import {
} from "sequelize-typescript";
import MarkdownSerializer from "slate-md-serializer";
import isUUID from "validator/lib/isUUID";
import * as Y from "yjs";
import { DateFilter } from "@shared/types";
import getTasks from "@shared/utils/getTasks";
import parseTitle from "@shared/utils/parseTitle";
import unescape from "@shared/utils/unescape";
import { SLUG_URL_REGEX } from "@shared/utils/urlHelpers";
import { DocumentValidation } from "@shared/validations";
import { parser } from "@server/editor";
import slugify from "@server/utils/slugify";
import Backlink from "./Backlink";
import Collection from "./Collection";
@@ -482,7 +479,7 @@ class Document extends ParanoidModel {
query: string,
options: SearchOptions = {}
): Promise<SearchResponse> {
const wildcardQuery = `${escape(query)}:*`;
const wildcardQuery = `${escapeQuery(query)}:*`;
const {
snippetMinWords = 20,
snippetMaxWords = 30,
@@ -610,7 +607,7 @@ class Document extends ParanoidModel {
limit = 15,
offset = 0,
} = options;
const wildcardQuery = `${escape(query)}:*`;
const wildcardQuery = `${escapeQuery(query)}:*`;
// Ensure we're filtering by the users accessible collections. If
// collectionId is passed as an option it is assumed that the authorization
@@ -731,38 +728,6 @@ class Document extends ParanoidModel {
// instance methods
/**
 * Replaces (or, when `append` is true, extends) the document text with the
 * given Markdown and, if a collaborative state exists, rewrites that state so
 * it matches the new text.
 *
 * @param text The markdown to apply
 * @param append If true appends the markdown instead of replacing the text
 */
updateFromMarkdown = (text: string, append = false) => {
  this.text = append ? this.text + text : text;

  // Without a collaborative state there is nothing further to synchronise.
  if (!this.state) {
    return;
  }

  const yDocument = new Y.Doc();
  Y.applyUpdate(yDocument, this.state);
  const fragment = yDocument.get("default", Y.XmlFragment) as Y.XmlFragment;
  const parsed = parser.parse(this.text);

  if (!fragment.doc) {
    throw new Error("type.doc not found");
  }

  // apply new document to existing ydoc
  updateYFragment(fragment.doc, fragment, parsed, new Map());

  const encoded = Y.encodeStateAsUpdate(yDocument);
  this.state = Buffer.from(encoded);
  // Explicitly flag the column as changed after reassigning the buffer.
  this.changed("state", true);
};
/**
 * Returns the unescaped document text as Markdown. When the document has a
 * truthy `version` the title is prepended as a level-one heading.
 */
toMarkdown = () => {
  const body = unescape(this.text);
  return this.version ? `# ${this.title}\n\n${body}` : body;
};
migrateVersion = () => {
let migrated = false;
@@ -1054,7 +1019,7 @@ class Document extends ParanoidModel {
};
}
function escape(query: string): string {
function escapeQuery(query: string): string {
// replace "\" with escaped "\\" because sequelize.escape doesn't do it
// https://github.com/sequelize/sequelize/issues/2950
return Document.sequelize!.escape(query).replace(/\\/g, "\\\\");

View File

@@ -0,0 +1,154 @@
import {
updateYFragment,
yDocToProsemirrorJSON,
} from "@getoutline/y-prosemirror";
import { JSDOM } from "jsdom";
import { Node, DOMSerializer } from "prosemirror-model";
import * as React from "react";
import { renderToString } from "react-dom/server";
import styled, { ServerStyleSheet, ThemeProvider } from "styled-components";
import * as Y from "yjs";
import EditorContainer from "@shared/editor/components/Styles";
import GlobalStyles from "@shared/styles/globals";
import light from "@shared/styles/theme";
import unescape from "@shared/utils/unescape";
import { parser, schema } from "@server/editor";
import Logger from "@server/logging/Logger";
import type Document from "@server/models/Document";
export default class DocumentHelper {
  /**
   * Returns the document as a Prosemirror Node. This method uses the
   * collaborative state if available, otherwise it falls back to parsing the
   * document's Markdown text.
   *
   * @param document The document to convert
   * @returns The document content as a Prosemirror Node
   */
  static toProsemirror(document: Document) {
    if (document.state) {
      const ydoc = new Y.Doc();
      Y.applyUpdate(ydoc, document.state);
      return Node.fromJSON(schema, yDocToProsemirrorJSON(ydoc, "default"));
    }
    return parser.parse(document.text);
  }

  /**
   * Returns the document as Markdown. This is a lossy conversion and should
   * only be used for export.
   *
   * @param document The document to convert
   * @returns The document title and content as a Markdown string
   */
  static toMarkdown(document: Document) {
    const text = unescape(document.text);

    // Documents with a truthy version get their title prepended as a
    // level-one heading.
    if (document.version) {
      return `# ${document.title}\n\n${text}`;
    }

    return text;
  }

  /**
   * Returns the document as plain HTML. This is a lossy conversion and should
   * only be used for export.
   *
   * If rendering the styled wrapper fails the error is logged and the content
   * is still exported, unstyled, directly into the page body.
   *
   * @param document The document to convert
   * @returns The document title and content as a HTML string
   */
  static toHTML(document: Document) {
    const node = DocumentHelper.toProsemirror(document);
    const sheet = new ServerStyleSheet();

    // Default to empty strings so that a failed render below cannot leak the
    // literal text "undefined" into the exported markup.
    let html = "";
    let styleTags = "";

    const Centered = styled.article`
      max-width: 46em;
      margin: 0 auto;
      padding: 0 1em;
    `;

    // First render the containing document which has all the editor styles,
    // global styles, layout and title.
    try {
      html = renderToString(
        sheet.collectStyles(
          <ThemeProvider theme={light}>
            <>
              <GlobalStyles />
              <Centered>
                <h1>{document.title}</h1>
                <EditorContainer rtl={false}>
                  <div id="content" className="ProseMirror"></div>
                </EditorContainer>
              </Centered>
            </>
          </ThemeProvider>
        )
      );
      styleTags = sheet.getStyleTags();
    } catch (error) {
      Logger.error("Failed to render styles on document export", error, {
        id: document.id,
      });
    } finally {
      sheet.seal();
    }

    // Render the Prosemirror document using virtual DOM and serialize the
    // result to a string
    const dom = new JSDOM(`<!DOCTYPE html>${styleTags}${html}`);
    const doc = dom.window.document;

    // When the wrapper failed to render there is no #content element; fall
    // back to the body so the document content is not silently dropped.
    const target = doc.getElementById("content") ?? doc.body;

    DOMSerializer.fromSchema(schema).serializeFragment(
      node.content,
      {
        document: doc,
      },
      // @ts-expect-error incorrect library type, third argument is target node
      target
    );

    return dom.serialize();
  }

  /**
   * Applies the given Markdown to the document, this essentially creates a
   * single change in the collaborative state that makes all the edits to get
   * to the provided Markdown.
   *
   * Note that the passed document instance is mutated and returned.
   *
   * @param document The document to apply the changes to
   * @param text The markdown to apply
   * @param append If true appends the markdown instead of replacing existing
   * content
   * @returns The document
   * @throws Error if the decoded collaborative fragment has no owning doc
   */
  static applyMarkdownToDocument(
    document: Document,
    text: string,
    append = false
  ) {
    document.text = append ? document.text + text : text;

    if (document.state) {
      const ydoc = new Y.Doc();
      Y.applyUpdate(ydoc, document.state);
      const type = ydoc.get("default", Y.XmlFragment) as Y.XmlFragment;
      const doc = parser.parse(document.text);

      if (!type.doc) {
        throw new Error("type.doc not found");
      }

      // apply new document to existing ydoc
      updateYFragment(type.doc, type, doc, new Map());

      const state = Y.encodeStateAsUpdate(ydoc);
      document.state = Buffer.from(state);
      // Explicitly flag the column as changed after reassigning the buffer.
      document.changed("state", true);
    }

    return document;
  }
}

View File

@@ -8,6 +8,7 @@ import {
SearchQuery,
Event,
} from "@server/models";
import DocumentHelper from "@server/models/helpers/DocumentHelper";
import {
buildShare,
buildCollection,
@@ -462,7 +463,22 @@ describe("#documents.export", () => {
});
const body = await res.json();
expect(res.status).toEqual(200);
expect(body.data).toEqual(document.toMarkdown());
expect(body.data).toEqual(DocumentHelper.toMarkdown(document));
});
it("should return document text with accept=text/markdown", async () => {
const { user, document } = await seed();
const res = await server.post("/api/documents.export", {
body: {
token: user.getJwtToken(),
id: document.id,
},
headers: {
accept: "text/markdown",
},
});
const body = await res.text();
expect(body).toEqual(DocumentHelper.toMarkdown(document));
});
it("should return archived document", async () => {
@@ -476,7 +492,7 @@ describe("#documents.export", () => {
});
const body = await res.json();
expect(res.status).toEqual(200);
expect(body.data).toEqual(document.toMarkdown());
expect(body.data).toEqual(DocumentHelper.toMarkdown(document));
});
it("should not return published document in collection not a member of", async () => {
@@ -509,7 +525,7 @@ describe("#documents.export", () => {
});
const body = await res.json();
expect(res.status).toEqual(200);
expect(body.data).toEqual(document.toMarkdown());
expect(body.data).toEqual(DocumentHelper.toMarkdown(document));
});
it("should return document from shareId without token", async () => {
@@ -526,7 +542,7 @@ describe("#documents.export", () => {
});
const body = await res.json();
expect(res.status).toEqual(200);
expect(body.data).toEqual(document.toMarkdown());
expect(body.data).toEqual(DocumentHelper.toMarkdown(document));
});
it("should not return document from revoked shareId", async () => {
@@ -576,7 +592,7 @@ describe("#documents.export", () => {
});
const body = await res.json();
expect(res.status).toEqual(200);
expect(body.data).toEqual(document.toMarkdown());
expect(body.data).toEqual(DocumentHelper.toMarkdown(document));
});
it("should return draft document from shareId with token", async () => {
@@ -596,7 +612,7 @@ describe("#documents.export", () => {
});
const body = await res.json();
expect(res.status).toEqual(200);
expect(body.data).toEqual(document.toMarkdown());
expect(body.data).toEqual(DocumentHelper.toMarkdown(document));
});
it("should return document from shareId in collection not a member of", async () => {
@@ -616,7 +632,7 @@ describe("#documents.export", () => {
});
const body = await res.json();
expect(res.status).toEqual(200);
expect(body.data).toEqual(document.toMarkdown());
expect(body.data).toEqual(DocumentHelper.toMarkdown(document));
});
it("should require authorization without token", async () => {

View File

@@ -1,6 +1,7 @@
import fs from "fs-extra";
import invariant from "invariant";
import Router from "koa-router";
import mime from "mime-types";
import { Op, ScopeOptions, WhereOptions } from "sequelize";
import { subtractDate } from "@shared/utils/date";
import documentCreator from "@server/commands/documentCreator";
@@ -27,6 +28,7 @@ import {
User,
View,
} from "@server/models";
import DocumentHelper from "@server/models/helpers/DocumentHelper";
import { authorize, cannot } from "@server/policies";
import {
presentCollection,
@@ -439,14 +441,46 @@ router.post(
/**
 * Handler for documents.export. Responds with the document as an HTML or
 * Markdown file attachment when the Accept header asks for one of those
 * types, otherwise with a JSON envelope containing the Markdown.
 */
async (ctx) => {
  const { id, shareId } = ctx.body;
  assertPresent(id || shareId, "id or shareId is required");

  const { user } = ctx.state;
  const accept = ctx.request.headers["accept"];

  // Accept headers are commonly lists (e.g. "text/html, */*"), so match by
  // substring and reuse the same decision for loading state and for choosing
  // the output format. HTML takes precedence when both types are present.
  const wantsHtml = accept?.includes("text/html") ?? false;
  const wantsMarkdown =
    !wantsHtml && (accept?.includes("text/markdown") ?? false);

  const { document } = await documentLoader({
    id,
    shareId,
    user,
    // We need the collaborative state to generate HTML.
    includeState: wantsHtml,
  });

  if (wantsHtml || wantsMarkdown) {
    const contentType = wantsHtml ? "text/html" : "text/markdown";
    const content = wantsHtml
      ? DocumentHelper.toHTML(document)
      : DocumentHelper.toMarkdown(document);

    // Let Koa build the Content-Disposition header so that quotes and
    // non-ASCII characters in the title are encoded rather than producing a
    // malformed or invalid header.
    ctx.attachment(`${document.title}.${mime.extension(contentType)}`);
    // Set after attachment(), which also derives a type from the extension.
    ctx.set("Content-Type", contentType);
    ctx.body = content;
    return;
  }

  ctx.body = {
    data: DocumentHelper.toMarkdown(document),
  };
}
);

View File

@@ -8,6 +8,7 @@ import Logger from "@server/logging/Logger";
import Attachment from "@server/models/Attachment";
import Collection from "@server/models/Collection";
import Document from "@server/models/Document";
import DocumentHelper from "@server/models/helpers/DocumentHelper";
import { NavigationNode } from "~/types";
import { deserializeFilename, serializeFilename } from "./fs";
import parseAttachmentIds from "./parseAttachmentIds";
@@ -36,7 +37,7 @@ async function addDocumentTreeToArchive(
continue;
}
let text = document.toMarkdown();
let text = DocumentHelper.toMarkdown(document);
const attachments = await Attachment.findAll({
where: {
teamId: document.teamId,

View File

@@ -25,7 +25,7 @@ export function assertArray(
export const assertIn = (
value: string,
options: (string | undefined | null)[],
options: Primitive[],
message?: string
) => {
if (!options.includes(value)) {