Files
outline/server/commands/documentImporter.ts
2024-05-24 05:29:00 -07:00

106 lines
3.0 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import emojiRegex from "emoji-regex";
import escapeRegExp from "lodash/escapeRegExp";
import truncate from "lodash/truncate";
import { Transaction } from "sequelize";
import parseTitle from "@shared/utils/parseTitle";
import { DocumentValidation } from "@shared/validations";
import { traceFunction } from "@server/logging/tracing";
import { User } from "@server/models";
import { ProsemirrorHelper } from "@server/models/helpers/ProsemirrorHelper";
import { TextHelper } from "@server/models/helpers/TextHelper";
import { DocumentConverter } from "@server/utils/DocumentConverter";
import { InvalidRequestError } from "../errors";
/** Arguments accepted by `documentImporter`. */
type Props = {
  /** Raw contents of the uploaded file, passed to the markdown converter. */
  content: Buffer | string;
  /**
   * Name of the uploaded file. Passed to the converter and, with its
   * extension removed, used as the default document title.
   */
  fileName: string;
  /** MIME type of the uploaded file, passed to the markdown converter. */
  mimeType: string;
  /** User performing the import; forwarded to the image replacement step. */
  user: User;
  /** Optional IP address forwarded to the image replacement step. */
  ip?: string;
  /** Optional Sequelize transaction forwarded to the image replacement step. */
  transaction?: Transaction;
};
/**
 * Converts an uploaded file into the values needed to create a document.
 *
 * Steps, in order:
 * 1. Convert the content to markdown via `DocumentConverter.convertToMarkdown`.
 * 2. Default the title to the file name with its extension removed.
 * 3. Extract the first emoji if it begins within the first 10 UTF-16 code
 *    units of the text, removing it from the text.
 * 4. If the text starts with a level-one markdown heading, use the title
 *    returned by `parseTitle` and strip that heading from the text.
 * 5. Replace `<br>` tags with an escaped newline, drop empty `****` / `____`
 *    formatting marks, then pass the text through
 *    `TextHelper.replaceImagesWithAttachments`.
 * 6. Truncate the title to `DocumentValidation.maxTitleLength` and build the
 *    state with `ProsemirrorHelper.toYDoc` / `ProsemirrorHelper.toState`.
 *
 * @param props See `Props`.
 * @returns The processed `text`, the `title`, the extracted `emoji` (if any)
 * and the encoded `state`.
 * @throws InvalidRequestError when either the text or the resulting state is
 * longer than `DocumentValidation.maxStateLength`.
 */
async function documentImporter({
  mimeType,
  fileName,
  content,
  user,
  ip,
  transaction,
}: Props): Promise<{
  emoji?: string;
  text: string;
  title: string;
  state: Buffer;
}> {
  let text = await DocumentConverter.convertToMarkdown(
    content,
    fileName,
    mimeType
  );

  // Fall back to the file name, minus its extension, as the title.
  let title = fileName.replace(/\.[^/.]+$/, "");

  // Find and extract an emoji near the beginning of the document. The search
  // runs against the full text and the match is accepted only if it *starts*
  // inside the window: slicing the text before matching could cut a
  // multi-code-unit emoji (e.g. a ZWJ sequence) in half, returning a fragment
  // and leaving the remainder of the sequence behind in the text.
  const emojiSearchWindow = 10;
  const emojiMatch = emojiRegex().exec(text);
  const emoji =
    emojiMatch && emojiMatch.index < emojiSearchWindow
      ? emojiMatch[0]
      : undefined;

  if (emojiMatch && emoji) {
    // Remove exactly the matched span rather than searching for it again.
    text =
      text.slice(0, emojiMatch.index) +
      text.slice(emojiMatch.index + emoji.length);
  }

  // If the first line of the imported text looks like a markdown heading
  // then we can use this as the document title rather than the file name.
  if (text.trim().startsWith("# ")) {
    const result = parseTitle(text);
    title = result.title;

    // Anchored to the start so that only the leading heading can be removed;
    // an unanchored pattern could otherwise strip a matching fragment out of
    // a later heading when the parsed title differs from the raw first line.
    text = text
      .trim()
      .replace(new RegExp(`^#\\s+${escapeRegExp(title)}`), "")
      .trimStart();
  }

  // Replace any <br> generated by the turndown plugin with escaped newlines
  // to match our hardbreak parser.
  text = text.trim().replace(/<br>/gi, "\\n");

  // Remove any closed and immediately reopened formatting marks
  text = text.replace(/\*\*\*\*/g, "").replace(/____/g, "");

  text = await TextHelper.replaceImagesWithAttachments(
    text,
    user,
    ip,
    transaction
  );

  // Sanity check: text cannot possibly be longer than state, so if it already
  // exceeds the limit we can short-circuit before building the state.
  if (text.length > DocumentValidation.maxStateLength) {
    throw InvalidRequestError(
      `The document "${title}" is too large to import, please reduce the length and try again`
    );
  }

  // It's better to truncate particularly long titles than fail the import
  title = truncate(title, { length: DocumentValidation.maxTitleLength });

  const ydoc = ProsemirrorHelper.toYDoc(text);
  const state = ProsemirrorHelper.toState(ydoc);

  if (state.length > DocumentValidation.maxStateLength) {
    throw InvalidRequestError(
      `The document "${title}" is too large to import, please reduce the length and try again`
    );
  }

  return {
    text,
    state,
    title,
    emoji,
  };
}
// The default export is the importer wrapped by `traceFunction`, configured
// with the span name "documentImporter".
const tracedDocumentImporter = traceFunction({
  spanName: "documentImporter",
})(documentImporter);

export default tracedDocumentImporter;