106 lines
2.9 KiB
TypeScript
106 lines
2.9 KiB
TypeScript
import emojiRegex from "emoji-regex";
|
||
import escapeRegExp from "lodash/escapeRegExp";
|
||
import truncate from "lodash/truncate";
|
||
import { Transaction } from "sequelize";
|
||
import parseTitle from "@shared/utils/parseTitle";
|
||
import { DocumentValidation } from "@shared/validations";
|
||
import { traceFunction } from "@server/logging/tracing";
|
||
import { User } from "@server/models";
|
||
import { ProsemirrorHelper } from "@server/models/helpers/ProsemirrorHelper";
|
||
import { TextHelper } from "@server/models/helpers/TextHelper";
|
||
import { DocumentConverter } from "@server/utils/DocumentConverter";
|
||
import { InvalidRequestError } from "../errors";
|
||
|
||
/**
 * Arguments accepted by documentImporter.
 */
type Props = {
  /** Raw file contents handed to DocumentConverter.convertToMarkdown. */
  content: Buffer | string;
  /**
   * Name of the imported file. Passed to the converter and, with its
   * extension stripped, used as the default document title.
   */
  fileName: string;
  /** MIME type passed to the converter alongside the content and file name. */
  mimeType: string;
  /** User forwarded to TextHelper.replaceImagesWithAttachments. */
  user: User;
  /** Optional IP address forwarded to TextHelper.replaceImagesWithAttachments. */
  ip?: string;
  /** Optional transaction forwarded to TextHelper.replaceImagesWithAttachments. */
  transaction?: Transaction;
};
|
||
|
||
/**
 * Converts an imported file into the values needed to create a document.
 *
 * The content is converted to markdown, a leading emoji (if any) is pulled out
 * as the icon, a leading markdown heading (if any) replaces the file name as
 * the title, and the cleaned text is encoded into a state buffer.
 *
 * @param props - see Props; `content`, `fileName` and `mimeType` feed the
 * converter, while `user`, `ip` and `transaction` are forwarded to
 * TextHelper.replaceImagesWithAttachments.
 * @returns the processed markdown `text`, the encoded `state`, the (possibly
 * truncated) `title` and the extracted `icon` when one was found.
 * @throws InvalidRequestError when either the text length or the encoded state
 * length exceeds DocumentValidation.maxStateLength.
 */
async function documentImporter({
  mimeType,
  fileName,
  content,
  user,
  ip,
  transaction,
}: Props): Promise<{
  icon?: string;
  text: string;
  title: string;
  state: Buffer;
}> {
  // Both size checks below report the same message; build it in one place.
  const tooLargeError = (documentTitle: string) =>
    InvalidRequestError(
      `The document "${documentTitle}" is too large to import, please reduce the length and try again`
    );

  let text = await DocumentConverter.convertToMarkdown(
    content,
    fileName,
    mimeType
  );

  // Default title is the file name without its extension.
  let title = fileName.replace(/\.[^/.]+$/, "");

  // Find and extract an emoji near the beginning of the document. The regex is
  // run against the full text and the match is only accepted when it *starts*
  // within the first 10 UTF-16 code units. Searching a `slice(0, 10)` instead
  // could cut a multi-code-unit emoji (ZWJ sequence, flag, skin tone) in half,
  // yielding a partial icon and leaving orphaned fragments in the text.
  const emojiMatch = emojiRegex().exec(text);
  const icon =
    emojiMatch && emojiMatch.index < 10 ? emojiMatch[0] : undefined;
  if (emojiMatch && icon) {
    // Remove exactly the matched occurrence.
    text =
      text.slice(0, emojiMatch.index) +
      text.slice(emojiMatch.index + icon.length);
  }

  // If the first line of the imported text looks like a markdown heading
  // then we can use this as the document title rather than the file name.
  if (text.trim().startsWith("# ")) {
    const result = parseTitle(text);
    title = result.title;
    text = text
      .trim()
      .replace(new RegExp(`#\\s+${escapeRegExp(title)}`), "")
      .trimStart();
  }

  // Replace any <br> generated by the turndown plugin with escaped newlines
  // to match our hardbreak parser.
  text = text.trim().replace(/<br>/gi, "\\n");

  // Remove any closed and immediately reopened formatting marks
  text = text.replace(/\*\*\*\*/gi, "").replace(/____/gi, "");

  text = await TextHelper.replaceImagesWithAttachments(
    text,
    user,
    ip,
    transaction
  );

  // Sanity check – text cannot possibly be longer than state so if it is, we
  // can short-circuit here before building the state at all.
  if (text.length > DocumentValidation.maxStateLength) {
    throw tooLargeError(title);
  }

  // It's better to truncate particularly long titles than fail the import
  title = truncate(title, { length: DocumentValidation.maxTitleLength });

  const ydoc = ProsemirrorHelper.toYDoc(text);
  const state = ProsemirrorHelper.toState(ydoc);

  if (state.length > DocumentValidation.maxStateLength) {
    throw tooLargeError(title);
  }

  return {
    text,
    state,
    title,
    icon,
  };
}
|
||
|
||
// The default export is documentImporter wrapped by traceFunction, configured
// with the span name "documentImporter".
const tracedDocumentImporter = traceFunction({
  spanName: "documentImporter",
})(documentImporter);

export default tracedDocumentImporter;
|