import emojiRegex from "emoji-regex";
import escapeRegExp from "lodash/escapeRegExp";
import truncate from "lodash/truncate";
import { Transaction } from "sequelize";
import parseTitle from "@shared/utils/parseTitle";
import { DocumentValidation } from "@shared/validations";
import { traceFunction } from "@server/logging/tracing";
import { User } from "@server/models";
import { ProsemirrorHelper } from "@server/models/helpers/ProsemirrorHelper";
import { TextHelper } from "@server/models/helpers/TextHelper";
import { DocumentConverter } from "@server/utils/DocumentConverter";
import { InvalidRequestError } from "../errors";
/** Arguments accepted by `documentImporter`. */
type Props = {
  /** Raw file contents, handed to `DocumentConverter.convertToMarkdown`. */
  content: Buffer | string;
  /**
   * Name of the file being imported. Passed to the converter and, with its
   * final extension removed, used as the default document title.
   */
  fileName: string;
  /** MIME type of the file, passed to the converter alongside the name. */
  mimeType: string;
  /** User forwarded to `TextHelper.replaceImagesWithAttachments`. */
  user: User;
  /** Optional IP address forwarded to `TextHelper.replaceImagesWithAttachments`. */
  ip?: string;
  /** Optional transaction forwarded to `TextHelper.replaceImagesWithAttachments`. */
  transaction?: Transaction;
};
/**
 * Converts the contents of an imported file to markdown and derives the
 * values needed to create a document from it: the cleaned-up text, a title,
 * an optional leading emoji and the encoded document state.
 *
 * @param props.mimeType MIME type of the file, passed to the converter.
 * @param props.fileName name of the file; used by the converter and, minus its
 * final extension, as the fallback title.
 * @param props.content raw file contents to convert.
 * @param props.user user forwarded to the image-to-attachment replacement.
 * @param props.ip optional IP address forwarded to the same step.
 * @param props.transaction optional transaction forwarded to the same step.
 * @returns the processed `text`, the (possibly truncated) `title`, the encoded
 * `state`, and the `emoji` found near the start of the text, if any.
 * @throws InvalidRequestError if either the text or the encoded state is
 * longer than `DocumentValidation.maxStateLength`.
 */
async function documentImporter({
  mimeType,
  fileName,
  content,
  user,
  ip,
  transaction,
}: Props): Promise<{
  emoji?: string;
  text: string;
  title: string;
  state: Buffer;
}> {
  let text = await DocumentConverter.convertToMarkdown(
    content,
    fileName,
    mimeType
  );

  // Default title is the file name with its final extension removed.
  let title = fileName.replace(/\.[^/.]+$/, "");

  // find and extract emoji near the beginning of the document. Only the
  // first 10 characters are searched, and the first occurrence of the matched
  // emoji is then removed from the text.
  const regex = emojiRegex();
  const matches = regex.exec(text.slice(0, 10));
  const emoji = matches ? matches[0] : undefined;
  if (emoji) {
    text = text.replace(emoji, "");
  }

  // If the first line of the imported text looks like a markdown heading
  // then we can use this as the document title rather than the file name.
  if (text.trim().startsWith("# ")) {
    const result = parseTitle(text);
    title = result.title;

    // Drop the heading itself from the body so it is not duplicated. The
    // title is escaped because it is interpolated into a regular expression.
    text = text
      .trim()
      .replace(new RegExp(`#\\s+${escapeRegExp(title)}`), "")
      .trimStart();
  }

  // Replace any <br> generated by the turndown plugin with escaped newlines
  // to match our hardbreak parser. The replacement is the two characters
  // backslash + "n", not an actual line feed.
  text = text.trim().replace(/<br>/gi, "\\n");

  // Remove any closed and immediately reopened formatting marks
  text = text.replace(/\*\*\*\*/gi, "").replace(/____/gi, "");

  text = await TextHelper.replaceImagesWithAttachments(
    text,
    user,
    ip,
    transaction
  );

  // Sanity check – text cannot possibly be longer than state so if it is, we can short-circuit here
  if (text.length > DocumentValidation.maxStateLength) {
    throw InvalidRequestError(
      `The document "${title}" is too large to import, please reduce the length and try again`
    );
  }

  // It's better to truncate particularly long titles than fail the import
  title = truncate(title, { length: DocumentValidation.maxTitleLength });

  const ydoc = ProsemirrorHelper.toYDoc(text);
  const state = ProsemirrorHelper.toState(ydoc);

  // The encoded state is subject to the same length limit as the text.
  if (state.length > DocumentValidation.maxStateLength) {
    throw InvalidRequestError(
      `The document "${title}" is too large to import, please reduce the length and try again`
    );
  }

  return {
    text,
    state,
    title,
    emoji,
  };
}
// Decorator that wraps a function via `traceFunction` under the span name
// "documentImporter".
const withDocumentImporterSpan = traceFunction({
  spanName: "documentImporter",
});

// The default export is the wrapped importer rather than the bare function.
export default withDocumentImporterSpan(documentImporter);