Files
outline/server/queues/tasks/ImportTask.ts
Tom Moor eeb8008927 chore: Refactor collection export to match import (#3483)
* chore: Refactor collection export to use FileOperations processor and task

* Tweak options
2022-05-01 21:06:07 -07:00

384 lines
11 KiB
TypeScript

import invariant from "invariant";
import { truncate } from "lodash";
import attachmentCreator from "@server/commands/attachmentCreator";
import documentCreator from "@server/commands/documentCreator";
import { sequelize } from "@server/database/sequelize";
import { ValidationError } from "@server/errors";
import Logger from "@server/logging/logger";
import {
User,
Event,
Document,
Collection,
FileOperation,
Attachment,
} from "@server/models";
import { FileOperationState } from "@server/models/FileOperation";
import BaseTask, { TaskPriority } from "./BaseTask";
/**
 * Payload handed to the task when it is run.
 */
type Props = {
  /** Primary key of the FileOperation that this task should process. */
  fileOperationId: string;
};
/**
 * The common, source-agnostic shape that every import task parses its input
 * into before the data is persisted.
 */
export type StructuredImportData = {
  /** Collections to create. */
  collections: Array<{
    id: string;
    name: string;
    /**
     * The collection description. An attachment or image is referenced with
     * the special formatting <<attachmentId>>, which persistData swaps for a
     * reference to the actual attachment.
     *
     * A document is referenced with <<documentId>>, which persistData swaps
     * for a link to that document once the document url is generated.
     */
    description?: string;
    /** Optional id from import source, useful for mapping */
    sourceId?: string;
  }>;
  /** Documents to create, each belonging to one of the collections. */
  documents: Array<{
    id: string;
    title: string;
    /**
     * The document text. An attachment or image is referenced with the
     * special formatting <<attachmentId>>, which persistData swaps for a
     * reference to the actual attachment.
     *
     * Another document is referenced with <<documentId>>, which persistData
     * swaps for a link to that document once the document url is generated.
     */
    text: string;
    collectionId: string;
    updatedAt?: Date;
    createdAt?: Date;
    parentDocumentId?: string;
    path: string;
    /** Optional id from import source, useful for mapping */
    sourceId?: string;
  }>;
  /** Binary attachments referenced from descriptions and document text. */
  attachments: Array<{
    id: string;
    name: string;
    path: string;
    mimeType: string;
    buffer: Buffer;
    /** Optional id from import source, useful for mapping */
    sourceId?: string;
  }>;
};
export default abstract class ImportTask extends BaseTask<Props> {
  /**
   * Runs the import task: loads the FileOperation, fetches and parses its
   * data, persists the result and records the outcome on the FileOperation.
   * The cleanupData hook is always invoked once processing has finished,
   * whether it succeeded or failed.
   *
   * @param props The props
   * @returns The collections, documents and attachments that were persisted
   * @throws ValidationError when the parsed data has no collections or no
   * documents
   * @throws Error with a generic message when persisting the data fails (the
   * underlying error is logged)
   */
  public async perform({ fileOperationId }: Props) {
    const fileOperation = await FileOperation.findByPk(fileOperationId);
    invariant(fileOperation, "fileOperation not found");

    try {
      Logger.info("task", `ImportTask fetching data for ${fileOperationId}`);
      const data = await this.fetchData(fileOperation);

      Logger.info("task", `ImportTask parsing data for ${fileOperationId}`);
      const parsed = await this.parseData(data, fileOperation);

      if (parsed.collections.length === 0) {
        throw ValidationError(
          "Uploaded file does not contain any collections. The root of the zip file must contain folders representing collections."
        );
      }

      if (parsed.documents.length === 0) {
        throw ValidationError(
          "Uploaded file does not contain any valid documents"
        );
      }

      let result;

      try {
        Logger.info(
          "task",
          `ImportTask persisting data for ${fileOperationId}`
        );
        result = await this.persistData(parsed, fileOperation);
      } catch (error) {
        // Log the real cause, but surface only a generic message because the
        // message of the rethrown error is stored on the FileOperation.
        Logger.error(
          `ImportTask failed to persist data for ${fileOperationId}`,
          error
        );
        throw new Error("Sorry, an internal error occurred during import");
      }

      await this.updateFileOperation(
        fileOperation,
        FileOperationState.Complete
      );
      return result;
    } catch (error) {
      await this.updateFileOperation(
        fileOperation,
        FileOperationState.Error,
        error
      );
      throw error;
    } finally {
      // Give subclasses the chance to remove temporary files regardless of the
      // outcome. A failing cleanup is logged instead of thrown so that it can
      // neither mask the import result nor replace the original error.
      try {
        await this.cleanupData();
      } catch (cleanupError) {
        Logger.error(
          `ImportTask failed to clean up data for ${fileOperationId}`,
          cleanupError
        );
      }
    }
  }

  /**
   * Update the state of the underlying FileOperation in the database and send
   * an event to the client.
   *
   * @param fileOperation The FileOperation to update
   * @param state The new state to store on the FileOperation
   * @param error Optional error; its message is stored on the FileOperation,
   * truncated to 255 characters
   */
  private async updateFileOperation(
    fileOperation: FileOperation,
    state: FileOperationState,
    error?: Error
  ) {
    await fileOperation.update({
      state,
      error: error ? truncate(error.message, { length: 255 }) : undefined,
    });
    await Event.schedule({
      name: "fileOperations.update",
      modelId: fileOperation.id,
      teamId: fileOperation.teamId,
      actorId: fileOperation.userId,
    });
  }

  /**
   * Fetch the data needed for the import. By default this returns the `buffer`
   * of the FileOperation; whatever is returned here is handed to parseData
   * unchanged. Subclasses may override it to load the data differently.
   *
   * @param fileOperation The FileOperation to fetch data for
   * @returns The value of `fileOperation.buffer`
   */
  protected async fetchData(fileOperation: FileOperation) {
    return fileOperation.buffer;
  }

  /**
   * Parse the data loaded from fetchData into a consistent structured format
   * that represents collections, documents, and the relationships between them.
   *
   * @param data The data loaded from fetchData
   * @param fileOperation The FileOperation the data belongs to
   * @returns A promise that resolves to the structured data
   */
  protected abstract parseData(
    data: any,
    fileOperation: FileOperation
  ): Promise<StructuredImportData>;

  /**
   * Persist the data that was already fetched and parsed into the consistent
   * structured data. Everything is written inside a single database
   * transaction: attachments first, then collections, then documents.
   *
   * @param data The structured data to persist
   * @param fileOperation The FileOperation that the import belongs to
   * @returns Maps from the ids in the structured data to the created models
   */
  protected async persistData(
    data: StructuredImportData,
    fileOperation: FileOperation
  ): Promise<{
    collections: Map<string, Collection>;
    documents: Map<string, Document>;
    attachments: Map<string, Attachment>;
  }> {
    const collections = new Map<string, Collection>();
    const documents = new Map<string, Document>();
    const attachments = new Map<string, Attachment>();

    return sequelize.transaction(async (transaction) => {
      const user = await User.findByPk(fileOperation.userId, {
        transaction,
      });
      invariant(user, "User not found");

      const ip = user.lastActiveIp || undefined;

      // Attachments
      for (const item of data.attachments) {
        const attachment = await attachmentCreator({
          source: "import",
          id: item.id,
          name: item.name,
          type: item.mimeType,
          buffer: item.buffer,
          user,
          ip,
          transaction,
        });
        attachments.set(item.id, attachment);
      }

      // Build the list of placeholder substitutions once, rather than for
      // every collection and document. Attachment placeholders become
      // attachment redirect urls. Document placeholders become internal links;
      // because this happens before the documents are saved we can't use the
      // document slug, but we can take advantage of the fact that the document
      // id will redirect in the client.
      const replacements: Array<[string, string]> = [];

      for (const item of data.attachments) {
        const attachment = attachments.get(item.id);
        if (!attachment) {
          continue;
        }
        replacements.push([`<<${attachment.id}>>`, attachment.redirectUrl]);
      }

      for (const item of data.documents) {
        replacements.push([`<<${item.id}>>`, `/doc/${item.id}`]);
      }

      // Collections
      for (const item of data.collections) {
        let description = item.description;

        if (description) {
          description = this.replacePlaceholders(description, replacements);
        }

        // check if collection with name exists
        const response = await Collection.findOrCreate({
          where: {
            teamId: fileOperation.teamId,
            name: item.name,
          },
          defaults: {
            id: item.id,
            description,
            createdById: fileOperation.userId,
            permission: "read_write",
          },
          transaction,
        });

        let collection = response[0];
        const isCreated = response[1];

        // create new collection if name already exists, yes it's possible that
        // there is also a "Name (Imported)" but this is a case not worth dealing
        // with right now
        if (!isCreated) {
          const name = `${item.name} (Imported)`;
          collection = await Collection.create(
            {
              id: item.id,
              description,
              teamId: fileOperation.teamId,
              createdById: fileOperation.userId,
              name,
              permission: "read_write",
            },
            { transaction }
          );
        }

        await Event.create(
          {
            name: "collections.create",
            collectionId: collection.id,
            teamId: collection.teamId,
            actorId: fileOperation.userId,
            data: {
              name: collection.name,
            },
            ip,
          },
          {
            transaction,
          }
        );

        collections.set(item.id, collection);
      }

      // Documents
      for (const item of data.documents) {
        const text = this.replacePlaceholders(item.text, replacements);

        const document = await documentCreator({
          source: "import",
          id: item.id,
          title: item.title,
          text,
          collectionId: item.collectionId,
          createdAt: item.createdAt,
          updatedAt: item.updatedAt ?? item.createdAt,
          publishedAt: item.updatedAt ?? item.createdAt ?? new Date(),
          parentDocumentId: item.parentDocumentId,
          user,
          ip,
          transaction,
        });
        documents.set(item.id, document);

        const collection = collections.get(item.collectionId);
        if (collection) {
          await collection.addDocumentToStructure(document, 0, { transaction });
        }
      }

      // Return value is only used for testing
      return {
        collections,
        documents,
        attachments,
      };
    });
  }

  /**
   * Replace every occurrence of each placeholder in the text, applying the
   * pairs in the order given.
   *
   * Matching and insertion are both literal: the placeholder is not treated as
   * a regular expression and sequences such as `$&` or `$1` in the replacement
   * are inserted verbatim rather than being expanded.
   *
   * @param text The text containing placeholders
   * @param replacements Pairs of [placeholder, replacement]
   * @returns The text with all placeholders substituted
   */
  private replacePlaceholders(
    text: string,
    replacements: ReadonlyArray<readonly [string, string]>
  ): string {
    let output = text;

    for (const [placeholder, replacement] of replacements) {
      output = output.split(placeholder).join(replacement);
    }

    return output;
  }

  /**
   * Optional hook to remove any temporary files that were created. It is
   * called by perform after the import has finished, whether it succeeded or
   * failed. The default implementation does nothing.
   */
  protected async cleanupData() {
    // noop
  }

  /**
   * Job options such as priority and retry strategy, as defined by Bull.
   */
  public get options() {
    return {
      priority: TaskPriority.Low,
      attempts: 1,
    };
  }
}