Store source metadata for imported documents (#6136)
This commit is contained in:
@@ -75,11 +75,12 @@ export default class ImportJSONTask extends ImportTask {
|
||||
updatedAt: node.updatedAt ? new Date(node.updatedAt) : undefined,
|
||||
publishedAt: node.publishedAt ? new Date(node.publishedAt) : null,
|
||||
collectionId,
|
||||
sourceId: node.id,
|
||||
externalId: node.id,
|
||||
mimeType: "application/json",
|
||||
parentDocumentId: node.parentDocumentId
|
||||
? find(
|
||||
output.documents,
|
||||
(d) => d.sourceId === node.parentDocumentId
|
||||
(d) => d.externalId === node.parentDocumentId
|
||||
)?.id
|
||||
: null,
|
||||
id,
|
||||
@@ -101,7 +102,7 @@ export default class ImportJSONTask extends ImportTask {
|
||||
buffer: () => zipObject.async("nodebuffer"),
|
||||
mimeType,
|
||||
path: node.key,
|
||||
sourceId: node.id,
|
||||
externalId: node.id,
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -132,7 +133,7 @@ export default class ImportJSONTask extends ImportTask {
|
||||
)
|
||||
: item.collection.description,
|
||||
id: collectionId,
|
||||
sourceId: item.collection.id,
|
||||
externalId: item.collection.id,
|
||||
});
|
||||
|
||||
if (Object.values(item.documents).length) {
|
||||
@@ -149,7 +150,7 @@ export default class ImportJSONTask extends ImportTask {
|
||||
for (const document of output.documents) {
|
||||
for (const attachment of output.attachments) {
|
||||
const encodedPath = encodeURI(
|
||||
`/api/attachments.redirect?id=${attachment.sourceId}`
|
||||
`/api/attachments.redirect?id=${attachment.externalId}`
|
||||
);
|
||||
|
||||
document.text = document.text.replace(
|
||||
|
||||
@@ -139,6 +139,7 @@ export default class ImportMarkdownZipTask extends ImportTask {
|
||||
collectionId,
|
||||
parentDocumentId,
|
||||
path: child.path,
|
||||
mimeType: "text/markdown",
|
||||
});
|
||||
|
||||
await parseNodeChildren(child.children, collectionId, id);
|
||||
|
||||
@@ -62,7 +62,7 @@ export default class ImportNotionTask extends ImportTask {
|
||||
const id = uuidv4();
|
||||
const match = child.title.match(this.NotionUUIDRegex);
|
||||
const name = child.title.replace(this.NotionUUIDRegex, "");
|
||||
const sourceId = match ? match[0].trim() : undefined;
|
||||
const externalId = match ? match[0].trim() : undefined;
|
||||
|
||||
// If it's not a text file we're going to treat it as an attachment.
|
||||
const mimeType = mime.lookup(child.name);
|
||||
@@ -79,7 +79,7 @@ export default class ImportNotionTask extends ImportTask {
|
||||
path: child.path,
|
||||
mimeType,
|
||||
buffer: () => zipObject.async("nodebuffer"),
|
||||
sourceId,
|
||||
externalId,
|
||||
});
|
||||
return;
|
||||
}
|
||||
@@ -95,12 +95,12 @@ export default class ImportNotionTask extends ImportTask {
|
||||
});
|
||||
|
||||
const existingDocumentIndex = output.documents.findIndex(
|
||||
(doc) => doc.sourceId === sourceId
|
||||
(doc) => doc.externalId === externalId
|
||||
);
|
||||
|
||||
const existingDocument = output.documents[existingDocumentIndex];
|
||||
|
||||
// If there is an existing document with the same sourceId that means
|
||||
// If there is an existing document with the same externalId that means
|
||||
// we've already parsed either a folder or a file referencing the same
|
||||
// document, as such we should merge.
|
||||
if (existingDocument) {
|
||||
@@ -122,7 +122,8 @@ export default class ImportNotionTask extends ImportTask {
|
||||
collectionId,
|
||||
parentDocumentId,
|
||||
path: child.path,
|
||||
sourceId,
|
||||
mimeType: mimeType || "text/markdown",
|
||||
externalId,
|
||||
});
|
||||
await parseNodeChildren(child.children, collectionId, id);
|
||||
}
|
||||
@@ -168,13 +169,13 @@ export default class ImportNotionTask extends ImportTask {
|
||||
// instead of a relative or absolute URL within the original zip file.
|
||||
for (const link of internalLinksInText) {
|
||||
const doc = output.documents.find(
|
||||
(doc) => doc.sourceId === link.sourceId
|
||||
(doc) => doc.externalId === link.externalId
|
||||
);
|
||||
|
||||
if (!doc) {
|
||||
Logger.info(
|
||||
"task",
|
||||
`Could not find referenced document with sourceId ${link.sourceId}`
|
||||
`Could not find referenced document with externalId ${link.externalId}`
|
||||
);
|
||||
} else {
|
||||
text = text.replace(link.href, `<<${doc.id}>>`);
|
||||
@@ -188,11 +189,11 @@ export default class ImportNotionTask extends ImportTask {
|
||||
for (const node of tree) {
|
||||
const match = node.title.match(this.NotionUUIDRegex);
|
||||
const name = node.title.replace(this.NotionUUIDRegex, "");
|
||||
const sourceId = match ? match[0].trim() : undefined;
|
||||
const externalId = match ? match[0].trim() : undefined;
|
||||
const mimeType = mime.lookup(node.name);
|
||||
|
||||
const existingCollectionIndex = output.collections.findIndex(
|
||||
(collection) => collection.sourceId === sourceId
|
||||
(collection) => collection.externalId === externalId
|
||||
);
|
||||
const existingCollection = output.collections[existingCollectionIndex];
|
||||
const collectionId = existingCollection?.id || uuidv4();
|
||||
@@ -232,7 +233,7 @@ export default class ImportNotionTask extends ImportTask {
|
||||
id: collectionId,
|
||||
name,
|
||||
description,
|
||||
sourceId,
|
||||
externalId,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -254,19 +255,19 @@ export default class ImportNotionTask extends ImportTask {
|
||||
|
||||
/**
|
||||
* Extracts internal links from a markdown document, taking into account the
|
||||
* sourceId of the document, which is part of the link title.
|
||||
* externalId of the document, which is part of the link title.
|
||||
*
|
||||
* @param text The markdown text to parse
|
||||
* @returns An array of internal links
|
||||
*/
|
||||
private parseInternalLinks(
|
||||
text: string
|
||||
): { title: string; href: string; sourceId: string }[] {
|
||||
): { title: string; href: string; externalId: string }[] {
|
||||
return compact(
|
||||
[...text.matchAll(this.NotionLinkRegex)].map((match) => ({
|
||||
title: match[1],
|
||||
href: match[2],
|
||||
sourceId: match[3],
|
||||
externalId: match[3],
|
||||
}))
|
||||
);
|
||||
}
|
||||
@@ -294,7 +295,7 @@ export default class ImportNotionTask extends ImportTask {
|
||||
|
||||
/**
|
||||
* Regex to find markdown links containing ID's that look like UUID's with the
|
||||
* "-"'s removed, Notion's sourceId format.
|
||||
* "-"'s removed, Notion's externalId format.
|
||||
*/
|
||||
private NotionLinkRegex = /\[([^[]+)]\((.*?([0-9a-fA-F]{32})\..*?)\)/g;
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import path from "path";
|
||||
import truncate from "lodash/truncate";
|
||||
import {
|
||||
AttachmentPreset,
|
||||
@@ -49,7 +50,7 @@ export type StructuredImportData = {
|
||||
*/
|
||||
description?: string | Record<string, any> | null;
|
||||
/** Optional id from import source, useful for mapping */
|
||||
sourceId?: string;
|
||||
externalId?: string;
|
||||
}[];
|
||||
documents: {
|
||||
id: string;
|
||||
@@ -75,8 +76,9 @@ export type StructuredImportData = {
|
||||
createdById?: string;
|
||||
createdByEmail?: string | null;
|
||||
path: string;
|
||||
mimeType: string;
|
||||
/** Optional id from import source, useful for mapping */
|
||||
sourceId?: string;
|
||||
externalId?: string;
|
||||
}[];
|
||||
attachments: {
|
||||
id: string;
|
||||
@@ -85,7 +87,7 @@ export type StructuredImportData = {
|
||||
mimeType: string;
|
||||
buffer: () => Promise<Buffer>;
|
||||
/** Optional id from import source, useful for mapping */
|
||||
sourceId?: string;
|
||||
externalId?: string;
|
||||
}[];
|
||||
};
|
||||
|
||||
@@ -428,7 +430,11 @@ export default abstract class ImportTask extends BaseTask<Props> {
|
||||
|
||||
const document = await documentCreator({
|
||||
...options,
|
||||
source: "import",
|
||||
sourceMetadata: {
|
||||
fileName: path.basename(item.path),
|
||||
mimeType: item.mimeType,
|
||||
externalId: item.externalId,
|
||||
},
|
||||
id: item.id,
|
||||
title: item.title,
|
||||
text,
|
||||
|
||||
Reference in New Issue
Block a user