fix: Remove unzipper as it cannot handle zip within zip (#6162)

This commit is contained in:
Tom Moor
2023-11-15 19:32:17 -05:00
committed by GitHub
parent 68a3d327f6
commit 726613bf1d
11 changed files with 272 additions and 145 deletions

View File

@@ -83,6 +83,13 @@ class FileOperation extends ParanoidModel {
return FileStorage.getFileStream(this.key);
}
/**
 * A handle onto the contents of this file operation, obtained from
 * `FileStorage.getFileHandle` using the operation's storage key.
 */
get handle() {
  const { key } = this;
  return FileStorage.getFileHandle(key);
}
// hooks
@BeforeDestroy

View File

@@ -53,7 +53,11 @@ export default class ImportJSONTask extends ImportTask {
rootPath = path.dirname(node.path);
}
if (node.path === "metadata.json") {
metadata = JSON.parse(await fs.readFile(node.path, "utf8"));
try {
metadata = JSON.parse(await fs.readFile(node.path, "utf8"));
} catch (err) {
throw new Error(`Could not parse metadata.json. ${err.message}`);
}
}
}
@@ -116,9 +120,12 @@ export default class ImportJSONTask extends ImportTask {
continue;
}
const item: CollectionJSONExport = JSON.parse(
await fs.readFile(node.path, "utf8")
);
let item: CollectionJSONExport;
try {
item = JSON.parse(await fs.readFile(node.path, "utf8"));
} catch (err) {
throw new Error(`Could not parse ${node.path}. ${err.message}`);
}
const collectionId = uuidv4();
output.collections.push({

View File

@@ -1,4 +1,4 @@
import fs from "fs";
/* eslint-disable @typescript-eslint/no-empty-function */
import path from "path";
import { FileOperation } from "@server/models";
import { buildFileOperation } from "@server/test/factories";
@@ -7,11 +7,19 @@ import ImportMarkdownZipTask from "./ImportMarkdownZipTask";
describe("ImportMarkdownZipTask", () => {
it("should import the documents, attachments", async () => {
const fileOperation = await buildFileOperation();
Object.defineProperty(fileOperation, "stream", {
Object.defineProperty(fileOperation, "handle", {
get() {
return fs.createReadStream(
path.resolve(__dirname, "..", "..", "test", "fixtures", "outline.zip")
);
return {
path: path.resolve(
__dirname,
"..",
"..",
"test",
"fixtures",
"outline.zip"
),
cleanup: async () => {},
};
},
});
jest.spyOn(FileOperation, "findByPk").mockResolvedValue(fileOperation);
@@ -30,11 +38,19 @@ describe("ImportMarkdownZipTask", () => {
it("should throw an error with corrupt zip", async () => {
const fileOperation = await buildFileOperation();
Object.defineProperty(fileOperation, "stream", {
Object.defineProperty(fileOperation, "handle", {
get() {
return fs.createReadStream(
path.resolve(__dirname, "..", "..", "test", "fixtures", "corrupt.zip")
);
return {
path: path.resolve(
__dirname,
"..",
"..",
"test",
"fixtures",
"corrupt.zip"
),
cleanup: async () => {},
};
},
});
jest.spyOn(FileOperation, "findByPk").mockResolvedValue(fileOperation);
@@ -56,11 +72,19 @@ describe("ImportMarkdownZipTask", () => {
it("should throw an error with empty collection in zip", async () => {
const fileOperation = await buildFileOperation();
Object.defineProperty(fileOperation, "stream", {
Object.defineProperty(fileOperation, "handle", {
get() {
return fs.createReadStream(
path.resolve(__dirname, "..", "..", "test", "fixtures", "empty.zip")
);
return {
path: path.resolve(
__dirname,
"..",
"..",
"test",
"fixtures",
"empty.zip"
),
cleanup: async () => {},
};
},
});
jest.spyOn(FileOperation, "findByPk").mockResolvedValue(fileOperation);

View File

@@ -1,4 +1,4 @@
import fs from "fs";
/* eslint-disable @typescript-eslint/no-empty-function */
import path from "path";
import { FileOperation } from "@server/models";
import { buildFileOperation } from "@server/test/factories";
@@ -7,18 +7,19 @@ import ImportNotionTask from "./ImportNotionTask";
describe("ImportNotionTask", () => {
it("should import successfully from a Markdown export", async () => {
const fileOperation = await buildFileOperation();
Object.defineProperty(fileOperation, "stream", {
Object.defineProperty(fileOperation, "handle", {
get() {
return fs.createReadStream(
path.resolve(
return {
path: path.resolve(
__dirname,
"..",
"..",
"test",
"fixtures",
"notion-markdown.zip"
)
);
),
cleanup: async () => {},
};
},
});
jest.spyOn(FileOperation, "findByPk").mockResolvedValue(fileOperation);
@@ -44,18 +45,19 @@ describe("ImportNotionTask", () => {
it("should import successfully from a HTML export", async () => {
const fileOperation = await buildFileOperation();
Object.defineProperty(fileOperation, "stream", {
Object.defineProperty(fileOperation, "handle", {
get() {
return fs.createReadStream(
path.resolve(
return {
path: path.resolve(
__dirname,
"..",
"..",
"test",
"fixtures",
"notion-html.zip"
)
);
),
cleanup: async () => {},
};
},
});
jest.spyOn(FileOperation, "findByPk").mockResolvedValue(fileOperation);

View File

@@ -2,7 +2,6 @@ import path from "path";
import { rm } from "fs-extra";
import truncate from "lodash/truncate";
import tmp from "tmp";
import unzipper from "unzipper";
import {
AttachmentPreset,
CollectionPermission,
@@ -13,7 +12,6 @@ import { CollectionValidation } from "@shared/validations";
import attachmentCreator from "@server/commands/attachmentCreator";
import documentCreator from "@server/commands/documentCreator";
import { serializer } from "@server/editor";
import env from "@server/env";
import { InternalError, ValidationError } from "@server/errors";
import Logger from "@server/logging/Logger";
import {
@@ -25,6 +23,7 @@ import {
Attachment,
} from "@server/models";
import { sequelize } from "@server/storage/database";
import ZipHelper from "@server/utils/ZipHelper";
import BaseTask, { TaskPriority } from "./BaseTask";
type Props = {
@@ -198,30 +197,44 @@ export default abstract class ImportTask extends BaseTask<Props> {
protected async fetchAndExtractData(
fileOperation: FileOperation
): Promise<string> {
return new Promise((resolve, reject) => {
const stream = fileOperation.stream;
if (!stream) {
return reject(new Error("No stream available"));
}
let cleanup;
let filePath: string;
tmp.dir((err, path) => {
if (err) {
return reject(err);
}
try {
const res = await fileOperation.handle;
filePath = res.path;
cleanup = res.cleanup;
const dest = unzipper
.Extract({ path, verbose: env.isDevelopment })
.on("error", reject)
.on("close", () => resolve(path));
const path = await new Promise<string>((resolve, reject) => {
tmp.dir((err, tmpDir) => {
if (err) {
Logger.error("Could not create temporary directory", err);
return reject(err);
}
stream
.on("error", (err) => {
dest.end();
reject(err);
})
.pipe(dest);
Logger.debug(
"task",
`ImportTask extracting data for ${fileOperation.id}`
);
void ZipHelper.extract(filePath, tmpDir)
.then(() => resolve(tmpDir))
.catch((err) => {
Logger.error("Could not extract zip file", err);
reject(err);
});
});
});
});
return path;
} finally {
Logger.debug(
"task",
`ImportTask cleaning up temporary data for ${fileOperation.id}`
);
await cleanup?.();
}
}
/**

View File

@@ -84,6 +84,17 @@ export default abstract class BaseStorage {
acl?: string;
}): Promise<string | undefined>;
/**
* Returns a file handle for a file from the storage provider. The handle
* exposes the file as a path on the local filesystem rather than as a stream.
*
* @param key The path to the file
* @returns A promise of the local file path and an async cleanup function.
* NOTE(review): callers are presumably expected to await `cleanup` once they
* are finished with the path so that any temporary copy can be removed –
* confirm against each storage implementation.
*/
public abstract getFileHandle(key: string): Promise<{
path: string;
cleanup: () => Promise<void>;
}>;
/**
* Returns a buffer of a file from the storage provider.
*

View File

@@ -128,6 +128,15 @@ export default class LocalStorage extends BaseStorage {
return Promise.resolve(`${env.URL}/api/files.get?sig=${sig}`);
};
/**
 * Returns a handle for a locally stored file. The path points at the file
 * resolved by `getFilePath`, so there is nothing for cleanup to remove.
 *
 * @param key The path to the file
 * @returns The file path and a cleanup function that does nothing
 */
public async getFileHandle(key: string) {
  const filePath = this.getFilePath(key);

  const cleanup = async () => {
    // no-op, as we're reading the canonical file directly
  };

  return { path: filePath, cleanup };
}
public getFileStream(key: string) {
return createReadStream(this.getFilePath(key));
}

View File

@@ -1,7 +1,10 @@
import path from "path";
import util from "util";
import AWS, { S3 } from "aws-sdk";
import { createWriteStream, remove } from "fs-extra";
import invariant from "invariant";
import compact from "lodash/compact";
import tmp from "tmp";
import env from "@server/env";
import Logger from "@server/logging/Logger";
import BaseStorage from "./BaseStorage";
@@ -159,6 +162,37 @@ export default class S3Storage extends BaseStorage {
return url;
};
/**
 * Downloads the file stored under `key` into a freshly created temporary
 * directory and returns a handle to the local copy.
 *
 * @param key The path to the file
 * @returns The path of the downloaded file and a cleanup function that
 * removes the temporary directory (and with it the downloaded file)
 * @throws Rejects if the temporary directory cannot be created, no stream is
 * available for the key, or reading/writing the file fails
 */
public getFileHandle(key: string): Promise<{
  path: string;
  cleanup: () => Promise<void>;
}> {
  return new Promise((resolve, reject) => {
    tmp.dir((err, tmpDir) => {
      if (err) {
        return reject(err);
      }

      // Reject first, then remove the temporary directory on a best-effort
      // basis so that failed downloads do not accumulate on disk.
      const fail = (error: Error) => {
        reject(error);
        void remove(tmpDir).catch(() => undefined);
      };

      // Obtain the source stream before creating any file. This runs inside
      // an asynchronous callback, so a synchronous throw from getFileStream
      // must be converted into a rejection by hand.
      let stream: ReturnType<S3Storage["getFileStream"]>;
      try {
        stream = this.getFileStream(key);
      } catch (error) {
        return fail(error instanceof Error ? error : new Error(String(error)));
      }

      if (!stream) {
        return fail(new Error("No stream available"));
      }

      const tmpFile = path.join(tmpDir, "tmp");
      const dest = createWriteStream(tmpFile);

      dest.on("error", fail);
      dest.on("finish", () =>
        // Remove the whole temporary directory, not just the file inside it,
        // otherwise an empty directory is leaked for every download.
        resolve({ path: tmpFile, cleanup: () => remove(tmpDir) })
      );

      stream
        .on("error", (error) => {
          // Abandon the partial file rather than flushing it to disk.
          dest.destroy();
          fail(error);
        })
        .pipe(dest);
    });
  });
}
public getFileStream(key: string) {
invariant(
env.AWS_S3_UPLOAD_BUCKET_NAME,

View File

@@ -1,6 +1,9 @@
import fs from "fs";
import path from "path";
import { mkdirp } from "fs-extra";
import JSZip from "jszip";
import tmp from "tmp";
import yauzl from "yauzl";
import { bytesToHumanReadable } from "@shared/utils/files";
import Logger from "@server/logging/Logger";
import { trace } from "@server/logging/tracing";
@@ -20,6 +23,7 @@ export default class ZipHelper {
/**
* Write a zip file to a temporary disk location
*
* @deprecated Use `extract` instead
* @param zip JSZip object
* @returns pathname of the temporary file where the zip was written to disk
*/
@@ -87,4 +91,75 @@ export default class ZipHelper {
);
});
}
/**
 * Extract the contents of a zip file into a directory on disk. Entries are
 * processed one at a time; directories are created as needed.
 *
 * @param filePath The file path where the zip is located
 * @param outputDir The directory where the zip should be extracted
 * @returns A promise that resolves once every entry has been fully written
 * and the zip file has been closed, and rejects on the first error
 */
public static extract(filePath: string, outputDir: string): Promise<void> {
  return new Promise((resolve, reject) => {
    Logger.debug("utils", "Opening zip file", { filePath });

    yauzl.open(
      filePath,
      { lazyEntries: true, autoClose: true },
      function (err, zipfile) {
        if (err) {
          return reject(err);
        }

        // Errors surfacing inside the asynchronous callbacks below cannot be
        // caught by a surrounding try/catch, throwing there would escape the
        // promise entirely. Route them through reject and release the zip.
        const fail = (error: Error) => {
          reject(error);
          zipfile.close();
        };

        try {
          zipfile.on("entry", function (entry) {
            Logger.debug("utils", "Extracting zip entry", entry);

            if (/\/$/.test(entry.fileName)) {
              // directory file names end with '/'
              mkdirp(
                path.join(outputDir, entry.fileName),
                function (err: Error) {
                  if (err) {
                    return fail(err);
                  }
                  zipfile.readEntry();
                }
              );
              return;
            }

            // file entry
            zipfile.openReadStream(entry, function (err, readStream) {
              if (err) {
                return fail(err);
              }

              // ensure parent directory exists
              mkdirp(
                path.join(outputDir, path.dirname(entry.fileName)),
                function (err) {
                  if (err) {
                    return fail(err);
                  }

                  const writeStream = fs.createWriteStream(
                    path.join(outputDir, entry.fileName)
                  );

                  writeStream.on("error", fail);
                  // Advance only once the data has been flushed to disk, so
                  // the promise cannot resolve ahead of the final write.
                  writeStream.on("finish", function () {
                    zipfile.readEntry();
                  });
                  readStream.on("error", (err) => {
                    writeStream.destroy();
                    fail(err);
                  });

                  readStream.pipe(writeStream);
                }
              );
            });
          });
          zipfile.on("close", resolve);
          zipfile.on("error", reject);

          // Handlers are attached, request the first entry.
          zipfile.readEntry();
        } catch (err) {
          reject(err);
        }
      }
    );
  });
}
}