Refactor document search

This commit is contained in:
Tom Moor
2022-10-30 12:41:52 -04:00
parent 86aa531fad
commit 1e62d25861
6 changed files with 652 additions and 602 deletions

View File

@@ -5,7 +5,6 @@ import {
buildCollection, buildCollection,
buildTeam, buildTeam,
buildUser, buildUser,
buildShare,
} from "@server/test/factories"; } from "@server/test/factories";
import { setupTestDatabase, seed } from "@server/test/support"; import { setupTestDatabase, seed } from "@server/test/support";
import slugify from "@server/utils/slugify"; import slugify from "@server/utils/slugify";
@@ -163,319 +162,6 @@ paragraph`);
}); });
}); });
describe("#searchForTeam", () => {
test("should return search results from public collections", async () => {
const team = await buildTeam();
const collection = await buildCollection({
teamId: team.id,
});
const document = await buildDocument({
teamId: team.id,
collectionId: collection.id,
title: "test",
});
const { results } = await Document.searchForTeam(team, "test");
expect(results.length).toBe(1);
expect(results[0].document?.id).toBe(document.id);
});
test("should not return results from private collections without providing collectionId", async () => {
const team = await buildTeam();
const collection = await buildCollection({
permission: null,
teamId: team.id,
});
await buildDocument({
teamId: team.id,
collectionId: collection.id,
title: "test",
});
const { results } = await Document.searchForTeam(team, "test");
expect(results.length).toBe(0);
});
test("should return results from private collections when collectionId is provided", async () => {
const team = await buildTeam();
const collection = await buildCollection({
permission: null,
teamId: team.id,
});
await buildDocument({
teamId: team.id,
collectionId: collection.id,
title: "test",
});
const { results } = await Document.searchForTeam(team, "test", {
collectionId: collection.id,
});
expect(results.length).toBe(1);
});
test("should return results from document tree of shared document", async () => {
const team = await buildTeam();
const collection = await buildCollection({
permission: null,
teamId: team.id,
});
const document = await buildDocument({
teamId: team.id,
collectionId: collection.id,
title: "test 1",
});
await buildDocument({
teamId: team.id,
collectionId: collection.id,
title: "test 2",
});
const share = await buildShare({
documentId: document.id,
includeChildDocuments: true,
});
const { results } = await Document.searchForTeam(team, "test", {
collectionId: collection.id,
share,
});
expect(results.length).toBe(1);
});
test("should handle no collections", async () => {
const team = await buildTeam();
const { results } = await Document.searchForTeam(team, "test");
expect(results.length).toBe(0);
});
test("should handle backslashes in search term", async () => {
const team = await buildTeam();
const { results } = await Document.searchForTeam(team, "\\\\");
expect(results.length).toBe(0);
});
test("should return the total count of search results", async () => {
const team = await buildTeam();
const collection = await buildCollection({
teamId: team.id,
});
await buildDocument({
teamId: team.id,
collectionId: collection.id,
title: "test number 1",
});
await buildDocument({
teamId: team.id,
collectionId: collection.id,
title: "test number 2",
});
const { totalCount } = await Document.searchForTeam(team, "test");
expect(totalCount).toBe("2");
});
test("should return the document when searched with their previous titles", async () => {
const team = await buildTeam();
const collection = await buildCollection({
teamId: team.id,
});
const document = await buildDocument({
teamId: team.id,
collectionId: collection.id,
title: "test number 1",
});
document.title = "change";
await document.save();
const { totalCount } = await Document.searchForTeam(team, "test number");
expect(totalCount).toBe("1");
});
test("should not return the document when searched with neither the titles nor the previous titles", async () => {
const team = await buildTeam();
const collection = await buildCollection({
teamId: team.id,
});
const document = await buildDocument({
teamId: team.id,
collectionId: collection.id,
title: "test number 1",
});
document.title = "change";
await document.save();
const { totalCount } = await Document.searchForTeam(
team,
"title doesn't exist"
);
expect(totalCount).toBe("0");
});
});
describe("#searchForUser", () => {
test("should return search results from collections", async () => {
const team = await buildTeam();
const user = await buildUser({
teamId: team.id,
});
const collection = await buildCollection({
userId: user.id,
teamId: team.id,
});
const document = await buildDocument({
userId: user.id,
teamId: team.id,
collectionId: collection.id,
title: "test",
});
const { results } = await Document.searchForUser(user, "test");
expect(results.length).toBe(1);
expect(results[0].document?.id).toBe(document.id);
});
test("should handle no collections", async () => {
const team = await buildTeam();
const user = await buildUser({
teamId: team.id,
});
const { results } = await Document.searchForUser(user, "test");
expect(results.length).toBe(0);
});
test("should search only drafts created by user", async () => {
const user = await buildUser();
await buildDraftDocument({
teamId: user.teamId,
userId: user.id,
createdById: user.id,
title: "test",
});
const { results } = await Document.searchForUser(user, "test", {
includeDrafts: true,
});
expect(results.length).toBe(1);
});
test("should not include drafts", async () => {
const user = await buildUser();
await buildDraftDocument({
teamId: user.teamId,
userId: user.id,
createdById: user.id,
title: "test",
});
const { results } = await Document.searchForUser(user, "test", {
includeDrafts: false,
});
expect(results.length).toBe(0);
});
test("should include results from drafts as well", async () => {
const user = await buildUser();
await buildDocument({
userId: user.id,
teamId: user.teamId,
createdById: user.id,
title: "not draft",
});
await buildDraftDocument({
teamId: user.teamId,
userId: user.id,
createdById: user.id,
title: "draft",
});
const { results } = await Document.searchForUser(user, "draft", {
includeDrafts: true,
});
expect(results.length).toBe(2);
});
test("should not include results from drafts", async () => {
const user = await buildUser();
await buildDocument({
userId: user.id,
teamId: user.teamId,
createdById: user.id,
title: "not draft",
});
await buildDraftDocument({
teamId: user.teamId,
userId: user.id,
createdById: user.id,
title: "draft",
});
const { results } = await Document.searchForUser(user, "draft", {
includeDrafts: false,
});
expect(results.length).toBe(1);
});
test("should return the total count of search results", async () => {
const team = await buildTeam();
const user = await buildUser({
teamId: team.id,
});
const collection = await buildCollection({
userId: user.id,
teamId: team.id,
});
await buildDocument({
userId: user.id,
teamId: team.id,
collectionId: collection.id,
title: "test number 1",
});
await buildDocument({
userId: user.id,
teamId: team.id,
collectionId: collection.id,
title: "test number 2",
});
const { totalCount } = await Document.searchForUser(user, "test");
expect(totalCount).toBe("2");
});
test("should return the document when searched with their previous titles", async () => {
const team = await buildTeam();
const user = await buildUser({
teamId: team.id,
});
const collection = await buildCollection({
teamId: team.id,
userId: user.id,
});
const document = await buildDocument({
teamId: team.id,
userId: user.id,
collectionId: collection.id,
title: "test number 1",
});
document.title = "change";
await document.save();
const { totalCount } = await Document.searchForUser(user, "test number");
expect(totalCount).toBe("1");
});
test("should not return the document when searched with neither the titles nor the previous titles", async () => {
const team = await buildTeam();
const user = await buildUser({
teamId: team.id,
});
const collection = await buildCollection({
teamId: team.id,
userId: user.id,
});
const document = await buildDocument({
teamId: team.id,
userId: user.id,
collectionId: collection.id,
title: "test number 1",
});
document.title = "change";
await document.save();
const { totalCount } = await Document.searchForUser(
user,
"title doesn't exist"
);
expect(totalCount).toBe("0");
});
});
describe("#delete", () => { describe("#delete", () => {
test("should soft delete and set last modified", async () => { test("should soft delete and set last modified", async () => {
const document = await buildDocument(); const document = await buildDocument();

View File

@@ -1,12 +1,10 @@
import removeMarkdown from "@tommoor/remove-markdown"; import removeMarkdown from "@tommoor/remove-markdown";
import invariant from "invariant"; import { compact, uniq } from "lodash";
import { compact, find, map, uniq } from "lodash";
import randomstring from "randomstring"; import randomstring from "randomstring";
import type { SaveOptions } from "sequelize"; import type { SaveOptions } from "sequelize";
import { import {
Transaction, Transaction,
Op, Op,
QueryTypes,
FindOptions, FindOptions,
ScopeOptions, ScopeOptions,
WhereOptions, WhereOptions,
@@ -33,7 +31,6 @@ import {
} from "sequelize-typescript"; } from "sequelize-typescript";
import MarkdownSerializer from "slate-md-serializer"; import MarkdownSerializer from "slate-md-serializer";
import isUUID from "validator/lib/isUUID"; import isUUID from "validator/lib/isUUID";
import { DateFilter } from "@shared/types";
import getTasks from "@shared/utils/getTasks"; import getTasks from "@shared/utils/getTasks";
import parseTitle from "@shared/utils/parseTitle"; import parseTitle from "@shared/utils/parseTitle";
import unescape from "@shared/utils/unescape"; import unescape from "@shared/utils/unescape";
@@ -43,7 +40,6 @@ import slugify from "@server/utils/slugify";
import Backlink from "./Backlink"; import Backlink from "./Backlink";
import Collection from "./Collection"; import Collection from "./Collection";
import Revision from "./Revision"; import Revision from "./Revision";
import Share from "./Share";
import Star from "./Star"; import Star from "./Star";
import Team from "./Team"; import Team from "./Team";
import User from "./User"; import User from "./User";
@@ -52,28 +48,6 @@ import ParanoidModel from "./base/ParanoidModel";
import Fix from "./decorators/Fix"; import Fix from "./decorators/Fix";
import Length from "./validators/Length"; import Length from "./validators/Length";
export type SearchResponse = {
results: {
ranking: number;
context: string;
document: Document;
}[];
totalCount: number;
};
type SearchOptions = {
limit?: number;
offset?: number;
collectionId?: string;
share?: Share;
dateFilter?: DateFilter;
collaboratorIds?: string[];
includeArchived?: boolean;
includeDrafts?: boolean;
snippetMinWords?: number;
snippetMaxWords?: number;
};
const serializer = new MarkdownSerializer(); const serializer = new MarkdownSerializer();
export const DOCUMENT_VERSION = 2; export const DOCUMENT_VERSION = 2;
@@ -474,257 +448,6 @@ class Document extends ParanoidModel {
return null; return null;
} }
static async searchForTeam(
team: Team,
query: string,
options: SearchOptions = {}
): Promise<SearchResponse> {
const wildcardQuery = `${escapeQuery(query)}:*`;
const {
snippetMinWords = 20,
snippetMaxWords = 30,
limit = 15,
offset = 0,
} = options;
// restrict to specific collection if provided
// enables search in private collections if specified
let collectionIds;
if (options.collectionId) {
collectionIds = [options.collectionId];
} else {
collectionIds = await team.collectionIds();
}
// short circuit if no relevant collections
if (!collectionIds.length) {
return {
results: [],
totalCount: 0,
};
}
// restrict to documents in the tree of a shared document when one is provided
let documentIds;
if (options.share?.includeChildDocuments) {
const sharedDocument = await options.share.$get("document");
invariant(sharedDocument, "Cannot find document for share");
const childDocumentIds = await sharedDocument.getChildDocumentIds({
archivedAt: {
[Op.is]: null,
},
});
documentIds = [sharedDocument.id, ...childDocumentIds];
}
const documentClause = documentIds ? `"id" IN(:documentIds) AND` : "";
// Build the SQL query to get result documentIds, ranking, and search term context
const whereClause = `
"searchVector" @@ to_tsquery('english', :query) AND
"teamId" = :teamId AND
"collectionId" IN(:collectionIds) AND
${documentClause}
"deletedAt" IS NULL AND
"publishedAt" IS NOT NULL
`;
const selectSql = `
SELECT
id,
ts_rank(documents."searchVector", to_tsquery('english', :query)) as "searchRanking",
ts_headline('english', "text", to_tsquery('english', :query), :headlineOptions) as "searchContext"
FROM documents
WHERE ${whereClause}
ORDER BY
"searchRanking" DESC,
"updatedAt" DESC
LIMIT :limit
OFFSET :offset;
`;
const countSql = `
SELECT COUNT(id)
FROM documents
WHERE ${whereClause}
`;
const queryReplacements = {
teamId: team.id,
query: wildcardQuery,
collectionIds,
documentIds,
headlineOptions: `MaxFragments=1, MinWords=${snippetMinWords}, MaxWords=${snippetMaxWords}`,
};
const resultsQuery = this.sequelize!.query(selectSql, {
type: QueryTypes.SELECT,
replacements: { ...queryReplacements, limit, offset },
});
const countQuery = this.sequelize!.query(countSql, {
type: QueryTypes.SELECT,
replacements: queryReplacements,
});
const [results, [{ count }]]: [any, any] = await Promise.all([
resultsQuery,
countQuery,
]);
// Final query to get associated document data
const documents = await this.findAll({
where: {
id: map(results, "id"),
teamId: team.id,
},
include: [
{
model: Collection,
as: "collection",
},
],
});
return {
results: map(results, (result: any) => ({
ranking: result.searchRanking,
context: removeMarkdown(unescape(result.searchContext), {
stripHTML: false,
}),
document: find(documents, {
id: result.id,
}) as Document,
})),
totalCount: count,
};
}
static async searchForUser(
user: User,
query: string,
options: SearchOptions = {}
): Promise<SearchResponse> {
const {
snippetMinWords = 20,
snippetMaxWords = 30,
limit = 15,
offset = 0,
} = options;
const wildcardQuery = `${escapeQuery(query)}:*`;
// Ensure we're filtering by the users accessible collections. If
// collectionId is passed as an option it is assumed that the authorization
// has already been done in the router
let collectionIds;
if (options.collectionId) {
collectionIds = [options.collectionId];
} else {
collectionIds = await user.collectionIds();
}
let dateFilter;
if (options.dateFilter) {
dateFilter = `1 ${options.dateFilter}`;
}
// Build the SQL query to get documentIds, ranking, and search term context
const whereClause = `
"searchVector" @@ to_tsquery('english', :query) AND
"teamId" = :teamId AND
${
collectionIds.length
? `(
"collectionId" IN(:collectionIds) OR
("collectionId" IS NULL AND "createdById" = :userId)
) AND`
: '"collectionId" IS NULL AND "createdById" = :userId AND'
}
${
options.dateFilter ? '"updatedAt" > now() - interval :dateFilter AND' : ""
}
${
options.collaboratorIds
? '"collaboratorIds" @> ARRAY[:collaboratorIds]::uuid[] AND'
: ""
}
${options.includeArchived ? "" : '"archivedAt" IS NULL AND'}
"deletedAt" IS NULL AND
${
options.includeDrafts
? '("publishedAt" IS NOT NULL OR "createdById" = :userId)'
: '"publishedAt" IS NOT NULL'
}
`;
const selectSql = `
SELECT
id,
ts_rank(documents."searchVector", to_tsquery('english', :query)) as "searchRanking",
ts_headline('english', "text", to_tsquery('english', :query), :headlineOptions) as "searchContext"
FROM documents
WHERE ${whereClause}
ORDER BY
"searchRanking" DESC,
"updatedAt" DESC
LIMIT :limit
OFFSET :offset;
`;
const countSql = `
SELECT COUNT(id)
FROM documents
WHERE ${whereClause}
`;
const queryReplacements = {
teamId: user.teamId,
userId: user.id,
collaboratorIds: options.collaboratorIds,
query: wildcardQuery,
collectionIds,
dateFilter,
headlineOptions: `MaxFragments=1, MinWords=${snippetMinWords}, MaxWords=${snippetMaxWords}`,
};
const resultsQuery = this.sequelize!.query(selectSql, {
type: QueryTypes.SELECT,
replacements: { ...queryReplacements, limit, offset },
});
const countQuery = this.sequelize!.query(countSql, {
type: QueryTypes.SELECT,
replacements: queryReplacements,
});
const [results, [{ count }]]: [any, any] = await Promise.all([
resultsQuery,
countQuery,
]);
// Final query to get associated document data
const documents = await this.scope([
"withoutState",
"withDrafts",
{
method: ["withViews", user.id],
},
{
method: ["withCollectionPermissions", user.id],
},
]).findAll({
where: {
teamId: user.teamId,
id: map(results, "id"),
},
});
return {
results: map(results, (result: any) => ({
ranking: result.searchRanking,
context: removeMarkdown(unescape(result.searchContext), {
stripHTML: false,
}),
document: find(documents, {
id: result.id,
}) as Document,
})),
totalCount: count,
};
}
// instance methods // instance methods
migrateVersion = () => { migrateVersion = () => {
@@ -1022,10 +745,4 @@ class Document extends ParanoidModel {
}; };
} }
function escapeQuery(query: string): string {
// replace "\" with escaped "\\" because sequelize.escape doesn't do it
// https://github.com/sequelize/sequelize/issues/2950
return Document.sequelize!.escape(query).replace(/\\/g, "\\\\");
}
export default Document; export default Document;

View File

@@ -0,0 +1,335 @@
import SearchHelper from "@server/models/helpers/SearchHelper";
import {
buildDocument,
buildDraftDocument,
buildCollection,
buildTeam,
buildUser,
buildShare,
} from "@server/test/factories";
import { setupTestDatabase } from "@server/test/support";
// Suite-wide database setup provided by the shared test-support module.
// NOTE(review): what exactly is created/cleaned is defined in
// @server/test/support and is not visible here — confirm there.
setupTestDatabase();
// Reset every jest mock before each test so that mock state configured in one
// test cannot leak into the next.
beforeEach(() => {
jest.resetAllMocks();
});
// Team-scoped search: called without a user, so only collections returned by
// the team itself are searched unless a collectionId is passed explicitly.
describe("#searchForTeam", () => {
  test("should return search results from public collections", async () => {
    const team = await buildTeam();
    const { id: collectionId } = await buildCollection({ teamId: team.id });
    const expected = await buildDocument({
      teamId: team.id,
      collectionId,
      title: "test",
    });

    const response = await SearchHelper.searchForTeam(team, "test");

    expect(response.results).toHaveLength(1);
    expect(response.results[0].document?.id).toBe(expected.id);
  });

  test("should not return results from private collections without providing collectionId", async () => {
    const team = await buildTeam();
    const privateCollection = await buildCollection({
      permission: null,
      teamId: team.id,
    });
    await buildDocument({
      teamId: team.id,
      collectionId: privateCollection.id,
      title: "test",
    });

    const response = await SearchHelper.searchForTeam(team, "test");

    expect(response.results).toHaveLength(0);
  });

  test("should return results from private collections when collectionId is provided", async () => {
    const team = await buildTeam();
    const privateCollection = await buildCollection({
      permission: null,
      teamId: team.id,
    });
    await buildDocument({
      teamId: team.id,
      collectionId: privateCollection.id,
      title: "test",
    });

    const response = await SearchHelper.searchForTeam(team, "test", {
      collectionId: privateCollection.id,
    });

    expect(response.results).toHaveLength(1);
  });

  test("should return results from document tree of shared document", async () => {
    const team = await buildTeam();
    const privateCollection = await buildCollection({
      permission: null,
      teamId: team.id,
    });
    // Only the first document is shared; the second lives in the same
    // collection and also matches the query.
    const sharedDocument = await buildDocument({
      teamId: team.id,
      collectionId: privateCollection.id,
      title: "test 1",
    });
    await buildDocument({
      teamId: team.id,
      collectionId: privateCollection.id,
      title: "test 2",
    });
    const share = await buildShare({
      documentId: sharedDocument.id,
      includeChildDocuments: true,
    });

    const response = await SearchHelper.searchForTeam(team, "test", {
      collectionId: privateCollection.id,
      share,
    });

    expect(response.results).toHaveLength(1);
  });

  test("should handle no collections", async () => {
    const team = await buildTeam();

    const response = await SearchHelper.searchForTeam(team, "test");

    expect(response.results).toHaveLength(0);
  });

  test("should handle backslashes in search term", async () => {
    const team = await buildTeam();

    const response = await SearchHelper.searchForTeam(team, "\\\\");

    expect(response.results).toHaveLength(0);
  });

  test("should return the total count of search results", async () => {
    const team = await buildTeam();
    const { id: collectionId } = await buildCollection({ teamId: team.id });
    await buildDocument({
      teamId: team.id,
      collectionId,
      title: "test number 1",
    });
    await buildDocument({
      teamId: team.id,
      collectionId,
      title: "test number 2",
    });

    const response = await SearchHelper.searchForTeam(team, "test");

    // The count is asserted as a string, not a number.
    expect(response.totalCount).toBe("2");
  });

  test("should return the document when searched with their previous titles", async () => {
    const team = await buildTeam();
    const { id: collectionId } = await buildCollection({ teamId: team.id });
    const renamed = await buildDocument({
      teamId: team.id,
      collectionId,
      title: "test number 1",
    });
    renamed.title = "change";
    await renamed.save();

    const response = await SearchHelper.searchForTeam(team, "test number");

    expect(response.totalCount).toBe("1");
  });

  test("should not return the document when searched with neither the titles nor the previous titles", async () => {
    const team = await buildTeam();
    const { id: collectionId } = await buildCollection({ teamId: team.id });
    const renamed = await buildDocument({
      teamId: team.id,
      collectionId,
      title: "test number 1",
    });
    renamed.title = "change";
    await renamed.save();

    const response = await SearchHelper.searchForTeam(
      team,
      "title doesn't exist"
    );

    expect(response.totalCount).toBe("0");
  });
});
// User-scoped search: results are limited to the collections of the user and,
// when includeDrafts is set, drafts created by that same user.
describe("#searchForUser", () => {
  test("should return search results from collections", async () => {
    const team = await buildTeam();
    const user = await buildUser({ teamId: team.id });
    const { id: collectionId } = await buildCollection({
      userId: user.id,
      teamId: team.id,
    });
    const expected = await buildDocument({
      userId: user.id,
      teamId: team.id,
      collectionId,
      title: "test",
    });

    const response = await SearchHelper.searchForUser(user, "test");

    expect(response.results).toHaveLength(1);
    expect(response.results[0].document?.id).toBe(expected.id);
  });

  test("should handle no collections", async () => {
    const team = await buildTeam();
    const user = await buildUser({ teamId: team.id });

    const response = await SearchHelper.searchForUser(user, "test");

    expect(response.results).toHaveLength(0);
  });

  test("should search only drafts created by user", async () => {
    const user = await buildUser();
    await buildDraftDocument({
      teamId: user.teamId,
      userId: user.id,
      createdById: user.id,
      title: "test",
    });

    const response = await SearchHelper.searchForUser(user, "test", {
      includeDrafts: true,
    });

    expect(response.results).toHaveLength(1);
  });

  test("should not include drafts", async () => {
    const user = await buildUser();
    await buildDraftDocument({
      teamId: user.teamId,
      userId: user.id,
      createdById: user.id,
      title: "test",
    });

    const response = await SearchHelper.searchForUser(user, "test", {
      includeDrafts: false,
    });

    expect(response.results).toHaveLength(0);
  });

  test("should include results from drafts as well", async () => {
    const user = await buildUser();
    // Both titles contain the word "draft", so both match the query.
    await buildDocument({
      userId: user.id,
      teamId: user.teamId,
      createdById: user.id,
      title: "not draft",
    });
    await buildDraftDocument({
      teamId: user.teamId,
      userId: user.id,
      createdById: user.id,
      title: "draft",
    });

    const response = await SearchHelper.searchForUser(user, "draft", {
      includeDrafts: true,
    });

    expect(response.results).toHaveLength(2);
  });

  test("should not include results from drafts", async () => {
    const user = await buildUser();
    await buildDocument({
      userId: user.id,
      teamId: user.teamId,
      createdById: user.id,
      title: "not draft",
    });
    await buildDraftDocument({
      teamId: user.teamId,
      userId: user.id,
      createdById: user.id,
      title: "draft",
    });

    const response = await SearchHelper.searchForUser(user, "draft", {
      includeDrafts: false,
    });

    expect(response.results).toHaveLength(1);
  });

  test("should return the total count of search results", async () => {
    const team = await buildTeam();
    const user = await buildUser({ teamId: team.id });
    const { id: collectionId } = await buildCollection({
      userId: user.id,
      teamId: team.id,
    });
    await buildDocument({
      userId: user.id,
      teamId: team.id,
      collectionId,
      title: "test number 1",
    });
    await buildDocument({
      userId: user.id,
      teamId: team.id,
      collectionId,
      title: "test number 2",
    });

    const response = await SearchHelper.searchForUser(user, "test");

    // The count is asserted as a string, not a number.
    expect(response.totalCount).toBe("2");
  });

  test("should return the document when searched with their previous titles", async () => {
    const team = await buildTeam();
    const user = await buildUser({ teamId: team.id });
    const { id: collectionId } = await buildCollection({
      teamId: team.id,
      userId: user.id,
    });
    const renamed = await buildDocument({
      teamId: team.id,
      userId: user.id,
      collectionId,
      title: "test number 1",
    });
    renamed.title = "change";
    await renamed.save();

    const response = await SearchHelper.searchForUser(user, "test number");

    expect(response.totalCount).toBe("1");
  });

  test("should not return the document when searched with neither the titles nor the previous titles", async () => {
    const team = await buildTeam();
    const user = await buildUser({ teamId: team.id });
    const { id: collectionId } = await buildCollection({
      teamId: team.id,
      userId: user.id,
    });
    const renamed = await buildDocument({
      teamId: team.id,
      userId: user.id,
      collectionId,
      title: "test number 1",
    });
    renamed.title = "change";
    await renamed.save();

    const response = await SearchHelper.searchForUser(
      user,
      "title doesn't exist"
    );

    expect(response.totalCount).toBe("0");
  });
});

View File

@@ -0,0 +1,310 @@
import removeMarkdown from "@tommoor/remove-markdown";
import invariant from "invariant";
import { find, map } from "lodash";
import { Op, QueryTypes } from "sequelize";
import { DateFilter } from "@shared/types";
import unescape from "@shared/utils/unescape";
import { sequelize } from "@server/database/sequelize";
import Collection from "@server/models/Collection";
import Document from "@server/models/Document";
import Share from "@server/models/Share";
import Team from "@server/models/Team";
import User from "@server/models/User";
/**
 * The value resolved by both SearchHelper.searchForTeam and
 * SearchHelper.searchForUser: one page of ranked matches plus the size of the
 * full, unpaginated match set.
 */
type SearchResponse = {
results: {
/** The search ranking, for sorting results */
ranking: number;
/** A snippet of contextual text around the search result */
context: string;
/**
 * The document result. It is filled in by looking the result id up in a
 * second query and casting the outcome to Document, so the type does not
 * reflect a lookup miss.
 */
document: Document;
}[];
/**
 * The total number of results for the search query without pagination.
 * NOTE(review): the accompanying tests assert string values such as "2" here,
 * while the empty short-circuit in searchForTeam returns the number 0 —
 * confirm the runtime type before doing arithmetic on this field.
 */
totalCount: number;
};
/**
 * Options shared by searchForTeam and searchForUser. Not every option is read
 * by both entry points: searchForTeam only reads limit, offset, collectionId,
 * share and the two snippet settings, while searchForUser reads everything
 * except share.
 */
type SearchOptions = {
/** The query limit for pagination */
limit?: number;
/** The query offset for pagination */
offset?: number;
/** Limit results to a collection. Authorization is presumed to have been done before passing to this helper. */
collectionId?: string;
/** Limit results to a shared document. */
share?: Share;
/** Limit results to a date range. */
dateFilter?: DateFilter;
/** Limit results to a list of users that collaborated on the document. */
collaboratorIds?: string[];
/** Include archived documents in the results */
includeArchived?: boolean;
/** Include draft documents in the results (will only ever return your own) */
includeDrafts?: boolean;
/** The minimum number of words to be returned in the contextual snippet */
snippetMinWords?: number;
/** The maximum number of words to be returned in the contextual snippet */
snippetMaxWords?: number;
};
/** Shape of a single row returned by the raw ranking SELECT statement. */
type Results = {
/** Value of ts_rank() for the row, selected as "searchRanking" */
searchRanking: number;
/** Value of ts_headline() for the row, selected as "searchContext" */
searchContext: string;
/** The id of the matching row in the documents table */
id: string;
};
/**
 * Full-text search over the `documents` table using its Postgres
 * "searchVector" column.
 *
 * Both public entry points work in three steps: a raw SQL query ranks the
 * matching document ids and extracts a contextual headline, a second raw query
 * counts all matches ignoring pagination, and a final model query loads the
 * Document instances for the returned page.
 */
export default class SearchHelper {
  /**
   * Search the published, non-deleted documents of a team without a user
   * context.
   *
   * NOTE(review): unlike searchForUser this method does not read
   * options.includeArchived, includeDrafts, dateFilter or collaboratorIds, so
   * passing them has no effect here — confirm that is intended.
   *
   * @param team The team whose documents are searched.
   * @param query The raw search term; it is escaped and turned into a prefix
   * (`:*`) tsquery.
   * @param options Pagination, snippet and scoping options. When collectionId
   * is given only that collection is searched (this is what enables searching
   * a private collection); when share.includeChildDocuments is set, results
   * are limited to the shared document and its non-archived child documents.
   * @returns The ranked page of results and the total match count.
   * @throws When a share is passed whose document cannot be loaded.
   */
  public static async searchForTeam(
    team: Team,
    query: string,
    options: SearchOptions = {}
  ): Promise<SearchResponse> {
    // restrict to specific collection if provided
    // enables search in private collections if specified
    const collectionIds: string[] = options.collectionId
      ? [options.collectionId]
      : await team.collectionIds();

    // short circuit if no relevant collections
    if (!collectionIds.length) {
      return {
        results: [],
        totalCount: 0,
      };
    }

    // restrict to documents in the tree of a shared document when one is provided
    let documentIds: string[] | undefined;
    if (options.share?.includeChildDocuments) {
      const sharedDocument = await options.share.$get("document");
      invariant(sharedDocument, "Cannot find document for share");

      const childDocumentIds = await sharedDocument.getChildDocumentIds({
        archivedAt: {
          [Op.is]: null,
        },
      });
      documentIds = [sharedDocument.id, ...childDocumentIds];
    }

    const documentClause = documentIds ? `"id" IN(:documentIds) AND` : "";
    const whereClause = `
      "searchVector" @@ to_tsquery('english', :query) AND
      "teamId" = :teamId AND
      "collectionId" IN(:collectionIds) AND
      ${documentClause}
      "deletedAt" IS NULL AND
      "publishedAt" IS NOT NULL
    `;

    const { results, count } = await SearchHelper.executeSearch(
      query,
      whereClause,
      {
        teamId: team.id,
        collectionIds,
        documentIds,
      },
      options
    );

    // Final query to get associated document data
    const documents = await Document.findAll({
      where: {
        id: map(results, "id"),
        teamId: team.id,
      },
      include: [
        {
          model: Collection,
          as: "collection",
        },
      ],
    });

    return SearchHelper.buildResponse(results, documents, count);
  }

  /**
   * Search the documents a specific user can reach: documents in the user's
   * collections plus documents without a collection that the user created.
   *
   * @param user The user performing the search.
   * @param query The raw search term; it is escaped and turned into a prefix
   * (`:*`) tsquery.
   * @param options Pagination, snippet and filter options. If collectionId is
   * passed it is assumed that the authorization has already been done in the
   * router.
   * @returns The ranked page of results and the total match count.
   */
  public static async searchForUser(
    user: User,
    query: string,
    options: SearchOptions = {}
  ): Promise<SearchResponse> {
    // Ensure we're filtering by the users accessible collections. If
    // collectionId is passed as an option it is assumed that the authorization
    // has already been done in the router
    const collectionIds = options.collectionId
      ? [options.collectionId]
      : await user.collectionIds();

    // Value substituted into `interval :dateFilter`, e.g. "1 <dateFilter>"
    const dateFilter = options.dateFilter
      ? `1 ${options.dateFilter}`
      : undefined;

    // Each optional clause carries its own trailing AND so that the clauses
    // can simply be concatenated below.
    const collectionClause = collectionIds.length
      ? `(
          "collectionId" IN(:collectionIds) OR
          ("collectionId" IS NULL AND "createdById" = :userId)
        ) AND`
      : '"collectionId" IS NULL AND "createdById" = :userId AND';
    const dateClause = options.dateFilter
      ? '"updatedAt" > now() - interval :dateFilter AND'
      : "";
    const collaboratorClause = options.collaboratorIds
      ? '"collaboratorIds" @> ARRAY[:collaboratorIds]::uuid[] AND'
      : "";
    const archivedClause = options.includeArchived
      ? ""
      : '"archivedAt" IS NULL AND';
    // Drafts are only ever matched when they were created by the searching user
    const publishedClause = options.includeDrafts
      ? '("publishedAt" IS NOT NULL OR "createdById" = :userId)'
      : '"publishedAt" IS NOT NULL';

    const whereClause = `
      "searchVector" @@ to_tsquery('english', :query) AND
      "teamId" = :teamId AND
      ${collectionClause}
      ${dateClause}
      ${collaboratorClause}
      ${archivedClause}
      "deletedAt" IS NULL AND
      ${publishedClause}
    `;

    const { results, count } = await SearchHelper.executeSearch(
      query,
      whereClause,
      {
        teamId: user.teamId,
        userId: user.id,
        collaboratorIds: options.collaboratorIds,
        collectionIds,
        dateFilter,
      },
      options
    );

    // Final query to get associated document data
    const documents = await Document.scope([
      "withoutState",
      "withDrafts",
      {
        method: ["withViews", user.id],
      },
      {
        method: ["withCollectionPermissions", user.id],
      },
    ]).findAll({
      where: {
        teamId: user.teamId,
        id: map(results, "id"),
      },
    });

    return SearchHelper.buildResponse(results, documents, count);
  }

  /**
   * Run the ranking query and the count query for a prepared WHERE clause.
   * Both statements are issued concurrently and share the same replacements;
   * only the ranking query additionally receives limit and offset.
   *
   * @param query The raw search term, escaped here before use.
   * @param whereClause SQL conditions referencing :query and the given
   * replacements.
   * @param replacements Named replacements used by the WHERE clause.
   * @param options Source of the pagination and snippet settings (defaults:
   * limit 15, offset 0, snippetMinWords 20, snippetMaxWords 30).
   * @returns The page of ranked rows and the unpaginated match count.
   */
  private static async executeSearch(
    query: string,
    whereClause: string,
    replacements: Record<string, unknown>,
    options: SearchOptions
  ): Promise<{ results: Results[]; count: number }> {
    const {
      snippetMinWords = 20,
      snippetMaxWords = 30,
      limit = 15,
      offset = 0,
    } = options;

    const queryReplacements = {
      ...replacements,
      // the ":*" suffix makes the term a prefix match
      query: `${SearchHelper.escapeQuery(query)}:*`,
      headlineOptions: `MaxFragments=1, MinWords=${snippetMinWords}, MaxWords=${snippetMaxWords}`,
    };

    // Build the SQL query to get result documentIds, ranking, and search term context
    const selectSql = `
      SELECT
        id,
        ts_rank(documents."searchVector", to_tsquery('english', :query)) as "searchRanking",
        ts_headline('english', "text", to_tsquery('english', :query), :headlineOptions) as "searchContext"
      FROM documents
      WHERE ${whereClause}
      ORDER BY
        "searchRanking" DESC,
        "updatedAt" DESC
      LIMIT :limit
      OFFSET :offset;
    `;
    const countSql = `
      SELECT COUNT(id)
      FROM documents
      WHERE ${whereClause}
    `;

    const [results, [{ count }]] = await Promise.all([
      sequelize.query<Results>(selectSql, {
        type: QueryTypes.SELECT,
        replacements: { ...queryReplacements, limit, offset },
      }),
      sequelize.query<{ count: number }>(countSql, {
        type: QueryTypes.SELECT,
        replacements: queryReplacements,
      }),
    ]);

    return { results, count };
  }

  /**
   * Combine the ranked rows with their loaded documents, preserving the order
   * of the ranked rows.
   *
   * @param results Rows returned by the ranking query.
   * @param documents Documents loaded for the ids in `results`.
   * @param count The unpaginated match count, passed through as totalCount.
   * @returns The response object handed back to callers.
   */
  private static buildResponse(
    results: Results[],
    documents: Document[],
    count: number
  ): SearchResponse {
    return {
      results: map(results, (result) => ({
        ranking: result.searchRanking,
        context: removeMarkdown(unescape(result.searchContext), {
          stripHTML: false,
        }),
        // The cast hides the case where no loaded document matches the row id
        document: find(documents, {
          id: result.id,
        }) as Document,
      })),
      totalCount: count,
    };
  }

  /**
   * Escape a user supplied search term for use inside to_tsquery.
   *
   * @param query The raw search term.
   * @returns The escaped term.
   */
  private static escapeQuery(query: string): string {
    // replace "\" with escaped "\\" because sequelize.escape doesn't do it
    // https://github.com/sequelize/sequelize/issues/2950
    return sequelize.escape(query).replace(/\\/g, "\\\\");
  }
}

View File

@@ -30,6 +30,7 @@ import {
View, View,
} from "@server/models"; } from "@server/models";
import DocumentHelper from "@server/models/helpers/DocumentHelper"; import DocumentHelper from "@server/models/helpers/DocumentHelper";
import SearchHelper from "@server/models/helpers/SearchHelper";
import { authorize, cannot } from "@server/policies"; import { authorize, cannot } from "@server/policies";
import { import {
presentCollection, presentCollection,
@@ -701,7 +702,7 @@ router.post(
const team = await share.$get("team"); const team = await share.$get("team");
invariant(team, "Share must belong to a team"); invariant(team, "Share must belong to a team");
response = await Document.searchForTeam(team, query, { response = await SearchHelper.searchForTeam(team, query, {
includeArchived, includeArchived,
includeDrafts, includeDrafts,
collectionId: document.collectionId, collectionId: document.collectionId,
@@ -742,7 +743,7 @@ router.post(
); );
} }
response = await Document.searchForUser(user, query, { response = await SearchHelper.searchForUser(user, query, {
includeArchived, includeArchived,
includeDrafts, includeDrafts,
collaboratorIds, collaboratorIds,

View File

@@ -14,6 +14,7 @@ import {
Integration, Integration,
IntegrationAuthentication, IntegrationAuthentication,
} from "@server/models"; } from "@server/models";
import SearchHelper from "@server/models/helpers/SearchHelper";
import { presentSlackAttachment } from "@server/presenters"; import { presentSlackAttachment } from "@server/presenters";
import * as Slack from "@server/utils/slack"; import * as Slack from "@server/utils/slack";
import { assertPresent } from "@server/validation"; import { assertPresent } from "@server/validation";
@@ -281,8 +282,8 @@ router.post("hooks.slack", async (ctx) => {
// to load more documents based on the collections they have access to. Otherwise // to load more documents based on the collections they have access to. Otherwise
// just a generic search against team-visible documents is allowed. // just a generic search against team-visible documents is allowed.
const { results, totalCount } = user const { results, totalCount } = user
? await Document.searchForUser(user, text, options) ? await SearchHelper.searchForUser(user, text, options)
: await Document.searchForTeam(team, text, options); : await SearchHelper.searchForTeam(team, text, options);
SearchQuery.create({ SearchQuery.create({
userId: user ? user.id : null, userId: user ? user.id : null,
teamId: team.id, teamId: team.id,