// 468 lines · 12 KiB · TypeScript
import removeMarkdown from "@tommoor/remove-markdown";
|
|
import invariant from "invariant";
|
|
import find from "lodash/find";
|
|
import map from "lodash/map";
|
|
import queryParser from "pg-tsquery";
|
|
import { Op, QueryTypes, WhereOptions } from "sequelize";
|
|
import { DateFilter } from "@shared/types";
|
|
import { sequelize } from "@server/database/sequelize";
|
|
import Collection from "@server/models/Collection";
|
|
import Document from "@server/models/Document";
|
|
import Share from "@server/models/Share";
|
|
import Team from "@server/models/Team";
|
|
import User from "@server/models/User";
|
|
|
|
/** The paginated payload returned by the full-text search helpers. */
type SearchResponse = {
  results: {
    /** The search ranking, for sorting results */
    ranking: number;
    /** A snippet of contextual text around the search result */
    context: string;
    /** The document result */
    document: Document;
  }[];
  /** The total number of results for the search query without pagination */
  totalCount: number;
};
|
|
|
|
/** Optional filters and pagination settings accepted by the search helpers. */
type SearchOptions = {
  /** The query limit for pagination */
  limit?: number;
  /** The query offset for pagination */
  offset?: number;
  /** Limit results to a collection. Authorization is presumed to have been done before passing to this helper. */
  collectionId?: string | null;
  /** Limit results to a shared document. */
  share?: Share;
  /** Limit results to a date range. */
  dateFilter?: DateFilter;
  /** Limit results to a list of users that collaborated on the document. */
  collaboratorIds?: string[];
  /** Include archived documents in the results */
  includeArchived?: boolean;
  /** Include draft documents in the results (will only ever return your own) */
  includeDrafts?: boolean;
  /** The minimum number of words to be returned in the contextual snippet */
  snippetMinWords?: number;
  /** The maximum number of words to be returned in the contextual snippet */
  snippetMaxWords?: number;
};
|
|
|
|
/** A raw row produced by the ranking SQL query, before documents are loaded. */
type Results = {
  /** The `ts_rank` score of the row, selected as "searchRanking" */
  searchRanking: number;
  /** The `ts_headline` snippet of the row, selected as "searchContext" */
  searchContext: string;
  /** The id of the matching document */
  id: string;
};
|
|
|
|
/**
 * Static helpers for searching documents, either by Postgres full-text search
 * (`searchForTeam`, `searchForUser`) or by a title substring match
 * (`searchTitlesForUser`).
 */
export default class SearchHelper {
  /**
   * The maximum length of a search query.
   */
  public static maxQueryLength = 1000;

  /**
   * Full-text search across the published documents of a team.
   *
   * @param team The team whose documents are searched
   * @param query The raw user search query
   * @param options Pagination, snippet size, and collection/share filters
   * @returns The ranked results with context snippets, plus the total count
   */
  public static async searchForTeam(
    team: Team,
    query: string,
    options: SearchOptions = {}
  ): Promise<SearchResponse> {
    const {
      snippetMinWords = 20,
      snippetMaxWords = 30,
      limit = 15,
      offset = 0,
    } = options;

    // restrict to specific collection if provided
    // enables search in private collections if specified
    const collectionIds: string[] = options.collectionId
      ? [options.collectionId]
      : await team.collectionIds();

    // short circuit if no relevant collections
    if (!collectionIds.length) {
      return {
        results: [],
        totalCount: 0,
      };
    }

    // restrict to documents in the tree of a shared document when one is provided
    let documentIds: string[] | undefined;

    if (options.share?.includeChildDocuments) {
      const sharedDocument = await options.share.$get("document");
      invariant(sharedDocument, "Cannot find document for share");

      const childDocumentIds = await sharedDocument.getChildDocumentIds({
        archivedAt: {
          [Op.is]: null,
        },
      });
      documentIds = [sharedDocument.id, ...childDocumentIds];
    }

    // Build the SQL conditions to get result documentIds, ranking, and search term context
    const whereClause = SearchHelper.joinClauses([
      `"searchVector" @@ to_tsquery('english', :query)`,
      `"teamId" = :teamId`,
      `"collectionId" IN(:collectionIds)`,
      documentIds ? `"id" IN(:documentIds)` : "",
      `"deletedAt" IS NULL`,
      `"publishedAt" IS NOT NULL`,
    ]);

    const { results, count } = await SearchHelper.executeSearch(
      whereClause,
      {
        teamId: team.id,
        query: SearchHelper.webSearchQuery(query),
        collectionIds,
        documentIds,
        headlineOptions: `MaxFragments=1, MinWords=${snippetMinWords}, MaxWords=${snippetMaxWords}`,
      },
      limit,
      offset
    );

    // Final query to get associated document data
    const documents = await Document.findAll({
      where: {
        id: map(results, "id"),
        teamId: team.id,
      },
      include: [
        {
          model: Collection,
          as: "collection",
        },
      ],
    });

    return SearchHelper.buildResponse(results, documents, count);
  }

  /**
   * Search documents by a case-insensitive substring match on the title.
   *
   * @param user The user performing the search
   * @param query The text to look for; matched literally, so `%`, `_` and `\`
   * are not treated as wildcards
   * @param options Pagination and filters
   * @returns The matching documents, most recently updated first
   */
  public static async searchTitlesForUser(
    user: User,
    query: string,
    options: SearchOptions = {}
  ): Promise<Document[]> {
    const { limit = 15, offset = 0 } = options;
    const conditions: WhereOptions<Document>[] = [];

    // Ensure we're filtering by the users accessible collections. If
    // collectionId is passed as an option it is assumed that the authorization
    // has already been done in the router
    if (options.collectionId) {
      conditions.push({
        collectionId: options.collectionId,
      });
    } else {
      conditions.push({
        [Op.or]: [
          {
            collectionId: {
              [Op.in]: await user.collectionIds(),
            },
          },
          {
            collectionId: {
              [Op.is]: null,
            },
            createdById: user.id,
          },
        ],
      });
    }

    if (options.dateFilter) {
      // The value is interpolated into a raw SQL literal below, so refuse
      // anything that is not a single alphabetic interval unit.
      invariant(/^[a-z]+$/i.test(options.dateFilter), "Invalid date filter");

      conditions.push({
        updatedAt: {
          [Op.gt]: sequelize.literal(
            `now() - interval '1 ${options.dateFilter}'`
          ),
        },
      });
    }

    if (!options.includeArchived) {
      conditions.push({
        archivedAt: {
          [Op.is]: null,
        },
      });
    }

    if (options.includeDrafts) {
      // drafts are only ever visible to the user that created them
      conditions.push({
        [Op.or]: [
          {
            publishedAt: {
              [Op.ne]: null,
            },
          },
          {
            createdById: user.id,
          },
        ],
      });
    } else {
      conditions.push({
        publishedAt: {
          [Op.ne]: null,
        },
      });
    }

    if (options.collaboratorIds) {
      conditions.push({
        collaboratorIds: {
          [Op.contains]: options.collaboratorIds,
        },
      });
    }

    const where: WhereOptions<Document> = {
      teamId: user.teamId,
      title: {
        [Op.iLike]: `%${SearchHelper.escapeLikePattern(query)}%`,
      },
      [Op.and]: conditions,
    };

    return Document.scope([
      "withoutState",
      "withDrafts",
      {
        method: ["withViews", user.id],
      },
      {
        method: ["withCollectionPermissions", user.id],
      },
    ]).findAll({
      where,
      order: [["updatedAt", "DESC"]],
      include: [
        {
          model: User,
          as: "createdBy",
          paranoid: false,
        },
        {
          model: User,
          as: "updatedBy",
          paranoid: false,
        },
      ],
      offset,
      limit,
    });
  }

  /**
   * Full-text search across the documents a user has access to.
   *
   * @param user The user performing the search
   * @param query The raw user search query
   * @param options Pagination, snippet size, and filters
   * @returns The ranked results with context snippets, plus the total count
   */
  public static async searchForUser(
    user: User,
    query: string,
    options: SearchOptions = {}
  ): Promise<SearchResponse> {
    const {
      snippetMinWords = 20,
      snippetMaxWords = 30,
      limit = 15,
      offset = 0,
    } = options;

    // Ensure we're filtering by the users accessible collections. If
    // collectionId is passed as an option it is assumed that the authorization
    // has already been done in the router
    const collectionIds: string[] = options.collectionId
      ? [options.collectionId]
      : await user.collectionIds();

    const dateFilter = options.dateFilter
      ? `1 ${options.dateFilter}`
      : undefined;

    // Documents without a collection are only visible to their creator
    const ownedWithoutCollection = `("collectionId" IS NULL AND "createdById" = :userId)`;

    // Build the SQL conditions to get documentIds, ranking, and search term context
    const whereClause = SearchHelper.joinClauses([
      `"searchVector" @@ to_tsquery('english', :query)`,
      `"teamId" = :teamId`,
      collectionIds.length
        ? `("collectionId" IN(:collectionIds) OR ${ownedWithoutCollection})`
        : ownedWithoutCollection,
      options.dateFilter ? `"updatedAt" > now() - interval :dateFilter` : "",
      options.collaboratorIds
        ? `"collaboratorIds" @> ARRAY[:collaboratorIds]::uuid[]`
        : "",
      options.includeArchived ? "" : `"archivedAt" IS NULL`,
      `"deletedAt" IS NULL`,
      options.includeDrafts
        ? `("publishedAt" IS NOT NULL OR "createdById" = :userId)`
        : `"publishedAt" IS NOT NULL`,
    ]);

    const { results, count } = await SearchHelper.executeSearch(
      whereClause,
      {
        teamId: user.teamId,
        userId: user.id,
        collaboratorIds: options.collaboratorIds,
        query: SearchHelper.webSearchQuery(query),
        collectionIds,
        dateFilter,
        headlineOptions: `MaxFragments=1, MinWords=${snippetMinWords}, MaxWords=${snippetMaxWords}`,
      },
      limit,
      offset
    );

    // Final query to get associated document data
    const documents = await Document.scope([
      "withoutState",
      "withDrafts",
      {
        method: ["withViews", user.id],
      },
      {
        method: ["withCollectionPermissions", user.id],
      },
    ]).findAll({
      where: {
        teamId: user.teamId,
        id: map(results, "id"),
      },
    });

    return SearchHelper.buildResponse(results, documents, count);
  }

  /**
   * Run the ranked, paginated select and the matching count query in parallel.
   *
   * @param whereClause SQL conditions shared by both queries
   * @param replacements Named replacements referenced by the where clause,
   * plus `query` and `headlineOptions` used by the select
   * @param limit The maximum number of rows to return
   * @param offset The number of rows to skip
   * @returns The ranked rows and the unpaginated count as reported by the driver
   */
  private static async executeSearch(
    whereClause: string,
    replacements: Record<string, unknown>,
    limit: number,
    offset: number
  ): Promise<{ results: Results[]; count: number | string }> {
    const selectSql = `
      SELECT
        id,
        ts_rank(documents."searchVector", to_tsquery('english', :query)) as "searchRanking",
        ts_headline('english', "text", to_tsquery('english', :query), :headlineOptions) as "searchContext"
      FROM documents
      WHERE ${whereClause}
      ORDER BY
        "searchRanking" DESC,
        "updatedAt" DESC
      LIMIT :limit
      OFFSET :offset;
    `;
    const countSql = `
      SELECT COUNT(id)
      FROM documents
      WHERE ${whereClause}
    `;

    const [results, [{ count }]] = await Promise.all([
      sequelize.query<Results>(selectSql, {
        type: QueryTypes.SELECT,
        replacements: { ...replacements, limit, offset },
      }),
      sequelize.query<{ count: number | string }>(countSql, {
        type: QueryTypes.SELECT,
        replacements,
      }),
    ]);

    return { results, count };
  }

  /**
   * Combine ranked rows with their loaded documents into a search response.
   *
   * @param results The ranked rows from the search query, in display order
   * @param documents The documents loaded for those rows
   * @param count The unpaginated number of matches; accepted as a string
   * because Postgres COUNT is a bigint, which the driver may return as text
   * @returns The response, omitting rows whose document could not be loaded
   */
  private static buildResponse(
    results: Results[],
    documents: Document[],
    count: number | string
  ): SearchResponse {
    return {
      results: results.flatMap((result) => {
        const document = find(documents, {
          id: result.id,
        });

        // The document query runs after the ranking query, so a row may no
        // longer have a loadable document; skip it rather than emit undefined.
        if (!document) {
          return [];
        }

        return [
          {
            ranking: result.searchRanking,
            context: removeMarkdown(result.searchContext, {
              stripHTML: false,
            }),
            document,
          },
        ];
      }),
      totalCount: Number(count),
    };
  }

  /**
   * Convert a user search query into a format that can be used by Postgres
   *
   * @param query The user search query
   * @returns The query formatted for Postgres ts_query
   */
  private static webSearchQuery(query: string): string {
    // limit length of search queries as we're using regex against untrusted input
    const limitedQuery = SearchHelper.escapeQuery(
      query.slice(0, SearchHelper.maxQueryLength)
    );

    // if the search term is one unquoted word then allow partial matches automatically
    const queryWordCount = limitedQuery.split(" ").length;
    const singleUnquotedSearch =
      queryWordCount === 1 &&
      !limitedQuery.startsWith('"') &&
      !limitedQuery.endsWith('"');

    // Replace single quote characters with &, leaving a quote at the very
    // start or very end of the query untouched.
    const lastIndex = limitedQuery.length - 1;
    const withoutQuotes = limitedQuery.replace(/'/g, (quote, index: number) =>
      index > 0 && index < lastIndex ? "&" : quote
    );

    return queryParser()(
      singleUnquotedSearch ? `${withoutQuotes}*` : withoutQuotes
    );
  }

  /**
   * Join SQL conditions with AND, dropping empty entries so that optional
   * filters can be expressed inline.
   *
   * @param clauses The SQL conditions, where "" means "not applicable"
   * @returns A single SQL expression
   */
  private static joinClauses(clauses: string[]): string {
    return clauses.filter((clause) => clause.length > 0).join(" AND ");
  }

  /**
   * Escape the characters that are special inside a LIKE/ILIKE pattern so that
   * user input is matched literally.
   *
   * @param value The raw user input
   * @returns The input with `\`, `%` and `_` prefixed by a backslash
   */
  private static escapeLikePattern(value: string): string {
    return value.replace(/[\\%_]/g, "\\$&");
  }

  /**
   * Double every backslash in a search query.
   *
   * @param query The raw query
   * @returns The query with each "\" replaced by "\\"
   */
  private static escapeQuery(query: string): string {
    // replace "\" with escaped "\\" because sequelize.escape doesn't do it
    // https://github.com/sequelize/sequelize/issues/2950
    return query.replace(/\\/g, "\\\\");
  }
}
|