import removeMarkdown from "@tommoor/remove-markdown";
import invariant from "invariant";
import find from "lodash/find";
import map from "lodash/map";
import queryParser from "pg-tsquery";
import { Op, Sequelize, WhereOptions } from "sequelize";
import { DateFilter, StatusFilter } from "@shared/types";
import Collection from "@server/models/Collection";
import Document from "@server/models/Document";
import Share from "@server/models/Share";
import Team from "@server/models/Team";
import User from "@server/models/User";
import { sequelize } from "@server/storage/database";

type SearchResponse = {
  results: {
    /** The search ranking, for sorting results */
    ranking: number;
    /** A snippet of contextual text around the search result */
    context: string;
    /** The document result */
    document: Document;
  }[];
  /** The total number of results for the search query without pagination */
  totalCount: number;
};

type SearchOptions = {
  /** The query limit for pagination */
  limit?: number;
  /** The query offset for pagination */
  offset?: number;
  /** Limit results to a collection. Authorization is presumed to have been done before passing to this helper. */
  collectionId?: string | null;
  /** Limit results to a shared document. */
  share?: Share;
  /** Limit results to a date range. */
  dateFilter?: DateFilter;
  /** Status of the documents to return */
  statusFilter?: StatusFilter[];
  /** Limit results to a list of users that collaborated on the document. */
  collaboratorIds?: string[];
  /** The minimum number of words to be returned in the contextual snippet */
  snippetMinWords?: number;
  /** The maximum number of words to be returned in the contextual snippet */
  snippetMaxWords?: number;
};

/** A row of the ranking query: only the id plus the computed search columns. */
type RankedDocument = Document & {
  id: string;
  dataValues: Partial<Document> & {
    searchRanking: number;
    searchContext: string;
  };
};

export default class SearchHelper {
  /**
   * The maximum length of a search query.
   */
  public static maxQueryLength = 1000;

  /**
   * Full-text search across the documents of a team.
   *
   * `StatusFilter.Published` is always appended to the requested status
   * filter. When `options.share` has `includeChildDocuments` set, results are
   * restricted to the shared document and its non-archived children.
   *
   * @param team The team to search within
   * @param query The raw user search query
   * @param options Filtering, pagination and snippet options
   * @returns Ranked results with context snippets and the unpaginated total
   */
  public static async searchForTeam(
    team: Team,
    query: string,
    options: SearchOptions = {}
  ): Promise<SearchResponse> {
    const {
      snippetMinWords = 20,
      snippetMaxWords = 30,
      limit = 15,
      offset = 0,
    } = options;

    const where = await this.buildWhere(team, {
      ...options,
      statusFilter: [...(options.statusFilter || []), StatusFilter.Published],
    });

    if (options.share?.includeChildDocuments) {
      const sharedDocument = await options.share.$get("document");
      invariant(sharedDocument, "Cannot find document for share");

      const childDocumentIds = await sharedDocument.findAllChildDocumentIds({
        archivedAt: {
          [Op.is]: null,
        },
      });

      where[Op.and].push({
        id: [sharedDocument.id, ...childDocumentIds],
      });
    }

    where[Op.and].push(
      Sequelize.fn(
        `"searchVector" @@ to_tsquery`,
        "english",
        Sequelize.literal(":query")
      )
    );

    // Both values are bound as named replacements, never concatenated into SQL
    const queryReplacements = {
      query: this.webSearchQuery(query),
      headlineOptions: `MaxFragments=1, MinWords=${snippetMinWords}, MaxWords=${snippetMaxWords}`,
    };

    const resultsQuery = Document.unscoped().findAll({
      attributes: [
        "id",
        [
          Sequelize.literal(
            `ts_rank("searchVector", to_tsquery('english', :query))`
          ),
          "searchRanking",
        ],
        [
          Sequelize.literal(
            `ts_headline('english', "text", to_tsquery('english', :query), :headlineOptions)`
          ),
          "searchContext",
        ],
      ],
      replacements: queryReplacements,
      where,
      order: [
        ["searchRanking", "DESC"],
        ["updatedAt", "DESC"],
      ],
      limit,
      offset,
    }) as any as Promise<RankedDocument[]>;

    const countQuery = Document.unscoped().count({
      // @ts-expect-error Types are incorrect for count
      replacements: queryReplacements,
      where,
    }) as any as Promise<number>;

    // The page of ranked ids and the total count are independent, run together
    const [results, count] = await Promise.all([resultsQuery, countQuery]);

    // Final query to get associated document data
    const documents = await Document.findAll({
      where: {
        id: map(results, "id"),
        teamId: team.id,
      },
      include: [
        {
          model: Collection,
          as: "collection",
        },
      ],
    });

    return this.buildResponse(results, documents, count);
  }

  /**
   * Search documents by title for a user using a case-insensitive substring
   * match, ordered by most recently updated.
   *
   * @param user The user performing the search
   * @param query The raw text to look for in document titles
   * @param options Filtering and pagination options
   * @returns The matching documents
   */
  public static async searchTitlesForUser(
    user: User,
    query: string,
    options: SearchOptions = {}
  ): Promise<Document[]> {
    const { limit = 15, offset = 0 } = options;
    const where = await this.buildWhere(user, options);

    where[Op.and].push({
      title: {
        // Escape LIKE metacharacters so the query is matched as literal text
        [Op.iLike]: `%${this.escapeLikePattern(query)}%`,
      },
    });

    const include = [
      {
        association: "memberships",
        where: {
          userId: user.id,
        },
        required: false,
        separate: false,
      },
      {
        model: User,
        as: "createdBy",
        paranoid: false,
      },
      {
        model: User,
        as: "updatedBy",
        paranoid: false,
      },
    ];

    return Document.scope([
      "withoutState",
      "withDrafts",
      {
        method: ["withViews", user.id],
      },
      {
        method: ["withCollectionPermissions", user.id],
      },
      {
        method: ["withMembership", user.id],
      },
    ]).findAll({
      where,
      subQuery: false,
      order: [["updatedAt", "DESC"]],
      include,
      offset,
      limit,
    });
  }

  /**
   * Full-text search across the documents accessible to a user.
   *
   * @param user The user performing the search
   * @param query The raw user search query
   * @param options Filtering, pagination and snippet options
   * @returns Ranked results with context snippets and the unpaginated total
   */
  public static async searchForUser(
    user: User,
    query: string,
    options: SearchOptions = {}
  ): Promise<SearchResponse> {
    const {
      snippetMinWords = 20,
      snippetMaxWords = 30,
      limit = 15,
      offset = 0,
    } = options;

    const where = await this.buildWhere(user, options);

    where[Op.and].push(
      Sequelize.fn(
        `"searchVector" @@ to_tsquery`,
        "english",
        Sequelize.literal(":query")
      )
    );

    // Both values are bound as named replacements, never concatenated into SQL
    const queryReplacements = {
      query: this.webSearchQuery(query),
      headlineOptions: `MaxFragments=1, MinWords=${snippetMinWords}, MaxWords=${snippetMaxWords}`,
    };

    // Joined (not required) so that buildWhere can test "$memberships.id$"
    const include = [
      {
        association: "memberships",
        where: {
          userId: user.id,
        },
        required: false,
        separate: false,
      },
    ];

    const resultsQuery = Document.unscoped().findAll({
      attributes: [
        "id",
        [
          Sequelize.literal(
            `ts_rank("searchVector", to_tsquery('english', :query))`
          ),
          "searchRanking",
        ],
        [
          Sequelize.literal(
            `ts_headline('english', "text", to_tsquery('english', :query), :headlineOptions)`
          ),
          "searchContext",
        ],
      ],
      subQuery: false,
      include,
      replacements: queryReplacements,
      where,
      order: [
        ["searchRanking", "DESC"],
        ["updatedAt", "DESC"],
      ],
      limit,
      offset,
    }) as any as Promise<RankedDocument[]>;

    const countQuery = Document.unscoped().count({
      // @ts-expect-error Types are incorrect for count
      subQuery: false,
      include,
      replacements: queryReplacements,
      where,
    }) as any as Promise<number>;

    // The page of ranked ids and the total count are independent, run together
    const [results, count] = await Promise.all([resultsQuery, countQuery]);

    // Final query to get associated document data
    const documents = await Document.scope([
      "withoutState",
      "withDrafts",
      {
        method: ["withViews", user.id],
      },
      {
        method: ["withCollectionPermissions", user.id],
      },
      {
        method: ["withMembership", user.id],
      },
    ]).findAll({
      where: {
        teamId: user.teamId,
        id: map(results, "id"),
      },
    });

    return this.buildResponse(results, documents, count);
  }

  /**
   * Build the shared `where` clause used by all search methods.
   *
   * The clause always scopes to the team and to non-deleted documents, then
   * layers on collection access, date, collaborator and status filters.
   *
   * @param model The user or team the search is performed for
   * @param options The search options to translate into conditions
   * @returns A where object whose `Op.and` / `Op.or` arrays callers may extend
   */
  private static async buildWhere(model: User | Team, options: SearchOptions) {
    const teamId = model instanceof Team ? model.id : model.teamId;
    const where: WhereOptions = {
      teamId,
      [Op.or]: [],
      [Op.and]: [
        {
          deletedAt: {
            [Op.eq]: null,
          },
        },
      ],
    };

    if (model instanceof User) {
      // Relies on the caller including the "memberships" association
      where[Op.or].push({ "$memberships.id$": { [Op.ne]: null } });
    }

    // Ensure we're filtering by the users accessible collections. If
    // collectionId is passed as an option it is assumed that the authorization
    // has already been done in the router
    const collectionIds = options.collectionId
      ? [options.collectionId]
      : await model.collectionIds();

    if (collectionIds.length) {
      where[Op.or].push({ collectionId: collectionIds });
    }

    if (options.dateFilter) {
      // Escape the interval as a SQL string literal instead of interpolating
      // the option raw, so an unexpected value cannot alter the statement
      const interval = sequelize.escape(`1 ${options.dateFilter}`);

      where[Op.and].push({
        updatedAt: {
          [Op.gt]: sequelize.literal(`now() - interval ${interval}`),
        },
      });
    }

    if (options.collaboratorIds) {
      where[Op.and].push({
        collaboratorIds: {
          [Op.contains]: options.collaboratorIds,
        },
      });
    }

    // Each requested status contributes one alternative, OR-ed together below
    const statusQuery = [];

    if (options.statusFilter?.includes(StatusFilter.Published)) {
      statusQuery.push({
        [Op.and]: [
          {
            publishedAt: {
              [Op.ne]: null,
            },
            archivedAt: {
              [Op.eq]: null,
            },
          },
        ],
      });
    }

    if (
      options.statusFilter?.includes(StatusFilter.Draft) &&
      // Only ever include draft results for the user's own documents
      model instanceof User
    ) {
      statusQuery.push({
        [Op.and]: [
          {
            publishedAt: {
              [Op.eq]: null,
            },
            archivedAt: {
              [Op.eq]: null,
            },
            [Op.or]: [
              { createdById: model.id },
              { "$memberships.id$": { [Op.ne]: null } },
            ],
          },
        ],
      });
    }

    if (options.statusFilter?.includes(StatusFilter.Archived)) {
      statusQuery.push({
        archivedAt: {
          [Op.ne]: null,
        },
      });
    }

    if (statusQuery.length) {
      where[Op.and].push({
        [Op.or]: statusQuery,
      });
    }

    return where;
  }

  /**
   * Combine the ranked rows with the fully loaded documents.
   *
   * @param results The ranked rows, in the order they should be returned
   * @param documents The loaded documents, matched to results by id
   * @param count The total number of matches without pagination
   * @returns The search response
   */
  private static buildResponse(
    results: RankedDocument[],
    documents: Document[],
    count: number
  ): SearchResponse {
    return {
      results: map(results, (result) => ({
        ranking: result.dataValues.searchRanking,
        context: removeMarkdown(result.dataValues.searchContext, {
          stripHTML: false,
        }),
        // NOTE(review): undefined at runtime if the second query did not
        // return this id; the cast hides that from the type system.
        document: find(documents, {
          id: result.id,
        }) as Document,
      })),
      totalCount: count,
    };
  }

  /**
   * Convert a user search query into a format that can be used by Postgres
   *
   * @param query The user search query
   * @returns The query formatted for Postgres ts_query
   */
  public static webSearchQuery(query: string): string {
    // limit length of search queries as we're using regex against untrusted input
    let limitedQuery = this.escapeQuery(query.slice(0, this.maxQueryLength));

    // A query wrapped in double quotes does not get the trailing "*" appended
    const quotedSearch =
      limitedQuery.startsWith('"') && limitedQuery.endsWith('"');

    // Replace single quote characters with &. Only quotes that are neither the
    // first nor the last character are replaced, and for a run of quotes only
    // its first character. The replacement is one-for-one so the indexes
    // reported by matchAll stay valid while the string is rewritten.
    const singleQuotes = limitedQuery.matchAll(/'+/g);

    for (const match of singleQuotes) {
      const index = match.index ?? 0;

      if (index > 0 && index < limitedQuery.length - 1) {
        limitedQuery =
          limitedQuery.substring(0, index) +
          "&" +
          limitedQuery.substring(index + 1);
      }
    }

    return (
      queryParser()(quotedSearch ? limitedQuery : `${limitedQuery}*`)
        // Remove any trailing join characters
        .replace(/&$/, "")
    );
  }

  /**
   * Escape characters that would otherwise be interpreted by tsquery.
   *
   * @param query The user search query
   * @returns The query with backslashes and colons escaped
   */
  private static escapeQuery(query: string): string {
    return (
      query
        // replace "\" with escaped "\\" because sequelize.escape doesn't do it
        // see: https://github.com/sequelize/sequelize/issues/2950
        .replace(/\\/g, "\\\\")
        // replace ":" with escaped "\:" because it's a reserved character in tsquery
        // see: https://github.com/outline/outline/issues/6542
        .replace(/:/g, "\\:")
    );
  }

  /**
   * Escape the LIKE / ILIKE metacharacters in user input so that it is matched
   * literally when embedded in a pattern.
   *
   * @param value The raw text to embed in a LIKE pattern
   * @returns The text with "\", "%" and "_" prefixed by a backslash
   */
  private static escapeLikePattern(value: string): string {
    return value.replace(/[\\%_]/g, "\\$&");
  }
}