Files
outline/server/models/helpers/SearchHelper.ts
Apoorv Mishra 1490c3a14b Individual document sharing with permissions (#5814)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: Tom Moor <tom@getoutline.com>
2024-01-30 17:48:22 -08:00

455 lines
11 KiB
TypeScript

import removeMarkdown from "@tommoor/remove-markdown";
import invariant from "invariant";
import find from "lodash/find";
import map from "lodash/map";
import queryParser from "pg-tsquery";
import { Op, Sequelize, WhereOptions } from "sequelize";
import { DateFilter } from "@shared/types";
import Collection from "@server/models/Collection";
import Document from "@server/models/Document";
import Share from "@server/models/Share";
import Team from "@server/models/Team";
import User from "@server/models/User";
import { sequelize } from "@server/storage/database";
type SearchResponse = {
results: {
/** The search ranking, for sorting results */
ranking: number;
/** A snippet of contextual text around the search result */
context: string;
/** The document result */
document: Document;
}[];
/** The total number of results for the search query without pagination */
totalCount: number;
};
type SearchOptions = {
/** The query limit for pagination */
limit?: number;
/** The query offset for pagination */
offset?: number;
/** Limit results to a collection. Authorization is presumed to have been done before passing to this helper. */
collectionId?: string | null;
/** Limit results to a shared document. */
share?: Share;
/** Limit results to a date range. */
dateFilter?: DateFilter;
/** Limit results to a list of users that collaborated on the document. */
collaboratorIds?: string[];
/** Include archived documents in the results */
includeArchived?: boolean;
/** Include draft documents in the results (will only ever return your own) */
includeDrafts?: boolean;
/** The minimum number of words to be returned in the contextual snippet */
snippetMinWords?: number;
/** The maximum number of words to be returned in the contextual snippet */
snippetMaxWords?: number;
};
type RankedDocument = Document & {
searchRanking: number;
searchContext: string;
id: string;
};
export default class SearchHelper {
/**
* The maximum length of a search query.
*/
public static maxQueryLength = 1000;
public static async searchForTeam(
team: Team,
query: string,
options: SearchOptions = {}
): Promise<SearchResponse> {
const {
snippetMinWords = 20,
snippetMaxWords = 30,
limit = 15,
offset = 0,
} = options;
const where = await this.buildWhere(team, options);
if (options.share?.includeChildDocuments) {
const sharedDocument = await options.share.$get("document");
invariant(sharedDocument, "Cannot find document for share");
const childDocumentIds = await sharedDocument.findAllChildDocumentIds({
archivedAt: {
[Op.is]: null,
},
});
where[Op.and].push({
id: [sharedDocument.id, ...childDocumentIds],
});
}
where[Op.and].push(
Sequelize.fn(
`"searchVector" @@ to_tsquery`,
"english",
Sequelize.literal(":query")
)
);
const queryReplacements = {
query: this.webSearchQuery(query),
headlineOptions: `MaxFragments=1, MinWords=${snippetMinWords}, MaxWords=${snippetMaxWords}`,
};
const resultsQuery = Document.unscoped().findAll({
attributes: [
"id",
[
Sequelize.literal(
`ts_rank("searchVector", to_tsquery('english', :query))`
),
"searchRanking",
],
[
Sequelize.literal(
`ts_headline('english', "text", to_tsquery('english', :query), :headlineOptions)`
),
"searchContext",
],
],
replacements: queryReplacements,
where,
order: [
["searchRanking", "DESC"],
["updatedAt", "DESC"],
],
limit,
offset,
}) as any as Promise<RankedDocument[]>;
const countQuery = Document.unscoped().count({
// @ts-expect-error Types are incorrect for count
replacements: queryReplacements,
where,
}) as any as Promise<number>;
const [results, count] = await Promise.all([resultsQuery, countQuery]);
// Final query to get associated document data
const documents = await Document.findAll({
where: {
id: map(results, "id"),
teamId: team.id,
},
include: [
{
model: Collection,
as: "collection",
},
],
});
return this.buildResponse(results, documents, count);
}
public static async searchTitlesForUser(
user: User,
query: string,
options: SearchOptions = {}
): Promise<Document[]> {
const { limit = 15, offset = 0 } = options;
const where = await this.buildWhere(user, options);
where[Op.and].push({
title: {
[Op.iLike]: `%${query}%`,
},
});
const include = [
{
association: "memberships",
where: {
userId: user.id,
},
required: false,
separate: false,
},
{
model: User,
as: "createdBy",
paranoid: false,
},
{
model: User,
as: "updatedBy",
paranoid: false,
},
];
return Document.scope([
"withoutState",
"withDrafts",
{
method: ["withViews", user.id],
},
{
method: ["withCollectionPermissions", user.id],
},
{
method: ["withMembership", user.id],
},
]).findAll({
where,
subQuery: false,
order: [["updatedAt", "DESC"]],
include,
offset,
limit,
});
}
public static async searchForUser(
user: User,
query: string,
options: SearchOptions = {}
): Promise<SearchResponse> {
const {
snippetMinWords = 20,
snippetMaxWords = 30,
limit = 15,
offset = 0,
} = options;
const where = await this.buildWhere(user, options);
where[Op.and].push(
Sequelize.fn(
`"searchVector" @@ to_tsquery`,
"english",
Sequelize.literal(":query")
)
);
const queryReplacements = {
query: this.webSearchQuery(query),
headlineOptions: `MaxFragments=1, MinWords=${snippetMinWords}, MaxWords=${snippetMaxWords}`,
};
const include = [
{
association: "memberships",
where: {
userId: user.id,
},
required: false,
separate: false,
},
];
const resultsQuery = Document.unscoped().findAll({
attributes: [
"id",
[
Sequelize.literal(
`ts_rank("searchVector", to_tsquery('english', :query))`
),
"searchRanking",
],
[
Sequelize.literal(
`ts_headline('english', "text", to_tsquery('english', :query), :headlineOptions)`
),
"searchContext",
],
],
subQuery: false,
include,
replacements: queryReplacements,
where,
order: [
["searchRanking", "DESC"],
["updatedAt", "DESC"],
],
limit,
offset,
}) as any as Promise<RankedDocument[]>;
const countQuery = Document.unscoped().count({
// @ts-expect-error Types are incorrect for count
subQuery: false,
include,
replacements: queryReplacements,
where,
}) as any as Promise<number>;
const [results, count] = await Promise.all([resultsQuery, countQuery]);
// Final query to get associated document data
const documents = await Document.scope([
"withoutState",
"withDrafts",
{
method: ["withViews", user.id],
},
{
method: ["withCollectionPermissions", user.id],
},
{
method: ["withMembership", user.id],
},
]).findAll({
where: {
teamId: user.teamId,
id: map(results, "id"),
},
});
return this.buildResponse(results, documents, count);
}
private static async buildWhere(model: User | Team, options: SearchOptions) {
const teamId = model instanceof Team ? model.id : model.teamId;
const where: WhereOptions<Document> = {
teamId,
[Op.or]: [],
[Op.and]: [
{
deletedAt: {
[Op.eq]: null,
},
},
],
};
if (model instanceof User) {
where[Op.or].push({ "$memberships.id$": { [Op.ne]: null } });
}
// Ensure we're filtering by the users accessible collections. If
// collectionId is passed as an option it is assumed that the authorization
// has already been done in the router
const collectionIds = options.collectionId
? [options.collectionId]
: await model.collectionIds();
if (collectionIds.length) {
where[Op.or].push({ collectionId: collectionIds });
}
if (options.dateFilter) {
where[Op.and].push({
updatedAt: {
[Op.gt]: sequelize.literal(
`now() - interval '1 ${options.dateFilter}'`
),
},
});
}
if (options.collaboratorIds) {
where[Op.and].push({
collaboratorIds: {
[Op.contains]: options.collaboratorIds,
},
});
}
if (!options.includeArchived) {
where[Op.and].push({
archivedAt: {
[Op.eq]: null,
},
});
}
if (options.includeDrafts && model instanceof User) {
where[Op.and].push({
[Op.or]: [
{
publishedAt: {
[Op.ne]: null,
},
},
{
createdById: model.id,
},
{ "$memberships.id$": { [Op.ne]: null } },
],
});
} else {
where[Op.and].push({
publishedAt: {
[Op.ne]: null,
},
});
}
return where;
}
private static buildResponse(
results: RankedDocument[],
documents: Document[],
count: number
): SearchResponse {
return {
results: map(results, (result) => ({
ranking: result.searchRanking,
context: removeMarkdown(result.searchContext, {
stripHTML: false,
}),
document: find(documents, {
id: result.id,
}) as Document,
})),
totalCount: count,
};
}
/**
* Convert a user search query into a format that can be used by Postgres
*
* @param query The user search query
* @returns The query formatted for Postgres ts_query
*/
public static webSearchQuery(query: string): string {
// limit length of search queries as we're using regex against untrusted input
let limitedQuery = this.escapeQuery(query.slice(0, this.maxQueryLength));
// if the search term is one unquoted word then allow partial matches automatically
const queryWordCount = limitedQuery.split(" ").length;
const singleUnquotedSearch =
queryWordCount === 1 &&
!limitedQuery.startsWith('"') &&
!limitedQuery.endsWith('"');
// Replace single quote characters with &.
const singleQuotes = limitedQuery.matchAll(/'+/g);
for (const match of singleQuotes) {
if (
match.index &&
match.index > 0 &&
match.index < limitedQuery.length - 1
) {
limitedQuery =
limitedQuery.substring(0, match.index) +
"&" +
limitedQuery.substring(match.index + 1);
}
}
return (
queryParser()(singleUnquotedSearch ? `${limitedQuery}*` : limitedQuery)
// Remove any trailing join characters
.replace(/&$/, "")
);
}
private static escapeQuery(query: string): string {
// replace "\" with escaped "\\" because sequelize.escape doesn't do it
// https://github.com/sequelize/sequelize/issues/2950
return query.replace(/\\/g, "\\\\");
}
}