Further improved search matches
This commit is contained in:
@@ -150,6 +150,7 @@
|
|||||||
"passport-slack-oauth2": "^1.1.1",
|
"passport-slack-oauth2": "^1.1.1",
|
||||||
"pg": "^8.5.1",
|
"pg": "^8.5.1",
|
||||||
"pg-hstore": "^2.3.4",
|
"pg-hstore": "^2.3.4",
|
||||||
|
"pg-tsquery": "^8.4.0",
|
||||||
"polished": "^3.7.2",
|
"polished": "^3.7.2",
|
||||||
"prosemirror-commands": "1.2.2",
|
"prosemirror-commands": "1.2.2",
|
||||||
"prosemirror-dropcursor": "^1.4.0",
|
"prosemirror-dropcursor": "^1.4.0",
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import removeMarkdown from "@tommoor/remove-markdown";
|
import removeMarkdown from "@tommoor/remove-markdown";
|
||||||
import invariant from "invariant";
|
import invariant from "invariant";
|
||||||
import { find, map } from "lodash";
|
import { find, map } from "lodash";
|
||||||
|
import queryParser from "pg-tsquery";
|
||||||
import { Op, QueryTypes } from "sequelize";
|
import { Op, QueryTypes } from "sequelize";
|
||||||
import { DateFilter } from "@shared/types";
|
import { DateFilter } from "@shared/types";
|
||||||
import unescape from "@shared/utils/unescape";
|
import unescape from "@shared/utils/unescape";
|
||||||
@@ -54,12 +55,16 @@ type Results = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
export default class SearchHelper {
|
export default class SearchHelper {
|
||||||
|
/**
|
||||||
|
* The maximum length of a search query.
|
||||||
|
*/
|
||||||
|
public static maxQueryLength = 1000;
|
||||||
|
|
||||||
public static async searchForTeam(
|
public static async searchForTeam(
|
||||||
team: Team,
|
team: Team,
|
||||||
query: string,
|
query: string,
|
||||||
options: SearchOptions = {}
|
options: SearchOptions = {}
|
||||||
): Promise<SearchResponse> {
|
): Promise<SearchResponse> {
|
||||||
const wildcardQuery = `${this.escapeQuery(query)}:*`;
|
|
||||||
const {
|
const {
|
||||||
snippetMinWords = 20,
|
snippetMinWords = 20,
|
||||||
snippetMaxWords = 30,
|
snippetMaxWords = 30,
|
||||||
@@ -103,7 +108,7 @@ export default class SearchHelper {
|
|||||||
|
|
||||||
// Build the SQL query to get result documentIds, ranking, and search term context
|
// Build the SQL query to get result documentIds, ranking, and search term context
|
||||||
const whereClause = `
|
const whereClause = `
|
||||||
"searchVector" @@ websearch_to_tsquery('english', :query) AND
|
"searchVector" @@ to_tsquery('english', :query) AND
|
||||||
"teamId" = :teamId AND
|
"teamId" = :teamId AND
|
||||||
"collectionId" IN(:collectionIds) AND
|
"collectionId" IN(:collectionIds) AND
|
||||||
${documentClause}
|
${documentClause}
|
||||||
@@ -113,8 +118,8 @@ export default class SearchHelper {
|
|||||||
const selectSql = `
|
const selectSql = `
|
||||||
SELECT
|
SELECT
|
||||||
id,
|
id,
|
||||||
ts_rank(documents."searchVector", websearch_to_tsquery('english', :query)) as "searchRanking",
|
ts_rank(documents."searchVector", to_tsquery('english', :query)) as "searchRanking",
|
||||||
ts_headline('english', "text", websearch_to_tsquery('english', :query), :headlineOptions) as "searchContext"
|
ts_headline('english', "text", to_tsquery('english', :query), :headlineOptions) as "searchContext"
|
||||||
FROM documents
|
FROM documents
|
||||||
WHERE ${whereClause}
|
WHERE ${whereClause}
|
||||||
ORDER BY
|
ORDER BY
|
||||||
@@ -130,7 +135,7 @@ export default class SearchHelper {
|
|||||||
`;
|
`;
|
||||||
const queryReplacements = {
|
const queryReplacements = {
|
||||||
teamId: team.id,
|
teamId: team.id,
|
||||||
query: wildcardQuery,
|
query: this.webSearchQuery(query),
|
||||||
collectionIds,
|
collectionIds,
|
||||||
documentIds,
|
documentIds,
|
||||||
headlineOptions: `MaxFragments=1, MinWords=${snippetMinWords}, MaxWords=${snippetMaxWords}`,
|
headlineOptions: `MaxFragments=1, MinWords=${snippetMinWords}, MaxWords=${snippetMaxWords}`,
|
||||||
@@ -176,8 +181,6 @@ export default class SearchHelper {
|
|||||||
limit = 15,
|
limit = 15,
|
||||||
offset = 0,
|
offset = 0,
|
||||||
} = options;
|
} = options;
|
||||||
const wildcardQuery = `${SearchHelper.escapeQuery(query)}:*`;
|
|
||||||
|
|
||||||
// Ensure we're filtering by the users accessible collections. If
|
// Ensure we're filtering by the users accessible collections. If
|
||||||
// collectionId is passed as an option it is assumed that the authorization
|
// collectionId is passed as an option it is assumed that the authorization
|
||||||
// has already been done in the router
|
// has already been done in the router
|
||||||
@@ -197,7 +200,7 @@ export default class SearchHelper {
|
|||||||
|
|
||||||
// Build the SQL query to get documentIds, ranking, and search term context
|
// Build the SQL query to get documentIds, ranking, and search term context
|
||||||
const whereClause = `
|
const whereClause = `
|
||||||
"searchVector" @@ websearch_to_tsquery('english', :query) AND
|
"searchVector" @@ to_tsquery('english', :query) AND
|
||||||
"teamId" = :teamId AND
|
"teamId" = :teamId AND
|
||||||
${
|
${
|
||||||
collectionIds.length
|
collectionIds.length
|
||||||
@@ -226,8 +229,8 @@ export default class SearchHelper {
|
|||||||
const selectSql = `
|
const selectSql = `
|
||||||
SELECT
|
SELECT
|
||||||
id,
|
id,
|
||||||
ts_rank(documents."searchVector", websearch_to_tsquery('english', :query)) as "searchRanking",
|
ts_rank(documents."searchVector", to_tsquery('english', :query)) as "searchRanking",
|
||||||
ts_headline('english', "text", websearch_to_tsquery('english', :query), :headlineOptions) as "searchContext"
|
ts_headline('english', "text", to_tsquery('english', :query), :headlineOptions) as "searchContext"
|
||||||
FROM documents
|
FROM documents
|
||||||
WHERE ${whereClause}
|
WHERE ${whereClause}
|
||||||
ORDER BY
|
ORDER BY
|
||||||
@@ -245,7 +248,7 @@ export default class SearchHelper {
|
|||||||
teamId: user.teamId,
|
teamId: user.teamId,
|
||||||
userId: user.id,
|
userId: user.id,
|
||||||
collaboratorIds: options.collaboratorIds,
|
collaboratorIds: options.collaboratorIds,
|
||||||
query: wildcardQuery,
|
query: this.webSearchQuery(query),
|
||||||
collectionIds,
|
collectionIds,
|
||||||
dateFilter,
|
dateFilter,
|
||||||
headlineOptions: `MaxFragments=1, MinWords=${snippetMinWords}, MaxWords=${snippetMaxWords}`,
|
headlineOptions: `MaxFragments=1, MinWords=${snippetMinWords}, MaxWords=${snippetMaxWords}`,
|
||||||
@@ -302,9 +305,29 @@ export default class SearchHelper {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Convert a user search query into a format that can be used by Postgres
 *
 * The input is truncated to `maxQueryLength` characters and passed through
 * `escapeQuery` before being handed to the pg-tsquery parser. When the query
 * is a single word that does not start with a double quote, a trailing `*`
 * is appended so that partial matches are allowed automatically.
 *
 * @param query The user search query
 * @returns The query formatted for Postgres ts_query
 */
private static webSearchQuery(query: string): string {
  // limit length of search queries as we're using regex against untrusted input
  const limitedQuery = this.escapeQuery(query.slice(0, this.maxQueryLength));

  // Count words on the trimmed query and split on any run of whitespace, so
  // that padding ("sear "), tabs or repeated spaces do not make a single word
  // look like a multi-word search. An empty query counts as zero words so a
  // bare "*" is never sent to the parser.
  const trimmedQuery = limitedQuery.trim();
  const queryWordCount =
    trimmedQuery === "" ? 0 : trimmedQuery.split(/\s+/).length;

  // if the search term is one unquoted word then allow partial matches automatically
  const singleUnquotedSearch =
    queryWordCount === 1 && !trimmedQuery.startsWith('"');

  return queryParser({ singleQuoteReplacement: "&" })(
    singleUnquotedSearch ? `${trimmedQuery}*` : limitedQuery
  );
}
|
||||||
|
|
||||||
private static escapeQuery(query: string): string {
|
private static escapeQuery(query: string): string {
|
||||||
// replace "\" with escaped "\\" because sequelize.escape doesn't do it
|
// replace "\" with escaped "\\" because sequelize.escape doesn't do it
|
||||||
// https://github.com/sequelize/sequelize/issues/2950
|
// https://github.com/sequelize/sequelize/issues/2950
|
||||||
return sequelize.escape(query).replace(/\\/g, "\\\\");
|
return query.replace(/\\/g, "\\\\");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1243,7 +1243,7 @@ describe("#documents.search", () => {
|
|||||||
const res = await server.post("/api/documents.search", {
|
const res = await server.post("/api/documents.search", {
|
||||||
body: {
|
body: {
|
||||||
token: user.getJwtToken(),
|
token: user.getJwtToken(),
|
||||||
query: "sear &",
|
query: "sear",
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
const body = await res.json();
|
const body = await res.json();
|
||||||
@@ -1254,24 +1254,58 @@ describe("#documents.search", () => {
|
|||||||
expect(body.data[2].document.id).toEqual(thirdResult.id);
|
expect(body.data[2].document.id).toEqual(thirdResult.id);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should strip junk from search term", async () => {
|
describe("search operators", () => {
|
||||||
const user = await buildUser();
|
it("negative search operator", async () => {
|
||||||
const firstResult = await buildDocument({
|
const { user } = await seed();
|
||||||
title: "search term",
|
await buildDocument({
|
||||||
text: "this is some random text of the document body",
|
title: "search term",
|
||||||
userId: user.id,
|
text: "random text",
|
||||||
teamId: user.teamId,
|
userId: user.id,
|
||||||
|
teamId: user.teamId,
|
||||||
|
});
|
||||||
|
const firstResult = await buildDocument({
|
||||||
|
title: "title text",
|
||||||
|
text: "search term",
|
||||||
|
userId: user.id,
|
||||||
|
teamId: user.teamId,
|
||||||
|
});
|
||||||
|
const res = await server.post("/api/documents.search", {
|
||||||
|
body: {
|
||||||
|
token: user.getJwtToken(),
|
||||||
|
query: `search -random`,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
const body = await res.json();
|
||||||
|
expect(res.status).toEqual(200);
|
||||||
|
expect(body.data.length).toEqual(1);
|
||||||
|
expect(body.data[0].document.id).toEqual(firstResult.id);
|
||||||
});
|
});
|
||||||
const res = await server.post("/api/documents.search", {
|
|
||||||
body: {
|
it("quoted search operator", async () => {
|
||||||
token: user.getJwtToken(),
|
const { user } = await seed();
|
||||||
query: "rando &\\;:()",
|
await buildDocument({
|
||||||
},
|
title: "document one",
|
||||||
|
text: "term search",
|
||||||
|
userId: user.id,
|
||||||
|
teamId: user.teamId,
|
||||||
|
});
|
||||||
|
const firstResult = await buildDocument({
|
||||||
|
title: "search term",
|
||||||
|
text: "content",
|
||||||
|
userId: user.id,
|
||||||
|
teamId: user.teamId,
|
||||||
|
});
|
||||||
|
const res = await server.post("/api/documents.search", {
|
||||||
|
body: {
|
||||||
|
token: user.getJwtToken(),
|
||||||
|
query: `"search term"`,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
const body = await res.json();
|
||||||
|
expect(res.status).toEqual(200);
|
||||||
|
expect(body.data.length).toEqual(1);
|
||||||
|
expect(body.data[0].document.id).toEqual(firstResult.id);
|
||||||
});
|
});
|
||||||
const body = await res.json();
|
|
||||||
expect(res.status).toEqual(200);
|
|
||||||
expect(body.data.length).toEqual(1);
|
|
||||||
expect(body.data[0].document.id).toEqual(firstResult.id);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should not return draft documents", async () => {
|
it("should not return draft documents", async () => {
|
||||||
|
|||||||
@@ -12014,6 +12014,11 @@ pg-protocol@^1.4.0:
|
|||||||
resolved "https://registry.yarnpkg.com/pg-protocol/-/pg-protocol-1.4.0.tgz#43a71a92f6fe3ac559952555aa3335c8cb4908be"
|
resolved "https://registry.yarnpkg.com/pg-protocol/-/pg-protocol-1.4.0.tgz#43a71a92f6fe3ac559952555aa3335c8cb4908be"
|
||||||
integrity sha512-El+aXWcwG/8wuFICMQjM5ZSAm6OWiJicFdNYo+VY3QP+8vI4SvLIWVe51PppTzMhikUJR+PsyIFKqfdXPz/yxA==
|
integrity sha512-El+aXWcwG/8wuFICMQjM5ZSAm6OWiJicFdNYo+VY3QP+8vI4SvLIWVe51PppTzMhikUJR+PsyIFKqfdXPz/yxA==
|
||||||
|
|
||||||
|
pg-tsquery@^8.4.0:
|
||||||
|
version "8.4.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/pg-tsquery/-/pg-tsquery-8.4.0.tgz#411293cce23ca1eeb8c29109af9fadf28f20a7d9"
|
||||||
|
integrity sha512-m0jIxUVwLKSdmOAlqtlbo6K+EFIOZ/hyOMnoe8DmYFqEmOmvafIjGQFmcPP+z5MWd/p7ExxoKNIL31gmM+CwxQ==
|
||||||
|
|
||||||
pg-types@^2.1.0:
|
pg-types@^2.1.0:
|
||||||
version "2.2.0"
|
version "2.2.0"
|
||||||
resolved "https://registry.yarnpkg.com/pg-types/-/pg-types-2.2.0.tgz#2d0250d636454f7cfa3b6ae0382fdfa8063254a3"
|
resolved "https://registry.yarnpkg.com/pg-types/-/pg-types-2.2.0.tgz#2d0250d636454f7cfa3b6ae0382fdfa8063254a3"
|
||||||
|
|||||||
Reference in New Issue
Block a user