fix: Improved phrase matching in search (#6800)
* fix: Improved phrase matching in search * test
This commit is contained in:
@@ -37,9 +37,8 @@ type Props = {
|
|||||||
/** Matches a `<b>…</b>` highlight element (attributes allowed) and captures its inner text. */
const SEARCH_RESULT_REGEX = /<b\b[^>]*>(.*?)<\/b>/gi;

/**
 * Strips `<b>` highlight wrappers from a string, keeping the text inside them.
 *
 * @param tag Text that may contain one or more `<b>…</b>` elements.
 * @returns The text with every `<b>` wrapper replaced by its contents.
 */
function replaceResultMarks(tag: string) {
  // Don't use SEARCH_RESULT_REGEX directly here: triggering the regex inside its
  // own callback causes an infinite loop. Build a fresh copy instead, carrying
  // over the flags so matching stays global and case-insensitive.
  return tag.replace(
    new RegExp(SEARCH_RESULT_REGEX.source, SEARCH_RESULT_REGEX.flags),
    "$1"
  );
}
|
||||||
|
|
||||||
function DocumentListItem(
|
function DocumentListItem(
|
||||||
|
|||||||
@@ -464,6 +464,28 @@ describe("SearchHelper", () => {
|
|||||||
);
|
);
|
||||||
expect(totalCount).toBe(0);
|
expect(totalCount).toBe(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Regression test: a double-quoted query is searched as a phrase and must match
// a document whose text contains that phrase.
test("should find exact phrases", async () => {
  const team = await buildTeam();
  const user = await buildUser({ teamId: team.id });
  const collection = await buildCollection({
    teamId: team.id,
    userId: user.id,
  });
  const document = await buildDocument({
    teamId: team.id,
    userId: user.id,
    collectionId: collection.id,
    text: "test number 1",
  });

  // NOTE(review): presumably saving after a title change refreshes the stored
  // search vector for the document — confirm against the model hooks.
  document.title = "change";
  await document.save();

  // The surrounding double quotes are part of the query string itself.
  const { totalCount } = await SearchHelper.searchForUser(
    user,
    `"test number"`
  );

  expect(totalCount).toBe(1);
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("#searchTitlesForUser", () => {
|
describe("#searchTitlesForUser", () => {
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import removeMarkdown from "@tommoor/remove-markdown";
|
import removeMarkdown from "@tommoor/remove-markdown";
|
||||||
import invariant from "invariant";
|
import invariant from "invariant";
|
||||||
|
import escapeRegExp from "lodash/escapeRegExp";
|
||||||
import find from "lodash/find";
|
import find from "lodash/find";
|
||||||
import map from "lodash/map";
|
import map from "lodash/map";
|
||||||
import queryParser from "pg-tsquery";
|
import queryParser from "pg-tsquery";
|
||||||
@@ -72,7 +73,7 @@ export default class SearchHelper {
|
|||||||
offset = 0,
|
offset = 0,
|
||||||
} = options;
|
} = options;
|
||||||
|
|
||||||
const where = await this.buildWhere(team, {
|
const where = await this.buildWhere(team, query, {
|
||||||
...options,
|
...options,
|
||||||
statusFilter: [...(options.statusFilter || []), StatusFilter.Published],
|
statusFilter: [...(options.statusFilter || []), StatusFilter.Published],
|
||||||
});
|
});
|
||||||
@@ -92,14 +93,6 @@ export default class SearchHelper {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
where[Op.and].push(
|
|
||||||
Sequelize.fn(
|
|
||||||
`"searchVector" @@ to_tsquery`,
|
|
||||||
"english",
|
|
||||||
Sequelize.literal(":query")
|
|
||||||
)
|
|
||||||
);
|
|
||||||
|
|
||||||
const queryReplacements = {
|
const queryReplacements = {
|
||||||
query: this.webSearchQuery(query),
|
query: this.webSearchQuery(query),
|
||||||
headlineOptions: `MaxFragments=1, MinWords=${snippetMinWords}, MaxWords=${snippetMaxWords}`,
|
headlineOptions: `MaxFragments=1, MinWords=${snippetMinWords}, MaxWords=${snippetMaxWords}`,
|
||||||
@@ -152,7 +145,7 @@ export default class SearchHelper {
|
|||||||
],
|
],
|
||||||
});
|
});
|
||||||
|
|
||||||
return this.buildResponse(results, documents, count);
|
return this.buildResponse(query, results, documents, count);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static async searchTitlesForUser(
|
public static async searchTitlesForUser(
|
||||||
@@ -161,7 +154,7 @@ export default class SearchHelper {
|
|||||||
options: SearchOptions = {}
|
options: SearchOptions = {}
|
||||||
): Promise<Document[]> {
|
): Promise<Document[]> {
|
||||||
const { limit = 15, offset = 0 } = options;
|
const { limit = 15, offset = 0 } = options;
|
||||||
const where = await this.buildWhere(user, options);
|
const where = await this.buildWhere(user, undefined, options);
|
||||||
|
|
||||||
where[Op.and].push({
|
where[Op.and].push({
|
||||||
title: {
|
title: {
|
||||||
@@ -224,15 +217,7 @@ export default class SearchHelper {
|
|||||||
offset = 0,
|
offset = 0,
|
||||||
} = options;
|
} = options;
|
||||||
|
|
||||||
const where = await this.buildWhere(user, options);
|
const where = await this.buildWhere(user, query, options);
|
||||||
|
|
||||||
where[Op.and].push(
|
|
||||||
Sequelize.fn(
|
|
||||||
`"searchVector" @@ to_tsquery`,
|
|
||||||
"english",
|
|
||||||
Sequelize.literal(":query")
|
|
||||||
)
|
|
||||||
);
|
|
||||||
|
|
||||||
const queryReplacements = {
|
const queryReplacements = {
|
||||||
query: this.webSearchQuery(query),
|
query: this.webSearchQuery(query),
|
||||||
@@ -307,10 +292,14 @@ export default class SearchHelper {
|
|||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
return this.buildResponse(results, documents, count);
|
return this.buildResponse(query, results, documents, count);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static async buildWhere(model: User | Team, options: SearchOptions) {
|
private static async buildWhere(
|
||||||
|
model: User | Team,
|
||||||
|
query: string | undefined,
|
||||||
|
options: SearchOptions
|
||||||
|
) {
|
||||||
const teamId = model instanceof Team ? model.id : model.teamId;
|
const teamId = model instanceof Team ? model.id : model.teamId;
|
||||||
const where: WhereOptions<Document> = {
|
const where: WhereOptions<Document> = {
|
||||||
teamId,
|
teamId,
|
||||||
@@ -410,24 +399,80 @@ export default class SearchHelper {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (query) {
|
||||||
|
const limitedQuery = this.escapeQuery(
|
||||||
|
query.slice(0, this.maxQueryLength)
|
||||||
|
);
|
||||||
|
|
||||||
|
// Extract quoted queries and add them to the where clause, up to a maximum of 3 total.
|
||||||
|
const quotedQueries = Array.from(
|
||||||
|
limitedQuery.matchAll(/"([^"]*)"/g)
|
||||||
|
).slice(0, 3);
|
||||||
|
|
||||||
|
for (const match of quotedQueries) {
|
||||||
|
where[Op.and].push({
|
||||||
|
[Op.or]: [
|
||||||
|
{
|
||||||
|
title: {
|
||||||
|
[Op.iLike]: `%${match[1]}%`,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
text: {
|
||||||
|
[Op.iLike]: `%${match[1]}%`,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
where[Op.and].push(
|
||||||
|
Sequelize.fn(
|
||||||
|
`"searchVector" @@ to_tsquery`,
|
||||||
|
"english",
|
||||||
|
Sequelize.literal(":query")
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
return where;
|
return where;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Builds the search response from ranked results and their loaded documents.
 *
 * @param query The raw search query; double-quoted phrases in it drive highlighting.
 * @param results Ranked rows carrying `searchRanking` and `searchContext` in `dataValues`.
 * @param documents Documents to pair with each result, looked up by result id.
 * @param count Total number of matches, returned as `totalCount`.
 * @returns The mapped results (ranking, context, document) plus the total count.
 */
private static buildResponse(
  query: string,
  results: RankedDocument[],
  documents: Document[],
  count: number
): SearchResponse {
  // Only the first three quoted phrases are considered, the same cap that is
  // applied when the quoted phrases are added to the where clause.
  const quotedPhrases = Array.from(query.matchAll(/"([^"]*)"/g))
    .slice(0, 3)
    .map((match) => match[1] ?? "")
    // A blank phrase (e.g. `""`) would produce a pattern that matches at every
    // position and wrap empty <b></b> pairs around each character, so skip it.
    .filter((phrase) => phrase.trim().length > 0);

  // Regex to highlight quoted queries as ts_headline will not do this by default due to stemming.
  const quotedRegex = quotedPhrases.length
    ? new RegExp(
        quotedPhrases.map((phrase) => escapeRegExp(phrase)).join("|"),
        "gi"
      )
    : undefined;

  return {
    results: map(results, (result) => {
      let context = removeMarkdown(result.dataValues.searchContext, {
        stripHTML: false,
      });

      // If there are any quoted queries, highlighting these takes precedence over the default:
      // drop the existing <b> marks, then wrap each literal phrase occurrence.
      if (quotedRegex) {
        context = context
          .replace(/<\/?b>/g, "")
          .replace(quotedRegex, "<b>$&</b>");
      }

      return {
        ranking: result.dataValues.searchRanking,
        context,
        document: find(documents, {
          id: result.id,
        }) as Document,
      };
    }),
    totalCount: count,
  };
}
|
||||||
|
|||||||
Reference in New Issue
Block a user