fix: CJK content results in long context strings in search results
closes #7183
This commit is contained in:
@@ -275,6 +275,8 @@ const ResultContext = styled(Highlight)`
|
||||
font-size: 15px;
|
||||
margin-top: -0.25em;
|
||||
margin-bottom: 0.25em;
|
||||
max-height: 90px;
|
||||
overflow: hidden;
|
||||
`;
|
||||
|
||||
export default observer(React.forwardRef(DocumentListItem));
|
||||
|
||||
@@ -465,7 +465,7 @@ describe("SearchHelper", () => {
|
||||
expect(totalCount).toBe(0);
|
||||
});
|
||||
|
||||
test("should find extact phrases", async () => {
|
||||
test("should find exact phrases", async () => {
|
||||
const team = await buildTeam();
|
||||
const user = await buildUser({ teamId: team.id });
|
||||
const collection = await buildCollection({
|
||||
|
||||
@@ -5,6 +5,7 @@ import map from "lodash/map";
|
||||
import queryParser from "pg-tsquery";
|
||||
import { Op, Sequelize, WhereOptions } from "sequelize";
|
||||
import { DateFilter, StatusFilter } from "@shared/types";
|
||||
import { regexIndexOf, regexLastIndexOf } from "@shared/utils/string";
|
||||
import { getUrls } from "@shared/utils/urls";
|
||||
import Collection from "@server/models/Collection";
|
||||
import Document from "@server/models/Document";
|
||||
@@ -304,16 +305,39 @@ export default class SearchHelper {
|
||||
"gi"
|
||||
);
|
||||
|
||||
// Breaking characters
|
||||
const breakChars = [
|
||||
" ",
|
||||
".",
|
||||
",",
|
||||
`"`,
|
||||
"'",
|
||||
"\n",
|
||||
"。",
|
||||
"!",
|
||||
"?",
|
||||
"!",
|
||||
"?",
|
||||
"…",
|
||||
];
|
||||
const breakCharsRegex = new RegExp(`[${breakChars.join("")}]`, "g");
|
||||
|
||||
// chop text around the first match, prefer the first full match if possible.
|
||||
const fullMatchIndex = text.search(fullMatchRegex);
|
||||
const offsetStartIndex =
|
||||
(fullMatchIndex >= 0 ? fullMatchIndex : text.search(highlightRegex)) - 65;
|
||||
const startIndex = Math.max(
|
||||
0,
|
||||
offsetStartIndex <= 0 ? 0 : text.indexOf(" ", offsetStartIndex)
|
||||
offsetStartIndex <= 0
|
||||
? 0
|
||||
: regexIndexOf(text, breakCharsRegex, offsetStartIndex)
|
||||
);
|
||||
const context = text.replace(highlightRegex, "<b>$&</b>");
|
||||
const endIndex = context.lastIndexOf(" ", startIndex + 250);
|
||||
const endIndex = regexLastIndexOf(
|
||||
context,
|
||||
breakCharsRegex,
|
||||
startIndex + 250
|
||||
);
|
||||
|
||||
return context.slice(startIndex, endIndex);
|
||||
}
|
||||
|
||||
65
shared/utils/string.ts
Normal file
65
shared/utils/string.ts
Normal file
@@ -0,0 +1,65 @@
|
||||
/**
 * Returns the index of the first match of a regular expression in a string,
 * searching forward from a given position.
 *
 * Matching runs on a private copy of the expression, so the caller's regex
 * object (in particular its `lastIndex`) is never modified. Every flag of the
 * original expression is preserved; the global flag is added when it is
 * missing because `lastIndex` is only honoured by `exec` for global (or
 * sticky) expressions.
 *
 * @param text The string to search in.
 * @param re The regular expression to search for.
 * @param startPos The position in the string at which to begin the search. Defaults to 0.
 * @returns The index of the first match at or after `startPos`, or -1 if there is none.
 */
export const regexIndexOf = function (
  text: string,
  re: RegExp,
  startPos?: number
): number {
  // Keep all original flags (unicode, dotAll, ...) rather than only "m"/"i",
  // otherwise the pattern can silently change meaning.
  const flags = re.global ? re.flags : `${re.flags}g`;
  const matcher = new RegExp(re.source, flags);

  // exec() starts scanning at lastIndex for global expressions.
  matcher.lastIndex = startPos ?? 0;
  const match = matcher.exec(text);

  return match ? match.index : -1;
};
|
||||
|
||||
/**
 * Returns the index of the last match of a regular expression in a string
 * that starts at or before a given position.
 *
 * Matching runs on a private copy of the expression, so the caller's regex
 * object (in particular its `lastIndex`) is never modified. Every flag of the
 * original expression is preserved; the global flag is added when it is
 * missing so that `lastIndex` is honoured by `exec`.
 *
 * @param text The string to search in.
 * @param re The regular expression to search for.
 * @param startPos The position in the string at which to begin the search. Defaults to the end of the string.
 * @returns The index of the last match starting at or before `startPos`, or -1 if there is none.
 */
export const regexLastIndexOf = function (
  text: string,
  re: RegExp,
  startPos?: number
): number {
  const limit = startPos === undefined ? text.length : startPos;

  // Keep all original flags (unicode, dotAll, ...) rather than only "m"/"i",
  // otherwise the pattern can silently change meaning.
  const flags = re.global ? re.flags : `${re.flags}g`;
  const matcher = new RegExp(re.source, flags);

  let lastSuccess = -1;
  let pos = 0;

  // Walk forward match by match, remembering the most recent one that still
  // starts within the limit.
  while (pos <= limit) {
    matcher.lastIndex = pos;
    const match = matcher.exec(text);

    if (!match || match.index > limit) {
      break;
    }

    lastSuccess = match.index;
    // Resume one past the match start so zero-length matches cannot stall.
    pos = match.index + 1;
  }

  return lastSuccess;
};
|
||||
Reference in New Issue
Block a user