fix: CJK content results in long context strings in search results
closes #7183
This commit is contained in:
@@ -275,6 +275,8 @@ const ResultContext = styled(Highlight)`
|
|||||||
font-size: 15px;
|
font-size: 15px;
|
||||||
margin-top: -0.25em;
|
margin-top: -0.25em;
|
||||||
margin-bottom: 0.25em;
|
margin-bottom: 0.25em;
|
||||||
|
max-height: 90px;
|
||||||
|
overflow: hidden;
|
||||||
`;
|
`;
|
||||||
|
|
||||||
export default observer(React.forwardRef(DocumentListItem));
|
export default observer(React.forwardRef(DocumentListItem));
|
||||||
|
|||||||
@@ -465,7 +465,7 @@ describe("SearchHelper", () => {
|
|||||||
expect(totalCount).toBe(0);
|
expect(totalCount).toBe(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
test("should find extact phrases", async () => {
|
test("should find exact phrases", async () => {
|
||||||
const team = await buildTeam();
|
const team = await buildTeam();
|
||||||
const user = await buildUser({ teamId: team.id });
|
const user = await buildUser({ teamId: team.id });
|
||||||
const collection = await buildCollection({
|
const collection = await buildCollection({
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import map from "lodash/map";
|
|||||||
import queryParser from "pg-tsquery";
|
import queryParser from "pg-tsquery";
|
||||||
import { Op, Sequelize, WhereOptions } from "sequelize";
|
import { Op, Sequelize, WhereOptions } from "sequelize";
|
||||||
import { DateFilter, StatusFilter } from "@shared/types";
|
import { DateFilter, StatusFilter } from "@shared/types";
|
||||||
|
import { regexIndexOf, regexLastIndexOf } from "@shared/utils/string";
|
||||||
import { getUrls } from "@shared/utils/urls";
|
import { getUrls } from "@shared/utils/urls";
|
||||||
import Collection from "@server/models/Collection";
|
import Collection from "@server/models/Collection";
|
||||||
import Document from "@server/models/Document";
|
import Document from "@server/models/Document";
|
||||||
@@ -304,16 +305,39 @@ export default class SearchHelper {
|
|||||||
"gi"
|
"gi"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Breaking characters
|
||||||
|
const breakChars = [
|
||||||
|
" ",
|
||||||
|
".",
|
||||||
|
",",
|
||||||
|
`"`,
|
||||||
|
"'",
|
||||||
|
"\n",
|
||||||
|
"。",
|
||||||
|
"!",
|
||||||
|
"?",
|
||||||
|
"!",
|
||||||
|
"?",
|
||||||
|
"…",
|
||||||
|
];
|
||||||
|
const breakCharsRegex = new RegExp(`[${breakChars.join("")}]`, "g");
|
||||||
|
|
||||||
// chop text around the first match, prefer the first full match if possible.
|
// chop text around the first match, prefer the first full match if possible.
|
||||||
const fullMatchIndex = text.search(fullMatchRegex);
|
const fullMatchIndex = text.search(fullMatchRegex);
|
||||||
const offsetStartIndex =
|
const offsetStartIndex =
|
||||||
(fullMatchIndex >= 0 ? fullMatchIndex : text.search(highlightRegex)) - 65;
|
(fullMatchIndex >= 0 ? fullMatchIndex : text.search(highlightRegex)) - 65;
|
||||||
const startIndex = Math.max(
|
const startIndex = Math.max(
|
||||||
0,
|
0,
|
||||||
offsetStartIndex <= 0 ? 0 : text.indexOf(" ", offsetStartIndex)
|
offsetStartIndex <= 0
|
||||||
|
? 0
|
||||||
|
: regexIndexOf(text, breakCharsRegex, offsetStartIndex)
|
||||||
);
|
);
|
||||||
const context = text.replace(highlightRegex, "<b>$&</b>");
|
const context = text.replace(highlightRegex, "<b>$&</b>");
|
||||||
const endIndex = context.lastIndexOf(" ", startIndex + 250);
|
const endIndex = regexLastIndexOf(
|
||||||
|
context,
|
||||||
|
breakCharsRegex,
|
||||||
|
startIndex + 250
|
||||||
|
);
|
||||||
|
|
||||||
return context.slice(startIndex, endIndex);
|
return context.slice(startIndex, endIndex);
|
||||||
}
|
}
|
||||||
|
|||||||
65
shared/utils/string.ts
Normal file
65
shared/utils/string.ts
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
/**
 * Returns the index of the first match of a regular expression in a string,
 * searching from a given position. This is the regular expression counterpart
 * of `String.prototype.indexOf`.
 *
 * The supplied regular expression is never mutated: the search runs on a
 * private copy, so the caller's `lastIndex` is left untouched. All flags of
 * the original expression are preserved (including `s`, `u` and `d`), except
 * that `g` is always enabled so that `lastIndex` is honoured, and `y` is
 * removed because a sticky match would only be attempted exactly at
 * `startPos` rather than searching forward from it.
 *
 * @param text The string to search in.
 * @param re The regular expression to search for.
 * @param startPos The position in the string at which to begin the search. Defaults to 0.
 * @returns The index of the first match at or after `startPos`, or -1 if there is none.
 */
export const regexIndexOf = function (
  text: string,
  re: RegExp,
  startPos?: number
): number {
  // Clone rather than reuse `re`: keeps every semantic flag and avoids leaking
  // lastIndex changes into a regex the caller may share between calls.
  const flags = re.flags.replace(/[gy]/g, "") + "g";
  const matcher = new RegExp(re.source, flags);

  matcher.lastIndex = startPos ?? 0;
  const match = matcher.exec(text);

  return match ? match.index : -1;
};
|
||||||
|
|
||||||
|
/**
 * Returns the index of the last match of a regular expression in a string
 * that begins at or before a given position. This is the regular expression
 * counterpart of `String.prototype.lastIndexOf`.
 *
 * The supplied regular expression is never mutated: the search runs on a
 * private copy, so the caller's `lastIndex` is left untouched. All flags of
 * the original expression are preserved (including `s`, `u` and `d`), except
 * that `g` is always enabled so that `lastIndex` is honoured, and `y` is
 * removed because a sticky match would stop the scan at the first gap
 * between matches.
 *
 * @param text The string to search in.
 * @param re The regular expression to search for.
 * @param startPos The position in the string at which to begin the search. Defaults to the end of the string.
 * @returns The index of the last match starting at or before `startPos`, or -1 if there is none.
 */
export const regexLastIndexOf = function (
  text: string,
  re: RegExp,
  startPos?: number
): number {
  const limit = startPos ?? text.length;

  // Clone rather than reuse `re`: keeps every semantic flag and avoids leaking
  // lastIndex changes into a regex the caller may share between calls.
  const flags = re.flags.replace(/[gy]/g, "") + "g";
  const matcher = new RegExp(re.source, flags);

  let lastSuccess = -1;
  let pos = 0;

  // Regular expressions cannot search backwards, so walk forward match by
  // match and remember the last one that starts within the limit.
  while (pos <= limit) {
    matcher.lastIndex = pos;

    const match = matcher.exec(text);
    if (!match || match.index > limit) {
      break;
    }

    lastSuccess = match.index;
    // Resume one past the match start (not its end) so that overlapping and
    // zero-length matches are still visited and the loop always advances.
    pos = match.index + 1;
  }

  return lastSuccess;
};
|
||||||
Reference in New Issue
Block a user