forked from drowl87/hextra_mirror
feat: enhance FlexSearch encoding for CJK support (#553)
- Added support for CJK (Chinese, Japanese, Korean) languages in FlexSearch encoding.
- Introduced `isCJK` function to detect language and select appropriate encoding method.
- Implemented `encodeCJK` and `encodeDefault` functions for different tokenization strategies.
This commit is contained in:
parent
a1232ecf9f
commit
14036ffea6
@ -195,8 +195,19 @@ document.addEventListener("DOMContentLoaded", function () {
|
|||||||
*/
|
*/
|
||||||
async function preloadIndex() {
|
async function preloadIndex() {
|
||||||
const tokenize = '{{- site.Params.search.flexsearch.tokenize | default "forward" -}}';
|
const tokenize = '{{- site.Params.search.flexsearch.tokenize | default "forward" -}}';
|
||||||
|
|
||||||
|
/**
 * Reports whether the page language is Chinese, Japanese, or Korean.
 *
 * Reads the `lang` attribute of the root element (falling back to "en" when
 * it is empty) and inspects only its primary language subtag.
 *
 * @returns {boolean} `true` when the primary subtag denotes a CJK language.
 */
const isCJK = () => {
  const lang = document.documentElement.lang || "en";
  // Language tags are case-insensitive and may carry script/region subtags
  // ("zh-Hant-TW", "ja_JP"). Comparing the whole primary subtag instead of a
  // string prefix keeps unrelated tags such as "kok" or "jam" from matching.
  const [primary] = lang.trim().toLowerCase().split(/[-_]/);
  // Two-letter codes plus their three-letter ISO 639 equivalents.
  const cjkSubtags = ["zh", "ja", "ko", "zho", "jpn", "kor"];
  return cjkSubtags.includes(primary);
};
|
||||||
|
|
||||||
|
/**
 * Encoder for CJK pages: drops every ASCII character (U+0000-U+007F) and
 * emits each remaining character as its own token.
 *
 * The input is coerced to a string first, and the result is split by Unicode
 * code point rather than by UTF-16 code unit, so ideographs outside the Basic
 * Multilingual Plane stay intact instead of becoming two lone surrogates.
 *
 * @param {*} str - Text to encode.
 * @returns {string[]} One token per non-ASCII code point.
 */
const encodeCJK = (str) => Array.from(String(str).replace(/[\x00-\x7F]/g, ""));
|
||||||
|
/**
 * Encoder for non-CJK pages: coerces the input to a string, lower-cases it
 * with the locale-aware routine, and splits it on runs of Unicode
 * separators, symbols, punctuation, and control/other characters.
 *
 * @param {*} str - Text to encode.
 * @returns {string[]} Lower-cased tokens; a leading or trailing delimiter
 *   produces an empty string at that end of the array.
 */
const encodeDefault = (str) => {
  const lowered = ("" + str).toLocaleLowerCase();
  return lowered.split(/[\p{Z}\p{S}\p{P}\p{C}]+/u);
};
|
||||||
|
// Pick the encoder once, up front: the per-character CJK encoder when
// isCJK() reports a CJK page language, otherwise the default encoder.
const encodeFunction = isCJK() ? encodeCJK : encodeDefault;
|
||||||
|
|
||||||
window.pageIndex = new FlexSearch.Document({
|
window.pageIndex = new FlexSearch.Document({
|
||||||
tokenize,
|
tokenize,
|
||||||
|
encode: encodeFunction,
|
||||||
cache: 100,
|
cache: 100,
|
||||||
document: {
|
document: {
|
||||||
id: 'id',
|
id: 'id',
|
||||||
@ -207,6 +218,7 @@ document.addEventListener("DOMContentLoaded", function () {
|
|||||||
|
|
||||||
window.sectionIndex = new FlexSearch.Document({
|
window.sectionIndex = new FlexSearch.Document({
|
||||||
tokenize,
|
tokenize,
|
||||||
|
encode: encodeFunction,
|
||||||
cache: 100,
|
cache: 100,
|
||||||
document: {
|
document: {
|
||||||
id: 'id',
|
id: 'id',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user