forked from drowl87/hextra_mirror
feat: enhance FlexSearch encoding for CJK support (#553)
- Added support for CJK (Chinese, Japanese, Korean) languages in FlexSearch encoding.
- Introduced `isCJK` function to detect language and select appropriate encoding method.
- Implemented `encodeCJK` and `encodeDefault` functions for different tokenization strategies.
This commit is contained in:
parent
a1232ecf9f
commit
14036ffea6
@ -195,8 +195,19 @@ document.addEventListener("DOMContentLoaded", function () {
|
||||
*/
|
||||
async function preloadIndex() {
|
||||
const tokenize = '{{- site.Params.search.flexsearch.tokenize | default "forward" -}}';
|
||||
|
||||
/**
 * Report whether the page language is Chinese, Japanese, or Korean.
 *
 * Reads the `lang` attribute of the root element, falling back to "en"
 * when it is empty, and inspects only the primary language subtag.
 *
 * @returns {boolean} true when the primary subtag is "zh", "ja", or "ko".
 */
const isCJK = () => {
  const lang = document.documentElement.lang || "en";
  // Language tags are case-insensitive, and a plain prefix test would also
  // match unrelated codes such as "kok" (Konkani), so compare the
  // lowercased primary subtag exactly.
  const [primary] = lang.trim().toLowerCase().split(/[-_]/);
  return primary === "zh" || primary === "ja" || primary === "ko";
};
|
||||
|
||||
/**
 * Tokenize text for CJK search: drop every ASCII character, then emit one
 * token per remaining character.
 *
 * Splits by Unicode code point rather than UTF-16 code unit, so
 * supplementary-plane ideographs (e.g. "𠮷") stay intact instead of being
 * broken into two lone surrogates. The input is coerced to a string, as
 * `encodeDefault` does, so non-string values do not throw.
 *
 * @param {*} str - Text to tokenize.
 * @returns {string[]} One entry per non-ASCII character, in order.
 */
const encodeCJK = (str) => Array.from(String(str).replace(/[\x00-\x7F]/g, ""));
|
||||
/**
 * Tokenize text for non-CJK search: lowercase it and split on runs of
 * Unicode separators, symbols, punctuation, and control characters.
 *
 * Uses locale-independent lowercasing so the produced tokens do not vary
 * with the visitor's browser locale (e.g. Turkish "I" -> "ı"), and drops
 * the empty tokens that `split` yields for leading/trailing separators or
 * empty input.
 *
 * @param {*} str - Text to tokenize; coerced to a string.
 * @returns {string[]} Non-empty lowercase tokens, in order.
 */
const encodeDefault = (str) =>
  String(str)
    .toLowerCase()
    .split(/[\p{Z}\p{S}\p{P}\p{C}]+/u)
    .filter(Boolean);
|
||||
// Decide once, up front, which tokenizer the search indexes will use.
const useCJKEncoder = isCJK();
const encodeFunction = useCJKEncoder ? encodeCJK : encodeDefault;
|
||||
|
||||
window.pageIndex = new FlexSearch.Document({
|
||||
tokenize,
|
||||
encode: encodeFunction,
|
||||
cache: 100,
|
||||
document: {
|
||||
id: 'id',
|
||||
@ -207,6 +218,7 @@ document.addEventListener("DOMContentLoaded", function () {
|
||||
|
||||
window.sectionIndex = new FlexSearch.Document({
|
||||
tokenize,
|
||||
encode: encodeFunction,
|
||||
cache: 100,
|
||||
document: {
|
||||
id: 'id',
|
||||
|
Loading…
x
Reference in New Issue
Block a user