From 7446599c0f6e947f6c8384edfa024818f82c59bc Mon Sep 17 00:00:00 2001 From: Daniel <845765@qq.com> Date: Wed, 27 Nov 2024 20:13:22 +0800 Subject: [PATCH] :art: Remove Unicode PUA characters https://github.com/siyuan-note/siyuan/issues/13291 --- kernel/api/workspace.go | 2 +- kernel/go.mod | 2 +- kernel/go.sum | 4 ++-- kernel/model/assets.go | 2 +- kernel/model/cloud_service.go | 4 ++-- kernel/model/export.go | 2 +- kernel/model/file.go | 2 +- kernel/model/history.go | 4 ++-- kernel/model/import.go | 1 + kernel/model/mount.go | 2 +- kernel/model/repository.go | 2 +- kernel/model/search.go | 2 +- kernel/model/sync.go | 2 +- kernel/model/virutalref.go | 3 ++- kernel/util/file.go | 2 +- kernel/util/misc.go | 3 +-- kernel/util/ocr.go | 4 ++-- kernel/util/rune.go | 8 +++++++- 18 files changed, 29 insertions(+), 22 deletions(-) diff --git a/kernel/api/workspace.go b/kernel/api/workspace.go index 65aa76b89..e90a1fff8 100644 --- a/kernel/api/workspace.go +++ b/kernel/api/workspace.go @@ -94,7 +94,7 @@ func createWorkspaceDir(c *gin.Context) { } absPath := arg["path"].(string) - absPath = gulu.Str.RemoveInvisible(absPath) + absPath = util.RemoveInvalid(absPath) absPath = strings.TrimSpace(absPath) if isInvalidWorkspacePath(absPath) { ret.Code = -1 diff --git a/kernel/go.mod b/kernel/go.mod index a432f8751..b25ea45d9 100644 --- a/kernel/go.mod +++ b/kernel/go.mod @@ -9,7 +9,7 @@ require ( github.com/88250/clipboard v0.1.5 github.com/88250/epub v0.0.0-20230830085737-c19055cd1f48 github.com/88250/go-humanize v0.0.0-20240424102817-4f78fac47ea7 - github.com/88250/gulu v1.2.3-0.20240612035750-c9cf5f7a4d02 + github.com/88250/gulu v1.2.3-0.20241127120230-1ae6a9868a2d github.com/88250/lute v1.7.7-0.20241127031345-f772b0ee2be8 github.com/88250/pdfcpu v0.3.14-0.20230401044135-c7369a99720c github.com/88250/vitess-sqlparser v0.0.0-20210205111146-56a2ded2aba1 diff --git a/kernel/go.sum b/kernel/go.sum index 6526387eb..1b21baece 100644 --- a/kernel/go.sum +++ b/kernel/go.sum @@ -12,8 
+12,8 @@ github.com/88250/go-humanize v0.0.0-20240424102817-4f78fac47ea7 h1:MafIFwSS0x6A4 github.com/88250/go-humanize v0.0.0-20240424102817-4f78fac47ea7/go.mod h1:HrKCCTin3YNDSLBD02K0AOljjV6eNwc3/zyEI+xyV1I= github.com/88250/go-sqlite3 v1.14.13-0.20231214121541-e7f54c482950 h1:Pa5hMiBceTVVqrYaDlLio2QSKbXMUmAZPbzCwT5eNCw= github.com/88250/go-sqlite3 v1.14.13-0.20231214121541-e7f54c482950/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= -github.com/88250/gulu v1.2.3-0.20240612035750-c9cf5f7a4d02 h1:3e5+yobj655pTeKOYMbJrnc1mE51ZkbXIxquTYZuYCY= -github.com/88250/gulu v1.2.3-0.20240612035750-c9cf5f7a4d02/go.mod h1:MUfzyfmbPrRDZLqxc7aPrVYveatTHRfoUa5TynPS0i8= +github.com/88250/gulu v1.2.3-0.20241127120230-1ae6a9868a2d h1:dexFyk3UkR4c2xpyrC4Zk4L28xFbfLYAeowIW/7QYEA= +github.com/88250/gulu v1.2.3-0.20241127120230-1ae6a9868a2d/go.mod h1:MUfzyfmbPrRDZLqxc7aPrVYveatTHRfoUa5TynPS0i8= github.com/88250/lute v1.7.7-0.20241127031345-f772b0ee2be8 h1:1gWOQT9m2o3E6X//wvI+bexbXgdHheN6YUZcABHMm4k= github.com/88250/lute v1.7.7-0.20241127031345-f772b0ee2be8/go.mod h1:VDAzL8b+oCh+e3NAlmwwLzC53ten0rZlS8NboB7ljtk= github.com/88250/pdfcpu v0.3.14-0.20230401044135-c7369a99720c h1:Dl/8S9iLyPMTElnWIBxmjaLiWrkI5P4a21ivwAn5pU0= diff --git a/kernel/model/assets.go b/kernel/model/assets.go index a1074750b..175ca67a8 100644 --- a/kernel/model/assets.go +++ b/kernel/model/assets.go @@ -607,7 +607,7 @@ func RenameAsset(oldPath, newName string) (newPath string, err error) { defer util.PushClearProgress() newName = strings.TrimSpace(newName) - newName = gulu.Str.RemoveInvisible(newName) + newName = util.RemoveInvalid(newName) if path.Base(oldPath) == newName { return } diff --git a/kernel/model/cloud_service.go b/kernel/model/cloud_service.go index 6623931c3..64d162b1b 100644 --- a/kernel/model/cloud_service.go +++ b/kernel/model/cloud_service.go @@ -566,7 +566,7 @@ func getUser(token string) (*conf.User, error) { func UseActivationcode(code string) (err error) { code = strings.TrimSpace(code) - 
code = gulu.Str.RemoveInvisible(code) + code = util.RemoveInvalid(code) requestResult := gulu.Ret.NewResult() request := httpclient.NewCloudRequest30s() resp, err := request. @@ -590,7 +590,7 @@ func UseActivationcode(code string) (err error) { func CheckActivationcode(code string) (retCode int, msg string) { code = strings.TrimSpace(code) - code = gulu.Str.RemoveInvisible(code) + code = util.RemoveInvalid(code) retCode = 1 requestResult := gulu.Ret.NewResult() request := httpclient.NewCloudRequest30s() diff --git a/kernel/model/export.go b/kernel/model/export.go index 127e2ab1f..2f998cfdb 100644 --- a/kernel/model/export.go +++ b/kernel/model/export.go @@ -609,7 +609,7 @@ func ExportDocx(id, savePath string, removeAssets, merge bool) (fullPath string, } // Pandoc template for exporting docx https://github.com/siyuan-note/siyuan/issues/8740 - docxTemplate := gulu.Str.RemoveInvisible(Conf.Export.DocxTemplate) + docxTemplate := util.RemoveInvalid(Conf.Export.DocxTemplate) docxTemplate = strings.TrimSpace(docxTemplate) if "" != docxTemplate { if !gulu.File.IsExist(docxTemplate) { diff --git a/kernel/model/file.go b/kernel/model/file.go index 260268cd0..a2b6d0cb6 100644 --- a/kernel/model/file.go +++ b/kernel/model/file.go @@ -1936,7 +1936,7 @@ func createDoc(boxID, p, title, dom string) (tree *parse.Tree, err error) { func removeInvisibleCharsInTitle(title string) string { // 不要踢掉 零宽连字符,否则有的 Emoji 会变形 https://github.com/siyuan-note/siyuan/issues/11480 title = strings.ReplaceAll(title, string(gulu.ZWJ), "__@ZWJ@__") - title = gulu.Str.RemoveInvisible(title) + title = util.RemoveInvalid(title) title = strings.ReplaceAll(title, "__@ZWJ@__", string(gulu.ZWJ)) return title } diff --git a/kernel/model/history.go b/kernel/model/history.go index 22cbb859a..700498654 100644 --- a/kernel/model/history.go +++ b/kernel/model/history.go @@ -405,7 +405,7 @@ type HistoryItem struct { const fileHistoryPageSize = 32 func FullTextSearchHistory(query, box, op string, typ, page int) (ret 
[]string, pageCount, totalCount int) { - query = gulu.Str.RemoveInvisible(query) + query = util.RemoveInvalid(query) if "" != query && HistoryTypeDocID != typ { query = stringQuery(query) } @@ -440,7 +440,7 @@ func FullTextSearchHistory(query, box, op string, typ, page int) (ret []string, } func FullTextSearchHistoryItems(created, query, box, op string, typ int) (ret []*HistoryItem) { - query = gulu.Str.RemoveInvisible(query) + query = util.RemoveInvalid(query) if "" != query && HistoryTypeDocID != typ { query = stringQuery(query) } diff --git a/kernel/model/import.go b/kernel/model/import.go index b8d6b2de0..d3215cfff 100644 --- a/kernel/model/import.go +++ b/kernel/model/import.go @@ -67,6 +67,7 @@ func HTML2Markdown(htmlStr string, luteEngine *lute.Lute) (markdown string, with } func HTML2Tree(htmlStr string, luteEngine *lute.Lute) (tree *parse.Tree, withMath bool) { + htmlStr = util.RemoveInvalid(htmlStr) assetDirPath := filepath.Join(util.DataDir, "assets") tree = luteEngine.HTML2Tree(htmlStr) ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus { diff --git a/kernel/model/mount.go b/kernel/model/mount.go index 367c5d4dc..41eabccd1 100644 --- a/kernel/model/mount.go +++ b/kernel/model/mount.go @@ -35,7 +35,7 @@ import ( ) func CreateBox(name string) (id string, err error) { - name = gulu.Str.RemoveInvisible(name) + name = util.RemoveInvalid(name) if 512 < utf8.RuneCountInString(name) { // 限制笔记本名和文档名最大长度为 `512` https://github.com/siyuan-note/siyuan/issues/6299 err = errors.New(Conf.Language(106)) diff --git a/kernel/model/repository.go b/kernel/model/repository.go index 851f14a9c..987ad3493 100644 --- a/kernel/model/repository.go +++ b/kernel/model/repository.go @@ -1022,7 +1022,7 @@ func TagSnapshot(id, name string) (err error) { } name = strings.TrimSpace(name) - name = gulu.Str.RemoveInvisible(name) + name = util.RemoveInvalid(name) if "" == name { err = errors.New(Conf.Language(142)) return diff --git a/kernel/model/search.go 
b/kernel/model/search.go index b00088f52..dd50121e4 100644 --- a/kernel/model/search.go +++ b/kernel/model/search.go @@ -1993,7 +1993,7 @@ func getRefSearchIgnoreLines() (ret []string) { func filterQueryInvisibleChars(query string) string { query = strings.ReplaceAll(query, " ", "_@full_width_space@_") - query = gulu.Str.RemoveInvisible(query) + query = util.RemoveInvalid(query) query = strings.ReplaceAll(query, "_@full_width_space@_", " ") return query } diff --git a/kernel/model/sync.go b/kernel/model/sync.go index 740bdd9b4..625ddebe1 100644 --- a/kernel/model/sync.go +++ b/kernel/model/sync.go @@ -470,7 +470,7 @@ func CreateCloudSyncDir(name string) (err error) { } name = strings.TrimSpace(name) - name = gulu.Str.RemoveInvisible(name) + name = util.RemoveInvalid(name) if !cloud.IsValidCloudDirName(name) { return errors.New(Conf.Language(37)) } diff --git a/kernel/model/virutalref.go b/kernel/model/virutalref.go index 5cb7d94eb..1ad3018a1 100644 --- a/kernel/model/virutalref.go +++ b/kernel/model/virutalref.go @@ -33,6 +33,7 @@ import ( "github.com/siyuan-note/siyuan/kernel/sql" "github.com/siyuan-note/siyuan/kernel/task" "github.com/siyuan-note/siyuan/kernel/treenode" + "github.com/siyuan-note/siyuan/kernel/util" ) // virtualBlockRefCache 用于保存块关联的虚拟引用关键字。 @@ -170,7 +171,7 @@ func processVirtualRef(n *ast.Node, unlinks *[]*ast.Node, virtualBlockRefKeyword } content := string(n.Tokens) - tmp := gulu.Str.RemoveInvisible(content) + tmp := util.RemoveInvalid(content) tmp = strings.TrimSpace(tmp) if "" == tmp { return false diff --git a/kernel/util/file.go b/kernel/util/file.go index 619e6e336..112d0c2a6 100644 --- a/kernel/util/file.go +++ b/kernel/util/file.go @@ -242,7 +242,7 @@ func FilterFileName(name string) string { name = strings.ReplaceAll(name, ">", "_") name = strings.ReplaceAll(name, "|", "_") name = strings.TrimSpace(name) - name = gulu.Str.RemoveInvisible(name) // Remove invisible characters from file names when uploading assets 
https://github.com/siyuan-note/siyuan/issues/11683 + name = RemoveInvalid(name) // Remove invisible characters from file names when uploading assets https://github.com/siyuan-note/siyuan/issues/11683 return name } diff --git a/kernel/util/misc.go b/kernel/util/misc.go index 1f2ef1369..c1708465e 100644 --- a/kernel/util/misc.go +++ b/kernel/util/misc.go @@ -26,7 +26,6 @@ import ( "time" "unicode" - "github.com/88250/gulu" "github.com/88250/lute/html" ) @@ -133,7 +132,7 @@ func RemoveRedundantSpace(str string) string { } func Convert2Float(s string) (float64, bool) { - s = gulu.Str.RemoveInvisible(s) + s = RemoveInvalid(s) s = strings.ReplaceAll(s, " ", "") s = strings.ReplaceAll(s, ",", "") buf := bytes.Buffer{} diff --git a/kernel/util/ocr.go b/kernel/util/ocr.go index 1233808a1..4a75a7c2a 100644 --- a/kernel/util/ocr.go +++ b/kernel/util/ocr.go @@ -248,7 +248,7 @@ func Tesseract(imgAbsPath string) (ret []map[string]interface{}) { ret = append(ret, dataMap) } - tsv = gulu.Str.RemoveInvisible(tsv) + tsv = RemoveInvalid(tsv) tsv = RemoveRedundantSpace(tsv) msg := fmt.Sprintf("OCR [%s] [%s]", html.EscapeString(info.Name()), html.EscapeString(GetOcrJsonText(ret))) PushStatusBar(msg) @@ -266,7 +266,7 @@ func GetOcrJsonText(jsonData []map[string]interface{}) (ret string) { } } } - ret = gulu.Str.RemoveInvisible(ret) + ret = RemoveInvalid(ret) ret = RemoveRedundantSpace(ret) return ret } diff --git a/kernel/util/rune.go b/kernel/util/rune.go index 9f53be800..232e6bcbf 100644 --- a/kernel/util/rune.go +++ b/kernel/util/rune.go @@ -56,6 +56,12 @@ var emojiRegex = regexp.MustCompile(`/([0-9#][\x{20E3}])|` + func RemoveEmojiInvisible(text string) (ret string) { ret = emojiRegex.ReplaceAllString(text, "") - ret = gulu.Str.RemoveInvisible(ret) + ret = RemoveInvalid(ret) + return +} + +func RemoveInvalid(text string) (ret string) { + ret = gulu.Str.RemoveInvisible(text) + ret = gulu.Str.RemovePUA(ret) return }