📝 更新 OCR 注意事项

This commit is contained in:
Liang Ding 2023-02-02 23:20:51 +08:00
parent 4fd89b759f
commit f2004f2b07
No known key found for this signature in database
GPG Key ID: 136F30F901A2231D
3 changed files with 298 additions and 59 deletions

View File

@ -6,7 +6,7 @@
"id": "20200924100744-br924ar", "id": "20200924100744-br924ar",
"title": "Assets", "title": "Assets",
"type": "doc", "type": "doc",
"updated": "20230117003842" "updated": "20230202231916"
}, },
"Children": [ "Children": [
{ {
@ -696,33 +696,110 @@
{ {
"Type": "NodeText", "Type": "NodeText",
"Data": " program needs to be manually installed first. " "Data": " program needs to be manually installed first. "
}
]
},
{
"ID": "20230202231728-0z5bs0m",
"Type": "NodeParagraph",
"Properties": {
"id": "20230202231728-0z5bs0m",
"updated": "20230202231731"
},
"Children": [
{
"Type": "NodeText",
"Data": ""
}, },
{ {
"Type": "NodeTextMark", "Type": "NodeTextMark",
"TextMarkType": "tag", "TextMarkType": "tag",
"TextMarkTextContent": "Note" "TextMarkTextContent": "Note:"
}
]
},
{
"ID": "20230202231731-bdh7lab",
"Type": "NodeList",
"ListData": {},
"Properties": {
"id": "20230202231731-bdh7lab",
"updated": "20230202231916"
},
"Children": [
{
"ID": "20230202231732-n7z8jth",
"Type": "NodeListItem",
"ListData": {
"BulletChar": 42,
"Marker": "Kg=="
},
"Properties": {
"id": "20230202231732-n7z8jth",
"updated": "20230202231916"
},
"Children": [
{
"ID": "20230202231732-f3jkj7p",
"Type": "NodeParagraph",
"Properties": {
"id": "20230202231732-f3jkj7p",
"updated": "20230202231916"
},
"Children": [
{
"Type": "NodeText",
"Data": "When installing Tesseract OCR, you need to check the language pack you need, and add the installed Tesseract-OCR directory path to the environment variable PATH, so that SiYuan can directly call the "
},
{
"Type": "NodeTextMark",
"TextMarkType": "code",
"TextMarkTextContent": "tesseract"
},
{
"Type": "NodeText",
"Data": " command to extract text from image OCR. If the installation is normal, you can search for "
},
{
"Type": "NodeTextMark",
"TextMarkType": "code",
"TextMarkTextContent": "tesseract-ocr enabled"
},
{
"Type": "NodeText",
"Data": " in the kernel boot log"
}
]
}
]
}, },
{ {
"Type": "NodeText", "ID": "20230202231800-z8hswmk",
"Data": " that you need to check the Chinese language pack when installing Tesseract OCR, and add the installed Tesseract-OCR directory path to the environment variable PATH, so that SiYuan can directly call the " "Type": "NodeListItem",
}, "ListData": {
{ "BulletChar": 42,
"Type": "NodeTextMark", "Marker": "Kg=="
"TextMarkType": "code", },
"TextMarkTextContent": "tesseract" "Properties": {
}, "id": "20230202231800-z8hswmk",
{ "updated": "20230202231819"
"Type": "NodeText", },
"Data": " command to extract text from image OCR. If the installation is normal, you can search for " "Children": [
}, {
{ "ID": "20230202231800-c3x45ky",
"Type": "NodeTextMark", "Type": "NodeParagraph",
"TextMarkType": "code", "Properties": {
"TextMarkTextContent": "tesseract-ocr enabled" "id": "20230202231800-c3x45ky",
}, "updated": "20230202231819"
{ },
"Type": "NodeText", "Children": [
"Data": " in the kernel boot log." {
"Type": "NodeText",
"Data": "Do not install too many language packs, otherwise it will cause OCR to be slow or even timeout to return empty results, and take up too many system resources"
}
]
}
]
} }
] ]
}, },

View File

@ -6,7 +6,7 @@
"id": "20200915214115-42b8zma", "id": "20200915214115-42b8zma",
"title": "资源文件", "title": "资源文件",
"type": "doc", "type": "doc",
"updated": "20230117003750" "updated": "20230202231842"
}, },
"Children": [ "Children": [
{ {
@ -718,6 +718,20 @@
{ {
"Type": "NodeText", "Type": "NodeText",
"Data": " 程序。" "Data": " 程序。"
}
]
},
{
"ID": "20230202231304-22lvszc",
"Type": "NodeParagraph",
"Properties": {
"id": "20230202231304-22lvszc",
"updated": "20230202231309"
},
"Children": [
{
"Type": "NodeText",
"Data": ""
}, },
{ {
"Type": "NodeTextMark", "Type": "NodeTextMark",
@ -726,25 +740,92 @@
}, },
{ {
"Type": "NodeText", "Type": "NodeText",
"Data": "​在安装 Tesseract OCR 时需要勾选中文语言包,并将安装后的 Tesseract-OCR 目录路径添加到环境变量 PATH 中,这样思源才能直接调用 " "Data": ""
}
]
},
{
"ID": "20230202231309-pcjl7c2",
"Type": "NodeList",
"ListData": {},
"Properties": {
"id": "20230202231309-pcjl7c2",
"updated": "20230202231842"
},
"Children": [
{
"ID": "20230202231311-7qdk1za",
"Type": "NodeListItem",
"ListData": {
"BulletChar": 42,
"Marker": "Kg=="
},
"Properties": {
"id": "20230202231311-7qdk1za",
"updated": "20230202231842"
},
"Children": [
{
"ID": "20230202231311-n1pf7in",
"Type": "NodeParagraph",
"Properties": {
"id": "20230202231311-n1pf7in",
"updated": "20230202231842"
},
"Children": [
{
"Type": "NodeText",
"Data": "在安装 Tesseract OCR 时需要勾选你需要的语言包,并将安装后的 Tesseract-OCR 目录路径添加到环境变量 PATH 中,这样思源才能直接调用 "
},
{
"Type": "NodeTextMark",
"TextMarkType": "code",
"TextMarkTextContent": "tesseract"
},
{
"Type": "NodeText",
"Data": " 命令进行图片 OCR 提取文本。如果安装正常的话,在内核启动日志中可以搜索到 "
},
{
"Type": "NodeTextMark",
"TextMarkType": "code",
"TextMarkTextContent": "tesseract-ocr enabled"
},
{
"Type": "NodeText",
"Data": ""
}
]
}
]
}, },
{ {
"Type": "NodeTextMark", "ID": "20230202231321-q1b1tza",
"TextMarkType": "code", "Type": "NodeListItem",
"TextMarkTextContent": "tesseract" "ListData": {
}, "BulletChar": 42,
{ "Marker": "Kg=="
"Type": "NodeText", },
"Data": " 命令进行图片 OCR 提取文本。如果安装正常的话,在内核启动日志中可以搜索到 " "Properties": {
}, "id": "20230202231321-q1b1tza",
{ "updated": "20230202231443"
"Type": "NodeTextMark", },
"TextMarkType": "code", "Children": [
"TextMarkTextContent": "tesseract-ocr enabled" {
}, "ID": "20230202231321-5ugmgf0",
{ "Type": "NodeParagraph",
"Type": "NodeText", "Properties": {
"Data": "​。" "id": "20230202231321-5ugmgf0",
"updated": "20230202231443"
},
"Children": [
{
"Type": "NodeText",
"Data": "语言包不要安装太多,否则会导致 OCR 缓慢甚至超时返回空结果,并且占用过多的系统资源"
}
]
}
]
} }
] ]
}, },

View File

@ -5,7 +5,7 @@
"Properties": { "Properties": {
"id": "20211226123038-4umgpxy", "id": "20211226123038-4umgpxy",
"title": "資料文件", "title": "資料文件",
"updated": "20230117003908" "updated": "20230202231927"
}, },
"Children": [ "Children": [
{ {
@ -696,7 +696,21 @@
}, },
{ {
"Type": "NodeText", "Type": "NodeText",
"Data": " 程序。​" "Data": " 程序。"
}
]
},
{
"ID": "20230202231513-80u3j7f",
"Type": "NodeParagraph",
"Properties": {
"id": "20230202231513-80u3j7f",
"updated": "20230202231515"
},
"Children": [
{
"Type": "NodeText",
"Data": ""
}, },
{ {
"Type": "NodeTextMark", "Type": "NodeTextMark",
@ -705,25 +719,92 @@
}, },
{ {
"Type": "NodeText", "Type": "NodeText",
"Data": "​在安裝 Tesseract OCR 時需要勾選中文語言包,並將安裝後的 Tesseract-OCR 目錄路徑添加到環境變量 PATH 中,這樣思源才能直接調用 " "Data": ""
}
]
},
{
"ID": "20230202231516-o6k9mj1",
"Type": "NodeList",
"ListData": {},
"Properties": {
"id": "20230202231516-o6k9mj1",
"updated": "20230202231927"
},
"Children": [
{
"ID": "20230202231516-pwj2ndg",
"Type": "NodeListItem",
"ListData": {
"BulletChar": 42,
"Marker": "Kg=="
},
"Properties": {
"id": "20230202231516-pwj2ndg",
"updated": "20230202231927"
},
"Children": [
{
"ID": "20230202231516-8trf08t",
"Type": "NodeParagraph",
"Properties": {
"id": "20230202231516-8trf08t",
"updated": "20230202231927"
},
"Children": [
{
"Type": "NodeText",
"Data": "在安裝 Tesseract OCR 時需要勾選你需要的語言包,並將安裝後的 Tesseract-OCR 目錄路徑添加到環境變量 PATH 中,這樣思源才能直接調用 "
},
{
"Type": "NodeTextMark",
"TextMarkType": "code",
"TextMarkTextContent": "tesseract"
},
{
"Type": "NodeText",
"Data": " 命令進行圖片 OCR 提取文本。如果安裝正常的話,在內核啟動日誌中可以搜索到 "
},
{
"Type": "NodeTextMark",
"TextMarkType": "code",
"TextMarkTextContent": "tesseract-ocr enabled"
},
{
"Type": "NodeText",
"Data": ""
}
]
}
]
}, },
{ {
"Type": "NodeTextMark", "ID": "20230202231519-x47s7he",
"TextMarkType": "code", "Type": "NodeListItem",
"TextMarkTextContent": "tesseract" "ListData": {
}, "BulletChar": 42,
{ "Marker": "Kg=="
"Type": "NodeText", },
"Data": " 命令進行圖片 OCR 提取文本。如果安裝正常的話,在內核啟動日誌中可以搜索到 " "Properties": {
}, "id": "20230202231519-x47s7he",
{ "updated": "20230202231811"
"Type": "NodeTextMark", },
"TextMarkType": "code", "Children": [
"TextMarkTextContent": "tesseract-ocr enabled" {
}, "ID": "20230202231519-04f6dh6",
{ "Type": "NodeParagraph",
"Type": "NodeText", "Properties": {
"Data": "​。" "id": "20230202231519-04f6dh6",
"updated": "20230202231811"
},
"Children": [
{
"Type": "NodeText",
"Data": "語言包不要安裝太多,否則會導致 OCR 緩慢甚至超時返回空結果,並且佔用過多的系統資源"
}
]
}
]
} }
] ]
}, },