From 2033ece38432c9dc01c4b7eff92beec702a9aa29 Mon Sep 17 00:00:00 2001
From: sml2h3
Date: Sat, 26 Feb 2022 15:59:40 +0800
Subject: [PATCH] Update the README documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ddddocr/README.md   | 22 ++++++++++++-
 ddddocr/__init__.py | 79 +++++++++++++++++++++++++++++++--------------
 2 files changed, 76 insertions(+), 25 deletions(-)

diff --git a/ddddocr/README.md b/ddddocr/README.md
index c6b7e02..46eba74 100644
--- a/ddddocr/README.md
+++ b/ddddocr/README.md
@@ -3,7 +3,27 @@
 # 带带弟弟OCR (DdddOcr): a free and open-source general-purpose captcha recognition SDK
 
 # ddddocr has been updated again today!
 
-## The current version is 1.4.0
+## The current version is 1.4.3
+
+The main purpose of this release is to support the newly open-sourced [dddd_trainer](https://github.com/sml2h3/dddd_trainer), so that models trained with [dddd_trainer](https://github.com/sml2h3/dddd_trainer) can be imported into ddddocr and used directly.
+
+## Using ddddocr with a model trained by [dddd_trainer](https://github.com/sml2h3/dddd_trainer)
+
+After training, [dddd_trainer](https://github.com/sml2h3/dddd_trainer) exports charsets.json and an ONNX model into its models directory.
+
+As shown below, import_onnx_path is the path to the exported ONNX model, and charsets_path is the path to charsets.json:
+```python
+import ddddocr
+
+ocr = ddddocr.DdddOcr(det=False, ocr=False, import_onnx_path="myproject_0.984375_139_13000_2022-02-26-15-34-13.onnx", charsets_path="charsets.json")
+
+with open('888e28774f815b01e871d474e5c84ff2.jpg', 'rb') as f:
+    image_bytes = f.read()
+
+res = ocr.classification(image_bytes)
+print(res)
+
+```
 
 # Donations (if this project has helped you, you can choose to donate toward the ongoing maintenance of ddddocr; this project is maintained long term)
diff --git a/ddddocr/__init__.py b/ddddocr/__init__.py
index 41aa654..eac66cc 100644
--- a/ddddocr/__init__.py
+++ b/ddddocr/__init__.py
@@ -5,7 +5,7 @@ warnings.filterwarnings('ignore')
 import io
 import os
 import base64
-import pathlib
+import json
 import onnxruntime
 from PIL import Image, ImageChops
 import numpy as np
@@ -29,13 +29,26 @@ class TypeError(Exception):
 
 class DdddOcr(object):
     def __init__(self, ocr: bool = True, det: bool = False, old: bool = False, use_gpu: bool = False,
-                 device_id: int = 0, show_ad=True):
+                 device_id: int = 0, show_ad=True, import_onnx_path: str = "", charsets_path: str = ""):
         if show_ad:
             print("欢迎使用ddddocr,本项目专注带动行业内卷,个人博客:wenanzhe.com")
             print("训练数据支持来源于:http://146.56.204.113:19199/preview")
             print("爬虫框架feapder可快速一键接入,快速开启爬虫之旅:https://github.com/Boris-code/feapder")
-
-
+        self.use_import_onnx = False
+        self.__word = False
+        self.__resize = []
+        self.__channel = 1
+        if import_onnx_path != "":
+            det = False
+            ocr = False
+            self.__graph_path = import_onnx_path
+            with open(charsets_path, 'r', encoding="utf-8") as f:
+                info = json.loads(f.read())
+            self.__charset = info['charset']
+            self.__word = info['word']
+            self.__resize = info['image']
+            self.__channel = info['channel']
+            self.use_import_onnx = True
         if det:
             ocr = False
             print("开启det后自动关闭ocr")
@@ -1440,7 +1453,7 @@
         self.__providers = [
             'CPUExecutionProvider',
         ]
-        if ocr or det:
+        if ocr or det or self.use_import_onnx:
             self.__ort_session = onnxruntime.InferenceSession(self.__graph_path, providers=self.__providers)
 
     def preproc(self, img, input_size, swap=(2, 0, 1)):
@@ -1581,35 +1594,53 @@
                 return []
         return result
 
-    def classification(self, img):
+    def classification(self, img_bytes: bytes = None, img_base64: str = None):
         if self.det:
             raise TypeError("当前识别类型为目标检测")
-        if not isinstance(img, (bytes, str, pathlib.PurePath, Image.Image)):
-            raise TypeError("未知图片类型")
-        if isinstance(img, bytes):
-            image = Image.open(io.BytesIO(img))
-        elif isinstance(img, Image.Image):
-            image = img.copy()
-        elif isinstance(img, str):
-            image = base64_to_image(img)
+        if img_bytes:
+            image = Image.open(io.BytesIO(img_bytes))
         else:
-            assert isinstance(img, pathlib.PurePath)
-            image = Image.open(img)
-        image = image.resize((int(image.size[0] * (64 / image.size[1])), 64), Image.ANTIALIAS).convert('L')
+            image = base64_to_image(img_base64)
+        if not self.use_import_onnx:
+            image = image.resize((int(image.size[0] * (64 / image.size[1])), 64), Image.ANTIALIAS).convert('L')
+        else:
+            if self.__resize[0] == -1:
+                if self.__word:
+                    image = image.resize((self.__resize[1], self.__resize[1]), Image.ANTIALIAS)
+                else:
+                    image = image.resize((int(image.size[0] * (self.__resize[1] / image.size[1])), self.__resize[1]), Image.ANTIALIAS)
+            else:
+                image = image.resize((self.__resize[0], self.__resize[1]), Image.ANTIALIAS)
+            if self.__channel == 1:
+                image = image.convert('L')
+            else:
+                image = image.convert('RGB')
         image = np.array(image).astype(np.float32)
         image = np.expand_dims(image, axis=0) / 255.
-        image = (image - 0.5) / 0.5
+        if not self.use_import_onnx:
+            image = (image - 0.5) / 0.5
+        else:
+            if self.__channel == 1:
+                image = (image - 0.456) / 0.224
+            else:
+                image = (image - np.array([0.485, 0.456, 0.406])) / np.array([0.229, 0.224, 0.225])
+
         ort_inputs = {'input1': np.array([image])}
         ort_outs = self.__ort_session.run(None, ort_inputs)
         result = []
+
         last_item = 0
-        for item in ort_outs[0][0]:
-            if item == last_item:
-                continue
-            else:
-                last_item = item
-            if item != 0:
+        if self.__word:
+            for item in ort_outs[1]:
                 result.append(self.__charset[item])
+        else:
+            for item in ort_outs[0][0]:
+                if item == last_item:
+                    continue
+                else:
+                    last_item = item
+                if item != 0:
+                    result.append(self.__charset[item])
         return ''.join(result)
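
A minimal usage sketch of the classification() signature introduced above, which now accepts either raw image bytes (img_bytes) or a base64 string (img_base64). The file name below is a placeholder, and the base64 path assumes base64_to_image() decodes a plain base64-encoded string:

```python
import base64

import ddddocr

# Default built-in OCR model; show_ad=False suppresses the startup banner.
ocr = ddddocr.DdddOcr(show_ad=False)

# Placeholder image path: any captcha image works here.
with open('captcha.jpg', 'rb') as f:
    image_bytes = f.read()

# Pass the raw bytes directly...
print(ocr.classification(img_bytes=image_bytes))

# ...or pass the same image as a base64-encoded string.
print(ocr.classification(img_base64=base64.b64encode(image_bytes).decode('utf-8')))
```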