diff --git a/README.md b/README.md index c93b3b2..4b5eff9 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,61 @@ # 带带弟弟OCR通用验证码识别SDK免费开源版 # 今天ddddocr又更新啦! - 当前版本为1.3.1 + ## 当前版本为1.4.0 + +# 1.4.0版本更新内容 + + 本次更新新增了两种滑块识别算法,算法非深度神经网络实现,仅使用opencv和PIL完成。 + + ## 算法1 + 小滑块为单独的png图片,背景是透明图,如下图 + + ![Test](https://cdn.wenanzhe.com/img/b.png) + + 然后背景为带小滑块坑位的,如下图 + + ![Test](https://cdn.wenanzhe.com/img/a.png) + + ```python + det = ddddocr.DdddOcr(det=False, ocr=False) + + with open('target.png', 'rb') as f: + target_bytes = f.read() + + with open('background.png', 'rb') as f: + background_bytes = f.read() + + res = det.slide_match(target_bytes, background_bytes) + + print(res) + ``` + ## 算法2 + 一张图为带坑位的原图,如下图 + + ![Test](https://cdn.wenanzhe.com/img/bg.jpg) + + 一张图为原图,如下图 + + ![Test](https://cdn.wenanzhe.com/img/fullpage.jpg) + + ```python + det = ddddocr.DdddOcr(det=False, ocr=False) + + with open('bg.jpg', 'rb') as f: + target_bytes = f.read() + + with open('fullpage.jpg', 'rb') as f: + background_bytes = f.read() + + img = cv2.imread("bg.jpg") + + res = det.slide_comparison(target_bytes, background_bytes) + + print(res) + ``` + + +# 1.3.1版本更新内容 想必很多做验证码的新手,一定头疼碰到点选类型的图像,做样本费时费力,神经网络不会写,训练设备太昂贵,模型效果又不好。 diff --git a/ddddocr/__init__.py b/ddddocr/__init__.py index aa1570a..1095707 100644 --- a/ddddocr/__init__.py +++ b/ddddocr/__init__.py @@ -6,7 +6,7 @@ import io import os import base64 import onnxruntime -from PIL import Image +from PIL import Image, ImageChops import numpy as np import cv2 @@ -27,14 +27,16 @@ class TypeError(Exception): class DdddOcr(object): - def __init__(self, det: bool = False, old: bool = False, use_gpu: bool = False, device_id: int = 0, show_ad=True): + def __init__(self, ocr: bool = True, det: bool = False, old: bool = False, use_gpu: bool = False, + device_id: int = 0, show_ad=True): if show_ad: print("欢迎使用ddddocr,本项目专注带动行业内卷,个人博客:wenanzhe.com") print("训练数据支持来源于:http://146.56.204.113:19199/preview") + if det: self.__graph_path = os.path.join(os.path.dirname(__file__), 'common_det.onnx') self.__charset = [] - else: + if ocr: if old: self.__graph_path = os.path.join(os.path.dirname(__file__), 'common_old.onnx') self.__charset = ["", "掀", "袜", "顧", "徕", "榱", "荪", "浡", "其", "炎", "玉", "恩", "劣", "徽", "廉", "桂", "拂", @@ -1433,8 +1435,8 @@ class DdddOcr(object): self.__providers = [ 'CPUExecutionProvider', ] - self.__ort_session = onnxruntime.InferenceSession(self.__graph_path, providers=self.__providers) - + if ocr or det: + self.__ort_session = onnxruntime.InferenceSession(self.__graph_path, providers=self.__providers) def preproc(self, img, input_size, swap=(2, 0, 1)): if len(img.shape) == 3: @@ -1606,3 +1608,69 @@ class DdddOcr(object): img_bytes = base64.b64decode(img_base64) result = self.get_bbox(img_bytes) return result + + def get_target(self, img_bytes: bytes = None): + image = Image.open(io.BytesIO(img_bytes)) + w, h = image.size + starttx = 0 + startty = 0 + end_x = 0 + end_y = 0 + for x in range(w): + for y in range(h): + p = image.getpixel((x, y)) + if p[-1] == 0: + if startty != 0 and end_y == 0: + end_y = y + + if starttx != 0 and end_x == 0: + end_x = x + else: + if startty == 0: + startty = y + end_y = 0 + else: + if y < startty: + startty = y + end_y = 0 + if starttx == 0 and startty != 0: + starttx = x + if end_y != 0: + end_x = x + return image.crop([starttx, startty, end_x, end_y]), startty + + def slide_match(self, target_bytes: bytes = None, background_bytes: bytes = None): + target, target_y = self.get_target(target_bytes) + target = cv2.cvtColor(np.asarray(target), cv2.COLOR_RGBA2GRAY) + background = cv2.imdecode(np.frombuffer(background_bytes, np.uint8), cv2.IMREAD_GRAYSCALE) + res = cv2.matchTemplate(background, target, cv2.TM_CCOEFF) + min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res) + w, h = target.shape[::-1] + bottom_right = (max_loc[0] + w, max_loc[1] + h) + return {"target_y": target_y, + "target": [int(max_loc[0]), int(max_loc[1]), int(bottom_right[0]), int(bottom_right[1])]} + + def slide_comparison(self, target_bytes: bytes = None, background_bytes: bytes = None): + target = Image.open(io.BytesIO(target_bytes)) + background = Image.open(io.BytesIO(background_bytes)) + image = ImageChops.difference(background, target) + background.close() + target.close() + image = image.point(lambda x: 255 if x > 80 else 0) + start_y = 0 + start_x = 0 + for i in range(0, image.width): + count = 0 + for j in range(0, image.height): + pixel = image.getpixel((i, j)) + if pixel != (0, 0, 0): + count += 1 + if count >= 5 and start_y == 0: + start_y = j - 5 + + if count >= 5: + start_x = i + 2 + break + return { + "target": [start_x, start_y] + } diff --git a/test.py b/test.py index 9bb99c4..8208c97 100644 --- a/test.py +++ b/test.py @@ -16,4 +16,30 @@ with open('test.jpg', 'rb') as f: img_bytes = f.read() res = det.detection(img_bytes) +print(res) + +# 滑块模板匹配方式 + +det = ddddocr.DdddOcr(det=False, ocr=False) + +with open('b.png', 'rb') as f: + target_bytes = f.read() + +with open('a.png', 'rb') as f: + background_bytes = f.read() + +res = det.slide_match(target_bytes, background_bytes) +print(res) + +det = ddddocr.DdddOcr(det=False, ocr=False) + +with open('bg.jpg', 'rb') as f: + target_bytes = f.read() + +with open('fullpage.jpg', 'rb') as f: + background_bytes = f.read() + + +res = det.slide_comparison(target_bytes, background_bytes) + print(res) \ No newline at end of file