整体项目重构

2025-05-02 20:00:03 +08:00 · 2024-07-25 10:25:15 +08:00 · 2024-07-25 10:25:15 +08:00 · de0afe7a4c
commit de0afe7a4c
parent 7f37543ffb
18 changed files with 482 additions and 511 deletions
--- a/28
+++ b/28
@ -1,17 +1,17 @@
-FROM python:3.8-slim-buster
-
-RUN mkdir /app
-
-COPY ./*.txt ./*.py ./*.sh ./*.onnx /app/
-
-
-RUN cd /app \
-    && python3 -m pip install --upgrade pip -i https://pypi.douban.com/simple/\
-    && pip3 install --no-cache-dir -r requirements.txt --extra-index-url https://pypi.douban.com/simple/ \
-    && rm -rf /tmp/* && rm -rf /root/.cache/* \
-    && sed -i 's#http://deb.debian.org#http://mirrors.aliyun.com/#g' /etc/apt/sources.list\
-    && apt-get --allow-releaseinfo-change update && apt install libgl1-mesa-glx libglib2.0-0 -y
+# 使用官方 Python 运行时作为父镜像
+FROM python:3.9-slim

+# 设置工作目录
 WORKDIR /app

-CMD ["python3", "ocr_server.py", "--port", "9898", "--ocr", "--det"]
+# Copy only the dependency manifest first: the (slow) install layer below is
+# then reused from cache until requirements.txt itself changes.
+COPY requirements.txt ./
+
+# Install project dependencies (no pip cache kept in the layer)
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the rest of the build context into /app
+COPY . /app
+
+# Drop root: the service only needs to read its code and bind port 8000 (>1024)
+RUN useradd --no-log-init --uid 10001 --no-create-home --shell /usr/sbin/nologin appuser
+USER appuser
+
+# Document the port the server listens on
+EXPOSE 8000
+
+# Run the application
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
--- a/201
+++ b/201
@ -1,201 +0,0 @@
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--- a/README.md
+++ b/README.md
@ -1,85 +1,339 @@
-# ocr_api_server
-使用ddddocr的最简api搭建项目，支持docker
+# 🚀 DdddOcr API

-**建议python版本3.7-3.9 64位**
+![DdddOcr Logo](https://cdn.wenanzhe.com/img/logo.png!/crop/700x500a400a500)

-再有不好好看文档的我就不管了啊！！！
+> 基于 FastAPI 和 DdddOcr 的高性能 OCR API 服务，提供图像文字识别、滑动验证码匹配和目标检测功能。
+> 
+> [自营各类GPT聚合平台](https://juxiangyun.com)

-# 运行方式
+## 📋 目录

-## 最简单运行方式
+- [系统要求](#-系统要求)
+- [安装和启动](#-安装和启动)
+- [API 端点](#-api-端点)
+- [API 调用示例](#-api-调用示例)
+- [注意事项](#-注意事项)
+- [故障排除](#-故障排除)
+- [许可证](#-许可证)

-```shell
-# 安装依赖
-pip install -r requirements.txt -i https://pypi.douban.com/simple
+## 💻 系统要求

-# 运行  可选参数如下
-# --port 9898 指定端口,默认为9898
-# --ocr 开启ocr模块 默认开启
-# --old 只有ocr模块开启的情况下生效 默认不开启
-# --det 开启目标检测模式
+| 组件 | 版本 |
+|------|------|
+| 操作系统 | Linux（推荐 Ubuntu 20.04 LTS 或更高版本）|
+| Docker | 20.10 或更高 |
+| Docker Compose | 1.29 或更高 |

-# 最简单运行方式，只开启ocr模块并以新模型计算
-python ocr_server.py --port 9898 --ocr
+## 🚀 安装和启动

-# 开启ocr模块并使用旧模型计算
-python ocr_server.py --port 9898 --ocr --old
+1. **克隆仓库**
+   ```bash
+   git clone https://github.com/your-repo/ddddocr-api.git
+   cd ddddocr-api
+   ```

-# 只开启目标检测模块
-python ocr_server.py --port 9898  --det
+2. **构建 Docker 镜像**（[一键docker环境服务器购买，可一元试用](https://app.rainyun.com/apps/rcs/buy)）
+   ```bash
+   docker build -t ddddocr-api .
+   ```

-# 同时开启ocr模块以及目标检测模块
-python ocr_server.py --port 9898 --ocr --det
+3. **启动服务**
+   ```bash
+   docker run -d -p 8000:8000 --name ddddocr-api-container ddddocr-api
+   ```
+   > 如果使用 Docker Compose，可在项目根目录执行 `docker-compose up -d --build` 代替第 2、3 步。

-# 同时开启ocr模块并使用旧模型计算以及目标检测模块
-python ocr_server.py --port 9898 --ocr --old --det
+4. **验证服务**
+   ```bash
+   curl http://localhost:8000/docs
+   ```
+   > 如果成功，curl 会输出 Swagger UI 页面的 HTML；也可以在浏览器中打开该地址查看交互式文档页面。
   
-```
+5. **停止服务**

-## docker运行方式(目测只能在Linux下部署)
+- 如果使用 Docker：
+  ```bash
+  docker stop ddddocr-api-container
+  ```

-```shell
-git clone https://github.com/sml2h3/ocr_api_server.git
-# docker怎么安装？百度吧
+- 如果使用 Docker Compose：
+  ```bash
+  docker-compose down
+  ```
  
-cd ocr_api_server
+6. **查看日志**

-# 修改entrypoint.sh中的参数，具体参数往上翻，默认9898端口，同时开启ocr模块以及目标检测模块
+- 如果使用 Docker：
+  ```bash
+  docker logs ddddocr-api-container
+  ```

-# 编译镜像
-docker build -t ocr_server:v1 .
+- 如果使用 Docker Compose：
+  ```bash
+  docker-compose logs
+  ```

-# 运行镜像
-docker run -p 9898:9898 -d ocr_server:v1
+## 🔌 API 端点

-```
+### 1. OCR 识别

-# 接口
+🔗 **端点**：`POST /ocr`

-**具体请看test_api.py文件**
+| 参数 | 类型 | 描述 |
+|------|------|------|
+| `file` | File | 图片文件（可选） |
+| `image` | String | Base64 编码的图片字符串（可选） |
+| `probability` | Boolean | 是否返回概率（默认：false） |
+| `charsets` | String | 字符集（可选） |
+| `png_fix` | Boolean | 是否进行 PNG 修复（默认：false） |
+
+### 2. 滑动验证码匹配
+
+🔗 **端点**：`POST /slide_match`
+
+| 参数 | 类型 | 描述 |
+|------|------|------|
+| `target_file` | File | 目标图片文件（可选） |
+| `background_file` | File | 背景图片文件（可选） |
+| `target` | String | Base64 编码的目标图片字符串（可选） |
+| `background` | String | Base64 编码的背景图片字符串（可选） |
+| `simple_target` | Boolean | 是否使用简单目标（默认：false） |
+
+### 3. 目标检测
+
+🔗 **端点**：`POST /detection`
+
+| 参数 | 类型 | 描述 |
+|------|------|------|
+| `file` | File | 图片文件（可选） |
+| `image` | String | Base64 编码的图片字符串（可选） |
+
+## 📘 API 调用示例
+
+<details>
+<summary>Python</summary>

 ```python
-# 1、测试是否启动成功，可以通过直接GET访问http://{host}:{port}/ping来测试，如果返回pong则启动成功
+import requests
+import base64

-# 2、OCR/目标检测请求接口格式：
+url = "http://localhost:8000/ocr"
+image_path = "path/to/your/image.jpg"

-# http://{host}:{port}/{opt}/{img_type}/{ret_type}
-# opt：操作类型 ocr=OCR det=目标检测 slide=滑块（match和compare两种算法，默认为compare)
-# img_type: 数据类型 file=文件上传方式 b64=base64(imgbyte)方式 默认为file方式
-# ret_type: 返回类型 json=返回json（识别出错会在msg里返回错误信息） text=返回文本格式（识别出错时回直接返回空文本）
+with open(image_path, "rb") as image_file:
+    encoded_string = base64.b64encode(image_file.read()).decode('utf-8')

-# 例子：
+data = {
+    "image": encoded_string,
+    "probability": False,
+    "png_fix": False
+}

-# OCR请求
-# resp = requests.post("http://{host}:{port}/ocr/file", files={'image': image_bytes})
-# resp = requests.post("http://{host}:{port}/ocr/b64/text", data=base64.b64encode(file).decode())
-
-# 目标检测请求
-# resp = requests.post("http://{host}:{port}/det/file", files={'image': image_bytes})
-# resp = requests.post("http://{host}:{port}/det/b64/json", data=base64.b64encode(file).decode())
-
-# 滑块识别请求
-# resp = requests.post("http://{host}:{port}/slide/match/file", files={'target_img': target_bytes, 'bg_img': bg_bytes})
-# jsonstr = json.dumps({'target_img': target_b64str, 'bg_img': bg_b64str})
-# resp = requests.post("http://{host}:{port}/slide/compare/b64", files=base64.b64encode(jsonstr.encode()).decode())
+response = requests.post(url, data=data)
+print(response.json())
 ```
+</details>
+<details>
+<summary>Node.js</summary>
+
+```javascript
+const axios = require('axios');
+const fs = require('fs');
+
+const url = 'http://localhost:8000/ocr';
+const imagePath = 'path/to/your/image.jpg';
+
+const imageBuffer = fs.readFileSync(imagePath);
+const base64Image = imageBuffer.toString('base64');
+
+const data = {
+  image: base64Image,
+  probability: false,
+  png_fix: false
+};
+
+// 接口读取的是表单字段，因此以 application/x-www-form-urlencoded 发送，而不是 JSON
+axios.post(url, new URLSearchParams(data))
+  .then(response => {
+    console.log(response.data);
+  })
+  .catch(error => {
+    console.error('Error:', error);
+  });
+```
+</details>
+
+<details>
+<summary>C#</summary>
+
+```csharp
+using System;
+using System.Net.Http;
+using System.IO;
+using System.Threading.Tasks;
+
+class Program
+{
+    static async Task Main(string[] args)
+    {
+        var url = "http://localhost:8000/ocr";
+        var imagePath = "path/to/your/image.jpg";
+
+        var imageBytes = File.ReadAllBytes(imagePath);
+        var base64Image = Convert.ToBase64String(imageBytes);
+
+        var client = new HttpClient();
+        var content = new MultipartFormDataContent();
+        content.Add(new StringContent(base64Image), "image");
+        content.Add(new StringContent("false"), "probability");
+        content.Add(new StringContent("false"), "png_fix");
+
+        var response = await client.PostAsync(url, content);
+        var result = await response.Content.ReadAsStringAsync();
+        Console.WriteLine(result);
+    }
+}
+```
+</details>
+
+<details>
+<summary>PHP</summary>
+
+```php
+<?php
+
+$url = 'http://localhost:8000/ocr';
+$imagePath = 'path/to/your/image.jpg';
+
+$imageData = base64_encode(file_get_contents($imagePath));
+
+$data = array(
+    'image' => $imageData,
+    'probability' => 'false',
+    'png_fix' => 'false'
+);
+
+$options = array(
+    'http' => array(
+        'header'  => "Content-type: application/x-www-form-urlencoded\r\n",
+        'method'  => 'POST',
+        'content' => http_build_query($data)
+    )
+);
+
+$context  = stream_context_create($options);
+$result = file_get_contents($url, false, $context);
+
+echo $result;
+?>
+```
+</details>
+
+<details>
+<summary>Go</summary>
+
+```go
+package main
+
+import (
+    "encoding/base64"
+    "fmt"
+    "io/ioutil"
+    "net/http"
+    "net/url"
+)
+
+func main() {
+    apiURL := "http://localhost:8000/ocr"
+    imagePath := "path/to/your/image.jpg"
+
+    imageData, err := ioutil.ReadFile(imagePath)
+    if err != nil {
+        panic(err)
+    }
+
+    base64Image := base64.StdEncoding.EncodeToString(imageData)
+
+    data := url.Values{}
+    data.Set("image", base64Image)
+    data.Set("probability", "false")
+    data.Set("png_fix", "false")
+
+    resp, err := http.PostForm(apiURL, data)
+    if err != nil {
+        panic(err)
+    }
+    defer resp.Body.Close()
+
+    body, err := ioutil.ReadAll(resp.Body)
+    if err != nil {
+        panic(err)
+    }
+
+    fmt.Println(string(body))
+}
+```
+</details>
+
+<details>
+<summary>易语言</summary>
+
+```易语言
+.版本 2
+
+.程序集 调用OCR接口
+
+.子程序 主函数, 整数型
+.局部变量 请求头, QQ.HttpHeaders
+.局部变量 请求内容, QQ.HttpMultiData
+.局部变量 图片路径, 文本型
+.局部变量 图片数据, 字节集
+.局部变量 HTTP, QQ.Http
+
+图片路径 ＝ "path/to/your/image.jpg"
+图片数据 ＝ 读入文件 (图片路径)
+
+请求头.添加 ("Content-Type", "application/x-www-form-urlencoded")
+
+请求内容.添加文本 ("image", 到Base64 (图片数据))
+请求内容.添加文本 ("probability", "false")
+请求内容.添加文本 ("png_fix", "false")
+
+HTTP.发送POST请求 ("http://localhost:8000/ocr", 请求内容, 请求头)
+
+调试输出 (HTTP.获取返回文本())
+
+返回 (0)
+```
+</details>
+
+> **注意**：使用示例前，请确保安装了必要的依赖库，并根据实际环境修改服务器地址和图片路径。
+
+## ⚠️ 注意事项
+
+- 确保防火墙允许访问 8000 端口。
+- 生产环境建议配置 HTTPS 和适当的身份验证机制。
+- 定期更新 Docker 镜像以获取最新的安全补丁和功能更新。
+
+## 🔧 故障排除
+
+遇到问题？请检查以下几点：
+
+1. 确保 Docker 服务正在运行。
+2. 检查容器日志：
+   ```bash
+   docker logs ddddocr-api-container
+   ```
+3. 确保没有其他服务占用 8000 端口。
+
+> 如果问题仍然存在，请提交 issue 到本项目的 GitHub 仓库。
+
+## 📄 许可证
+
+本项目采用 MIT 许可证。详情请参见 [LICENSE](LICENSE) 文件。
+
+---
+
+<p align="center">
+  Made with ❤️ by sml2h3
+</p>
--- a/app/init.py
+++ b/app/init.py
--- a/app/main.py
+++ b/app/main.py
@ -0,0 +1,77 @@
+from fastapi import FastAPI, File, UploadFile, HTTPException, Form
+from fastapi.responses import JSONResponse
+from typing import Optional, Union
+import base64
+from .models import OCRRequest, SlideMatchRequest, DetectionRequest, APIResponse
+from .services import ocr_service
+
+# ASGI application object; the @app.post decorators below register the endpoints on it.
+app = FastAPI()
+
+
+def decode_image(image: Union[UploadFile, str, None]) -> bytes:
+    if isinstance(image, UploadFile):
+        return image.file.read()
+    elif isinstance(image, str):
+        try:
+            return base64.b64decode(image)
+        except:
+            raise HTTPException(status_code=400, detail="Invalid base64 string")
+    elif image is None:
+        raise HTTPException(status_code=400, detail="No image provided")
+    else:
+        raise HTTPException(status_code=400, detail="Invalid image input")
+
+
+@app.post("/ocr", response_model=APIResponse)
+async def ocr_endpoint(
+        file: Optional[UploadFile] = File(None),
+        image: Optional[str] = Form(None),
+        probability: bool = Form(False),
+        charsets: Optional[str] = Form(None),
+        png_fix: bool = Form(False)
+):
+    try:
+        if file is None and image is None:
+            return APIResponse(code=400, message="Either file or image must be provided")
+
+        image_bytes = decode_image(file or image)
+        result = ocr_service.ocr_classification(image_bytes, probability, charsets, png_fix)
+        return APIResponse(code=200, message="Success", data=result)
+    except Exception as e:
+        return APIResponse(code=500, message=str(e))
+
+
+@app.post("/slide_match", response_model=APIResponse)
+async def slide_match_endpoint(
+        target_file: Optional[UploadFile] = File(None),
+        background_file: Optional[UploadFile] = File(None),
+        target: Optional[str] = Form(None),
+        background: Optional[str] = Form(None),
+        simple_target: bool = Form(False)
+):
+    try:
+        if (target_file is None and target is None) or (background_file is None and background is None):
+            return APIResponse(code=400, message="Both target and background must be provided")
+
+        target_bytes = decode_image(target_file or target)
+        background_bytes = decode_image(background_file or background)
+        result = ocr_service.slide_match(target_bytes, background_bytes, simple_target)
+        return APIResponse(code=200, message="Success", data=result)
+    except Exception as e:
+        return APIResponse(code=500, message=str(e))
+
+
+@app.post("/detection", response_model=APIResponse)
+async def detection_endpoint(
+        file: Optional[UploadFile] = File(None),
+        image: Optional[str] = Form(None)
+):
+    try:
+        if file is None and image is None:
+            return APIResponse(code=400, message="Either file or image must be provided")
+
+        image_bytes = decode_image(file or image)
+        bboxes = ocr_service.detection(image_bytes)
+        return APIResponse(code=200, message="Success", data=bboxes)
+    except Exception as e:
+        return APIResponse(code=500, message=str(e))
--- a/app/models.py
+++ b/app/models.py
@ -0,0 +1,40 @@
+from pydantic import BaseModel
+from typing import Optional, List, Union, Any
+
+
+class ImageInput(BaseModel):
+    """Base schema holding a single optional base64-encoded image string."""
+    image: Optional[str] = None  # For base64 string
+
+
+class OCRRequest(ImageInput):
+    """OCR request schema: the inherited ``image`` plus the OCR options.
+
+    The field names and defaults mirror the form parameters of the ``/ocr``
+    endpoint in app/main.py, which imports this model but reads the values
+    from form fields directly.
+    """
+    probability: bool = False
+    charsets: Optional[str] = None
+    png_fix: bool = False
+
+
+class OCRResponse(BaseModel):
+    """OCR result payload: either a string or a dict."""
+    # NOTE(review): presumably str for plain recognition and dict when
+    # probabilities are requested — confirm against the ddddocr return value.
+    result: Union[str, dict]
+
+
+class SlideMatchRequest(BaseModel):
+    """Slide-match request schema: two optional base64 images and a flag.
+
+    The field names and defaults mirror the non-file form parameters of the
+    ``/slide_match`` endpoint in app/main.py.
+    """
+    target: Optional[str] = None  # For base64 string
+    background: Optional[str] = None  # For base64 string
+    simple_target: bool = False
+
+
+class SlideMatchResponse(BaseModel):
+    """Slide-match result payload: a list of integers."""
+    # NOTE(review): the meaning of the integers (e.g. box coordinates) is not
+    # visible in this module — confirm against ddddocr's slide_match output.
+    result: List[int]
+
+
+class DetectionRequest(ImageInput):
+    """Detection request schema; adds nothing beyond the inherited ``image`` field."""
+    pass
+
+
+class DetectionResponse(BaseModel):
+    """Detection result payload: a list of integer lists, one per detected box."""
+    # NOTE(review): the coordinate order inside each inner list is not visible
+    # here — confirm against ddddocr's detection output.
+    bboxes: List[List[int]]
+
+
+class APIResponse(BaseModel):
+    """Response envelope used as ``response_model`` by every endpoint in app/main.py."""
+    # Application-level status; the endpoints use 200 (success), 400 (bad input) and 500 (error).
+    code: int
+    # Human-readable status or error text.
+    message: str
+    # Endpoint-specific result; left as None on error responses.
+    data: Optional[Any] = None
--- a/app/services.py
+++ b/app/services.py
@ -0,0 +1,24 @@
+import ddddocr
+from typing import Union, List, Optional
+
+class OCRService:
+    def __init__(self):
+        self.ocr = ddddocr.DdddOcr()
+        self.det = ddddocr.DdddOcr(det=True)
+        self.slide = ddddocr.DdddOcr(det=False, ocr=False)
+
+    def ocr_classification(self, image: bytes, probability: bool = False, charsets: Optional[str] = None, png_fix: bool = False) -> Union[str, dict]:
+        if charsets:
+            self.ocr.set_ranges(charsets)
+        result = self.ocr.classification(image, probability=probability, png_fix=png_fix)
+        return result
+
+    def slide_match(self, target: bytes, background: bytes, simple_target: bool = False) -> List[int]:
+        result = self.slide.slide_match(target, background, simple_target=simple_target)
+        return result
+
+    def detection(self, image: bytes) -> List[List[int]]:
+        bboxes = self.det.detection(image)
+        return bboxes
+
+# Module-level singleton shared by all endpoints; constructing it creates the
+# ddddocr engines once, when this module is first imported.
+ocr_service = OCRService()
--- a/compare_bg.jpg
+++ b/compare_bg.jpg
--- a/compare_target.jpg
+++ b/compare_target.jpg
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -0,0 +1,12 @@
+version: '3.8'
+
+services:
+  ddddocr-api:
+    # Build the image from the Dockerfile in this directory.
+    build: .
+    ports:
+      # host:container — 8000 is the port uvicorn is started on in the Dockerfile CMD.
+      - "8000:8000"
+    volumes:
+      # Bind-mounts the project directory over /app, so the container runs the
+      # host checkout rather than the copy baked into the image.
+      - .:/app
+    environment:
+      # NOTE(review): no application module visible in this commit reads DEBUG — confirm it is needed.
+      - DEBUG=1
+    restart: always
--- a/match_bg.png
+++ b/match_bg.png
--- a/match_target.png
+++ b/match_target.png
--- a/ocr_server.py
+++ b/ocr_server.py
@ -1,126 +0,0 @@
-# encoding=utf-8
-import argparse
-import base64
-import json
-
-import ddddocr
-from flask import Flask, request
-
-parser = argparse.ArgumentParser(description="使用ddddocr搭建的最简api服务")
-parser.add_argument("-p", "--port", type=int, default=9898)
-parser.add_argument("--ocr", action="store_true", help="开启ocr识别")
-parser.add_argument("--old", action="store_true", help="OCR是否启动旧模型")
-parser.add_argument("--det", action="store_true", help="开启目标检测")
-
-args = parser.parse_args()
-
-app = Flask(__name__)
-
-
-class Server(object):
-    def __init__(self, ocr=True, det=False, old=False):
-        self.ocr_option = ocr
-        self.det_option = det
-        self.old_option = old
-        self.ocr = None
-        self.det = None
-        if self.ocr_option:
-            print("ocr模块开启")
-            if self.old_option:
-                print("使用OCR旧模型启动")
-                self.ocr = ddddocr.DdddOcr(old=True)
-            else:
-                print("使用OCR新模型启动，如需要使用旧模型，请额外添加参数  --old开启")
-                self.ocr = ddddocr.DdddOcr()
-        else:
-            print("ocr模块未开启，如需要使用，请使用参数  --ocr开启")
-        if self.det_option:
-            print("目标检测模块开启")
-            self.det = ddddocr.DdddOcr(det=True)
-        else:
-            print("目标检测模块未开启，如需要使用，请使用参数  --det开启")
-
-    def classification(self, img: bytes):
-        if self.ocr_option:
-            return self.ocr.classification(img)
-        else:
-            raise Exception("ocr模块未开启")
-
-    def detection(self, img: bytes):
-        if self.det_option:
-            return self.det.detection(img)
-        else:
-            raise Exception("目标检测模块模块未开启")
-
-    def slide(self, target_img: bytes, bg_img: bytes, algo_type: str):
-        dddd = self.ocr or self.det or ddddocr.DdddOcr(ocr=False)
-        if algo_type == 'match':
-            return dddd.slide_match(target_img, bg_img)
-        elif algo_type == 'compare':
-            return dddd.slide_comparison(target_img, bg_img)
-        else:
-            raise Exception(f"不支持的滑块算法类型: {algo_type}")
-
-server = Server(ocr=args.ocr, det=args.det, old=args.old)
-
-
-def get_img(request, img_type='file', img_name='image'):
-    if img_type == 'b64':
-        img = base64.b64decode(request.get_data()) # 
-        try: # json str of multiple images
-            dic = json.loads(img)
-            img = base64.b64decode(dic.get(img_name).encode())
-        except Exception as e: # just base64 of single image
-            pass
-    else:
-        img = request.files.get(img_name).read()
-    return img
-
-
-def set_ret(result, ret_type='text'):
-    if ret_type == 'json':
-        if isinstance(result, Exception):
-            return json.dumps({"status": 200, "result": "", "msg": str(result)})
-        else:
-            return json.dumps({"status": 200, "result": result, "msg": ""})
-        # return json.dumps({"succ": isinstance(result, str), "result": str(result)})
-    else:
-        if isinstance(result, Exception):
-            return ''
-        else:
-            return str(result).strip()
-
-
-@app.route('/<opt>/<img_type>', methods=['POST'])
-@app.route('/<opt>/<img_type>/<ret_type>', methods=['POST'])
-def ocr(opt, img_type='file', ret_type='text'):
-    try:
-        img = get_img(request, img_type)
-        if opt == 'ocr':
-            result = server.classification(img)
-        elif opt == 'det':
-            result = server.detection(img)
-        else:
-            raise f"<opt={opt}> is invalid"
-        return set_ret(result, ret_type)
-    except Exception as e:
-        return set_ret(e, ret_type)
-
-@app.route('/slide/<algo_type>/<img_type>', methods=['POST'])
-@app.route('/slide/<algo_type>/<img_type>/<ret_type>', methods=['POST'])
-def slide(algo_type='compare', img_type='file', ret_type='text'):
-    try:
-        target_img = get_img(request, img_type, 'target_img')
-        bg_img = get_img(request, img_type, 'bg_img')
-        result = server.slide(target_img, bg_img, algo_type)
-        return set_ret(result, ret_type)
-    except Exception as e:
-        return set_ret(e, ret_type)
-
-@app.route('/ping', methods=['GET'])
-def ping():
-    return "pong"
-
-
-if __name__ == '__main__':
-    app.run(host="0.0.0.0", port=args.port)
--- a/requirements.txt
+++ b/requirements.txt
@ -1,2 +1,4 @@
-ddddocr>=1.3.1
-flask
+fastapi==0.68.0
+uvicorn==0.15.0
+ddddocr==1.5.5
+python-multipart==0.0.5
--- a/test.jpg
+++ b/test.jpg
--- a/test_api.py
+++ b/test_api.py
@ -1,111 +0,0 @@
-#!/usr/bin/python3.6
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2021 #
-# @Time    : 2022/1/6 23:28
-# @Author  : sml2h3
-# @Email   : sml2h3@gmail.com
-# @File    : test_api.py
-# @Software: PyCharm
-import base64
-import json
-import requests
-
-print(' ')
-# ******************OCR识别部分开始******************
-host = "http://127.0.0.1:9898"
-# 目标检测就把ocr改成det,其他相同
-# 方式一
-file = open(r'test.jpg', 'rb').read()
-# file = open(r'test_calc.png', 'rb').read()
-
-
-api_url = f"{host}/ocr/file"
-resp = requests.post(api_url, files={'image': file})
-print(f"{api_url=}, {resp.text=}")
-
-api_url = f"{host}/ocr/file/json"
-resp = requests.post(api_url, files={'image': file})
-print(f"{api_url=}, {resp.text=}")
-
-api_url = f"{host}/ocr/b64"
-resp = requests.post(api_url, data=base64.b64encode(file).decode())
-print(f"{api_url=}, {resp.text=}")
-
-api_url = f"{host}/ocr/b64/json"
-resp = requests.post(api_url, data=base64.b64encode(file).decode())
-print(f"{api_url=}, {resp.text=}")
-
-api_url = f"{host}/det/file"
-resp = requests.post(api_url, files={'image': file})
-print(f"{api_url=}, {resp.text=}")
-
-api_url = f"{host}/det/file/json"
-resp = requests.post(api_url, files={'image': file})
-print(f"{api_url=}, {resp.text=}")
-
-# 滑块识别
-
-target_file = open(r'match_target.png', 'rb').read()
-bg_file = open(r'match_bg.png', 'rb').read()
-
-api_url = f"{host}/slide/match/file"
-resp = requests.post(api_url, files={'target_img': target_file, 'bg_img': bg_file})
-print(f"{api_url=}, {resp.text=}")
-
-api_url = f"{host}/slide/match/file/json"
-resp = requests.post(api_url, files={'target_img': target_file, 'bg_img': bg_file})
-print(f"{api_url=}, {resp.text=}")
-
-api_url = f"{host}/slide/match/b64"
-target_b64str = base64.b64encode(target_file).decode()
-bg_b64str = base64.b64encode(bg_file).decode()
-jsonstr = json.dumps({'target_img': target_b64str, 'bg_img': bg_b64str})
-resp = requests.post(api_url, data=base64.b64encode(jsonstr.encode()).decode())
-print(f"{api_url=}, {resp.text=}")
-
-api_url = f"{host}/slide/match/b64/json"
-resp = requests.post(api_url, data=base64.b64encode(jsonstr.encode()).decode())
-print(f"{api_url=}, {resp.text=}")
-
-target_file = open(r'compare_target.jpg', 'rb').read()
-bg_file = open(r'compare_bg.jpg', 'rb').read()
-
-api_url = f"{host}/slide/compare/file"
-resp = requests.post(api_url, files={'target_img': target_file, 'bg_img': bg_file})
-print(f"{api_url=}, {resp.text=}")
-
-api_url = f"{host}/slide/compare/file/json"
-resp = requests.post(api_url, files={'target_img': target_file, 'bg_img': bg_file})
-print(f"{api_url=}, {resp.text=}")
-
-api_url = f"{host}/slide/compare/b64"
-target_b64str = base64.b64encode(target_file).decode()
-bg_b64str = base64.b64encode(bg_file).decode()
-jsonstr = json.dumps({'target_img': target_b64str, 'bg_img': bg_b64str})
-resp = requests.post(api_url, data=base64.b64encode(jsonstr.encode()).decode())
-print(f"{api_url=}, {resp.text=}")
-
-api_url = f"{host}/slide/compare/b64/json"
-resp = requests.post(api_url, data=base64.b64encode(jsonstr.encode()).decode())
-print(f"{api_url=}, {resp.text=}")
-
-# 方式二
-
-# 获取验证码图片
-# headers = {
-#     "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
-#     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4195.1 Safari/537.36"
-# }
-# resp = requests.get('https://data.gdcic.net/Dop/CheckCode.aspx?codemark=408.15173910730016', headers=headers, verify=False)
-# captcha_img = resp.content
-#
-# 识别
-# resp = requests.post(api_url, files={'image': captcha_img})
-# print('验证码结果', resp.text)
-#
-# # 保存验证码图片以供验证
-# with open('captcha.jpg', 'wb') as f:
-#     f.write(captcha_img)
-
-# ******************OCR识别部分开始******************
--- a/test_calc.png
+++ b/test_calc.png
--- a/tests/test_main.py
+++ b/tests/test_main.py