Files
if.u.service/src/utils/ocr.py

105 lines
3.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import json
import logging
from typing import Protocol
from alibabacloud_ocr_api20210707.client import Client as OcrClient
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_ocr_api20210707 import models as ocr_models
from alibabacloud_tea_util import models as util_models
from alibabacloud_tea_util.client import Client as UtilClient
from .config import get_instance as get_config
class OCR(Protocol):
def recognize_image_text(self, image_link: str) -> str:
"""
从图片提取文本
Args:
image_link (str): 图片链接
Returns:
str: 提取到的文本
"""
...
class AliOCR:
def __init__(self):
config = get_config()
self.access_key = config.get("ali_ocr", "access_key")
self.secret_key = config.get("ali_ocr", "secret_key")
self.endpoint = config.get("ali_ocr", "endpoint")
self.client = self._create_client()
def _create_client(self):
config = open_api_models.Config(
access_key_id=self.access_key,
access_key_secret=self.secret_key,
)
config.endpoint = self.endpoint
return OcrClient(config)
def recognize_image_text(self, image_link: str) -> str:
"""
使用阿里云OCR从图片链接提取文本
Args:
image_link (str): 图片链接
Returns:
str: 提取到的文本
"""
# 创建OCR请求
recognize_general_request = ocr_models.RecognizeGeneralRequest(url=image_link)
runtime = util_models.RuntimeOptions()
try:
resp = self.client.recognize_general_with_options(recognize_general_request, runtime)
logging.debug(resp.body.data)
except Exception as error:
# 此处仅做打印展示,请谨慎对待异常处理,在工程项目中切勿直接忽略异常。
# 错误 message
logging.error(error.message)
# 诊断地址
logging.error(error.data.get("Recommend"))
UtilClient.assert_as_string(error.message)
response = self.client.recognize_general_with_options(recognize_general_request, runtime)
if response.status_code == 200 and response.body:
result_data = response.body.data
result_body = json.loads(result_data)
if result_body and 'content' in result_body:
return result_body['content']
return ""
# 全局OCR实例
_ocr_instance = None
def init():
"""初始化OCR实例"""
global _ocr_instance
_ocr_instance = AliOCR()
def get_instance() -> OCR:
"""获取OCR实例"""
global _ocr_instance
if _ocr_instance is None:
raise RuntimeError("OCR模块未初始化请先调用init()函数")
return _ocr_instance
if __name__ == "__main__":
import os
from logger import init as init_logger
init_logger(console_log_level=logging.DEBUG)
from config import init as init_config
config_file = os.path.join(os.path.dirname(__file__), "../../configuration/test_conf.ini")
init_config(config_file)
init()
ocr = get_instance()
text = ocr.recognize_image_text(image_link="https://pic.mamamiyear.site/test.if.u/test111.PNG")
print(text)