更新OCR版本,Beta版,还不能上线
This commit is contained in:
@@ -62,7 +62,7 @@ def find_boxes(content, layout, offset=0, length=None, improve=False, image_path
|
|||||||
captured_image, offset_x, offset_y = image_util.expand_to_a4_size(captured_image)
|
captured_image, offset_x, offset_y = image_util.expand_to_a4_size(captured_image)
|
||||||
cv2.imwrite(temp_file.name, captured_image)
|
cv2.imwrite(temp_file.name, captured_image)
|
||||||
try:
|
try:
|
||||||
layouts = util.get_ocr_layout(OCR, temp_file.name)
|
layouts, _ = util.get_ocr_layout(OCR, temp_file.name)
|
||||||
except TypeError:
|
except TypeError:
|
||||||
# 如果是类型错误,大概率是没识别到文字
|
# 如果是类型错误,大概率是没识别到文字
|
||||||
layouts = []
|
layouts = []
|
||||||
@@ -100,7 +100,7 @@ def get_mask_layout(image, name, id_card_num):
|
|||||||
result = []
|
result = []
|
||||||
try:
|
try:
|
||||||
try:
|
try:
|
||||||
layouts = util.get_ocr_layout(OCR, temp_file.name)
|
layouts, _ = util.get_ocr_layout(OCR, temp_file.name)
|
||||||
# layouts = OCR.parse({"doc": temp_file.name})["layout"]
|
# layouts = OCR.parse({"doc": temp_file.name})["layout"]
|
||||||
except TypeError:
|
except TypeError:
|
||||||
# 如果是类型错误,大概率是没识别到文字
|
# 如果是类型错误,大概率是没识别到文字
|
||||||
@@ -198,7 +198,7 @@ def mask_photo(img_url, name, id_card_num, color=(255, 255, 255)):
|
|||||||
return do_mask, i
|
return do_mask, i
|
||||||
|
|
||||||
# 打开图片
|
# 打开图片
|
||||||
image = image_util.read(img_url)
|
image, _ = image_util.read(img_url)
|
||||||
if image is None:
|
if image is None:
|
||||||
return False, image
|
return False, image
|
||||||
original_image = image
|
original_image = image
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ def check_error(error_ocr):
|
|||||||
|
|
||||||
image = mask_photo(img_url, name, id_card_num, (0, 0, 0))[1]
|
image = mask_photo(img_url, name, id_card_num, (0, 0, 0))[1]
|
||||||
final_img_url = ufile.get_private_url(error_ocr.cfjaddress, "drg100")
|
final_img_url = ufile.get_private_url(error_ocr.cfjaddress, "drg100")
|
||||||
final_image = image_util.read(final_img_url)
|
final_image, _ = image_util.read(final_img_url)
|
||||||
return image_util.combined(final_image, image)
|
return image_util.combined(final_image, image)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -13,14 +13,14 @@ from photo_review import auto_photo_review, SEND_ERROR_EMAIL
|
|||||||
|
|
||||||
# 项目必须从此处启动,否则代码中的相对路径可能导致错误的发生
|
# 项目必须从此处启动,否则代码中的相对路径可能导致错误的发生
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
program_name = '照片审核自动识别脚本'
|
program_name = "照片审核自动识别脚本"
|
||||||
logging.config.dictConfig(LOGGING_CONFIG)
|
logging.config.dictConfig(LOGGING_CONFIG)
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("--clean", default=False, type=bool, help="是否将识别中的案子改为待识别状态")
|
parser.add_argument("--clean", default=False, type=bool, help="是否将识别中的案子改为待识别状态")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if args.clean:
|
if args.clean:
|
||||||
# 主要用于启动时,清除仍在涂抹中的案子
|
# 主要用于启动时,清除仍在识别中的案子
|
||||||
session = MysqlSession()
|
session = MysqlSession()
|
||||||
update_flag = (update(ZxPhhd).where(ZxPhhd.exsuccess_flag == "2").values(exsuccess_flag="1"))
|
update_flag = (update(ZxPhhd).where(ZxPhhd.exsuccess_flag == "2").values(exsuccess_flag="1"))
|
||||||
session.execute(update_flag)
|
session.execute(update_flag)
|
||||||
@@ -34,7 +34,7 @@ if __name__ == '__main__':
|
|||||||
logging.info(f"【{program_name}】开始运行")
|
logging.info(f"【{program_name}】开始运行")
|
||||||
auto_photo_review.main()
|
auto_photo_review.main()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_logger = logging.getLogger('error')
|
error_logger = logging.getLogger("error")
|
||||||
error_logger.error(traceback.format_exc())
|
error_logger.error(traceback.format_exc())
|
||||||
if SEND_ERROR_EMAIL:
|
if SEND_ERROR_EMAIL:
|
||||||
send_error_email(program_name, repr(e), traceback.format_exc())
|
send_error_email(program_name, repr(e), traceback.format_exc())
|
||||||
|
|||||||
@@ -2,9 +2,9 @@ import jieba
|
|||||||
from paddlenlp import Taskflow
|
from paddlenlp import Taskflow
|
||||||
from paddleocr import PaddleOCR
|
from paddleocr import PaddleOCR
|
||||||
|
|
||||||
'''
|
"""
|
||||||
项目配置
|
项目配置
|
||||||
'''
|
"""
|
||||||
# 每次从数据库获取的案子数量
|
# 每次从数据库获取的案子数量
|
||||||
PHHD_BATCH_SIZE = 10
|
PHHD_BATCH_SIZE = 10
|
||||||
# 没有查询到案子的等待时间(分钟)
|
# 没有查询到案子的等待时间(分钟)
|
||||||
@@ -18,35 +18,35 @@ LAYOUT_ANALYSIS = False
|
|||||||
信息抽取关键词配置
|
信息抽取关键词配置
|
||||||
"""
|
"""
|
||||||
# 患者姓名
|
# 患者姓名
|
||||||
PATIENT_NAME = ['患者姓名']
|
PATIENT_NAME = ["患者姓名"]
|
||||||
# 入院日期
|
# 入院日期
|
||||||
ADMISSION_DATE = ['入院日期']
|
ADMISSION_DATE = ["入院日期"]
|
||||||
# 出院日期
|
# 出院日期
|
||||||
DISCHARGE_DATE = ['出院日期']
|
DISCHARGE_DATE = ["出院日期"]
|
||||||
# 发生医疗费
|
# 发生医疗费
|
||||||
MEDICAL_EXPENSES = ['费用总额']
|
MEDICAL_EXPENSES = ["费用总额"]
|
||||||
# 个人现金支付
|
# 个人现金支付
|
||||||
PERSONAL_CASH_PAYMENT = ['个人现金支付']
|
PERSONAL_CASH_PAYMENT = ["个人现金支付"]
|
||||||
# 个人账户支付
|
# 个人账户支付
|
||||||
PERSONAL_ACCOUNT_PAYMENT = ['个人账户支付']
|
PERSONAL_ACCOUNT_PAYMENT = ["个人账户支付"]
|
||||||
# 个人自费金额
|
# 个人自费金额
|
||||||
PERSONAL_FUNDED_AMOUNT = ['自费金额', '个人自费']
|
PERSONAL_FUNDED_AMOUNT = ["自费金额", "个人自费"]
|
||||||
# 医保类别
|
# 医保类别
|
||||||
MEDICAL_INSURANCE_TYPE = ['医保类型']
|
MEDICAL_INSURANCE_TYPE = ["医保类型"]
|
||||||
# 就诊医院
|
# 就诊医院
|
||||||
HOSPITAL = ['医院']
|
HOSPITAL = ["医院"]
|
||||||
# 就诊科室
|
# 就诊科室
|
||||||
DEPARTMENT = ['科室']
|
DEPARTMENT = ["科室"]
|
||||||
# 主治医生
|
# 主治医生
|
||||||
DOCTOR = ['主治医生']
|
DOCTOR = ["主治医生"]
|
||||||
# 住院号
|
# 住院号
|
||||||
ADMISSION_ID = ['住院号']
|
ADMISSION_ID = ["住院号"]
|
||||||
# 医保结算单号码
|
# 医保结算单号码
|
||||||
SETTLEMENT_ID = ['医保结算单号码']
|
SETTLEMENT_ID = ["医保结算单号码"]
|
||||||
# 年龄
|
# 年龄
|
||||||
AGE = ['年龄']
|
AGE = ["年龄"]
|
||||||
# 大写总额
|
# 大写总额
|
||||||
UPPERCASE_MEDICAL_EXPENSES = ['大写总额']
|
UPPERCASE_MEDICAL_EXPENSES = ["大写总额"]
|
||||||
|
|
||||||
SETTLEMENT_LIST_SCHEMA = \
|
SETTLEMENT_LIST_SCHEMA = \
|
||||||
(PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES + PERSONAL_CASH_PAYMENT
|
(PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES + PERSONAL_CASH_PAYMENT
|
||||||
@@ -58,57 +58,55 @@ DISCHARGE_RECORD_SCHEMA = \
|
|||||||
|
|
||||||
COST_LIST_SCHEMA = PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES
|
COST_LIST_SCHEMA = PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES
|
||||||
|
|
||||||
'''
|
"""
|
||||||
别名配置
|
别名配置
|
||||||
'''
|
"""
|
||||||
# 使用别名中的value替换key。考虑到效率问题,只会替换第一个匹配到的key。
|
# 使用别名中的value替换key。考虑到效率问题,只会替换第一个匹配到的key。
|
||||||
HOSPITAL_ALIAS = {
|
HOSPITAL_ALIAS = {
|
||||||
'沐阳': ['沭阳'],
|
"沐阳": ["沭阳"],
|
||||||
'连水': ['涟水'],
|
"连水": ["涟水"],
|
||||||
'唯宁': ['睢宁'], # 雕宁
|
"唯宁": ["睢宁"], # 雕宁
|
||||||
'九〇四': ['904'],
|
"九〇四": ["904"],
|
||||||
'漂水': ['溧水'],
|
"漂水": ["溧水"],
|
||||||
}
|
}
|
||||||
DEPARTMENT_ALIAS = {
|
DEPARTMENT_ALIAS = {
|
||||||
'耳鼻喉': ['耳鼻咽喉'],
|
"耳鼻喉": ["耳鼻咽喉"],
|
||||||
'急症': ['急诊'],
|
"急症": ["急诊"],
|
||||||
}
|
}
|
||||||
|
|
||||||
'''
|
"""
|
||||||
搜索过滤配置
|
搜索过滤配置
|
||||||
'''
|
"""
|
||||||
# 默认会过滤单字
|
# 默认会过滤单字
|
||||||
HOSPITAL_FILTER = ['医院', '人民', '第一', '第二', '第三', '大学', '附属']
|
HOSPITAL_FILTER = ["医院", "人民", "第一", "第二", "第三", "大学", "附属"]
|
||||||
|
|
||||||
DEPARTMENT_FILTER = ['医', '伤', '西', '新']
|
DEPARTMENT_FILTER = ["医", "伤", "西", "新"]
|
||||||
|
|
||||||
'''
|
"""
|
||||||
分词配置
|
分词配置
|
||||||
'''
|
"""
|
||||||
jieba.suggest_freq(('肿瘤', '医院'), True)
|
jieba.suggest_freq(("肿瘤", "医院"), True)
|
||||||
jieba.suggest_freq(('骨', '伤'), True)
|
jieba.suggest_freq(("骨", "伤"), True)
|
||||||
jieba.suggest_freq(('感染', '性'), True)
|
jieba.suggest_freq(("感染", "性"), True)
|
||||||
jieba.suggest_freq(('胆', '道'), True)
|
jieba.suggest_freq(("胆", "道"), True)
|
||||||
jieba.suggest_freq(('脾', '胃'), True)
|
jieba.suggest_freq(("脾", "胃"), True)
|
||||||
|
|
||||||
'''
|
"""
|
||||||
模型配置
|
模型配置
|
||||||
'''
|
"""
|
||||||
SETTLEMENT_IE = Taskflow('information_extraction', schema=SETTLEMENT_LIST_SCHEMA, model='uie-x-base',
|
SETTLEMENT_IE = Taskflow("information_extraction", schema=SETTLEMENT_LIST_SCHEMA, model="uie-x-base",
|
||||||
task_path='model/settlement_list_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
|
task_path="model/settlement_list_model", layout_analysis=LAYOUT_ANALYSIS, precision="fp16")
|
||||||
DISCHARGE_IE = Taskflow('information_extraction', schema=DISCHARGE_RECORD_SCHEMA, model='uie-x-base',
|
DISCHARGE_IE = Taskflow("information_extraction", schema=DISCHARGE_RECORD_SCHEMA, model="uie-x-base",
|
||||||
task_path='model/discharge_record_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
|
task_path="model/discharge_record_model", layout_analysis=LAYOUT_ANALYSIS, precision="fp16")
|
||||||
COST_IE = Taskflow('information_extraction', schema=COST_LIST_SCHEMA, model='uie-x-base', device_id=1,
|
COST_IE = Taskflow("information_extraction", schema=COST_LIST_SCHEMA, model="uie-x-base", device_id=1,
|
||||||
task_path='model/cost_list_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
|
task_path="model/cost_list_model", layout_analysis=LAYOUT_ANALYSIS, precision="fp16")
|
||||||
|
|
||||||
OCR = PaddleOCR(
|
OCR = PaddleOCR(
|
||||||
gpu_id=1,
|
device="gpu:0",
|
||||||
use_angle_cls=False,
|
ocr_version="PP-OCRv4",
|
||||||
show_log=False,
|
use_textline_orientation=False,
|
||||||
det_db_thresh=0.1,
|
# 检测像素阈值,输出的概率图中,得分大于该阈值的像素点才会被认为是文字像素点
|
||||||
det_db_box_thresh=0.3,
|
text_det_thresh=0.1,
|
||||||
det_limit_side_len=1248,
|
# 检测框阈值,检测结果边框内,所有像素点的平均得分大于该阈值时,该结果会被认为是文字区域
|
||||||
drop_score=0.3,
|
text_det_box_thresh=0.3,
|
||||||
rec_model_dir='model/ocr/openatom_rec_repsvtr_ch_infer',
|
|
||||||
rec_algorithm='SVTR_LCNet',
|
|
||||||
)
|
)
|
||||||
@@ -36,14 +36,15 @@ def merge_result(result1, result2):
|
|||||||
return result1
|
return result1
|
||||||
|
|
||||||
|
|
||||||
def ie_temp_image(ie, ocr, image):
|
def ie_temp_image(ie, ocr, image, is_screenshot=False):
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
|
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
|
||||||
cv2.imwrite(temp_file.name, image)
|
cv2.imwrite(temp_file.name, image)
|
||||||
|
|
||||||
ie_result = []
|
ie_result = []
|
||||||
ocr_pure_text = ''
|
ocr_pure_text = ''
|
||||||
|
angle = '0'
|
||||||
try:
|
try:
|
||||||
layout = util.get_ocr_layout(ocr, temp_file.name)
|
layout, angle = util.get_ocr_layout(ocr, temp_file.name, is_screenshot)
|
||||||
if not layout:
|
if not layout:
|
||||||
# 无识别结果
|
# 无识别结果
|
||||||
ie_result = []
|
ie_result = []
|
||||||
@@ -61,7 +62,7 @@ def ie_temp_image(ie, ocr, image):
|
|||||||
os.remove(temp_file.name)
|
os.remove(temp_file.name)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.info(f"删除临时文件 {temp_file.name} 时出错", exc_info=e)
|
logging.info(f"删除临时文件 {temp_file.name} 时出错", exc_info=e)
|
||||||
return ie_result, ocr_pure_text
|
return ie_result, ocr_pure_text, angle
|
||||||
|
|
||||||
|
|
||||||
# 关键信息提取
|
# 关键信息提取
|
||||||
@@ -159,7 +160,7 @@ def information_extraction(ie, phrecs, identity):
|
|||||||
if not img_path:
|
if not img_path:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
image = image_util.read(img_path)
|
image, exif_data = image_util.read(img_path)
|
||||||
if image is None:
|
if image is None:
|
||||||
# 图片可能因为某些原因获取不到
|
# 图片可能因为某些原因获取不到
|
||||||
continue
|
continue
|
||||||
@@ -175,7 +176,7 @@ def information_extraction(ie, phrecs, identity):
|
|||||||
if text:
|
if text:
|
||||||
info_extract = ie(text)[0]
|
info_extract = ie(text)[0]
|
||||||
else:
|
else:
|
||||||
info_extract = ie_temp_image(ie, OCR, image)[0]
|
info_extract = ie_temp_image(ie, OCR, image, True)[0]
|
||||||
ie_result = {'result': info_extract, 'angle': '0'}
|
ie_result = {'result': info_extract, 'angle': '0'}
|
||||||
|
|
||||||
now = util.get_default_datetime()
|
now = util.get_default_datetime()
|
||||||
@@ -193,27 +194,20 @@ def information_extraction(ie, phrecs, identity):
|
|||||||
|
|
||||||
result = merge_result(result, ie_result['result'])
|
result = merge_result(result, ie_result['result'])
|
||||||
else:
|
else:
|
||||||
|
is_screenshot = image_util.is_screenshot(image, exif_data)
|
||||||
target_images = []
|
target_images = []
|
||||||
# target_images += detector.request_book_areas(image) # 识别文档区域并裁剪
|
# target_images += detector.request_book_areas(image) # 识别文档区域并裁剪
|
||||||
if not target_images:
|
if not target_images:
|
||||||
target_images.append(image) # 识别失败
|
target_images.append(image) # 识别失败
|
||||||
angle_count = defaultdict(int, {'0': 0}) # 分割后图片的最优角度统计
|
angle_count = defaultdict(int, {'0': 0}) # 分割后图片的最优角度统计
|
||||||
for target_image in target_images:
|
for target_image in target_images:
|
||||||
# dewarped_image = dewarp.dewarp_image(target_image) # 去扭曲
|
split_results = image_util.split(target_image)
|
||||||
dewarped_image = target_image
|
|
||||||
angles = image_util.parse_rotation_angles(dewarped_image)
|
|
||||||
|
|
||||||
split_results = image_util.split(dewarped_image)
|
|
||||||
for split_result in split_results:
|
for split_result in split_results:
|
||||||
if split_result['img'] is None or split_result['img'].size == 0:
|
if split_result['img'] is None or split_result['img'].size == 0:
|
||||||
continue
|
continue
|
||||||
rotated_img = image_util.rotate(split_result['img'], int(angles[0]))
|
ie_temp_result = ie_temp_image(ie, OCR, split_result['img'], is_screenshot)
|
||||||
ie_temp_result = ie_temp_image(ie, OCR, rotated_img)
|
|
||||||
ocr_text += ie_temp_result[1]
|
ocr_text += ie_temp_result[1]
|
||||||
ie_results = [{'result': ie_temp_result[0], 'angle': angles[0]}]
|
ie_results = [{'result': ie_temp_result[0], 'angle': ie_temp_result[2]}]
|
||||||
if not ie_results[0]['result'] or len(ie_results[0]['result']) < len(ie.kwargs.get('schema')):
|
|
||||||
rotated_img = image_util.rotate(split_result['img'], int(angles[1]))
|
|
||||||
ie_results.append({'result': ie_temp_image(ie, OCR, rotated_img)[0], 'angle': angles[1]})
|
|
||||||
now = util.get_default_datetime()
|
now = util.get_default_datetime()
|
||||||
best_angle = ['0', 0]
|
best_angle = ['0', 0]
|
||||||
for ie_result in ie_results:
|
for ie_result in ie_results:
|
||||||
|
|||||||
@@ -1,16 +1,11 @@
|
|||||||
numpy==1.26.4
|
aistudio_sdk==0.2.6
|
||||||
onnxconverter-common==1.14.0
|
onnxconverter-common==1.15.0
|
||||||
|
onnxruntime-gpu==1.22.0
|
||||||
OpenCC==1.1.6
|
OpenCC==1.1.6
|
||||||
opencv-python==4.6.0.66
|
|
||||||
paddle2onnx==1.2.3
|
paddle2onnx==1.2.3
|
||||||
paddleclas==2.5.2
|
paddlenlp==3.0.0b4
|
||||||
paddlenlp==2.6.1
|
paddleocr==3.1.1
|
||||||
paddleocr==2.7.3
|
PyMuPDF==1.26.3
|
||||||
pillow==10.4.0
|
|
||||||
pymysql==1.1.1
|
pymysql==1.1.1
|
||||||
requests==2.32.3
|
ufile==3.2.11
|
||||||
sqlacodegen==2.3.0.post1
|
zxing-cpp==2.3.0
|
||||||
sqlalchemy==1.4.52
|
|
||||||
tenacity==8.5.0
|
|
||||||
ufile==3.2.9
|
|
||||||
zxing-cpp==2.2.0
|
|
||||||
@@ -1,9 +1,12 @@
|
|||||||
import logging
|
import logging
|
||||||
import math
|
import math
|
||||||
import urllib.request
|
import urllib.request
|
||||||
|
from io import BytesIO
|
||||||
|
|
||||||
import cv2
|
import cv2
|
||||||
import numpy
|
import numpy
|
||||||
|
from PIL import Image
|
||||||
|
from PIL.ExifTags import TAGS
|
||||||
from paddleclas import PaddleClas
|
from paddleclas import PaddleClas
|
||||||
from tenacity import retry, stop_after_attempt, wait_random
|
from tenacity import retry, stop_after_attempt, wait_random
|
||||||
|
|
||||||
@@ -14,20 +17,36 @@ def read(image_path):
|
|||||||
"""
|
"""
|
||||||
从网络或本地读取图片
|
从网络或本地读取图片
|
||||||
:param image_path: 网络或本地路径
|
:param image_path: 网络或本地路径
|
||||||
:return: NumPy数组形式的图片
|
:return: NumPy数组形式的图片, EXIF数据
|
||||||
"""
|
"""
|
||||||
if image_path.startswith("http"):
|
if image_path.startswith("http"):
|
||||||
# 发送HTTP请求并获取图像数据
|
# 发送HTTP请求并获取图像数据
|
||||||
resp = urllib.request.urlopen(image_path, timeout=60)
|
resp = urllib.request.urlopen(image_path, timeout=60)
|
||||||
# 将数据读取为字节流
|
# 将数据读取为字节流
|
||||||
image_data = resp.read()
|
image_data = resp.read()
|
||||||
|
else:
|
||||||
|
with open(image_path, "rb") as f:
|
||||||
|
image_data = f.read()
|
||||||
|
|
||||||
|
# 解析EXIF信息(基于原始字节流)
|
||||||
|
exif_data = {}
|
||||||
|
try:
|
||||||
|
# 用PIL打开原始字节流
|
||||||
|
with Image.open(BytesIO(image_data)) as img:
|
||||||
|
# 获取EXIF字典
|
||||||
|
exif_info = img._getexif()
|
||||||
|
if exif_info:
|
||||||
|
# 将EXIF标签的数字ID转换为可读名称(如36867对应"DateTimeOriginal")
|
||||||
|
for tag_id, value in exif_info.items():
|
||||||
|
tag_name = TAGS.get(tag_id, tag_id)
|
||||||
|
exif_data[tag_name] = value
|
||||||
|
except Exception as e:
|
||||||
|
logging.error("解析EXIF信息失败", exc_info=e)
|
||||||
# 将字节流转换为NumPy数组
|
# 将字节流转换为NumPy数组
|
||||||
image_np = numpy.frombuffer(image_data, numpy.uint8)
|
image_np = numpy.frombuffer(image_data, numpy.uint8)
|
||||||
# 解码NumPy数组为OpenCV图像格式
|
# 解码NumPy数组为OpenCV图像格式
|
||||||
image = cv2.imdecode(image_np, cv2.IMREAD_COLOR)
|
image = cv2.imdecode(image_np, cv2.IMREAD_COLOR)
|
||||||
else:
|
return image, exif_data
|
||||||
image = cv2.imread(image_path)
|
|
||||||
return image
|
|
||||||
|
|
||||||
|
|
||||||
def capture(image, rectangle):
|
def capture(image, rectangle):
|
||||||
@@ -61,7 +80,7 @@ def split(image, ratio=1.414, overlap=0.05, x_compensation=3):
|
|||||||
"""
|
"""
|
||||||
split_result = []
|
split_result = []
|
||||||
if isinstance(image, str):
|
if isinstance(image, str):
|
||||||
image = read(image)
|
image, _ = read(image)
|
||||||
height, width = image.shape[:2]
|
height, width = image.shape[:2]
|
||||||
hw_ratio = height / width
|
hw_ratio = height / width
|
||||||
wh_ratio = width / height
|
wh_ratio = width / height
|
||||||
|
|||||||
19
util/util.py
19
util/util.py
@@ -12,9 +12,10 @@ def get_default_datetime():
|
|||||||
return datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
return datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
||||||
|
|
||||||
|
|
||||||
def get_ocr_layout(ocr, img_path):
|
def get_ocr_layout(ocr, img_path, is_screenshot=False):
|
||||||
"""
|
"""
|
||||||
获取ocr识别的结果,转为合适的layout形式
|
获取ocr识别的结果,转为合适的layout形式
|
||||||
|
:param is_screenshot: 是否是截图
|
||||||
:param ocr: ocr模型
|
:param ocr: ocr模型
|
||||||
:param img_path: 图片本地路径
|
:param img_path: 图片本地路径
|
||||||
:return:
|
:return:
|
||||||
@@ -36,18 +37,18 @@ def get_ocr_layout(ocr, img_path):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
layout = []
|
layout = []
|
||||||
ocr_result = ocr.ocr(img_path, cls=False)
|
ocr_result = ocr.predict(input=img_path, use_doc_orientation_classify=not is_screenshot, use_doc_unwarping=not is_screenshot)
|
||||||
ocr_result = ocr_result[0]
|
ocr_result = next(ocr_result)
|
||||||
if not ocr_result:
|
if not ocr_result:
|
||||||
return layout
|
return layout, "0"
|
||||||
for segment in ocr_result:
|
angle = ocr_result.get("doc_preprocessor_res", {}).get("angle", "0")
|
||||||
box = segment[0]
|
for i in range(len(ocr_result.get('rec_texts'))):
|
||||||
|
box = ocr_result.get("rec_polys")[i].tolist()
|
||||||
box = _get_box(box)
|
box = _get_box(box)
|
||||||
if not _normal_box(box):
|
if not _normal_box(box):
|
||||||
continue
|
continue
|
||||||
text = segment[1][0]
|
layout.append((box, ocr_result.get("rec_texts")[i]))
|
||||||
layout.append((box, text))
|
return layout, str(angle)
|
||||||
return layout
|
|
||||||
|
|
||||||
|
|
||||||
def delete_temp_file(temp_files):
|
def delete_temp_file(temp_files):
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ def write_visual_result(image, angle=0, layout=None, result=None):
|
|||||||
img_name = img[:last_dot_index]
|
img_name = img[:last_dot_index]
|
||||||
img_type = img[last_dot_index + 1:]
|
img_type = img[last_dot_index + 1:]
|
||||||
|
|
||||||
img_array = image_util.read(image)
|
img_array, _ = image_util.read(image)
|
||||||
if angle != 0:
|
if angle != 0:
|
||||||
img_array = image_util.rotate(img_array, angle)
|
img_array = image_util.rotate(img_array, angle)
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
|
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
|
||||||
@@ -63,7 +63,7 @@ def visual_model_test(model_type, test_img, task_path, schema):
|
|||||||
img["y_offset"] -= offset_y
|
img["y_offset"] -= offset_y
|
||||||
|
|
||||||
temp_files_paths.append(temp_file.name)
|
temp_files_paths.append(temp_file.name)
|
||||||
parsed_doc = util.get_ocr_layout(
|
parsed_doc, _ = util.get_ocr_layout(
|
||||||
PaddleOCR(det_db_box_thresh=0.3, det_db_thresh=0.1, det_limit_side_len=1248, drop_score=0.3,
|
PaddleOCR(det_db_box_thresh=0.3, det_db_thresh=0.1, det_limit_side_len=1248, drop_score=0.3,
|
||||||
save_crop_res=False),
|
save_crop_res=False),
|
||||||
temp_file.name)
|
temp_file.name)
|
||||||
|
|||||||
Reference in New Issue
Block a user