自动识别新增文档检测与扭曲矫正功能

This commit is contained in:
2024-08-22 15:26:37 +08:00
parent 15ca978eaf
commit fd6b706803
3 changed files with 40 additions and 29 deletions

View File

@@ -114,3 +114,6 @@
1. 优化医院、科室匹配方法,采用模糊匹配
2. 匹配时为医院、科室添加别名
3. 添加医院、科室名的分析处理
20. 版本号1.13.0
1. 新增文档检测功能
2. 新增扭曲矫正功能

View File

@@ -1,6 +1,6 @@
x-env: x-env:
&template &template
image: fcb_photo_review:1.12.13 image: fcb_photo_review:1.13.0
restart: always restart: always
services: services:

View File

@@ -14,6 +14,8 @@ from sqlalchemy import update
from db import MysqlSession from db import MysqlSession
from db.mysql import BdYljg, BdYlks, ZxIeResult, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec from db.mysql import BdYljg, BdYlks, ZxIeResult, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec
from doc_dewarp import dewarp
from object_detection import doc_detect
from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \ from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \ PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \
ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \ ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \
@@ -79,39 +81,45 @@ def information_extraction(ie, phrecs, identity):
continue continue
image = image_util.read(img_path) image = image_util.read(img_path)
angles = image_util.parse_rotation_angles(image) target_images = []
target_images += doc_detect.capture_target_area(image) # 识别文档区域并裁剪
if not target_images:
target_images.append(image) # 识别失败
angle_count = defaultdict(int, {"0": 0}) # 分割后图片的最优角度统计 angle_count = defaultdict(int, {"0": 0}) # 分割后图片的最优角度统计
zx_ie_results = [] for target_image in target_images:
split_results = image_util.split(image) dewarped_image = dewarp.dewarp_image(target_image) # 去扭曲
for split_result in split_results: angles = image_util.parse_rotation_angles(dewarped_image)
rotated_img = image_util.rotate(split_result["img"], int(angles[0])) zx_ie_results = []
ie_results = [{"result": ie_temp_image(ie, OCR, rotated_img), "angle": angles[0]}] split_results = image_util.split(dewarped_image)
if not ie_results[0]["result"] or len(ie_results[0]["result"]) < len(ie.kwargs.get("schema")): for split_result in split_results:
rotated_img = image_util.rotate(split_result["img"], int(angles[1])) rotated_img = image_util.rotate(split_result["img"], int(angles[0]))
ie_results.append({"result": ie_temp_image(ie, OCR, rotated_img), "angle": angles[1]}) ie_results = [{"result": ie_temp_image(ie, OCR, rotated_img), "angle": angles[0]}]
if not ie_results[0]["result"] or len(ie_results[0]["result"]) < len(ie.kwargs.get("schema")):
rotated_img = image_util.rotate(split_result["img"], int(angles[1]))
ie_results.append({"result": ie_temp_image(ie, OCR, rotated_img), "angle": angles[1]})
now = util.get_default_datetime() now = util.get_default_datetime()
best_angle = ["0", 0] best_angle = ["0", 0]
for ie_result in ie_results: for ie_result in ie_results:
if not ie_result["result"]: if not ie_result["result"]:
continue continue
result_json = json.dumps(ie_result["result"], ensure_ascii=False) result_json = json.dumps(ie_result["result"], ensure_ascii=False)
if len(result_json) > 5000: if len(result_json) > 5000:
result_json = result_json[:5000] result_json = result_json[:5000]
zx_ie_results.append(ZxIeResult(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, id=identity, zx_ie_results.append(ZxIeResult(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, id=identity,
cfjaddress=phrec.cfjaddress, content=result_json, cfjaddress=phrec.cfjaddress, content=result_json,
rotation_angle=int(ie_result["angle"]), rotation_angle=int(ie_result["angle"]),
x_offset=split_result["x_offset"], x_offset=split_result["x_offset"],
y_offset=split_result["y_offset"], create_time=now, creator=HOSTNAME, y_offset=split_result["y_offset"], create_time=now,
update_time=now, updater=HOSTNAME)) creator=HOSTNAME, update_time=now, updater=HOSTNAME))
result = merge_result(result, ie_result["result"]) result = merge_result(result, ie_result["result"])
if len(ie_result["result"]) > best_angle[1]: if len(ie_result["result"]) > best_angle[1]:
best_angle = [ie_result["angle"], len(ie_result["result"])] best_angle = [ie_result["angle"], len(ie_result["result"])]
angle_count[best_angle[0]] += 1 angle_count[best_angle[0]] += 1
img_angle = max(angle_count, key=angle_count.get) img_angle = max(angle_count, key=angle_count.get)
if img_angle != "0": if img_angle != "0":