From fd6b70680366a602c115d6b023e37ff9c6a7ec84 Mon Sep 17 00:00:00 2001 From: liuyebo <1515783401@qq.com> Date: Thu, 22 Aug 2024 15:26:37 +0800 Subject: [PATCH] =?UTF-8?q?=E8=87=AA=E5=8A=A8=E8=AF=86=E5=88=AB=E6=96=B0?= =?UTF-8?q?=E5=A2=9E=E6=96=87=E6=A1=A3=E6=A3=80=E6=B5=8B=E4=B8=8E=E6=89=AD?= =?UTF-8?q?=E6=9B=B2=E7=9F=AB=E6=AD=A3=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 5 ++- docker-compose.yml | 2 +- photo_review/photo_review.py | 62 ++++++++++++++++++++---------------- 3 files changed, 40 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 47e7c9e..d1090f3 100644 --- a/README.md +++ b/README.md @@ -113,4 +113,7 @@ 19. 版本号:1.12.0 1. 优化医院、科室匹配方法,采用模糊匹配 2. 匹配时为医院、科室添加别名 - 3. 添加医院、科室名的分析处理 \ No newline at end of file + 3. 添加医院、科室名的分析处理 +20. 版本号:1.13.0 + 1. 新增文档检测功能 + 2. 新增扭曲矫正功能 \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 5453658..63d87c5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,6 @@ x-env: &template - image: fcb_photo_review:1.12.13 + image: fcb_photo_review:1.13.0 restart: always services: diff --git a/photo_review/photo_review.py b/photo_review/photo_review.py index 0fbf6b1..dab3b23 100644 --- a/photo_review/photo_review.py +++ b/photo_review/photo_review.py @@ -14,6 +14,8 @@ from sqlalchemy import update from db import MysqlSession from db.mysql import BdYljg, BdYlks, ZxIeResult, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec +from doc_dewarp import dewarp +from object_detection import doc_detect from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \ PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \ ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \ @@ -79,39 +81,45 @@ def information_extraction(ie, phrecs, identity): continue image = image_util.read(img_path) - angles = image_util.parse_rotation_angles(image) + target_images = [] + target_images += doc_detect.capture_target_area(image) # 识别文档区域并裁剪 + if not target_images: + target_images.append(image) # 识别失败 angle_count = defaultdict(int, {"0": 0}) # 分割后图片的最优角度统计 - zx_ie_results = [] - split_results = image_util.split(image) - for split_result in split_results: - rotated_img = image_util.rotate(split_result["img"], int(angles[0])) - ie_results = [{"result": ie_temp_image(ie, OCR, rotated_img), "angle": angles[0]}] - if not ie_results[0]["result"] or len(ie_results[0]["result"]) < len(ie.kwargs.get("schema")): - rotated_img = image_util.rotate(split_result["img"], int(angles[1])) - ie_results.append({"result": ie_temp_image(ie, OCR, rotated_img), "angle": angles[1]}) + for target_image in target_images: + dewarped_image = dewarp.dewarp_image(target_image) # 去扭曲 + angles = image_util.parse_rotation_angles(dewarped_image) + zx_ie_results = [] + split_results = image_util.split(dewarped_image) + for split_result in split_results: + rotated_img = image_util.rotate(split_result["img"], int(angles[0])) + ie_results = [{"result": ie_temp_image(ie, OCR, rotated_img), "angle": angles[0]}] + if not ie_results[0]["result"] or len(ie_results[0]["result"]) < len(ie.kwargs.get("schema")): + rotated_img = image_util.rotate(split_result["img"], int(angles[1])) + ie_results.append({"result": ie_temp_image(ie, OCR, rotated_img), "angle": angles[1]}) - now = util.get_default_datetime() - best_angle = ["0", 0] - for ie_result in ie_results: - if not ie_result["result"]: - continue + now = util.get_default_datetime() + best_angle = ["0", 0] + for ie_result in ie_results: + if not ie_result["result"]: + continue - result_json = json.dumps(ie_result["result"], ensure_ascii=False) - if len(result_json) > 5000: - result_json = result_json[:5000] - zx_ie_results.append(ZxIeResult(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, id=identity, - cfjaddress=phrec.cfjaddress, content=result_json, - rotation_angle=int(ie_result["angle"]), - x_offset=split_result["x_offset"], - y_offset=split_result["y_offset"], create_time=now, creator=HOSTNAME, - update_time=now, updater=HOSTNAME)) + result_json = json.dumps(ie_result["result"], ensure_ascii=False) + if len(result_json) > 5000: + result_json = result_json[:5000] + zx_ie_results.append(ZxIeResult(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, id=identity, + cfjaddress=phrec.cfjaddress, content=result_json, + rotation_angle=int(ie_result["angle"]), + x_offset=split_result["x_offset"], + y_offset=split_result["y_offset"], create_time=now, + creator=HOSTNAME, update_time=now, updater=HOSTNAME)) - result = merge_result(result, ie_result["result"]) + result = merge_result(result, ie_result["result"]) - if len(ie_result["result"]) > best_angle[1]: - best_angle = [ie_result["angle"], len(ie_result["result"])] + if len(ie_result["result"]) > best_angle[1]: + best_angle = [ie_result["angle"], len(ie_result["result"])] - angle_count[best_angle[0]] += 1 + angle_count[best_angle[0]] += 1 img_angle = max(angle_count, key=angle_count.get) if img_angle != "0":