From 15fe5d4f0d0988016c430357118f3bd5460e76d5 Mon Sep 17 00:00:00 2001 From: liuyebo <1515783401@qq.com> Date: Thu, 10 Oct 2024 09:24:09 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BF=A1=E6=81=AF=E6=8A=BD?= =?UTF-8?q?=E5=8F=96=E5=AD=98=E8=A1=A8=EF=BC=8C=E6=A0=B9=E6=8D=AE=E6=8A=BD?= =?UTF-8?q?=E5=8F=96=E7=BB=93=E6=9E=9C=E8=BF=9B=E8=A1=8C=E7=BC=BA=E9=A1=B9?= =?UTF-8?q?=E5=88=A4=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- photo_review/auto_photo_review.py | 38 ++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/photo_review/auto_photo_review.py b/photo_review/auto_photo_review.py index 460ec39..6abe6d2 100644 --- a/photo_review/auto_photo_review.py +++ b/photo_review/auto_photo_review.py @@ -1,3 +1,4 @@ +import json import logging import time from collections import defaultdict @@ -12,7 +13,7 @@ from rapidfuzz import process, fuzz from sqlalchemy import update from db import MysqlSession -from db.mysql import BdYljg, BdYlks, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec, ZxIeReview +from db.mysql import BdYljg, BdYlks, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec, ZxIeReview, ZxIeResult from log import HOSTNAME from photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, \ DEPARTMENT_FILTER @@ -98,11 +99,13 @@ def get_better_image_from_qrcode(img_path, image_id, dpi=150): # 关键信息提取 -def information_extraction(phrec, identity): +def information_extraction(phrec, pk_phhd, identity): """ 处理单张图片 - :param phrec: - :return: + :param phrec:图片信息 + :param pk_phhd:案子主键 + :param identity:处理批次标识 + :return:记录类型,信息抽取结果 """ img_path = image_util.get_img_path(phrec.cfjaddress) if not img_path: @@ -114,13 +117,13 @@ def information_extraction(phrec, identity): if phrec.cRectype != '1': better_img_path = None # 非结算单暂时不进行替换 if better_img_path is not None: + rec_type = '基本医保结算单' if text: info_extract = model_util.ie_settlement_text(text) else: info_extract = model_util.ie_settlement( better_img_path, common_util.ocr_result_to_layout(model_util.ocr(better_img_path)) ) - return '基本医保结算单', info_extract else: target_image = model_util.det_book(img_path) # 识别文档区域并裁剪 dewarped_image = model_util.dewarp(target_image) # 去扭曲 @@ -143,7 +146,19 @@ def information_extraction(phrec, identity): else: info_extract = None - return rec_type, info_extract + if info_extract: + result_json = json.dumps(info_extract, ensure_ascii=False) + if len(result_json) > 5000: + result_json = result_json[:5000] + + now = common_util.get_default_datetime() + session = MysqlSession() + session.add(ZxIeResult(pk_phhd=pk_phhd, pk_phrec=phrec.pk_phrec, id=identity, + cfjaddress=phrec.cfjaddress, content=result_json, create_time=now, + creator=HOSTNAME, update_time=now, updater=HOSTNAME)) + session.commit() + session.close() + return rec_type, info_extract # 从keys中获取准确率最高的value @@ -402,7 +417,7 @@ def photo_review(pk_phhd, name): # 同一批图的标识 identity = int(time.time()) for phrec in phrecs: - rec_type, ie_result = information_extraction(phrec, identity) + rec_type, ie_result = information_extraction(phrec, pk_phhd, identity) if rec_type == '基本医保结算单': rec_result = settlement_result elif rec_type == '出院记录': @@ -424,12 +439,13 @@ def photo_review(pk_phhd, name): } # 三项资料完整性判断 # 三项资料缺项判断 - if (settlement_data['personal_account_payment'] + settlement_data['personal_cash_payment'] + if (bool(settlement_data) and settlement_data['personal_account_payment'] + and settlement_data['personal_cash_payment'] and settlement_data['medical_expenses'] + and settlement_data['personal_account_payment'] + settlement_data['personal_cash_payment'] < settlement_data['medical_expenses']): review_result['has_settlement'] = True - # TODO:出院记录和费用清单暂时没想好怎么判断 - review_result['has_discharge'] = True - review_result['has_cost'] = True + review_result['has_discharge'] = bool(discharge_result) + review_result['has_cost'] = bool(cost_result) # 三项资料缺页判断 # TODO:缺页需要对页码进行抽取,暂未训练相关模型 review_result['full_page'] = True