diff --git a/photo_review/photo_review.py b/photo_review/photo_review.py index c3d1bb9..7ac9573 100644 --- a/photo_review/photo_review.py +++ b/photo_review/photo_review.py @@ -9,7 +9,6 @@ import urllib.request import cv2 import numpy as np -import paddle import paddleclas from paddlenlp import Taskflow @@ -18,7 +17,8 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from time import sleep from sqlalchemy import update from config.keys import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \ - PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, SETTLEMENT_LIST_SCHEMA, DISCHARGE_RECORD_SCHEMA, COST_LIST_SCHEMA + PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \ + SETTLEMENT_LIST_SCHEMA, DISCHARGE_RECORD_SCHEMA, COST_LIST_SCHEMA from config.mysql import MysqlSession from config.photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, LAYOUT_ANALYSIS from photo_review.entity.bd_yljg import BdYljg @@ -74,7 +74,8 @@ def split_image(img_path, max_ratio=2.82, best_ration=1.41, overlap=0.05): split_result.append({"img": cropped_img, "x_offset": 0, "y_offset": offset}) # 最后一次裁剪时不足的部分填充黑色 last_img = split_result[-1]["img"] - split_result[-1]["img"] = cv2.copyMakeBorder(last_img, 0, round(cropped_width - last_img.shape[0]), 0, 0, cv2.BORDER_CONSTANT, value=(0, 0, 0)) + split_result[-1]["img"] = cv2.copyMakeBorder(last_img, 0, round(cropped_width - last_img.shape[0]), 0, 0, + cv2.BORDER_CONSTANT, value=(0, 0, 0)) else: # 宽度是较长边 cropped_height = height * best_ration @@ -84,7 +85,8 @@ def split_image(img_path, max_ratio=2.82, best_ration=1.41, overlap=0.05): split_result.append({"img": cropped_img, "x_offset": offset, "y_offset": 0}) # 最后一次裁剪时不足的部分填充黑色 last_img = split_result[-1]["img"] - split_result[-1]["img"] = cv2.copyMakeBorder(last_img, 0, 0, 0, round(cropped_height - last_img.shape[1]), cv2.BORDER_CONSTANT, value=(0, 0, 0)) + split_result[-1]["img"] = cv2.copyMakeBorder(last_img, 0, 0, 0, round(cropped_height - last_img.shape[1]), + cv2.BORDER_CONSTANT, value=(0, 0, 0)) else: split_result.append({"img": img, "x_offset": 0, "y_offset": 0}) return split_result @@ -134,14 +136,14 @@ def rotate_image(img, angle): # 关键信息提取 def information_extraction(ie, phrecs): result = {} + # 同一批图的标识 + identity = int(time.time()) for phrec in phrecs: pic_path = ucloud.get_private_url(phrec.cfjaddress) if not pic_path: continue split_result = split_image(pic_path) - # 同一张图的标识 - identity = int(time.time()) for img in split_result: with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file: angle = get_image_rotation_angle(img["img"]) @@ -165,17 +167,14 @@ def information_extraction(ie, phrecs): result_json = result_json[:5000] session = MysqlSession() zx_ocr = ZxOcr(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, id=identity, cfjaddress=phrec.cfjaddress, - content=result_json, rotation_angle=angle, x_offset=img["x_offset"], y_offset=img["y_offset"], - create_time=now, update_time=now) + content=result_json, rotation_angle=angle, x_offset=img["x_offset"], + y_offset=img["y_offset"], create_time=now, update_time=now) session.add(zx_ocr) session.commit() session.close() result = merge_result(result, ie_result) - # # 完成一次任务释放显存 - # paddle.device.cuda.empty_cache() - # logging.info("显存释放完成") return result @@ -249,8 +248,9 @@ def photo_review(pk_phhd): elif phrec.cRectype == "4": cost_list.append(phrec) - settlement_list_ie_result = information_extraction(Taskflow("information_extraction", schema=SETTLEMENT_LIST_SCHEMA, model="uie-x-base", - task_path="config/model/settlement_list_model", layout_analysis=LAYOUT_ANALYSIS), settlement_list) + settlement_list_ie_result = information_extraction( + Taskflow("information_extraction", schema=SETTLEMENT_LIST_SCHEMA, model="uie-x-base", + task_path="config/model/settlement_list_model", layout_analysis=LAYOUT_ANALYSIS), settlement_list) settlement_data = { "pk_phhd": pk_phhd, "name": handle_name(get_best_value_in_keys(settlement_list_ie_result, PATIENT_NAME)), @@ -276,8 +276,9 @@ def photo_review(pk_phhd): settlement_data["personal_funded_amount"] = handle_decimal(settlement_data["personal_funded_amount_str"]) save_or_update_ie(ZxIeSettlement, pk_phhd, settlement_data) - discharge_record_ie_result = information_extraction(Taskflow("information_extraction", schema=DISCHARGE_RECORD_SCHEMA, model="uie-x-base", - task_path="config/model/discharge_record_model", layout_analysis=LAYOUT_ANALYSIS), discharge_record) + discharge_record_ie_result = information_extraction( + Taskflow("information_extraction", schema=DISCHARGE_RECORD_SCHEMA, model="uie-x-base", + task_path="config/model/discharge_record_model", layout_analysis=LAYOUT_ANALYSIS), discharge_record) discharge_data = { "pk_phhd": pk_phhd, "hospital": handle_hospital(get_best_value_in_keys(discharge_record_ie_result, HOSPITAL)), @@ -314,8 +315,9 @@ def photo_review(pk_phhd): discharge_data["department"] = ylks.name save_or_update_ie(ZxIeDischarge, pk_phhd, discharge_data) - cost_list_ie_result = information_extraction(Taskflow("information_extraction", schema=COST_LIST_SCHEMA, model="uie-x-base", - task_path="config/model/cost_list_model", layout_analysis=LAYOUT_ANALYSIS), cost_list) + cost_list_ie_result = information_extraction( + Taskflow("information_extraction", schema=COST_LIST_SCHEMA, model="uie-x-base", + task_path="config/model/cost_list_model", layout_analysis=LAYOUT_ANALYSIS), cost_list) cost_data = { "pk_phhd": pk_phhd, "name": handle_name(get_best_value_in_keys(cost_list_ie_result, PATIENT_NAME)),