diff --git a/docker-compose-time.yml b/docker-compose-time.yml new file mode 100644 index 0000000..c488553 --- /dev/null +++ b/docker-compose-time.yml @@ -0,0 +1,41 @@ +x-env: + &template + image: fcb_photo_review:2.0.0 + restart: always + +services: + det_api: + <<: *template + build: + context: . + container_name: det_api + hostname: det_api + volumes: + - ./log:/app/log + - ./model:/app/model + command: [ "det_api.py" ] + deploy: + resources: + reservations: + devices: + - device_ids: [ "0" ] + capabilities: [ "gpu" ] + driver: "nvidia" + + photo_time_test: + <<: *template + container_name: photo_time_test + hostname: photo_time_test + volumes: + - ./log:/app/log + - ./model:/app/model + depends_on: + - det_api + command: [ "photo_review.py" ] + deploy: + resources: + reservations: + devices: + - device_ids: [ "0", "1" ] + capabilities: [ "gpu" ] + driver: "nvidia" \ No newline at end of file diff --git a/photo_review/auto_photo_review.py b/photo_review/auto_photo_review.py index 3a80e2d..111fdcd 100644 --- a/photo_review/auto_photo_review.py +++ b/photo_review/auto_photo_review.py @@ -76,32 +76,47 @@ def request_ie_result(task_enum, phrecs): def information_extraction(ie, phrecs, identity): result = {} for phrec in phrecs: + ufile_start = time.time() img_path = ufile.get_private_url(phrec.cfjaddress) if not img_path: continue image = image_util.read(img_path) + logging.info(f"获取云图片耗时:{time.time() - ufile_start}s") target_images = [] + book_start = time.time() target_images += detector.request_book_areas(image) # 识别文档区域并裁剪 if not target_images: target_images.append(image) # 识别失败 + logging.info(f"检测文档耗时:{time.time() - book_start}s") angle_count = defaultdict(int, {"0": 0}) # 分割后图片的最优角度统计 for target_image in target_images: + dewarp_start = time.time() dewarped_image = dewarp.dewarp_image(target_image) # 去扭曲 + logging.info(f"去扭曲耗时:{time.time() - dewarp_start}s") + rotate_start = time.time() angles = image_util.parse_rotation_angles(dewarped_image) + logging.info(f"检测图片旋转耗时:{time.time() - rotate_start}s") zx_ie_results = [] + split_start = time.time() split_results = image_util.split(dewarped_image) + logging.info(f"分割图片耗时:{time.time() - split_start}s") for split_result in split_results: if split_result["img"] is None or split_result["img"].size == 0: continue + rotated_start = time.time() rotated_img = image_util.rotate(split_result["img"], int(angles[0])) + logging.info(f"旋转图片耗时:{time.time() - rotated_start}s") + ie_start = time.time() ie_results = [{"result": ie_temp_image(ie, OCR, rotated_img), "angle": angles[0]}] + logging.info(f"信息抽取耗时:{time.time() - ie_start}s") if not ie_results[0]["result"] or len(ie_results[0]["result"]) < len(ie.kwargs.get("schema")): rotated_img = image_util.rotate(split_result["img"], int(angles[1])) ie_results.append({"result": ie_temp_image(ie, OCR, rotated_img), "angle": angles[1]}) now = util.get_default_datetime() best_angle = ["0", 0] + result_start = time.time() for ie_result in ie_results: if not ie_result["result"]: continue @@ -122,6 +137,7 @@ def information_extraction(ie, phrecs, identity): best_angle = [ie_result["angle"], len(ie_result["result"])] angle_count[best_angle[0]] += 1 + logging.info(f"构建结果耗时:{time.time() - result_start}s") img_angle = max(angle_count, key=angle_count.get) if img_angle != "0": @@ -129,19 +145,23 @@ def information_extraction(ie, phrecs, identity): with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file: cv2.imwrite(temp_file.name, image) try: + upload_start = time.time() ufile.upload_file(phrec.cfjaddress, temp_file.name) # 修正旋转角度 for zx_ie_result in zx_ie_results: zx_ie_result.rotation_angle -= int(img_angle) + logging.info(f"上传图片耗时:{time.time() - upload_start}s") except Exception as e: logging.error(f"上传图片({phrec.cfjaddress})失败", exc_info=e) finally: util.delete_temp_file(temp_file.name) + db_start = time.time() session = MysqlSession() session.add_all(zx_ie_results) session.commit() session.close() + logging.info(f"写入数据库耗时:{time.time() - db_start}s") return result @@ -260,6 +280,7 @@ def search_department(department): def settlement_task(pk_phhd, settlement_list, identity): settlement_list_ie_result = information_extraction(SETTLEMENT_IE, settlement_list, identity) + settlement_start = time.time() settlement_data = { "pk_phhd": pk_phhd, "name": handle_name(get_best_value_in_keys(settlement_list_ie_result, PATIENT_NAME)), @@ -289,10 +310,12 @@ def settlement_task(pk_phhd, settlement_list, identity): settlement_data["medical_expenses_str"] = handle_original_data(parse_money_result[0]) settlement_data["medical_expenses"] = parse_money_result[1] save_or_update_ie(ZxIeSettlement, pk_phhd, settlement_data) + logging.info(f"处理结算单耗时: {time.time() - settlement_start}s") def discharge_task(pk_phhd, discharge_record, identity): discharge_record_ie_result = information_extraction(DISCHARGE_IE, discharge_record, identity) + discharge_start = time.time() hospitals = get_values_of_keys(discharge_record_ie_result, HOSPITAL) departments = get_values_of_keys(discharge_record_ie_result, DEPARTMENT) discharge_data = { @@ -361,10 +384,12 @@ def discharge_task(pk_phhd, discharge_record, identity): if best_match: discharge_data["pk_ylks"] = best_match[2] save_or_update_ie(ZxIeDischarge, pk_phhd, discharge_data) + logging.info(f"处理出院记录耗时: {time.time() - discharge_start}s") def cost_task(pk_phhd, cost_list, identity): cost_list_ie_result = information_extraction(COST_IE, cost_list, identity) + cost_start = time.time() cost_data = { "pk_phhd": pk_phhd, "name": handle_name(get_best_value_in_keys(cost_list_ie_result, PATIENT_NAME)), @@ -376,9 +401,11 @@ def cost_task(pk_phhd, cost_list, identity): cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"]) cost_data["medical_expenses"] = handle_decimal(cost_data["medical_expenses_str"]) save_or_update_ie(ZxIeCost, pk_phhd, cost_data) + logging.info(f"处理费用记录耗时: {time.time() - cost_start}s") def photo_review(pk_phhd): + phrec_start = time.time() settlement_list = [] discharge_record = [] cost_list = [] @@ -395,6 +422,7 @@ def photo_review(pk_phhd): discharge_record.append(phrec) elif phrec.cRectype == "4": cost_list.append(phrec) + logging.info(f"图片分类耗时:{time.time() - phrec_start}s") # 同一批图的标识 identity = int(time.time()) @@ -405,6 +433,7 @@ def photo_review(pk_phhd): def main(): while 1: + db_start = time.time() session = MysqlSession() phhds = (session.query(ZxPhhd.pk_phhd) .join(ZxPhrec, ZxPhhd.pk_phhd == ZxPhrec.pk_phhd, isouter=True) @@ -417,6 +446,7 @@ def main(): session.execute(update_flag) session.commit() session.close() + logging.info(f"数据库查询耗时:{time.time() - db_start}s") if phhds: for phhd in phhds: pk_phhd = phhd.pk_phhd