diff --git a/photo_review/auto_photo_review.py b/photo_review/auto_photo_review.py index a9ac3e1..89cbd2f 100644 --- a/photo_review/auto_photo_review.py +++ b/photo_review/auto_photo_review.py @@ -139,33 +139,32 @@ def information_extraction(phrec, pk_phhd): else: # todo:也可能是图片,后续添加细分逻辑 dewarped_img_path = img_path angles = model_util.clas_orientation(dewarped_img_path) - ocr_result = [] ocr_text = '' - rotated_img = None + info_extract = [] for angle in angles: - tmp_ocr_result = [] - tmp_rotated_img = image_util.rotate(dewarped_img_path, int(angle)) - split_results = image_util.split(tmp_rotated_img) + ocr_result = [] + rotated_img = image_util.rotate(dewarped_img_path, int(angle)) + split_results = image_util.split(rotated_img) for split_result in split_results: if split_result['img'] is None: continue a4_img = image_util.expand_to_a4_size(split_result['img']) - tmp_ocr_result += model_util.ocr(a4_img) - tmp_ocr_text = common_util.ocr_result_to_text(tmp_ocr_result) + ocr_result += model_util.ocr(a4_img) + tmp_ocr_text = common_util.ocr_result_to_text(ocr_result) - if len(tmp_ocr_text) > len(ocr_text): - ocr_result = tmp_ocr_result + rec_type = model_util.clas_text(tmp_ocr_text) if ocr_text else None + if rec_type == '基本医保结算单': + tmp_info_extract = model_util.ie_settlement(rotated_img, common_util.ocr_result_to_layout(ocr_result)) + elif rec_type == '出院记录': + tmp_info_extract = model_util.ie_discharge(rotated_img, common_util.ocr_result_to_layout(ocr_result)) + elif rec_type == '费用清单': + tmp_info_extract = model_util.ie_cost(rotated_img, common_util.ocr_result_to_layout(ocr_result)) + else: + tmp_info_extract = None + + if len(tmp_info_extract) > len(info_extract): + info_extract = tmp_info_extract ocr_text = tmp_ocr_text - rotated_img = tmp_rotated_img - rec_type = model_util.clas_text(ocr_text) if ocr_text else None - if rec_type == '基本医保结算单': - info_extract = model_util.ie_settlement(rotated_img, common_util.ocr_result_to_layout(ocr_result)) - elif rec_type == '出院记录': - info_extract = model_util.ie_discharge(rotated_img, common_util.ocr_result_to_layout(ocr_result)) - elif rec_type == '费用清单': - info_extract = model_util.ie_cost(rotated_img, common_util.ocr_result_to_layout(ocr_result)) - else: - info_extract = None if info_extract: result_json = json.dumps(info_extract, ensure_ascii=False)