diff --git a/photo_review/auto_photo_review.py b/photo_review/auto_photo_review.py index 4c8eddc..460ec39 100644 --- a/photo_review/auto_photo_review.py +++ b/photo_review/auto_photo_review.py @@ -115,11 +115,11 @@ def information_extraction(phrec, identity): better_img_path = None # 非结算单暂时不进行替换 if better_img_path is not None: if text: - info_extract = model_util.ie_settlement_text(text)[0] + info_extract = model_util.ie_settlement_text(text) else: - info_extract = model_util.ie_settlement(better_img_path, - common_util.ocr_result_to_layout(model_util.ocr(better_img_path))) - + info_extract = model_util.ie_settlement( + better_img_path, common_util.ocr_result_to_layout(model_util.ocr(better_img_path)) + ) return '基本医保结算单', info_extract else: target_image = model_util.det_book(img_path) # 识别文档区域并裁剪 @@ -147,33 +147,31 @@ def information_extraction(phrec, identity): # 从keys中获取准确率最高的value -def get_best_value_in_keys(source, keys): +def get_best_value_of_key(source, key): # 最终结果 result = None # 最大可能性 best_probability = 0 - for key in keys: - values = source.get(key) - if values: - for value in values: - text = value.get("text") - probability = value.get("probability") - if text and probability > best_probability: - result = text - best_probability = probability + values = source.get(key) + if values: + for value in values: + text = value.get("text") + probability = value.get("probability") + if text and probability > best_probability: + result = text + best_probability = probability return result # 从keys中获取所有value组成list -def get_values_of_keys(source, keys): +def get_values_of_key(source, key): result = [] - for key in keys: - value = source.get(key) - if value: - for v in value: - v = v.get("text") - if v: - result.append(v) + value = source.get(key) + if value: + for v in value: + v = v.get("text") + if v: + result.append(v) # 去重 return list(set(result)) @@ -261,21 +259,21 @@ def search_department(department): def settlement_task(pk_phhd, settlement_list_ie_result): settlement_data = { "pk_phhd": pk_phhd, - "name": handle_name(get_best_value_in_keys(settlement_list_ie_result, IE_KEY['name'])), + "name": handle_name(get_best_value_of_key(settlement_list_ie_result, IE_KEY['name'])), "admission_date_str": handle_original_data( - get_best_value_in_keys(settlement_list_ie_result, IE_KEY['admission_date'])), + get_best_value_of_key(settlement_list_ie_result, IE_KEY['admission_date'])), "discharge_date_str": handle_original_data( - get_best_value_in_keys(settlement_list_ie_result, IE_KEY['discharge_date'])), + get_best_value_of_key(settlement_list_ie_result, IE_KEY['discharge_date'])), "personal_cash_payment_str": handle_original_data( - get_best_value_in_keys(settlement_list_ie_result, IE_KEY['personal_cash_payment'])), + get_best_value_of_key(settlement_list_ie_result, IE_KEY['personal_cash_payment'])), "personal_account_payment_str": handle_original_data( - get_best_value_in_keys(settlement_list_ie_result, IE_KEY['personal_account_payment'])), + get_best_value_of_key(settlement_list_ie_result, IE_KEY['personal_account_payment'])), "personal_funded_amount_str": handle_original_data( - get_best_value_in_keys(settlement_list_ie_result, IE_KEY['personal_funded_amount'])), + get_best_value_of_key(settlement_list_ie_result, IE_KEY['personal_funded_amount'])), "medical_insurance_type_str": handle_original_data( - get_best_value_in_keys(settlement_list_ie_result, IE_KEY['medical_insurance_type'])), - "admission_id": handle_id(get_best_value_in_keys(settlement_list_ie_result, IE_KEY['admission_id'])), - "settlement_id": handle_id(get_best_value_in_keys(settlement_list_ie_result, IE_KEY['settlement_id'])), + get_best_value_of_key(settlement_list_ie_result, IE_KEY['medical_insurance_type'])), + "admission_id": handle_id(get_best_value_of_key(settlement_list_ie_result, IE_KEY['admission_id'])), + "settlement_id": handle_id(get_best_value_of_key(settlement_list_ie_result, IE_KEY['settlement_id'])), } settlement_data["admission_date"] = handle_date(settlement_data["admission_date_str"]) settlement_data["admission_date"] = handle_date(settlement_data["admission_date_str"]) @@ -286,8 +284,8 @@ def settlement_task(pk_phhd, settlement_list_ie_result): settlement_data["medical_insurance_type"] = handle_insurance_type(settlement_data["medical_insurance_type_str"]) parse_money_result = parse_money( - get_best_value_in_keys(settlement_list_ie_result, IE_KEY['upper_case_medical_expenses']), - get_best_value_in_keys(settlement_list_ie_result, IE_KEY['medical_expenses'])) + get_best_value_of_key(settlement_list_ie_result, IE_KEY['upper_case_medical_expenses']), + get_best_value_of_key(settlement_list_ie_result, IE_KEY['medical_expenses'])) settlement_data["medical_expenses_str"] = handle_original_data(parse_money_result[0]) settlement_data["medical_expenses"] = parse_money_result[1] save_or_update_ie(ZxIeSettlement, pk_phhd, settlement_data) @@ -295,20 +293,20 @@ def settlement_task(pk_phhd, settlement_list_ie_result): def discharge_task(pk_phhd, discharge_record_ie_result): - hospitals = get_values_of_keys(discharge_record_ie_result, IE_KEY['hospital']) - departments = get_values_of_keys(discharge_record_ie_result, IE_KEY['department']) + hospitals = get_values_of_key(discharge_record_ie_result, IE_KEY['hospital']) + departments = get_values_of_key(discharge_record_ie_result, IE_KEY['department']) discharge_data = { "pk_phhd": pk_phhd, "hospital": handle_hospital(",".join(hospitals)), "department": handle_department(",".join(departments)), - "name": handle_name(get_best_value_in_keys(discharge_record_ie_result, IE_KEY['name'])), + "name": handle_name(get_best_value_of_key(discharge_record_ie_result, IE_KEY['name'])), "admission_date_str": handle_original_data( - get_best_value_in_keys(discharge_record_ie_result, IE_KEY['admission_date'])), + get_best_value_of_key(discharge_record_ie_result, IE_KEY['admission_date'])), "discharge_date_str": handle_original_data( - get_best_value_in_keys(discharge_record_ie_result, IE_KEY['discharge_date'])), - "doctor": handle_name(get_best_value_in_keys(discharge_record_ie_result, IE_KEY['doctor'])), - "admission_id": handle_id(get_best_value_in_keys(discharge_record_ie_result, IE_KEY['admission_id'])), - "age": handle_age(get_best_value_in_keys(discharge_record_ie_result, IE_KEY['age'])), + get_best_value_of_key(discharge_record_ie_result, IE_KEY['discharge_date'])), + "doctor": handle_name(get_best_value_of_key(discharge_record_ie_result, IE_KEY['doctor'])), + "admission_id": handle_id(get_best_value_of_key(discharge_record_ie_result, IE_KEY['admission_id'])), + "age": handle_age(get_best_value_of_key(discharge_record_ie_result, IE_KEY['age'])), } discharge_data["admission_date"] = handle_date(discharge_data["admission_date_str"]) discharge_data["discharge_date"] = handle_date(discharge_data["discharge_date_str"]) @@ -371,13 +369,13 @@ def discharge_task(pk_phhd, discharge_record_ie_result): def cost_task(pk_phhd, cost_list_ie_result): cost_data = { "pk_phhd": pk_phhd, - "name": handle_name(get_best_value_in_keys(cost_list_ie_result, IE_KEY['name'])), + "name": handle_name(get_best_value_of_key(cost_list_ie_result, IE_KEY['name'])), "admission_date_str": handle_original_data( - get_best_value_in_keys(cost_list_ie_result, IE_KEY['admission_date'])), + get_best_value_of_key(cost_list_ie_result, IE_KEY['admission_date'])), "discharge_date_str": handle_original_data( - get_best_value_in_keys(cost_list_ie_result, IE_KEY['discharge_date'])), + get_best_value_of_key(cost_list_ie_result, IE_KEY['discharge_date'])), "medical_expenses_str": handle_original_data( - get_best_value_in_keys(cost_list_ie_result, IE_KEY['medical_expenses'])) + get_best_value_of_key(cost_list_ie_result, IE_KEY['medical_expenses'])) } cost_data["admission_date"] = handle_date(cost_data["admission_date_str"]) cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"]) @@ -413,9 +411,9 @@ def photo_review(pk_phhd, name): rec_result = cost_result else: rec_result = None - if rec_result: + if rec_result is not None: for key, value in ie_result.items(): - rec_result[key].append(value) + rec_result[key] += value settlement_data = settlement_task(pk_phhd, settlement_result) discharge_data = discharge_task(pk_phhd, discharge_result) diff --git a/util/model_util.py b/util/model_util.py index db65d1b..0e5aa2a 100644 --- a/util/model_util.py +++ b/util/model_util.py @@ -35,9 +35,10 @@ def ie_settlement(img_path, layout): url = 'http://ie_settlement:5002' response = requests.post(url, {'img_path': img_path, 'layout': json.dumps(layout)}) if response.status_code == 200: - return response.json() - else: - return None + ie_result = response.json() + if ie_result.get(0): + return ie_result[0] + return None @retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, @@ -51,9 +52,10 @@ def ie_settlement_text(text): url = 'http://ie_settlement:5002/text' response = requests.post(url, {'text': text}) if response.status_code == 200: - return response.json() - else: - return None + ie_result = response.json() + if ie_result.get(0): + return ie_result[0] + return None @retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, @@ -68,9 +70,10 @@ def ie_discharge(img_path, layout): url = 'http://ie_discharge:5003' response = requests.post(url, {'img_path': img_path, 'layout': json.dumps(layout)}) if response.status_code == 200: - return response.json() - else: - return None + ie_result = response.json() + if ie_result.get(0): + return ie_result[0] + return None @retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, @@ -84,9 +87,10 @@ def ie_discharge_text(text): url = 'http://ie_discharge:5003/text' response = requests.post(url, {'text': text}) if response.status_code == 200: - return response.json() - else: - return None + ie_result = response.json() + if ie_result.get(0): + return ie_result[0] + return None @retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, @@ -101,9 +105,10 @@ def ie_cost(img_path, layout): url = 'http://ie_cost:5004' response = requests.post(url, {'img_path': img_path, 'layout': json.dumps(layout)}) if response.status_code == 200: - return response.json() - else: - return None + ie_result = response.json() + if ie_result.get(0): + return ie_result[0] + return None @retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, @@ -117,9 +122,10 @@ def ie_cost_text(text): url = 'http://ie_cost:5004/text' response = requests.post(url, {'text': text}) if response.status_code == 200: - return response.json() - else: - return None + ie_result = response.json() + if ie_result.get(0): + return ie_result[0] + return None @retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,