修正部分英文拼写;修正图片传递;修正页码解析

This commit is contained in:
2024-10-10 15:36:46 +08:00
parent 5c0fc0f819
commit a11cefb999
3 changed files with 16 additions and 9 deletions

View File

@@ -136,7 +136,8 @@ def information_extraction(phrec, pk_phhd, identity):
for split_result in split_results: for split_result in split_results:
if split_result['img'] is None: if split_result['img'] is None:
continue continue
ocr_result += model_util.ocr(rotated_img) a4_img = image_util.expand_to_a4_size(split_result['img'])
ocr_result += model_util.ocr(a4_img)
ocr_text = common_util.ocr_result_to_text(ocr_result) ocr_text = common_util.ocr_result_to_text(ocr_result)
rec_type = model_util.clas_text(ocr_text) if ocr_text else None rec_type = model_util.clas_text(ocr_text) if ocr_text else None
if rec_type == '基本医保结算单': if rec_type == '基本医保结算单':
@@ -301,7 +302,7 @@ def settlement_task(pk_phhd, settlement_list_ie_result):
settlement_data["medical_insurance_type"] = handle_insurance_type(settlement_data["medical_insurance_type_str"]) settlement_data["medical_insurance_type"] = handle_insurance_type(settlement_data["medical_insurance_type_str"])
parse_money_result = parse_money( parse_money_result = parse_money(
get_best_value_of_key(settlement_list_ie_result, IE_KEY['upper_case_medical_expenses']), get_best_value_of_key(settlement_list_ie_result, IE_KEY['uppercase_medical_expenses']),
get_best_value_of_key(settlement_list_ie_result, IE_KEY['medical_expenses'])) get_best_value_of_key(settlement_list_ie_result, IE_KEY['medical_expenses']))
settlement_data["medical_expenses_str"] = handle_original_data(parse_money_result[0]) settlement_data["medical_expenses_str"] = handle_original_data(parse_money_result[0])
settlement_data["medical_expenses"] = parse_money_result[1] settlement_data["medical_expenses"] = parse_money_result[1]
@@ -397,6 +398,7 @@ def cost_task(pk_phhd, cost_list_ie_result):
cost_data["admission_date"] = handle_date(cost_data["admission_date_str"]) cost_data["admission_date"] = handle_date(cost_data["admission_date_str"])
cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"]) cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"])
cost_data["medical_expenses"] = handle_decimal(cost_data["medical_expenses_str"]) cost_data["medical_expenses"] = handle_decimal(cost_data["medical_expenses_str"])
if cost_list_ie_result.get(IE_KEY['page']):
page_nums, page_count = parse_page_num(cost_list_ie_result[IE_KEY['page']]) page_nums, page_count = parse_page_num(cost_list_ie_result[IE_KEY['page']])
cost_data['page_nums'] = handle_original_data(','.join(page_nums)) cost_data['page_nums'] = handle_original_data(','.join(page_nums))
cost_data['page_count'] = handle_tiny_int(page_count) cost_data['page_count'] = handle_tiny_int(page_count)

View File

@@ -16,6 +16,6 @@ IE_KEY = {
'admission_id': '住院号', 'admission_id': '住院号',
'settlement_id': '医保结算单号码', 'settlement_id': '医保结算单号码',
'age': '年龄', 'age': '年龄',
'upper_case_medical_expenses': '大写总额', 'uppercase_medical_expenses': '大写总额',
'page': '页码', 'page': '页码',
} }

View File

@@ -172,12 +172,14 @@ def invert_rotate_rectangle(rectangle, center, angle):
return [new_top_left[0], new_top_left[1], new_bot_right[0], new_bot_right[1]] return [new_top_left[0], new_top_left[1], new_bot_right[0], new_bot_right[1]]
def expand_to_a4_size(image): def expand_to_a4_size(img_path):
""" """
以尽量少的方式将图片扩充到a4大小 以尽量少的方式将图片扩充到a4大小
:param image: 图片NumPy数组 :param img_path: 图片路径
:return: 扩充后的图片NumPy数组和偏移量 :return: 扩充后的图片NumPy数组和偏移量
""" """
image = cv2.imread(img_path)
img_name, img_ext = parse_save_path(img_path)
height, width = image.shape[:2] height, width = image.shape[:2]
x_offset, y_offset = 0, 0 x_offset, y_offset = 0, 0
hw_ratio = height / width hw_ratio = height / width
@@ -205,7 +207,10 @@ def expand_to_a4_size(image):
exp_img = numpy.zeros((y_offset, width, 3), dtype='uint8') exp_img = numpy.zeros((y_offset, width, 3), dtype='uint8')
exp_img.fill(255) exp_img.fill(255)
image = numpy.vstack([exp_img, image, exp_img]) image = numpy.vstack([exp_img, image, exp_img])
return image, x_offset, y_offset # todo:未拓展时不要生成新的图片
save_path = get_save_path(f'{img_name}.a4.{img_ext}')
cv2.imwrite(save_path, image)
return save_path, x_offset, y_offset
def combined(img1, img2): def combined(img1, img2):