增加OCR结果存表

This commit is contained in:
2024-12-24 14:55:43 +08:00
parent 96b8a06e6c
commit 5e6a471954
4 changed files with 27 additions and 9 deletions

View File

@@ -26,7 +26,7 @@ from ucloud import ufile
from util import image_util, util, html_util
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \
handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, \
parse_hospital, handle_doctor
parse_hospital, handle_doctor, handle_text
# 合并信息抽取结果
@@ -41,6 +41,7 @@ def ie_temp_image(ie, ocr, image):
cv2.imwrite(temp_file.name, image)
ie_result = []
ocr_pure_text = ''
try:
layout = util.get_ocr_layout(ocr, temp_file.name)
if not layout:
@@ -48,6 +49,8 @@ def ie_temp_image(ie, ocr, image):
ie_result = []
else:
ie_result = ie({"doc": temp_file.name, "layout": layout})[0]
for lay in layout:
ocr_pure_text += lay[1]
except MemoryError as e:
# 显存不足时应该抛出错误,让程序重启,同时释放显存
raise e
@@ -58,7 +61,7 @@ def ie_temp_image(ie, ocr, image):
os.remove(temp_file.name)
except Exception as e:
logging.info(f"删除临时文件 {temp_file.name} 时出错", exc_info=e)
return ie_result
return ie_result, ocr_pure_text
# 关键信息提取
@@ -150,6 +153,7 @@ def get_better_image_from_qrcode(image, image_id, dpi=150):
# 关键信息提取
def information_extraction(ie, phrecs, identity):
result = {}
ocr_text = ''
for phrec in phrecs:
img_path = ufile.get_private_url(phrec.cfjaddress)
if not img_path:
@@ -168,7 +172,7 @@ def information_extraction(ie, phrecs, identity):
if text:
info_extract = ie(text)[0]
else:
info_extract = ie_temp_image(ie, OCR, image)
info_extract = ie_temp_image(ie, OCR, image)[0]
ie_result = {'result': info_extract, 'angle': '0'}
now = util.get_default_datetime()
@@ -201,10 +205,12 @@ def information_extraction(ie, phrecs, identity):
if split_result['img'] is None or split_result['img'].size == 0:
continue
rotated_img = image_util.rotate(split_result['img'], int(angles[0]))
ie_results = [{'result': ie_temp_image(ie, OCR, rotated_img), 'angle': angles[0]}]
ie_temp_result = ie_temp_image(ie, OCR, rotated_img)
ocr_text += ie_temp_result[1]
ie_results = [{'result': ie_temp_result[0], 'angle': angles[0]}]
if not ie_results[0]['result'] or len(ie_results[0]['result']) < len(ie.kwargs.get('schema')):
rotated_img = image_util.rotate(split_result['img'], int(angles[1]))
ie_results.append({'result': ie_temp_image(ie, OCR, rotated_img), 'angle': angles[1]})
ie_results.append({'result': ie_temp_image(ie, OCR, rotated_img)[0], 'angle': angles[1]})
now = util.get_default_datetime()
best_angle = ['0', 0]
for ie_result in ie_results:
@@ -252,6 +258,7 @@ def information_extraction(ie, phrecs, identity):
session.commit()
session.close()
result['ocr_text'] = ocr_text
return result
@@ -414,6 +421,7 @@ def discharge_task(pk_phhd, discharge_record, identity):
"doctor": handle_doctor(get_best_value_in_keys(discharge_record_ie_result, DOCTOR)),
"admission_id": handle_id(get_best_value_in_keys(discharge_record_ie_result, ADMISSION_ID)),
"age": handle_age(get_best_value_in_keys(discharge_record_ie_result, AGE)),
"content": handle_text(discharge_record_ie_result['ocr_text']),
}
discharge_data["admission_date"] = handle_date(discharge_data["admission_date_str"])
discharge_data["discharge_date"] = handle_date(discharge_data["discharge_date_str"])
@@ -479,7 +487,8 @@ def cost_task(pk_phhd, cost_list, identity):
"name": handle_name(get_best_value_in_keys(cost_list_ie_result, PATIENT_NAME)),
"admission_date_str": handle_original_data(get_best_value_in_keys(cost_list_ie_result, ADMISSION_DATE)),
"discharge_date_str": handle_original_data(get_best_value_in_keys(cost_list_ie_result, DISCHARGE_DATE)),
"medical_expenses_str": handle_original_data(get_best_value_in_keys(cost_list_ie_result, MEDICAL_EXPENSES))
"medical_expenses_str": handle_original_data(get_best_value_in_keys(cost_list_ie_result, MEDICAL_EXPENSES)),
"content": handle_text(cost_list_ie_result['ocr_text']),
}
cost_data["admission_date"] = handle_date(cost_data["admission_date_str"])
cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"])