增加ocr结果存表
This commit is contained in:
@@ -26,7 +26,7 @@ from ucloud import ufile
|
||||
from util import image_util, util, html_util
|
||||
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \
|
||||
handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, \
|
||||
parse_hospital, handle_doctor
|
||||
parse_hospital, handle_doctor, handle_text
|
||||
|
||||
|
||||
# 合并信息抽取结果
|
||||
@@ -41,6 +41,7 @@ def ie_temp_image(ie, ocr, image):
|
||||
cv2.imwrite(temp_file.name, image)
|
||||
|
||||
ie_result = []
|
||||
ocr_pure_text = ''
|
||||
try:
|
||||
layout = util.get_ocr_layout(ocr, temp_file.name)
|
||||
if not layout:
|
||||
@@ -48,6 +49,8 @@ def ie_temp_image(ie, ocr, image):
|
||||
ie_result = []
|
||||
else:
|
||||
ie_result = ie({"doc": temp_file.name, "layout": layout})[0]
|
||||
for lay in layout:
|
||||
ocr_pure_text += lay[1]
|
||||
except MemoryError as e:
|
||||
# 显存不足时应该抛出错误,让程序重启,同时释放显存
|
||||
raise e
|
||||
@@ -58,7 +61,7 @@ def ie_temp_image(ie, ocr, image):
|
||||
os.remove(temp_file.name)
|
||||
except Exception as e:
|
||||
logging.info(f"删除临时文件 {temp_file.name} 时出错", exc_info=e)
|
||||
return ie_result
|
||||
return ie_result, ocr_pure_text
|
||||
|
||||
|
||||
# 关键信息提取
|
||||
@@ -150,6 +153,7 @@ def get_better_image_from_qrcode(image, image_id, dpi=150):
|
||||
# 关键信息提取
|
||||
def information_extraction(ie, phrecs, identity):
|
||||
result = {}
|
||||
ocr_text = ''
|
||||
for phrec in phrecs:
|
||||
img_path = ufile.get_private_url(phrec.cfjaddress)
|
||||
if not img_path:
|
||||
@@ -168,7 +172,7 @@ def information_extraction(ie, phrecs, identity):
|
||||
if text:
|
||||
info_extract = ie(text)[0]
|
||||
else:
|
||||
info_extract = ie_temp_image(ie, OCR, image)
|
||||
info_extract = ie_temp_image(ie, OCR, image)[0]
|
||||
ie_result = {'result': info_extract, 'angle': '0'}
|
||||
|
||||
now = util.get_default_datetime()
|
||||
@@ -201,10 +205,12 @@ def information_extraction(ie, phrecs, identity):
|
||||
if split_result['img'] is None or split_result['img'].size == 0:
|
||||
continue
|
||||
rotated_img = image_util.rotate(split_result['img'], int(angles[0]))
|
||||
ie_results = [{'result': ie_temp_image(ie, OCR, rotated_img), 'angle': angles[0]}]
|
||||
ie_temp_result = ie_temp_image(ie, OCR, rotated_img)
|
||||
ocr_text += ie_temp_result[1]
|
||||
ie_results = [{'result': ie_temp_result[0], 'angle': angles[0]}]
|
||||
if not ie_results[0]['result'] or len(ie_results[0]['result']) < len(ie.kwargs.get('schema')):
|
||||
rotated_img = image_util.rotate(split_result['img'], int(angles[1]))
|
||||
ie_results.append({'result': ie_temp_image(ie, OCR, rotated_img), 'angle': angles[1]})
|
||||
ie_results.append({'result': ie_temp_image(ie, OCR, rotated_img)[0], 'angle': angles[1]})
|
||||
now = util.get_default_datetime()
|
||||
best_angle = ['0', 0]
|
||||
for ie_result in ie_results:
|
||||
@@ -252,6 +258,7 @@ def information_extraction(ie, phrecs, identity):
|
||||
session.commit()
|
||||
session.close()
|
||||
|
||||
result['ocr_text'] = ocr_text
|
||||
return result
|
||||
|
||||
|
||||
@@ -414,6 +421,7 @@ def discharge_task(pk_phhd, discharge_record, identity):
|
||||
"doctor": handle_doctor(get_best_value_in_keys(discharge_record_ie_result, DOCTOR)),
|
||||
"admission_id": handle_id(get_best_value_in_keys(discharge_record_ie_result, ADMISSION_ID)),
|
||||
"age": handle_age(get_best_value_in_keys(discharge_record_ie_result, AGE)),
|
||||
"content": handle_text(discharge_record_ie_result['ocr_text']),
|
||||
}
|
||||
discharge_data["admission_date"] = handle_date(discharge_data["admission_date_str"])
|
||||
discharge_data["discharge_date"] = handle_date(discharge_data["discharge_date_str"])
|
||||
@@ -479,7 +487,8 @@ def cost_task(pk_phhd, cost_list, identity):
|
||||
"name": handle_name(get_best_value_in_keys(cost_list_ie_result, PATIENT_NAME)),
|
||||
"admission_date_str": handle_original_data(get_best_value_in_keys(cost_list_ie_result, ADMISSION_DATE)),
|
||||
"discharge_date_str": handle_original_data(get_best_value_in_keys(cost_list_ie_result, DISCHARGE_DATE)),
|
||||
"medical_expenses_str": handle_original_data(get_best_value_in_keys(cost_list_ie_result, MEDICAL_EXPENSES))
|
||||
"medical_expenses_str": handle_original_data(get_best_value_in_keys(cost_list_ie_result, MEDICAL_EXPENSES)),
|
||||
"content": handle_text(cost_list_ie_result['ocr_text']),
|
||||
}
|
||||
cost_data["admission_date"] = handle_date(cost_data["admission_date_str"])
|
||||
cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"])
|
||||
|
||||
Reference in New Issue
Block a user