增加ocr结果存表
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from sqlalchemy import Column, DECIMAL, Date, DateTime, Index, String, text, LargeBinary
|
from sqlalchemy import Column, DECIMAL, Date, DateTime, Index, String, text, LargeBinary, Text
|
||||||
from sqlalchemy.dialects.mysql import BIT, CHAR, INTEGER, TINYINT, VARCHAR
|
from sqlalchemy.dialects.mysql import BIT, CHAR, INTEGER, TINYINT, VARCHAR
|
||||||
|
|
||||||
from db import Base
|
from db import Base
|
||||||
@@ -56,6 +56,7 @@ class ZxIeCost(Base):
|
|||||||
|
|
||||||
pk_ie_cost = Column(INTEGER(11), primary_key=True, comment='费用明细信息抽取主键')
|
pk_ie_cost = Column(INTEGER(11), primary_key=True, comment='费用明细信息抽取主键')
|
||||||
pk_phhd = Column(INTEGER(11), nullable=False, unique=True, comment='报销案子主键')
|
pk_phhd = Column(INTEGER(11), nullable=False, unique=True, comment='报销案子主键')
|
||||||
|
content = Column(Text, comment='详细内容')
|
||||||
name = Column(String(30), comment='患者姓名')
|
name = Column(String(30), comment='患者姓名')
|
||||||
admission_date_str = Column(String(255), comment='入院日期字符串')
|
admission_date_str = Column(String(255), comment='入院日期字符串')
|
||||||
admission_date = Column(Date, comment='入院日期')
|
admission_date = Column(Date, comment='入院日期')
|
||||||
@@ -63,6 +64,8 @@ class ZxIeCost(Base):
|
|||||||
discharge_date = Column(Date, comment='出院日期')
|
discharge_date = Column(Date, comment='出院日期')
|
||||||
medical_expenses_str = Column(String(255), comment='费用总额字符串')
|
medical_expenses_str = Column(String(255), comment='费用总额字符串')
|
||||||
medical_expenses = Column(DECIMAL(18, 2), comment='费用总额')
|
medical_expenses = Column(DECIMAL(18, 2), comment='费用总额')
|
||||||
|
page_nums = Column(String(255), comment='页码')
|
||||||
|
page_count = Column(TINYINT(4), comment='页数')
|
||||||
create_time = Column(DateTime, server_default=text("CURRENT_TIMESTAMP"), comment='创建时间')
|
create_time = Column(DateTime, server_default=text("CURRENT_TIMESTAMP"), comment='创建时间')
|
||||||
creator = Column(String(255), comment='创建人')
|
creator = Column(String(255), comment='创建人')
|
||||||
update_time = Column(DateTime, server_default=text("CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP"),
|
update_time = Column(DateTime, server_default=text("CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP"),
|
||||||
@@ -94,7 +97,7 @@ class ZxIeDischarge(Base):
|
|||||||
|
|
||||||
pk_ie_discharge = Column(INTEGER(11), primary_key=True, comment='出院记录信息抽取主键')
|
pk_ie_discharge = Column(INTEGER(11), primary_key=True, comment='出院记录信息抽取主键')
|
||||||
pk_phhd = Column(INTEGER(11), nullable=False, unique=True, comment='报销案子主键')
|
pk_phhd = Column(INTEGER(11), nullable=False, unique=True, comment='报销案子主键')
|
||||||
content = Column(String(5000), comment='详细内容')
|
content = Column(Text, comment='详细内容')
|
||||||
hospital = Column(String(255), comment='医院')
|
hospital = Column(String(255), comment='医院')
|
||||||
pk_yljg = Column(INTEGER(11), comment='医院主键')
|
pk_yljg = Column(INTEGER(11), comment='医院主键')
|
||||||
department = Column(String(255), comment='科室')
|
department = Column(String(255), comment='科室')
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
x-env:
|
x-env:
|
||||||
&template
|
&template
|
||||||
image: fcb_photo_review:1.14.10
|
image: fcb_photo_review:1.14.11
|
||||||
restart: always
|
restart: always
|
||||||
|
|
||||||
x-review:
|
x-review:
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ from ucloud import ufile
|
|||||||
from util import image_util, util, html_util
|
from util import image_util, util, html_util
|
||||||
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \
|
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \
|
||||||
handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, \
|
handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, \
|
||||||
parse_hospital, handle_doctor
|
parse_hospital, handle_doctor, handle_text
|
||||||
|
|
||||||
|
|
||||||
# 合并信息抽取结果
|
# 合并信息抽取结果
|
||||||
@@ -41,6 +41,7 @@ def ie_temp_image(ie, ocr, image):
|
|||||||
cv2.imwrite(temp_file.name, image)
|
cv2.imwrite(temp_file.name, image)
|
||||||
|
|
||||||
ie_result = []
|
ie_result = []
|
||||||
|
ocr_pure_text = ''
|
||||||
try:
|
try:
|
||||||
layout = util.get_ocr_layout(ocr, temp_file.name)
|
layout = util.get_ocr_layout(ocr, temp_file.name)
|
||||||
if not layout:
|
if not layout:
|
||||||
@@ -48,6 +49,8 @@ def ie_temp_image(ie, ocr, image):
|
|||||||
ie_result = []
|
ie_result = []
|
||||||
else:
|
else:
|
||||||
ie_result = ie({"doc": temp_file.name, "layout": layout})[0]
|
ie_result = ie({"doc": temp_file.name, "layout": layout})[0]
|
||||||
|
for lay in layout:
|
||||||
|
ocr_pure_text += lay[1]
|
||||||
except MemoryError as e:
|
except MemoryError as e:
|
||||||
# 显存不足时应该抛出错误,让程序重启,同时释放显存
|
# 显存不足时应该抛出错误,让程序重启,同时释放显存
|
||||||
raise e
|
raise e
|
||||||
@@ -58,7 +61,7 @@ def ie_temp_image(ie, ocr, image):
|
|||||||
os.remove(temp_file.name)
|
os.remove(temp_file.name)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.info(f"删除临时文件 {temp_file.name} 时出错", exc_info=e)
|
logging.info(f"删除临时文件 {temp_file.name} 时出错", exc_info=e)
|
||||||
return ie_result
|
return ie_result, ocr_pure_text
|
||||||
|
|
||||||
|
|
||||||
# 关键信息提取
|
# 关键信息提取
|
||||||
@@ -150,6 +153,7 @@ def get_better_image_from_qrcode(image, image_id, dpi=150):
|
|||||||
# 关键信息提取
|
# 关键信息提取
|
||||||
def information_extraction(ie, phrecs, identity):
|
def information_extraction(ie, phrecs, identity):
|
||||||
result = {}
|
result = {}
|
||||||
|
ocr_text = ''
|
||||||
for phrec in phrecs:
|
for phrec in phrecs:
|
||||||
img_path = ufile.get_private_url(phrec.cfjaddress)
|
img_path = ufile.get_private_url(phrec.cfjaddress)
|
||||||
if not img_path:
|
if not img_path:
|
||||||
@@ -168,7 +172,7 @@ def information_extraction(ie, phrecs, identity):
|
|||||||
if text:
|
if text:
|
||||||
info_extract = ie(text)[0]
|
info_extract = ie(text)[0]
|
||||||
else:
|
else:
|
||||||
info_extract = ie_temp_image(ie, OCR, image)
|
info_extract = ie_temp_image(ie, OCR, image)[0]
|
||||||
ie_result = {'result': info_extract, 'angle': '0'}
|
ie_result = {'result': info_extract, 'angle': '0'}
|
||||||
|
|
||||||
now = util.get_default_datetime()
|
now = util.get_default_datetime()
|
||||||
@@ -201,10 +205,12 @@ def information_extraction(ie, phrecs, identity):
|
|||||||
if split_result['img'] is None or split_result['img'].size == 0:
|
if split_result['img'] is None or split_result['img'].size == 0:
|
||||||
continue
|
continue
|
||||||
rotated_img = image_util.rotate(split_result['img'], int(angles[0]))
|
rotated_img = image_util.rotate(split_result['img'], int(angles[0]))
|
||||||
ie_results = [{'result': ie_temp_image(ie, OCR, rotated_img), 'angle': angles[0]}]
|
ie_temp_result = ie_temp_image(ie, OCR, rotated_img)
|
||||||
|
ocr_text += ie_temp_result[1]
|
||||||
|
ie_results = [{'result': ie_temp_result[0], 'angle': angles[0]}]
|
||||||
if not ie_results[0]['result'] or len(ie_results[0]['result']) < len(ie.kwargs.get('schema')):
|
if not ie_results[0]['result'] or len(ie_results[0]['result']) < len(ie.kwargs.get('schema')):
|
||||||
rotated_img = image_util.rotate(split_result['img'], int(angles[1]))
|
rotated_img = image_util.rotate(split_result['img'], int(angles[1]))
|
||||||
ie_results.append({'result': ie_temp_image(ie, OCR, rotated_img), 'angle': angles[1]})
|
ie_results.append({'result': ie_temp_image(ie, OCR, rotated_img)[0], 'angle': angles[1]})
|
||||||
now = util.get_default_datetime()
|
now = util.get_default_datetime()
|
||||||
best_angle = ['0', 0]
|
best_angle = ['0', 0]
|
||||||
for ie_result in ie_results:
|
for ie_result in ie_results:
|
||||||
@@ -252,6 +258,7 @@ def information_extraction(ie, phrecs, identity):
|
|||||||
session.commit()
|
session.commit()
|
||||||
session.close()
|
session.close()
|
||||||
|
|
||||||
|
result['ocr_text'] = ocr_text
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
@@ -414,6 +421,7 @@ def discharge_task(pk_phhd, discharge_record, identity):
|
|||||||
"doctor": handle_doctor(get_best_value_in_keys(discharge_record_ie_result, DOCTOR)),
|
"doctor": handle_doctor(get_best_value_in_keys(discharge_record_ie_result, DOCTOR)),
|
||||||
"admission_id": handle_id(get_best_value_in_keys(discharge_record_ie_result, ADMISSION_ID)),
|
"admission_id": handle_id(get_best_value_in_keys(discharge_record_ie_result, ADMISSION_ID)),
|
||||||
"age": handle_age(get_best_value_in_keys(discharge_record_ie_result, AGE)),
|
"age": handle_age(get_best_value_in_keys(discharge_record_ie_result, AGE)),
|
||||||
|
"content": handle_text(discharge_record_ie_result['ocr_text']),
|
||||||
}
|
}
|
||||||
discharge_data["admission_date"] = handle_date(discharge_data["admission_date_str"])
|
discharge_data["admission_date"] = handle_date(discharge_data["admission_date_str"])
|
||||||
discharge_data["discharge_date"] = handle_date(discharge_data["discharge_date_str"])
|
discharge_data["discharge_date"] = handle_date(discharge_data["discharge_date_str"])
|
||||||
@@ -479,7 +487,8 @@ def cost_task(pk_phhd, cost_list, identity):
|
|||||||
"name": handle_name(get_best_value_in_keys(cost_list_ie_result, PATIENT_NAME)),
|
"name": handle_name(get_best_value_in_keys(cost_list_ie_result, PATIENT_NAME)),
|
||||||
"admission_date_str": handle_original_data(get_best_value_in_keys(cost_list_ie_result, ADMISSION_DATE)),
|
"admission_date_str": handle_original_data(get_best_value_in_keys(cost_list_ie_result, ADMISSION_DATE)),
|
||||||
"discharge_date_str": handle_original_data(get_best_value_in_keys(cost_list_ie_result, DISCHARGE_DATE)),
|
"discharge_date_str": handle_original_data(get_best_value_in_keys(cost_list_ie_result, DISCHARGE_DATE)),
|
||||||
"medical_expenses_str": handle_original_data(get_best_value_in_keys(cost_list_ie_result, MEDICAL_EXPENSES))
|
"medical_expenses_str": handle_original_data(get_best_value_in_keys(cost_list_ie_result, MEDICAL_EXPENSES)),
|
||||||
|
"content": handle_text(cost_list_ie_result['ocr_text']),
|
||||||
}
|
}
|
||||||
cost_data["admission_date"] = handle_date(cost_data["admission_date_str"])
|
cost_data["admission_date"] = handle_date(cost_data["admission_date_str"])
|
||||||
cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"])
|
cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"])
|
||||||
|
|||||||
@@ -190,3 +190,9 @@ def parse_hospital(string):
|
|||||||
split_hospitals = string_without_company.replace("医院", "医院 ")
|
split_hospitals = string_without_company.replace("医院", "医院 ")
|
||||||
result += split_hospitals.strip().split(" ")
|
result += split_hospitals.strip().split(" ")
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def handle_text(string, max_length=16383):
    """Return *string* truncated to at most ``max_length`` characters.

    Falsy input (``None``, ``""``) is normalised to an empty string so the
    caller always gets a ``str`` back.

    :param string: text to be clipped; may be ``None`` or empty.
    :param max_length: maximum number of characters to keep. The default of
        16383 is the original hard-coded limit.
        NOTE(review): presumably chosen so the value fits a MySQL ``TEXT``
        column (65535 bytes) at 4 bytes per character -- confirm against
        the table's charset.
    :return: the leading ``max_length`` characters of ``string``, or ``""``
        when ``string`` is falsy.
    """
    if not string:
        return ""
    return string[:max_length]
|
||||||
|
|||||||
Reference in New Issue
Block a user