补充缺页判断
This commit is contained in:
@@ -21,7 +21,8 @@ from services.paddle_services import IE_KEY
|
|||||||
from ucloud import ufile
|
from ucloud import ufile
|
||||||
from util import image_util, common_util, html_util, model_util
|
from util import image_util, common_util, html_util, model_util
|
||||||
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, handle_insurance_type, \
|
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, handle_insurance_type, \
|
||||||
handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, parse_hospital
|
handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, parse_hospital, \
|
||||||
|
parse_page_num, handle_tiny_int
|
||||||
|
|
||||||
|
|
||||||
# 尝试从二维码中获取高清图片
|
# 尝试从二维码中获取高清图片
|
||||||
@@ -124,6 +125,7 @@ def information_extraction(phrec, pk_phhd, identity):
|
|||||||
info_extract = model_util.ie_settlement(
|
info_extract = model_util.ie_settlement(
|
||||||
better_img_path, common_util.ocr_result_to_layout(model_util.ocr(better_img_path))
|
better_img_path, common_util.ocr_result_to_layout(model_util.ocr(better_img_path))
|
||||||
)
|
)
|
||||||
|
ocr_text = None # 此处肯定不是出院记录,后续用不到
|
||||||
else:
|
else:
|
||||||
target_image = model_util.det_book(img_path) # 识别文档区域并裁剪
|
target_image = model_util.det_book(img_path) # 识别文档区域并裁剪
|
||||||
dewarped_image = model_util.dewarp(target_image) # 去扭曲
|
dewarped_image = model_util.dewarp(target_image) # 去扭曲
|
||||||
@@ -158,7 +160,7 @@ def information_extraction(phrec, pk_phhd, identity):
|
|||||||
creator=HOSTNAME, update_time=now, updater=HOSTNAME))
|
creator=HOSTNAME, update_time=now, updater=HOSTNAME))
|
||||||
session.commit()
|
session.commit()
|
||||||
session.close()
|
session.close()
|
||||||
return rec_type, info_extract
|
return rec_type, info_extract, ocr_text
|
||||||
|
|
||||||
|
|
||||||
# 从keys中获取准确率最高的value
|
# 从keys中获取准确率最高的value
|
||||||
@@ -395,6 +397,9 @@ def cost_task(pk_phhd, cost_list_ie_result):
|
|||||||
cost_data["admission_date"] = handle_date(cost_data["admission_date_str"])
|
cost_data["admission_date"] = handle_date(cost_data["admission_date_str"])
|
||||||
cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"])
|
cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"])
|
||||||
cost_data["medical_expenses"] = handle_decimal(cost_data["medical_expenses_str"])
|
cost_data["medical_expenses"] = handle_decimal(cost_data["medical_expenses_str"])
|
||||||
|
page_nums, page_count = parse_page_num(cost_list_ie_result[IE_KEY['page']])
|
||||||
|
cost_data['page_nums'] = handle_original_data(','.join(page_nums))
|
||||||
|
cost_data['page_count'] = handle_tiny_int(page_count)
|
||||||
save_or_update_ie(ZxIeCost, pk_phhd, cost_data)
|
save_or_update_ie(ZxIeCost, pk_phhd, cost_data)
|
||||||
return cost_data
|
return cost_data
|
||||||
|
|
||||||
@@ -416,19 +421,24 @@ def photo_review(pk_phhd, name):
|
|||||||
session.close()
|
session.close()
|
||||||
# 同一批图的标识
|
# 同一批图的标识
|
||||||
identity = int(time.time())
|
identity = int(time.time())
|
||||||
|
discharge_text = ''
|
||||||
for phrec in phrecs:
|
for phrec in phrecs:
|
||||||
rec_type, ie_result = information_extraction(phrec, pk_phhd, identity)
|
rec_type, ie_result, ocr_text = information_extraction(phrec, pk_phhd, identity)
|
||||||
if rec_type == '基本医保结算单':
|
if rec_type == '基本医保结算单':
|
||||||
rec_result = settlement_result
|
rec_result = settlement_result
|
||||||
elif rec_type == '出院记录':
|
elif rec_type == '出院记录':
|
||||||
rec_result = discharge_result
|
rec_result = discharge_result
|
||||||
|
discharge_text += ocr_text
|
||||||
elif rec_type == '费用清单':
|
elif rec_type == '费用清单':
|
||||||
rec_result = cost_result
|
rec_result = cost_result
|
||||||
else:
|
else:
|
||||||
rec_result = None
|
rec_result = None
|
||||||
if rec_result is not None:
|
if rec_result is not None:
|
||||||
for key, value in ie_result.items():
|
for key, value in ie_result.items():
|
||||||
rec_result[key] += value
|
if key == '页码':
|
||||||
|
rec_result[key].append(value) # 页码要区分来源,所以多包一层
|
||||||
|
else:
|
||||||
|
rec_result[key] += value
|
||||||
|
|
||||||
settlement_data = settlement_task(pk_phhd, settlement_result)
|
settlement_data = settlement_task(pk_phhd, settlement_result)
|
||||||
discharge_data = discharge_task(pk_phhd, discharge_result)
|
discharge_data = discharge_task(pk_phhd, discharge_result)
|
||||||
@@ -447,8 +457,26 @@ def photo_review(pk_phhd, name):
|
|||||||
review_result['has_discharge'] = bool(discharge_result)
|
review_result['has_discharge'] = bool(discharge_result)
|
||||||
review_result['has_cost'] = bool(cost_result)
|
review_result['has_cost'] = bool(cost_result)
|
||||||
# 三项资料缺页判断
|
# 三项资料缺页判断
|
||||||
# TODO:缺页需要对页码进行抽取,暂未训练相关模型
|
page_description = []
|
||||||
review_result['full_page'] = True
|
# todo:关键词需根据实际情况调整
|
||||||
|
discharge_key = ['入院诊断', '入院日期', '出院日期', '出院诊断', '入院情况', '诊疗经过', '出院情况', '出院医嘱']
|
||||||
|
if not all(key in discharge_text for key in discharge_key):
|
||||||
|
page_description.append('《出院记录》缺页')
|
||||||
|
|
||||||
|
cost_missing_page = {}
|
||||||
|
if cost_data['page_nums']:
|
||||||
|
page_nums = cost_data['page_nums'].split(',')
|
||||||
|
required_set = set(range(1, cost_data['page_count'] + 1))
|
||||||
|
page_set = set(page_nums)
|
||||||
|
cost_missing_page = required_set - page_set
|
||||||
|
if cost_missing_page:
|
||||||
|
page_description.append(f"《住院费用清单》,缺第{','.join(cost_missing_page)}页")
|
||||||
|
|
||||||
|
if page_description:
|
||||||
|
review_result['full_page'] = False
|
||||||
|
review_result['page_description'] = ';'.join(page_description)
|
||||||
|
else:
|
||||||
|
review_result['full_page'] = True
|
||||||
|
|
||||||
if (review_result['has_settlement'] and review_result['has_discharge'] and review_result['has_cost']
|
if (review_result['has_settlement'] and review_result['has_discharge'] and review_result['has_cost']
|
||||||
and review_result['full_page']):
|
and review_result['full_page']):
|
||||||
|
|||||||
@@ -16,5 +16,6 @@ IE_KEY = {
|
|||||||
'admission_id': '住院号',
|
'admission_id': '住院号',
|
||||||
'settlement_id': '医保结算单号码',
|
'settlement_id': '医保结算单号码',
|
||||||
'age': '年龄',
|
'age': '年龄',
|
||||||
'upper_case_medical_expenses': '大写总额'
|
'upper_case_medical_expenses': '大写总额',
|
||||||
|
'page': '页码',
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from utils import process_request
|
|||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
COST_LIST_SCHEMA = tuple(IE_KEY[key] for key in [
|
COST_LIST_SCHEMA = tuple(IE_KEY[key] for key in [
|
||||||
'name', 'admission_date', 'discharge_date', 'medical_expenses'
|
'name', 'admission_date', 'discharge_date', 'medical_expenses', 'page'
|
||||||
])
|
])
|
||||||
COST = Taskflow('information_extraction', schema=COST_LIST_SCHEMA, model='uie-x-base',
|
COST = Taskflow('information_extraction', schema=COST_LIST_SCHEMA, model='uie-x-base',
|
||||||
task_path='model/cost_list_model', layout_analysis=False, precision='fp16')
|
task_path='model/cost_list_model', layout_analysis=False, precision='fp16')
|
||||||
|
|||||||
@@ -178,3 +178,22 @@ def parse_hospital(string):
|
|||||||
split_hospitals = string_without_company.replace('医院', '医院 ')
|
split_hospitals = string_without_company.replace('医院', '医院 ')
|
||||||
result += split_hospitals.strip().split(' ')
|
result += split_hospitals.strip().split(' ')
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def parse_page_num(page_list):
    """Extract per-source page numbers and the overall page count.

    Each entry of ``page_list`` is joined into a single string and scanned
    for runs of digits. The numerically smallest run is taken as that
    entry's page number and the numerically largest as its candidate for
    the total page count.
    (Assumes each entry is a string or an iterable of strings such as
    "第1页 共3页" -- TODO confirm against the IE result format.)

    :param page_list: iterable of page-number extractions, one per source
        image; may be empty or ``None``.
    :return: ``(pages, total)`` where ``pages`` is a list of page-number
        strings (as matched in the text, so they can be passed to
        ``','.join``) and ``total`` is the largest number seen as an
        ``int``, or ``None`` when no number was found at all.
    """
    if not page_list:
        # Return an empty list rather than None so callers can join it safely.
        return [], None

    pages = []
    totals = []
    for page in page_list:
        numbers = re.findall(r'\d+', ''.join(page))
        if not numbers:
            # No digits recognised for this source; skip it instead of
            # letting min()/max() raise on an empty sequence.
            continue
        # Compare numerically: as strings, '10' would sort before '9'.
        pages.append(min(numbers, key=int))
        totals.append(max(int(number) for number in numbers))

    return pages, (max(totals) if totals else None)
|
||||||
|
|
||||||
|
|
||||||
|
def handle_tiny_int(num):
    """Clamp a number to an upper bound of 127.

    Numeric strings are converted to ``int`` first, because comparing a
    ``str`` with an ``int`` raises ``TypeError``.

    :param num: an int, a numeric string, or a falsy value.
    :return: ``None`` when ``num`` is falsy (``None``, ``0``, ``''``) or is a
        string that cannot be parsed as an integer; otherwise ``num``
        limited to at most 127.
    """
    if isinstance(num, str):
        try:
            num = int(num)
        except ValueError:
            # Unparseable text is treated the same as a missing value.
            return None
    if not num:
        return None
    return min(num, 127)
|
||||||
|
|||||||
Reference in New Issue
Block a user