698 lines
31 KiB
Python
698 lines
31 KiB
Python
import copy
|
||
import json
|
||
import logging
|
||
import os
|
||
import re
|
||
import shutil
|
||
import time
|
||
import uuid
|
||
from collections import defaultdict
|
||
from time import sleep
|
||
|
||
import cv2
|
||
import jieba
|
||
import numpy as np
|
||
import zxingcpp
|
||
from rapidfuzz import process, fuzz
|
||
from sqlalchemy import update
|
||
|
||
from db import MysqlSession
|
||
from db.mysql import BdYljg, BdYlks, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec, ZxIeReview, ZxIeResult
|
||
from log import HOSTNAME
|
||
from photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, \
|
||
DEPARTMENT_FILTER, DISCHARGE_KEY, set_batch_id, get_batch_id
|
||
from services.paddle_services import IE_KEY
|
||
from ucloud import ufile, BUCKET
|
||
from util import image_util, common_util, html_util, model_util
|
||
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, handle_insurance_type, \
|
||
handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, parse_hospital, \
|
||
parse_page_num, handle_tiny_int
|
||
|
||
|
||
def parse_qrcode(img_path, image_id):
    """
    Scan an image for QR codes and try to obtain a high-definition PDF rendering of the invoice.

    :param img_path: path of the image to scan
    :param image_id: image identifier, only used in log messages
    :return: tuple ``(settlement image path, first page payload, cost list image path)``,
        or ``(None, None, None)`` when no QR code leads to a usable PDF
    """

    def _parse_pdf_url(pdf_url_to_parse):
        # Downloads the PDF, writes page 1 as the settlement image and stacks the
        # remaining pages vertically into a single cost list image.
        downloaded_pdf = None
        img_name, _ = common_util.parse_save_path(img_path)
        try:
            downloaded_pdf = html_util.download_pdf(pdf_url_to_parse)
            pages = image_util.pdf_to_imgs(downloaded_pdf)

            # Settlement statement part (first page)
            settlement_path = common_util.get_processed_img_path(f'{img_name}.better_settlement.jpg')
            cv2.imwrite(settlement_path, pages[0][0])

            # Cost list part (all remaining pages)
            cost_path = common_util.get_processed_img_path(f'{img_name}.better_cost.jpg')
            cost_pages = pages[1:]
            canvas_height = sum(page[0].shape[0] for page in cost_pages)
            # A single-page PDF raises IndexError here and is reported as a parse failure below.
            canvas_width = pages[1][0].shape[1]
            canvas = np.zeros((canvas_height, canvas_width, 3), dtype=np.uint8)
            offset = 0
            for page in cost_pages:
                page_height = page[0].shape[0]
                canvas[offset:offset + page_height, :, :] = page[0]
                offset += page_height
                # page[1] (presumably the page text) of the cost list is not used for now
            cv2.imwrite(cost_path, canvas)

            return settlement_path, pages[0][1], cost_path
        except Exception as ex:
            logging.getLogger('error').error('解析pdf失败!', exc_info=ex)
            return None, None, None
        finally:
            # The downloaded PDF is temporary; remove it whatever the outcome.
            if downloaded_pdf:
                common_util.delete_temp_file(downloaded_pdf)

    jsczt_base_url = 'http://einvoice.jsczt.cn'
    try:
        barcodes = zxingcpp.read_barcodes(cv2.imread(img_path), text_mode=zxingcpp.TextMode.HRI)
    except Exception as e:
        logging.getLogger('error').info('二维码识别失败', exc_info=e)
        barcodes = []

    for barcode in barcodes:
        try:
            url = barcode.text
            if url.startswith(jsczt_base_url):
                id_base = html_util.get_jsczt_id_base(url)
                pdf_url = f'{jsczt_base_url}/download?idBase={id_base}' if id_base else None
            elif '/yldzpjqr/invoice/query/issueinfo' in url:
                # Wuxi hospitals
                pdf_url = html_util.get_wx_pdf_url(url)
            elif '/dzfp/tz3y' in url:
                # Taizhou Third People's Hospital
                pdf_url = html_util.get_tz3y_pdf_url(url)
            elif url.startswith('http://weixin.qq.com'):
                # Not an invoice address
                pdf_url = None
            else:
                logging.getLogger('qr').info(f'[{image_id}]中有未知二维码内容:{url}')
                pdf_url = None

            # The first QR code that resolves to a PDF URL decides the result.
            if pdf_url:
                return _parse_pdf_url(pdf_url)
        except Exception as e:
            logging.getLogger('error').error('从二维码中获取高清图片时出错', exc_info=e)
            continue

    return None, None, None
|
||
|
||
|
||
# Key information extraction
def information_extraction(phrec, pk_phhd):
    """
    Process a single image: pre-process, OCR, classify and run information extraction.

    Every orientation returned by ``model_util.clas_orientation`` is tried and the one
    yielding the largest extraction result is kept. A non-empty result is also
    persisted as a ``ZxIeResult`` row.

    :param phrec: image record (``pk_phrec``, ``cRectype`` and ``cfjaddress`` are read)
    :param pk_phhd: primary key of the case the image belongs to
    :return: tuple ``(record type, extraction result, OCR text)``;
        ``(None, None, None)`` when the image cannot be obtained
    """
    img_path = common_util.get_processed_img_path(phrec.cfjaddress)
    if not os.path.exists(img_path):
        original_img_path = common_util.get_img_path(phrec.cfjaddress)
        if not original_img_path:
            img_url = ufile.get_private_url(phrec.cfjaddress)
            if not img_url:
                return None, None, None
            original_img_path = common_util.save_to_local(img_url)
        shutil.copy2(original_img_path, img_path)

    if image_util.is_photo(img_path):
        book_img_path = model_util.det_book(img_path)  # detect the document area and crop it
        dewarped_img_path = model_util.dewarp(book_img_path)  # remove distortion
    else:  # todo: may still be a picture, add finer-grained logic later
        dewarped_img_path = img_path

    # Fallback mapping from the user-supplied record type code to the record type name.
    rec_dict = {
        '1': '基本医保结算单',
        '3': '出院记录',
        '4': '费用清单',
    }

    angles = model_util.clas_orientation(dewarped_img_path)
    ocr_text = ''
    info_extract = []
    rec_type = None
    for angle in angles:
        ocr_result = []
        rotated_img = image_util.rotate(dewarped_img_path, int(angle))
        for split_result in image_util.split(rotated_img):
            if split_result['img'] is None:
                continue
            a4_img = image_util.expand_to_a4_size(split_result['img'])
            tmp_ocr_result = model_util.ocr(a4_img)
            if tmp_ocr_result:
                ocr_result += tmp_ocr_result
        tmp_ocr_text = common_util.ocr_result_to_text(ocr_result)

        # Classify from the text of THIS orientation. The previous code tested the
        # best-so-far ``ocr_text`` (initially empty), so the classifier was never
        # consulted for the first orientation.
        tmp_rec_type = model_util.clas_text(tmp_ocr_text) if tmp_ocr_text else None
        if not tmp_rec_type:
            tmp_rec_type = rec_dict.get(phrec.cRectype)

        if tmp_rec_type == '基本医保结算单':
            tmp_info_extract = model_util.ie_settlement(rotated_img, common_util.ocr_result_to_layout(ocr_result))
        elif tmp_rec_type == '出院记录':
            tmp_info_extract = model_util.ie_discharge(rotated_img, common_util.ocr_result_to_layout(ocr_result))
        elif tmp_rec_type == '费用清单':
            tmp_info_extract = model_util.ie_cost(rotated_img, common_util.ocr_result_to_layout(ocr_result))
        else:
            tmp_info_extract = []

        # Keep the orientation that produced the most extracted entries.
        if len(tmp_info_extract) > len(info_extract):
            info_extract = tmp_info_extract
            ocr_text = tmp_ocr_text
            rec_type = tmp_rec_type

    if info_extract:
        # Stored content is capped at 5000 characters.
        result_json = json.dumps(info_extract, ensure_ascii=False)[:5000]

        now = common_util.get_default_datetime()
        session = MysqlSession()
        try:
            session.add(ZxIeResult(pk_phhd=pk_phhd, pk_phrec=phrec.pk_phrec, id=get_batch_id(),
                                   cfjaddress=phrec.cfjaddress, content=result_json, create_time=now,
                                   creator=HOSTNAME, update_time=now, updater=HOSTNAME))
            session.commit()
        finally:
            # Always release the session, even when the insert fails.
            session.close()
    return rec_type, info_extract, ocr_text
|
||
|
||
|
||
# Pick the value with the highest probability for a key
def get_best_value_of_key(source, key):
    """
    Return the text with the highest probability among all candidates stored under ``key``.

    :param source: mapping ``key -> list of groups``, each group being a list of
        ``{"text": ..., "probability": ...}`` dicts
    :param key: key to look up
    :return: the best text, or ``None`` when the key is missing or no candidate has
        a non-empty text with a probability greater than 0
    """
    best_text = None
    best_probability = 0
    for group in source.get(key) or []:
        for candidate in group or []:
            text = candidate.get("text")
            # A missing/None probability counts as 0 instead of raising TypeError on comparison.
            probability = candidate.get("probability") or 0
            if text and probability > best_probability:
                best_text = text
                best_probability = probability
    return best_text
|
||
|
||
|
||
# Collect every value stored under a key
def get_values_of_key(source, key):
    """
    Return all distinct non-empty texts stored under ``key``.

    :param source: mapping ``key -> list of groups``, each group being a list of
        ``{"text": ...}`` dicts
    :param key: key to look up
    :return: list of unique texts in first-seen order (empty list when nothing is found)
    """
    texts = [
        candidate.get("text")
        for group in source.get(key) or []
        for candidate in group or []
    ]
    # dict.fromkeys de-duplicates while keeping a stable order; a set would make the
    # order (and any string joined from it) vary between runs.
    return list(dict.fromkeys(text for text in texts if text))
|
||
|
||
|
||
def save_or_update_ie(table, pk_phhd, data):
    """
    Insert or update the row of ``table`` that belongs to a case.

    ``None`` and empty-string values are dropped first, so on update they never
    overwrite a value already stored.

    :param table: ORM model class; it is queried by ``pk_phhd`` and carries the
        ``create_time`` / ``creator`` / ``update_time`` / ``updater`` audit attributes
    :param pk_phhd: primary key of the case
    :param data: column name -> value mapping
    """
    data = {k: v for k, v in data.items() if v is not None and v != ""}
    # Built before any DB work, as before; presumably this also rejects unknown
    # column names early - confirm against the model base class.
    obj = table(**data)
    session = MysqlSession()
    try:
        db_data = session.query(table).filter_by(pk_phhd=pk_phhd).one_or_none()
        now = common_util.get_default_datetime()
        if db_data:
            # Update: stamp the updater. The previous code overwrote ``creator`` here.
            db_data.update_time = now
            db_data.updater = HOSTNAME
            for k, v in data.items():
                setattr(db_data, k, v)
        else:
            # Insert
            obj.create_time = now
            obj.creator = HOSTNAME
            obj.update_time = now
            obj.updater = HOSTNAME
            session.add(obj)
        session.commit()
    finally:
        # Release the session even when the query or commit fails.
        session.close()
|
||
|
||
|
||
def search_hospital(hospital):
    """
    Find the medical institution whose name best matches ``hospital``.

    The name is segmented with jieba. Institutions matching all segments in order
    are looked up first; if none match, up to two fallback keywords are tried one by one.

    :param hospital: hospital name to search for
    :return: result of ``process.extractOne`` over the candidate names keyed by
        ``pk_yljg`` - ``(name, score, pk_yljg)`` - or ``None`` when there is no candidate
    """

    def _filter_search_keywords(keywords):
        # Keeps the last two usable segments that precede the first segment containing
        # "医院", nearest first. Returns [""] when there is none.
        keywords = [x for x in keywords if x not in HOSPITAL_FILTER and len(x) > 1]
        latest = ""
        previous = ""
        for keyword in keywords:
            if "医院" in keyword:
                break
            previous = latest
            latest = keyword
        result = [latest]
        if previous:
            result.append(previous)
        return result

    cut_list = jieba.lcut(hospital, HMM=False)
    session = MysqlSession()
    try:
        yljg = session.query(BdYljg.pk_yljg, BdYljg.name).filter(
            BdYljg.name.like(f"%{'%'.join(cut_list)}%")).all()
        if not yljg:
            for filter_keyword in _filter_search_keywords(cut_list):
                yljg = session.query(BdYljg.pk_yljg, BdYljg.name).filter(
                    BdYljg.name.like(f"%{filter_keyword}%")).all()
                if yljg:
                    break
    finally:
        # Release the session even when a query fails.
        session.close()

    candidates = {row.pk_yljg: row.name for row in yljg}
    return process.extractOne(hospital, candidates, scorer=fuzz.partial_token_set_ratio)
|
||
|
||
|
||
def search_department(department):
    """
    Find the department whose name best matches ``department``.

    :param department: department name to search for
    :return: ``None`` when every segment is filtered out or there is no candidate;
        otherwise ``(name, score, pk_ylks)`` from ``process.extractOne``. For the generic
        departments "内科" / "外科" the result is a list whose score is lowered by 100.
    """

    def _filter_search_keywords(keywords):
        return [x for x in keywords if x not in DEPARTMENT_FILTER]

    cut_list = _filter_search_keywords(jieba.lcut(department, HMM=False))
    if not cut_list:
        # Nothing to search for. The session is opened only after this check so this
        # early exit no longer leaves an unclosed session behind.
        return None

    session = MysqlSession()
    try:
        ylks = session.query(BdYlks.pk_ylks, BdYlks.name).filter(
            BdYlks.name.like(f"%{'%'.join(cut_list)}%")).all()
        if not ylks:
            # No name contains all segments in order: fall back to single segments.
            for filter_keyword in cut_list:
                ylks = session.query(BdYlks.pk_ylks, BdYlks.name).filter(
                    BdYlks.name.like(f"%{filter_keyword}%")).all()
                if ylks:
                    break
    finally:
        session.close()

    candidates = {row.pk_ylks: row.name for row in ylks}
    best_match = process.extractOne(department, candidates, scorer=fuzz.token_ratio)
    if best_match and best_match[0] in ["内科", "外科"]:
        # Lower the priority of the generic internal medicine / surgery departments.
        best_match = list(best_match)
        best_match[1] -= 100
    return best_match
|
||
|
||
|
||
def settlement_task(pk_phhd, settlement_list_ie_result):
    """
    Normalise the settlement statement extraction result and persist it to ``ZxIeSettlement``.

    :param pk_phhd: primary key of the case
    :param settlement_list_ie_result: extraction result, key -> list of candidate groups
    :return: dict of normalised settlement fields (the data handed to ``save_or_update_ie``)
    """

    def _best(field):
        # Most probable raw text for a logical field name.
        return get_best_value_of_key(settlement_list_ie_result, IE_KEY[field])

    settlement_data = {
        "pk_phhd": pk_phhd,
        "name": handle_name(_best('name')),
        "admission_date_str": handle_original_data(_best('admission_date')),
        "discharge_date_str": handle_original_data(_best('discharge_date')),
        "personal_cash_payment_str": handle_original_data(_best('personal_cash_payment')),
        "personal_account_payment_str": handle_original_data(_best('personal_account_payment')),
        "personal_funded_amount_str": handle_original_data(_best('personal_funded_amount')),
        "medical_insurance_type_str": handle_original_data(_best('medical_insurance_type')),
        "admission_id": handle_id(_best('admission_id')),
        "settlement_id": handle_id(_best('settlement_id')),
    }
    # Typed values derived from the raw strings above (admission_date used to be computed twice).
    settlement_data["admission_date"] = handle_date(settlement_data["admission_date_str"])
    settlement_data["discharge_date"] = handle_date(settlement_data["discharge_date_str"])
    settlement_data["personal_cash_payment"] = handle_decimal(settlement_data["personal_cash_payment_str"])
    settlement_data["personal_account_payment"] = handle_decimal(settlement_data["personal_account_payment_str"])
    settlement_data["personal_funded_amount"] = handle_decimal(settlement_data["personal_funded_amount_str"])
    settlement_data["medical_insurance_type"] = handle_insurance_type(settlement_data["medical_insurance_type_str"])

    # parse_money receives both the upper-case (written-out) and the plain amount;
    # element 0 of its result is stored as the raw string, element 1 as the value.
    parse_money_result = parse_money(_best('uppercase_medical_expenses'), _best('medical_expenses'))
    settlement_data["medical_expenses_str"] = handle_original_data(parse_money_result[0])
    settlement_data["medical_expenses"] = parse_money_result[1]

    save_or_update_ie(ZxIeSettlement, pk_phhd, settlement_data)
    return settlement_data
|
||
|
||
|
||
def _select_best_match(raw_values, parse_func, search_func, alias_map, min_score):
    """
    Search every parsed variant (and alias variant) of ``raw_values`` and return the best hit.

    :param raw_values: raw extracted names
    :param parse_func: splits one raw name into the candidate names to search
    :param search_func: returns ``(name, score, pk)`` or ``None`` for one candidate name
    :param alias_map: mapping ``substring -> list of replacement aliases``
    :param min_score: a hit must score strictly above this value to be accepted
    :return: the highest scoring search result, or ``None``
    """
    search_results = []
    for raw_value in raw_values:
        for parsed_value in parse_func(raw_value):
            search_result = search_func(parsed_value)
            search_results.append(search_result)
            if search_result and search_result[1] == 100:
                # Perfect hit: skip the remaining variants of this raw value.
                break
            for alias_key in alias_map:
                if alias_key in parsed_value:
                    for alias in alias_map[alias_key]:
                        search_results.append(search_func(parsed_value.replace(alias_key, alias)))
                    # Only the first alias key contained in the name is expanded.
                    break

    best_match = None
    best_score = min_score
    for search_result in search_results:
        if search_result and search_result[1] > best_score:
            best_match = search_result
            best_score = search_result[1]
        if best_score == 100:
            break
    return best_match


def discharge_task(pk_phhd, discharge_record_ie_result):
    """
    Normalise the discharge record extraction result, resolve hospital and department
    to their primary keys, and persist everything to ``ZxIeDischarge``.

    :param pk_phhd: primary key of the case
    :param discharge_record_ie_result: extraction result, key -> list of candidate groups
    :return: dict of normalised discharge fields (the data handed to ``save_or_update_ie``)
    """

    def _best(field):
        # Most probable raw text for a logical field name.
        return get_best_value_of_key(discharge_record_ie_result, IE_KEY[field])

    hospitals = get_values_of_key(discharge_record_ie_result, IE_KEY['hospital'])
    departments = get_values_of_key(discharge_record_ie_result, IE_KEY['department'])
    discharge_data = {
        "pk_phhd": pk_phhd,
        "hospital": handle_hospital(",".join(hospitals)),
        "department": handle_department(",".join(departments)),
        "name": handle_name(_best('name')),
        "admission_date_str": handle_original_data(_best('admission_date')),
        "discharge_date_str": handle_original_data(_best('discharge_date')),
        "doctor": handle_name(_best('doctor')),
        "admission_id": handle_id(_best('admission_id')),
        "age": handle_age(_best('age')),
    }
    discharge_data["admission_date"] = handle_date(discharge_data["admission_date_str"])
    discharge_data["discharge_date"] = handle_date(discharge_data["discharge_date_str"])

    if hospitals:
        # Hospital hits must score above 0.
        best_hospital = _select_best_match(hospitals, parse_hospital, search_hospital, HOSPITAL_ALIAS, 0)
        if best_hospital:
            discharge_data["pk_yljg"] = best_hospital[2]

    if departments:
        # search_department lowers generic departments by 100, so scores may be
        # negative; the floor of -1000 keeps them eligible as a last resort.
        best_department = _select_best_match(departments, parse_department, search_department,
                                             DEPARTMENT_ALIAS, -1000)
        if best_department:
            discharge_data["pk_ylks"] = best_department[2]

    save_or_update_ie(ZxIeDischarge, pk_phhd, discharge_data)
    return discharge_data
|
||
|
||
|
||
def cost_task(pk_phhd, cost_list_ie_result):
    """
    Normalise the cost list extraction result and persist it to ``ZxIeCost``.

    :param pk_phhd: primary key of the case
    :param cost_list_ie_result: extraction result, key -> list of candidate groups
    :return: dict of normalised cost list fields (the data handed to ``save_or_update_ie``)
    """

    def _raw(field):
        # Most probable text for a logical field name, passed through handle_original_data.
        return handle_original_data(get_best_value_of_key(cost_list_ie_result, IE_KEY[field]))

    cost_data = {"pk_phhd": pk_phhd}
    cost_data["name"] = handle_name(get_best_value_of_key(cost_list_ie_result, IE_KEY['name']))
    cost_data["admission_date_str"] = _raw('admission_date')
    cost_data["discharge_date_str"] = _raw('discharge_date')
    cost_data["medical_expenses_str"] = _raw('medical_expenses')

    # Typed values derived from the raw strings
    cost_data["admission_date"] = handle_date(cost_data["admission_date_str"])
    cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"])
    cost_data["medical_expenses"] = handle_decimal(cost_data["medical_expenses_str"])

    # Page numbering is only recorded when page information was extracted.
    page_key = IE_KEY['page']
    if cost_list_ie_result.get(page_key):
        page_nums, page_count = parse_page_num(cost_list_ie_result[page_key])
        cost_data['page_nums'] = handle_original_data(','.join(page_nums))
        cost_data['page_count'] = handle_tiny_int(page_count)

    save_or_update_ie(ZxIeCost, pk_phhd, cost_data)
    return cost_data
|
||
|
||
|
||
# One alternative per labelled line of the invoice PDF. With re.MULTILINE, "$" ends a
# value at the end of its line, so the last line needs no trailing newline.
_PDF_FIELD_PATTERN = re.compile(
    r'交款人:(.*?)$|住院时间:(.*?)至(.*?)$|\(小写\)(.*?)$|个人现金支付:(.*?)$|个人账户支付:(.*?)$'
    r'|个人自费:(.*?)$|医保类型:(.*?)$|住院科别:(.*?)$|住院号:(.*?)$|票据号码:(.*?)$',
    re.MULTILINE)


def parse_pdf_text(settlement_text):
    """
    Extract the labelled fields from the text of an invoice PDF page.

    Each value is wrapped as ``[[{'text': value, 'probability': 1}]]``, the shape read
    by ``get_best_value_of_key``. When a label occurs more than once, the last
    non-empty value wins.

    :param settlement_text: text of the PDF page (``None`` is treated as empty)
    :return: tuple ``(settlement_result, discharge_result, cost_result)`` of plain dicts,
        each holding only its own keys that were found; the three dicts share no objects
    """
    # Result keys in capture-group order of _PDF_FIELD_PATTERN.
    keys = ['患者姓名', '入院日期', '出院日期', '费用总额', '个人现金支付', '个人账户支付', '个人自费', '医保类型',
            '科室', '住院号', '医保结算单号码']

    results = {}
    for match in _PDF_FIELD_PATTERN.findall(settlement_text or ''):
        for key, value in zip(keys, match):
            if value:
                results[key] = [[{'text': value, 'probability': 1}]]

    settlement_key = ['患者姓名', '入院日期', '出院日期', '费用总额', '个人现金支付', '个人账户支付', '个人自费',
                      '医保类型', '住院号', '医保结算单号码']
    discharge_key = ['科室', '患者姓名', '入院日期', '出院日期', '住院号']
    cost_key = ['患者姓名', '入院日期', '出院日期', '费用总额']

    def _subset(wanted_keys):
        # Deep copy so that editing one result can never leak into another.
        return {key: copy.deepcopy(results[key]) for key in wanted_keys if key in results}

    return _subset(settlement_key), _subset(discharge_key), _subset(cost_key)
|
||
|
||
|
||
def _three_way_match(settlement_value, discharge_value, cost_value):
    """
    Compare one field across the settlement statement, discharge record and cost list.

    :return: '1' when all three agree; '2' when only the settlement value differs;
        '3' when only the discharge value differs; '4' when only the cost list value
        differs; '0' when two or more values are empty or all three differ
    """
    values = [settlement_value, discharge_value, cost_value]
    if sum(not bool(v) for v in values) > 1:
        return '0'
    distinct_count = len(set(values))
    if distinct_count == 1:
        return '1'
    if distinct_count == 2:
        if settlement_value != discharge_value and settlement_value != cost_value:
            return '2'
        if discharge_value != settlement_value and discharge_value != cost_value:
            return '3'
        return '4'
    return '0'


def photo_review(pk_phhd, name):
    """
    Process a single reimbursement case and store the review verdict in ``ZxIeReview``.

    :param pk_phhd: primary key of the reimbursement case
    :param name: name of the claimant
    """
    settlement_result = defaultdict(list)
    discharge_result = defaultdict(list)
    cost_result = defaultdict(list)

    session = MysqlSession()
    try:
        phrecs = session.query(ZxPhrec.pk_phrec, ZxPhrec.cRectype, ZxPhrec.cfjaddress).filter(
            ZxPhrec.pk_phhd == pk_phhd
        ).order_by(ZxPhrec.cRectype, ZxPhrec.rowno).all()
    finally:
        session.close()

    # Identifier shared by all images of this batch
    set_batch_id(uuid.uuid4().hex)
    processed_img_dir = common_util.get_processed_img_path('')
    os.makedirs(processed_img_dir, exist_ok=True)

    has_pdf = False  # whether a PDF was obtained; it yields the information faster
    better_settlement_path = None
    settlement_text = ''
    qrcode_img_id = None
    discharge_text = ''
    try:
        for phrec in phrecs:
            original_img_path = common_util.get_img_path(phrec.cfjaddress)
            if not original_img_path:
                img_url = ufile.get_private_url(phrec.cfjaddress)
                if not img_url:
                    continue
                original_img_path = common_util.save_to_local(img_url)
            img_path = common_util.get_processed_img_path(phrec.cfjaddress)
            shutil.copy2(original_img_path, img_path)
            # Try to get a high-definition image through the QR code
            better_settlement_path, settlement_text, _ = parse_qrcode(img_path, phrec.cfjaddress)
            if better_settlement_path:
                has_pdf = True
                qrcode_img_id = phrec.cfjaddress
                break

        if has_pdf:
            settlement_result, discharge_result, cost_result = parse_pdf_text(settlement_text)
            discharge_ie_result = defaultdict(list)

            for phrec in phrecs:
                if phrec.cRectype == '1':
                    if phrec.cfjaddress == qrcode_img_id:
                        # Copy the original image to the "drg2015" bucket, then replace it
                        # with the PDF rendering. Best effort: failures are only logged.
                        try:
                            ufile.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress)
                            ufile.upload_file(phrec.cfjaddress, better_settlement_path)
                        except Exception as e:
                            logging.error("更新结算单pdf图片出错", exc_info=e)
                elif phrec.cRectype == '3':
                    rec_type, ie_result, ocr_text = information_extraction(phrec, pk_phhd)
                    if rec_type == '出院记录':
                        discharge_text += ocr_text
                        for key, value in ie_result.items():
                            discharge_ie_result[key].append(value)
                # Cost list images (cRectype '4') are deliberately not replaced for now.

            # Merge the discharge record: the extracted value replaces the PDF value
            # when the two are clearly different (similarity below 60).
            for key in discharge_ie_result:
                ie_value = get_best_value_of_key(discharge_ie_result, key)
                if not ie_value:
                    # Nothing usable was extracted; keep whatever the PDF provided
                    # instead of overwriting it with None.
                    continue
                pdf_value = discharge_result.get(key)[0][0]['text'] if discharge_result.get(key) else ''
                if fuzz.ratio(ie_value, pdf_value) < 60:
                    discharge_result[key] = [[{'text': ie_value, 'probability': 1}]]
        else:
            for phrec in phrecs:
                rec_type, ie_result, ocr_text = information_extraction(phrec, pk_phhd)
                if rec_type == '基本医保结算单':
                    rec_result = settlement_result
                elif rec_type == '出院记录':
                    rec_result = discharge_result
                    discharge_text += ocr_text
                elif rec_type == '费用清单':
                    rec_result = cost_result
                else:
                    rec_result = None
                if rec_result is not None:
                    for key, value in ie_result.items():
                        rec_result[key].append(value)
    finally:
        # Remove the working images, also when extraction fails halfway.
        if os.path.isdir(processed_img_dir):
            shutil.rmtree(processed_img_dir)

    settlement_data = settlement_task(pk_phhd, settlement_result)
    discharge_data = discharge_task(pk_phhd, discharge_result)
    cost_data = cost_task(pk_phhd, cost_result)

    # Completeness of the three documents
    # Missing document check
    review_result = {
        'pk_phhd': pk_phhd,
        'has_settlement': bool(settlement_result),
        'has_discharge': bool(discharge_result),
        'has_cost': bool(cost_result),
    }
    if (review_result['has_settlement'] and settlement_data.get('personal_account_payment')
            and settlement_data.get('personal_cash_payment') and settlement_data.get('medical_expenses')):
        # The settlement only counts when account + cash payment is below the total expenses.
        review_result['has_settlement'] &= (
            float(settlement_data['personal_account_payment']) + float(settlement_data['personal_cash_payment'])
            < float(settlement_data['medical_expenses'])
        )
    if has_pdf:
        # With a PDF, discharge fields may come from the PDF alone; a discharge record
        # only counts as present when one was actually recognised.
        review_result['has_discharge'] &= bool(discharge_text)

    # Missing page check
    page_description = []
    if review_result['has_discharge']:
        for discharge_item in DISCHARGE_KEY:
            if not any(key in discharge_text for key in DISCHARGE_KEY[discharge_item]):
                page_description.append("《出院记录》缺页")
                break

    if review_result['has_cost']:
        page_count = cost_data.get('page_count')
        # Guard against a missing page count, which used to raise TypeError on ``+ 1``.
        if cost_data.get('page_nums') and page_count:
            found_pages = set(cost_data['page_nums'].split(','))
            required_pages = set(str(i) for i in range(1, page_count + 1))
            # Numeric order, so that page 10 is listed after page 2.
            cost_missing_page = sorted(required_pages - found_pages, key=int)
            if cost_missing_page:
                page_description.append(f"《住院费用清单》,缺第{','.join(cost_missing_page)}页")

    if page_description:
        review_result['full_page'] = False
        review_result['page_description'] = ';'.join(page_description)
    else:
        review_result['full_page'] = True

    review_result['integrity'] = (review_result['has_settlement'] and review_result['has_discharge']
                                  and review_result['has_cost'] and review_result['full_page'])

    # Consistency of the three documents
    # Name consistency: '5' means the documents agree with each other but not with the claimant.
    name_match = _three_way_match(settlement_data['name'], discharge_data['name'], cost_data['name'])
    if name_match == '1' and name != settlement_data['name']:
        name_match = '5'
    review_result['name_match'] = name_match

    # Hospitalisation dates: settlement and discharge record must agree on both dates.
    if (settlement_data['admission_date'] and discharge_data['admission_date']
            and settlement_data['discharge_date'] and discharge_data['discharge_date']
            and settlement_data['admission_date'] == discharge_data['admission_date']
            and settlement_data['discharge_date'] == discharge_data['discharge_date']):
        review_result['admission_date_match'] = '1'
    else:
        review_result['admission_date_match'] = '0'

    # Discharge date consistency
    review_result['discharge_date_match'] = _three_way_match(
        settlement_data['discharge_date'], discharge_data['discharge_date'], cost_data['discharge_date'])

    review_result['consistency'] = (
        review_result['name_match'] == '1' and review_result['admission_date_match'] == '1'
        and review_result['discharge_date_match'] == '1')

    review_result['success'] = review_result['integrity'] and review_result['consistency']
    save_or_update_ie(ZxIeReview, pk_phhd, review_result)
|
||
|
||
|
||
def main():
    """
    Batch control loop of the photo review.

    Repeatedly claims up to ``PHHD_BATCH_SIZE`` cases whose ``exsuccess_flag`` is "1"
    (highest ``priority_num`` first), marks them "2" (recognition in progress),
    reviews them one by one and marks each "8" when done. Sleeps ``SLEEP_MINUTES``
    minutes when there is nothing to do. Never returns.
    """
    while True:
        session = MysqlSession()
        try:
            phhds = (session.query(ZxPhhd.pk_phhd, ZxPhhd.cXm)
                     .join(ZxPhrec, ZxPhhd.pk_phhd == ZxPhrec.pk_phhd, isouter=True)
                     .filter(ZxPhhd.exsuccess_flag == "1")
                     .filter(ZxPhrec.pk_phrec.isnot(None))
                     .order_by(ZxPhhd.priority_num.desc())
                     .distinct().limit(PHHD_BATCH_SIZE).all())
            pk_phhd_values = [phhd.pk_phhd for phhd in phhds]
            if pk_phhd_values:
                # Mark the claimed cases as "recognition in progress". Skipped for an
                # empty batch, which would otherwise issue an UPDATE with an empty IN list.
                session.execute(
                    update(ZxPhhd).where(ZxPhhd.pk_phhd.in_(pk_phhd_values)).values(exsuccess_flag="2"))
                session.commit()
        finally:
            session.close()

        if not phhds:
            # No new case found, wait a while before querying again
            logging.info(f"暂未查询到需要识别的案子,等待{SLEEP_MINUTES}分钟...")
            sleep(SLEEP_MINUTES * 60)
            continue

        for phhd in phhds:
            pk_phhd = phhd.pk_phhd
            logging.info(f"开始识别:{pk_phhd}")
            start_time = time.time()
            photo_review(pk_phhd, phhd.cXm)

            # Recognition finished: update the flag and record host, time and duration
            session = MysqlSession()
            try:
                session.execute(update(ZxPhhd).where(ZxPhhd.pk_phhd == pk_phhd).values(
                    exsuccess_flag="8",
                    ref_id1=HOSTNAME,
                    checktime=common_util.get_default_datetime(),
                    fFSYLFY=time.time() - start_time))
                session.commit()
            finally:
                session.close()
|