添加信息抽取存表,根据抽取结果进行缺项判断
This commit is contained in:
@@ -1,3 +1,4 @@
|
|||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
@@ -12,7 +13,7 @@ from rapidfuzz import process, fuzz
|
|||||||
from sqlalchemy import update
|
from sqlalchemy import update
|
||||||
|
|
||||||
from db import MysqlSession
|
from db import MysqlSession
|
||||||
from db.mysql import BdYljg, BdYlks, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec, ZxIeReview
|
from db.mysql import BdYljg, BdYlks, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec, ZxIeReview, ZxIeResult
|
||||||
from log import HOSTNAME
|
from log import HOSTNAME
|
||||||
from photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, \
|
from photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, \
|
||||||
DEPARTMENT_FILTER
|
DEPARTMENT_FILTER
|
||||||
@@ -98,11 +99,13 @@ def get_better_image_from_qrcode(img_path, image_id, dpi=150):
|
|||||||
|
|
||||||
|
|
||||||
# 关键信息提取
|
# 关键信息提取
|
||||||
def information_extraction(phrec, identity):
|
def information_extraction(phrec, pk_phhd, identity):
|
||||||
"""
|
"""
|
||||||
处理单张图片
|
处理单张图片
|
||||||
:param phrec:
|
:param phrec:图片信息
|
||||||
:return:
|
:param pk_phhd:案子主键
|
||||||
|
:param identity:处理批次标识
|
||||||
|
:return:记录类型,信息抽取结果
|
||||||
"""
|
"""
|
||||||
img_path = image_util.get_img_path(phrec.cfjaddress)
|
img_path = image_util.get_img_path(phrec.cfjaddress)
|
||||||
if not img_path:
|
if not img_path:
|
||||||
@@ -114,13 +117,13 @@ def information_extraction(phrec, identity):
|
|||||||
if phrec.cRectype != '1':
|
if phrec.cRectype != '1':
|
||||||
better_img_path = None # 非结算单暂时不进行替换
|
better_img_path = None # 非结算单暂时不进行替换
|
||||||
if better_img_path is not None:
|
if better_img_path is not None:
|
||||||
|
rec_type = '基本医保结算单'
|
||||||
if text:
|
if text:
|
||||||
info_extract = model_util.ie_settlement_text(text)
|
info_extract = model_util.ie_settlement_text(text)
|
||||||
else:
|
else:
|
||||||
info_extract = model_util.ie_settlement(
|
info_extract = model_util.ie_settlement(
|
||||||
better_img_path, common_util.ocr_result_to_layout(model_util.ocr(better_img_path))
|
better_img_path, common_util.ocr_result_to_layout(model_util.ocr(better_img_path))
|
||||||
)
|
)
|
||||||
return '基本医保结算单', info_extract
|
|
||||||
else:
|
else:
|
||||||
target_image = model_util.det_book(img_path) # 识别文档区域并裁剪
|
target_image = model_util.det_book(img_path) # 识别文档区域并裁剪
|
||||||
dewarped_image = model_util.dewarp(target_image) # 去扭曲
|
dewarped_image = model_util.dewarp(target_image) # 去扭曲
|
||||||
@@ -143,7 +146,19 @@ def information_extraction(phrec, identity):
|
|||||||
else:
|
else:
|
||||||
info_extract = None
|
info_extract = None
|
||||||
|
|
||||||
return rec_type, info_extract
|
if info_extract:
|
||||||
|
result_json = json.dumps(info_extract, ensure_ascii=False)
|
||||||
|
if len(result_json) > 5000:
|
||||||
|
result_json = result_json[:5000]
|
||||||
|
|
||||||
|
now = common_util.get_default_datetime()
|
||||||
|
session = MysqlSession()
|
||||||
|
session.add(ZxIeResult(pk_phhd=pk_phhd, pk_phrec=phrec.pk_phrec, id=identity,
|
||||||
|
cfjaddress=phrec.cfjaddress, content=result_json, create_time=now,
|
||||||
|
creator=HOSTNAME, update_time=now, updater=HOSTNAME))
|
||||||
|
session.commit()
|
||||||
|
session.close()
|
||||||
|
return rec_type, info_extract
|
||||||
|
|
||||||
|
|
||||||
# 从keys中获取准确率最高的value
|
# 从keys中获取准确率最高的value
|
||||||
@@ -402,7 +417,7 @@ def photo_review(pk_phhd, name):
|
|||||||
# 同一批图的标识
|
# 同一批图的标识
|
||||||
identity = int(time.time())
|
identity = int(time.time())
|
||||||
for phrec in phrecs:
|
for phrec in phrecs:
|
||||||
rec_type, ie_result = information_extraction(phrec, identity)
|
rec_type, ie_result = information_extraction(phrec, pk_phhd, identity)
|
||||||
if rec_type == '基本医保结算单':
|
if rec_type == '基本医保结算单':
|
||||||
rec_result = settlement_result
|
rec_result = settlement_result
|
||||||
elif rec_type == '出院记录':
|
elif rec_type == '出院记录':
|
||||||
@@ -424,12 +439,13 @@ def photo_review(pk_phhd, name):
|
|||||||
}
|
}
|
||||||
# 三项资料完整性判断
|
# 三项资料完整性判断
|
||||||
# 三项资料缺项判断
|
# 三项资料缺项判断
|
||||||
if (settlement_data['personal_account_payment'] + settlement_data['personal_cash_payment']
|
if (bool(settlement_data) and settlement_data['personal_account_payment']
|
||||||
|
and settlement_data['personal_cash_payment'] and settlement_data['medical_expenses']
|
||||||
|
and settlement_data['personal_account_payment'] + settlement_data['personal_cash_payment']
|
||||||
< settlement_data['medical_expenses']):
|
< settlement_data['medical_expenses']):
|
||||||
review_result['has_settlement'] = True
|
review_result['has_settlement'] = True
|
||||||
# TODO:出院记录和费用清单暂时没想好怎么判断
|
review_result['has_discharge'] = bool(discharge_result)
|
||||||
review_result['has_discharge'] = True
|
review_result['has_cost'] = bool(cost_result)
|
||||||
review_result['has_cost'] = True
|
|
||||||
# 三项资料缺页判断
|
# 三项资料缺页判断
|
||||||
# TODO:缺页需要对页码进行抽取,暂未训练相关模型
|
# TODO:缺页需要对页码进行抽取,暂未训练相关模型
|
||||||
review_result['full_page'] = True
|
review_result['full_page'] = True
|
||||||
|
|||||||
Reference in New Issue
Block a user