优化与调整

This commit is contained in:
2024-05-29 11:31:22 +08:00
parent 1eeb2b1544
commit a94bb613a9
5 changed files with 61 additions and 19 deletions

View File

@@ -16,7 +16,7 @@ PASSWORD = 'test9Root'
DB_URL = f'mysql+pymysql://{USERNAME}:{PASSWORD}@{HOSTNAME}:{PORT}/{DATABASE}' DB_URL = f'mysql+pymysql://{USERNAME}:{PASSWORD}@{HOSTNAME}:{PORT}/{DATABASE}'
# 是否打印执行的sql # 是否打印执行的sql
SHOW_SQL = False SHOW_SQL = True
Engine = create_engine(DB_URL, echo=SHOW_SQL) Engine = create_engine(DB_URL, echo=SHOW_SQL)
Base = declarative_base(Engine) Base = declarative_base(Engine)

View File

@@ -6,3 +6,6 @@ PHHD_BATCH_SIZE = 10
# 没有查询到案子的等待时间(分钟) # 没有查询到案子的等待时间(分钟)
SLEEP_MINUTES = 5 SLEEP_MINUTES = 5
# 是否发送报错邮件
SEND_ERROR_EMAIL = True

View File

@@ -3,7 +3,7 @@ import traceback
from auto_email.error_email import send_an_error_email from auto_email.error_email import send_an_error_email
from config.log import LOGGING_CONFIG from config.log import LOGGING_CONFIG
from config.photo_review import RETRY_TIME from config.photo_review import RETRY_TIME, SEND_ERROR_EMAIL
from photo_review.photo_review import main from photo_review.photo_review import main
# 项目必须从此处启动,否则代码中的相对路径可能导致错误的发生 # 项目必须从此处启动,否则代码中的相对路径可能导致错误的发生
@@ -19,5 +19,7 @@ if __name__ == '__main__':
main() main()
except Exception as e: except Exception as e:
log.error(traceback.format_exc()) log.error(traceback.format_exc())
send_an_error_email(program_name='照片审核关键信息抽取脚本', error_name=repr(e), error_detail=traceback.format_exc()) if SEND_ERROR_EMAIL:
send_an_error_email(program_name='照片审核关键信息抽取脚本', error_name=repr(e),
error_detail=traceback.format_exc())
continue continue

View File

@@ -3,6 +3,7 @@ import logging
from time import sleep from time import sleep
from paddlenlp import Taskflow from paddlenlp import Taskflow
from sqlalchemy import update
from config.keys import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \ from config.keys import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR
@@ -16,7 +17,7 @@ from photo_review.entity.zx_ie_settlement import ZxIeSettlement
from photo_review.entity.zx_ocr import ZxOcr from photo_review.entity.zx_ocr import ZxOcr
from photo_review.entity.zx_phhd import ZxPhhd from photo_review.entity.zx_phhd import ZxPhhd
from photo_review.entity.zx_phrec import ZxPhrec from photo_review.entity.zx_phrec import ZxPhrec
from photo_review.util.data_util import handle_date, handle_decimal from photo_review.util.data_util import handle_date, handle_decimal, handle_department
from photo_review.util.ucloud import get_private_url from photo_review.util.ucloud import get_private_url
@@ -27,6 +28,7 @@ def information_extraction(schema, phrecs, task_path):
pic_path = get_private_url(phrec.cfjaddress) pic_path = get_private_url(phrec.cfjaddress)
if pic_path: if pic_path:
ie = Taskflow("information_extraction", schema=schema, model="uie-x-base", task_path=task_path) ie = Taskflow("information_extraction", schema=schema, model="uie-x-base", task_path=task_path)
# 批量抽取写法:(ie([{"doc": "./data/6.jpg"}, {"doc": "./data/7.jpg"}])
result = ie({"doc": pic_path}) result = ie({"doc": pic_path})
result_json = json.dumps(result, ensure_ascii=False) result_json = json.dumps(result, ensure_ascii=False)
@@ -173,13 +175,18 @@ def photo_review(pk_phhd):
discharge_data["hospital"] = yljg.name discharge_data["hospital"] = yljg.name
department_value = get_values_of_keys(discharge_record_ie_result, department_key) department_value = get_values_of_keys(discharge_record_ie_result, department_key)
if department_value: if department_value:
session = MysqlSession() department_values = []
ylks = session.query(BdYlks.pk_ylks, BdYlks.name) \ for dept in department_value:
.filter(BdYlks.name.in_(department_value)).limit(1).one_or_none() department_values += handle_department(dept)
session.close() department_values = list(set(department_values))
if ylks: if department_values:
discharge_data["pk_ylks"] = ylks.pk_ylks session = MysqlSession()
discharge_data["department"] = ylks.name ylks = session.query(BdYlks.pk_ylks, BdYlks.name) \
.filter(BdYlks.name.in_(department_values)).limit(1).one_or_none()
session.close()
if ylks:
discharge_data["pk_ylks"] = ylks.pk_ylks
discharge_data["department"] = ylks.name
save_or_update_ie(ZxIeDischarge, pk_phhd, discharge_data) save_or_update_ie(ZxIeDischarge, pk_phhd, discharge_data)
cost_list_ie_result = information_extraction(cost_list_schema, cost_list, "config/model/cost_list_model") cost_list_ie_result = information_extraction(cost_list_schema, cost_list, "config/model/cost_list_model")
@@ -192,19 +199,16 @@ def photo_review(pk_phhd):
} }
cost_data["admission_date"] = handle_date(cost_data["admission_date_str"]) cost_data["admission_date"] = handle_date(cost_data["admission_date_str"])
cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"]) cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"])
cost_data["medical_expenses"] = handle_date(cost_data["medical_expenses_str"]) cost_data["medical_expenses"] = handle_decimal(cost_data["medical_expenses_str"])
save_or_update_ie(ZxIeCost, pk_phhd, cost_data) save_or_update_ie(ZxIeCost, pk_phhd, cost_data)
def main(): def main():
# 最后处理的报销案子pk
last_pk_phhd = 0
# 持续检测新案子 # 持续检测新案子
while 1: while 1:
session = MysqlSession() session = MysqlSession()
phhds = session.query(ZxPhhd.pk_phhd) \ phhds = session.query(ZxPhhd.pk_phhd) \
.filter(ZxPhhd.pk_phhd > last_pk_phhd) \ .filter(ZxPhhd.exsuccess_flag == '1') \
.filter(ZxPhhd.cStatus == '2') \
.limit(PHHD_BATCH_SIZE) \ .limit(PHHD_BATCH_SIZE) \
.all() .all()
session.close() session.close()
@@ -212,7 +216,13 @@ def main():
for phhd in phhds: for phhd in phhds:
pk_phhd = phhd.pk_phhd pk_phhd = phhd.pk_phhd
photo_review(pk_phhd) photo_review(pk_phhd)
last_pk_phhd = pk_phhd
# 识别完成更新标识
session = MysqlSession()
stmt = (update(ZxPhhd).where(ZxPhhd.pk_phhd == pk_phhd).values(exsuccess_flag=8))
session.execute(stmt)
session.commit()
session.close()
else: else:
# 没有查询到新案子,等待一段时间后再查 # 没有查询到新案子,等待一段时间后再查
sleep_minutes = SLEEP_MINUTES sleep_minutes = SLEEP_MINUTES

View File

@@ -6,7 +6,12 @@ from datetime import datetime
def handle_decimal(string):
    """Normalize an extracted amount string into a plain decimal string.

    Every character other than ASCII digits and "." is dropped. If several
    dots remain, only the last one is kept as the decimal point; earlier dots
    are treated as grouping noise and removed. A trailing dot with no
    fractional digits is dropped.

    Args:
        string: Raw amount text; may be empty or None.

    Returns:
        The cleaned numeric string, or "" when the input is empty/None or
        contains no digits or dots.
    """
    if not string:
        return ""
    cleaned = re.sub(r'[^0-9.]', '', string)
    # No decimal point at all (e.g. "123", or nothing left after cleaning):
    # return as-is instead of trying to unpack a one-element split result.
    if "." not in cleaned:
        return cleaned
    front, _, back = cleaned.rpartition(".")
    front = front.replace(".", "")
    return f"{front}.{back}" if back else front
# 处理日期类数据 # 处理日期类数据
@@ -14,8 +19,13 @@ def handle_date(string):
if not string: if not string:
return "" return ""
string = string.replace("", "-").replace("", "-").replace("", "") string = string.replace("", "-").replace("", "-").replace("", "").replace("/", "-").replace(".", "-")
string = re.sub(r'[^0-9-]', '', string) string = re.sub(r'[^0-9-]', '', string)
length = len(string)
if length > 8 and "-" not in string:
string = string[:8]
elif length > 10 and "-" in string:
string = string[:10]
if is_valid_date_format(string): if is_valid_date_format(string):
return string return string
else: else:
@@ -48,3 +58,20 @@ def is_valid_date_format(date_str):
pass pass
return False return False
def handle_department(string):
    """Build candidate department names from one extracted department string.

    The raw value is kept first, followed by progressively simplified variants
    (each added only when it differs from the previous step):

    1. digits and the Chinese numerals 一..十 removed;
    2. bracketed segments removed;
    3. everything after the first "科" cut off.

    Args:
        string: Raw department text; may be empty or None.

    Returns:
        A list of candidate names, most specific first; [] for empty input.
    """
    result = []
    if not string:
        return result
    result.append(string)

    string_without_num = re.sub(r'[\d一二三四五六七八九十]', '', string)
    if string_without_num != string:
        result.append(string_without_num)

    # NOTE(review): the fourth alternative (full-width parentheses) is
    # reconstructed; the characters were missing from the text this was
    # recovered from, leaving an invalid pattern. Confirm against the repo.
    string_without_brackets = re.sub(
        r'\([^()]*\)|\[[^\[\]]*\]|\{[^\{\}]*\}|（[^（）]*）', "", string_without_num)
    if string_without_brackets != string_without_num:
        result.append(string_without_brackets)

    # NOTE(review): the "科" separator/suffix is likewise reconstructed (the
    # literal was empty, which makes str.split raise ValueError) — confirm.
    pure_string = string_without_brackets.split("科")[0] + "科"
    if pure_string != string_without_brackets:
        result.append(pure_string)
    return result