优化与调整
This commit is contained in:
@@ -16,7 +16,7 @@ PASSWORD = 'test9Root'
|
||||
DB_URL = f'mysql+pymysql://{USERNAME}:{PASSWORD}@{HOSTNAME}:{PORT}/{DATABASE}'
|
||||
|
||||
# 是否打印执行的sql
|
||||
SHOW_SQL = False
|
||||
SHOW_SQL = True
|
||||
|
||||
Engine = create_engine(DB_URL, echo=SHOW_SQL)
|
||||
Base = declarative_base(Engine)
|
||||
|
||||
@@ -6,3 +6,6 @@ PHHD_BATCH_SIZE = 10
|
||||
|
||||
# 没有查询到案子的等待时间(分钟)
|
||||
SLEEP_MINUTES = 5
|
||||
|
||||
# 是否发送报错邮件
|
||||
SEND_ERROR_EMAIL = True
|
||||
|
||||
6
main.py
6
main.py
@@ -3,7 +3,7 @@ import traceback
|
||||
|
||||
from auto_email.error_email import send_an_error_email
|
||||
from config.log import LOGGING_CONFIG
|
||||
from config.photo_review import RETRY_TIME
|
||||
from config.photo_review import RETRY_TIME, SEND_ERROR_EMAIL
|
||||
from photo_review.photo_review import main
|
||||
|
||||
# 项目必须从此处启动,否则代码中的相对路径可能导致错误的发生
|
||||
@@ -19,5 +19,7 @@ if __name__ == '__main__':
|
||||
main()
|
||||
except Exception as e:
|
||||
log.error(traceback.format_exc())
|
||||
send_an_error_email(program_name='照片审核关键信息抽取脚本', error_name=repr(e), error_detail=traceback.format_exc())
|
||||
if SEND_ERROR_EMAIL:
|
||||
send_an_error_email(program_name='照片审核关键信息抽取脚本', error_name=repr(e),
|
||||
error_detail=traceback.format_exc())
|
||||
continue
|
||||
|
||||
@@ -3,6 +3,7 @@ import logging
|
||||
from time import sleep
|
||||
|
||||
from paddlenlp import Taskflow
|
||||
from sqlalchemy import update
|
||||
|
||||
from config.keys import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
|
||||
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR
|
||||
@@ -16,7 +17,7 @@ from photo_review.entity.zx_ie_settlement import ZxIeSettlement
|
||||
from photo_review.entity.zx_ocr import ZxOcr
|
||||
from photo_review.entity.zx_phhd import ZxPhhd
|
||||
from photo_review.entity.zx_phrec import ZxPhrec
|
||||
from photo_review.util.data_util import handle_date, handle_decimal
|
||||
from photo_review.util.data_util import handle_date, handle_decimal, handle_department
|
||||
from photo_review.util.ucloud import get_private_url
|
||||
|
||||
|
||||
@@ -27,6 +28,7 @@ def information_extraction(schema, phrecs, task_path):
|
||||
pic_path = get_private_url(phrec.cfjaddress)
|
||||
if pic_path:
|
||||
ie = Taskflow("information_extraction", schema=schema, model="uie-x-base", task_path=task_path)
|
||||
# 批量抽取写法:(ie([{"doc": "./data/6.jpg"}, {"doc": "./data/7.jpg"}])
|
||||
result = ie({"doc": pic_path})
|
||||
|
||||
result_json = json.dumps(result, ensure_ascii=False)
|
||||
@@ -173,9 +175,14 @@ def photo_review(pk_phhd):
|
||||
discharge_data["hospital"] = yljg.name
|
||||
department_value = get_values_of_keys(discharge_record_ie_result, department_key)
|
||||
if department_value:
|
||||
department_values = []
|
||||
for dept in department_value:
|
||||
department_values += handle_department(dept)
|
||||
department_values = list(set(department_values))
|
||||
if department_values:
|
||||
session = MysqlSession()
|
||||
ylks = session.query(BdYlks.pk_ylks, BdYlks.name) \
|
||||
.filter(BdYlks.name.in_(department_value)).limit(1).one_or_none()
|
||||
.filter(BdYlks.name.in_(department_values)).limit(1).one_or_none()
|
||||
session.close()
|
||||
if ylks:
|
||||
discharge_data["pk_ylks"] = ylks.pk_ylks
|
||||
@@ -192,19 +199,16 @@ def photo_review(pk_phhd):
|
||||
}
|
||||
cost_data["admission_date"] = handle_date(cost_data["admission_date_str"])
|
||||
cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"])
|
||||
cost_data["medical_expenses"] = handle_date(cost_data["medical_expenses_str"])
|
||||
cost_data["medical_expenses"] = handle_decimal(cost_data["medical_expenses_str"])
|
||||
save_or_update_ie(ZxIeCost, pk_phhd, cost_data)
|
||||
|
||||
|
||||
def main():
|
||||
# 最后处理的报销案子pk
|
||||
last_pk_phhd = 0
|
||||
# 持续检测新案子
|
||||
while 1:
|
||||
session = MysqlSession()
|
||||
phhds = session.query(ZxPhhd.pk_phhd) \
|
||||
.filter(ZxPhhd.pk_phhd > last_pk_phhd) \
|
||||
.filter(ZxPhhd.cStatus == '2') \
|
||||
.filter(ZxPhhd.exsuccess_flag == '1') \
|
||||
.limit(PHHD_BATCH_SIZE) \
|
||||
.all()
|
||||
session.close()
|
||||
@@ -212,7 +216,13 @@ def main():
|
||||
for phhd in phhds:
|
||||
pk_phhd = phhd.pk_phhd
|
||||
photo_review(pk_phhd)
|
||||
last_pk_phhd = pk_phhd
|
||||
|
||||
# 识别完成更新标识
|
||||
session = MysqlSession()
|
||||
stmt = (update(ZxPhhd).where(ZxPhhd.pk_phhd == pk_phhd).values(exsuccess_flag=8))
|
||||
session.execute(stmt)
|
||||
session.commit()
|
||||
session.close()
|
||||
else:
|
||||
# 没有查询到新案子,等待一段时间后再查
|
||||
sleep_minutes = SLEEP_MINUTES
|
||||
|
||||
@@ -6,7 +6,12 @@ from datetime import datetime
|
||||
def handle_decimal(string):
    """Normalize a decimal-like string to digits with at most one decimal point.

    Every character other than an ASCII digit or "." is removed. If several
    dots remain, only the last one is kept as the decimal point and the
    earlier ones are dropped. A trailing dot with no fractional digits is
    removed as well.

    Args:
        string: Raw text to clean; may be empty or None.

    Returns:
        The cleaned numeric string, or "" when the input is falsy or contains
        no digits.
    """
    if not string:
        return ""

    cleaned = re.sub(r'[^0-9.]', '', string)

    # rpartition never raises: for dot-free input it yields ("", "", cleaned),
    # whereas unpacking the result of rsplit('.', 1) fails with ValueError.
    front, dot, back = cleaned.rpartition('.')
    if not dot:
        # Plain integer text (or nothing left at all): nothing more to fix.
        return cleaned

    # Any dot left of the last one is not the decimal point; discard it.
    front = front.replace(".", "")
    if back:
        return front + "." + back
    return front
|
||||
|
||||
|
||||
# 处理日期类数据
|
||||
@@ -14,8 +19,13 @@ def handle_date(string):
|
||||
if not string:
|
||||
return ""
|
||||
|
||||
string = string.replace("年", "-").replace("月", "-").replace("日", "")
|
||||
string = string.replace("年", "-").replace("月", "-").replace("日", "").replace("/", "-").replace(".", "-")
|
||||
string = re.sub(r'[^0-9-]', '', string)
|
||||
length = len(string)
|
||||
if length > 8 and "-" not in string:
|
||||
string = string[:8]
|
||||
elif length > 10 and "-" in string:
|
||||
string = string[:10]
|
||||
if is_valid_date_format(string):
|
||||
return string
|
||||
else:
|
||||
@@ -48,3 +58,20 @@ def is_valid_date_format(date_str):
|
||||
pass
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def handle_department(string):
    """Build candidate spellings of a department name, from raw to simplified.

    Candidates are produced in this order, each derived from the previous one:
      1. the input itself;
      2. the input with digits and the Chinese numerals 一..十 removed;
      3. the above with bracketed segments removed -- (), [], {} and the
         full-width （）;
      4. the text before the first "科" with "科" appended.

    Empty candidates and candidates already in the list are skipped.

    Args:
        string: Department name text; may be empty or None.

    Returns:
        A list of distinct, non-empty candidate names in the order above;
        an empty list when the input is falsy.
    """
    result = []
    if not string:
        return result

    def _add(candidate):
        # An empty string or a repeated spelling is useless as a candidate.
        if candidate and candidate not in result:
            result.append(candidate)

    _add(string)

    without_num = re.sub(r'[\d一二三四五六七八九十]', '', string)
    _add(without_num)

    # The last alternative must use full-width brackets. Written with ASCII
    # parentheses it is a capture group matching every run of non-bracket
    # characters, which erases the whole name.
    without_brackets = re.sub(
        r'\([^()]*\)|\[[^\[\]]*\]|\{[^\{\}]*\}|（[^（）]*）', "", without_num)
    _add(without_brackets)

    # Only derive the "...科" form when some text is left; otherwise the
    # candidate would be a bare "科".
    if without_brackets:
        _add(without_brackets.split("科")[0] + "科")

    return result
|
||||
|
||||
Reference in New Issue
Block a user