优化与调整
This commit is contained in:
@@ -16,7 +16,7 @@ PASSWORD = 'test9Root'
|
|||||||
DB_URL = f'mysql+pymysql://{USERNAME}:{PASSWORD}@{HOSTNAME}:{PORT}/{DATABASE}'
|
DB_URL = f'mysql+pymysql://{USERNAME}:{PASSWORD}@{HOSTNAME}:{PORT}/{DATABASE}'
|
||||||
|
|
||||||
# 是否打印执行的sql
|
# 是否打印执行的sql
|
||||||
SHOW_SQL = False
|
SHOW_SQL = True
|
||||||
|
|
||||||
Engine = create_engine(DB_URL, echo=SHOW_SQL)
|
Engine = create_engine(DB_URL, echo=SHOW_SQL)
|
||||||
Base = declarative_base(Engine)
|
Base = declarative_base(Engine)
|
||||||
|
|||||||
@@ -6,3 +6,6 @@ PHHD_BATCH_SIZE = 10
|
|||||||
|
|
||||||
# 没有查询到案子的等待时间(分钟)
|
# 没有查询到案子的等待时间(分钟)
|
||||||
SLEEP_MINUTES = 5
|
SLEEP_MINUTES = 5
|
||||||
|
|
||||||
|
# 是否发送报错邮件
|
||||||
|
SEND_ERROR_EMAIL = True
|
||||||
|
|||||||
6
main.py
6
main.py
@@ -3,7 +3,7 @@ import traceback
|
|||||||
|
|
||||||
from auto_email.error_email import send_an_error_email
|
from auto_email.error_email import send_an_error_email
|
||||||
from config.log import LOGGING_CONFIG
|
from config.log import LOGGING_CONFIG
|
||||||
from config.photo_review import RETRY_TIME
|
from config.photo_review import RETRY_TIME, SEND_ERROR_EMAIL
|
||||||
from photo_review.photo_review import main
|
from photo_review.photo_review import main
|
||||||
|
|
||||||
# 项目必须从此处启动,否则代码中的相对路径可能导致错误的发生
|
# 项目必须从此处启动,否则代码中的相对路径可能导致错误的发生
|
||||||
@@ -19,5 +19,7 @@ if __name__ == '__main__':
|
|||||||
main()
|
main()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.error(traceback.format_exc())
|
log.error(traceback.format_exc())
|
||||||
send_an_error_email(program_name='照片审核关键信息抽取脚本', error_name=repr(e), error_detail=traceback.format_exc())
|
if SEND_ERROR_EMAIL:
|
||||||
|
send_an_error_email(program_name='照片审核关键信息抽取脚本', error_name=repr(e),
|
||||||
|
error_detail=traceback.format_exc())
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import logging
|
|||||||
from time import sleep
|
from time import sleep
|
||||||
|
|
||||||
from paddlenlp import Taskflow
|
from paddlenlp import Taskflow
|
||||||
|
from sqlalchemy import update
|
||||||
|
|
||||||
from config.keys import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
|
from config.keys import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
|
||||||
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR
|
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR
|
||||||
@@ -16,7 +17,7 @@ from photo_review.entity.zx_ie_settlement import ZxIeSettlement
|
|||||||
from photo_review.entity.zx_ocr import ZxOcr
|
from photo_review.entity.zx_ocr import ZxOcr
|
||||||
from photo_review.entity.zx_phhd import ZxPhhd
|
from photo_review.entity.zx_phhd import ZxPhhd
|
||||||
from photo_review.entity.zx_phrec import ZxPhrec
|
from photo_review.entity.zx_phrec import ZxPhrec
|
||||||
from photo_review.util.data_util import handle_date, handle_decimal
|
from photo_review.util.data_util import handle_date, handle_decimal, handle_department
|
||||||
from photo_review.util.ucloud import get_private_url
|
from photo_review.util.ucloud import get_private_url
|
||||||
|
|
||||||
|
|
||||||
@@ -27,6 +28,7 @@ def information_extraction(schema, phrecs, task_path):
|
|||||||
pic_path = get_private_url(phrec.cfjaddress)
|
pic_path = get_private_url(phrec.cfjaddress)
|
||||||
if pic_path:
|
if pic_path:
|
||||||
ie = Taskflow("information_extraction", schema=schema, model="uie-x-base", task_path=task_path)
|
ie = Taskflow("information_extraction", schema=schema, model="uie-x-base", task_path=task_path)
|
||||||
|
# 批量抽取写法:(ie([{"doc": "./data/6.jpg"}, {"doc": "./data/7.jpg"}])
|
||||||
result = ie({"doc": pic_path})
|
result = ie({"doc": pic_path})
|
||||||
|
|
||||||
result_json = json.dumps(result, ensure_ascii=False)
|
result_json = json.dumps(result, ensure_ascii=False)
|
||||||
@@ -173,9 +175,14 @@ def photo_review(pk_phhd):
|
|||||||
discharge_data["hospital"] = yljg.name
|
discharge_data["hospital"] = yljg.name
|
||||||
department_value = get_values_of_keys(discharge_record_ie_result, department_key)
|
department_value = get_values_of_keys(discharge_record_ie_result, department_key)
|
||||||
if department_value:
|
if department_value:
|
||||||
|
department_values = []
|
||||||
|
for dept in department_value:
|
||||||
|
department_values += handle_department(dept)
|
||||||
|
department_values = list(set(department_values))
|
||||||
|
if department_values:
|
||||||
session = MysqlSession()
|
session = MysqlSession()
|
||||||
ylks = session.query(BdYlks.pk_ylks, BdYlks.name) \
|
ylks = session.query(BdYlks.pk_ylks, BdYlks.name) \
|
||||||
.filter(BdYlks.name.in_(department_value)).limit(1).one_or_none()
|
.filter(BdYlks.name.in_(department_values)).limit(1).one_or_none()
|
||||||
session.close()
|
session.close()
|
||||||
if ylks:
|
if ylks:
|
||||||
discharge_data["pk_ylks"] = ylks.pk_ylks
|
discharge_data["pk_ylks"] = ylks.pk_ylks
|
||||||
@@ -192,19 +199,16 @@ def photo_review(pk_phhd):
|
|||||||
}
|
}
|
||||||
cost_data["admission_date"] = handle_date(cost_data["admission_date_str"])
|
cost_data["admission_date"] = handle_date(cost_data["admission_date_str"])
|
||||||
cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"])
|
cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"])
|
||||||
cost_data["medical_expenses"] = handle_date(cost_data["medical_expenses_str"])
|
cost_data["medical_expenses"] = handle_decimal(cost_data["medical_expenses_str"])
|
||||||
save_or_update_ie(ZxIeCost, pk_phhd, cost_data)
|
save_or_update_ie(ZxIeCost, pk_phhd, cost_data)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
# 最后处理的报销案子pk
|
|
||||||
last_pk_phhd = 0
|
|
||||||
# 持续检测新案子
|
# 持续检测新案子
|
||||||
while 1:
|
while 1:
|
||||||
session = MysqlSession()
|
session = MysqlSession()
|
||||||
phhds = session.query(ZxPhhd.pk_phhd) \
|
phhds = session.query(ZxPhhd.pk_phhd) \
|
||||||
.filter(ZxPhhd.pk_phhd > last_pk_phhd) \
|
.filter(ZxPhhd.exsuccess_flag == '1') \
|
||||||
.filter(ZxPhhd.cStatus == '2') \
|
|
||||||
.limit(PHHD_BATCH_SIZE) \
|
.limit(PHHD_BATCH_SIZE) \
|
||||||
.all()
|
.all()
|
||||||
session.close()
|
session.close()
|
||||||
@@ -212,7 +216,13 @@ def main():
|
|||||||
for phhd in phhds:
|
for phhd in phhds:
|
||||||
pk_phhd = phhd.pk_phhd
|
pk_phhd = phhd.pk_phhd
|
||||||
photo_review(pk_phhd)
|
photo_review(pk_phhd)
|
||||||
last_pk_phhd = pk_phhd
|
|
||||||
|
# 识别完成更新标识
|
||||||
|
session = MysqlSession()
|
||||||
|
stmt = (update(ZxPhhd).where(ZxPhhd.pk_phhd == pk_phhd).values(exsuccess_flag=8))
|
||||||
|
session.execute(stmt)
|
||||||
|
session.commit()
|
||||||
|
session.close()
|
||||||
else:
|
else:
|
||||||
# 没有查询到新案子,等待一段时间后再查
|
# 没有查询到新案子,等待一段时间后再查
|
||||||
sleep_minutes = SLEEP_MINUTES
|
sleep_minutes = SLEEP_MINUTES
|
||||||
|
|||||||
@@ -6,7 +6,12 @@ from datetime import datetime
|
|||||||
def handle_decimal(string):
    """Normalize an extracted amount string into a plain decimal string.

    Every character other than ASCII digits and '.' is dropped. If several
    dots remain, only the last one is kept as the decimal point; earlier
    ones are removed from the integer part.

    :param string: raw text of the amount; may be None or empty.
    :return: the cleaned numeric string, or "" when the input is empty or
        contains no digits.
    """
    if not string:
        return ""

    cleaned = re.sub(r'[^0-9.]', '', string)

    # rpartition never raises: with no '.', it yields ('', '', cleaned), so
    # integer amounts such as "100" no longer fail on tuple unpacking.
    integer_part, dot, fraction = cleaned.rpartition('.')
    if not dot:
        return cleaned

    # Any dot left in the integer part is not the decimal point; drop it.
    integer_part = integer_part.replace('.', '')

    # A trailing dot with nothing after it is dropped ("100." -> "100").
    if fraction:
        return integer_part + '.' + fraction
    return integer_part
|
||||||
|
|
||||||
|
|
||||||
# 处理日期类数据
|
# 处理日期类数据
|
||||||
@@ -14,8 +19,13 @@ def handle_date(string):
|
|||||||
if not string:
|
if not string:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
string = string.replace("年", "-").replace("月", "-").replace("日", "")
|
string = string.replace("年", "-").replace("月", "-").replace("日", "").replace("/", "-").replace(".", "-")
|
||||||
string = re.sub(r'[^0-9-]', '', string)
|
string = re.sub(r'[^0-9-]', '', string)
|
||||||
|
length = len(string)
|
||||||
|
if length > 8 and "-" not in string:
|
||||||
|
string = string[:8]
|
||||||
|
elif length > 10 and "-" in string:
|
||||||
|
string = string[:10]
|
||||||
if is_valid_date_format(string):
|
if is_valid_date_format(string):
|
||||||
return string
|
return string
|
||||||
else:
|
else:
|
||||||
@@ -48,3 +58,20 @@ def is_valid_date_format(date_str):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def handle_department(string):
    """Build candidate department names from one extracted department string.

    Candidates are produced in order, each derived from the previous one:

    1. the original string;
    2. the string with ASCII digits and the Chinese numerals 一..十 removed;
    3. that result with bracketed segments removed -- (), [], {} and
       full-width （）;
    4. that result cut after the first "科" (with "科" appended when the
       text contains none).

    Empty and duplicate candidates are not emitted.

    :param string: raw department text; may be None or empty.
    :return: list of distinct, non-empty candidate names in the order above.
    """
    result = []
    if not string:
        return result

    def _add(candidate):
        # Keep only non-empty candidates that have not been collected yet.
        if candidate and candidate not in result:
            result.append(candidate)

    _add(string)

    string_without_num = re.sub(r'[\d一二三四五六七八九十]', '', string)
    _add(string_without_num)

    # Full-width brackets are written as \uFF08 / \uFF09 escapes so they
    # cannot be mistaken for (or converted into) an ASCII capturing group,
    # which would match -- and delete -- every non-bracket run of text.
    string_without_brackets = re.sub(
        r'\([^()]*\)|\[[^\[\]]*\]|\{[^{}]*\}|\uFF08[^\uFF08\uFF09]*\uFF09',
        '',
        string_without_num,
    )
    _add(string_without_brackets)

    # Only derive the "...科" form from a non-empty base; otherwise the
    # candidate would be the bare suffix "科".
    if string_without_brackets:
        _add(string_without_brackets.split("科")[0] + "科")

    return result
|
||||||
|
|||||||
Reference in New Issue
Block a user