"""Runtime settings and shared PaddleNLP / PaddleOCR model instances."""
from paddlenlp import Taskflow
|
|
from paddleocr import PaddleOCR
|
|
|
|
from config.keys import SETTLEMENT_LIST_SCHEMA, DISCHARGE_RECORD_SCHEMA, COST_LIST_SCHEMA
|
|
|
|
# ---------------------------------------------------------------------------
# Scalar runtime settings.
# ---------------------------------------------------------------------------

# Number of retries after the project crashes.
RETRY_TIME = 1

# Number of cases fetched from the database per query.
PHHD_BATCH_SIZE = 10

# How long to wait (in minutes) when a query returns no cases.
SLEEP_MINUTES = 5

# Whether to send an e-mail when an error is reported.
SEND_ERROR_EMAIL = True

# Batch size used for information extraction.
IE_BATCH_SIZE = 4

# Whether layout analysis is enabled for information extraction.
LAYOUT_ANALYSIS = False

# GPU(s) that may be used.
# NOTE(review): this is only a Python string here; nothing in this file
# exports it to the process environment -- confirm where it is consumed.
CUDA_VISIBLE_DEVICES = "1"

# Basic medical insurance settlement statement.
# Information-extraction Taskflow built once at import time with the
# settlement schema; `task_path` points at a local model directory
# (presumably a fine-tuned checkpoint -- confirm).
SETTLEMENT_IE = Taskflow(
    "information_extraction",
    schema=SETTLEMENT_LIST_SCHEMA,
    model="uie-x-base",
    task_path="config/model/settlement_list_model",
    layout_analysis=LAYOUT_ANALYSIS,
    batch_size=IE_BATCH_SIZE,
)

# Discharge record.
# Information-extraction Taskflow built once at import time with the
# discharge-record schema; `task_path` points at a local model directory
# (presumably a fine-tuned checkpoint -- confirm).
DISCHARGE_IE = Taskflow(
    "information_extraction",
    schema=DISCHARGE_RECORD_SCHEMA,
    model="uie-x-base",
    task_path="config/model/discharge_record_model",
    layout_analysis=LAYOUT_ANALYSIS,
    batch_size=IE_BATCH_SIZE,
)

# Cost (expense) list.
# Information-extraction Taskflow built once at import time with the
# cost-list schema; `task_path` points at a local model directory
# (presumably a fine-tuned checkpoint -- confirm).
COST_IE = Taskflow(
    "information_extraction",
    schema=COST_LIST_SCHEMA,
    model="uie-x-base",
    task_path="config/model/cost_list_model",
    layout_analysis=LAYOUT_ANALYSIS,
    batch_size=IE_BATCH_SIZE,
)

# OCR engine, created once at import time: angle classification enabled,
# language set to "ch", and PaddleOCR's own logging turned off.
OCR = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)