优化批量处理能力
This commit is contained in:
@@ -8,7 +8,7 @@ from sqlalchemy import update
|
||||
from config.keys import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
|
||||
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR
|
||||
from config.mysql import MysqlSession
|
||||
from config.photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES
|
||||
from config.photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, IE_BATCH_SIZE
|
||||
from photo_review.entity.bd_yljg import BdYljg
|
||||
from photo_review.entity.bd_ylks import BdYlks
|
||||
from photo_review.entity.zx_ie_cost import ZxIeCost
|
||||
@@ -24,29 +24,36 @@ from photo_review.util.util import get_default_datetime
|
||||
|
||||
# 关键信息提取
|
||||
def information_extraction(schema, phrecs, task_path):
|
||||
results = {}
|
||||
result = {}
|
||||
docs = []
|
||||
doc_phrecs = []
|
||||
for phrec in phrecs:
|
||||
pic_path = get_private_url(phrec.cfjaddress)
|
||||
if pic_path:
|
||||
ie = Taskflow("information_extraction", schema=schema, model="uie-x-base", task_path=task_path,
|
||||
layout_analysis=True)
|
||||
# 批量抽取写法:ie([{"doc": "./data/6.jpg"}, {"doc": "./data/7.jpg"}])
|
||||
result = ie({"doc": pic_path})
|
||||
docs.append({"doc": pic_path})
|
||||
doc_phrecs.append(phrec)
|
||||
|
||||
result_json = json.dumps(result, ensure_ascii=False)
|
||||
if len(result_json) > 5000:
|
||||
result_json = result_json[:5000]
|
||||
# 提取完保存每张图片的结果
|
||||
now = get_default_datetime()
|
||||
session = MysqlSession()
|
||||
zx_ocr = ZxOcr(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, cfjaddress=phrec.cfjaddress,
|
||||
content=result_json, create_time=now, update_time=now)
|
||||
session.add(zx_ocr)
|
||||
session.commit()
|
||||
session.close()
|
||||
ie = Taskflow("information_extraction", schema=schema, model="uie-x-base", task_path=task_path,
|
||||
layout_analysis=True, batch_size=IE_BATCH_SIZE)
|
||||
ie_results = ie(docs)
|
||||
|
||||
results.update(result[0])
|
||||
return results
|
||||
now = get_default_datetime()
|
||||
for i in range(len(ie_results)):
|
||||
ie_result = ie_results[i]
|
||||
phrec = doc_phrecs[i]
|
||||
result_json = json.dumps(ie_result, ensure_ascii=False)
|
||||
if len(result_json) > 5000:
|
||||
result_json = result_json[:5000]
|
||||
session = MysqlSession()
|
||||
zx_ocr = ZxOcr(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, cfjaddress=phrec.cfjaddress,
|
||||
content=result_json, create_time=now, update_time=now)
|
||||
session.add(zx_ocr)
|
||||
session.commit()
|
||||
session.close()
|
||||
|
||||
result.update(ie_result)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
# 从keys中获取准确率最高的value
|
||||
|
||||
Reference in New Issue
Block a user