优化:将结果保存到zx_ocr;优化值的抽取
This commit is contained in:
@@ -1,3 +1,4 @@
|
|||||||
|
import json
|
||||||
from time import sleep
|
from time import sleep
|
||||||
|
|
||||||
from paddlenlp import Taskflow
|
from paddlenlp import Taskflow
|
||||||
@@ -8,6 +9,7 @@ from photo_review.entity.bd_ylks import BdYlks
|
|||||||
from photo_review.entity.zx_ie_cost import ZxIeCost
|
from photo_review.entity.zx_ie_cost import ZxIeCost
|
||||||
from photo_review.entity.zx_ie_discharge import ZxIeDischarge
|
from photo_review.entity.zx_ie_discharge import ZxIeDischarge
|
||||||
from photo_review.entity.zx_ie_settlement import ZxIeSettlement
|
from photo_review.entity.zx_ie_settlement import ZxIeSettlement
|
||||||
|
from photo_review.entity.zx_ocr import ZxOcr
|
||||||
from photo_review.entity.zx_phhd import ZxPhhd
|
from photo_review.entity.zx_phhd import ZxPhhd
|
||||||
from photo_review.entity.zx_phrec import ZxPhrec
|
from photo_review.entity.zx_phrec import ZxPhrec
|
||||||
from photo_review.util.data_util import handle_date, handle_decimal
|
from photo_review.util.data_util import handle_date, handle_decimal
|
||||||
@@ -15,26 +17,41 @@ from photo_review.util.ucloud import get_private_url
|
|||||||
|
|
||||||
|
|
||||||
# Key information extraction
def information_extraction(schema, phrecs, task_path):
    """Run information extraction on each picture record and merge the results.

    For every record whose ``cfjaddress`` resolves to a private URL, the
    picture is passed to the extraction model, the raw per-picture result is
    stored as a ``ZxOcr`` row, and the first result dict is merged into the
    returned mapping (later pictures overwrite earlier ones on key clashes).

    Args:
        schema: Extraction schema handed to ``Taskflow``.
        phrecs: Iterable of records exposing ``pk_phhd``, ``pk_phrec`` and
            ``cfjaddress`` attributes.
        task_path: Path of the fine-tuned model handed to ``Taskflow``.

    Returns:
        dict: Merged extraction result; empty when no picture was processed.
    """
    results = {}
    # The model is loop-invariant and costly to load, so it is built once,
    # and only when the first usable picture shows up.
    ie = None
    for phrec in phrecs:
        pic_path = get_private_url(phrec.cfjaddress)
        if not pic_path:
            continue

        if ie is None:
            ie = Taskflow("information_extraction", schema=schema, model="uie-x-base", task_path=task_path)
        result = ie({"doc": pic_path})

        # Persist the raw result of every picture right after extraction.
        session = MysqlSession()
        try:
            zx_ocr = ZxOcr(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, cfjaddress=phrec.cfjaddress,
                           content=json.dumps(result, ensure_ascii=False))
            session.add(zx_ocr)
            session.commit()
        finally:
            # Always release the session, even when the insert fails.
            session.close()

        # Guard against an empty result list before taking the first entry.
        if result:
            results.update(result[0])
    return results
|
||||||
|
|
||||||
|
|
||||||
# Pick the highest-probability value among the given keys
def get_best_value_in_keys(source, keys):
    """Return the most probable non-empty text found under any of ``keys``.

    Args:
        source: Mapping from key to a list of candidate dicts, each read via
            its ``"text"`` and ``"probability"`` entries.
        keys: Keys to look up in ``source``.

    Returns:
        The ``"text"`` of the candidate with the greatest probability, or
        ``None`` when no candidate has non-empty text and a probability
        above zero. On equal probabilities the first candidate seen wins.
    """
    # Final result
    result = None
    # Highest probability seen so far
    most_probability = 0
    for key in keys:
        values = source.get(key)
        if not values:
            continue
        for value in values:
            text = value.get("text")
            # A missing probability counts as 0 instead of raising on None.
            probability = value.get("probability") or 0
            if text and probability > most_probability:
                result = text
                # Record the new maximum so weaker candidates cannot replace it.
                most_probability = probability
    return result
|
||||||
|
|
||||||
|
|
||||||
# 从keys中获取所有value组成list
|
# 从keys中获取所有value组成list
|
||||||
@@ -69,17 +86,17 @@ def photo_review(pk_phhd):
|
|||||||
cost_list = []
|
cost_list = []
|
||||||
|
|
||||||
session = MysqlSession()
|
session = MysqlSession()
|
||||||
phrecs = session.query(ZxPhrec.pk_phrec, ZxPhrec.cRectype, ZxPhrec.cfjaddress) \
|
phrecs = session.query(ZxPhrec.pk_phrec, ZxPhrec.pk_phhd, ZxPhrec.cRectype, ZxPhrec.cfjaddress) \
|
||||||
.filter(ZxPhrec.pk_phhd == pk_phhd) \
|
.filter(ZxPhrec.pk_phhd == pk_phhd) \
|
||||||
.all()
|
.all()
|
||||||
session.close()
|
session.close()
|
||||||
for phrec in phrecs:
|
for phrec in phrecs:
|
||||||
if phrec.cRectype == "1":
|
if phrec.cRectype == "1":
|
||||||
settlement_list.append(phrec.cfjaddress)
|
settlement_list.append(phrec)
|
||||||
elif phrec.cRectype == "3":
|
elif phrec.cRectype == "3":
|
||||||
discharge_record.append(phrec.cfjaddress)
|
discharge_record.append(phrec)
|
||||||
elif phrec.cRectype == "4":
|
elif phrec.cRectype == "4":
|
||||||
cost_list.append(phrec.cfjaddress)
|
cost_list.append(phrec)
|
||||||
|
|
||||||
name_key = ["姓名", "交款人"]
|
name_key = ["姓名", "交款人"]
|
||||||
admission_date_key = ["入院日期", "住院时间", "开始日期", "费用发生时间", "入院时间", "住院日期"]
|
admission_date_key = ["入院日期", "住院时间", "开始日期", "费用发生时间", "入院时间", "住院日期"]
|
||||||
@@ -110,14 +127,14 @@ def photo_review(pk_phhd):
|
|||||||
"config/model/settlement_list_model")
|
"config/model/settlement_list_model")
|
||||||
settlement_data = {
|
settlement_data = {
|
||||||
"pk_phhd": pk_phhd,
|
"pk_phhd": pk_phhd,
|
||||||
"name": get_value_in_keys(settlement_list_ie_result, name_key),
|
"name": get_best_value_in_keys(settlement_list_ie_result, name_key),
|
||||||
"admission_date_str": get_value_in_keys(settlement_list_ie_result, admission_date_key),
|
"admission_date_str": get_best_value_in_keys(settlement_list_ie_result, admission_date_key),
|
||||||
"discharge_date_str": get_value_in_keys(settlement_list_ie_result, discharge_date_key),
|
"discharge_date_str": get_best_value_in_keys(settlement_list_ie_result, discharge_date_key),
|
||||||
"medical_expenses_str": get_value_in_keys(settlement_list_ie_result, medical_expenses_key),
|
"medical_expenses_str": get_best_value_in_keys(settlement_list_ie_result, medical_expenses_key),
|
||||||
"personal_cash_payment_str": get_value_in_keys(settlement_list_ie_result, personal_cash_payment_key),
|
"personal_cash_payment_str": get_best_value_in_keys(settlement_list_ie_result, personal_cash_payment_key),
|
||||||
"personal_account_payment_str": get_value_in_keys(settlement_list_ie_result, personal_account_payment_key),
|
"personal_account_payment_str": get_best_value_in_keys(settlement_list_ie_result, personal_account_payment_key),
|
||||||
"personal_funded_amount_str": get_value_in_keys(settlement_list_ie_result, personal_funded_amount_key),
|
"personal_funded_amount_str": get_best_value_in_keys(settlement_list_ie_result, personal_funded_amount_key),
|
||||||
"medical_insurance_type": get_value_in_keys(settlement_list_ie_result, medical_insurance_type_key)
|
"medical_insurance_type": get_best_value_in_keys(settlement_list_ie_result, medical_insurance_type_key)
|
||||||
}
|
}
|
||||||
settlement_data["admission_date"] = handle_date(settlement_data["admission_date_str"])
|
settlement_data["admission_date"] = handle_date(settlement_data["admission_date_str"])
|
||||||
settlement_data["admission_date"] = handle_date(settlement_data["admission_date_str"])
|
settlement_data["admission_date"] = handle_date(settlement_data["admission_date_str"])
|
||||||
@@ -132,12 +149,12 @@ def photo_review(pk_phhd):
|
|||||||
"config/model/discharge_record_model")
|
"config/model/discharge_record_model")
|
||||||
discharge_data = {
|
discharge_data = {
|
||||||
"pk_phhd": pk_phhd,
|
"pk_phhd": pk_phhd,
|
||||||
"hospital": get_value_in_keys(discharge_record_ie_result, hospital_key),
|
"hospital": get_best_value_in_keys(discharge_record_ie_result, hospital_key),
|
||||||
"department": get_value_in_keys(discharge_record_ie_result, department_key),
|
"department": get_best_value_in_keys(discharge_record_ie_result, department_key),
|
||||||
"name": get_value_in_keys(discharge_record_ie_result, name_key),
|
"name": get_best_value_in_keys(discharge_record_ie_result, name_key),
|
||||||
"admission_date_str": get_value_in_keys(discharge_record_ie_result, admission_date_key),
|
"admission_date_str": get_best_value_in_keys(discharge_record_ie_result, admission_date_key),
|
||||||
"discharge_date_str": get_value_in_keys(discharge_record_ie_result, discharge_date_key),
|
"discharge_date_str": get_best_value_in_keys(discharge_record_ie_result, discharge_date_key),
|
||||||
"doctor": get_value_in_keys(discharge_record_ie_result, doctor_key)
|
"doctor": get_best_value_in_keys(discharge_record_ie_result, doctor_key)
|
||||||
}
|
}
|
||||||
discharge_data["admission_date"] = handle_date(discharge_data["admission_date_str"])
|
discharge_data["admission_date"] = handle_date(discharge_data["admission_date_str"])
|
||||||
discharge_data["discharge_date"] = handle_date(discharge_data["discharge_date_str"])
|
discharge_data["discharge_date"] = handle_date(discharge_data["discharge_date_str"])
|
||||||
@@ -164,10 +181,10 @@ def photo_review(pk_phhd):
|
|||||||
cost_list_ie_result = information_extraction(cost_list_schema, cost_list, "config/model/cost_list_model")
|
cost_list_ie_result = information_extraction(cost_list_schema, cost_list, "config/model/cost_list_model")
|
||||||
cost_data = {
|
cost_data = {
|
||||||
"pk_phhd": pk_phhd,
|
"pk_phhd": pk_phhd,
|
||||||
"name": get_value_in_keys(cost_list_ie_result, name_key),
|
"name": get_best_value_in_keys(cost_list_ie_result, name_key),
|
||||||
"admission_date_str": get_value_in_keys(cost_list_ie_result, admission_date_key),
|
"admission_date_str": get_best_value_in_keys(cost_list_ie_result, admission_date_key),
|
||||||
"discharge_date_str": get_value_in_keys(cost_list_ie_result, discharge_date_key),
|
"discharge_date_str": get_best_value_in_keys(cost_list_ie_result, discharge_date_key),
|
||||||
"medical_expenses_str": get_value_in_keys(cost_list_ie_result, medical_expenses_key)
|
"medical_expenses_str": get_best_value_in_keys(cost_list_ie_result, medical_expenses_key)
|
||||||
}
|
}
|
||||||
cost_data["admission_date"] = handle_date(cost_data["admission_date_str"])
|
cost_data["admission_date"] = handle_date(cost_data["admission_date_str"])
|
||||||
cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"])
|
cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"])
|
||||||
|
|||||||
Reference in New Issue
Block a user