启用ocr中的cls功能
This commit is contained in:
@@ -1,4 +1,5 @@
|
|||||||
from paddlenlp import Taskflow
|
from paddlenlp import Taskflow
|
||||||
|
from paddleocr import PaddleOCR
|
||||||
|
|
||||||
from config.keys import SETTLEMENT_LIST_SCHEMA, DISCHARGE_RECORD_SCHEMA, COST_LIST_SCHEMA
|
from config.keys import SETTLEMENT_LIST_SCHEMA, DISCHARGE_RECORD_SCHEMA, COST_LIST_SCHEMA
|
||||||
|
|
||||||
@@ -36,3 +37,6 @@ DISCHARGE_IE = Taskflow("information_extraction", schema=DISCHARGE_RECORD_SCHEMA
|
|||||||
# 费用清单
|
# 费用清单
|
||||||
COST_IE = Taskflow("information_extraction", schema=COST_LIST_SCHEMA, model="uie-x-base",
|
COST_IE = Taskflow("information_extraction", schema=COST_LIST_SCHEMA, model="uie-x-base",
|
||||||
task_path="config/model/cost_list_model", layout_analysis=LAYOUT_ANALYSIS, batch_size=IE_BATCH_SIZE)
|
task_path="config/model/cost_list_model", layout_analysis=LAYOUT_ANALYSIS, batch_size=IE_BATCH_SIZE)
|
||||||
|
|
||||||
|
# OCR
|
||||||
|
OCR = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ from sqlalchemy import update
|
|||||||
from config.keys import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
|
from config.keys import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
|
||||||
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR
|
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR
|
||||||
from config.mysql import MysqlSession
|
from config.mysql import MysqlSession
|
||||||
from config.photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, SETTLEMENT_IE, DISCHARGE_IE, COST_IE
|
from config.photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, OCR
|
||||||
from photo_review.entity.bd_yljg import BdYljg
|
from photo_review.entity.bd_yljg import BdYljg
|
||||||
from photo_review.entity.bd_ylks import BdYlks
|
from photo_review.entity.bd_ylks import BdYlks
|
||||||
from photo_review.entity.zx_ie_cost import ZxIeCost
|
from photo_review.entity.zx_ie_cost import ZxIeCost
|
||||||
@@ -95,6 +95,36 @@ def merge_result(result1, result2):
|
|||||||
return result1
|
return result1
|
||||||
|
|
||||||
|
|
||||||
|
# 获取图片OCR,并将其box转为两点矩形框
|
||||||
|
def get_ocr_layout(img_path):
    """Run OCR on an image and return its text layout as two-point boxes.

    Every text segment reported by the module-level ``OCR`` engine is
    converted from its four-corner box into ``[x1, y1, x2, y2]`` form and
    paired with the recognized text.

    Args:
        img_path: Image to recognize; passed unchanged to ``OCR.ocr``.

    Returns:
        A list of ``(box, text)`` tuples where ``box`` is
        ``[x1, y1, x2, y2]``. The list is empty when OCR reports no text.
    """

    def _get_box(box):
        # Collapse the four corner points into one axis-aligned rectangle.
        # Assumes corners are ordered top-left, top-right, bottom-right,
        # bottom-left -- TODO confirm against the PaddleOCR version in use.
        return [
            min(box[0][0], box[3][0]),  # x1
            min(box[0][1], box[1][1]),  # y1
            max(box[1][0], box[2][0]),  # x2
            max(box[2][1], box[3][1]),  # y2
        ]

    def _normal_box(box):
        # A box is usable only when neither its width nor its height is
        # negative (zero-sized boxes are still accepted).
        return not (box[3] - box[1] < 0 or box[2] - box[0] < 0)

    ocr_result = OCR.ocr(img_path)
    # The result holds one entry per input image. Recent PaddleOCR releases
    # use None (not an empty list) for an image with no detected text, so
    # guard before iterating instead of crashing on blank crops.
    if not ocr_result or ocr_result[0] is None:
        return []

    layout = []
    for segment in ocr_result[0]:
        box = _get_box(segment[0])
        if not _normal_box(box):
            continue
        # segment[1] is indexed for the text only; its other items are unused.
        layout.append((box, segment[1][0]))
    return layout
|
||||||
|
|
||||||
|
|
||||||
# 关键信息提取
|
# 关键信息提取
|
||||||
def information_extraction(ie, phrecs):
|
def information_extraction(ie, phrecs):
|
||||||
result = {}
|
result = {}
|
||||||
@@ -107,7 +137,9 @@ def information_extraction(ie, phrecs):
|
|||||||
for img in split_result:
|
for img in split_result:
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
|
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
|
||||||
cv2.imwrite(temp_file.name, img["img"])
|
cv2.imwrite(temp_file.name, img["img"])
|
||||||
docs.append({"doc": temp_file.name})
|
# 为使用ocr中的cls,单独调用ocr
|
||||||
|
layout = get_ocr_layout(temp_file.name)
|
||||||
|
docs.append({"doc": temp_file.name, "layout": layout})
|
||||||
doc_phrecs.append({"phrec": phrec, "x_offset": img["x_offset"], "y_offset": img["y_offset"]})
|
doc_phrecs.append({"phrec": phrec, "x_offset": img["x_offset"], "y_offset": img["y_offset"]})
|
||||||
if not docs:
|
if not docs:
|
||||||
return result
|
return result
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import cv2
|
|||||||
|
|
||||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from photo_review.photo_review import split_image
|
from photo_review.photo_review import split_image, get_ocr_layout
|
||||||
from paddlenlp import Taskflow
|
from paddlenlp import Taskflow
|
||||||
from paddlenlp.utils.doc_parser import DocParser
|
from paddlenlp.utils.doc_parser import DocParser
|
||||||
from ucloud import ucloud
|
from ucloud import ucloud
|
||||||
@@ -70,7 +70,7 @@ def visual_model_test(model_type, test_img, task_path, schema):
|
|||||||
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
|
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
|
||||||
cv2.imwrite(temp_file.name, img["img"])
|
cv2.imwrite(temp_file.name, img["img"])
|
||||||
temp_files_paths.append(temp_file.name)
|
temp_files_paths.append(temp_file.name)
|
||||||
docs.append({"doc": temp_file.name})
|
docs.append({"doc": temp_file.name, "layout": get_ocr_layout(temp_file.name)})
|
||||||
|
|
||||||
my_ie = Taskflow("information_extraction", schema=schema, model="uie-x-base", task_path=task_path,
|
my_ie = Taskflow("information_extraction", schema=schema, model="uie-x-base", task_path=task_path,
|
||||||
layout_analysis=False)
|
layout_analysis=False)
|
||||||
@@ -133,7 +133,7 @@ def main(model_type, pic_name=None):
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# main("ocr")
|
# main("ocr")
|
||||||
main("settlement", "PH20240529000194_1_075936_1.PNG")
|
main("settlement")
|
||||||
# main("discharge")
|
# main("discharge")
|
||||||
# main("cost")
|
# main("cost")
|
||||||
# main("cost_detail")
|
# main("cost_detail")
|
||||||
|
|||||||
Reference in New Issue
Block a user