From a81719bdc7c58732412244bd2e4d6bd9cf92035f Mon Sep 17 00:00:00 2001
From: liuyebo <1515783401@qq.com>
Date: Mon, 24 Jun 2024 11:03:43 +0800
Subject: [PATCH] =?UTF-8?q?=E5=90=AF=E7=94=A8ocr=E4=B8=AD=E7=9A=84cls?=
 =?UTF-8?q?=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (this section is ignored when the patch is applied):
  * get_ocr_layout() now returns an empty layout instead of raising
    TypeError when OCR detects no text in an image.
  * Line breaks and indentation were restored by hand; if a context line
    does not match, apply with `git apply --ignore-whitespace`.
  * The post-image blob id on the photo_review/photo_review.py index line
    predates the get_ocr_layout() change.

 config/photo_review.py                 |  4 +++
 photo_review/photo_review.py           | 44 ++++++++++++++++++++++++--
 visual_model_test/visual_model_test.py |  6 ++--
 3 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/config/photo_review.py b/config/photo_review.py
index 523012b..1690baa 100644
--- a/config/photo_review.py
+++ b/config/photo_review.py
@@ -1,4 +1,5 @@
 from paddlenlp import Taskflow
+from paddleocr import PaddleOCR
 
 from config.keys import SETTLEMENT_LIST_SCHEMA, DISCHARGE_RECORD_SCHEMA, COST_LIST_SCHEMA
 
@@ -36,3 +37,6 @@ DISCHARGE_IE = Taskflow("information_extraction", schema=DISCHARGE_RECORD_SCHEMA
 # 费用清单
 COST_IE = Taskflow("information_extraction", schema=COST_LIST_SCHEMA, model="uie-x-base",
                    task_path="config/model/cost_list_model", layout_analysis=LAYOUT_ANALYSIS, batch_size=IE_BATCH_SIZE)
+
+# OCR
+OCR = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
diff --git a/photo_review/photo_review.py b/photo_review/photo_review.py
index caf1a3d..a63a09a 100644
--- a/photo_review/photo_review.py
+++ b/photo_review/photo_review.py
@@ -18,7 +18,7 @@ from sqlalchemy import update
 from config.keys import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
     PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR
 from config.mysql import MysqlSession
-from config.photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, SETTLEMENT_IE, DISCHARGE_IE, COST_IE
+from config.photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, OCR
 from photo_review.entity.bd_yljg import BdYljg
 from photo_review.entity.bd_ylks import BdYlks
 from photo_review.entity.zx_ie_cost import ZxIeCost
@@ -95,6 +95,44 @@ def merge_result(result1, result2):
     return result1
 
 
+# Run OCR on an image and convert each detected text box into a two-point rectangle
+def get_ocr_layout(img_path):
+    """Return the OCR layout of img_path as a list of (box, text) tuples.
+
+    Each box is [x1, y1, x2, y2]. An empty list is returned when OCR detects no text.
+    """
+
+    def _get_box(box):
+        # Collapse the four corner points into one [x1, y1, x2, y2] box
+        box = [
+            min(box[0][0], box[3][0]),  # x1
+            min(box[0][1], box[1][1]),  # y1
+            max(box[1][0], box[2][0]),  # x2
+            max(box[2][1], box[3][1]),  # y2
+        ]
+        return box
+
+    def _normal_box(box):
+        # Reject boxes whose height or width is negative
+        if box[3] - box[1] < 0 or box[2] - box[0] < 0:
+            return False
+        return True
+
+    ocr_result = OCR.ocr(img_path)
+    # The per-image entry can be None when no text is detected; iterating it would raise TypeError
+    if not ocr_result or ocr_result[0] is None:
+        return []
+
+    layout = []
+    for segment in ocr_result[0]:
+        box = _get_box(segment[0])
+        if not _normal_box(box):
+            continue
+        text = segment[1][0]
+        layout.append((box, text))
+    return layout
+
+
 # 关键信息提取
 def information_extraction(ie, phrecs):
     result = {}
@@ -107,7 +145,9 @@ def information_extraction(ie, phrecs):
         for img in split_result:
             with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
                 cv2.imwrite(temp_file.name, img["img"])
-                docs.append({"doc": temp_file.name})
+                # Call OCR separately so that its angle classifier (cls) is used
+                layout = get_ocr_layout(temp_file.name)
+                docs.append({"doc": temp_file.name, "layout": layout})
                 doc_phrecs.append({"phrec": phrec, "x_offset": img["x_offset"], "y_offset": img["y_offset"]})
     if not docs:
         return result
diff --git a/visual_model_test/visual_model_test.py b/visual_model_test/visual_model_test.py
index 2b4fed1..f9bdb6f 100644
--- a/visual_model_test/visual_model_test.py
+++ b/visual_model_test/visual_model_test.py
@@ -10,7 +10,7 @@ import cv2
 
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from photo_review.photo_review import split_image
+from photo_review.photo_review import split_image, get_ocr_layout
 from paddlenlp import Taskflow
 from paddlenlp.utils.doc_parser import DocParser
 from ucloud import ucloud
@@ -70,7 +70,7 @@ def visual_model_test(model_type, test_img, task_path, schema):
         with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
             cv2.imwrite(temp_file.name, img["img"])
             temp_files_paths.append(temp_file.name)
-            docs.append({"doc": temp_file.name})
+            docs.append({"doc": temp_file.name, "layout": get_ocr_layout(temp_file.name)})
 
     my_ie = Taskflow("information_extraction", schema=schema, model="uie-x-base", task_path=task_path,
                      layout_analysis=False)
@@ -133,7 +133,7 @@ def main(model_type, pic_name=None):
 
 if __name__ == '__main__':
     # main("ocr")
-    main("settlement", "PH20240529000194_1_075936_1.PNG")
+    main("settlement")
     # main("discharge")
     # main("cost")
     # main("cost_detail")