From fc0c54fbd2979c5002ffcb6d0aaa90fbcc4d6327 Mon Sep 17 00:00:00 2001 From: liuyebo <1515783401@qq.com> Date: Tue, 25 Jun 2024 14:13:43 +0800 Subject: [PATCH] =?UTF-8?q?ocr=E9=85=8D=E7=BD=AE=E7=9A=84cls=E5=8F=AA?= =?UTF-8?q?=E8=83=BD=E5=8C=BA=E5=88=860=E5=BA=A6=E4=B8=8E180=E5=BA=A6?= =?UTF-8?q?=EF=BC=8C=E4=B8=8D=E7=AC=A6=E5=90=88=E9=9C=80=E6=B1=82=EF=BC=8C?= =?UTF-8?q?=E6=9B=B4=E6=8D=A2=E4=B8=BApaddleclas=E4=B8=AD=E7=9A=84?= =?UTF-8?q?=E5=9B=BE=E7=89=87=E6=96=B9=E5=90=91=E8=AF=86=E5=88=AB=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config/photo_review.py | 4 -- photo_review/photo_review.py | 71 +++++++++++++------------- visual_model_test/visual_model_test.py | 56 ++++++++++++-------- 3 files changed, 69 insertions(+), 62 deletions(-) diff --git a/config/photo_review.py b/config/photo_review.py index 1690baa..523012b 100644 --- a/config/photo_review.py +++ b/config/photo_review.py @@ -1,5 +1,4 @@ from paddlenlp import Taskflow -from paddleocr import PaddleOCR from config.keys import SETTLEMENT_LIST_SCHEMA, DISCHARGE_RECORD_SCHEMA, COST_LIST_SCHEMA @@ -37,6 +36,3 @@ DISCHARGE_IE = Taskflow("information_extraction", schema=DISCHARGE_RECORD_SCHEMA # 费用清单 COST_IE = Taskflow("information_extraction", schema=COST_LIST_SCHEMA, model="uie-x-base", task_path="config/model/cost_list_model", layout_analysis=LAYOUT_ANALYSIS, batch_size=IE_BATCH_SIZE) - -# OCR -OCR = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False) diff --git a/photo_review/photo_review.py b/photo_review/photo_review.py index 56d7f78..f8e5ba0 100644 --- a/photo_review/photo_review.py +++ b/photo_review/photo_review.py @@ -10,6 +10,7 @@ import urllib.request import cv2 import numpy as np import paddle +import paddleclas sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -18,7 +19,7 @@ from sqlalchemy import update from config.keys import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \ PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR from config.mysql import MysqlSession -from config.photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, OCR +from config.photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, SETTLEMENT_IE, DISCHARGE_IE, COST_IE from photo_review.entity.bd_yljg import BdYljg from photo_review.entity.bd_ylks import BdYlks from photo_review.entity.zx_ie_cost import ZxIeCost @@ -95,36 +96,34 @@ def merge_result(result1, result2): return result1 -# 获取图片OCR,并将其box转为两点矩形框 -def get_ocr_layout(img_path): - def _get_box(box): - box = [ - min(box[0][0], box[3][0]), # x1 - min(box[0][1], box[1][1]), # y1 - max(box[1][0], box[2][0]), # x2 - max(box[2][1], box[3][1]), # y2 - ] - return box +# 获取图片旋转角度 +def get_image_rotation_angle(img): + model = paddleclas.PaddleClas(model_name="text_image_orientation") + result = model.predict(input_data=img) + angle = int(next(result)[0]["label_names"][0]) + return angle - def _normal_box(box): - # Ensure the height and width of bbox are greater than zero - if box[3] - box[1] < 0 or box[2] - box[0] < 0: - return False - return True - layout = [] - ocr_result = OCR.ocr(img_path) - ocr_result = ocr_result[0] - if not ocr_result: - return layout - for segment in ocr_result: - box = segment[0] - box = _get_box(box) - if not _normal_box(box): - continue - text = segment[1][0] - layout.append((box, text)) - return layout +# 旋转图片 +def rotate_image(img, angle): + if angle == 0: + return + height, width, _ = img.shape + if angle == 180: + new_width = width + new_height = height + else: + new_width = height + new_height = width + # 绕图像的中心旋转 + # 参数:旋转中心 旋转度数 scale + matrix = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1) + # 旋转后平移 + matrix[0, 2] += (new_width - width) / 2 + matrix[1, 2] += (new_height - height) / 2 + # 参数:原始图像 旋转参数 元素图像宽高 + rotated = cv2.warpAffine(img, matrix, (new_width, new_height)) + return rotated # 关键信息提取 @@ -138,11 +137,11 @@ def information_extraction(ie, phrecs): split_result = split_image(pic_path) for img in split_result: with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file: - cv2.imwrite(temp_file.name, img["img"]) - # 为使用ocr中的cls,单独调用ocr - layout = get_ocr_layout(temp_file.name) - docs.append({"doc": temp_file.name, "layout": layout}) - doc_phrecs.append({"phrec": phrec, "x_offset": img["x_offset"], "y_offset": img["y_offset"]}) + angle = get_image_rotation_angle(img["img"]) + rotated_img = rotate_image(img["img"], angle) + cv2.imwrite(temp_file.name, rotated_img) + docs.append({"doc": temp_file.name}) + doc_phrecs.append({"phrec": phrec, "rotation": angle, "x_offset": img["x_offset"], "y_offset": img["y_offset"]}) if not docs: return result @@ -170,8 +169,8 @@ def information_extraction(ie, phrecs): result_json = result_json[:5000] session = MysqlSession() zx_ocr = ZxOcr(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, id=id, cfjaddress=phrec.cfjaddress, - content=result_json, x_offset=doc_phrec["x_offset"], y_offset=doc_phrec["y_offset"], - create_time=now, update_time=now) + content=result_json, rotation=doc_phrec["rotation"], x_offset=doc_phrec["x_offset"], + y_offset=doc_phrec["y_offset"], create_time=now, update_time=now) session.add(zx_ocr) session.commit() session.close() diff --git a/visual_model_test/visual_model_test.py b/visual_model_test/visual_model_test.py index 7054e39..3d5480f 100644 --- a/visual_model_test/visual_model_test.py +++ b/visual_model_test/visual_model_test.py @@ -10,13 +10,13 @@ import cv2 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from photo_review.photo_review import split_image, get_ocr_layout +from photo_review.photo_review import split_image, get_image_rotation_angle, rotate_image, open_image from paddlenlp import Taskflow from paddlenlp.utils.doc_parser import DocParser from ucloud import ucloud -def write_visual_result(image, layout=None, result=None): +def write_visual_result(image, angle=0, layout=None, result=None): img = image.split("?")[0] img = re.split(r'[\\/]', img)[-1] img_name = "" @@ -26,19 +26,25 @@ def write_visual_result(image, layout=None, result=None): img_name = img[:last_dot_index] img_type = img[last_dot_index + 1:] - if layout: - print(layout) - DocParser.write_image_with_results( - image, - layout=layout, - save_path="./img_result/" + img_name + "_layout." + img_type) + img_array = open_image(image) + if angle != 0: + img_array = rotate_image(img_array, angle) + with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file: + cv2.imwrite(temp_file.name, img_array) + if layout: + print(layout) + DocParser.write_image_with_results( + temp_file.name, + layout=layout, + save_path="./img_result/" + img_name + "_layout." + img_type) - if result: - print(result) - DocParser.write_image_with_results( - image, - result=result, - save_path="./img_result/" + img_name + "_result." + img_type) + if result: + print(result) + DocParser.write_image_with_results( + temp_file.name, + result=result, + save_path="./img_result/" + img_name + "_result." + img_type) + os.remove(temp_file.name) def visual_model_test(model_type, test_img, task_path, schema): @@ -46,34 +52,40 @@ def visual_model_test(model_type, test_img, task_path, schema): imgs = split_image(test_img) layout = [] temp_files_paths = [] + doc_parser = DocParser(layout_analysis=False) for img in imgs: with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file: - cv2.imwrite(temp_file.name, img["img"]) + angle = get_image_rotation_angle(img["img"]) + rotated_img = rotate_image(img["img"], angle) + cv2.imwrite(temp_file.name, rotated_img) temp_files_paths.append(temp_file.name) - ocr_layout = get_ocr_layout(temp_file.name) + parsed_doc = doc_parser.parse({"doc": temp_file.name}) if img["x_offset"] or img["y_offset"]: - for box in ocr_layout: + for p in parsed_doc["layout"]: + box = p[0] box[0] += img["x_offset"] box[1] += img["y_offset"] box[2] += img["x_offset"] box[3] += img["y_offset"] - layout += ocr_layout + layout += parsed_doc["layout"] - write_visual_result(test_img, layout=layout) + write_visual_result(test_img, angle, layout=layout) else: docs = [] split_result = split_image(test_img) temp_files_paths = [] for img in split_result: with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file: - cv2.imwrite(temp_file.name, img["img"]) + angle = get_image_rotation_angle(img["img"]) + rotated_img = rotate_image(img["img"], angle) + cv2.imwrite(temp_file.name, rotated_img) temp_files_paths.append(temp_file.name) - docs.append({"doc": temp_file.name, "layout": get_ocr_layout(temp_file.name)}) + docs.append({"doc": temp_file.name}) my_ie = Taskflow("information_extraction", schema=schema, model="uie-x-base", task_path=task_path, layout_analysis=False) my_results = my_ie(docs) - write_visual_result(test_img, result=my_results[0]) + write_visual_result(test_img, angle, result=my_results[0]) # 使用完临时文件后,记得清理(删除)它们 for path in temp_files_paths: