ocr配置的cls只能区分0度与180度,不符合需求,更换为paddleclas中的图片方向识别模型

This commit is contained in:
2024-06-25 14:13:43 +08:00
parent e8dd62e1f5
commit fc0c54fbd2
3 changed files with 69 additions and 62 deletions

View File

@@ -10,6 +10,7 @@ import urllib.request
import cv2
import numpy as np
import paddle
import paddleclas
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -18,7 +19,7 @@ from sqlalchemy import update
from config.keys import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR
from config.mysql import MysqlSession
from config.photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, OCR
from config.photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, SETTLEMENT_IE, DISCHARGE_IE, COST_IE
from photo_review.entity.bd_yljg import BdYljg
from photo_review.entity.bd_ylks import BdYlks
from photo_review.entity.zx_ie_cost import ZxIeCost
@@ -95,36 +96,34 @@ def merge_result(result1, result2):
return result1
# 获取图片OCR并将其box转为两点矩形框
def get_ocr_layout(img_path):
def _get_box(box):
box = [
min(box[0][0], box[3][0]), # x1
min(box[0][1], box[1][1]), # y1
max(box[1][0], box[2][0]), # x2
max(box[2][1], box[3][1]), # y2
]
return box
# Lazily created orientation classifier, shared by every call so the model
# is loaded once instead of once per image.
_ORIENTATION_MODEL = None


def _get_orientation_model():
    """Return the shared ``text_image_orientation`` PaddleClas model."""
    global _ORIENTATION_MODEL
    if _ORIENTATION_MODEL is None:
        _ORIENTATION_MODEL = paddleclas.PaddleClas(model_name="text_image_orientation")
    return _ORIENTATION_MODEL


# Get the rotation angle of an image
def get_image_rotation_angle(img):
    """Classify the orientation of ``img`` and return it as an integer angle.

    Args:
        img: Input handed unchanged to ``PaddleClas.predict`` as
            ``input_data`` (the caller in this file passes an image array).

    Returns:
        int: The top predicted label converted with ``int``.
        NOTE(review): presumably one of 0, 90, 180, 270 -- confirm against
        the model's label set.
    """
    result = _get_orientation_model().predict(input_data=img)
    # predict() yields batches; take the first batch, its first item, and
    # that item's highest-ranked label name.
    angle = int(next(result)[0]["label_names"][0])
    return angle
def _normal_box(box):
# Ensure the height and width of bbox are greater than zero
if box[3] - box[1] < 0 or box[2] - box[0] < 0:
return False
return True
layout = []
ocr_result = OCR.ocr(img_path)
ocr_result = ocr_result[0]
if not ocr_result:
return layout
for segment in ocr_result:
box = segment[0]
box = _get_box(box)
if not _normal_box(box):
continue
text = segment[1][0]
layout.append((box, text))
return layout
# Rotate an image
def rotate_image(img, angle):
    """Rotate ``img`` by ``angle`` degrees about its centre.

    The output canvas keeps the input size for 180 degrees and swaps width
    and height for any other non-zero angle, which fits 90/270 degree
    rotations without cropping.

    Args:
        img: Image array whose first two dimensions are (height, width);
            both colour and single-channel images are accepted.
        angle: Rotation in degrees, passed straight to
            ``cv2.getRotationMatrix2D`` (positive values rotate
            counter-clockwise in OpenCV's convention).

    Returns:
        The rotated image, or ``img`` itself when ``angle`` is 0.
    """
    if angle == 0:
        # Return the input rather than None so callers can always write
        # the result out.
        return img

    # Only the first two dimensions are needed; this also supports 2-D
    # grayscale arrays that have no channel axis.
    height, width = img.shape[:2]
    if angle == 180:
        new_width, new_height = width, height
    else:
        new_width, new_height = height, width

    # Rotate around the image centre (arguments: centre, degrees, scale).
    matrix = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1)
    # Shift the result so it is centred on the new canvas.
    matrix[0, 2] += (new_width - width) / 2
    matrix[1, 2] += (new_height - height) / 2
    # Arguments: source image, affine matrix, output (width, height).
    return cv2.warpAffine(img, matrix, (new_width, new_height))
# 关键信息提取
@@ -138,11 +137,11 @@ def information_extraction(ie, phrecs):
split_result = split_image(pic_path)
for img in split_result:
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
cv2.imwrite(temp_file.name, img["img"])
# 为使用ocr中的cls单独调用ocr
layout = get_ocr_layout(temp_file.name)
docs.append({"doc": temp_file.name, "layout": layout})
doc_phrecs.append({"phrec": phrec, "x_offset": img["x_offset"], "y_offset": img["y_offset"]})
angle = get_image_rotation_angle(img["img"])
rotated_img = rotate_image(img["img"], angle)
cv2.imwrite(temp_file.name, rotated_img)
docs.append({"doc": temp_file.name})
doc_phrecs.append({"phrec": phrec, "rotation": angle, "x_offset": img["x_offset"], "y_offset": img["y_offset"]})
if not docs:
return result
@@ -170,8 +169,8 @@ def information_extraction(ie, phrecs):
result_json = result_json[:5000]
session = MysqlSession()
zx_ocr = ZxOcr(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, id=id, cfjaddress=phrec.cfjaddress,
content=result_json, x_offset=doc_phrec["x_offset"], y_offset=doc_phrec["y_offset"],
create_time=now, update_time=now)
content=result_json, rotation=doc_phrec["rotation"], x_offset=doc_phrec["x_offset"],
y_offset=doc_phrec["y_offset"], create_time=now, update_time=now)
session.add(zx_ocr)
session.commit()
session.close()