更新OCR版本,Beta版,还不能上线
This commit is contained in:
@@ -36,14 +36,15 @@ def merge_result(result1, result2):
|
||||
return result1
|
||||
|
||||
|
||||
def ie_temp_image(ie, ocr, image):
|
||||
def ie_temp_image(ie, ocr, image, is_screenshot=False):
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
|
||||
cv2.imwrite(temp_file.name, image)
|
||||
|
||||
ie_result = []
|
||||
ocr_pure_text = ''
|
||||
angle = '0'
|
||||
try:
|
||||
layout = util.get_ocr_layout(ocr, temp_file.name)
|
||||
layout, angle = util.get_ocr_layout(ocr, temp_file.name, is_screenshot)
|
||||
if not layout:
|
||||
# 无识别结果
|
||||
ie_result = []
|
||||
@@ -61,7 +62,7 @@ def ie_temp_image(ie, ocr, image):
|
||||
os.remove(temp_file.name)
|
||||
except Exception as e:
|
||||
logging.info(f"删除临时文件 {temp_file.name} 时出错", exc_info=e)
|
||||
return ie_result, ocr_pure_text
|
||||
return ie_result, ocr_pure_text, angle
|
||||
|
||||
|
||||
# 关键信息提取
|
||||
@@ -159,7 +160,7 @@ def information_extraction(ie, phrecs, identity):
|
||||
if not img_path:
|
||||
continue
|
||||
|
||||
image = image_util.read(img_path)
|
||||
image, exif_data = image_util.read(img_path)
|
||||
if image is None:
|
||||
# 图片可能因为某些原因获取不到
|
||||
continue
|
||||
@@ -175,7 +176,7 @@ def information_extraction(ie, phrecs, identity):
|
||||
if text:
|
||||
info_extract = ie(text)[0]
|
||||
else:
|
||||
info_extract = ie_temp_image(ie, OCR, image)[0]
|
||||
info_extract = ie_temp_image(ie, OCR, image, True)[0]
|
||||
ie_result = {'result': info_extract, 'angle': '0'}
|
||||
|
||||
now = util.get_default_datetime()
|
||||
@@ -193,27 +194,20 @@ def information_extraction(ie, phrecs, identity):
|
||||
|
||||
result = merge_result(result, ie_result['result'])
|
||||
else:
|
||||
is_screenshot = image_util.is_screenshot(image, exif_data)
|
||||
target_images = []
|
||||
# target_images += detector.request_book_areas(image) # 识别文档区域并裁剪
|
||||
if not target_images:
|
||||
target_images.append(image) # 识别失败
|
||||
angle_count = defaultdict(int, {'0': 0}) # 分割后图片的最优角度统计
|
||||
for target_image in target_images:
|
||||
# dewarped_image = dewarp.dewarp_image(target_image) # 去扭曲
|
||||
dewarped_image = target_image
|
||||
angles = image_util.parse_rotation_angles(dewarped_image)
|
||||
|
||||
split_results = image_util.split(dewarped_image)
|
||||
split_results = image_util.split(target_image)
|
||||
for split_result in split_results:
|
||||
if split_result['img'] is None or split_result['img'].size == 0:
|
||||
continue
|
||||
rotated_img = image_util.rotate(split_result['img'], int(angles[0]))
|
||||
ie_temp_result = ie_temp_image(ie, OCR, rotated_img)
|
||||
ie_temp_result = ie_temp_image(ie, OCR, split_result['img'], is_screenshot)
|
||||
ocr_text += ie_temp_result[1]
|
||||
ie_results = [{'result': ie_temp_result[0], 'angle': angles[0]}]
|
||||
if not ie_results[0]['result'] or len(ie_results[0]['result']) < len(ie.kwargs.get('schema')):
|
||||
rotated_img = image_util.rotate(split_result['img'], int(angles[1]))
|
||||
ie_results.append({'result': ie_temp_image(ie, OCR, rotated_img)[0], 'angle': angles[1]})
|
||||
ie_results = [{'result': ie_temp_result[0], 'angle': ie_temp_result[2]}]
|
||||
now = util.get_default_datetime()
|
||||
best_angle = ['0', 0]
|
||||
for ie_result in ie_results:
|
||||
|
||||
Reference in New Issue
Block a user