优化图片的存储,及时删除处理过程中产生的图片
This commit is contained in:
@@ -54,11 +54,11 @@ def find_boxes(content, layout, offset=0, length=None, improve=False, image_path
|
||||
if improve:
|
||||
# 再次识别,提高精度
|
||||
image = cv2.imread(image_path)
|
||||
img_name, img_ext = image_util.parse_save_path(image_path)
|
||||
img_name, img_ext = common_util.parse_save_path(image_path)
|
||||
# 截图时偏大一点
|
||||
capture_box = common_util.zoom_rectangle(box, 0.2)
|
||||
captured_image = image_util.capture(image, capture_box)
|
||||
captured_image_path = image_util.get_save_path(f'{img_name}.capture.{img_ext}')
|
||||
captured_image_path = common_util.get_processed_img_path(f'{img_name}.capture.{img_ext}')
|
||||
cv2.imwrite(captured_image_path, captured_image)
|
||||
captured_a4_img_path, offset_x, offset_y = image_util.expand_to_a4_size(captured_image_path)
|
||||
try:
|
||||
@@ -170,7 +170,7 @@ def handle_image_for_mask(split_result):
|
||||
def mask_photo(img_path, name, id_card_num, color=(255, 255, 255)):
|
||||
def _mask(ip, n, icn, c):
|
||||
i = cv2.imread(ip)
|
||||
img_name, img_ext = image_util.parse_save_path(ip)
|
||||
img_name, img_ext = common_util.parse_save_path(ip)
|
||||
do_mask = False
|
||||
split_results = image_util.split(ip)
|
||||
for split_result in split_results:
|
||||
@@ -188,7 +188,7 @@ def mask_photo(img_path, name, id_card_num, color=(255, 255, 255)):
|
||||
result[3] + y_offset,
|
||||
)
|
||||
cv2.rectangle(i, (int(result[0]), int(result[1])), (int(result[2]), int(result[3])), c, -1, 0)
|
||||
masked_path = image_util.get_save_path(f'{img_name}.mask.{img_ext}')
|
||||
masked_path = common_util.get_processed_img_path(f'{img_name}.mask.{img_ext}')
|
||||
cv2.imwrite(masked_path, i)
|
||||
return do_mask, masked_path
|
||||
|
||||
@@ -222,7 +222,7 @@ def photo_mask(pk_phhd, name, id_card_num):
|
||||
img_url = ufile.get_private_url(phrec.cfjaddress)
|
||||
if not img_url:
|
||||
continue
|
||||
img_path = image_util.save_to_local(img_url)
|
||||
img_path = common_util.save_to_local(img_url)
|
||||
is_masked, image = mask_photo(img_path, name, id_card_num)
|
||||
|
||||
# 如果涂抹了要备份以及更新
|
||||
|
||||
@@ -9,6 +9,8 @@ PHHD_BATCH_SIZE = 10
|
||||
SLEEP_MINUTES = 5
|
||||
# 是否发送报错邮件
|
||||
SEND_ERROR_EMAIL = True
|
||||
# 处理批号(这里仅起声明作用)
|
||||
BATCH_ID = ''
|
||||
|
||||
'''
|
||||
别名配置
|
||||
@@ -56,3 +58,9 @@ DISCHARGE_KEY = {
|
||||
'出院日期': ['出院日期', '出院时间'],
|
||||
'出院医嘱': ['出院医嘱']
|
||||
}
|
||||
|
||||
|
||||
def modify_batch_id(batch_id):
    """
    Rebind this module's global ``BATCH_ID`` to ``batch_id``.

    Only the name in this module is rebound. A module that obtained the value
    with ``from <this module> import BATCH_ID`` holds its own binding taken at
    import time and does not observe the change; read the attribute through
    the module object to get the current value.

    :param batch_id: new batch id to store
    :return: None
    """
    # Hook for changing the batch id
    global BATCH_ID
    BATCH_ID = batch_id
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import time
|
||||
import uuid
|
||||
from collections import defaultdict
|
||||
from time import sleep
|
||||
|
||||
@@ -16,7 +19,7 @@ from db import MysqlSession
|
||||
from db.mysql import BdYljg, BdYlks, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec, ZxIeReview, ZxIeResult
|
||||
from log import HOSTNAME
|
||||
from photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, \
|
||||
DEPARTMENT_FILTER, DISCHARGE_KEY
|
||||
DEPARTMENT_FILTER, DISCHARGE_KEY, modify_batch_id, BATCH_ID
|
||||
from services.paddle_services import IE_KEY
|
||||
from ucloud import ufile
|
||||
from util import image_util, common_util, html_util, model_util
|
||||
@@ -46,8 +49,8 @@ def get_better_image_from_qrcode(img_path, image_id, dpi=150):
|
||||
# 将渲染结果转换为OpenCV兼容的格式
|
||||
img = np.frombuffer(pix.samples, dtype=np.uint8).reshape((pix.height, pix.width, -1))
|
||||
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
|
||||
img_name, img_ext = image_util.parse_save_path(img_path)
|
||||
better_img_path = image_util.get_save_path(f'{img_name}.better.{img_ext}')
|
||||
img_name, img_ext = common_util.parse_save_path(img_path)
|
||||
better_img_path = common_util.get_processed_img_path(f'{img_name}.better.{img_ext}')
|
||||
cv2.imwrite(better_img_path, img)
|
||||
return better_img_path, page.get_text()
|
||||
except Exception as ex:
|
||||
@@ -100,20 +103,21 @@ def get_better_image_from_qrcode(img_path, image_id, dpi=150):
|
||||
|
||||
|
||||
# 关键信息提取
|
||||
def information_extraction(phrec, pk_phhd, identity):
|
||||
def information_extraction(phrec, pk_phhd):
|
||||
"""
|
||||
处理单张图片
|
||||
:param phrec:图片信息
|
||||
:param pk_phhd:案子主键
|
||||
:param identity:处理批次标识
|
||||
:return:记录类型,信息抽取结果
|
||||
"""
|
||||
img_path = image_util.get_img_path(phrec.cfjaddress)
|
||||
if not img_path:
|
||||
original_img_path = common_util.get_img_path(phrec.cfjaddress)
|
||||
if not original_img_path:
|
||||
img_url = ufile.get_private_url(phrec.cfjaddress)
|
||||
if not img_url:
|
||||
return None, None, None
|
||||
img_path = image_util.save_to_local(img_url)
|
||||
original_img_path = common_util.save_to_local(img_url)
|
||||
img_path = common_util.get_processed_img_path(phrec.cfjaddress)
|
||||
shutil.copy2(original_img_path, img_path)
|
||||
|
||||
# 尝试从二维码中获取高清图片
|
||||
better_img_path, text = get_better_image_from_qrcode(img_path, phrec.cfjaddress)
|
||||
@@ -129,17 +133,26 @@ def information_extraction(phrec, pk_phhd, identity):
|
||||
)
|
||||
ocr_text = None # 此处肯定不是出院记录,后续用不到
|
||||
else:
|
||||
target_image = model_util.det_book(img_path) # 识别文档区域并裁剪
|
||||
dewarped_image = model_util.dewarp(target_image) # 去扭曲
|
||||
angles = model_util.clas_orientation(dewarped_image)
|
||||
rotated_img = image_util.rotate(dewarped_image, int(angles[0]))
|
||||
split_results = image_util.split(rotated_img)
|
||||
if image_util.is_photo(img_path):
|
||||
book_img_path = model_util.det_book(img_path) # 识别文档区域并裁剪
|
||||
dewarped_img_path = model_util.dewarp(book_img_path) # 去扭曲
|
||||
else: # todo:也可能是图片,后续添加细分逻辑
|
||||
dewarped_img_path = img_path
|
||||
angles = model_util.clas_orientation(dewarped_img_path)
|
||||
ocr_result = []
|
||||
for split_result in split_results:
|
||||
if split_result['img'] is None:
|
||||
continue
|
||||
a4_img = image_util.expand_to_a4_size(split_result['img'])
|
||||
ocr_result += model_util.ocr(a4_img)
|
||||
rotated_img = None
|
||||
for angle in angles:
|
||||
tmp_ocr_result = []
|
||||
tmp_rotated_img = image_util.rotate(dewarped_img_path, int(angle))
|
||||
split_results = image_util.split(tmp_rotated_img)
|
||||
for split_result in split_results:
|
||||
if split_result['img'] is None:
|
||||
continue
|
||||
a4_img = image_util.expand_to_a4_size(split_result['img'])
|
||||
tmp_ocr_result += model_util.ocr(a4_img)
|
||||
if len(tmp_ocr_result) > len(ocr_result):
|
||||
ocr_result = tmp_ocr_result
|
||||
rotated_img = tmp_rotated_img
|
||||
ocr_text = common_util.ocr_result_to_text(ocr_result)
|
||||
rec_type = model_util.clas_text(ocr_text) if ocr_text else None
|
||||
if rec_type == '基本医保结算单':
|
||||
@@ -158,7 +171,7 @@ def information_extraction(phrec, pk_phhd, identity):
|
||||
|
||||
now = common_util.get_default_datetime()
|
||||
session = MysqlSession()
|
||||
session.add(ZxIeResult(pk_phhd=pk_phhd, pk_phrec=phrec.pk_phrec, id=identity,
|
||||
session.add(ZxIeResult(pk_phhd=pk_phhd, pk_phrec=phrec.pk_phrec, id=BATCH_ID,
|
||||
cfjaddress=phrec.cfjaddress, content=result_json, create_time=now,
|
||||
creator=HOSTNAME, update_time=now, updater=HOSTNAME))
|
||||
session.commit()
|
||||
@@ -175,23 +188,25 @@ def get_best_value_of_key(source, key):
|
||||
values = source.get(key)
|
||||
if values:
|
||||
for value in values:
|
||||
text = value.get("text")
|
||||
probability = value.get("probability")
|
||||
if text and probability > best_probability:
|
||||
result = text
|
||||
best_probability = probability
|
||||
for v in value:
|
||||
text = v.get("text")
|
||||
probability = v.get("probability")
|
||||
if text and probability > best_probability:
|
||||
result = text
|
||||
best_probability = probability
|
||||
return result
|
||||
|
||||
|
||||
# 从keys中获取所有value组成list
|
||||
def get_values_of_key(source, key):
    """
    Collect every distinct non-empty ``"text"`` value stored under ``key``.

    ``source[key]`` is iterated as a list of groups, each group being a list
    of dict-like items; items whose ``"text"`` is missing or falsy are ignored.

    :param source: mapping that is queried with ``source.get(key)``
    :param key: key to look up
    :return: list of unique texts (order is not defined)
    """
    unique_texts = set()
    for group in source.get(key) or ():
        for item in group:
            text = item.get("text")
            if text:
                unique_texts.add(text)
    # A set was used for de-duplication; hand back a plain list
    return list(unique_texts)
|
||||
|
||||
@@ -424,10 +439,12 @@ def photo_review(pk_phhd, name):
|
||||
).all()
|
||||
session.close()
|
||||
# 同一批图的标识
|
||||
identity = int(time.time())
|
||||
modify_batch_id(uuid.uuid4().hex)
|
||||
discharge_text = ''
|
||||
for phrec in phrecs:
|
||||
rec_type, ie_result, ocr_text = information_extraction(phrec, pk_phhd, identity)
|
||||
processed_img_dir = common_util.get_processed_img_path('')
|
||||
os.makedirs(processed_img_dir, exist_ok=True)
|
||||
rec_type, ie_result, ocr_text = information_extraction(phrec, pk_phhd)
|
||||
if rec_type == '基本医保结算单':
|
||||
rec_result = settlement_result
|
||||
elif rec_type == '出院记录':
|
||||
@@ -439,10 +456,11 @@ def photo_review(pk_phhd, name):
|
||||
rec_result = None
|
||||
if rec_result is not None:
|
||||
for key, value in ie_result.items():
|
||||
if key == '页码':
|
||||
rec_result[key].append(value) # 页码要区分来源,所以多包一层
|
||||
else:
|
||||
rec_result[key] += value
|
||||
rec_result[key].append(value)
|
||||
|
||||
# 删除多余图片
|
||||
if os.path.exists(processed_img_dir) and os.path.isdir(processed_img_dir):
|
||||
shutil.rmtree(processed_img_dir)
|
||||
|
||||
settlement_data = settlement_task(pk_phhd, settlement_result)
|
||||
discharge_data = discharge_task(pk_phhd, discharge_result)
|
||||
|
||||
@@ -2,8 +2,12 @@ import logging
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
import requests
|
||||
from opencc import OpenCC
|
||||
from tenacity import retry, stop_after_attempt, wait_random
|
||||
|
||||
from log import PROJECT_ROOT
|
||||
from photo_review import BATCH_ID
|
||||
from util import string_util, model_util
|
||||
|
||||
|
||||
@@ -255,3 +259,51 @@ def chinese_money_to_number(chinese_money_amount):
|
||||
def traditional_to_simple_chinese(traditional_chinese):
|
||||
converter = OpenCC('t2s')
|
||||
return converter.convert(traditional_chinese)
|
||||
|
||||
|
||||
def parse_img_url(url):
    """
    Extract the image file name from an image URL.

    Everything from the first ``?`` (query string) or ``#`` (fragment) onwards
    is discarded before the last path component is taken.

    :param url: image URL
    :return: file name including its extension, e.g. ``'a.jpg'``
    """
    # Drop the query first, then a fragment that may follow a query-less path
    path = url.split('?', 1)[0].split('#', 1)[0]
    return os.path.basename(path)
|
||||
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('保存图片失败!'))
def save_to_local(img_url, timeout=30):
    """
    Download an image and write it into the local temporary image directory.

    The call is attempted up to three times with a random 1-3 second wait
    between attempts; a warning is logged after each attempt and the last
    exception is re-raised.

    :param img_url: image URL
    :param timeout: seconds to wait for the server, passed to ``requests.get``
    :return: local path the image was saved to
    """
    # Without a timeout a stalled connection would block this call forever
    response = requests.get(img_url, timeout=timeout)
    response.raise_for_status()  # fail on non-2xx responses

    save_path = get_tmp_img_path(parse_img_url(img_url))
    # The target directory may not exist yet; open() would fail otherwise
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    with open(save_path, 'wb') as file:
        file.write(response.content)
    return save_path
|
||||
|
||||
|
||||
def get_img_path(img_full_name):
    """
    Look up an image in the local temporary image directory.

    :param img_full_name: image file name
    :return: the local path if something exists at that path, otherwise ``None``
    """
    candidate = get_tmp_img_path(img_full_name)
    return candidate if os.path.exists(candidate) else None
|
||||
|
||||
|
||||
def get_tmp_img_path(img_full_name):
    """
    Build the path of ``img_full_name`` inside ``<PROJECT_ROOT>/tmp_img``.

    :param img_full_name: image file name (or sub-path) to append
    :return: joined path; nothing is created on disk
    """
    tmp_img_dir = os.path.join(PROJECT_ROOT, 'tmp_img')
    return os.path.join(tmp_img_dir, img_full_name)
|
||||
|
||||
|
||||
def get_processed_img_path(img_full_name):
    """
    Build the path of an intermediate image inside the current batch directory.

    The directory is ``get_tmp_img_path(<current batch id>)``; passing an empty
    ``img_full_name`` yields the batch directory itself.

    :param img_full_name: file name of the processed image
    :return: joined path; nothing is created on disk
    """
    # Read the batch id from the module at call time. A name bound with
    # ``from photo_review import BATCH_ID`` is a snapshot taken at import time
    # and never sees the value assigned later by ``modify_batch_id``; with the
    # stale empty id this path would collapse to the shared tmp_img directory.
    import photo_review

    batch_dir = str(get_tmp_img_path(photo_review.BATCH_ID))
    return os.path.join(batch_dir, img_full_name)
|
||||
|
||||
|
||||
def parse_save_path(img_path):
    """
    Split the file name of ``img_path`` into base name and extension.

    The split happens at the last ``.`` of the file name, so
    ``'/x/a.b.jpg'`` gives ``('a.b', 'jpg')``. A file name without any ``.``
    gives ``(file_name, '')`` instead of raising.

    :param img_path: image path
    :return: tuple ``(name, extension)``; the extension has no leading dot
    """
    img_full_name = os.path.basename(img_path)
    img_name, dot, img_ext = img_full_name.rpartition('.')
    if not dot:
        # No '.' present: rpartition puts the whole name in the last slot
        return img_full_name, ''
    return img_name, img_ext
|
||||
|
||||
@@ -1,13 +1,10 @@
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
|
||||
import cv2
|
||||
import numpy
|
||||
import requests
|
||||
from tenacity import retry, stop_after_attempt, wait_random
|
||||
from PIL import Image
|
||||
|
||||
from log import PROJECT_ROOT
|
||||
from util import common_util
|
||||
|
||||
|
||||
def capture(image, rectangle):
|
||||
@@ -42,14 +39,14 @@ def split(img_path, ratio=1.414, overlap=0.05, x_compensation=3):
|
||||
hw_ratio = height / width
|
||||
wh_ratio = width / height
|
||||
|
||||
img_name, img_ext = parse_save_path(img_path)
|
||||
img_name, img_ext = common_util.parse_save_path(img_path)
|
||||
if hw_ratio > ratio: # 纵向过长
|
||||
new_img_height = width * ratio
|
||||
step = width * (ratio - overlap) # 偏移步长
|
||||
for i in range(math.ceil(height / step)):
|
||||
offset = round(step * i)
|
||||
cropped_img = capture(image, [0, offset, width, offset + new_img_height])
|
||||
split_path = get_save_path(f'{img_name}.split_{i}.{img_ext}')
|
||||
split_path = common_util.get_processed_img_path(f'{img_name}.split_{i}.{img_ext}')
|
||||
cv2.imwrite(split_path, cropped_img)
|
||||
split_result.append({'img': split_path, 'x_offset': 0, 'y_offset': offset})
|
||||
elif wh_ratio > ratio: # 横向过长
|
||||
@@ -58,7 +55,7 @@ def split(img_path, ratio=1.414, overlap=0.05, x_compensation=3):
|
||||
for i in range(math.ceil(width / step)):
|
||||
offset = round(step * i)
|
||||
cropped_img = capture(image, [offset, 0, offset + new_img_width, width])
|
||||
split_path = get_save_path(f'{img_name}.split_{i}.{img_ext}')
|
||||
split_path = common_util.get_processed_img_path(f'{img_name}.split_{i}.{img_ext}')
|
||||
cv2.imwrite(split_path, cropped_img)
|
||||
split_result.append({'img': split_path, 'x_offset': offset, 'y_offset': 0})
|
||||
else:
|
||||
@@ -111,8 +108,8 @@ def rotate(img_path, angle):
|
||||
# 参数:原始图像 旋转参数 元素图像宽高
|
||||
rotated = cv2.warpAffine(image, matrix, (new_width, new_height))
|
||||
|
||||
img_name, img_ext = parse_save_path(img_path)
|
||||
rotated_path = get_save_path(f'{img_name}.rotate_{angle}.{img_ext}')
|
||||
img_name, img_ext = common_util.parse_save_path(img_path)
|
||||
rotated_path = common_util.get_processed_img_path(f'{img_name}.rotate_{angle}.{img_ext}')
|
||||
cv2.imwrite(rotated_path, rotated)
|
||||
return rotated_path
|
||||
|
||||
@@ -178,7 +175,7 @@ def expand_to_a4_size(img_path):
|
||||
:return: 扩充后的图片NumPy数组和偏移量
|
||||
"""
|
||||
image = cv2.imread(img_path)
|
||||
img_name, img_ext = parse_save_path(img_path)
|
||||
img_name, img_ext = common_util.parse_save_path(img_path)
|
||||
height, width = image.shape[:2]
|
||||
x_offset, y_offset = 0, 0
|
||||
hw_ratio = height / width
|
||||
@@ -206,8 +203,9 @@ def expand_to_a4_size(img_path):
|
||||
exp_img = numpy.zeros((y_offset, width, 3), dtype='uint8')
|
||||
exp_img.fill(255)
|
||||
image = numpy.vstack([exp_img, image, exp_img])
|
||||
# todo:未拓展时不要生成新的图片
|
||||
save_path = get_save_path(f'{img_name}.a4.{img_ext}')
|
||||
else:
|
||||
return img_path, 0, 0
|
||||
save_path = common_util.get_processed_img_path(f'{img_name}.a4.{img_ext}')
|
||||
cv2.imwrite(save_path, image)
|
||||
return save_path, x_offset, y_offset
|
||||
|
||||
@@ -240,45 +238,16 @@ def combined(img1, img2):
|
||||
return combined_img
|
||||
|
||||
|
||||
def parse_img_url(url):
|
||||
def is_photo(img_path):
    """
    Tell whether an image looks like a camera photo, based on its EXIF data.

    :param img_path: image path
    :return: True when the EXIF data contains tag 271 (camera manufacturer) or
        272 (camera model); False when it may not be a photo, or when the
        camera information was lost in transit
    """
    # Use the context manager so the underlying file handle is always released
    with Image.open(img_path) as img:
        exif = img.getexif()
        # 271: camera manufacturer, 272: camera model
        return bool(exif) and any(tag in exif for tag in (271, 272))
|
||||
|
||||
Reference in New Issue
Block a user