From 89de0e0815a84dabbfa445e98a4d4b9bbd284484 Mon Sep 17 00:00:00 2001
From: liuyebo <1515783401@qq.com>
Date: Tue, 9 Jul 2024 12:47:05 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=B6=82=E6=8A=B9=E5=8A=9F?=
 =?UTF-8?q?=E8=83=BD=EF=BC=8C=E6=9A=82=E6=9C=AA=E5=AE=8C=E6=88=90=E4=B8=8A?=
 =?UTF-8?q?=E4=BC=A0=E4=BA=91=E7=AB=AF=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 models.py     | 157 ++++++++++++++++++++++
 photo_mask.py | 354 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 511 insertions(+)
 create mode 100644 models.py
 create mode 100644 photo_mask.py

diff --git a/models.py b/models.py
new file mode 100644
index 0000000..f3bb680
--- /dev/null
+++ b/models.py
@@ -0,0 +1,157 @@
+# coding: utf-8
+from sqlalchemy import Column, DECIMAL, Date, DateTime, Index, String, text, LargeBinary
+from sqlalchemy.dialects.mysql import BIT, CHAR, INTEGER, TINYINT, VARCHAR
+
+from config.mysql import Base
+
+
+class ZxPhhd(Base):
+    __tablename__ = 'zx_phhd'
+    __table_args__ = (
+        Index('zx_phhd_idx3', 'pk_yljg', 'cjsd_id'),
+    )
+
+    pk_phhd = Column(INTEGER(11), primary_key=True, comment='病案主键')
+    cPhhd_id = Column(VARCHAR(20), index=True, comment='拍一拍单据号')
+    billdate = Column(DateTime)
+    pk_person = Column(INTEGER(11), server_default=text("'0'"), comment='人员主键')
+    pk_corp = Column(INTEGER(11), index=True, server_default=text("'0'"), comment='单位主键')
+    cRyid = Column(VARCHAR(20), comment='人员编码')
+    cJBH = Column(VARCHAR(20), comment='结报号')
+    cXm = Column(String(12), comment='姓名')
+    cSfzh = Column(VARCHAR(20), index=True, comment='身份证号')
+    czh = Column(VARCHAR(20), comment='小组')
+    cXb = Column(VARCHAR(2), comment='性别')
+    dCsny = Column(Date, comment='出身时间')
+    fAge = Column(INTEGER(4), server_default=text("'0'"), comment='年龄')
+    pk_yljg = Column(INTEGER(11), server_default=text("'0'"), comment='医院pk')
+    pk_ylks = Column(INTEGER(11), server_default=text("'0'"), comment='科室pk')
+    cDoctor = Column(VARCHAR(20), comment='医生')
+    dZYRQ = Column(DateTime, comment='入院日期')
+    dCYRQ = Column(DateTime)
+    cZYH = Column(VARCHAR(20), comment='住院号')
+    iMZTS = Column(INTEGER(4), server_default=text("'0'"), comment='住院天数')
+    fFSYLFY = Column(DECIMAL(18, 2), server_default=text("'0.00'"), comment='发生医疗费用')
+    fZcfwfy = Column(DECIMAL(18, 2), server_default=text("'0.00'"), comment='政策范围内费用')
+    fxnhbcje = Column(DECIMAL(18, 2), server_default=text("'0.00'"), comment='新农合补助金额')
+    fqtbcje = Column(DECIMAL(18, 2), server_default=text("'0.00'"), comment='其它补助金额')
+    fBCJE = Column(DECIMAL(18, 2), server_default=text("'0.00'"), comment='补偿金额')
+    fgrzfje1 = Column(DECIMAL(18, 2), server_default=text("'0.00'"), comment='个人支付金额1')
+    fgezfje2 = Column(DECIMAL(18, 2), server_default=text("'0.00'"), comment='个人支付金额2')
+    fgrzfje = Column(DECIMAL(18, 2), server_default=text("'0.00'"), comment='个人自费金额')
+    fZfje = Column(DECIMAL(18, 2), server_default=text("'0.00'"), comment='自付金额')
+    fXianje = Column(DECIMAL(18, 2), server_default=text("'0.00'"), comment='限额')
+    fMaxbcje = Column(DECIMAL(18, 2), server_default=text("'0.00'"), comment='最高支付')
+    cJBBM = Column(VARCHAR(20), server_default=text("'-'"), comment='疾病编码')
+    cJbbm_cyzd = Column(VARCHAR(20), comment='疾病编码出院诊断')
+    cSsczmc = Column(VARCHAR(100), comment='手术操作名称')
+    problem_note = Column(String(400))
+    depiction = Column(VARCHAR(300), comment='备注')
+    creator = Column(VARCHAR(30), comment='创建人')
+    creationtime = Column(DateTime, server_default=text("CURRENT_TIMESTAMP"), comment='创建时间')
+    modifier = Column(VARCHAR(30), comment='最后修改人')
+    modifiedtime = Column(DateTime, server_default=text("CURRENT_TIMESTAMP"), comment='最后修改时间')
+    cStatus = Column(CHAR(1), index=True, server_default=text("'0'"), comment='状态')
+    vercode = Column(VARCHAR(4), comment='版本编码')
+    drg_groups_id = Column(String(6), server_default=text("'2017'"), comment='drg分组器版本号')
+    cSource_flag = Column(VARCHAR(4), server_default=text("'1'"), comment='病案来源')
+    ref_id1 = Column(VARCHAR(20))
+    ref_id2 = Column(VARCHAR(20))
+    ref_pk1 = Column(INTEGER(11))
+    checker = Column(VARCHAR(30), comment='创建人')
+    checktime = Column(DateTime, comment='创建时间')
+    paint_flag = Column(CHAR(1), index=True, server_default=text("'0'"))
+    paint_user = Column(VARCHAR(30))
+    paint_date = Column(DateTime)
+    filetype_id = Column(VARCHAR(10), comment='文件类型id')
+    cMphone = Column(VARCHAR(11), comment='移动电话')
+    cmiss_rectype = Column(VARCHAR(10), comment='遗漏记录类型')
+    cmiss_remark = Column(VARCHAR(200), comment='遗漏备注 ')
+    bhg_remark = Column(VARCHAR(200), comment='不合格原因')
+    cmis_jsd_flag = Column(BIT(1))
+    cmis_jsd_page = Column(VARCHAR(40), comment='结算单遗漏页码')
+    cmis_cyjl_flag = Column(BIT(1))
+    cmis_cyjl_page = Column(VARCHAR(40), comment='出院记录遗漏页码')
+    cmis_fyqd_flag = Column(BIT(1))
+    cmis_fyqd_page = Column(VARCHAR(40), comment='费用清单遗漏页码')
+    cmis_evidence = Column(VARCHAR(60), comment='相关证明材料')
+    del_reason = Column(CHAR(1), server_default=text("'0'"), comment='删除原因')
+    deln_reason = Column(VARCHAR(2), comment='删除原因')
+    train_flag = Column(VARCHAR(10), server_default=text("'0'"), comment='培训标志')
+    cjsd_id = Column(VARCHAR(30), comment='结算单号码')
+    sms_content = Column(String(1000), comment='短信内容')
+    judge_backup = Column(VARCHAR(100))
+    remind_num = Column(INTEGER(11), server_default=text("'0'"), comment='提醒次数')
+    input_cxm = Column(String(20), comment='姓名核对')
+    yb_type = Column(VARCHAR(10), comment='医保类别')
+    bucode = Column(VARCHAR(4), server_default=text("'1'"), comment='业务单元')
+    subcorpname = Column(VARCHAR(80), comment='子公司')
+    deptname = Column(VARCHAR(80), comment='部门')
+    psncode = Column(VARCHAR(16), comment='工号')
+    period_code = Column(VARCHAR(10), comment='会计期间')
+    cbznote = Column(VARCHAR(500))
+    corp_list = Column(VARCHAR(200), comment='其它补助单位及年度')
+    perjudge_flag = Column(CHAR(1), server_default=text("'0'"), comment='人员信息判断标志')
+    priority_num = Column(TINYINT(4), server_default=text("'0'"), comment='优先领取')
+    addin_xybz = Column(BIT(1))
+    addin_tybz = Column(BIT(1))
+    finish_flag = Column(BIT(1))
+    exsuccess_flag = Column(CHAR(1), index=True, server_default=text("'0'"), comment='交换成功标志')
+    pk_soncorp = Column(INTEGER(11), server_default=text("'0'"), comment='镇下村单位pk值')
+    return_times = Column(INTEGER(11), server_default=text("'0'"), comment='退回次数')
+    phuser_type = Column(CHAR(1), server_default=text("'1'"), comment='拍传人类型')
+    examine_note = Column(VARCHAR(400), comment='抽查意见')
+    examine_user = Column(VARCHAR(30), comment='抽查人')
+    examine_date = Column(DateTime, comment='抽查时间')
+    channel_code = Column(VARCHAR(10), comment='上传渠道')
+    einvoice_flag = Column(BIT(1))
+    drgs_flag = Column(CHAR(1), server_default=text("'0'"), comment='病种类型(0-无,1-单病种,2-drg)')
+    applyDate = Column(DateTime)
+    admissionDate = Column(Date, comment='入院日期')
+    dischargeDate = Column(Date, comment='出院日期')
+    reapplyDate = Column(Date, comment='重新上传日期')
+    exreq_times = Column(INTEGER(11), index=True, server_default=text("'0'"), comment='请求次数')
+    drug_source = Column(CHAR(1), server_default=text("'0'"), comment='药品来源(1-外购药,0--没有)')
+    addin_passpaydate = Column(BIT(1))
+    apply_classid = Column(CHAR(1), server_default=text("'0'"), comment='申请人员类别(1-农民工,0-默认)')
+
+
+class ZxPhrec(Base):
+    __tablename__ = 'zx_phrec'
+
+    pk_phrec = Column(INTEGER(11), primary_key=True, comment='病案清主键')
+    pk_phhd = Column(INTEGER(11), index=True, comment='病案主键')
+    cRectype = Column(CHAR(1), comment='记录类型(1-入院小结,2--出院小结,3--手术记录 4清单)')
+    rowno = Column(TINYINT(4), server_default=text("'1'"), comment='序号')
+    cfjaddress = Column(String(200), comment='附件地址')
+    cfjaddress2 = Column(String(500), comment='附件地址2')
+    cfjaddress3 = Column(String(500), comment='附件地址3')
+    cfjblob = Column(LargeBinary, comment='附件')
+    cfjblob2 = Column(LargeBinary, comment='附件2')
+    cfjblob3 = Column(LargeBinary, comment='附件3')
+    subsys_id = Column(String(4), comment='分系统代码')
+    depiction = Column(String(100), comment='备注')
+    isreupload = Column(CHAR(1), server_default=text("'0'"))
+    checker = Column(String(10), comment='创建人')
+    checktime = Column(DateTime, comment='创建时间')
+    creator = Column(String(30), comment='创建人')
+    creationtime = Column(DateTime, index=True, comment='创建时间')
+    modifier = Column(String(30), comment='最后修改人')
+    modifiedtime = Column(DateTime, comment='最后修改时间')
+    cSource_flag = Column(String(4), server_default=text("'1'"), comment='病案来源')
+    cStatus = Column(CHAR(1), server_default=text("'0'"), comment='状态')
+    link_flag = Column(CHAR(1), server_default=text("'1'"), comment='是否采用链接')
+    filetype_id = Column(String(10), server_default=text("'jpg'"), comment='文件类型id')
+    cmiss_flag = Column(INTEGER(4), server_default=text("'0'"), comment='遗漏补拍标记')
+    paint_flag = Column(CHAR(1), server_default=text("'0'"), comment='涂抹标志')
+    paint_user = Column(String(30))
+    paint_date = Column(DateTime)
+    byz_xmbz_flag = Column(BIT(1))
+    byz_zyrqcyjl_flag = Column(BIT(1))
+    byz_zyhcyjl_flag = Column(BIT(1))
+    byz_ftyccyjl_flag = Column(BIT(1))
+    byz_ftycfyqd_flag = Column(BIT(1))
+    byz_ftycjsd_flag = Column(BIT(1))
+    unsharp_flag = Column(BIT(1))
+    judge_backup = Column(String(40))
+    ps_flag = Column(CHAR(1), server_default=text("'0'"), comment='图片是否ps过(0-无,1-ps过)')
diff --git a/photo_mask.py b/photo_mask.py
new file mode 100644
index 0000000..66cd18c
--- /dev/null
+++ b/photo_mask.py
@@ -0,0 +1,354 @@
+"""Mask a piece of text (e.g. a person's name) on the images of a case.
+
+Cases whose ``paint_flag`` is "1" are fetched in batches.  For each case the
+attachment images of record types "3" and "4" are downloaded, OCR-ed, and
+every recognized text line containing the target text is painted over with a
+white rectangle.  Uploading the result back to cloud storage is not
+implemented yet; masked images are written to ``./mask_test``.
+"""
+import logging
+import math
+import os
+import tempfile
+import traceback
+import urllib.request
+from time import sleep
+
+import cv2
+import numpy as np
+import paddleclas
+from paddleocr import PaddleOCR
+from sqlalchemy import update
+
+from auto_email.error_email import send_an_error_email
+from config.mysql import MysqlSession
+from config.photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES
+from models import ZxPhrec, ZxPhhd
+from ucloud import ucloud
+
+OCR = PaddleOCR(use_angle_cls=False, lang="ch", show_log=False, gpu_id=1)
+
+# Directory the masked images are written to until cloud upload exists.
+OUTPUT_DIR = "./mask_test"
+# Seconds to wait for an image download before giving up.
+DOWNLOAD_TIMEOUT = 30
+# Created on first use by _get_orientation_model().
+_orientation_model = None
+
+
+def open_image(img_path):
+    """Load an image from a local path or an http(s) URL.
+
+    :param img_path: file path, or a URL starting with "http"
+    :return: the image decoded by OpenCV, or None if decoding fails
+    """
+    if img_path.startswith("http"):
+        # The context manager closes the HTTP response once it is read.
+        with urllib.request.urlopen(img_path, timeout=DOWNLOAD_TIMEOUT) as resp:
+            image_data = resp.read()
+        # Raw bytes -> numpy buffer -> OpenCV image.
+        image_np = np.frombuffer(image_data, np.uint8)
+        return cv2.imdecode(image_np, cv2.IMREAD_COLOR)
+    return cv2.imread(img_path)
+
+
+def split_image(img, max_ratio=2.82, best_ration=1.41, overlap=0.05):
+    """Cut a very elongated image into overlapping tiles.
+
+    An image whose long/short side ratio is at most ``max_ratio`` is returned
+    as a single tile.  Otherwise it is cut along its long side into tiles
+    that are ``best_ration`` times the short side long and start every
+    ``best_ration - overlap`` times the short side.  Tiles reaching past the
+    image border are padded with black so that all tiles have equal size.
+
+    :return: list of dicts with the keys "img", "x_offset" and "y_offset";
+        the offsets locate the tile's top-left corner in ``img``
+    """
+    height, width = img.shape[:2]
+    ratio = max(width, height) / min(width, height)
+    if ratio <= max_ratio:
+        return [{"img": img, "x_offset": 0, "y_offset": 0}]
+
+    split_result = []
+    # Tiles advance by less than their own length, hence the overlap.
+    step_ratio = best_ration - overlap
+    if width < height:
+        # Height is the long side: cut the image into horizontal bands.
+        tile_len = round(width * best_ration)
+        for i in range(math.ceil(height / (width * step_ratio))):
+            offset = round(width * step_ratio * i)
+            if offset >= height:
+                # Rounding pushed the start past the border: nothing left.
+                break
+            # numpy slicing order is [y1:y2, x1:x2]
+            tile = img[offset:offset + tile_len, 0:width]
+            missing = tile_len - tile.shape[0]
+            if missing > 0:
+                tile = cv2.copyMakeBorder(tile, 0, missing, 0, 0,
+                                          cv2.BORDER_CONSTANT, value=(0, 0, 0))
+            split_result.append({"img": tile, "x_offset": 0, "y_offset": offset})
+    else:
+        # Width is the long side: cut the image into vertical bands.
+        tile_len = round(height * best_ration)
+        for i in range(math.ceil(width / (height * step_ratio))):
+            offset = round(height * step_ratio * i)
+            if offset >= width:
+                break
+            tile = img[0:height, offset:offset + tile_len]
+            missing = tile_len - tile.shape[1]
+            if missing > 0:
+                tile = cv2.copyMakeBorder(tile, 0, 0, 0, missing,
+                                          cv2.BORDER_CONSTANT, value=(0, 0, 0))
+            split_result.append({"img": tile, "x_offset": offset, "y_offset": 0})
+    return split_result
+
+
+def _get_orientation_model():
+    """Return the shared orientation classifier, creating it on first use."""
+    global _orientation_model
+    if _orientation_model is None:
+        _orientation_model = paddleclas.PaddleClas(model_name="text_image_orientation")
+    return _orientation_model
+
+
+def get_image_rotation_angles(img):
+    """Predict the rotation angle(s) of an image.
+
+    :return: list of angle labels; ``['0', '90']`` is the fallback when the
+        prediction fails or its top score is below 0.5
+    """
+    angles = ['0', '90']
+    predictions = _get_orientation_model().predict(input_data=img)
+    try:
+        result = next(predictions)[0]
+        if result["scores"][0] >= 0.5:
+            angles = result["label_names"]
+    except Exception as e:
+        logging.error("获取图片旋转角度失败", exc_info=e)
+    return angles
+
+
+def rotate_image(img, angle):
+    """Rotate an image about its center without cropping it.
+
+    For angles other than 0 and 180 the output canvas swaps width and height,
+    which is only exact for multiples of 90 degrees.
+
+    :param angle: rotation in degrees, passed to ``cv2.getRotationMatrix2D``
+    :return: the rotated image; ``img`` itself when ``angle`` is 0
+    """
+    if angle == 0:
+        return img
+    height, width = img.shape[:2]
+    if angle == 180:
+        new_width, new_height = width, height
+    else:
+        new_width, new_height = height, width
+    matrix = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1)
+    # Move the rotated content to the center of the new canvas.
+    matrix[0, 2] += (new_width - width) / 2
+    matrix[1, 2] += (new_height - height) / 2
+    return cv2.warpAffine(img, matrix, (new_width, new_height))
+
+
+def rotate_rectangle(rectangle, center, angle):
+    """Map a box found on a rotated image back onto the unrotated image.
+
+    This inverts the transform applied by rotate_image, so ``center`` and
+    ``angle`` must be the values used there.
+
+    :param rectangle: ``(x1, y1, x2, y2)`` on the rotated image
+    :param center: ``(width / 2, height / 2)`` of the unrotated image
+    :param angle: rotation in degrees that was applied to the image
+    :return: ``[x1, y1, x2, y2]`` as ints, the axis-aligned box enclosing the
+        mapped corners
+    """
+    matrix = cv2.getRotationMatrix2D(center, angle, 1)
+    if angle not in (0, 180):
+        # rotate_image only shifts the content when it swaps width and
+        # height; applying the shift for 0/180 degrees misplaces the box.
+        matrix[0, 2] += center[1] - center[0]
+        matrix[1, 2] += center[0] - center[1]
+    inverse = cv2.invertAffineTransform(matrix)
+
+    x1, y1, x2, y2 = rectangle
+    corners = np.array([[x1, y1, 1], [x1, y2, 1], [x2, y1, 1], [x2, y2, 1]], dtype=float)
+    mapped = inverse @ corners.T  # row 0 holds the x values, row 1 the y values
+    # Round outwards so that the returned box never shrinks.
+    return [math.floor(mapped[0].min()), math.floor(mapped[1].min()),
+            math.ceil(mapped[0].max()), math.ceil(mapped[1].max())]
+
+
+def get_ocr_layout(ocr, img_path):
+    """Run OCR and return the recognized text lines with their boxes.
+
+    :param ocr: object exposing ``ocr(img_path, cls=False)``, e.g. PaddleOCR
+    :param img_path: image passed through to ``ocr.ocr``
+    :return: list of ``([x1, y1, x2, y2], text)`` tuples, empty when nothing
+        was recognized
+    """
+
+    def _get_box(old_box):
+        # Axis-aligned box enclosing the quadrilateral; assumes the corners
+        # are ordered top-left, top-right, bottom-right, bottom-left.
+        return [
+            min(old_box[0][0], old_box[3][0]),  # x1
+            min(old_box[0][1], old_box[1][1]),  # y1
+            max(old_box[1][0], old_box[2][0]),  # x2
+            max(old_box[2][1], old_box[3][1]),  # y2
+        ]
+
+    def _normal_box(box_data):
+        # Keep only boxes whose width and height are both positive.
+        return box_data[3] - box_data[1] > 0 and box_data[2] - box_data[0] > 0
+
+    ocr_result = ocr.ocr(img_path, cls=False)
+    # Only the first entry of the result is used; it may be missing or empty.
+    if not ocr_result or not ocr_result[0]:
+        return []
+    layout = []
+    for segment in ocr_result[0]:
+        box = _get_box(segment[0])
+        if _normal_box(box):
+            layout.append((box, segment[1][0]))
+    return layout
+
+
+def find_box_of_content(content, layout):
+    """Estimate where ``content`` sits inside one recognized text line.
+
+    All characters are assumed to be equally wide, so the horizontal span is
+    interpolated from the index of the first occurrence of ``content``.
+
+    :param layout: ``(box, text)`` tuple as produced by get_ocr_layout
+    :return: ``(x1, y1, x2, y2)`` of the estimated region
+    :raises ValueError: if ``content`` does not occur in the text
+    """
+    full_box, text = layout[0], layout[1]
+    index = text.index(content)
+    # An empty text line has no characters to measure.
+    char_len = (full_box[2] - full_box[0]) / len(text) if text else 0
+    return (
+        full_box[0] + index * char_len,
+        full_box[1],
+        full_box[0] + (index + len(content)) * char_len,
+        full_box[3],
+    )
+
+
+def mask_image(image, content):
+    """Find the regions of ``image`` whose text contains ``content``.
+
+    :return: list of ``(x1, y1, x2, y2)`` boxes to paint over; always a list,
+        empty when nothing matches or OCR fails
+    """
+    if not content:
+        # Nothing to search for (e.g. the name column is NULL).
+        return []
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
+        cv2.imwrite(temp_file.name, image)
+
+    try:
+        layouts = get_ocr_layout(OCR, temp_file.name)
+        return [find_box_of_content(content, layout)
+                for layout in layouts if content in layout[1]]
+    except Exception as e:
+        logging.error("涂抹时出错", exc_info=e)
+        # Callers iterate over the result, so never fall through to None.
+        return []
+    finally:
+        try:
+            os.remove(temp_file.name)
+        except OSError as e:
+            logging.info(f"删除临时文件 {temp_file.name} 时出错", exc_info=e)
+
+
+def photo_mask(pk_phhd, content):
+    """Mask ``content`` on the type "3" and "4" attachment images of a case.
+
+    :param pk_phhd: primary key of the case (``ZxPhhd.pk_phhd``)
+    :param content: text to paint over
+    """
+    session = MysqlSession()
+    try:
+        phrecs = session.query(ZxPhrec.pk_phrec, ZxPhrec.pk_phhd, ZxPhrec.cfjaddress) \
+            .filter(ZxPhrec.pk_phhd == pk_phhd) \
+            .filter(ZxPhrec.cRectype.in_(["3", "4"])) \
+            .all()
+    finally:
+        # Release the session even when the query fails.
+        session.close()
+    for phrec in phrecs:
+        img_url = ucloud.get_private_url(phrec.cfjaddress)
+        if not img_url:
+            continue
+        image = open_image(img_url)
+        if image is None:
+            logging.warning(f"图片无法解码,跳过:{phrec.cfjaddress}")
+            continue
+        for img in split_image(image):
+            tile = img["img"]
+            angle = int(get_image_rotation_angles(tile)[0])
+            rotated_img = rotate_image(tile, angle)
+            height, width = tile.shape[:2]
+            center = (width / 2, height / 2)
+            for box in mask_image(rotated_img, content):
+                # Map the box from the rotated tile back to the full image.
+                box = rotate_rectangle(box, center, angle)
+                top_left = (int(box[0] + img["x_offset"]), int(box[1] + img["y_offset"]))
+                bot_right = (int(box[2] + img["x_offset"]), int(box[3] + img["y_offset"]))
+                cv2.rectangle(image, top_left, bot_right, (255, 255, 255), -1, 0)
+        output_path = f"{OUTPUT_DIR}/{phrec.cfjaddress}.jpg"
+        # cv2.imwrite fails silently when the target directory is missing.
+        os.makedirs(os.path.dirname(output_path), exist_ok=True)
+        cv2.imwrite(output_path, image)
+
+
+def _set_paint_flag(session, pk_phhd_values, flag):
+    """Set ``paint_flag`` to ``flag`` for the given cases and commit."""
+    session.execute(update(ZxPhhd).where(ZxPhhd.pk_phhd.in_(pk_phhd_values)).values(paint_flag=flag))
+    session.commit()
+
+
+def _fetch_pending_cases():
+    """Fetch one batch of cases waiting to be masked and flag them as running."""
+    session = MysqlSession()
+    try:
+        phhds = session.query(ZxPhhd.pk_phhd, ZxPhhd.cXm).filter(
+            ZxPhhd.paint_flag == "1"
+        ).limit(PHHD_BATCH_SIZE).all()
+        if phhds:
+            # "2" = masking in progress.  Skipped for an empty batch, which
+            # would otherwise issue an UPDATE with an empty IN list.
+            _set_paint_flag(session, [phhd.pk_phhd for phhd in phhds], "2")
+        return phhds
+    finally:
+        session.close()
+
+
+def main():
+    """Poll for cases to mask, batch after batch, until an error is raised."""
+    while True:
+        phhds = _fetch_pending_cases()
+        if not phhds:
+            # No new case: wait a while before polling again.
+            logging.info(f"暂未查询到需要涂抹的案子,等待{SLEEP_MINUTES}分钟...")
+            sleep(SLEEP_MINUTES * 60)
+            continue
+        for phhd in phhds:
+            logging.info(f"开始涂抹:{phhd.pk_phhd}")
+            photo_mask(phhd.pk_phhd, phhd.cXm)
+            # "8" = masking finished.
+            session = MysqlSession()
+            try:
+                _set_paint_flag(session, [phhd.pk_phhd], "8")
+            finally:
+                session.close()
+
+
+if __name__ == '__main__':
+    try:
+        main()
+    except Exception as e:
+        logging.error(traceback.format_exc())
+        send_an_error_email(program_name='照片涂抹脚本', error_name=repr(e), error_detail=traceback.format_exc())