添加无锡锡山人民医院票据处理

2024-09-06 12:43:31 +08:00
parent 0b4ccd9b84
commit c2717c29b7
3 changed files with 63 additions and 28 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,6 +1,6 @@
 x-env:
  &template
-  image: fcb_photo_review:1.14.1
+  image: fcb_photo_review:1.14.2
  restart: always

 x-review:
--- a/photo_review/auto_photo_review.py
+++ b/photo_review/auto_photo_review.py
@@ -76,25 +76,15 @@ def request_ie_result(task_enum, phrecs):

 # 尝试从二维码中获取高清图片
 def get_better_image_from_qrcode(image, image_id, dpi=150):
-    js_base_url = 'http://einvoice.jsczt.cn'
+    def _parse_pdf_url(pdf_url_to_parse):
+        pdf_file = None
+        local_pdf_path = None
        try:
-        results = zxingcpp.read_barcodes(image)
-    except Exception as e:
-        logging.getLogger('error').info("二维码识别失败", exc_info=e)
-        results = []
-    for result in results:
-        pdf = None
-        pdf_path = None
-        try:
-            url = result.text
-            if url.startswith(js_base_url):
-                id_base = html_util.get_jsczt_id_base(url)
-                pdf_url = f'{js_base_url}/download?idBase={id_base}'
-                pdf_path = html_util.download_pdf(pdf_url)
+            local_pdf_path = html_util.download_pdf(pdf_url_to_parse)
            # 打开PDF文件
-                pdf = fitz.open(pdf_path)
+            pdf_file = fitz.open(local_pdf_path)
            # 选择第一页
-                page = pdf[0]
+            page = pdf_file[0]
            # 定义缩放系数（DPI）
            default_dpi = 72
            zoom = dpi / default_dpi
@@ -106,16 +96,43 @@ def get_better_image_from_qrcode(image, image_id, dpi=150):
            img = np.frombuffer(pix.samples, dtype=np.uint8).reshape((pix.height, pix.width, -1))
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            return img, page.get_text()
+        except Exception as ex:
+            logging.getLogger('error').error('解析pdf失败！', exc_info=ex)
+            return None, None
+        finally:
+            if pdf_file:
+                pdf_file.close()
+            if local_pdf_path:
+                util.delete_temp_file(local_pdf_path)
+
+    jsczt_base_url = 'http://einvoice.jsczt.cn'
+    try:
+        results = zxingcpp.read_barcodes(image)
+    except Exception as e:
+        logging.getLogger('error').info('二维码识别失败', exc_info=e)
+        results = []
+    for result in results:
+        try:
+            url = result.text
+            if url.startswith(jsczt_base_url):
+                id_base = html_util.get_jsczt_id_base(url)
+                if not id_base:
+                    continue
+
+                pdf_url = f'{jsczt_base_url}/download?idBase={id_base}'
+                return _parse_pdf_url(pdf_url)
+            elif url.startswith('http://dzfp.wxxsh.net'):  # 无锡市锡山人民医院
+                pdf_url = html_util.get_wxxsh_pdf_url(url)
+                if not pdf_url:
+                    continue
+
+                return _parse_pdf_url(pdf_url)
            else:
                logging.getLogger('qr').info(f'[{image_id}]中有未知二维码内容：{url}')
        except Exception as e:
            logging.getLogger('error').error('从二维码中获取高清图片时出错', exc_info=e)
            continue
-        finally:
-            if pdf:
-                pdf.close()
-            if pdf_path:
-                util.delete_temp_file(pdf_path)
+
    return None, None


@@ -206,9 +223,12 @@ def information_extraction(ie, phrecs, identity):
            try:
                ufile.upload_file(phrec.cfjaddress, temp_file.name)
                if img_angle != '0':
+                    logging.info(f'旋转图片[{phrec.cfjaddress}]替换成功，已旋转{img_angle}度。')
                    # 修正旋转角度
                    for zx_ie_result in zx_ie_results:
                        zx_ie_result.rotation_angle -= int(img_angle)
+                else:
+                    logging.info(f'高清图片[{phrec.cfjaddress}]替换成功！')
            except Exception as e:
                logging.error(f'上传图片({phrec.cfjaddress})失败', exc_info=e)
            finally:
--- a/util/html_util.py
+++ b/util/html_util.py
@@ -41,3 +41,18 @@ def download_pdf(url, local_filename=None):
                if chunk:  # filter out keep-alive new chunks
                    file.write(chunk)
        return local_filename
+
+
+@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
+       after=lambda x: logging.warning('获取无锡锡山人民医院票据失败！'))
+def get_wxxsh_pdf_url(url):
+    """Fetch the Wuxi Xishan People's Hospital invoice page at `url`; return its PDF link href or None."""
+    # Bound the wait: without a timeout a stalled server blocks forever and the retry never fires.
+    response = requests.get(url, timeout=30)
+    if response.status_code != 200:
+        raise Exception(f'请求无锡锡山人民医院票据失败！状态码: {response.status_code}')
+    soup = BeautifulSoup(response.text, 'html.parser')
+    # The PDF is reached through the anchor whose text is exactly '点击查看电子票据'.
+    link = soup.find('a', string='点击查看电子票据')
+    # Return the anchor's href as-is (may be None if the attribute is absent); None when no anchor matches.
+    return link.get('href') if link else None