From 6c149108419c8ebfe9066e64c3b6b3415d3b17a8 Mon Sep 17 00:00:00 2001
From: liuyebo <1515783401@qq.com>
Date: Thu, 12 Sep 2024 13:56:49 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=BA=8C=E7=BB=B4=E7=A0=81?=
 =?UTF-8?q?=E8=A7=A3=E6=9E=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docker-compose.yml                |  2 +-
 photo_review/auto_photo_review.py | 15 +++++++++++----
 util/html_util.py                 | 28 ++++++++++++++++++++++++++++
 3 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index b05ab8b..a44524d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,6 +1,6 @@
 x-env:
   &template
-  image: fcb_photo_review:1.14.5
+  image: fcb_photo_review:1.14.6
   restart: always
 
 x-review:
diff --git a/photo_review/auto_photo_review.py b/photo_review/auto_photo_review.py
index bd1bd08..41520ac 100644
--- a/photo_review/auto_photo_review.py
+++ b/photo_review/auto_photo_review.py
@@ -118,16 +118,23 @@ def get_better_image_from_qrcode(image, image_id, dpi=150):
                 id_base = html_util.get_jsczt_id_base(url)
                 if not id_base:
                     continue
-
                 pdf_url = f'{jsczt_base_url}/download?idBase={id_base}'
                 return _parse_pdf_url(pdf_url)
-            elif url.startswith('http://dzfp.wxxsh.net') or url.startswith('http://dzpj.wuxi5h.com'):
-                # 无锡市锡山人民医院、无锡市第五人民医院
+            elif '/yldzpjqr/invoice/query/issueinfo' in url:
+                # 无锡医院
                 pdf_url = html_util.get_wx_pdf_url(url)
                 if not pdf_url:
                     continue
-
                 return _parse_pdf_url(pdf_url)
+            elif '/dzfp/tz3y' in url:
+                # 泰州市第三人民医院
+                pdf_url = html_util.get_tz3y_pdf_url(url)
+                if not pdf_url:
+                    continue
+                return _parse_pdf_url(pdf_url)
+            elif url.startswith('http://weixin.qq.com'):
+                # 无效地址
+                continue
             else:
                 logging.getLogger('qr').info(f'[{image_id}]中有未知二维码内容：{url}')
         except Exception as e:
diff --git a/util/html_util.py b/util/html_util.py
index fb929af..613a3a6 100644
--- a/util/html_util.py
+++ b/util/html_util.py
@@ -1,5 +1,7 @@
 import logging
+import re
 import tempfile
+from urllib.parse import parse_qs, urlparse
 
 import requests
 from bs4 import BeautifulSoup
@@ -56,3 +58,29 @@ def get_wx_pdf_url(url):
         value = pdf_url.get('href')
         return value
     return None
+
+
+@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
+       after=lambda x: logging.warning('获取泰州三院电子发票失败！'))
+def get_tz3y_pdf_url(url):
+    """Resolve a Taizhou Third People's Hospital invoice page URL to its PDF URL.
+
+    Returns the PDF URL, or None when the page, its inline script or the `fphm`
+    query parameter lacks the expected data; raises Exception on a non-200 reply.
+    """
+    response = requests.get(url, timeout=30)  # never hang on a dead server
+    if response.status_code != 200:
+        raise Exception(f'请求泰州三院电子发票失败！状态码: {response.status_code}')
+    script_tag = BeautifulSoup(response.text, 'html.parser').find('script', {'src': None})
+    # `.string` is None for an empty or multi-node <script>; treat that as "no match".
+    script_text = (script_tag.string or '') if script_tag else ''
+    url_match = re.search(r'var url="(.*?)"\+fphm;', script_text)
+    fphm = parse_qs(urlparse(url).query).get('fphm')  # None when the QR URL lacks it
+    if not url_match or not fphm:
+        return None
+    response = requests.get(url_match.group(1) + fphm[0], timeout=30)
+    if response.status_code != 200:
+        raise Exception(f'请求泰州三院电子发票失败！状态码: {response.status_code}')
+    # NOTE(review): greedy `(.*)` runs to the last quote on the line — confirm intended.
+    pdf_match = re.search(r"'dzfpUrl':'(.*)'", response.text)
+    return pdf_match.group(1) if pdf_match else None