From 6c149108419c8ebfe9066e64c3b6b3415d3b17a8 Mon Sep 17 00:00:00 2001 From: liuyebo <1515783401@qq.com> Date: Thu, 12 Sep 2024 13:56:49 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=BA=8C=E7=BB=B4=E7=A0=81?= =?UTF-8?q?=E8=A7=A3=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker-compose.yml | 2 +- photo_review/auto_photo_review.py | 15 +++++++++++---- util/html_util.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 5 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index b05ab8b..a44524d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,6 @@ x-env: &template - image: fcb_photo_review:1.14.5 + image: fcb_photo_review:1.14.6 restart: always x-review: diff --git a/photo_review/auto_photo_review.py b/photo_review/auto_photo_review.py index bd1bd08..41520ac 100644 --- a/photo_review/auto_photo_review.py +++ b/photo_review/auto_photo_review.py @@ -118,16 +118,23 @@ def get_better_image_from_qrcode(image, image_id, dpi=150): id_base = html_util.get_jsczt_id_base(url) if not id_base: continue - pdf_url = f'{jsczt_base_url}/download?idBase={id_base}' return _parse_pdf_url(pdf_url) - elif url.startswith('http://dzfp.wxxsh.net') or url.startswith('http://dzpj.wuxi5h.com'): - # 无锡市锡山人民医院、无锡市第五人民医院 + elif '/yldzpjqr/invoice/query/issueinfo' in url: + # 无锡医院 pdf_url = html_util.get_wx_pdf_url(url) if not pdf_url: continue - return _parse_pdf_url(pdf_url) + elif '/dzfp/tz3y' in url: + # 泰州市第三人民医院 + pdf_url = html_util.get_tz3y_pdf_url(url) + if not pdf_url: + continue + return _parse_pdf_url(pdf_url) + elif url.startswith('http://weixin.qq.com'): + # 无效地址 + continue else: logging.getLogger('qr').info(f'[{image_id}]中有未知二维码内容:{url}') except Exception as e: diff --git a/util/html_util.py b/util/html_util.py index fb929af..613a3a6 100644 --- a/util/html_util.py +++ b/util/html_util.py @@ -1,5 +1,7 @@ import logging +import re import tempfile +from urllib.parse import parse_qs, urlparse import requests from bs4 import BeautifulSoup @@ -56,3 +58,29 @@ def get_wx_pdf_url(url): value = pdf_url.get('href') return value return None + + +@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, + after=lambda x: logging.warning('获取泰州三院电子发票失败!')) +def get_tz3y_pdf_url(url): + response = requests.get(url) + if response.status_code != 200: + raise Exception(f'请求泰州三院电子发票失败!状态码: {response.status_code}') + soup = BeautifulSoup(response.text, 'html.parser') + script_tag = soup.find('script', {'src': None}) + if script_tag: + url_match = re.search(r'var url="(.*?)"\+fphm;', script_tag.string) + if url_match: + request_pdf_url = url_match.group(1) + query = urlparse(url).query + query_params = parse_qs(query) + fphm = query_params.get('fphm')[0] + request_pdf_url += fphm + response = requests.get(request_pdf_url) + if response.status_code != 200: + raise Exception(f'请求泰州三院电子发票失败!状态码: {response.status_code}') + pdf_match = re.search(r"'dzfpUrl':'(.*)'", response.text) + if pdf_match: + return pdf_match.group(1) + + return None