优化二维码解析

This commit is contained in:
2024-09-12 13:56:49 +08:00
parent 7cd0a564a0
commit 6c14910841
3 changed files with 40 additions and 5 deletions

View File

@@ -1,6 +1,6 @@
x-env:
&template
image: fcb_photo_review:1.14.5
image: fcb_photo_review:1.14.6
restart: always
x-review:

View File

@@ -118,16 +118,23 @@ def get_better_image_from_qrcode(image, image_id, dpi=150):
id_base = html_util.get_jsczt_id_base(url)
if not id_base:
continue
pdf_url = f'{jsczt_base_url}/download?idBase={id_base}'
return _parse_pdf_url(pdf_url)
elif url.startswith('http://dzfp.wxxsh.net') or url.startswith('http://dzpj.wuxi5h.com'):
# 无锡市锡山人民医院、无锡市第五人民医院
elif '/yldzpjqr/invoice/query/issueinfo' in url:
# 无锡医院
pdf_url = html_util.get_wx_pdf_url(url)
if not pdf_url:
continue
return _parse_pdf_url(pdf_url)
elif '/dzfp/tz3y' in url:
# 泰州市第三人民医院
pdf_url = html_util.get_tz3y_pdf_url(url)
if not pdf_url:
continue
return _parse_pdf_url(pdf_url)
elif url.startswith('http://weixin.qq.com'):
# 无效地址
continue
else:
logging.getLogger('qr').info(f'[{image_id}]中有未知二维码内容:{url}')
except Exception as e:

View File

@@ -1,5 +1,7 @@
import logging
import re
import tempfile
from urllib.parse import parse_qs, urlparse
import requests
from bs4 import BeautifulSoup
@@ -56,3 +58,29 @@ def get_wx_pdf_url(url):
value = pdf_url.get('href')
return value
return None
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('获取泰州三院电子发票失败!'))
def get_tz3y_pdf_url(url, timeout=10):
    """Resolve the PDF address behind a Taizhou Third Hospital e-invoice URL.

    The lookup takes two HTTP round trips:

    1. Fetch ``url`` and read the first ``<script>`` tag that has no ``src``
       attribute. It is expected to contain ``var url="<prefix>"+fphm;``.
    2. Append the ``fphm`` query parameter of ``url`` to that prefix, fetch
       the resulting address, and pull the ``'dzfpUrl':'...'`` value out of
       the response text.

    Args:
        url: Address decoded from the QR code; must carry an ``fphm`` query
            parameter for the lookup to succeed.
        timeout: Seconds passed to each ``requests.get`` call so that an
            unresponsive server cannot block the caller forever.

    Returns:
        The extracted PDF URL, or ``None`` when the inline script, the
        ``fphm`` parameter, or the ``dzfpUrl`` entry cannot be found.

    Raises:
        Exception: When either request answers with a non-200 status code.
            The decorator is configured with ``stop_after_attempt(3)`` and
            ``reraise=True`` (presumably tenacity -- the import is not visible
            here), so the last error surfaces after the retries are used up.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f'请求泰州三院电子发票失败!状态码: {response.status_code}')

    soup = BeautifulSoup(response.text, 'html.parser')
    script_tag = soup.find('script', {'src': None})
    if script_tag is None:
        return None

    # ``Tag.string`` is None for an empty tag or one with several children;
    # fall back to '' so the search reports "no match" instead of raising.
    url_match = re.search(r'var url="(.*?)"\+fphm;', script_tag.string or '')
    if not url_match:
        return None

    # parse_qs omits missing (and blank) parameters, so guard before indexing.
    fphm_values = parse_qs(urlparse(url).query).get('fphm')
    if not fphm_values:
        return None
    request_pdf_url = url_match.group(1) + fphm_values[0]

    response = requests.get(request_pdf_url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f'请求泰州三院电子发票失败!状态码: {response.status_code}')

    # Stop at the closing quote of the value; a greedy ``.*`` would run on to
    # the last quote on the line and swallow any entries that follow.
    pdf_match = re.search(r"'dzfpUrl':'([^']*)'", response.text)
    if pdf_match:
        return pdf_match.group(1)
    return None