优化二维码解析
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
x-env:
|
x-env:
|
||||||
&template
|
&template
|
||||||
image: fcb_photo_review:1.14.5
|
image: fcb_photo_review:1.14.6
|
||||||
restart: always
|
restart: always
|
||||||
|
|
||||||
x-review:
|
x-review:
|
||||||
|
|||||||
@@ -118,16 +118,23 @@ def get_better_image_from_qrcode(image, image_id, dpi=150):
|
|||||||
id_base = html_util.get_jsczt_id_base(url)
|
id_base = html_util.get_jsczt_id_base(url)
|
||||||
if not id_base:
|
if not id_base:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
pdf_url = f'{jsczt_base_url}/download?idBase={id_base}'
|
pdf_url = f'{jsczt_base_url}/download?idBase={id_base}'
|
||||||
return _parse_pdf_url(pdf_url)
|
return _parse_pdf_url(pdf_url)
|
||||||
elif url.startswith('http://dzfp.wxxsh.net') or url.startswith('http://dzpj.wuxi5h.com'):
|
elif '/yldzpjqr/invoice/query/issueinfo' in url:
|
||||||
# 无锡市锡山人民医院、无锡市第五人民医院
|
# 无锡医院
|
||||||
pdf_url = html_util.get_wx_pdf_url(url)
|
pdf_url = html_util.get_wx_pdf_url(url)
|
||||||
if not pdf_url:
|
if not pdf_url:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
return _parse_pdf_url(pdf_url)
|
return _parse_pdf_url(pdf_url)
|
||||||
|
elif '/dzfp/tz3y' in url:
|
||||||
|
# 泰州市第三人民医院
|
||||||
|
pdf_url = html_util.get_tz3y_pdf_url(url)
|
||||||
|
if not pdf_url:
|
||||||
|
continue
|
||||||
|
return _parse_pdf_url(pdf_url)
|
||||||
|
elif url.startswith('http://weixin.qq.com'):
|
||||||
|
# 无效地址
|
||||||
|
continue
|
||||||
else:
|
else:
|
||||||
logging.getLogger('qr').info(f'[{image_id}]中有未知二维码内容:{url}')
|
logging.getLogger('qr').info(f'[{image_id}]中有未知二维码内容:{url}')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
import tempfile
|
import tempfile
|
||||||
|
from urllib.parse import parse_qs, urlparse
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
@@ -56,3 +58,29 @@ def get_wx_pdf_url(url):
|
|||||||
value = pdf_url.get('href')
|
value = pdf_url.get('href')
|
||||||
return value
|
return value
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('获取泰州三院电子发票失败!'))
def get_tz3y_pdf_url(url, timeout=10):
    """Resolve the e-invoice PDF URL for a Taizhou Third People's Hospital QR link.

    The landing page at ``url`` is fetched and its first inline ``<script>``
    is searched for a ``var url="..."+fphm;`` statement. The captured prefix
    is joined with the ``fphm`` query parameter of ``url`` and requested; the
    ``'dzfpUrl'`` value in that second response is returned.

    The ``retry`` decorator above is configured for three attempts with a
    random 1-3 wait between them and ``reraise=True``.

    Args:
        url: The URL decoded from the QR code; expected to carry an ``fphm``
            query parameter.
        timeout: Seconds passed to each ``requests.get`` call so a stalled
            server cannot block the caller indefinitely.

    Returns:
        The PDF URL string, or ``None`` when the page does not contain the
        expected script, the ``fphm`` parameter is missing, or the second
        response has no ``'dzfpUrl'`` entry.

    Raises:
        Exception: If either HTTP response has a status code other than 200.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f'请求泰州三院电子发票失败!状态码: {response.status_code}')

    soup = BeautifulSoup(response.text, 'html.parser')
    script_tag = soup.find('script', {'src': None})
    # ``.string`` is None for an empty tag or one with several children;
    # passing None to re.search would raise TypeError and trigger useless retries.
    script_text = script_tag.string if script_tag else None
    if not script_text:
        return None

    url_match = re.search(r'var url="(.*?)"\+fphm;', script_text)
    if not url_match:
        return None

    # A QR URL without ``fphm`` cannot be resolved; report "not found" rather
    # than failing on ``None[0]``.
    fphm_values = parse_qs(urlparse(url).query).get('fphm')
    if not fphm_values:
        return None

    request_pdf_url = url_match.group(1) + fphm_values[0]
    response = requests.get(request_pdf_url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f'请求泰州三院电子发票失败!状态码: {response.status_code}')

    # Non-greedy so a later quoted value on the same line is not swallowed
    # into the captured URL.
    pdf_match = re.search(r"'dzfpUrl':'(.*?)'", response.text)
    return pdf_match.group(1) if pdf_match else None
|
||||||
|
|||||||
Reference in New Issue
Block a user