优化二维码解析
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
import logging
|
||||
import re
|
||||
import tempfile
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
@@ -56,3 +58,29 @@ def get_wx_pdf_url(url):
|
||||
value = pdf_url.get('href')
|
||||
return value
|
||||
return None
|
||||
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('获取泰州三院电子发票失败!'))
def get_tz3y_pdf_url(url, timeout=10):
    """Resolve the PDF URL of a Taizhou Third Hospital electronic invoice.

    The landing page at *url* contains an inline script of the form
    ``var url="<prefix>"+fphm;``.  The prefix is combined with the ``fphm``
    query parameter of *url* to build a second request, whose response
    carries the PDF location in a ``'dzfpUrl':'...'`` field.

    Args:
        url: Invoice landing-page URL; expected to carry an ``fphm`` query
            parameter.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The PDF URL string, or ``None`` when the page has no matching inline
        script, *url* has no ``fphm`` value, or the second response has no
        ``dzfpUrl`` field.

    Raises:
        Exception: If either HTTP request returns a status other than 200.
        requests.RequestException: On connection errors or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f'请求泰州三院电子发票失败!状态码: {response.status_code}')

    soup = BeautifulSoup(response.text, 'html.parser')
    # Inspect every inline script (no ``src`` attribute), not only the first:
    # pages commonly carry unrelated inline scripts ahead of the one we need.
    for script_tag in soup.find_all('script', {'src': None}):
        # ``.string`` is None for an empty script or one with several child
        # nodes; fall back to '' so the regex search cannot raise TypeError.
        url_match = re.search(r'var url="(.*?)"\+fphm;', script_tag.string or '')
        if not url_match:
            continue

        # parse_qs omits missing and blank parameters, so guard before
        # indexing instead of failing with an opaque TypeError.
        fphm_values = parse_qs(urlparse(url).query).get('fphm')
        if not fphm_values:
            return None
        request_pdf_url = url_match.group(1) + fphm_values[0]

        response = requests.get(request_pdf_url, timeout=timeout)
        if response.status_code != 200:
            raise Exception(f'请求泰州三院电子发票失败!状态码: {response.status_code}')

        # Stop at the closing quote of the value; a greedy ``.*`` would run
        # on to the last quote of the line when other fields follow.
        pdf_match = re.search(r"'dzfpUrl':'([^']*)'", response.text)
        return pdf_match.group(1) if pdf_match else None

    return None
|
||||
|
||||
Reference in New Issue
Block a user