新增二维码识别替换高清图片功能
This commit is contained in:
43
util/html_util.py
Normal file
43
util/html_util.py
Normal file
@@ -0,0 +1,43 @@
|
||||
import logging
|
||||
import tempfile
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from tenacity import retry, stop_after_attempt, wait_random
|
||||
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('获取江苏省财政票据idBase失败!'))
def get_jsczt_id_base(url, timeout=10):
    """Fetch a Jiangsu fiscal-receipt page and return its hidden ``idBase`` value.

    The call is attempted up to three times with a random 1-3 second pause
    between attempts; a warning is logged after each failed attempt and the
    last exception is re-raised once the attempts are exhausted.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server (forwarded to
            ``requests.get``). Without a timeout a stalled server would block
            the call forever and the retry policy would never trigger.

    Returns:
        The ``value`` attribute of the ``<input name="idBase">`` element, or
        ``None`` when the page contains no such element.

    Raises:
        Exception: If the server answers with a status code other than 200.
        requests.RequestException: On connection errors or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f'请求江苏省财政票据失败!状态码: {response.status_code}')

    soup = BeautifulSoup(response.text, 'html.parser')
    hidden_input = soup.find('input', {'name': "idBase"})
    if hidden_input is None:
        return None
    # Read the value carried by the hidden form field.
    return hidden_input.get('value')
|
||||
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('下载pdf失败!'))
def download_pdf(url, local_filename=None, timeout=30):
    """Download the PDF at ``url`` to disk and return the path it was saved to.

    The call is attempted up to three times with a random 1-3 second pause
    between attempts; a warning is logged after each failed attempt and the
    last exception is re-raised once the attempts are exhausted.

    Args:
        url: Address of the PDF to download.
        local_filename: Destination path. When ``None``, a new temporary file
            with a ``.pdf`` suffix is created and used instead.
        timeout: Seconds to wait for the server (forwarded to
            ``requests.get``), so a stalled server cannot block forever.

    Returns:
        The path of the file that now holds the downloaded content.

    Raises:
        Exception: If the server answers with a status code other than 200.
        requests.RequestException: On connection errors or timeouts.
    """
    # Function-scope import: the module's import block does not provide os.
    import os

    # Remember whether the destination is ours, so that a failed attempt only
    # ever deletes a file this call created, never one supplied by the caller.
    created_temp = local_filename is None
    if created_temp:
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_pdf:
            local_filename = temp_pdf.name

    try:
        # A streamed response keeps its connection checked out until it is
        # closed; the context manager releases it on every exit path.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                raise Exception(f'下载pdf失败!状态码: {response.status_code}')
            with open(local_filename, 'wb') as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # filter out keep-alive new chunks
                        file.write(chunk)
    except BaseException:
        # Each retry creates a fresh temp file; remove this attempt's file so
        # failed attempts do not leave empty or partial PDFs behind.
        if created_temp:
            try:
                os.remove(local_filename)
            except OSError:
                pass
        raise
    return local_filename
|
||||
Reference in New Issue
Block a user