新增二维码识别替换高清图片功能

This commit is contained in:
2024-09-05 13:29:17 +08:00
parent 53a3dcd508
commit de631bef2e
6 changed files with 255 additions and 198 deletions

43
util/html_util.py Normal file
View File

@@ -0,0 +1,43 @@
import logging
import tempfile
import requests
from bs4 import BeautifulSoup
from tenacity import retry, stop_after_attempt, wait_random
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('获取江苏省财政票据idBase失败'))
def get_jsczt_id_base(url, timeout=10):
    """Fetch a page and return the value of its ``idBase`` input field.

    The call is wrapped by tenacity: up to 3 attempts, a random 1-3 second
    wait between attempts, and the last exception re-raised once the
    attempts are exhausted (``reraise=True``).

    Args:
        url: Address of the page to fetch.
        timeout: Seconds passed to ``requests.get`` as its timeout. Without
            one, a stalled server would block the call indefinitely and the
            retry policy would never get a chance to run.

    Returns:
        The ``value`` attribute of the first ``<input name="idBase">``
        element, or ``None`` when the page has no such element (or the
        element has no ``value`` attribute).

    Raises:
        Exception: If the response status code is not 200.
        requests.exceptions.RequestException: On connection errors or
            timeouts raised by ``requests``.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f'请求江苏省财政票据失败!状态码: {response.status_code}')

    soup = BeautifulSoup(response.text, 'html.parser')
    hidden_input = soup.find('input', {'name': "idBase"})
    if hidden_input is None:
        return None
    # Read the value carried by the input field.
    return hidden_input.get('value')
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('下载pdf失败'))
def download_pdf(url, local_filename=None, timeout=30):
    """Download ``url`` to disk in streamed chunks and return the file path.

    The call is wrapped by tenacity: up to 3 attempts, a random 1-3 second
    wait between attempts, and the last exception re-raised once the
    attempts are exhausted (``reraise=True``).

    Args:
        url: Address of the PDF to download.
        local_filename: Destination path. When ``None``, a new temporary
            file with a ``.pdf`` suffix is created and used; the caller is
            responsible for deleting it afterwards.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled server cannot block the call indefinitely.

    Returns:
        The path the content was written to.

    Raises:
        Exception: If the response status code is not 200.
        requests.exceptions.RequestException: On connection errors or
            timeouts raised by ``requests``.
        OSError: If the destination file cannot be written.
    """
    import os  # local import: only needed to clean up a failed temp file

    # The context manager releases the streamed connection on every path,
    # including the non-200 early exit.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Check that the request succeeded before touching the filesystem.
        if response.status_code != 200:
            raise Exception(f'下载pdf失败状态码: {response.status_code}')

        # Create the temp file only after the status check, so failed or
        # retried attempts do not leave orphaned empty files behind.
        created_temp = local_filename is None
        if created_temp:
            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_pdf:
                local_filename = temp_pdf.name

        try:
            with open(local_filename, 'wb') as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # filter out keep-alive new chunks
                        file.write(chunk)
        except Exception:
            # Remove only a file this function created; a caller-supplied
            # path is left untouched.
            if created_temp:
                try:
                    os.remove(local_filename)
                except OSError:
                    pass  # best-effort cleanup; the original error matters more
            raise

    return local_filename