新增二维码识别替换高清图片功能

2024-09-05 13:29:17 +08:00
parent 53a3dcd508
commit de631bef2e
6 changed files with 255 additions and 198 deletions
--- a/util/html_util.py
+++ b/util/html_util.py
@@ -0,0 +1,43 @@
+import logging
+import os
+import tempfile
+
+import requests
+from bs4 import BeautifulSoup
+from tenacity import retry, stop_after_attempt, wait_random
+
+
+@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
+       after=lambda x: logging.warning('获取江苏省财政票据idBase失败！'))
+def get_jsczt_id_base(url, timeout=30):
+    """Fetch a page and return the value of its hidden ``idBase`` input.
+
+    The call is attempted up to 3 times with a random 1-3 second wait between
+    attempts; the ``after`` hook logs a warning for a failed attempt and the
+    last exception is re-raised to the caller.
+
+    Args:
+        url: Address of the page to fetch (per the log messages, a Jiangsu
+            provincial fiscal-bill page).
+        timeout: Seconds passed to ``requests.get`` so that a stalled server
+            raises (and triggers a retry) instead of blocking forever.
+
+    Returns:
+        The ``value`` attribute of the ``<input name="idBase">`` element, or
+        ``None`` when the page contains no such element.
+
+    Raises:
+        Exception: If the response status code is not 200.
+        requests.RequestException: On connection errors or timeouts.
+    """
+    response = requests.get(url, timeout=timeout)
+    if response.status_code != 200:
+        raise Exception(f'请求江苏省财政票据失败！状态码: {response.status_code}')
+    soup = BeautifulSoup(response.text, 'html.parser')
+    hidden_input = soup.find('input', {'name': "idBase"})
+    if not hidden_input:
+        # A missing field is reported as None rather than retried.
+        return None
+    # Value of the hidden form field.
+    return hidden_input.get('value')
+
+
+@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
+       after=lambda x: logging.warning('下载pdf失败！'))
+def download_pdf(url, local_filename=None, timeout=30):
+    """Download ``url`` to a local file and return that file's path.
+
+    The call is attempted up to 3 times with a random 1-3 second wait between
+    attempts; the ``after`` hook logs a warning for a failed attempt and the
+    last exception is re-raised to the caller.
+
+    Args:
+        url: Address of the PDF to download.
+        local_filename: Destination path. When ``None``, a new temporary
+            ``.pdf`` file is created and its path is returned; the caller is
+            responsible for deleting it.
+        timeout: Seconds passed to ``requests.get`` so that a stalled server
+            raises (and triggers a retry) instead of blocking forever.
+
+    Returns:
+        The path the response body was written to.
+
+    Raises:
+        Exception: If the response status code is not 200.
+        requests.RequestException: On connection errors or timeouts.
+        OSError: If the destination file cannot be written.
+    """
+    # The context manager releases the streamed connection on every path,
+    # including the error paths below.
+    with requests.get(url, stream=True, timeout=timeout) as response:
+        # Check the status before touching the file system.
+        if response.status_code != 200:
+            raise Exception(f'下载pdf失败！状态码: {response.status_code}')
+        # If no file name was provided, fall back to a temporary file. It is
+        # created only after a successful response so that failed attempts
+        # (and their retries) do not leave orphaned temp files behind.
+        created_temp = local_filename is None
+        if created_temp:
+            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_pdf:
+                local_filename = temp_pdf.name
+        try:
+            # Stream the body to disk in binary mode, chunk by chunk.
+            with open(local_filename, 'wb') as file:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:  # filter out keep-alive new chunks
+                        file.write(chunk)
+        except BaseException:
+            # Only remove a file this function created itself; a path chosen
+            # by the caller is left untouched.
+            if created_temp:
+                try:
+                    os.remove(local_filename)
+                except OSError:
+                    pass  # best-effort cleanup; keep the original error
+            raise
+    return local_filename