diff --git a/docker-compose.yml b/docker-compose.yml index 8c99003..aa78057 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,6 @@ x-env: &template - image: fcb_photo_review:1.14.1 + image: fcb_photo_review:1.14.2 restart: always x-review: diff --git a/photo_review/auto_photo_review.py b/photo_review/auto_photo_review.py index 7e8832c..4342fa9 100644 --- a/photo_review/auto_photo_review.py +++ b/photo_review/auto_photo_review.py @@ -76,46 +76,63 @@ def request_ie_result(task_enum, phrecs): # 尝试从二维码中获取高清图片 def get_better_image_from_qrcode(image, image_id, dpi=150): - js_base_url = 'http://einvoice.jsczt.cn' + def _parse_pdf_url(pdf_url_to_parse): + pdf_file = None + local_pdf_path = None + try: + local_pdf_path = html_util.download_pdf(pdf_url_to_parse) + # 打开PDF文件 + pdf_file = fitz.open(local_pdf_path) + # 选择第一页 + page = pdf_file[0] + # 定义缩放系数(DPI) + default_dpi = 72 + zoom = dpi / default_dpi + # 设置矩阵变换参数 + mat = fitz.Matrix(zoom, zoom) + # 渲染页面 + pix = page.get_pixmap(matrix=mat) + # 将渲染结果转换为OpenCV兼容的格式 + img = np.frombuffer(pix.samples, dtype=np.uint8).reshape((pix.height, pix.width, -1)) + img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) + return img, page.get_text() + except Exception as ex: + logging.getLogger('error').error('解析pdf失败!', exc_info=ex) + return None, None + finally: + if pdf_file: + pdf_file.close() + if local_pdf_path: + util.delete_temp_file(local_pdf_path) + + jsczt_base_url = 'http://einvoice.jsczt.cn' try: results = zxingcpp.read_barcodes(image) except Exception as e: - logging.getLogger('error').info("二维码识别失败", exc_info=e) + logging.getLogger('error').info('二维码识别失败', exc_info=e) results = [] for result in results: - pdf = None - pdf_path = None try: url = result.text - if url.startswith(js_base_url): + if url.startswith(jsczt_base_url): id_base = html_util.get_jsczt_id_base(url) - pdf_url = f'{js_base_url}/download?idBase={id_base}' - pdf_path = html_util.download_pdf(pdf_url) - # 打开PDF文件 - pdf = fitz.open(pdf_path) - # 选择第一页 - page = pdf[0] - # 定义缩放系数(DPI) - default_dpi = 72 - zoom = dpi / default_dpi - # 设置矩阵变换参数 - mat = fitz.Matrix(zoom, zoom) - # 渲染页面 - pix = page.get_pixmap(matrix=mat) - # 将渲染结果转换为OpenCV兼容的格式 - img = np.frombuffer(pix.samples, dtype=np.uint8).reshape((pix.height, pix.width, -1)) - img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) - return img, page.get_text() + if not id_base: + continue + + pdf_url = f'{jsczt_base_url}/download?idBase={id_base}' + return _parse_pdf_url(pdf_url) + elif url.startswith('http://dzfp.wxxsh.net'): # 无锡市锡山人民医院 + pdf_url = html_util.get_wxxsh_pdf_url(url) + if not pdf_url: + continue + + return _parse_pdf_url(pdf_url) else: logging.getLogger('qr').info(f'[{image_id}]中有未知二维码内容:{url}') except Exception as e: logging.getLogger('error').error('从二维码中获取高清图片时出错', exc_info=e) continue - finally: - if pdf: - pdf.close() - if pdf_path: - util.delete_temp_file(pdf_path) + return None, None @@ -206,9 +223,12 @@ def information_extraction(ie, phrecs, identity): try: ufile.upload_file(phrec.cfjaddress, temp_file.name) if img_angle != '0': + logging.info(f'旋转图片[{phrec.cfjaddress}]替换成功,已旋转{img_angle}度。') # 修正旋转角度 for zx_ie_result in zx_ie_results: zx_ie_result.rotation_angle -= int(img_angle) + else: + logging.info(f'高清图片[{phrec.cfjaddress}]替换成功!') except Exception as e: logging.error(f'上传图片({phrec.cfjaddress})失败', exc_info=e) finally: diff --git a/util/html_util.py b/util/html_util.py index caeeda2..16c1525 100644 --- a/util/html_util.py +++ b/util/html_util.py @@ -41,3 +41,18 @@ def download_pdf(url, local_filename=None): if chunk: # filter out keep-alive new chunks file.write(chunk) return local_filename + + +@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, + after=lambda x: logging.warning('获取无锡锡山人民医院票据失败!')) +def get_wxxsh_pdf_url(url): + response = requests.get(url) + if response.status_code != 200: + raise Exception(f'请求无锡锡山人民医院票据失败!状态码: {response.status_code}') + soup = BeautifulSoup(response.text, 'html.parser') + pdf_url = soup.find('a', string='点击查看电子票据') + if pdf_url: + # 获取隐藏字段的值 + value = pdf_url.get('href') + return value + return None