添加无锡锡山人民医院票据处理

This commit is contained in:
2024-09-06 12:43:31 +08:00
parent 0b4ccd9b84
commit c2717c29b7
3 changed files with 63 additions and 28 deletions

View File

@@ -1,6 +1,6 @@
x-env:
&template
image: fcb_photo_review:1.14.1
image: fcb_photo_review:1.14.2
restart: always
x-review:

View File

@@ -76,25 +76,15 @@ def request_ie_result(task_enum, phrecs):
# 尝试从二维码中获取高清图片
def get_better_image_from_qrcode(image, image_id, dpi=150):
js_base_url = 'http://einvoice.jsczt.cn'
def _parse_pdf_url(pdf_url_to_parse):
pdf_file = None
local_pdf_path = None
try:
results = zxingcpp.read_barcodes(image)
except Exception as e:
logging.getLogger('error').info("二维码识别失败", exc_info=e)
results = []
for result in results:
pdf = None
pdf_path = None
try:
url = result.text
if url.startswith(js_base_url):
id_base = html_util.get_jsczt_id_base(url)
pdf_url = f'{js_base_url}/download?idBase={id_base}'
pdf_path = html_util.download_pdf(pdf_url)
local_pdf_path = html_util.download_pdf(pdf_url_to_parse)
# 打开PDF文件
pdf = fitz.open(pdf_path)
pdf_file = fitz.open(local_pdf_path)
# 选择第一页
page = pdf[0]
page = pdf_file[0]
# 定义缩放系数DPI
default_dpi = 72
zoom = dpi / default_dpi
@@ -106,16 +96,43 @@ def get_better_image_from_qrcode(image, image_id, dpi=150):
img = np.frombuffer(pix.samples, dtype=np.uint8).reshape((pix.height, pix.width, -1))
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
return img, page.get_text()
except Exception as ex:
logging.getLogger('error').error('解析pdf失败', exc_info=ex)
return None, None
finally:
if pdf_file:
pdf_file.close()
if local_pdf_path:
util.delete_temp_file(local_pdf_path)
jsczt_base_url = 'http://einvoice.jsczt.cn'
try:
results = zxingcpp.read_barcodes(image)
except Exception as e:
logging.getLogger('error').info('二维码识别失败', exc_info=e)
results = []
for result in results:
try:
url = result.text
if url.startswith(jsczt_base_url):
id_base = html_util.get_jsczt_id_base(url)
if not id_base:
continue
pdf_url = f'{jsczt_base_url}/download?idBase={id_base}'
return _parse_pdf_url(pdf_url)
elif url.startswith('http://dzfp.wxxsh.net'): # 无锡市锡山人民医院
pdf_url = html_util.get_wxxsh_pdf_url(url)
if not pdf_url:
continue
return _parse_pdf_url(pdf_url)
else:
logging.getLogger('qr').info(f'[{image_id}]中有未知二维码内容:{url}')
except Exception as e:
logging.getLogger('error').error('从二维码中获取高清图片时出错', exc_info=e)
continue
finally:
if pdf:
pdf.close()
if pdf_path:
util.delete_temp_file(pdf_path)
return None, None
@@ -206,9 +223,12 @@ def information_extraction(ie, phrecs, identity):
try:
ufile.upload_file(phrec.cfjaddress, temp_file.name)
if img_angle != '0':
logging.info(f'旋转图片[{phrec.cfjaddress}]替换成功,已旋转{img_angle}度。')
# 修正旋转角度
for zx_ie_result in zx_ie_results:
zx_ie_result.rotation_angle -= int(img_angle)
else:
logging.info(f'高清图片[{phrec.cfjaddress}]替换成功!')
except Exception as e:
logging.error(f'上传图片({phrec.cfjaddress})失败', exc_info=e)
finally:

View File

@@ -41,3 +41,18 @@ def download_pdf(url, local_filename=None):
if chunk: # filter out keep-alive new chunks
file.write(chunk)
return local_filename
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('获取无锡锡山人民医院票据失败!'))
def get_wxxsh_pdf_url(url, timeout=30):
    """Return the PDF link found on a Wuxi Xishan People's Hospital invoice page.

    The page at ``url`` is fetched and parsed as HTML; the function looks for
    the ``<a>`` element whose text is exactly '点击查看电子票据' and returns
    its ``href`` attribute.

    Per the decorator arguments, a failing call is attempted up to three
    times with a random 1-3 wait between attempts, a warning is logged via
    the ``after`` hook, and the last exception is re-raised to the caller.

    Args:
        url: Address of the invoice page (the text decoded from the QR code).
        timeout: Seconds passed to ``requests.get`` so that an unresponsive
            server raises (and is retried) instead of blocking forever.

    Returns:
        The ``href`` value of the matching link, or ``None`` when no such
        link exists or the link carries no ``href``. The value is returned
        as written in the page and is not resolved against ``url``.

    Raises:
        Exception: If the response status code is not 200.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection and the retry policy above would never get a chance to run.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f'请求无锡锡山人民医院票据失败!状态码: {response.status_code}')

    soup = BeautifulSoup(response.text, 'html.parser')
    # Locate the anchor by its visible caption.
    link = soup.find('a', string='点击查看电子票据')
    if link is None:
        return None
    # The PDF address is the anchor's href attribute.
    return link.get('href')