添加无锡锡山人民医院票据处理
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
x-env:
|
||||
&template
|
||||
image: fcb_photo_review:1.14.1
|
||||
image: fcb_photo_review:1.14.2
|
||||
restart: always
|
||||
|
||||
x-review:
|
||||
|
||||
@@ -76,25 +76,15 @@ def request_ie_result(task_enum, phrecs):
|
||||
|
||||
# 尝试从二维码中获取高清图片
|
||||
def get_better_image_from_qrcode(image, image_id, dpi=150):
|
||||
js_base_url = 'http://einvoice.jsczt.cn'
|
||||
def _parse_pdf_url(pdf_url_to_parse):
|
||||
pdf_file = None
|
||||
local_pdf_path = None
|
||||
try:
|
||||
results = zxingcpp.read_barcodes(image)
|
||||
except Exception as e:
|
||||
logging.getLogger('error').info("二维码识别失败", exc_info=e)
|
||||
results = []
|
||||
for result in results:
|
||||
pdf = None
|
||||
pdf_path = None
|
||||
try:
|
||||
url = result.text
|
||||
if url.startswith(js_base_url):
|
||||
id_base = html_util.get_jsczt_id_base(url)
|
||||
pdf_url = f'{js_base_url}/download?idBase={id_base}'
|
||||
pdf_path = html_util.download_pdf(pdf_url)
|
||||
local_pdf_path = html_util.download_pdf(pdf_url_to_parse)
|
||||
# 打开PDF文件
|
||||
pdf = fitz.open(pdf_path)
|
||||
pdf_file = fitz.open(local_pdf_path)
|
||||
# 选择第一页
|
||||
page = pdf[0]
|
||||
page = pdf_file[0]
|
||||
# 定义缩放系数(DPI)
|
||||
default_dpi = 72
|
||||
zoom = dpi / default_dpi
|
||||
@@ -106,16 +96,43 @@ def get_better_image_from_qrcode(image, image_id, dpi=150):
|
||||
img = np.frombuffer(pix.samples, dtype=np.uint8).reshape((pix.height, pix.width, -1))
|
||||
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
|
||||
return img, page.get_text()
|
||||
except Exception as ex:
|
||||
logging.getLogger('error').error('解析pdf失败!', exc_info=ex)
|
||||
return None, None
|
||||
finally:
|
||||
if pdf_file:
|
||||
pdf_file.close()
|
||||
if local_pdf_path:
|
||||
util.delete_temp_file(local_pdf_path)
|
||||
|
||||
jsczt_base_url = 'http://einvoice.jsczt.cn'
|
||||
try:
|
||||
results = zxingcpp.read_barcodes(image)
|
||||
except Exception as e:
|
||||
logging.getLogger('error').info('二维码识别失败', exc_info=e)
|
||||
results = []
|
||||
for result in results:
|
||||
try:
|
||||
url = result.text
|
||||
if url.startswith(jsczt_base_url):
|
||||
id_base = html_util.get_jsczt_id_base(url)
|
||||
if not id_base:
|
||||
continue
|
||||
|
||||
pdf_url = f'{jsczt_base_url}/download?idBase={id_base}'
|
||||
return _parse_pdf_url(pdf_url)
|
||||
elif url.startswith('http://dzfp.wxxsh.net'): # 无锡市锡山人民医院
|
||||
pdf_url = html_util.get_wxxsh_pdf_url(url)
|
||||
if not pdf_url:
|
||||
continue
|
||||
|
||||
return _parse_pdf_url(pdf_url)
|
||||
else:
|
||||
logging.getLogger('qr').info(f'[{image_id}]中有未知二维码内容:{url}')
|
||||
except Exception as e:
|
||||
logging.getLogger('error').error('从二维码中获取高清图片时出错', exc_info=e)
|
||||
continue
|
||||
finally:
|
||||
if pdf:
|
||||
pdf.close()
|
||||
if pdf_path:
|
||||
util.delete_temp_file(pdf_path)
|
||||
|
||||
return None, None
|
||||
|
||||
|
||||
@@ -206,9 +223,12 @@ def information_extraction(ie, phrecs, identity):
|
||||
try:
|
||||
ufile.upload_file(phrec.cfjaddress, temp_file.name)
|
||||
if img_angle != '0':
|
||||
logging.info(f'旋转图片[{phrec.cfjaddress}]替换成功,已旋转{img_angle}度。')
|
||||
# 修正旋转角度
|
||||
for zx_ie_result in zx_ie_results:
|
||||
zx_ie_result.rotation_angle -= int(img_angle)
|
||||
else:
|
||||
logging.info(f'高清图片[{phrec.cfjaddress}]替换成功!')
|
||||
except Exception as e:
|
||||
logging.error(f'上传图片({phrec.cfjaddress})失败', exc_info=e)
|
||||
finally:
|
||||
|
||||
@@ -41,3 +41,18 @@ def download_pdf(url, local_filename=None):
|
||||
if chunk: # filter out keep-alive new chunks
|
||||
file.write(chunk)
|
||||
return local_filename
|
||||
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('获取无锡锡山人民医院票据失败!'))
def get_wxxsh_pdf_url(url, timeout=10):
    """Return the e-invoice PDF link found on a Wuxi Xishan People's Hospital page.

    The page at ``url`` is fetched and parsed as HTML; the function looks for
    the anchor whose text is exactly '点击查看电子票据' and returns its
    ``href`` attribute.

    The call is wrapped by ``retry``: it is attempted up to 3 times with a
    random 1-3 wait between attempts, a warning is logged after each
    attempt, and the last exception is re-raised once the attempts run out.

    Args:
        url: Address of the invoice page (normally decoded from a QR code).
        timeout: Seconds passed to ``requests.get`` as its timeout. Without a
            timeout a stalled server would block this call indefinitely and
            the retry policy would never get a chance to run.

    Returns:
        The ``href`` value of the matching anchor, or ``None`` when the page
        has no such anchor (or the anchor carries no ``href``).

    Raises:
        Exception: If the response status code is not 200.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f'请求无锡锡山人民医院票据失败!状态码: {response.status_code}')

    soup = BeautifulSoup(response.text, 'html.parser')
    # Locate the "view e-invoice" anchor by its exact link text.
    link = soup.find('a', string='点击查看电子票据')
    if link is None:
        return None

    # The anchor's href is the address of the PDF to download.
    return link.get('href')
|
||||
|
||||
Reference in New Issue
Block a user