添加无锡锡山人民医院票据处理
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
x-env:
|
x-env:
|
||||||
&template
|
&template
|
||||||
image: fcb_photo_review:1.14.1
|
image: fcb_photo_review:1.14.2
|
||||||
restart: always
|
restart: always
|
||||||
|
|
||||||
x-review:
|
x-review:
|
||||||
|
|||||||
@@ -76,25 +76,15 @@ def request_ie_result(task_enum, phrecs):
|
|||||||
|
|
||||||
# 尝试从二维码中获取高清图片
|
# 尝试从二维码中获取高清图片
|
||||||
def get_better_image_from_qrcode(image, image_id, dpi=150):
|
def get_better_image_from_qrcode(image, image_id, dpi=150):
|
||||||
js_base_url = 'http://einvoice.jsczt.cn'
|
def _parse_pdf_url(pdf_url_to_parse):
|
||||||
|
pdf_file = None
|
||||||
|
local_pdf_path = None
|
||||||
try:
|
try:
|
||||||
results = zxingcpp.read_barcodes(image)
|
local_pdf_path = html_util.download_pdf(pdf_url_to_parse)
|
||||||
except Exception as e:
|
|
||||||
logging.getLogger('error').info("二维码识别失败", exc_info=e)
|
|
||||||
results = []
|
|
||||||
for result in results:
|
|
||||||
pdf = None
|
|
||||||
pdf_path = None
|
|
||||||
try:
|
|
||||||
url = result.text
|
|
||||||
if url.startswith(js_base_url):
|
|
||||||
id_base = html_util.get_jsczt_id_base(url)
|
|
||||||
pdf_url = f'{js_base_url}/download?idBase={id_base}'
|
|
||||||
pdf_path = html_util.download_pdf(pdf_url)
|
|
||||||
# 打开PDF文件
|
# 打开PDF文件
|
||||||
pdf = fitz.open(pdf_path)
|
pdf_file = fitz.open(local_pdf_path)
|
||||||
# 选择第一页
|
# 选择第一页
|
||||||
page = pdf[0]
|
page = pdf_file[0]
|
||||||
# 定义缩放系数(DPI)
|
# 定义缩放系数(DPI)
|
||||||
default_dpi = 72
|
default_dpi = 72
|
||||||
zoom = dpi / default_dpi
|
zoom = dpi / default_dpi
|
||||||
@@ -106,16 +96,43 @@ def get_better_image_from_qrcode(image, image_id, dpi=150):
|
|||||||
img = np.frombuffer(pix.samples, dtype=np.uint8).reshape((pix.height, pix.width, -1))
|
img = np.frombuffer(pix.samples, dtype=np.uint8).reshape((pix.height, pix.width, -1))
|
||||||
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
|
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
|
||||||
return img, page.get_text()
|
return img, page.get_text()
|
||||||
|
except Exception as ex:
|
||||||
|
logging.getLogger('error').error('解析pdf失败!', exc_info=ex)
|
||||||
|
return None, None
|
||||||
|
finally:
|
||||||
|
if pdf_file:
|
||||||
|
pdf_file.close()
|
||||||
|
if local_pdf_path:
|
||||||
|
util.delete_temp_file(local_pdf_path)
|
||||||
|
|
||||||
|
jsczt_base_url = 'http://einvoice.jsczt.cn'
|
||||||
|
try:
|
||||||
|
results = zxingcpp.read_barcodes(image)
|
||||||
|
except Exception as e:
|
||||||
|
logging.getLogger('error').info('二维码识别失败', exc_info=e)
|
||||||
|
results = []
|
||||||
|
for result in results:
|
||||||
|
try:
|
||||||
|
url = result.text
|
||||||
|
if url.startswith(jsczt_base_url):
|
||||||
|
id_base = html_util.get_jsczt_id_base(url)
|
||||||
|
if not id_base:
|
||||||
|
continue
|
||||||
|
|
||||||
|
pdf_url = f'{jsczt_base_url}/download?idBase={id_base}'
|
||||||
|
return _parse_pdf_url(pdf_url)
|
||||||
|
elif url.startswith('http://dzfp.wxxsh.net'): # 无锡市锡山人民医院
|
||||||
|
pdf_url = html_util.get_wxxsh_pdf_url(url)
|
||||||
|
if not pdf_url:
|
||||||
|
continue
|
||||||
|
|
||||||
|
return _parse_pdf_url(pdf_url)
|
||||||
else:
|
else:
|
||||||
logging.getLogger('qr').info(f'[{image_id}]中有未知二维码内容:{url}')
|
logging.getLogger('qr').info(f'[{image_id}]中有未知二维码内容:{url}')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.getLogger('error').error('从二维码中获取高清图片时出错', exc_info=e)
|
logging.getLogger('error').error('从二维码中获取高清图片时出错', exc_info=e)
|
||||||
continue
|
continue
|
||||||
finally:
|
|
||||||
if pdf:
|
|
||||||
pdf.close()
|
|
||||||
if pdf_path:
|
|
||||||
util.delete_temp_file(pdf_path)
|
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
|
|
||||||
@@ -206,9 +223,12 @@ def information_extraction(ie, phrecs, identity):
|
|||||||
try:
|
try:
|
||||||
ufile.upload_file(phrec.cfjaddress, temp_file.name)
|
ufile.upload_file(phrec.cfjaddress, temp_file.name)
|
||||||
if img_angle != '0':
|
if img_angle != '0':
|
||||||
|
logging.info(f'旋转图片[{phrec.cfjaddress}]替换成功,已旋转{img_angle}度。')
|
||||||
# 修正旋转角度
|
# 修正旋转角度
|
||||||
for zx_ie_result in zx_ie_results:
|
for zx_ie_result in zx_ie_results:
|
||||||
zx_ie_result.rotation_angle -= int(img_angle)
|
zx_ie_result.rotation_angle -= int(img_angle)
|
||||||
|
else:
|
||||||
|
logging.info(f'高清图片[{phrec.cfjaddress}]替换成功!')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f'上传图片({phrec.cfjaddress})失败', exc_info=e)
|
logging.error(f'上传图片({phrec.cfjaddress})失败', exc_info=e)
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
@@ -41,3 +41,18 @@ def download_pdf(url, local_filename=None):
|
|||||||
if chunk: # filter out keep-alive new chunks
|
if chunk: # filter out keep-alive new chunks
|
||||||
file.write(chunk)
|
file.write(chunk)
|
||||||
return local_filename
|
return local_filename
|
||||||
|
|
||||||
|
|
||||||
|
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('获取无锡锡山人民医院票据失败!'))
def get_wxxsh_pdf_url(url, timeout=10):
    """Return the e-invoice PDF link found on a Wuxi Xishan People's Hospital page.

    The page at ``url`` is fetched and parsed as HTML; the link is taken from
    the ``href`` of the ``<a>`` element whose text is exactly '点击查看电子票据'.

    Per the decorator arguments, a raised exception triggers up to three
    attempts in total with a random 1-3 second wait between them, a warning is
    logged after each failed attempt, and the last exception is re-raised.

    Args:
        url: Address of the invoice page (the text decoded from the QR code).
        timeout: Seconds passed to ``requests.get`` so an unresponsive server
            raises (and is retried) instead of blocking the caller forever.

    Returns:
        The PDF URL resolved against ``url``, or ``None`` when the page has no
        matching anchor or the anchor carries no usable ``href``.

    Raises:
        Exception: If the response status code is not 200.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f'请求无锡锡山人民医院票据失败!状态码: {response.status_code}')

    soup = BeautifulSoup(response.text, 'html.parser')
    link = soup.find('a', string='点击查看电子票据')
    if link is None:
        return None

    # Read the anchor's target; a missing or empty href means "no PDF link".
    href = link.get('href')
    if not href:
        return None

    # An absolute href is kept as the target; a relative one is resolved
    # against the page address so it can be downloaded directly.
    return urljoin(url, href)
|
||||||
|
|||||||
Reference in New Issue
Block a user