import functools
import logging
import os
from datetime import datetime

from opencc import OpenCC

from util import string_util

# Non-section units ("ten", "hundred", "thousand") multiply the single digit
# that precedes them; section units ("ten-thousand", "hundred-million") close
# a whole four-digit section.  Value is (weight, is_section_unit).
_MONEY_UNITS = {
    "拾": (10, False),
    "十": (10, False),
    "佰": (100, False),
    "百": (100, False),
    "仟": (1000, False),
    "千": (1000, False),
    "万": (10000, True),
    "亿": (100000000, True),
}

# Both the everyday and the financial (anti-forgery) digit characters.
_CHINESE_DIGITS = {
    "零": 0,
    "一": 1, "壹": 1,
    "二": 2, "贰": 2,
    "三": 3, "叁": 3,
    "四": 4, "肆": 4,
    "五": 5, "伍": 5,
    "六": 6, "陆": 6,
    "七": 7, "柒": 7,
    "八": 8, "捌": 8,
    "九": 9, "玖": 9,
}


def get_default_datetime():
    """Return the current local time formatted as ``yyyy-MM-dd HH:mm:ss``."""
    return datetime.now().strftime('%Y-%m-%d %H:%M:%S')


def get_ocr_layout(ocr, img_path, is_screenshot=False):
    """
    Run OCR on an image and convert the result into a layout list.

    :param ocr: OCR model exposing ``predict(input=..., use_doc_orientation_classify=...,
        use_doc_unwarping=...)``; the first result it produces is used
    :param img_path: local path of the image
    :param is_screenshot: whether the image is a screenshot; orientation
        classification and unwarping are switched off for screenshots
    :return: ``(layout, angle)`` where ``layout`` is a list of
        ``([x1, y1, x2, y2], text)`` tuples and ``angle`` is the value of
        ``doc_preprocessor_res["angle"]`` as a string (``"0"`` when absent)
    """

    def _get_box(old_box):
        # Collapse a 4-point polygon into an axis-aligned [x1, y1, x2, y2] box.
        # NOTE(review): assumes the points are ordered top-left, top-right,
        # bottom-right, bottom-left - confirm against the OCR model output.
        return [
            min(old_box[0][0], old_box[3][0]),  # x1
            min(old_box[0][1], old_box[1][1]),  # y1
            max(old_box[1][0], old_box[2][0]),  # x2
            max(old_box[2][1], old_box[3][1]),  # y2
        ]

    def _normal_box(box_data):
        # Reject boxes with a negative width or height (zero-size boxes are kept).
        height = box_data[3] - box_data[1]
        width = box_data[2] - box_data[0]
        return not (height < 0 or width < 0)

    layout = []
    prediction = ocr.predict(
        input=img_path,
        use_doc_orientation_classify=not is_screenshot,
        use_doc_unwarping=not is_screenshot,
    )
    # ``predict`` may hand back a generator or a plain list depending on the
    # OCR backend; ``iter`` makes both work and the default avoids
    # StopIteration when nothing was produced.
    ocr_result = next(iter(prediction), None)
    if not ocr_result:
        return layout, "0"

    preprocess = ocr_result.get("doc_preprocessor_res")
    angle = preprocess.get("angle", "0") if preprocess else "0"

    texts = ocr_result.get("rec_texts")
    polys = ocr_result.get("rec_polys")
    if texts is None or polys is None:
        # Nothing recognised; still report the detected angle.
        return layout, str(angle)

    for poly, text in zip(polys, texts):
        box = _get_box(poly.tolist())
        if not _normal_box(box):
            continue
        layout.append((box, text))
    return layout, str(angle)


def delete_temp_file(temp_files):
    """
    Delete one or more temporary files (best effort).

    Failures are logged as warnings and never raised, so one undeletable file
    does not stop the remaining ones from being removed.

    :param temp_files: a single path string or an iterable of paths; a falsy
        value is a no-op
    """
    if not temp_files:
        return
    if isinstance(temp_files, str):
        temp_files = [temp_files]
    for file in temp_files:
        try:
            os.remove(file)
            logging.info(f"临时文件 {file} 已删除")
        except Exception as e:
            # Deliberately broad: cleanup must never break the caller.
            logging.warning(f"删除临时文件 {file} 时出错: {e}")


def zoom_rectangle(rectangle, ratio):
    """
    Enlarge a rectangle by a ratio.

    Each coordinate is moved by ``ratio`` times its own value: the top-left
    corner is reduced and the bottom-right corner is increased, then all four
    values are rounded to integers.

    :param rectangle: original rectangle as ``(x1, y1, x2, y2)``
    :param ratio: zoom ratio
    :return: zoomed rectangle as ``[x1, y1, x2, y2]``
    """
    x1, y1, x2, y2 = rectangle
    return [
        round(x1 - x1 * ratio),
        round(y1 - y1 * ratio),
        round(x2 + x2 * ratio),
        round(y2 + y2 * ratio),
    ]


def chinese_to_money_unit(chinese):
    """
    Map a Chinese unit character to its numeric weight.

    :param chinese: a single character such as "十", "佰" or "万"
    :return: ``(weight, is_section_unit)``; ``(None, False)`` for an unknown
        character.  "万" and "亿" are section units.
    """
    return _MONEY_UNITS.get(chinese, (None, False))


def chinese_char_to_number(chinese):
    """
    Map a Chinese digit character (everyday or financial form) to 0-9.

    :param chinese: a single character such as "三" or "叁"
    :return: the digit, or ``-1`` when the character is not a digit
    """
    return _CHINESE_DIGITS.get(chinese, -1)


def chinese_to_number(chinese):
    """
    Convert a Chinese numeral string (e.g. "一万二千三百") to an integer.

    A trailing digit without a unit inherits one tenth of the preceding unit,
    so the colloquial "一百二" is read as 120.  A string that starts with a
    unit gets an implicit leading one ("十二" -> 12).

    :param chinese: the numeral string, digits and units only
    :return: the integer value (``0`` for an empty string)
    :raises ValueError: on two consecutive non-zero digits or an unknown character
    """
    result = 0   # sum of the sections already closed by "万" / "亿"
    section = 0  # running total of the current section
    number = 0   # last digit read that is still waiting for its unit
    unit_value = None  # weight of the most recent unit; None when inactive

    for i, c in enumerate(chinese):
        num = chinese_char_to_number(c)
        if num >= 0:
            if num == 0:
                # A zero ends the current weight: flush any pending digit at
                # one tenth of the last unit, then forget that unit.
                if number > 0 and unit_value is not None:
                    section += number * (unit_value // 10)
                unit_value = None
            elif number > 0:
                # Two non-zero digits in a row are not a valid numeral.
                raise ValueError(f"{chinese} has bad number '{chinese[i - 1]}{c}' at: {i}")
            number = num
        else:
            unit_value, is_section_unit = chinese_to_money_unit(c)
            if unit_value is None:
                raise ValueError(f"{chinese} has unknown unit '{c}' at: {i}")
            if is_section_unit:
                # Section unit: scale everything gathered so far and close it.
                section = (section + number) * unit_value
                result += section
                section = 0
            else:
                unit_number = number
                if number == 0 and i == 0:
                    # A leading unit implies "one": "十二" -> "一十二".
                    unit_number = 1
                section += unit_number * unit_value
            number = 0

    if number > 0 and unit_value is not None:
        # Integer division keeps the result an int; every unit is a multiple
        # of 10, so nothing is lost.
        number *= unit_value // 10
    return result + section + number


def chinese_money_to_number(chinese_money_amount):
    """
    Convert a Chinese money amount (e.g. "壹佰元伍角叁分") to a number.

    The text before "元"/"圆" is read as yuan, the text between that and "角"
    as jiao (1/10) and the text before "分" as fen (1/100).

    :param chinese_money_amount: the amount text
    :return: the amount rounded to two decimals, or ``None`` for blank input
    :raises ValueError: when none of the unit characters is present, or a
        part cannot be parsed as a Chinese numeral
    """
    if string_util.blank(chinese_money_amount):
        return None

    yi = chinese_money_amount.find("元")
    if yi == -1:
        yi = chinese_money_amount.find("圆")
    ji = chinese_money_amount.find("角")
    fi = chinese_money_amount.find("分")
    if yi == -1 and ji == -1 and fi == -1:
        raise ValueError(f"无法解析: {chinese_money_amount}")

    # Yuan part: everything before the yuan marker.
    y_str = None
    if yi > 0:
        y_str = chinese_money_amount[0:yi]

    # Jiao part: between the yuan marker (when present) and the jiao marker.
    j_str = None
    if ji > 0:
        if yi >= 0:
            if ji > yi:
                j_str = chinese_money_amount[yi + 1:ji]
        else:
            j_str = chinese_money_amount[0:ji]

    # Fen part: after the nearest preceding marker (jiao, else yuan).
    f_str = None
    if fi > 0:
        if ji >= 0:
            if fi > ji:
                f_str = chinese_money_amount[ji + 1:fi]
        elif yi >= 0:
            # ``>=`` (not ``>``) so a leading yuan marker is skipped rather
            # than being fed to the numeral parser as part of the fen text.
            if fi > yi:
                f_str = chinese_money_amount[yi + 1:fi]
        else:
            f_str = chinese_money_amount[0:fi]

    y = 0
    j = 0
    f = 0
    if not string_util.blank(y_str):
        y = chinese_to_number(y_str)
    if not string_util.blank(j_str):
        j = chinese_to_number(j_str)
    if not string_util.blank(f_str):
        f = chinese_to_number(f_str)

    amount = y
    amount += j / 10
    amount += f / 100
    return round(amount, 2)


@functools.lru_cache(maxsize=1)
def _t2s_converter():
    """Build the Traditional-to-Simplified converter once and reuse it."""
    return OpenCC('t2s')


def traditional_to_simple_chinese(traditional_chinese):
    """
    Convert Traditional Chinese text to Simplified Chinese.

    :param traditional_chinese: text to convert
    :return: the converted text
    """
    return _t2s_converter().convert(traditional_chinese)