调整ocr参数

This commit is contained in:
2024-07-29 14:04:55 +08:00
parent d7ba4c1103
commit 9653d2348a
2 changed files with 4 additions and 4 deletions

View File

@@ -36,4 +36,4 @@ SIMILAR_CHAR = {
} }
# 如果不希望识别出空格，可以设置use_space_char=False。做此项设置一定要测试，2.7.3版本此项设置有bug，会导致识别失败 # 如果不希望识别出空格，可以设置use_space_char=False。做此项设置一定要测试，2.7.3版本此项设置有bug，会导致识别失败
OCR = PaddleOCR(use_angle_cls=False, show_log=False, det_db_box_thresh=0.1, det_limit_side_len=1248, drop_score=0.3) OCR = PaddleOCR(show_log=False, det_db_thresh=0.1, det_db_box_thresh=0.3, det_limit_side_len=1248, drop_score=0.3)

View File

@@ -64,7 +64,7 @@ def visual_model_test(model_type, test_img, task_path, schema):
temp_files_paths.append(temp_file.name) temp_files_paths.append(temp_file.name)
parsed_doc = util.get_ocr_layout( parsed_doc = util.get_ocr_layout(
PaddleOCR(det_db_box_thresh=0.1, det_limit_side_len=1248, drop_score=0.3), PaddleOCR(det_db_box_thresh=0.3, det_db_thresh=0.1, det_limit_side_len=1248, drop_score=0.3),
temp_file.name) temp_file.name)
# parsed_doc = doc_parser.parse({"doc": temp_file.name})["layout"] # parsed_doc = doc_parser.parse({"doc": temp_file.name})["layout"]
if img["x_offset"] or img["y_offset"]: if img["x_offset"] or img["y_offset"]:
@@ -120,7 +120,7 @@ def main(model_type, pic_name=None):
if model_type == "ocr": if model_type == "ocr":
task_path = None task_path = None
test_img_path = ufile.get_private_url(pic_name) if pic_name else "../test_img/IMG_20240723_131247.jpg" test_img_path = ufile.get_private_url(pic_name, "drg103") if pic_name else "../test_img/PH20240725004467_3_185708_1.jpg"
schema = None schema = None
elif model_type == "settlement": elif model_type == "settlement":
task_path = "../model/settlement_list_model" task_path = "../model/settlement_list_model"
@@ -150,7 +150,7 @@ def main(model_type, pic_name=None):
if __name__ == '__main__': if __name__ == '__main__':
main("ocr") main("ocr", "PH20240727000461_1_085531_1.PNG.jpg")
# main("settlement") # main("settlement")
# main("discharge") # main("discharge")
# main("cost") # main("cost")