修正页码排序

This commit is contained in:
2024-10-18 10:49:01 +08:00
parent d85b3fff8f
commit 3f93bd476a
2 changed files with 12 additions and 7 deletions

View File

@@ -194,14 +194,16 @@ def parse_page_num(page_list):
     for page in page_list:
         page_texts = [p.get('text', '') for p in page]
         join = ''.join(page_texts)
-        numbers = re.findall(r'\d+', join)
+        numbers_str = re.findall(r'\d+', join)
         # 过滤异常值
-        numbers = [num for num in numbers if int(num) <= 30]
+        numbers = [int(num) for num in numbers_str if int(num) <= 30]
         if not numbers:
             continue
         pages.append(min(numbers))
         total.append(max(numbers))
-    return pages, int(max(total if total else ['1']))
+    if not pages:
+        return None, None
+    return pages, max(total)


 def handle_tiny_int(num):