修正页码排序
This commit is contained in:
@@ -194,14 +194,16 @@ def parse_page_num(page_list):
|
||||
for page in page_list:
|
||||
page_texts = [p.get('text', '') for p in page]
|
||||
join = ''.join(page_texts)
|
||||
numbers = re.findall(r'\d+', join)
|
||||
numbers_str = re.findall(r'\d+', join)
|
||||
# 过滤异常值
|
||||
numbers = [num for num in numbers if int(num) <= 30]
|
||||
numbers = [int(num) for num in numbers_str if int(num) <= 30]
|
||||
if not numbers:
|
||||
continue
|
||||
pages.append(min(numbers))
|
||||
total.append(max(numbers))
|
||||
return pages, int(max(total if total else ['1']))
|
||||
if not pages:
|
||||
return None, None
|
||||
return pages, max(total)
|
||||
|
||||
|
||||
def handle_tiny_int(num):
|
||||
|
||||
Reference in New Issue
Block a user