过滤页码异常的值
This commit is contained in:
@@ -195,6 +195,8 @@ def parse_page_num(page_list):
|
||||
page_texts = [p.get('text', '') for p in page]
|
||||
join = ''.join(page_texts)
|
||||
numbers = re.findall(r'\d+', join)
|
||||
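# Filter out outliers: keep only numbers <= 30 (values remain strings, so min() below compares them lexicographically)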
|
||||
numbers = [num for num in numbers if int(num) <= 30]
|
||||
if not numbers:
|
||||
continue
|
||||
pages.append(min(numbers))
|
||||
|
||||
Reference in New Issue
Block a user