修正页码分析

This commit is contained in:
2024-10-17 14:55:19 +08:00
parent 0e4cfd10b6
commit b5dffaf5bd

View File

@@ -195,6 +195,8 @@ def parse_page_num(page_list):
page_texts = [p.get('text', '') for p in page]
join = ''.join(page_texts)
numbers = re.findall(r'\d+', join)
if not numbers:
continue
pages.append(min(numbers))
total.append(max(numbers))
return pages, max(total)