修正页码解析

This commit is contained in:
2024-10-12 15:46:50 +08:00
parent 0b0882d456
commit 19237d3a3c

View File

@@ -186,7 +186,8 @@ def parse_page_num(page_list):
     pages = []
     total = []
     for page in page_list:
-        join = ''.join(page)
+        page_texts = [p.get('text', '') for p in page]
+        join = ''.join(page_texts)
         numbers = re.findall(r'\d+', join)
         pages.append(min(numbers))
         total.append(max(numbers))