diff --git a/util/data_util.py b/util/data_util.py index ce2fad0..be46097 100644 --- a/util/data_util.py +++ b/util/data_util.py @@ -186,7 +186,8 @@ def parse_page_num(page_list): pages = [] total = [] for page in page_list: - join = ''.join(page) + page_texts = [p.get('text', '') for p in page] + join = ''.join(page_texts) numbers = re.findall(r'\d+', join) pages.append(min(numbers)) total.append(max(numbers))