From 19237d3a3c1d3dc30550679a905cc1d015fdd441 Mon Sep 17 00:00:00 2001 From: liuyebo <1515783401@qq.com> Date: Sat, 12 Oct 2024 15:46:50 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E9=A1=B5=E7=A0=81=E8=A7=A3?= =?UTF-8?q?=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- util/data_util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/util/data_util.py b/util/data_util.py index ce2fad0..be46097 100644 --- a/util/data_util.py +++ b/util/data_util.py @@ -186,7 +186,8 @@ def parse_page_num(page_list): pages = [] total = [] for page in page_list: - join = ''.join(page) + page_texts = [p.get('text', '') for p in page] + join = ''.join(page_texts) numbers = re.findall(r'\d+', join) pages.append(min(numbers)) total.append(max(numbers))