更换文档检测模型
This commit is contained in:
340
paddle_detection/deploy/pipeline/download.py
Normal file
340
paddle_detection/deploy/pipeline/download.py
Normal file
@@ -0,0 +1,340 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os, sys
|
||||
import os.path as osp
|
||||
import hashlib
|
||||
import requests
|
||||
import shutil
|
||||
import tqdm
|
||||
import time
|
||||
import tarfile
|
||||
import zipfile
|
||||
from paddle.utils.download import _get_unique_endpoints
|
||||
|
||||
PPDET_WEIGHTS_DOWNLOAD_URL_PREFIX = 'https://paddledet.bj.bcebos.com/'
|
||||
|
||||
DOWNLOAD_RETRY_LIMIT = 3
|
||||
|
||||
WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/infer_weights")
|
||||
|
||||
MODEL_URL_MD5_DICT = {
|
||||
'https://bj.bcebos.com/v1/paddledet/models/pipeline/ch_PP-OCRv3_det_infer.tar.gz':
|
||||
'1b8eae0f098635699bd4e8bccf3067a7',
|
||||
'https://bj.bcebos.com/v1/paddledet/models/pipeline/ch_PP-OCRv3_rec_infer.tar.gz':
|
||||
'64fa0e0701efd93c7db52a9b685b3de6',
|
||||
"https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_ppvehicle.zip":
|
||||
"3859d1a26e0c498285c2374b1a347013",
|
||||
"https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_s_36e_ppvehicle.zip":
|
||||
"4ed58b546be2a76d8ccbb138f64874ac",
|
||||
"https://bj.bcebos.com/v1/paddledet/models/pipeline/dark_hrnet_w32_256x192.zip":
|
||||
"a20d5f6ca087bff0e9f2b18df45a36f2",
|
||||
"https://bj.bcebos.com/v1/paddledet/models/pipeline/PPLCNet_x1_0_person_attribute_945_infer.zip":
|
||||
"1dfb161bf12bbc1365b2ed6866674483",
|
||||
"https://videotag.bj.bcebos.com/PaddleVideo-release2.3/ppTSM_fight.zip":
|
||||
"5d4609142501258608bf0a1445eedaba",
|
||||
"https://bj.bcebos.com/v1/paddledet/models/pipeline/STGCN.zip":
|
||||
"cf1c3c4bae90b975accb954d13129ea4",
|
||||
"https://bj.bcebos.com/v1/paddledet/models/pipeline/ppyoloe_crn_s_80e_smoking_visdrone.zip":
|
||||
"4cd12ae55be8f0eb2b90c08ac3b48218",
|
||||
"https://bj.bcebos.com/v1/paddledet/models/pipeline/PPHGNet_tiny_calling_halfbody.zip":
|
||||
"cf86b87ace97540dace6ef08e62b584a",
|
||||
"https://bj.bcebos.com/v1/paddledet/models/pipeline/reid_model.zip":
|
||||
"fdc4dac38393b8e2b5921c1e1fdd5315"
|
||||
}
|
||||
|
||||
|
||||
def is_url(path):
|
||||
"""
|
||||
Whether path is URL.
|
||||
Args:
|
||||
path (string): URL string or not.
|
||||
"""
|
||||
return path.startswith('http://') \
|
||||
or path.startswith('https://') \
|
||||
or path.startswith('ppdet://')
|
||||
|
||||
|
||||
def parse_url(url):
|
||||
url = url.replace("ppdet://", PPDET_WEIGHTS_DOWNLOAD_URL_PREFIX)
|
||||
return url
|
||||
|
||||
|
||||
def map_path(url, root_dir, path_depth=1):
|
||||
# parse path after download to decompress under root_dir
|
||||
assert path_depth > 0, "path_depth should be a positive integer"
|
||||
dirname = url
|
||||
for _ in range(path_depth):
|
||||
dirname = osp.dirname(dirname)
|
||||
fpath = osp.relpath(url, dirname)
|
||||
|
||||
zip_formats = ['.zip', '.tar', '.gz']
|
||||
for zip_format in zip_formats:
|
||||
fpath = fpath.replace(zip_format, '')
|
||||
return osp.join(root_dir, fpath)
|
||||
|
||||
|
||||
def _md5check(fullname, md5sum=None):
|
||||
if md5sum is None:
|
||||
return True
|
||||
|
||||
md5 = hashlib.md5()
|
||||
with open(fullname, 'rb') as f:
|
||||
for chunk in iter(lambda: f.read(4096), b""):
|
||||
md5.update(chunk)
|
||||
calc_md5sum = md5.hexdigest()
|
||||
|
||||
if calc_md5sum != md5sum:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _check_exist_file_md5(filename, md5sum, url):
|
||||
return _md5check(filename, md5sum)
|
||||
|
||||
|
||||
def _download(url, path, md5sum=None):
|
||||
"""
|
||||
Download from url, save to path.
|
||||
url (str): download url
|
||||
path (str): download to given path
|
||||
"""
|
||||
if not osp.exists(path):
|
||||
os.makedirs(path)
|
||||
|
||||
fname = osp.split(url)[-1]
|
||||
fullname = osp.join(path, fname)
|
||||
retry_cnt = 0
|
||||
while not (osp.exists(fullname) and _check_exist_file_md5(fullname, md5sum,
|
||||
url)):
|
||||
if retry_cnt < DOWNLOAD_RETRY_LIMIT:
|
||||
retry_cnt += 1
|
||||
else:
|
||||
raise RuntimeError("Download from {} failed. "
|
||||
"Retry limit reached".format(url))
|
||||
|
||||
# NOTE: windows path join may incur \, which is invalid in url
|
||||
if sys.platform == "win32":
|
||||
url = url.replace('\\', '/')
|
||||
|
||||
req = requests.get(url, stream=True)
|
||||
if req.status_code != 200:
|
||||
raise RuntimeError("Downloading from {} failed with code "
|
||||
"{}!".format(url, req.status_code))
|
||||
|
||||
# For protecting download interupted, download to
|
||||
# tmp_fullname firstly, move tmp_fullname to fullname
|
||||
# after download finished
|
||||
tmp_fullname = fullname + "_tmp"
|
||||
total_size = req.headers.get('content-length')
|
||||
with open(tmp_fullname, 'wb') as f:
|
||||
if total_size:
|
||||
for chunk in tqdm.tqdm(
|
||||
req.iter_content(chunk_size=1024),
|
||||
total=(int(total_size) + 1023) // 1024,
|
||||
unit='KB'):
|
||||
f.write(chunk)
|
||||
else:
|
||||
for chunk in req.iter_content(chunk_size=1024):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
shutil.move(tmp_fullname, fullname)
|
||||
return fullname
|
||||
|
||||
|
||||
def _download_dist(url, path, md5sum=None):
|
||||
env = os.environ
|
||||
if 'PADDLE_TRAINERS_NUM' in env and 'PADDLE_TRAINER_ID' in env:
|
||||
trainer_id = int(env['PADDLE_TRAINER_ID'])
|
||||
num_trainers = int(env['PADDLE_TRAINERS_NUM'])
|
||||
if num_trainers <= 1:
|
||||
return _download(url, path, md5sum)
|
||||
else:
|
||||
fname = osp.split(url)[-1]
|
||||
fullname = osp.join(path, fname)
|
||||
lock_path = fullname + '.download.lock'
|
||||
|
||||
if not osp.isdir(path):
|
||||
os.makedirs(path)
|
||||
|
||||
if not osp.exists(fullname):
|
||||
from paddle.distributed import ParallelEnv
|
||||
unique_endpoints = _get_unique_endpoints(ParallelEnv()
|
||||
.trainer_endpoints[:])
|
||||
with open(lock_path, 'w'): # touch
|
||||
os.utime(lock_path, None)
|
||||
if ParallelEnv().current_endpoint in unique_endpoints:
|
||||
_download(url, path, md5sum)
|
||||
os.remove(lock_path)
|
||||
else:
|
||||
while os.path.exists(lock_path):
|
||||
time.sleep(0.5)
|
||||
return fullname
|
||||
else:
|
||||
return _download(url, path, md5sum)
|
||||
|
||||
|
||||
def _move_and_merge_tree(src, dst):
|
||||
"""
|
||||
Move src directory to dst, if dst is already exists,
|
||||
merge src to dst
|
||||
"""
|
||||
if not osp.exists(dst):
|
||||
shutil.move(src, dst)
|
||||
elif osp.isfile(src):
|
||||
shutil.move(src, dst)
|
||||
else:
|
||||
for fp in os.listdir(src):
|
||||
src_fp = osp.join(src, fp)
|
||||
dst_fp = osp.join(dst, fp)
|
||||
if osp.isdir(src_fp):
|
||||
if osp.isdir(dst_fp):
|
||||
_move_and_merge_tree(src_fp, dst_fp)
|
||||
else:
|
||||
shutil.move(src_fp, dst_fp)
|
||||
elif osp.isfile(src_fp) and \
|
||||
not osp.isfile(dst_fp):
|
||||
shutil.move(src_fp, dst_fp)
|
||||
|
||||
|
||||
def _decompress(fname):
|
||||
"""
|
||||
Decompress for zip and tar file
|
||||
"""
|
||||
|
||||
# For protecting decompressing interupted,
|
||||
# decompress to fpath_tmp directory firstly, if decompress
|
||||
# successed, move decompress files to fpath and delete
|
||||
# fpath_tmp and remove download compress file.
|
||||
fpath = osp.split(fname)[0]
|
||||
fpath_tmp = osp.join(fpath, 'tmp')
|
||||
if osp.isdir(fpath_tmp):
|
||||
shutil.rmtree(fpath_tmp)
|
||||
os.makedirs(fpath_tmp)
|
||||
|
||||
if fname.find('tar') >= 0:
|
||||
with tarfile.open(fname) as tf:
|
||||
tf.extractall(path=fpath_tmp)
|
||||
elif fname.find('zip') >= 0:
|
||||
with zipfile.ZipFile(fname) as zf:
|
||||
zf.extractall(path=fpath_tmp)
|
||||
elif fname.find('.txt') >= 0:
|
||||
return
|
||||
else:
|
||||
raise TypeError("Unsupport compress file type {}".format(fname))
|
||||
|
||||
for f in os.listdir(fpath_tmp):
|
||||
src_dir = osp.join(fpath_tmp, f)
|
||||
dst_dir = osp.join(fpath, f)
|
||||
_move_and_merge_tree(src_dir, dst_dir)
|
||||
|
||||
shutil.rmtree(fpath_tmp)
|
||||
os.remove(fname)
|
||||
|
||||
|
||||
def _decompress_dist(fname):
|
||||
env = os.environ
|
||||
if 'PADDLE_TRAINERS_NUM' in env and 'PADDLE_TRAINER_ID' in env:
|
||||
trainer_id = int(env['PADDLE_TRAINER_ID'])
|
||||
num_trainers = int(env['PADDLE_TRAINERS_NUM'])
|
||||
if num_trainers <= 1:
|
||||
_decompress(fname)
|
||||
else:
|
||||
lock_path = fname + '.decompress.lock'
|
||||
from paddle.distributed import ParallelEnv
|
||||
unique_endpoints = _get_unique_endpoints(ParallelEnv()
|
||||
.trainer_endpoints[:])
|
||||
# NOTE(dkp): _decompress_dist always performed after
|
||||
# _download_dist, in _download_dist sub-trainers is waiting
|
||||
# for download lock file release with sleeping, if decompress
|
||||
# prograss is very fast and finished with in the sleeping gap
|
||||
# time, e.g in tiny dataset such as coco_ce, spine_coco, main
|
||||
# trainer may finish decompress and release lock file, so we
|
||||
# only craete lock file in main trainer and all sub-trainer
|
||||
# wait 1s for main trainer to create lock file, for 1s is
|
||||
# twice as sleeping gap, this waiting time can keep all
|
||||
# trainer pipeline in order
|
||||
# **change this if you have more elegent methods**
|
||||
if ParallelEnv().current_endpoint in unique_endpoints:
|
||||
with open(lock_path, 'w'): # touch
|
||||
os.utime(lock_path, None)
|
||||
_decompress(fname)
|
||||
os.remove(lock_path)
|
||||
else:
|
||||
time.sleep(1)
|
||||
while os.path.exists(lock_path):
|
||||
time.sleep(0.5)
|
||||
else:
|
||||
_decompress(fname)
|
||||
|
||||
|
||||
def get_path(url, root_dir=WEIGHTS_HOME, md5sum=None, check_exist=True):
|
||||
""" Download from given url to root_dir.
|
||||
if file or directory specified by url is exists under
|
||||
root_dir, return the path directly, otherwise download
|
||||
from url and decompress it, return the path.
|
||||
url (str): download url
|
||||
root_dir (str): root dir for downloading
|
||||
md5sum (str): md5 sum of download package
|
||||
"""
|
||||
# parse path after download to decompress under root_dir
|
||||
fullpath = map_path(url, root_dir)
|
||||
|
||||
# For same zip file, decompressed directory name different
|
||||
# from zip file name, rename by following map
|
||||
decompress_name_map = {"ppTSM_fight": "ppTSM", }
|
||||
for k, v in decompress_name_map.items():
|
||||
if fullpath.find(k) >= 0:
|
||||
fullpath = osp.join(osp.split(fullpath)[0], v)
|
||||
|
||||
if osp.exists(fullpath) and check_exist:
|
||||
if not osp.isfile(fullpath) or \
|
||||
_check_exist_file_md5(fullpath, md5sum, url):
|
||||
return fullpath, True
|
||||
else:
|
||||
os.remove(fullpath)
|
||||
|
||||
fullname = _download_dist(url, root_dir, md5sum)
|
||||
|
||||
# new weights format which postfix is 'pdparams' not
|
||||
# need to decompress
|
||||
if osp.splitext(fullname)[-1] not in ['.pdparams', '.yml']:
|
||||
_decompress_dist(fullname)
|
||||
|
||||
return fullpath, False
|
||||
|
||||
|
||||
def get_weights_path(url):
|
||||
"""Get weights path from WEIGHTS_HOME, if not exists,
|
||||
download it from url.
|
||||
"""
|
||||
url = parse_url(url)
|
||||
md5sum = None
|
||||
if url in MODEL_URL_MD5_DICT.keys():
|
||||
md5sum = MODEL_URL_MD5_DICT[url]
|
||||
path, _ = get_path(url, WEIGHTS_HOME, md5sum)
|
||||
return path
|
||||
|
||||
|
||||
def auto_download_model(model_path):
|
||||
# auto download
|
||||
if is_url(model_path):
|
||||
weight = get_weights_path(model_path)
|
||||
return weight
|
||||
return None
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
model_path = "https://bj.bcebos.com/v1/paddledet/models/pipeline/mot_ppyoloe_l_36e_pipeline.zip"
|
||||
auto_download_model(model_path)
|
||||
Reference in New Issue
Block a user