diff options
author | Yousong Zhou <yszhou4tech@gmail.com> | 2018-06-28 18:27:27 +0800 |
---|---|---|
committer | Yousong Zhou <yszhou4tech@gmail.com> | 2018-07-05 01:30:57 +0800 |
commit | 04b9f8587370f96366c6e53fb411473279ba7c02 (patch) | |
tree | e94957b40825486a70c170db1fbae225cff9307b /scripts/download.py | |
parent | e48ea13b3bac5393d6400156ddb066ec5de2ea4e (diff) | |
download | upstream-04b9f8587370f96366c6e53fb411473279ba7c02.tar.gz upstream-04b9f8587370f96366c6e53fb411473279ba7c02.tar.bz2 upstream-04b9f8587370f96366c6e53fb411473279ba7c02.zip |
scripts/dl_github_archive.py: rename from download.py
- Make the code more GitHub-specific
- Requires mirror hash to work with .gitattributes
- Use different API depending on whether PKG_SOURCE_VERSION is a
complete commit id or other ref types like tags
- Fix removing symbolic link
- pre-clean dir_untar for possible leftovers from previous run
Signed-off-by: Yousong Zhou <yszhou4tech@gmail.com>
Diffstat (limited to 'scripts/download.py')
-rwxr-xr-x | scripts/download.py | 421 |
1 files changed, 0 insertions, 421 deletions
#!/usr/bin/env python
#
# Copyright (c) 2018 Yousong Zhou <yszhou4tech@gmail.com>
#
# This is free software, licensed under the GNU General Public License v2.
# See /LICENSE for more information.
"""Resolve download methods and repack GitHub archive tarballs.

The ``dl_method`` action prints the name of the download method that would
handle the given URLs.  The ``dl`` action fetches the archive tarball of a
GitHub repository, renames its top-level directory and repacks it with fixed
ownership, sorted member order and the commit timestamp as mtime.
"""

import argparse
import calendar
import datetime
import errno
import fcntl
import json
import os
import os.path
import re
import shutil
import ssl
import subprocess
import sys
import time

try:
    import urllib2
except ImportError:
    # Python 3: Request and urlopen live in urllib.request with the same API.
    import urllib.request as urllib2

try:
    string_types = basestring
except NameError:
    # Python 3 has no basestring.
    string_types = str

TMPDIR = os.environ.get('TMP_DIR') or '/tmp'
TMPDIR_DL = os.path.join(TMPDIR, 'dl')
DOWNLOAD_METHODS = []


class PathException(Exception):
    """Raised when a filesystem or archive operation gives an unexpected result."""


class DownloadException(Exception):
    """Raised when a download cannot be carried out."""


class Path(object):
    """Context class for preparing and cleaning up directories.

    If ``isdir`` is True, then ``path`` will be created on context enter.

    If ``keep`` is True, then ``path`` will NOT be removed on context exit
    """

    def __init__(self, path, isdir=True, keep=False):
        self.path = path
        self.isdir = isdir
        self.keep = keep

    def __enter__(self):
        if self.isdir:
            self.mkdir_all(self.path)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if not self.keep:
            self.rm_all(self.path)

    @staticmethod
    def mkdir_all(path):
        """Same as mkdir -p.

        All missing intermediate directories are created.  An already
        existing directory is not an error.
        """
        try:
            os.makedirs(path)
        except OSError as e:
            # Only tolerate "already exists" when it really is a directory.
            if e.errno != errno.EEXIST or not os.path.isdir(path):
                raise

    @staticmethod
    def _rmdir_all(dir_):
        """Remove ``dir_`` and everything below it."""
        for name in Path._listdir(dir_):
            p = os.path.join(dir_, name)
            # A symlink to a directory must be unlinked, never descended into:
            # recursing would delete the link target's content.
            if os.path.isdir(p) and not os.path.islink(p):
                Path._rmdir_all(p)
            else:
                Path._remove(p)
        Path._rmdir(dir_)

    @staticmethod
    def _mkdir(path):
        Path._os_func(os.mkdir, path, errno.EEXIST)

    @staticmethod
    def _rmdir(path):
        Path._os_func(os.rmdir, path, errno.ENOENT)

    @staticmethod
    def _remove(path):
        Path._os_func(os.remove, path, errno.ENOENT)

    @staticmethod
    def _listdir(path):
        return Path._os_func(os.listdir, path, errno.ENOENT, default=[])

    @staticmethod
    def _os_func(func, path, errno, default=None):
        """Call func(path) in an idempotent way.

        On exception ``ex``, if the type is OSError and ``ex.errno == errno``,
        return ``default``, otherwise, re-raise

        Note that the ``errno`` parameter shadows the ``errno`` module inside
        this function; it is the single error number to tolerate.
        """
        try:
            return func(path)
        except OSError as e:
            if e.errno == errno:
                return default
            raise

    @staticmethod
    def rm_all(path):
        """Same as rm -r.

        A missing ``path`` is not an error.  Symbolic links are removed
        themselves; their targets are left untouched.
        """
        if os.path.isdir(path) and not os.path.islink(path):
            Path._rmdir_all(path)
        else:
            Path._remove(path)

    @staticmethod
    def untar(path, into=None):
        """Extract tarball at ``path`` into subdir ``into``.

        ``into`` must name an existing directory; it is passed to ``tar -C``.

        return subdir name if and only if there exists one, otherwise raise PathException
        """
        args = ('tar', '-C', into, '-xzf', path, '--no-same-permissions')
        # Extract with a fixed umask so that the resulting modes do not depend
        # on the caller's environment.
        subprocess.check_call(args, preexec_fn=lambda: os.umask(0o22))
        dirs = os.listdir(into)
        if len(dirs) == 1:
            return dirs[0]
        raise PathException('untar %s: expecting a single subdir, got %s' % (path, dirs))

    @staticmethod
    def tar(path, subdir, into=None, ts=None):
        """Pack ``subdir`` of directory ``path`` into tarball ``into``.

        The compression is chosen from the suffix of ``into`` (``.xz``,
        ``.bz2`` or ``.gz``); any other suffix raises PathException.  When
        ``ts`` is given, it is used as mtime (seconds since the epoch) of all
        members.
        """
        # --sort=name requires a recent build of GNU tar
        args = ['tar', '--numeric-owner', '--owner=0', '--group=0', '--sort=name']
        args += ['-C', path, '-cf', into, subdir]
        envs = os.environ.copy()
        if ts is not None:
            args.append('--mtime=@%d' % ts)
        if into.endswith('.xz'):
            envs['XZ_OPT'] = '-7e'
            args.append('-J')
        elif into.endswith('.bz2'):
            args.append('-j')
        elif into.endswith('.gz'):
            args.append('-z')
            # -n: do not store name and timestamp in the gzip header
            envs['GZIP'] = '-n'
        else:
            raise PathException('unknown compression type %s' % into)
        subprocess.check_call(args, env=envs)


class GitHubCommitTsCache(object):
    """File-backed cache mapping a key to a commit timestamp.

    Each line of the cache file holds ``<key> <timestamp> <updated>``.  The
    file is accessed under an fcntl lock and trimmed to the most recently
    updated entries on every write.
    """

    __cachef = 'github.commit.ts.cache'
    __cachen = 2048

    def __init__(self):
        Path.mkdir_all(TMPDIR_DL)
        self.cachef = os.path.join(TMPDIR_DL, self.__cachef)
        self.cache = {}

    def get(self, k):
        """Get timestamp with key ``k``, or None when it is not cached."""
        fileno = os.open(self.cachef, os.O_RDONLY | os.O_CREAT, 0o644)
        with os.fdopen(fileno) as fin:
            fcntl.lockf(fileno, fcntl.LOCK_SH)
            try:
                self._cache_init(fin)
            finally:
                fcntl.lockf(fileno, fcntl.LOCK_UN)
        entry = self.cache.get(k)
        if entry is None:
            return None
        return entry[0]

    def set(self, k, v):
        """Update timestamp with ``k``."""
        fileno = os.open(self.cachef, os.O_RDWR | os.O_CREAT, 0o644)
        # Text mode so that str can be written on both Python 2 and 3.
        with os.fdopen(fileno, 'r+') as f:
            fcntl.lockf(fileno, fcntl.LOCK_EX)
            try:
                self._cache_init(f)
                self.cache[k] = (v, int(time.time()))
                self._cache_flush(f)
                # Push buffered data to the file while the lock is still
                # held; otherwise it would only be written on close, after
                # the lock had been released.
                f.flush()
            finally:
                fcntl.lockf(fileno, fcntl.LOCK_UN)

    def _cache_init(self, fin):
        """Load entries from the open cache file ``fin`` into ``self.cache``."""
        for line in fin:
            fields = line.split()
            if len(fields) != 3:
                # skip truncated or otherwise malformed lines
                continue
            k, ts, updated = fields
            try:
                self.cache[k] = (int(ts), int(updated))
            except ValueError:
                continue

    def _cache_flush(self, fout):
        """Rewrite ``fout`` with the most recently updated entries.

        The in-memory cache is emptied afterwards.
        """
        entries = sorted(self.cache.items(), key=lambda kv: kv[1][1], reverse=True)
        entries = entries[:self.__cachen]
        self.cache = {}
        fout.seek(0, os.SEEK_SET)
        fout.truncate()
        for k, (ts, updated) in entries:
            fout.write('{0} {1} {2}\n'.format(k, ts, updated))


class DownloadMethod(object):
    """Base class of all download methods."""

    def __init__(self, args):
        self.args = args
        self.urls = args.urls
        self.url = self.urls[0]
        self.dl_dir = args.dl_dir

    @classmethod
    def resolve(cls, args):
        """Resolve download method to use.

        return instance of subclass of DownloadMethod, or None when no entry
        of DOWNLOAD_METHODS matches
        """
        for c in DOWNLOAD_METHODS:
            if c.match(args):
                return c(args)
        return None

    @staticmethod
    def match(args):
        """Return True if it can do the download.

        Subclasses must override this.
        """
        # NotImplemented is truthy and would make every request "match".
        raise NotImplementedError('match')

    def download(self):
        """Do the download and put it into the download dir.

        Subclasses must override this.
        """
        raise NotImplementedError('download')


class DownloadMethodGitHubTarball(DownloadMethod):
    """Download and repack archive tarball from GitHub."""

    __repo_url_regex = re.compile(r'^(?:https|git)://github\.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)')
    __sha1_regex = re.compile(r'^[0-9a-fA-F]{40}$')

    def __init__(self, args):
        super(DownloadMethodGitHubTarball, self).__init__(args)
        self._init_owner_repo()
        self.version = args.version
        self.subdir = args.subdir
        self.source = args.source
        self.commit_ts = None  # lazy load commit timestamp
        self.commit_ts_cache = GitHubCommitTsCache()
        self.name = 'github-tarball'

    @staticmethod
    def match(args):
        """Match if it's a GitHub clone url."""
        url = args.urls[0]
        return (args.proto == 'git'
                and isinstance(url, string_types)
                and url.startswith(('https://github.com/', 'git://github.com/')))

    def download(self):
        """Download and repack GitHub archive tarball."""
        self._init_commit_ts()
        with Path(TMPDIR_DL, keep=True) as dir_dl:
            # fetch tarball from GitHub
            tarball_path = os.path.join(dir_dl.path, self.subdir + '.tar.gz.dl')
            with Path(tarball_path, isdir=False):
                self._fetch(tarball_path)
                # unpack
                d = os.path.join(dir_dl.path, self.subdir + '.untar')
                # Leftovers from an interrupted previous run would make untar
                # see more than one subdir and fail.
                Path.rm_all(d)
                with Path(d) as dir_untar:
                    tarball_prefix = Path.untar(tarball_path, into=dir_untar.path)
                    dir0 = os.path.join(dir_untar.path, tarball_prefix)
                    dir1 = os.path.join(dir_untar.path, self.subdir)
                    # submodules check
                    if self._has_submodule(dir0):
                        raise DownloadException('unable to fetch submodules\' source code')
                    # rename subdir
                    os.rename(dir0, dir1)
                    # repack
                    into = os.path.join(TMPDIR_DL, self.source)
                    Path.tar(dir_untar.path, self.subdir, into=into, ts=self.commit_ts)
                    # move to target location
                    file1 = os.path.join(self.dl_dir, self.source)
                    if into != file1:
                        shutil.move(into, file1)

    def _has_submodule(self, dir_):
        """Return True if ``dir_`` has a non-empty or unreadable .gitmodules."""
        m = os.path.join(dir_, '.gitmodules')
        try:
            st = os.stat(m)
            return st.st_size > 0
        except OSError as e:
            # Anything other than "no such file" is treated as "may have one".
            return e.errno != errno.ENOENT

    def _init_owner_repo(self):
        """Parse ``self.url`` into ``self.owner`` and ``self.repo``."""
        url = self.url
        m = self.__repo_url_regex.search(url)
        if m is None:
            raise DownloadException('invalid github url: %s' % url)
        repo = m.group('repo')
        if repo.endswith('.git'):
            repo = repo[:-4]
        self.owner = m.group('owner')
        self.repo = repo

    def _init_commit_ts(self):
        """Load the commit timestamp of ``self.version`` into ``self.commit_ts``.

        Two GitHub APIs are tried.  The Git Data API only accepts a complete
        commit id, so it goes first only when the version looks like one; the
        Commits API also accepts tags and other refs.

        raise DownloadException if neither API yields a timestamp
        """
        if self.commit_ts is not None:
            return
        lookups = [
            (self._make_repo_url_path('git', 'commits', self.version),
             ('committer', 'date')),
            (self._make_repo_url_path('commits', self.version),
             ('commit', 'committer', 'date')),
        ]
        if not self.__sha1_regex.match(self.version):
            lookups.reverse()
        failures = []
        for url, attr_path in lookups:
            ct = self.commit_ts_cache.get(url)
            if ct is None:
                try:
                    ct = self._fetch_commit_ts(url, attr_path)
                except (IOError, ValueError, KeyError, TypeError) as e:
                    failures.append('%s: %s' % (url, e))
                    continue
                self.commit_ts_cache.set(url, ct)
            self.commit_ts = ct
            return
        raise DownloadException('unable to fetch commit timestamp of %s: %s'
                                % (self.version, '; '.join(failures)))

    def _fetch_commit_ts(self, url, attr_path):
        """Request ``url`` and return the date found at ``attr_path`` as epoch seconds."""
        resp = self._make_request(url)
        try:
            data = json.loads(resp.read().decode('utf-8'))
        finally:
            resp.close()
        for attr in attr_path:
            data = data[attr]
        date = datetime.datetime.strptime(data, '%Y-%m-%dT%H:%M:%SZ')
        return calendar.timegm(date.timetuple())

    def _fetch(self, path):
        """Fetch tarball of the specified version ref into file ``path``."""
        url = self._make_repo_url_path('tarball', self.version)
        resp = self._make_request(url)
        try:
            with open(path, 'wb') as fout:
                shutil.copyfileobj(resp, fout, 4096)
        finally:
            resp.close()

    def _make_repo_url_path(self, *args):
        """Return the API path of this repo with ``args`` appended as segments."""
        url = '/repos/{0}/{1}'.format(self.owner, self.repo)
        if args:
            url += '/' + '/'.join(args)
        return url

    def _make_request(self, path):
        """Request GitHub API endpoint on ``path``."""
        url = 'https://api.github.com' + path
        headers = {
            'Accept': 'application/vnd.github.v3+json',
            'User-Agent': 'OpenWrt',
        }
        req = urllib2.Request(url, headers=headers)
        # NOTE(review): certificate verification is disabled here, so the
        # server is not authenticated and the downloaded content must be
        # verified by other means (e.g. a hash check by the caller).  Switch
        # to ssl.create_default_context() once build hosts are known to ship
        # CA certificates.
        sslcontext = ssl._create_unverified_context()
        return urllib2.urlopen(req, context=sslcontext)


class DownloadMethodCatchall(DownloadMethod):
    """Dummy method that knows names but not ways of download."""

    def __init__(self, args):
        super(DownloadMethodCatchall, self).__init__(args)
        self.args = args
        self.proto = args.proto
        self.name = self._resolve_name()

    def _resolve_name(self):
        """Return ``self.proto`` if set, else a name derived from URL prefixes."""
        if self.proto:
            return self.proto
        methods_map = (
            ('default', ('@APACHE/', '@GITHUB/', '@GNOME/', '@GNU/',
                         '@KERNEL/', '@SF/', '@SAVANNAH/', 'ftp://', 'http://',
                         'https://', 'file://')),
            ('git', ('git://', )),
            ('svn', ('svn://', )),
            ('cvs', ('cvs://', )),
            ('bzr', ('sftp://', )),
            # the empty prefix matches any url
            ('unknown', ('', )),
        )
        for name, prefixes in methods_map:
            if any(url.startswith(prefixes) for url in self.urls):
                return name
        return None

    @staticmethod
    def match(args):
        """Return True."""
        return True

    def download(self):
        """Not implemented.

        raise DownloadException
        """
        raise DownloadException('download method for %s is not yet implemented' % self.name)


# order matters
DOWNLOAD_METHODS = [
    DownloadMethodGitHubTarball,
    DownloadMethodCatchall,
]


def main():
    """Parse the command line and run the ``dl_method`` or ``dl`` action."""
    parser = argparse.ArgumentParser()
    parser.add_argument('action', choices=('dl_method', 'dl'), help='Action to take')
    # Every download method indexes the url list, so it must be given.
    parser.add_argument('--urls', nargs='+', metavar='URL', required=True, help='Download URLs')
    parser.add_argument('--proto', help='Download proto')
    parser.add_argument('--subdir', help='Source code subdir name')
    parser.add_argument('--version', help='Source code version')
    parser.add_argument('--source', help='Source tarball filename')
    parser.add_argument('--dl-dir', default=os.getcwd(), help='Download dir')
    args = parser.parse_args()
    method = DownloadMethod.resolve(args)
    if args.action == 'dl_method':
        sys.stdout.write(method.name + '\n')
    elif args.action == 'dl':
        method.download()


if __name__ == '__main__':
    main()