aboutsummaryrefslogtreecommitdiffstats
path: root/scripts/download.py
diff options
context:
space:
mode:
author: Yousong Zhou <yszhou4tech@gmail.com> 2018-06-28 18:27:27 +0800
committer: Yousong Zhou <yszhou4tech@gmail.com> 2018-07-05 01:30:57 +0800
commit: 04b9f8587370f96366c6e53fb411473279ba7c02 (patch)
tree: e94957b40825486a70c170db1fbae225cff9307b /scripts/download.py
parent: e48ea13b3bac5393d6400156ddb066ec5de2ea4e (diff)
download: upstream-04b9f8587370f96366c6e53fb411473279ba7c02.tar.gz
upstream-04b9f8587370f96366c6e53fb411473279ba7c02.tar.bz2
upstream-04b9f8587370f96366c6e53fb411473279ba7c02.zip
scripts/dl_github_archive.py: rename from download.py
- Make the code more GitHub-specific
- Requires mirror hash to work with .gitattributes
- Use a different API depending on whether PKG_SOURCE_VERSION is a complete commit id or another ref type such as a tag
- Fix removing symbolic links
- Pre-clean dir_untar of possible leftovers from a previous run

Signed-off-by: Yousong Zhou <yszhou4tech@gmail.com>
Diffstat (limited to 'scripts/download.py')
-rwxr-xr-x scripts/download.py 421
1 files changed, 0 insertions, 421 deletions
diff --git a/scripts/download.py b/scripts/download.py
deleted file mode 100755
index 779d7b3de2..0000000000
--- a/scripts/download.py
+++ /dev/null
@@ -1,421 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright (c) 2018 Yousong Zhou <yszhou4tech@gmail.com>
-#
-# This is free software, licensed under the GNU General Public License v2.
-# See /LICENSE for more information.
-
-import argparse
-import calendar
-import datetime
-import errno
-import fcntl
-import json
-import os
-import os.path
-import re
-import shutil
-import ssl
-import subprocess
-import sys
-import time
-import urllib2
-
# Scratch area for downloads: TMP_DIR from the environment when it is set and
# non-empty, otherwise /tmp.
TMPDIR = os.environ.get('TMP_DIR') or '/tmp'
TMPDIR_DL = os.path.join(TMPDIR, 'dl')
# Placeholder; rebound to the real list once the method classes are defined.
DOWNLOAD_METHODS = []


class PathException(Exception):
    """Error raised by the ``Path`` helpers."""


class DownloadException(Exception):
    """Error raised when a download cannot be carried out."""
-
-
class Path(object):
    """Context manager that prepares a path on entry and removes it on exit.

    If ``isdir`` is true, ``path`` is created as a directory (missing parents
    included) when the context is entered.

    If ``keep`` is true, ``path`` is NOT removed when the context exits;
    otherwise it is removed recursively, be it a file or a directory.
    """

    def __init__(self, path, isdir=True, keep=False):
        self.path = path
        self.isdir = isdir
        self.keep = keep

    def __enter__(self):
        if self.isdir:
            self.mkdir_all(self.path)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None, so an exception raised inside the body propagates.
        if not self.keep:
            self.rm_all(self.path)

    @staticmethod
    def mkdir_all(path):
        """Same as mkdir -p: create ``path`` together with missing parents.

        An already existing directory is not an error.  Any other failure,
        including ``path`` existing as a non-directory, raises OSError.
        """
        # os.path.split() only yields (head, tail), so walking its result
        # cannot create more than one missing level; os.makedirs() handles
        # every missing component.
        try:
            os.makedirs(path)
        except OSError as e:
            if e.errno != errno.EEXIST or not os.path.isdir(path):
                raise

    @staticmethod
    def _rmdir_all(dir_):
        """Remove directory ``dir_`` and everything below it."""
        for name in Path._listdir(dir_):
            p = os.path.join(dir_, name)
            # A symbolic link to a directory is removed as a link; descending
            # into it would delete the contents of the link target.
            if os.path.isdir(p) and not os.path.islink(p):
                Path._rmdir_all(p)
            else:
                Path._remove(p)
        Path._rmdir(dir_)

    @staticmethod
    def _mkdir(path):
        """os.mkdir() that tolerates an already existing ``path``."""
        Path._os_func(os.mkdir, path, errno.EEXIST)

    @staticmethod
    def _rmdir(path):
        """os.rmdir() that tolerates a missing ``path``."""
        Path._os_func(os.rmdir, path, errno.ENOENT)

    @staticmethod
    def _remove(path):
        """os.remove() that tolerates a missing ``path``."""
        Path._os_func(os.remove, path, errno.ENOENT)

    @staticmethod
    def _listdir(path):
        """os.listdir() that returns ``[]`` for a missing ``path``."""
        return Path._os_func(os.listdir, path, errno.ENOENT, default=[])

    @staticmethod
    def _os_func(func, path, errno, default=None):
        """Call func(path) in an idempotent way.

        On exception ``ex``, if the type is OSError and ``ex.errno == errno``,
        return ``default``, otherwise re-raise.

        Note that the ``errno`` parameter shadows the ``errno`` module inside
        this function; it is the single error number to tolerate.
        """
        try:
            return func(path)
        except OSError as e:
            if e.errno == errno:
                return default
            raise

    @staticmethod
    def rm_all(path):
        """Same as rm -r: remove ``path`` whether it is a file or directory.

        A missing ``path`` is not an error.  A symbolic link is removed
        itself; its target is left untouched.
        """
        if os.path.isdir(path) and not os.path.islink(path):
            Path._rmdir_all(path)
        else:
            Path._remove(path)

    @staticmethod
    def untar(path, into=None):
        """Extract gzip'ed tarball at ``path`` into directory ``into``.

        ``into`` has to be given despite its default; it is passed to
        ``tar -C`` as is.

        Return the name of the single entry found in ``into`` after
        extraction; raise PathException if there is not exactly one entry.
        Raise subprocess.CalledProcessError if tar fails.
        """
        args = ('tar', '-C', into, '-xzf', path, '--no-same-permissions')
        # Run tar with umask 022 so that extracted modes do not depend on the
        # umask of the calling process.
        subprocess.check_call(args, preexec_fn=lambda: os.umask(0o22))
        dirs = os.listdir(into)
        if len(dirs) == 1:
            return dirs[0]
        raise PathException('untar %s: expecting a single subdir, got %s' % (path, dirs))

    @staticmethod
    def tar(path, subdir, into=None, ts=None):
        """Pack ``subdir`` found under ``path`` into tarball ``into``.

        Compression is picked from the suffix of ``into`` (.xz, .bz2 or .gz);
        any other suffix raises PathException.  If ``ts`` is not None, it is
        used as the mtime (seconds since the epoch) of every member.  Raise
        subprocess.CalledProcessError if tar fails.
        """
        # Fixed ownership and sorted member names keep the archive independent
        # of who packs it and of directory listing order.
        # --sort=name requires a recent build of GNU tar
        args = ['tar', '--numeric-owner', '--owner=0', '--group=0', '--sort=name']
        args += ['-C', path, '-cf', into, subdir]
        envs = os.environ.copy()
        if ts is not None:
            args.append('--mtime=@%d' % ts)
        if into.endswith('.xz'):
            envs['XZ_OPT'] = '-7e'
            args.append('-J')
        elif into.endswith('.bz2'):
            args.append('-j')
        elif into.endswith('.gz'):
            args.append('-z')
            # -n: do not store name and timestamp in the gzip header
            envs['GZIP'] = '-n'
        else:
            raise PathException('unknown compression type %s' % into)
        subprocess.check_call(args, env=envs)
-
-
class GitHubCommitTsCache(object):
    """File-backed cache mapping a key to a commit timestamp.

    The cache file lives under ``TMPDIR_DL``.  Each line has three
    whitespace-separated fields: key, timestamp and the time the entry was
    last updated.  Access is serialized with ``fcntl.lockf`` so several
    processes can share the file.
    """

    __cachef = 'github.commit.ts.cache'
    # maximum number of entries kept in the file
    __cachen = 2048

    def __init__(self):
        Path.mkdir_all(TMPDIR_DL)
        self.cachef = os.path.join(TMPDIR_DL, self.__cachef)
        self.cache = {}

    def get(self, k):
        """Get timestamp with key ``k``; return None if it is not cached."""
        # Explicit mode: the os.open() default of 0o777 would create the cache
        # file with execute bits set.
        fileno = os.open(self.cachef, os.O_RDONLY | os.O_CREAT, 0o666)
        with os.fdopen(fileno) as fin:
            try:
                fcntl.lockf(fileno, fcntl.LOCK_SH)
                self._cache_init(fin)
                entry = self.cache.get(k)
                if entry is not None:
                    return entry[0]
            finally:
                fcntl.lockf(fileno, fcntl.LOCK_UN)
        return None

    def set(self, k, v):
        """Update timestamp with ``k`` to ``v`` and rewrite the cache file."""
        fileno = os.open(self.cachef, os.O_RDWR | os.O_CREAT, 0o666)
        # 'r+' keeps existing content (it is re-read below) and handles text
        # for both reading and writing.
        with os.fdopen(fileno, 'r+') as f:
            try:
                fcntl.lockf(fileno, fcntl.LOCK_EX)
                self._cache_init(f)
                self.cache[k] = (v, int(time.time()))
                self._cache_flush(f)
            finally:
                fcntl.lockf(fileno, fcntl.LOCK_UN)

    def _cache_init(self, fin):
        """Load entries from open file ``fin`` into ``self.cache``.

        Lines that do not consist of a key and two integers are skipped so a
        damaged cache file cannot break every later run.
        """
        for line in fin:
            fields = line.split()
            if len(fields) != 3:
                continue
            k, ts, updated = fields
            try:
                self.cache[k] = (int(ts), int(updated))
            except ValueError:
                continue

    def _cache_flush(self, fout):
        """Write the most recently updated entries to ``fout``.

        At most ``__cachen`` entries are kept.  ``self.cache`` is emptied
        afterwards; the next access reloads it from the file.
        """
        newest_first = sorted(self.cache.items(),
                              key=lambda item: item[1][1], reverse=True)
        kept = newest_first[:self.__cachen]
        self.cache = {}
        os.ftruncate(fout.fileno(), 0)
        fout.seek(0, os.SEEK_SET)
        for k, (ts, updated) in kept:
            fout.write('{0} {1} {2}\n'.format(k, ts, updated))
        # The caller drops the lock before the file object is closed; flush
        # now so no other process can observe the truncated file.
        fout.flush()
-
-
class DownloadMethod(object):
    """Base class of all download methods.

    Subclasses override ``match`` and ``download`` and are looked up through
    the module-level ``DOWNLOAD_METHODS`` list.
    """

    def __init__(self, args):
        # ``args`` is the parsed command line; ``urls`` must hold at least
        # one URL, the first of which is the primary one.
        self.args = args
        self.urls = args.urls
        self.url = self.urls[0]
        self.dl_dir = args.dl_dir

    @classmethod
    def resolve(cls, args):
        """Resolve download method to use.

        Return an instance of the first class in ``DOWNLOAD_METHODS`` whose
        ``match(args)`` is true, or None if no class matches.
        """
        for c in DOWNLOAD_METHODS:
            if c.match(args):
                return c(args)
        return None

    @staticmethod
    def match(args):
        """Return True if it can do the download.

        Must be overridden.  The base implementation raises
        NotImplementedError; returning the ``NotImplemented`` constant would
        count as a match because that constant is truthy.
        """
        raise NotImplementedError('match() must be implemented by subclasses')

    def download(self):
        """Do the download and put it into the download dir.

        Must be overridden; the base implementation raises
        NotImplementedError.
        """
        raise NotImplementedError('download() must be implemented by subclasses')
-
-
class DownloadMethodGitHubTarball(DownloadMethod):
    """Download and repack archive tarball from GitHub."""

    __repo_url_regex = re.compile(r'^(?:https|git)://github\.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)')
    # a complete (40 hex digits) commit id
    __sha1_regex = re.compile(r'^[0-9a-fA-F]{40}\Z')

    def __init__(self, args):
        super(DownloadMethodGitHubTarball, self).__init__(args)
        self._init_owner_repo()
        self.version = args.version
        self.subdir = args.subdir
        self.source = args.source
        self.commit_ts = None # lazy load commit timestamp
        self.commit_ts_cache = GitHubCommitTsCache()
        self.name = 'github-tarball'

    @staticmethod
    def match(args):
        """Match if proto is git and the first URL is a GitHub clone url."""
        if args.proto != 'git':
            return False
        url = args.urls[0]
        try:
            return url.startswith(('https://github.com/', 'git://github.com/'))
        except AttributeError:
            # not a string-like URL
            return False

    def download(self):
        """Download and repack GitHub archive tarball.

        The tarball is fetched into ``TMPDIR_DL``, unpacked, its top-level
        directory renamed to ``self.subdir``, repacked as ``self.source`` with
        the commit timestamp as mtime and finally moved into ``self.dl_dir``.
        Raise DownloadException if the source tree declares git submodules.
        """
        self._init_commit_ts()
        with Path(TMPDIR_DL, keep=True) as dir_dl:
            # fetch tarball from GitHub
            tarball_path = os.path.join(dir_dl.path, self.subdir + '.tar.gz.dl')
            with Path(tarball_path, isdir=False):
                self._fetch(tarball_path)
                # unpack
                d = os.path.join(dir_dl.path, self.subdir + '.untar')
                # Leftovers of an aborted earlier run would make untar() see
                # more than one entry, so start from a clean directory.
                Path.rm_all(d)
                with Path(d) as dir_untar:
                    tarball_prefix = Path.untar(tarball_path, into=dir_untar.path)
                    dir0 = os.path.join(dir_untar.path, tarball_prefix)
                    dir1 = os.path.join(dir_untar.path, self.subdir)
                    # submodules check
                    if self._has_submodule(dir0):
                        raise DownloadException('unable to fetch submodules\' source code')
                    # rename subdir
                    os.rename(dir0, dir1)
                    # repack
                    into = os.path.join(TMPDIR_DL, self.source)
                    Path.tar(dir_untar.path, self.subdir, into=into, ts=self.commit_ts)
                    # move to target location
                    file1 = os.path.join(self.dl_dir, self.source)
                    if into != file1:
                        shutil.move(into, file1)

    def _has_submodule(self, dir_):
        """Return True if ``dir_`` holds a non-empty ``.gitmodules``.

        Any stat failure other than the file being absent also counts as
        True, i.e. the check errs on the side of refusing.
        """
        m = os.path.join(dir_, '.gitmodules')
        try:
            st = os.stat(m)
            return st.st_size > 0
        except OSError as e:
            return e.errno != errno.ENOENT

    def _init_owner_repo(self):
        """Set ``self.owner`` and ``self.repo`` from the primary URL.

        A trailing ``.git`` is dropped from the repo name.  Raise
        DownloadException if the URL is not a GitHub repository URL.
        """
        url = self.url
        m = self.__repo_url_regex.search(url)
        if m is None:
            raise DownloadException('invalid github url: %s' % url)
        owner = m.group('owner')
        repo = m.group('repo')
        if repo.endswith('.git'):
            repo = repo[:-4]
        self.owner = owner
        self.repo = repo

    def _init_commit_ts(self):
        """Set ``self.commit_ts`` to the committer date of ``self.version``.

        The value is seconds since the epoch (UTC).  A complete commit id is
        looked up with the Git Data API and remembered in the timestamp
        cache.  Any other ref (tag, branch, abbreviated id) is looked up with
        the Repository Commits API, which accepts such refs; it is not cached
        because such a ref may later point at another commit.
        """
        if self.commit_ts is not None:
            return
        is_full_sha = self.__sha1_regex.match(self.version) is not None
        if is_full_sha:
            url = self._make_repo_url_path('git', 'commits', self.version)
            attr_path = ('committer', 'date')
            ct = self.commit_ts_cache.get(url)
            if ct is not None:
                self.commit_ts = ct
                return
        else:
            url = self._make_repo_url_path('commits', self.version)
            attr_path = ('commit', 'committer', 'date')
        resp = self._make_request(url)
        try:
            data = json.loads(resp.read())
        finally:
            resp.close()
        date = data
        for attr in attr_path:
            date = date[attr]
        date = datetime.datetime.strptime(date, '%Y-%m-%dT%H:%M:%SZ')
        # timegm(), unlike time.mktime(), treats the tuple as UTC
        ct = calendar.timegm(date.timetuple())
        self.commit_ts = ct
        if is_full_sha:
            self.commit_ts_cache.set(url, ct)

    def _fetch(self, path):
        """Fetch tarball of the specified version ref and save it to ``path``."""
        ref = self.version
        url = self._make_repo_url_path('tarball', ref)
        resp = self._make_request(url)
        try:
            with open(path, 'wb') as fout:
                shutil.copyfileobj(resp, fout)
        finally:
            resp.close()

    def _make_repo_url_path(self, *args):
        """Return ``/repos/<owner>/<repo>`` with ``args`` appended as path parts."""
        url = '/repos/{0}/{1}'.format(self.owner, self.repo)
        if args:
            url += '/' + '/'.join(args)
        return url

    def _make_request(self, path):
        """Request GitHub API endpoint on ``path``.

        Return the file-like response object; the caller must close it.
        """
        url = 'https://api.github.com' + path
        headers = {
            'Accept': 'application/vnd.github.v3+json',
            'User-Agent': 'OpenWrt',
        }
        req = urllib2.Request(url, headers=headers)
        # NOTE(review): certificate verification is switched off here, so the
        # peer is not authenticated.  Presumably the result is verified by
        # other means (the commit message mentions a mirror hash) -- confirm,
        # or switch to ssl.create_default_context().
        sslcontext = ssl._create_unverified_context()
        fileobj = urllib2.urlopen(req, context=sslcontext)
        return fileobj
-
-
class DownloadMethodCatchall(DownloadMethod):
    """Dummy method that knows names but not ways of download."""

    # (method name, URL prefixes) pairs, checked in order.  The empty prefix
    # of the last pair matches every URL.
    # NOTE(review): the original table listed the bzr/sftp:// pair twice; the
    # second copy may have been meant for another scheme -- TODO confirm.
    _METHODS_MAP = (
        ('default', ('@APACHE/', '@GITHUB/', '@GNOME/', '@GNU/',
                     '@KERNEL/', '@SF/', '@SAVANNAH/', 'ftp://', 'http://',
                     'https://', 'file://')),
        ('git', ('git://', )),
        ('svn', ('svn://', )),
        ('cvs', ('cvs://', )),
        ('bzr', ('sftp://', )),
        ('unknown', ('', )),
    )

    def __init__(self, args):
        # the base class already stores ``args``
        super(DownloadMethodCatchall, self).__init__(args)
        self.proto = args.proto
        self.name = self._resolve_name()

    def _resolve_name(self):
        """Return the name of the download method.

        An explicitly given proto wins.  Otherwise return the first name in
        the table for which any of the URLs starts with one of its prefixes;
        this is None only if the URL list is empty.
        """
        if self.proto:
            return self.proto
        for name, prefixes in self._METHODS_MAP:
            if any(url.startswith(prefixes) for url in self.urls):
                return name
        return None

    @staticmethod
    def match(args):
        """Return True."""
        return True

    def download(self):
        """Not implemented.

        raise DownloadException
        """
        raise DownloadException('download method for %s is not yet implemented' % self.name)
-
# Order matters: DownloadMethod.resolve() picks the first class whose match()
# is true, and DownloadMethodCatchall matches everything, so it goes last.
DOWNLOAD_METHODS = [
    DownloadMethodGitHubTarball,
    DownloadMethodCatchall,
]
-
-
def main():
    """Command line entry point.

    Action ``dl_method`` prints the name of the download method resolved for
    the given arguments; action ``dl`` performs the download.  Exceptions
    raised by the download propagate to the caller.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('action', choices=('dl_method', 'dl'), help='Action to take')
    # Every download method reads urls[0], so insist on at least one URL
    # instead of failing later with a TypeError.
    parser.add_argument('--urls', nargs='+', metavar='URL', required=True, help='Download URLs')
    parser.add_argument('--proto', help='Download proto')
    parser.add_argument('--subdir', help='Source code subdir name')
    parser.add_argument('--version', help='Source code version')
    parser.add_argument('--source', help='Source tarball filename')
    parser.add_argument('--dl-dir', default=os.getcwd(), help='Download dir')
    args = parser.parse_args()
    # ``action`` is limited by ``choices``, so both branches need the method.
    method = DownloadMethod.resolve(args)
    if args.action == 'dl_method':
        sys.stdout.write(method.name + '\n')
    elif args.action == 'dl':
        method.download()


if __name__ == '__main__':
    main()