[PATCH 11/21] lookaside: Handle downloading of source files

Mathieu Bridon bochecha at fedoraproject.org
Wed May 6 11:53:07 UTC 2015


From: Mathieu Bridon <bochecha at daitauha.fr>

This is quite a large chunk of code that is moving from pyrpkg.Commands
to the new pyrpkg.lookaside.CGILookasideCache.

In addition, we no longer fork a call to the curl command, but instead
use pycurl. This allows the download method to be entirely tested.

Downstreams can now also easily use their own URL format, which should
make the CentOS folks happier, as well as make it easier when we want to
add the hashtype to the URL in Fedora (as part of the move away from
md5 hashes).

As part of the move to pycurl, we are losing the download progress
indication, but it will make a comeback in a future commit.
---
 src/pyrpkg/__init__.py  |  23 ++-------
 src/pyrpkg/errors.py    |   5 ++
 src/pyrpkg/lookaside.py |  63 +++++++++++++++++++++++-
 test/test_lookaside.py  | 124 +++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 193 insertions(+), 22 deletions(-)

diff --git a/src/pyrpkg/__init__.py b/src/pyrpkg/__init__.py
index eff3551..ce262e6 100644
--- a/src/pyrpkg/__init__.py
+++ b/src/pyrpkg/__init__.py
@@ -20,7 +20,6 @@ if sys.version_info[0:2] >= (2, 5):
 else:
     # We need a subprocess that has check_call
     from kitchen.pycompat27 import subprocess
-import hashlib
 import koji
 import rpm
 import logging
@@ -1579,26 +1578,10 @@ class Commands(object):
         sourcesf = SourcesFile(self.sources_filename, self.source_entry_type)
 
         for entry in sourcesf.entries:
-            # See if we already have a valid copy downloaded
             outfile = os.path.join(outdir, entry.file)
-            if os.path.exists(outfile):
-                if self.lookasidecache.file_is_valid(outfile, entry.hash, hashtype=entry.hashtype):
-                    continue
-            self.log.info("Downloading %s" % (entry.file))
-            urled_file = entry.file.replace(' ', '%20')
-            url = '%s/%s/%s/%s/%s' % (self.lookaside, self.module_name,
-                                      urled_file, entry.hash, urled_file)
-            # These options came from Makefile.common.
-            # Probably need to support wget as well
-            command = ['curl', '-H', 'Pragma:', '-o', outfile, '-R', '-S',
-                       '--fail']
-            if self.quiet:
-                command.append('-s')
-            command.append(url)
-            self._run_command(command)
-            if not self.lookasidecache.file_is_valid(outfile, entry.hash, hashtype=entry.hashtype):
-                raise rpkgError('%s failed checksum' % entry.file)
-        return
+            self.lookasidecache.download(
+                self.module_name, entry.file, entry.hash, outfile,
+                hashtype=entry.hashtype)
 
     def switch_branch(self, branch, fetch=True):
         """Switch the working branch
diff --git a/src/pyrpkg/errors.py b/src/pyrpkg/errors.py
index e9d481b..9547702 100644
--- a/src/pyrpkg/errors.py
+++ b/src/pyrpkg/errors.py
@@ -37,3 +37,8 @@ class MalformedLineError(rpkgError):
 class InvalidHashType(rpkgError):
     """Raised when we don't know the requested hash algorithm"""
     pass
+
+
+class DownloadError(rpkgError):
+    """Raised when something went wrong during a download"""
+    pass
diff --git a/src/pyrpkg/lookaside.py b/src/pyrpkg/lookaside.py
index e3c1bac..1108fcf 100644
--- a/src/pyrpkg/lookaside.py
+++ b/src/pyrpkg/lookaside.py
@@ -15,8 +15,13 @@ way it is done by Fedora, RHEL, and other distributions maintainers.
 
 
 import hashlib
+import logging
+import os
+import sys
 
-from .errors import InvalidHashType
+import pycurl
+
+from .errors import DownloadError, InvalidHashType
 
 
 class CGILookasideCache(object):
@@ -34,6 +39,10 @@ class CGILookasideCache(object):
         self.download_url = download_url
         self.upload_url = upload_url
 
+        self.log = logging.getLogger(__name__)
+
+        self.download_path = '%(name)s/%(filename)s/%(hash)s/%(filename)s'
+
     def hash_file(self, filename, hashtype=None):
         """Compute the hash of a file
 
@@ -77,3 +86,55 @@ class CGILookasideCache(object):
         """
         sum = self.hash_file(filename, hashtype)
         return sum == hash
+
+    def download(self, name, filename, hash, outfile, hashtype=None):
+        """Download a source file
+
+        Args:
+            name (str): The name of the module. (usually the name of the SRPM)
+            filename (str): The name of the file to download.
+            hash (str): The known good hash of the file.
+            outfile (str): The full path where to save the downloaded file.
+            hashtype (str, optional): The hash algorithm. (e.g 'md5')
+                This defaults to the hashtype passed to the constructor.
+        """
+        if hashtype is None:
+            hashtype = self.hashtype
+
+        if os.path.exists(outfile):
+            if self.file_is_valid(outfile, hash, hashtype=hashtype):
+                return
+
+        self.log.info("Downloading %s", filename)
+        urled_file = filename.replace(' ', '%20')
+
+        path_dict = {'name': name, 'filename': urled_file, 'hash': hash}
+        path = self.download_path % path_dict
+        url = '%s/%s' % (self.download_url, path)
+        self.log.debug("Full url: %s" % url)
+
+        with open(outfile, 'wb') as f:
+            c = pycurl.Curl()
+            c.setopt(pycurl.URL, url)
+            c.setopt(pycurl.HTTPHEADER, ['Pragma:'])
+            c.setopt(pycurl.OPT_FILETIME, True)
+            c.setopt(pycurl.WRITEDATA, f)
+
+            try:
+                c.perform()
+                tstamp = c.getinfo(pycurl.INFO_FILETIME)
+                status = c.getinfo(pycurl.RESPONSE_CODE)
+
+            except Exception as e:
+                raise DownloadError(e)
+
+            finally:
+                c.close()
+
+        if status != 200:
+            raise DownloadError('Server returned status code %d' % status)
+
+        os.utime(outfile, (tstamp, tstamp))
+
+        if not self.file_is_valid(outfile, hash, hashtype=hashtype):
+            raise DownloadError('%s failed checksum' % filename)
diff --git a/test/test_lookaside.py b/test/test_lookaside.py
index 4a4f067..89a34cd 100644
--- a/test/test_lookaside.py
+++ b/test/test_lookaside.py
@@ -7,17 +7,21 @@
 # the full text of the license.
 
 
+import hashlib
 import os
 import shutil
 import sys
 import tempfile
 import unittest
 
+import mock
+import pycurl
+
 old_path = list(sys.path)
 src_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../src')
 sys.path.insert(0, src_path)
 from pyrpkg.lookaside import CGILookasideCache
-from pyrpkg.errors import InvalidHashType
+from pyrpkg.errors import DownloadError, InvalidHashType
 sys.path = old_path
 
 
@@ -64,3 +68,121 @@ class CGILookasideCacheTestCase(unittest.TestCase):
                                          '437b930db84b8079c2dd804a71936b5f'))
         self.assertFalse(lc.file_is_valid(self.filename, 'not the right hash',
                                           hashtype='sha512'))
+
+    @mock.patch('pyrpkg.lookaside.pycurl.Curl')
+    def test_download(self, mock_curl):
+        def mock_getinfo(info):
+            return 200 if info == pycurl.RESPONSE_CODE else 0
+
+        def mock_perform():
+            with open(self.filename, 'rb') as f:
+                curlopts[pycurl.WRITEDATA].write(f.read())
+
+        def mock_setopt(opt, value):
+            curlopts[opt] = value
+
+        curlopts = {}
+        curl = mock_curl.return_value
+        curl.getinfo.side_effect = mock_getinfo
+        curl.perform.side_effect = mock_perform
+        curl.setopt.side_effect = mock_setopt
+
+        with open(self.filename, 'wb') as f:
+            f.write(b'content')
+
+        name = 'pyrpkg'
+        filename = 'pyrpkg-0.0.tar.xz'
+        hash = hashlib.sha512(b'content').hexdigest()
+        outfile = os.path.join(self.workdir, 'pyrpkg-0.0.tar.xz')
+        full_url = 'http://example.com/%s/%s/%s/%s' % (name, filename, hash,
+                                                       filename)
+
+        lc = CGILookasideCache('sha512', 'http://example.com', '_')
+        lc.download(name, filename, hash, outfile, hashtype='sha512')
+        self.assertEqual(curl.perform.call_count, 1)
+        self.assertEqual(curlopts[pycurl.URL], full_url)
+        self.assertEqual(os.path.getmtime(outfile), 0)
+
+        with open(outfile) as f:
+            self.assertEqual(f.read(), 'content')
+
+        # Try a second time
+        lc.download(name, filename, hash, outfile)
+        self.assertEqual(curl.perform.call_count, 1)
+
+        # Try a third time
+        os.remove(outfile)
+        lc.download(name, filename, hash, outfile)
+        self.assertEqual(curl.perform.call_count, 2)
+
+    @mock.patch('pyrpkg.lookaside.pycurl.Curl')
+    def test_download_corrupted(self, mock_curl):
+        def mock_getinfo(info):
+            return 200 if info == pycurl.RESPONSE_CODE else 0
+
+        def mock_perform():
+            with open(self.filename) as f:
+                curlopts[pycurl.WRITEDATA].write(f.read())
+
+        def mock_setopt(opt, value):
+            curlopts[opt] = value
+
+        curlopts = {}
+        curl = mock_curl.return_value
+        curl.getinfo.side_effect = mock_getinfo
+        curl.perform.side_effect = mock_perform
+        curl.setopt.side_effect = mock_setopt
+
+        with open(self.filename, 'wb') as f:
+            f.write(b'content')
+
+        hash = "not the right hash"
+        outfile = os.path.join(self.workdir, 'pyrpkg-0.0.tar.xz')
+
+        lc = CGILookasideCache('sha512', 'http://example.com', '_')
+        self.assertRaises(DownloadError, lc.download, 'pyrpkg',
+                          'pyrpkg-0.0.tar.xz', hash, outfile)
+
+    @mock.patch('pyrpkg.lookaside.pycurl.Curl')
+    def test_download_failed(self, mock_curl):
+        curl = mock_curl.return_value
+        curl.perform.side_effect = Exception(
+            'Could not resolve host: example.com')
+
+        with open(self.filename, 'wb') as f:
+            f.write(b'content')
+
+        hash = hashlib.sha512(b'content').hexdigest()
+        outfile = os.path.join(self.workdir, 'pyrpkg-0.0.tar.xz')
+
+        lc = CGILookasideCache('sha512', 'http://example.com', '_')
+        self.assertRaises(DownloadError, lc.download, 'pyrpkg',
+                          'pyrpkg-0.0.tar.xz', hash, outfile)
+
+    @mock.patch('pyrpkg.lookaside.pycurl.Curl')
+    def test_download_failed_status_code(self, mock_curl):
+        def mock_getinfo(info):
+            return 500 if info == pycurl.RESPONSE_CODE else 0
+
+        def mock_perform():
+            with open(self.filename) as f:
+                curlopts[pycurl.WRITEDATA].write(f.read())
+
+        def mock_setopt(opt, value):
+            curlopts[opt] = value
+
+        curlopts = {}
+        curl = mock_curl.return_value
+        curl.getinfo.side_effect = mock_getinfo
+        curl.perform.side_effect = mock_perform
+        curl.setopt.side_effect = mock_setopt
+
+        with open(self.filename, 'wb') as f:
+            f.write(b'content')
+
+        hash = hashlib.sha512(b'content').hexdigest()
+        outfile = os.path.join(self.workdir, 'pyrpkg-0.0.tar.xz')
+
+        lc = CGILookasideCache('sha512', 'http://example.com', '_')
+        self.assertRaises(DownloadError, lc.download, 'pyrpkg',
+                          'pyrpkg-0.0.tar.xz', hash, outfile)
-- 
2.1.0



More information about the rel-eng mailing list