[python-urlgrabber] - Update to latest HEAD. - fix some test cases that were failing. BZ 918658 - exit(1) on /bin/urlgrabber failures. BZ 918613

Zdeněk Pavlas zpavlas at fedoraproject.org
Thu Mar 7 12:28:55 UTC 2013


commit c508ad399b4837b823c9d12a980c6ecf1e7a65d7
Author: Zdenek Pavlas <zpavlas at redhat.com>
Date:   Thu Mar 7 13:22:57 2013 +0100

    - Update to latest HEAD.
    - fix some test cases that were failing.  BZ 918658
    - exit(1) on /bin/urlgrabber failures.  BZ 918613
    - clamp timestamps from the future.  BZ 894630
    - enable GSSNEGOTIATE proxy auth where curl implements it correctly (curl >= 7.28.0).
    - make error messages more verbose.
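
A note on the GSSNEGOTIATE change: pycurl reports the installed libcurl
version as a packed integer (0xXXYYZZ for major.minor.patch), so 7.28.0
packs to (7 << 16 | 28 << 8 | 0) == 0x071C00.  A rough standalone sketch
of the check the patch adds below (proxy_auth_mask is an illustrative
name, not part of urlgrabber):

    import pycurl

    def proxy_auth_mask():
        auth = pycurl.HTTPAUTH_ANY
        # version_info()[2] is the packed libcurl version number
        if pycurl.version_info()[2] < (7 << 16 | 28 << 8 | 0):
            # curl < 7.28.0 mishandles proxy GSS-Negotiate (BZ 769254),
            # so mask it out of the allowed methods
            auth &= ~pycurl.HTTPAUTH_GSSNEGOTIATE
        return auth

    # usage: curl_obj.setopt(pycurl.PROXYAUTH, proxy_auth_mask())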

 python-urlgrabber.spec |   10 ++-
 urlgrabber-HEAD.patch  |  192 +++++++++++++++++++++++++++++-------------------
 2 files changed, 125 insertions(+), 77 deletions(-)
---
diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec
index 6ba1d8d..43666d9 100644
--- a/python-urlgrabber.spec
+++ b/python-urlgrabber.spec
@@ -3,7 +3,7 @@
 Summary: A high-level cross-protocol url-grabber
 Name: python-urlgrabber
 Version: 3.9.1
-Release: 24%{?dist}
+Release: 25%{?dist}
 Source0: urlgrabber-%{version}.tar.gz
 Patch1: urlgrabber-HEAD.patch
 
@@ -44,6 +44,14 @@ rm -rf $RPM_BUILD_ROOT
 %attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down
 
 %changelog
+* Thu Mar  7 2013 Zdeněk Pavlas <zpavlas at redhat.com> - 3.9.1-25
+- Update to latest HEAD.
+- fix some test cases that were failing.  BZ 918658
+- exit(1) on /bin/urlgrabber failures.  BZ 918613
+- clamp timestamps from the future.  BZ 894630
+- enable GSSNEGOTIATE proxy auth where curl implements it correctly (curl >= 7.28.0).
+- make error messages more verbose.
+
 * Thu Feb 14 2013 Fedora Release Engineering <rel-eng at lists.fedoraproject.org> - 3.9.1-24
 - Rebuilt for https://fedoraproject.org/wiki/Fedora_19_Mass_Rebuild
 
diff --git a/urlgrabber-HEAD.patch b/urlgrabber-HEAD.patch
index aaf9cbc..4633455 100644
--- a/urlgrabber-HEAD.patch
+++ b/urlgrabber-HEAD.patch
@@ -12,7 +12,7 @@ index 0000000..1ffe416
 +*.kateproject
 +ipython.log*
 diff --git a/scripts/urlgrabber b/scripts/urlgrabber
-index 518e512..09cd896 100644
+index 518e512..07881b3 100644
 --- a/scripts/urlgrabber
 +++ b/scripts/urlgrabber
 @@ -115,6 +115,7 @@ options:
@@ -71,6 +71,14 @@ index 518e512..09cd896 100644
  
      def help_doc(self):
          print __doc__
+@@ -294,6 +301,7 @@ class ugclient:
+                 if self.op.localfile: print f
+             except URLGrabError, e:
+                 print e
++                sys.exit(1)
+         
+     def set_debug_logger(self, dbspec):
+         try:
 diff --git a/scripts/urlgrabber-ext-down b/scripts/urlgrabber-ext-down
 new file mode 100755
 index 0000000..3dafb12
@@ -181,6 +189,37 @@ index 50c6348..5fb43f9 100644
  base_ftp  = 'ftp://localhost/test/'
  
  # set to a proftp server only. we're working around a couple of
+diff --git a/test/test_mirror.py b/test/test_mirror.py
+index 70fe069..cb63a41 100644
+--- a/test/test_mirror.py
++++ b/test/test_mirror.py
+@@ -28,7 +28,7 @@ import os
+ import string, tempfile, random, cStringIO, os
+ 
+ import urlgrabber.grabber
+-from urlgrabber.grabber import URLGrabber, URLGrabError
++from urlgrabber.grabber import URLGrabber, URLGrabError, URLGrabberOptions
+ import urlgrabber.mirror
+ from urlgrabber.mirror import MirrorGroup, MGRandomStart, MGRandomOrder
+ 
+@@ -106,6 +106,9 @@ class CallbackTests(TestCase):
+         self.g  = URLGrabber()
+         fullmirrors = [base_mirror_url + m + '/' for m in \
+                        (bad_mirrors + good_mirrors)]
++        if hasattr(urlgrabber.grabber, '_TH'):
++            # test assumes mirrors are not re-ordered
++            urlgrabber.grabber._TH.hosts.clear()
+         self.mg = MirrorGroup(self.g, fullmirrors)
+     
+     def test_failure_callback(self):
+@@ -168,6 +171,7 @@ class FakeGrabber:
+         self.resultlist = resultlist or []
+         self.index = 0
+         self.calls = []
++        self.opts = URLGrabberOptions()
+         
+     def urlgrab(self, url, filename=None, **kwargs):
+         self.calls.append( (url, filename) )
 diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py
 index 3e5f3b7..8eeaeda 100644
 --- a/urlgrabber/byterange.py
@@ -236,7 +275,7 @@ index 3e5f3b7..8eeaeda 100644
      return (fb,lb)
  
 diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
-index e090e90..6ce9861 100644
+index e090e90..1afb2c5 100644
 --- a/urlgrabber/grabber.py
 +++ b/urlgrabber/grabber.py
 @@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs)
@@ -958,7 +997,7 @@ index e090e90..6ce9861 100644
              if opts.ssl_cert_type:                
                  self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
              if opts.ssl_key_pass:
-@@ -1187,28 +1414,26 @@ class PyCurlFileObject():
+@@ -1187,28 +1414,28 @@ class PyCurlFileObject():
          if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
              self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
              
@@ -982,9 +1021,11 @@ index e090e90..6ce9861 100644
 +        # proxy
 +        if opts.proxy is not None:
 +            self.curl_obj.setopt(pycurl.PROXY, opts.proxy)
-+            self.curl_obj.setopt(pycurl.PROXYAUTH,
-+                # All but Kerberos.  BZ 769254
-+                pycurl.HTTPAUTH_ANY - pycurl.HTTPAUTH_GSSNEGOTIATE)
++            auth = pycurl.HTTPAUTH_ANY
++            if pycurl.version_info()[2] < (7 << 16 | 28 << 8 | 0):
++                # BZ 769254: work around a bug in curl < 7.28.0
++                auth &= ~pycurl.HTTPAUTH_GSSNEGOTIATE
++            self.curl_obj.setopt(pycurl.PROXYAUTH, auth)
 +
 +        if opts.username and opts.password:
 +            if self.scheme in ('http', 'https'):
@@ -1003,7 +1044,7 @@ index e090e90..6ce9861 100644
              
          # our url
          self.curl_obj.setopt(pycurl.URL, self.url)
-@@ -1228,39 +1453,36 @@ class PyCurlFileObject():
+@@ -1228,39 +1455,26 @@ class PyCurlFileObject():
              
              code = self.http_code
              errcode = e.args[0]
@@ -1029,20 +1070,17 @@ index e090e90..6ce9861 100644
              elif errcode == 28:
 -                err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
 -                err.url = self.url
-+                err = URLGrabError(12, _('Timeout on %s: %s') % (errurl, e))
-+                err.url = errurl
-                 raise err
-             elif errcode == 35:
-                 msg = _("problem making ssl connection")
-                 err = URLGrabError(14, msg)
+-                raise err
+-            elif errcode == 35:
+-                msg = _("problem making ssl connection")
+-                err = URLGrabError(14, msg)
 -                err.url = self.url
-+                err.url = errurl
-                 raise err
-             elif errcode == 37:
+-                raise err
+-            elif errcode == 37:
 -                msg = _("Could not open/read %s") % (self.url)
-+                msg = _("Could not open/read %s") % (errurl)
-                 err = URLGrabError(14, msg)
+-                err = URLGrabError(14, msg)
 -                err.url = self.url
++                err = URLGrabError(12, _('Timeout on %s: %s') % (errurl, e))
 +                err.url = errurl
                  raise err
                  
@@ -1052,48 +1090,16 @@ index e090e90..6ce9861 100644
                  # this is probably wrong but ultimately this is what happens
                  # we have a legit http code and a pycurl 'writer failed' code
                  # which almost always means something aborted it from outside
-@@ -1272,33 +1494,94 @@ class PyCurlFileObject():
-             elif errcode == 58:
-                 msg = _("problem with the local client certificate")
-                 err = URLGrabError(14, msg)
--                err.url = self.url
-+                err.url = errurl
-                 raise err
- 
-             elif errcode == 60:
--                msg = _("client cert cannot be verified or client cert incorrect")
-+                msg = _("Peer cert cannot be verified or peer cert invalid")
-                 err = URLGrabError(14, msg)
--                err.url = self.url
-+                err.url = errurl
-                 raise err
-             
-             elif errcode == 63:
-                 if self._error[1]:
-                     msg = self._error[1]
-                 else:
--                    msg = _("Max download size exceeded on %s") % (self.url)
-+                    msg = _("Max download size exceeded on %s") % ()
-                 err = URLGrabError(14, msg)
+@@ -1269,40 +1483,76 @@ class PyCurlFileObject():
+                 # figure out what aborted the pycurl process FIXME
+                 raise KeyboardInterrupt
+                 
+-            elif errcode == 58:
+-                msg = _("problem with the local client certificate")
+-                err = URLGrabError(14, msg)
 -                err.url = self.url
-+                err.url = errurl
-                 raise err
-                     
--            elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it
--                msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
-+            elif str(e.args[1]) == '' and code and not 200 <= code <= 299:
-+                if self.scheme in ['http', 'https']:
-+                    if self.http_code in responses:
-+                        resp = responses[self.http_code]
-+                        msg = 'HTTP Error %s - %s : %s' % (self.http_code, resp, errurl)
-+                    else:
-+                        msg = 'HTTP Error %s : %s ' % (self.http_code, errurl)
-+                elif self.scheme in ['ftp']:
-+                    msg = 'FTP Error %s : %s ' % (self.http_code, errurl)
-+                else:
-+                    msg = "Unknown Error: URL=%s , scheme=%s" % (errurl, self.scheme)
-             else:
--                msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
+-                raise err
++            else:
 +                pyerr2str = { 5 : _("Couldn't resolve proxy"),
 +                              6 : _("Couldn't resolve host"),
 +                              7 : _("Couldn't connect"),
@@ -1137,25 +1143,57 @@ index e090e90..6ce9861 100644
 +                             70 : _("Out of disk space on server"),
 +                             73 : _("Remove file exists"),
 +                              }
-+                errstr = str(e.args[1])
-+                if not errstr:
-+                    errstr = pyerr2str.get(errcode, '<Unknown>')
-+                msg = 'curl#%s - "%s"' % (errcode, errstr)
-                 code = errcode
-             err = URLGrabError(14, msg)
-             err.code = code
-             err.exception = e
-             raise err
++                errstr = str(e.args[1]) or pyerr2str.get(errcode, '<Unknown>')
++                if code and not 200 <= code <= 299:
++                    msg = '%s Error %d - %s' % (self.scheme.upper(), code,
++                                                self.scheme in ('http', 'https')
++                                                and responses.get(code) or errstr)
++                else:
++                    msg = 'curl#%s - "%s"' % (errcode, errstr)
++                    code = errcode
+ 
+-            elif errcode == 60:
+-                msg = _("client cert cannot be verified or client cert incorrect")
+                 err = URLGrabError(14, msg)
+-                err.url = self.url
++                err.url = errurl
++                err.code = code
+                 raise err
+-            
+-            elif errcode == 63:
+-                if self._error[1]:
+-                    msg = self._error[1]
+-                else:
+-                    msg = _("Max download size exceeded on %s") % (self.url)
++
 +        else:
 +            if self._error[1]:
 +                msg = self._error[1]
-+                err = URLGrabError(14, msg)
+                 err = URLGrabError(14, msg)
+-                err.url = self.url
 +                err.url = urllib.unquote(self.url)
-+                raise err
+                 raise err
+-                    
+-            elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it
+-                msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
+-            else:
+-                msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
+-                code = errcode
+-            err = URLGrabError(14, msg)
+-            err.code = code
+-            err.exception = e
+-            raise err
  
      def _do_open(self):
          self.curl_obj = _curl_cache
-@@ -1333,7 +1616,11 @@ class PyCurlFileObject():
+-        self.curl_obj.reset() # reset all old settings away, just in case
++        # reset() clears PYCURL_ERRORBUFFER, and there's no way
++        # to reinitialize it, so better don't do that.  BZ 896025
++        #self.curl_obj.reset() # reset all old settings away, just in case
+         # setup any ranges
+         self._set_opts()
+         self._do_grab()
+@@ -1333,7 +1583,11 @@ class PyCurlFileObject():
                  
          if self.opts.range:
              rt = self.opts.range
@@ -1168,7 +1206,7 @@ index e090e90..6ce9861 100644
  
          if rt:
              header = range_tuple_to_header(rt)
-@@ -1434,21 +1721,46 @@ class PyCurlFileObject():
+@@ -1434,21 +1688,46 @@ class PyCurlFileObject():
              #fh, self._temp_name = mkstemp()
              #self.fo = open(self._temp_name, 'wb')
  
@@ -1222,7 +1260,7 @@ index e090e90..6ce9861 100644
          else:
              #self.fo = open(self._temp_name, 'r')
              self.fo.seek(0)
-@@ -1526,17 +1838,20 @@ class PyCurlFileObject():
+@@ -1526,17 +1805,20 @@ class PyCurlFileObject():
              if self._prog_running:
                  downloaded += self._reget_length
                  self.opts.progress_obj.update(downloaded)
@@ -1248,7 +1286,7 @@ index e090e90..6ce9861 100644
  
              msg = _("Downloaded more than max size for %s: %s > %s") \
                          % (self.url, cur, max_size)
-@@ -1544,13 +1859,6 @@ class PyCurlFileObject():
+@@ -1544,13 +1826,6 @@ class PyCurlFileObject():
              return True
          return False
          
@@ -1262,7 +1300,7 @@ index e090e90..6ce9861 100644
      def read(self, amt=None):
          self._fill_buffer(amt)
          if amt is None:
-@@ -1582,9 +1890,21 @@ class PyCurlFileObject():
+@@ -1582,9 +1857,21 @@ class PyCurlFileObject():
              self.opts.progress_obj.end(self._amount_read)
          self.fo.close()
          
@@ -1285,7 +1323,7 @@ index e090e90..6ce9861 100644
  
  #####################################################################
  # DEPRECATED FUNCTIONS
-@@ -1621,6 +1941,478 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
+@@ -1621,6 +1908,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
  
          
  #####################################################################
@@ -1727,6 +1765,8 @@ index e090e90..6ce9861 100644
 +        if ug_err is None:
 +            # defer first update if the file was small.  BZ 851178.
 +            if not ts and dl_size < 1e6: return
++            # clamp timestamps from the future.  BZ 894630.
++            if ts > now: ts = now
 +
 +            # k1: the older, the less useful
 +            # k2: <500ms readings are less reliable

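The final hunk above touches urlgrabber's per-host timing bookkeeping
(_TH): each throughput reading is blended into the stored estimate with
weights k1 (older estimates count for less) and k2 (readings under
500 ms are less reliable), and stored timestamps are clamped so a clock
that once ran ahead cannot keep the estimate pinned in the future
(BZ 894630).  A rough illustration of that weighting scheme, not
urlgrabber's actual code (update_speed, hosts and half_life are made-up
names):

    import time

    hosts = {}  # host -> (estimated speed in bytes/s, timestamp)

    def update_speed(host, dl_size, dl_time, half_life=7*86400):
        now = time.time()
        speed, ts = hosts.get(host, (0.0, 0))
        if ts > now:
            ts = now  # clamp timestamps from the future (cf. BZ 894630)
        k1 = 0.5 ** ((now - ts) / half_life)  # the older, the less useful
        k2 = min(dl_time / 0.5, 1.0)          # <500ms readings count less
        reading = dl_size / max(dl_time, 1e-6)  # observed bytes/s
        total = k1 + k2
        if total > 0:
            speed = (k1 * speed + k2 * reading) / total
        else:
            speed = reading
        hosts[host] = (speed, now)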
