[python-urlgrabber] - Update to latest HEAD. - fix some test cases that were failing. BZ 918658 - exit(1) on /bin/urlgrabber failures. BZ 918613 - clamp timestamps from the future. BZ 894630 - enable GSSNEGOTIATE if implemented correctly. - make error messages more verbose.
Zdeněk Pavlas
zpavlas at fedoraproject.org
Thu Mar 7 12:28:55 UTC 2013
commit c508ad399b4837b823c9d12a980c6ecf1e7a65d7
Author: Zdenek Pavlas <zpavlas at redhat.com>
Date: Thu Mar 7 13:22:57 2013 +0100
- Update to latest HEAD.
- fix some test cases that were failing. BZ 918658
- exit(1) on /bin/urlgrabber failures. BZ 918613
- clamp timestamps from the future. BZ 894630
- enable GSSNEGOTIATE if implemented correctly.
- make error messages more verbose.
python-urlgrabber.spec | 10 ++-
urlgrabber-HEAD.patch | 192 +++++++++++++++++++++++++++++-------------------
2 files changed, 125 insertions(+), 77 deletions(-)
---
diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec
index 6ba1d8d..43666d9 100644
--- a/python-urlgrabber.spec
+++ b/python-urlgrabber.spec
@@ -3,7 +3,7 @@
Summary: A high-level cross-protocol url-grabber
Name: python-urlgrabber
Version: 3.9.1
-Release: 24%{?dist}
+Release: 25%{?dist}
Source0: urlgrabber-%{version}.tar.gz
Patch1: urlgrabber-HEAD.patch
@@ -44,6 +44,14 @@ rm -rf $RPM_BUILD_ROOT
%attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down
%changelog
+* Thu Mar 7 2013 Zdeněk Pavlas <zpavlas at redhat.com> - 3.9.1-25
+- Update to latest HEAD.
+- fix some test cases that were failing. BZ 918658
+- exit(1) on /bin/urlgrabber failures. BZ 918613
+- clamp timestamps from the future. BZ 894630
+- enable GSSNEGOTIATE if implemented correctly.
+- make error messages more verbose.
+
* Thu Feb 14 2013 Fedora Release Engineering <rel-eng at lists.fedoraproject.org> - 3.9.1-24
- Rebuilt for https://fedoraproject.org/wiki/Fedora_19_Mass_Rebuild
diff --git a/urlgrabber-HEAD.patch b/urlgrabber-HEAD.patch
index aaf9cbc..4633455 100644
--- a/urlgrabber-HEAD.patch
+++ b/urlgrabber-HEAD.patch
@@ -12,7 +12,7 @@ index 0000000..1ffe416
+*.kateproject
+ipython.log*
diff --git a/scripts/urlgrabber b/scripts/urlgrabber
-index 518e512..09cd896 100644
+index 518e512..07881b3 100644
--- a/scripts/urlgrabber
+++ b/scripts/urlgrabber
@@ -115,6 +115,7 @@ options:
@@ -71,6 +71,14 @@ index 518e512..09cd896 100644
def help_doc(self):
print __doc__
+@@ -294,6 +301,7 @@ class ugclient:
+ if self.op.localfile: print f
+ except URLGrabError, e:
+ print e
++ sys.exit(1)
+
+ def set_debug_logger(self, dbspec):
+ try:
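This hunk is the BZ 918613 fix: /bin/urlgrabber used to print the
URLGrabError and still exit 0, so shell scripts could not detect failed
downloads. A minimal sketch of the same pattern (fetch_all is an
illustrative name, not from the patch), assuming the public urlgrabber
API:

    import sys
    from urlgrabber import urlgrab
    from urlgrabber.grabber import URLGrabError

    def fetch_all(urls):
        rc = 0
        for url in urls:
            try:
                print urlgrab(url)
            except URLGrabError, e:
                # report the failure, but also remember it so the
                # process exits non-zero instead of a silent exit(0)
                print e
                rc = 1
        sys.exit(rc)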
diff --git a/scripts/urlgrabber-ext-down b/scripts/urlgrabber-ext-down
new file mode 100755
index 0000000..3dafb12
@@ -181,6 +189,37 @@ index 50c6348..5fb43f9 100644
base_ftp = 'ftp://localhost/test/'
# set to a proftp server only. we're working around a couple of
+diff --git a/test/test_mirror.py b/test/test_mirror.py
+index 70fe069..cb63a41 100644
+--- a/test/test_mirror.py
++++ b/test/test_mirror.py
+@@ -28,7 +28,7 @@ import os
+ import string, tempfile, random, cStringIO, os
+
+ import urlgrabber.grabber
+-from urlgrabber.grabber import URLGrabber, URLGrabError
++from urlgrabber.grabber import URLGrabber, URLGrabError, URLGrabberOptions
+ import urlgrabber.mirror
+ from urlgrabber.mirror import MirrorGroup, MGRandomStart, MGRandomOrder
+
+@@ -106,6 +106,9 @@ class CallbackTests(TestCase):
+ self.g = URLGrabber()
+ fullmirrors = [base_mirror_url + m + '/' for m in \
+ (bad_mirrors + good_mirrors)]
++ if hasattr(urlgrabber.grabber, '_TH'):
++ # test assumes mirrors are not re-ordered
++ urlgrabber.grabber._TH.hosts.clear()
+ self.mg = MirrorGroup(self.g, fullmirrors)
+
+ def test_failure_callback(self):
+@@ -168,6 +171,7 @@ class FakeGrabber:
+ self.resultlist = resultlist or []
+ self.index = 0
+ self.calls = []
++ self.opts = URLGrabberOptions()
+
+ def urlgrab(self, url, filename=None, **kwargs):
+ self.calls.append( (url, filename) )
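Two small test fixes here: the new throttling code keeps per-host
download history in the module-level urlgrabber.grabber._TH object, and
stale measurements from earlier tests would re-order the mirror list,
so CallbackTests clears that state in setUp(); FakeGrabber also grows a
real URLGrabberOptions instance, since MirrorGroup now reads options
off the grabber it wraps. The isolation pattern in generic form
(MyMirrorTest is illustrative):

    import unittest
    import urlgrabber.grabber

    class MyMirrorTest(unittest.TestCase):
        def setUp(self):
            # _TH is module-level state; clear it so measurements from
            # earlier tests cannot influence mirror ordering here
            th = getattr(urlgrabber.grabber, '_TH', None)
            if th is not None:
                th.hosts.clear()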
diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py
index 3e5f3b7..8eeaeda 100644
--- a/urlgrabber/byterange.py
@@ -236,7 +275,7 @@ index 3e5f3b7..8eeaeda 100644
return (fb,lb)
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
-index e090e90..6ce9861 100644
+index e090e90..1afb2c5 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs)
@@ -958,7 +997,7 @@ index e090e90..6ce9861 100644
if opts.ssl_cert_type:
self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
if opts.ssl_key_pass:
-@@ -1187,28 +1414,26 @@ class PyCurlFileObject():
+@@ -1187,28 +1414,28 @@ class PyCurlFileObject():
if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
@@ -982,9 +1021,11 @@ index e090e90..6ce9861 100644
+ # proxy
+ if opts.proxy is not None:
+ self.curl_obj.setopt(pycurl.PROXY, opts.proxy)
-+ self.curl_obj.setopt(pycurl.PROXYAUTH,
-+ # All but Kerberos. BZ 769254
-+ pycurl.HTTPAUTH_ANY - pycurl.HTTPAUTH_GSSNEGOTIATE)
++ auth = pycurl.HTTPAUTH_ANY
++ if pycurl.version_info()[2] < (7 << 16 | 28 << 8 | 0):
++ # BZ 769254: work around a bug in curl < 7.28.0
++ auth &= ~pycurl.HTTPAUTH_GSSNEGOTIATE
++ self.curl_obj.setopt(pycurl.PROXYAUTH, auth)
+
+ if opts.username and opts.password:
+ if self.scheme in ('http', 'https'):
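The PROXYAUTH change implements the "enable GSSNEGOTIATE if implemented
correctly" item: libcurl packs its version into a single integer, one
byte per component, so 7 << 16 | 28 << 8 | 0 is 0x071C00, i.e. 7.28.0,
and a plain integer comparison against pycurl.version_info()[2] tells
us whether the proxy Negotiate bug (BZ 769254) is fixed. A standalone
sketch of the check (curl_at_least is an illustrative helper):

    import pycurl

    def curl_at_least(major, minor, patch):
        # version_info()[2] is the libcurl version packed as 0xXXYYZZ
        return pycurl.version_info()[2] >= (major << 16 | minor << 8 | patch)

    auth = pycurl.HTTPAUTH_ANY
    if not curl_at_least(7, 28, 0):
        # older curl mishandles proxy GSSNEGOTIATE; mask it out
        auth &= ~pycurl.HTTPAUTH_GSSNEGOTIATE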
@@ -1003,7 +1044,7 @@ index e090e90..6ce9861 100644
# our url
self.curl_obj.setopt(pycurl.URL, self.url)
-@@ -1228,39 +1453,36 @@ class PyCurlFileObject():
+@@ -1228,39 +1455,26 @@ class PyCurlFileObject():
code = self.http_code
errcode = e.args[0]
@@ -1029,20 +1070,17 @@ index e090e90..6ce9861 100644
elif errcode == 28:
- err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
- err.url = self.url
-+ err = URLGrabError(12, _('Timeout on %s: %s') % (errurl, e))
-+ err.url = errurl
- raise err
- elif errcode == 35:
- msg = _("problem making ssl connection")
- err = URLGrabError(14, msg)
+- raise err
+- elif errcode == 35:
+- msg = _("problem making ssl connection")
+- err = URLGrabError(14, msg)
- err.url = self.url
-+ err.url = errurl
- raise err
- elif errcode == 37:
+- raise err
+- elif errcode == 37:
- msg = _("Could not open/read %s") % (self.url)
-+ msg = _("Could not open/read %s") % (errurl)
- err = URLGrabError(14, msg)
+- err = URLGrabError(14, msg)
- err.url = self.url
++ err = URLGrabError(12, _('Timeout on %s: %s') % (errurl, e))
+ err.url = errurl
raise err
@@ -1052,48 +1090,16 @@ index e090e90..6ce9861 100644
# this is probably wrong but ultimately this is what happens
# we have a legit http code and a pycurl 'writer failed' code
# which almost always means something aborted it from outside
-@@ -1272,33 +1494,94 @@ class PyCurlFileObject():
- elif errcode == 58:
- msg = _("problem with the local client certificate")
- err = URLGrabError(14, msg)
-- err.url = self.url
-+ err.url = errurl
- raise err
-
- elif errcode == 60:
-- msg = _("client cert cannot be verified or client cert incorrect")
-+ msg = _("Peer cert cannot be verified or peer cert invalid")
- err = URLGrabError(14, msg)
-- err.url = self.url
-+ err.url = errurl
- raise err
-
- elif errcode == 63:
- if self._error[1]:
- msg = self._error[1]
- else:
-- msg = _("Max download size exceeded on %s") % (self.url)
-+ msg = _("Max download size exceeded on %s") % ()
- err = URLGrabError(14, msg)
+@@ -1269,40 +1483,76 @@ class PyCurlFileObject():
+ # figure out what aborted the pycurl process FIXME
+ raise KeyboardInterrupt
+
+- elif errcode == 58:
+- msg = _("problem with the local client certificate")
+- err = URLGrabError(14, msg)
- err.url = self.url
-+ err.url = errurl
- raise err
-
-- elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it
-- msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
-+ elif str(e.args[1]) == '' and code and not 200 <= code <= 299:
-+ if self.scheme in ['http', 'https']:
-+ if self.http_code in responses:
-+ resp = responses[self.http_code]
-+ msg = 'HTTP Error %s - %s : %s' % (self.http_code, resp, errurl)
-+ else:
-+ msg = 'HTTP Error %s : %s ' % (self.http_code, errurl)
-+ elif self.scheme in ['ftp']:
-+ msg = 'FTP Error %s : %s ' % (self.http_code, errurl)
-+ else:
-+ msg = "Unknown Error: URL=%s , scheme=%s" % (errurl, self.scheme)
- else:
-- msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
+- raise err
++ else:
+ pyerr2str = { 5 : _("Couldn't resolve proxy"),
+ 6 : _("Couldn't resolve host"),
+ 7 : _("Couldn't connect"),
@@ -1137,25 +1143,57 @@ index e090e90..6ce9861 100644
+ 70 : _("Out of disk space on server"),
+ 73 : _("Remove file exists"),
+ }
-+ errstr = str(e.args[1])
-+ if not errstr:
-+ errstr = pyerr2str.get(errcode, '<Unknown>')
-+ msg = 'curl#%s - "%s"' % (errcode, errstr)
- code = errcode
- err = URLGrabError(14, msg)
- err.code = code
- err.exception = e
- raise err
++ errstr = str(e.args[1]) or pyerr2str.get(errcode, '<Unknown>')
++ if code and not 200 <= code <= 299:
++ msg = '%s Error %d - %s' % (self.scheme.upper(), code,
++ self.scheme in ('http', 'https')
++ and responses.get(code) or errstr)
++ else:
++ msg = 'curl#%s - "%s"' % (errcode, errstr)
++ code = errcode
+
+- elif errcode == 60:
+- msg = _("client cert cannot be verified or client cert incorrect")
+ err = URLGrabError(14, msg)
+- err.url = self.url
++ err.url = errurl
++ err.code = code
+ raise err
+-
+- elif errcode == 63:
+- if self._error[1]:
+- msg = self._error[1]
+- else:
+- msg = _("Max download size exceeded on %s") % (self.url)
++
+ else:
+ if self._error[1]:
+ msg = self._error[1]
-+ err = URLGrabError(14, msg)
+ err = URLGrabError(14, msg)
+- err.url = self.url
+ err.url = urllib.unquote(self.url)
-+ raise err
+ raise err
+-
+- elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it
+- msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
+- else:
+- msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
+- code = errcode
+- err = URLGrabError(14, msg)
+- err.code = code
+- err.exception = e
+- raise err
def _do_open(self):
self.curl_obj = _curl_cache
-@@ -1333,7 +1616,11 @@ class PyCurlFileObject():
+- self.curl_obj.reset() # reset all old settings away, just in case
++ # reset() clears PYCURL_ERRORBUFFER, and there's no way
++ # to reinitialize it, so better don't do that. BZ 896025
++ #self.curl_obj.reset() # reset all old settings away, just in case
+ # setup any ranges
+ self._set_opts()
+ self._do_grab()
+@@ -1333,7 +1583,11 @@ class PyCurlFileObject():
if self.opts.range:
rt = self.opts.range
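This rewrite is where "make error messages more verbose" happens: the
long if/elif chain of special-cased pycurl error codes collapses into
one path that prefers a protocol status line ("HTTP Error 404 - Not
Found") when a response code is available and otherwise falls back to a
curl#N message, with the pyerr2str table supplying readable text for
codes where curl gives an empty error string. Roughly the selection
logic (format_error is illustrative and the table is trimmed):

    from httplib import responses   # {404: 'Not Found', ...}

    pyerr2str = {
        6: "Couldn't resolve host",
        7: "Couldn't connect",
        28: "Timeout was reached",
    }

    def format_error(scheme, code, errcode, errmsg):
        errstr = errmsg or pyerr2str.get(errcode, '<Unknown>')
        if code and not 200 <= code <= 299:
            if scheme in ('http', 'https') and code in responses:
                errstr = responses[code]
            return '%s Error %d - %s' % (scheme.upper(), code, errstr)
        return 'curl#%s - "%s"' % (errcode, errstr)

    print format_error('http', 404, 0, '')  # HTTP Error 404 - Not Found
    print format_error('http', 0, 6, '')    # curl#6 - "Couldn't resolve host"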
@@ -1168,7 +1206,7 @@ index e090e90..6ce9861 100644
if rt:
header = range_tuple_to_header(rt)
-@@ -1434,21 +1721,46 @@ class PyCurlFileObject():
+@@ -1434,21 +1688,46 @@ class PyCurlFileObject():
#fh, self._temp_name = mkstemp()
#self.fo = open(self._temp_name, 'wb')
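A related detail a few hunks up: _do_open() no longer calls
curl_obj.reset() on the cached handle, because reset() clears libcurl's
error buffer and pycurl has no way to install a new one (BZ 896025),
leaving later failures without an error string. The handle is instead
reused with every relevant option re-set explicitly; a minimal sketch
of that pattern with plain pycurl (fetch is illustrative):

    import pycurl
    from io import BytesIO

    _curl_cache = pycurl.Curl()   # one long-lived handle, never reset()

    def fetch(url):
        buf = BytesIO()
        c = _curl_cache
        # re-set every option this transfer depends on; options from a
        # previous call would otherwise linger on the shared handle
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.WRITEFUNCTION, buf.write)
        c.setopt(pycurl.FOLLOWLOCATION, True)
        c.perform()
        return buf.getvalue()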
@@ -1222,7 +1260,7 @@ index e090e90..6ce9861 100644
else:
#self.fo = open(self._temp_name, 'r')
self.fo.seek(0)
-@@ -1526,17 +1838,20 @@ class PyCurlFileObject():
+@@ -1526,17 +1805,20 @@ class PyCurlFileObject():
if self._prog_running:
downloaded += self._reget_length
self.opts.progress_obj.update(downloaded)
@@ -1248,7 +1286,7 @@ index e090e90..6ce9861 100644
msg = _("Downloaded more than max size for %s: %s > %s") \
% (self.url, cur, max_size)
-@@ -1544,13 +1859,6 @@ class PyCurlFileObject():
+@@ -1544,13 +1826,6 @@ class PyCurlFileObject():
return True
return False
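The max-size check above runs from the write path: once the byte count
passes the limit, the write callback refuses the data, libcurl aborts
the transfer with error 23 (write error), and grabber.py turns that
into the "Downloaded more than max size" URLGrabError. The technique in
isolation, assuming plain pycurl (download_capped and MAX_SIZE are
illustrative):

    import pycurl
    from io import BytesIO

    MAX_SIZE = 10 * 1024 * 1024   # assumed 10 MiB cap

    def download_capped(url):
        buf = BytesIO()
        def write(chunk):
            if buf.tell() + len(chunk) > MAX_SIZE:
                return -1   # != len(chunk), so libcurl aborts with error 23
            buf.write(chunk)
        c = pycurl.Curl()
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.WRITEFUNCTION, write)
        try:
            c.perform()   # raises pycurl.error(23, ...) past the cap
        finally:
            c.close()
        return buf.getvalue()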
@@ -1262,7 +1300,7 @@ index e090e90..6ce9861 100644
def read(self, amt=None):
self._fill_buffer(amt)
if amt is None:
-@@ -1582,9 +1890,21 @@ class PyCurlFileObject():
+@@ -1582,9 +1857,21 @@ class PyCurlFileObject():
self.opts.progress_obj.end(self._amount_read)
self.fo.close()
@@ -1285,7 +1323,7 @@ index e090e90..6ce9861 100644
#####################################################################
# DEPRECATED FUNCTIONS
-@@ -1621,6 +1941,478 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
+@@ -1621,6 +1908,480 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
#####################################################################
@@ -1727,6 +1765,8 @@ index e090e90..6ce9861 100644
+ if ug_err is None:
+ # defer first update if the file was small. BZ 851178.
+ if not ts and dl_size < 1e6: return
++ # clamp timestamps from the future. BZ 894630.
++ if ts > now: ts = now
+
+ # k1: the older, the less useful
+ # k2: <500ms readings are less reliable
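The k1/k2 comments describe the adaptive mirror-selection code added at
the end of the patch: each host keeps an exponentially aged speed
estimate, old measurements decay (k1), very short transfers count less
(k2), and timestamps from the future are clamped so a clock jump cannot
corrupt the weights (BZ 894630). A rough sketch of such an update step
(names and constants are illustrative; urlgrabber's exact formula
differs):

    import time

    hosts = {}   # host -> (speed, weight, timestamp)

    def update_estimate(host, dl_size, dl_time, now=None):
        now = now or time.time()
        speed, weight, ts = hosts.get(host, (0.0, 0.0, 0.0))
        if ts > now:
            ts = now                      # clamp timestamps from the future
        k1 = 2.0 ** ((ts - now) / 86400)  # the older, the less useful
        k2 = min(dl_time / 0.5, 1.0)      # <500ms readings are less reliable
        total = k1 * weight + k2
        if total > 0:
            cur = dl_size / max(dl_time, 0.001)
            speed = (k1 * weight * speed + k2 * cur) / total
        hosts[host] = (speed, total, now)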