[python-urlgrabber/f17] Update to latest head
Zdeněk Pavlas
zpavlas at fedoraproject.org
Fri Jul 20 12:11:52 UTC 2012
commit 604dc61e1723fda3f860def1a353e82d0fd3fc38
Author: Zdeněk Pavlas <zpavlas at redhat.com>
Date: Fri Jul 20 14:10:53 2012 +0200
Update to latest head
python-urlgrabber.spec | 8 ++-
urlgrabber-HEAD.patch | 176 +++++++++++++++++++++++++++++++-----------------
2 files changed, 122 insertions(+), 62 deletions(-)
---
diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec
index f57e573..5fbf43f 100644
--- a/python-urlgrabber.spec
+++ b/python-urlgrabber.spec
@@ -3,7 +3,7 @@
Summary: A high-level cross-protocol url-grabber
Name: python-urlgrabber
Version: 3.9.1
-Release: 13%{?dist}
+Release: 14%{?dist}
Source0: urlgrabber-%{version}.tar.gz
Patch1: urlgrabber-HEAD.patch
@@ -44,6 +44,12 @@ rm -rf $RPM_BUILD_ROOT
%attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down
%changelog
+* Fri Jul 20 2012 Zdenek Pavlas <zpavlas at redhat.com> - 3.9.1-14
+- update to latest HEAD
+- disable Kerberos proxy auth. BZ 769254
+- fix copy_local issue. BZ 837018
+- send 'tries' counter to mirror failure callback
+
* Thu Jun 14 2012 Zdenek Pavlas <zpavlas at redhat.com> - 3.9.1-13
- update to latest HEAD
- Start meters immediately, and only when asked to. BZ 831904, 831291.
diff --git a/urlgrabber-HEAD.patch b/urlgrabber-HEAD.patch
index c77c6c5..4e1b34b 100644
--- a/urlgrabber-HEAD.patch
+++ b/urlgrabber-HEAD.patch
@@ -73,12 +73,29 @@ index 518e512..09cd896 100644
print __doc__
diff --git a/scripts/urlgrabber-ext-down b/scripts/urlgrabber-ext-down
new file mode 100755
-index 0000000..670750c
+index 0000000..3da55a4
--- /dev/null
+++ b/scripts/urlgrabber-ext-down
-@@ -0,0 +1,55 @@
+@@ -0,0 +1,72 @@
+#! /usr/bin/python
+# A very simple external downloader
++# Copyright 2011-2012 Zdenek Pavlas
++
++# This library is free software; you can redistribute it and/or
++# modify it under the terms of the GNU Lesser General Public
++# License as published by the Free Software Foundation; either
++# version 2.1 of the License, or (at your option) any later version.
++#
++# This library is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++# Lesser General Public License for more details.
++#
++# You should have received a copy of the GNU Lesser General Public
++# License along with this library; if not, write to the
++# Free Software Foundation, Inc.,
++# 59 Temple Place, Suite 330,
++# Boston, MA 02111-1307 USA
+
+import time, os, errno, sys
+from urlgrabber.grabber import \
@@ -216,7 +233,7 @@ index 3e5f3b7..8eeaeda 100644
return (fb,lb)
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
-index e090e90..071146c 100644
+index e090e90..83823ea 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -49,7 +49,7 @@ GENERAL ARGUMENTS (kwargs)
@@ -558,7 +575,19 @@ index e090e90..071146c 100644
"""Provides easy opening of URLs with a variety of options.
All options are specified as kwargs. Options may be specified when
-@@ -887,14 +1028,15 @@ class URLGrabber:
+@@ -872,7 +1013,6 @@ class URLGrabber:
+ # beware of infinite loops :)
+ tries = tries + 1
+ exception = None
+- retrycode = None
+ callback = None
+ if DEBUG: DEBUG.info('attempt %i/%s: %s',
+ tries, opts.retry, args[0])
+@@ -883,23 +1023,24 @@ class URLGrabber:
+ except URLGrabError, e:
+ exception = e
+ callback = opts.failure_callback
+- retrycode = e.errno
except KeyboardInterrupt, e:
exception = e
callback = opts.interrupt_callback
@@ -576,7 +605,13 @@ index e090e90..071146c 100644
if (opts.retry is None) or (tries == opts.retry):
if DEBUG: DEBUG.info('retries exceeded, re-raising')
-@@ -912,9 +1054,11 @@ class URLGrabber:
+ raise
+
++ retrycode = getattr(exception, 'errno', None)
+ if (retrycode is not None) and (retrycode not in opts.retrycodes):
+ if DEBUG: DEBUG.info('retrycode (%i) not in list %s, re-raising',
+ retrycode, opts.retrycodes)
+@@ -912,9 +1053,11 @@ class URLGrabber:
returned that supports them. The file object can be treated
like any other file object.
"""
@@ -588,7 +623,7 @@ index e090e90..071146c 100644
def retryfunc(opts, url):
return PyCurlFileObject(url, filename=None, opts=opts)
return self._retry(opts, retryfunc, url)
-@@ -925,12 +1069,17 @@ class URLGrabber:
+@@ -925,12 +1068,17 @@ class URLGrabber:
urlgrab returns the filename of the local file, which may be
different from the passed-in filename if copy_local == 0.
"""
@@ -606,7 +641,7 @@ index e090e90..071146c 100644
if scheme == 'file' and not opts.copy_local:
# just return the name of the local file - don't make a
# copy currently
-@@ -950,30 +1099,36 @@ class URLGrabber:
+@@ -950,30 +1098,36 @@ class URLGrabber:
elif not opts.range:
if not opts.checkfunc is None:
@@ -656,7 +691,7 @@ index e090e90..071146c 100644
def urlread(self, url, limit=None, **kwargs):
"""read the url into a string, up to 'limit' bytes
-@@ -982,9 +1137,11 @@ class URLGrabber:
+@@ -982,9 +1136,11 @@ class URLGrabber:
"I want the first N bytes" but rather 'read the whole file
into memory, but don't use too much'
"""
@@ -668,7 +703,7 @@ index e090e90..071146c 100644
if limit is not None:
limit = limit + 1
-@@ -1000,12 +1157,8 @@ class URLGrabber:
+@@ -1000,12 +1156,8 @@ class URLGrabber:
else: s = fo.read(limit)
if not opts.checkfunc is None:
@@ -683,7 +718,7 @@ index e090e90..071146c 100644
finally:
fo.close()
return s
-@@ -1020,6 +1173,7 @@ class URLGrabber:
+@@ -1020,6 +1172,7 @@ class URLGrabber:
return s
def _make_callback(self, callback_obj):
@@ -691,7 +726,7 @@ index e090e90..071146c 100644
if callable(callback_obj):
return callback_obj, (), {}
else:
-@@ -1030,7 +1184,7 @@ class URLGrabber:
+@@ -1030,7 +1183,7 @@ class URLGrabber:
default_grabber = URLGrabber()
@@ -700,7 +735,7 @@ index e090e90..071146c 100644
def __init__(self, url, filename, opts):
self.fo = None
self._hdr_dump = ''
-@@ -1052,10 +1206,11 @@ class PyCurlFileObject():
+@@ -1052,10 +1205,11 @@ class PyCurlFileObject():
self._reget_length = 0
self._prog_running = False
self._error = (None, None)
@@ -714,7 +749,7 @@ index e090e90..071146c 100644
def __getattr__(self, name):
"""This effectively allows us to wrap at the instance level.
Any attribute not found in _this_ object will be searched for
-@@ -1085,9 +1240,14 @@ class PyCurlFileObject():
+@@ -1085,9 +1239,14 @@ class PyCurlFileObject():
return -1
def _hdr_retrieve(self, buf):
@@ -730,7 +765,7 @@ index e090e90..071146c 100644
try:
self._hdr_dump += buf
# we have to get the size before we do the progress obj start
-@@ -1104,7 +1264,17 @@ class PyCurlFileObject():
+@@ -1104,7 +1263,17 @@ class PyCurlFileObject():
s = parse150(buf)
if s:
self.size = int(s)
@@ -749,7 +784,7 @@ index e090e90..071146c 100644
return len(buf)
except KeyboardInterrupt:
return pycurl.READFUNC_ABORT
-@@ -1113,8 +1283,10 @@ class PyCurlFileObject():
+@@ -1113,8 +1282,10 @@ class PyCurlFileObject():
if self._parsed_hdr:
return self._parsed_hdr
statusend = self._hdr_dump.find('\n')
@@ -760,7 +795,7 @@ index e090e90..071146c 100644
self._parsed_hdr = mimetools.Message(hdrfp)
return self._parsed_hdr
-@@ -1127,6 +1299,9 @@ class PyCurlFileObject():
+@@ -1127,6 +1298,9 @@ class PyCurlFileObject():
if not opts:
opts = self.opts
@@ -770,7 +805,7 @@ index e090e90..071146c 100644
# defaults we're always going to set
self.curl_obj.setopt(pycurl.NOPROGRESS, False)
-@@ -1136,11 +1311,21 @@ class PyCurlFileObject():
+@@ -1136,11 +1310,21 @@ class PyCurlFileObject():
self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
self.curl_obj.setopt(pycurl.FAILONERROR, True)
self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
@@ -792,7 +827,7 @@ index e090e90..071146c 100644
# maybe to be options later
self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
-@@ -1148,9 +1333,11 @@ class PyCurlFileObject():
+@@ -1148,9 +1332,11 @@ class PyCurlFileObject():
# timeouts
timeout = 300
@@ -807,7 +842,7 @@ index e090e90..071146c 100644
# ssl options
if self.scheme == 'https':
-@@ -1158,13 +1345,16 @@ class PyCurlFileObject():
+@@ -1158,13 +1344,16 @@ class PyCurlFileObject():
self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert)
self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert)
self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer)
@@ -825,7 +860,7 @@ index e090e90..071146c 100644
if opts.ssl_cert_type:
self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
if opts.ssl_key_pass:
-@@ -1187,28 +1377,24 @@ class PyCurlFileObject():
+@@ -1187,28 +1376,26 @@ class PyCurlFileObject():
if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
@@ -849,7 +884,9 @@ index e090e90..071146c 100644
+ # proxy
+ if opts.proxy is not None:
+ self.curl_obj.setopt(pycurl.PROXY, opts.proxy)
-+ self.curl_obj.setopt(pycurl.PROXYAUTH, pycurl.HTTPAUTH_ANY)
++ self.curl_obj.setopt(pycurl.PROXYAUTH,
++ # All but Kerberos. BZ 769254
++ pycurl.HTTPAUTH_ANY - pycurl.HTTPAUTH_GSSNEGOTIATE)
+
+ if opts.username and opts.password:
+ if self.scheme in ('http', 'https'):
@@ -868,7 +905,7 @@ index e090e90..071146c 100644
# our url
self.curl_obj.setopt(pycurl.URL, self.url)
-@@ -1228,12 +1414,14 @@ class PyCurlFileObject():
+@@ -1228,12 +1415,14 @@ class PyCurlFileObject():
code = self.http_code
errcode = e.args[0]
@@ -885,7 +922,7 @@ index e090e90..071146c 100644
# this is probably wrong but ultimately this is what happens
# we have a legit http code and a pycurl 'writer failed' code
-@@ -1244,23 +1432,23 @@ class PyCurlFileObject():
+@@ -1244,23 +1433,23 @@ class PyCurlFileObject():
raise KeyboardInterrupt
elif errcode == 28:
@@ -916,7 +953,7 @@ index e090e90..071146c 100644
# this is probably wrong but ultimately this is what happens
# we have a legit http code and a pycurl 'writer failed' code
# which almost always means something aborted it from outside
-@@ -1272,33 +1460,94 @@ class PyCurlFileObject():
+@@ -1272,33 +1461,94 @@ class PyCurlFileObject():
elif errcode == 58:
msg = _("problem with the local client certificate")
err = URLGrabError(14, msg)
@@ -1018,7 +1055,7 @@ index e090e90..071146c 100644
def _do_open(self):
self.curl_obj = _curl_cache
-@@ -1333,7 +1582,11 @@ class PyCurlFileObject():
+@@ -1333,7 +1583,11 @@ class PyCurlFileObject():
if self.opts.range:
rt = self.opts.range
@@ -1031,7 +1068,7 @@ index e090e90..071146c 100644
if rt:
header = range_tuple_to_header(rt)
-@@ -1434,21 +1687,46 @@ class PyCurlFileObject():
+@@ -1434,21 +1688,46 @@ class PyCurlFileObject():
#fh, self._temp_name = mkstemp()
#self.fo = open(self._temp_name, 'wb')
@@ -1085,7 +1122,7 @@ index e090e90..071146c 100644
else:
#self.fo = open(self._temp_name, 'r')
self.fo.seek(0)
-@@ -1526,17 +1804,20 @@ class PyCurlFileObject():
+@@ -1526,17 +1805,20 @@ class PyCurlFileObject():
if self._prog_running:
downloaded += self._reget_length
self.opts.progress_obj.update(downloaded)
@@ -1111,7 +1148,7 @@ index e090e90..071146c 100644
msg = _("Downloaded more than max size for %s: %s > %s") \
% (self.url, cur, max_size)
-@@ -1544,13 +1825,6 @@ class PyCurlFileObject():
+@@ -1544,13 +1826,6 @@ class PyCurlFileObject():
return True
return False
@@ -1125,7 +1162,7 @@ index e090e90..071146c 100644
def read(self, amt=None):
self._fill_buffer(amt)
if amt is None:
-@@ -1582,9 +1856,21 @@ class PyCurlFileObject():
+@@ -1582,9 +1857,21 @@ class PyCurlFileObject():
self.opts.progress_obj.end(self._amount_read)
self.fo.close()
@@ -1148,7 +1185,7 @@ index e090e90..071146c 100644
#####################################################################
# DEPRECATED FUNCTIONS
-@@ -1621,6 +1907,433 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
+@@ -1621,6 +1908,442 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
#####################################################################
@@ -1415,18 +1452,23 @@ index e090e90..071146c 100644
+ try: _run_callback(opts.failure_callback, opts)
+ except URLGrabError, ug_err:
+ retry = 0 # no retries
-+ if opts.tries < retry and ug_err.args[0] in opts.retrycodes:
++ if opts.tries < retry and ug_err.errno in opts.retrycodes:
+ start(opts, opts.tries + 1) # simple retry
+ continue
+
+ if opts.mirror_group:
-+ mg, failed = opts.mirror_group
++ mg, failed, removed = opts.mirror_group
++ failed[key] = failed.get(key, 0) + 1
+ opts.mirror = key
+ opts.exception = ug_err
-+ action = _run_callback(mg.failure_callback, opts)
-+ if not (action and action.get('fail')):
++ action = mg.default_action or {}
++ if mg.failure_callback:
++ opts.tries = sum(failed.values())
++ action.update(_run_callback(mg.failure_callback, opts))
++ if not action.get('fail', 0):
+ # mask this mirror and retry
-+ failed.add(key)
++ if action.get('remove', 1):
++ removed.add(key)
+ _async_queue.append(opts)
+ continue
+
@@ -1453,17 +1495,21 @@ index e090e90..071146c 100644
+ perform()
+
+ if opts.mirror_group:
-+ mg, failed = opts.mirror_group
++ mg, failed, removed = opts.mirror_group
+
+ # find the best mirror
+ best = None
++ best_speed = None
+ for mirror in mg.mirrors:
+ key = mirror['mirror']
-+ if key in failed: continue
++ if key in removed: continue
+
+ # estimate mirror speed
+ speed = _TH.estimate(key)
+ speed /= 1 + host_con.get(key, 0)
++
++ # 2-tuple to select mirror with least failures
++ speed = -failed.get(key, 0), speed
+ if best is None or speed > best_speed:
+ best = mirror
+ best_speed = speed
@@ -1583,7 +1629,7 @@ index e090e90..071146c 100644
def _main_test():
try: url, filename = sys.argv[1:3]
diff --git a/urlgrabber/mirror.py b/urlgrabber/mirror.py
-index dad410b..d699b61 100644
+index dad410b..ac78b34 100644
--- a/urlgrabber/mirror.py
+++ b/urlgrabber/mirror.py
@@ -76,6 +76,9 @@ CUSTOMIZATION
@@ -1602,20 +1648,19 @@ index dad410b..d699b61 100644
-from grabber import URLGrabError, CallbackObject, DEBUG
+from grabber import URLGrabError, CallbackObject, DEBUG, _to_utf8
-+from grabber import _run_callback, _do_raise, _async_queue
++from grabber import _run_callback, _do_raise
def _(st):
return st
-@@ -254,7 +258,7 @@ class MirrorGroup:
- # if these values are found in **kwargs passed to one of the urlXXX
- # methods, they will be stripped before getting passed on to the
- # grabber
-- options = ['default_action', 'failure_callback']
-+ options = ['default_action', 'failure_callback', 'failfunc']
-
- def _process_kwargs(self, kwargs):
- self.failure_callback = kwargs.get('failure_callback')
-@@ -263,7 +267,8 @@ class MirrorGroup:
+@@ -184,6 +188,7 @@ class MirrorGroup:
+
+ obj.exception = < exception that was raised >
+ obj.mirror = < the mirror that was tried >
++ obj.tries = < the number of mirror tries so far >
+ obj.relative_url = < url relative to the mirror >
+ obj.url = < full url that failed >
+ # .url is just the combination of .mirror
+@@ -263,7 +268,8 @@ class MirrorGroup:
def _parse_mirrors(self, mirrors):
parsed_mirrors = []
for m in mirrors:
@@ -1625,23 +1670,32 @@ index dad410b..d699b61 100644
parsed_mirrors.append(m)
return parsed_mirrors
-@@ -402,10 +407,25 @@ class MirrorGroup:
+@@ -382,7 +388,9 @@ class MirrorGroup:
+ try: del kw[k]
+ except KeyError: pass
+
++ tries = 0
+ while 1:
++ tries += 1
+ mirrorchoice = self._get_mirror(gr)
+ fullurl = self._join_url(mirrorchoice['mirror'], gr.url)
+ kwargs = dict(mirrorchoice.get('kwargs', {}))
+@@ -399,13 +407,24 @@ class MirrorGroup:
+ obj.mirror = mirrorchoice['mirror']
+ obj.relative_url = gr.url
+ obj.url = fullurl
++ obj.tries = tries
self._failure(gr, obj)
def urlgrab(self, url, filename=None, **kwargs):
-+ if kwargs.get('async'):
-+ opts = self.grabber.opts.derive(**kwargs)
-+ opts.mirror_group = self, set()
-+ opts.relative_url = _to_utf8(url)
-+
-+ opts.url = 'http://tbd'
-+ opts.filename = filename
-+ opts.size = int(opts.size or 0)
-+ _async_queue.append(opts)
-+ return filename
-+
kw = dict(kwargs)
kw['filename'] = filename
++ if kw.get('async'):
++ # enable mirror failovers in async path
++ kw['mirror_group'] = self, {}, set()
++ kw['relative_url'] = url
++ else:
++ kw.pop('failfunc', None)
func = 'urlgrab'
- return self._mirror_try(func, url, kw)
+ try:
More information about the scm-commits mailing list