[python-urlgrabber/f18] Update to latest HEAD.
Zdeněk Pavlas
zpavlas at fedoraproject.org
Thu Dec 6 11:12:32 UTC 2012
commit 68399901bcd148be71c533cb5e2e6416be0cad7d
Author: Zdeněk Pavlas <zpavlas at redhat.com>
Date: Thu Dec 6 12:12:23 2012 +0100
Update to latest HEAD.
python-urlgrabber.spec | 6 ++-
urlgrabber-HEAD.patch | 103 +++++++++++++++++++++++++++++++-----------------
2 files changed, 72 insertions(+), 37 deletions(-)
---
diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec
index 8a6f37e..6ee5dd6 100644
--- a/python-urlgrabber.spec
+++ b/python-urlgrabber.spec
@@ -3,7 +3,7 @@
Summary: A high-level cross-protocol url-grabber
Name: python-urlgrabber
Version: 3.9.1
-Release: 22%{?dist}
+Release: 23%{?dist}
Source0: urlgrabber-%{version}.tar.gz
Patch1: urlgrabber-HEAD.patch
@@ -44,6 +44,10 @@ rm -rf $RPM_BUILD_ROOT
%attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down
%changelog
+* Thu Dec 6 2012 Zdeněk Pavlas <zpavlas at redhat.com> - 3.9.1-23
+- Update to latest HEAD.
+- Improve URLGRABBER_DEBUG, add max_connections. BZ 853432
+
* Thu Nov 1 2012 Zdeněk Pavlas <zpavlas at redhat.com> - 3.9.1-22
- Update to latest HEAD
- Get rid of "HTTP 200 OK" errors. BZ 871835.
diff --git a/urlgrabber-HEAD.patch b/urlgrabber-HEAD.patch
index 55c3ba4..bc028de 100644
--- a/urlgrabber-HEAD.patch
+++ b/urlgrabber-HEAD.patch
@@ -236,7 +236,7 @@ index 3e5f3b7..8eeaeda 100644
return (fb,lb)
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
-index e090e90..74a692c 100644
+index e090e90..78c2e59 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs)
@@ -458,7 +458,26 @@ index e090e90..74a692c 100644
########################################################################
# functions for debugging output. These functions are here because they
# are also part of the module initialization.
-@@ -527,6 +608,29 @@ def _(st):
+@@ -504,6 +585,7 @@ def _init_default_logger(logspec=None):
+ else: handler = logging.FileHandler(filename)
+ handler.setFormatter(formatter)
+ DBOBJ = logging.getLogger('urlgrabber')
++ DBOBJ.propagate = False
+ DBOBJ.addHandler(handler)
+ DBOBJ.setLevel(level)
+ except (KeyError, ImportError, ValueError):
+@@ -512,8 +594,8 @@ def _init_default_logger(logspec=None):
+
+ def _log_package_state():
+ if not DEBUG: return
+- DEBUG.info('urlgrabber version = %s' % __version__)
+- DEBUG.info('trans function "_" = %s' % _)
++ DEBUG.debug('urlgrabber version = %s' % __version__)
++ DEBUG.debug('trans function "_" = %s' % _)
+
+ _init_default_logger()
+ _log_package_state()
+@@ -527,6 +609,29 @@ def _(st):
# END MODULE INITIALIZATION
########################################################################
@@ -488,7 +507,7 @@ index e090e90..74a692c 100644
class URLGrabError(IOError):
-@@ -662,6 +766,7 @@ class URLParser:
+@@ -662,6 +767,7 @@ class URLParser:
opts.quote = 0 --> do not quote it
opts.quote = None --> guess
"""
@@ -496,7 +515,7 @@ index e090e90..74a692c 100644
quote = opts.quote
if opts.prefix:
-@@ -768,6 +873,41 @@ class URLGrabberOptions:
+@@ -768,6 +874,41 @@ class URLGrabberOptions:
else: # throttle is a float
return self.bandwidth * self.throttle
@@ -538,7 +557,7 @@ index e090e90..74a692c 100644
def derive(self, **kwargs):
"""Create a derived URLGrabberOptions instance.
This method creates a new instance and overrides the
-@@ -791,30 +931,37 @@ class URLGrabberOptions:
+@@ -791,30 +932,37 @@ class URLGrabberOptions:
provided here.
"""
self.progress_obj = None
@@ -577,7 +596,7 @@ index e090e90..74a692c 100644
self.ssl_ca_cert = None # sets SSL_CAINFO - path to certdb
self.ssl_context = None # no-op in pycurl
self.ssl_verify_peer = True # check peer's cert for authenticityb
-@@ -827,6 +974,12 @@ class URLGrabberOptions:
+@@ -827,6 +975,12 @@ class URLGrabberOptions:
self.size = None # if we know how big the thing we're getting is going
# to be. this is ultimately a MAXIMUM size for the file
self.max_header_size = 2097152 #2mb seems reasonable for maximum header size
@@ -590,7 +609,7 @@ index e090e90..74a692c 100644
def __repr__(self):
return self.format()
-@@ -846,7 +999,18 @@ class URLGrabberOptions:
+@@ -846,7 +1000,18 @@ class URLGrabberOptions:
s = s + indent + '}'
return s
@@ -610,7 +629,7 @@ index e090e90..74a692c 100644
"""Provides easy opening of URLs with a variety of options.
All options are specified as kwargs. Options may be specified when
-@@ -872,7 +1036,6 @@ class URLGrabber:
+@@ -872,7 +1037,6 @@ class URLGrabber:
# beware of infinite loops :)
tries = tries + 1
exception = None
@@ -618,7 +637,7 @@ index e090e90..74a692c 100644
callback = None
if DEBUG: DEBUG.info('attempt %i/%s: %s',
tries, opts.retry, args[0])
-@@ -883,54 +1046,62 @@ class URLGrabber:
+@@ -883,54 +1047,62 @@ class URLGrabber:
except URLGrabError, e:
exception = e
callback = opts.failure_callback
@@ -688,7 +707,7 @@ index e090e90..74a692c 100644
if scheme == 'file' and not opts.copy_local:
# just return the name of the local file - don't make a
# copy currently
-@@ -950,41 +1121,51 @@ class URLGrabber:
+@@ -950,41 +1122,51 @@ class URLGrabber:
elif not opts.range:
if not opts.checkfunc is None:
@@ -755,7 +774,7 @@ index e090e90..74a692c 100644
if limit is not None:
limit = limit + 1
-@@ -1000,12 +1181,8 @@ class URLGrabber:
+@@ -1000,12 +1182,8 @@ class URLGrabber:
else: s = fo.read(limit)
if not opts.checkfunc is None:
@@ -770,7 +789,7 @@ index e090e90..74a692c 100644
finally:
fo.close()
return s
-@@ -1020,6 +1197,7 @@ class URLGrabber:
+@@ -1020,6 +1198,7 @@ class URLGrabber:
return s
def _make_callback(self, callback_obj):
@@ -778,7 +797,7 @@ index e090e90..74a692c 100644
if callable(callback_obj):
return callback_obj, (), {}
else:
-@@ -1030,7 +1208,7 @@ class URLGrabber:
+@@ -1030,7 +1209,7 @@ class URLGrabber:
default_grabber = URLGrabber()
@@ -787,7 +806,7 @@ index e090e90..74a692c 100644
def __init__(self, url, filename, opts):
self.fo = None
self._hdr_dump = ''
-@@ -1052,10 +1230,13 @@ class PyCurlFileObject():
+@@ -1052,10 +1231,13 @@ class PyCurlFileObject():
self._reget_length = 0
self._prog_running = False
self._error = (None, None)
@@ -803,7 +822,7 @@ index e090e90..74a692c 100644
def __getattr__(self, name):
"""This effectively allows us to wrap at the instance level.
Any attribute not found in _this_ object will be searched for
-@@ -1067,6 +1248,12 @@ class PyCurlFileObject():
+@@ -1067,6 +1249,12 @@ class PyCurlFileObject():
def _retrieve(self, buf):
try:
@@ -816,7 +835,7 @@ index e090e90..74a692c 100644
if not self._prog_running:
if self.opts.progress_obj:
size = self.size + self._reget_length
-@@ -1079,15 +1266,24 @@ class PyCurlFileObject():
+@@ -1079,15 +1267,24 @@ class PyCurlFileObject():
self.opts.progress_obj.update(self._amount_read)
self._amount_read += len(buf)
@@ -843,7 +862,7 @@ index e090e90..74a692c 100644
try:
self._hdr_dump += buf
# we have to get the size before we do the progress obj start
-@@ -1104,7 +1300,17 @@ class PyCurlFileObject():
+@@ -1104,7 +1301,17 @@ class PyCurlFileObject():
s = parse150(buf)
if s:
self.size = int(s)
@@ -857,12 +876,12 @@ index e090e90..74a692c 100644
+
+ if len(self._hdr_dump) != 0 and buf == '\r\n':
+ self._hdr_ended = True
-+ if DEBUG: DEBUG.info('header ended:')
++ if DEBUG: DEBUG.debug('header ended:')
+
return len(buf)
except KeyboardInterrupt:
return pycurl.READFUNC_ABORT
-@@ -1113,8 +1319,10 @@ class PyCurlFileObject():
+@@ -1113,8 +1320,10 @@ class PyCurlFileObject():
if self._parsed_hdr:
return self._parsed_hdr
statusend = self._hdr_dump.find('\n')
@@ -873,7 +892,7 @@ index e090e90..74a692c 100644
self._parsed_hdr = mimetools.Message(hdrfp)
return self._parsed_hdr
-@@ -1127,6 +1335,9 @@ class PyCurlFileObject():
+@@ -1127,6 +1336,9 @@ class PyCurlFileObject():
if not opts:
opts = self.opts
@@ -883,13 +902,14 @@ index e090e90..74a692c 100644
# defaults we're always going to set
self.curl_obj.setopt(pycurl.NOPROGRESS, False)
-@@ -1136,11 +1347,21 @@ class PyCurlFileObject():
+@@ -1136,11 +1348,21 @@ class PyCurlFileObject():
self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
self.curl_obj.setopt(pycurl.FAILONERROR, True)
self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
+ self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
- if DEBUG:
+- if DEBUG:
++ if DEBUG and DEBUG.level <= 10:
self.curl_obj.setopt(pycurl.VERBOSE, True)
if opts.user_agent:
self.curl_obj.setopt(pycurl.USERAGENT, opts.user_agent)
@@ -905,7 +925,7 @@ index e090e90..74a692c 100644
# maybe to be options later
self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
-@@ -1148,9 +1369,11 @@ class PyCurlFileObject():
+@@ -1148,9 +1370,11 @@ class PyCurlFileObject():
# timeouts
timeout = 300
@@ -920,7 +940,7 @@ index e090e90..74a692c 100644
# ssl options
if self.scheme == 'https':
-@@ -1158,13 +1381,16 @@ class PyCurlFileObject():
+@@ -1158,13 +1382,16 @@ class PyCurlFileObject():
self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert)
self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert)
self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer)
@@ -938,7 +958,7 @@ index e090e90..74a692c 100644
if opts.ssl_cert_type:
self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
if opts.ssl_key_pass:
-@@ -1187,28 +1413,26 @@ class PyCurlFileObject():
+@@ -1187,28 +1414,26 @@ class PyCurlFileObject():
if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
@@ -983,7 +1003,7 @@ index e090e90..74a692c 100644
# our url
self.curl_obj.setopt(pycurl.URL, self.url)
-@@ -1228,39 +1452,36 @@ class PyCurlFileObject():
+@@ -1228,39 +1453,36 @@ class PyCurlFileObject():
code = self.http_code
errcode = e.args[0]
@@ -1032,7 +1052,7 @@ index e090e90..74a692c 100644
# this is probably wrong but ultimately this is what happens
# we have a legit http code and a pycurl 'writer failed' code
# which almost always means something aborted it from outside
-@@ -1272,33 +1493,94 @@ class PyCurlFileObject():
+@@ -1272,33 +1494,94 @@ class PyCurlFileObject():
elif errcode == 58:
msg = _("problem with the local client certificate")
err = URLGrabError(14, msg)
@@ -1135,7 +1155,7 @@ index e090e90..74a692c 100644
def _do_open(self):
self.curl_obj = _curl_cache
-@@ -1333,7 +1615,11 @@ class PyCurlFileObject():
+@@ -1333,7 +1616,11 @@ class PyCurlFileObject():
if self.opts.range:
rt = self.opts.range
@@ -1148,7 +1168,7 @@ index e090e90..74a692c 100644
if rt:
header = range_tuple_to_header(rt)
-@@ -1434,21 +1720,46 @@ class PyCurlFileObject():
+@@ -1434,21 +1721,46 @@ class PyCurlFileObject():
#fh, self._temp_name = mkstemp()
#self.fo = open(self._temp_name, 'wb')
@@ -1202,7 +1222,7 @@ index e090e90..74a692c 100644
else:
#self.fo = open(self._temp_name, 'r')
self.fo.seek(0)
-@@ -1526,17 +1837,20 @@ class PyCurlFileObject():
+@@ -1526,17 +1838,20 @@ class PyCurlFileObject():
if self._prog_running:
downloaded += self._reget_length
self.opts.progress_obj.update(downloaded)
@@ -1228,7 +1248,7 @@ index e090e90..74a692c 100644
msg = _("Downloaded more than max size for %s: %s > %s") \
% (self.url, cur, max_size)
-@@ -1544,13 +1858,6 @@ class PyCurlFileObject():
+@@ -1544,13 +1859,6 @@ class PyCurlFileObject():
return True
return False
@@ -1242,7 +1262,7 @@ index e090e90..74a692c 100644
def read(self, amt=None):
self._fill_buffer(amt)
if amt is None:
-@@ -1582,9 +1889,21 @@ class PyCurlFileObject():
+@@ -1582,9 +1890,21 @@ class PyCurlFileObject():
self.opts.progress_obj.end(self._amount_read)
self.fo.close()
@@ -1265,7 +1285,7 @@ index e090e90..74a692c 100644
#####################################################################
# DEPRECATED FUNCTIONS
-@@ -1621,6 +1940,467 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
+@@ -1621,6 +1941,478 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
#####################################################################
@@ -1498,17 +1518,23 @@ index e090e90..74a692c 100644
+ host_con = {} # current host connection counts
+
+ def start(opts, tries):
++ opts.tries = tries
++ try:
++ dl.start(opts)
++ except OSError, e:
++ # can't spawn downloader, give up immediately
++ opts.exception = URLGrabError(5, exception2msg(e))
++ _run_callback(opts.failfunc, opts)
++ return
++
+ key, limit = opts.async
+ host_con[key] = host_con.get(key, 0) + 1
-+ opts.tries = tries
+ if opts.progress_obj:
+ if opts.multi_progress_obj:
+ opts._progress = opts.multi_progress_obj.newMeter()
+ opts._progress.start(text=opts.text)
+ else:
+ opts._progress = time.time() # no updates
-+ if DEBUG: DEBUG.info('attempt %i/%s: %s', opts.tries, opts.retry, opts.url)
-+ dl.start(opts)
+
+ def perform():
+ for opts, size, ug_err in dl.perform():
@@ -1588,6 +1614,8 @@ index e090e90..74a692c 100644
+ # check global limit
+ while len(dl.running) >= default_grabber.opts.max_connections:
+ perform()
++ if DEBUG:
++ DEBUG.info('max_connections: %d/%d', len(dl.running), default_grabber.opts.max_connections)
+
+ if opts.mirror_group:
+ mg, errors, failed, removed = opts.mirror_group
@@ -1636,6 +1664,9 @@ index e090e90..74a692c 100644
+ key, limit = opts.async
+ while host_con.get(key, 0) >= limit:
+ perform()
++ if DEBUG:
++ DEBUG.info('max_connections(%s): %d/%d', key, host_con.get(key, 0), limit)
++
+ start(opts, 1)
+ except IOError, e:
+ if e.errno != 4: raise
More information about the scm-commits mailing list