[python-urlgrabber/f18] Update to latest HEAD

Zdeněk Pavlas zpavlas at fedoraproject.org
Mon Aug 27 08:22:42 UTC 2012


commit 5e2f4ce4458008625c23700e921a7cdd486d468a
Author: Zdeněk Pavlas <zpavlas at redhat.com>
Date:   Mon Aug 27 10:21:51 2012 +0200

    Update to latest HEAD

 python-urlgrabber.spec |    5 +-
 urlgrabber-HEAD.patch  |  329 +++++++++++++++++++++++++++++++++--------------
 2 files changed, 235 insertions(+), 99 deletions(-)
---
diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec
index e89deeb..1aa448e 100644
--- a/python-urlgrabber.spec
+++ b/python-urlgrabber.spec
@@ -3,7 +3,7 @@
 Summary: A high-level cross-protocol url-grabber
 Name: python-urlgrabber
 Version: 3.9.1
-Release: 15%{?dist}
+Release: 16%{?dist}
 Source0: urlgrabber-%{version}.tar.gz
 Patch1: urlgrabber-HEAD.patch
 
@@ -44,6 +44,9 @@ rm -rf $RPM_BUILD_ROOT
 %attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down
 
 %changelog
+* Mon Aug 27 2012 Zdeněk Pavlas <zpavlas at redhat.com> - 3.9.1-16
+- Update to latest HEAD
+
 * Fri Jul 20 2012 Zdeněk Pavlas <zpavlas at redhat.com> - 3.9.1-15
 - Update to latest head, misc bugfixes: BZ 832028, 831904, 831291.
 - Disable Kerberos auth.  BZ 769254
diff --git a/urlgrabber-HEAD.patch b/urlgrabber-HEAD.patch
index 4e1b34b..a092d3a 100644
--- a/urlgrabber-HEAD.patch
+++ b/urlgrabber-HEAD.patch
@@ -233,10 +233,10 @@ index 3e5f3b7..8eeaeda 100644
      return (fb,lb)
  
 diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
-index e090e90..83823ea 100644
+index e090e90..daa478d 100644
 --- a/urlgrabber/grabber.py
 +++ b/urlgrabber/grabber.py
-@@ -49,7 +49,7 @@ GENERAL ARGUMENTS (kwargs)
+@@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs)
    progress_obj = None
  
      a class instance that supports the following methods:
@@ -245,7 +245,26 @@ index e090e90..83823ea 100644
        # length will be None if unknown
        po.update(read) # read == bytes read so far
        po.end()
-@@ -68,14 +68,14 @@ GENERAL ARGUMENTS (kwargs)
+ 
++  multi_progress_obj = None
++
++    a class instance that supports the following methods:
++      mo.start(total_files, total_size)
++      mo.newMeter() => meter
++      mo.removeMeter(meter)
++      mo.end()
++
++    The 'meter' object is similar to progress_obj, but multiple
++    instances may be created and updated at the same time.
++
++    When downloading multiple files in parallel and multi_progress_obj
++    is None, progress_obj is used in compatibility mode: finished files
++    are shown, but there is no in-progress display.
++
+   text = None
+   
+     specifies alternative text to be passed to the progress meter
+@@ -68,14 +83,14 @@ GENERAL ARGUMENTS (kwargs)
      (which can be set on default_grabber.throttle) is used. See
      BANDWIDTH THROTTLING for more information.
  
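For illustration (not part of the patch), a minimal object satisfying
the multi_progress_obj interface documented in the hunk above might
look like the sketch below.  The stock implementation is
TextMultiFileMeter in urlgrabber/progress.py; note that the parallel
download code also adjusts meter.re.total when a file's actual size
differs from the expected one, so an 're' object with a 'total'
attribute is assumed here.

    import sys

    class _Rate:
        # stand-in for progress.RateEstimator; parallel_wait()
        # corrects re.total when actual file sizes differ
        total = 0

    class SimpleMultiMeter:
        def start(self, total_files, total_size):
            self.re = _Rate()
            self.re.total = total_size
            self.numfiles = total_files
            self.finished = 0

        def newMeter(self):
            # one meter per file currently downloading
            return _SimpleMeter()

        def removeMeter(self, meter):
            self.finished += 1
            sys.stderr.write('(%u/%u) %s done\n' % (
                self.finished, self.numfiles, meter.text or '?'))

        def end(self):
            sys.stderr.write('all downloads finished\n')

    class _SimpleMeter:
        # per-file meter: same protocol as a single-file progress_obj
        text = None

        def start(self, text=None, **kwargs):
            self.text = text

        def update(self, amount_read):
            pass  # a real meter would redraw its progress line here

        def end(self, amount_read=None):
            pass
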
@@ -267,7 +286,7 @@ index e090e90..83823ea 100644
  
    bandwidth = 0
  
-@@ -143,8 +143,12 @@ GENERAL ARGUMENTS (kwargs)
+@@ -143,8 +158,12 @@ GENERAL ARGUMENTS (kwargs)
      note that proxy authentication information may be provided using
      normal URL constructs:
        proxies={ 'http' : 'http://user:host@foo:3128' }
@@ -282,7 +301,7 @@ index e090e90..83823ea 100644
  
    prefix = None
  
-@@ -198,6 +202,12 @@ GENERAL ARGUMENTS (kwargs)
+@@ -198,6 +217,12 @@ GENERAL ARGUMENTS (kwargs)
      control, you should probably subclass URLParser and pass it in via
      the 'urlparser' option.
  
@@ -295,7 +314,7 @@ index e090e90..83823ea 100644
    ssl_ca_cert = None
  
      this option can be used if M2Crypto is available and will be
-@@ -211,43 +221,75 @@ GENERAL ARGUMENTS (kwargs)
+@@ -211,43 +236,75 @@ GENERAL ARGUMENTS (kwargs)
      No-op when using the curl backend (default)
     
  
@@ -380,7 +399,7 @@ index e090e90..83823ea 100644
  
  RETRY RELATED ARGUMENTS
  
-@@ -328,6 +370,15 @@ RETRY RELATED ARGUMENTS
+@@ -328,6 +385,15 @@ RETRY RELATED ARGUMENTS
      but it cannot (without severe trickiness) prevent the exception
      from being raised.
  
@@ -396,7 +415,7 @@ index e090e90..83823ea 100644
    interrupt_callback = None
  
      This callback is called if KeyboardInterrupt is received at any
-@@ -420,6 +471,7 @@ import time
+@@ -420,6 +486,7 @@ import time
  import string
  import urllib
  import urllib2
@@ -404,7 +423,7 @@ index e090e90..83823ea 100644
  import mimetools
  import thread
  import types
-@@ -428,9 +480,17 @@ import pycurl
+@@ -428,9 +495,17 @@ import pycurl
  from ftplib import parse150
  from StringIO import StringIO
  from httplib import HTTPException
@@ -423,7 +442,7 @@ index e090e90..83823ea 100644
  ########################################################################
  #                     MODULE INITIALIZATION
  ########################################################################
-@@ -439,6 +499,12 @@ try:
+@@ -439,6 +514,12 @@ try:
  except:
      __version__ = '???'
  
@@ -436,7 +455,7 @@ index e090e90..83823ea 100644
  ########################################################################
  # functions for debugging output.  These functions are here because they
  # are also part of the module initialization.
-@@ -527,6 +593,22 @@ def _(st):
+@@ -527,6 +608,22 @@ def _(st):
  #                 END MODULE INITIALIZATION
  ########################################################################
  
@@ -459,7 +478,7 @@ index e090e90..83823ea 100644
  
  
  class URLGrabError(IOError):
-@@ -662,6 +744,7 @@ class URLParser:
+@@ -662,6 +759,7 @@ class URLParser:
            opts.quote = 0     --> do not quote it
            opts.quote = None  --> guess
          """
@@ -467,7 +486,7 @@ index e090e90..83823ea 100644
          quote = opts.quote
          
          if opts.prefix:
-@@ -768,6 +851,41 @@ class URLGrabberOptions:
+@@ -768,6 +866,41 @@ class URLGrabberOptions:
          else: # throttle is a float
              return self.bandwidth * self.throttle
          
@@ -509,7 +528,13 @@ index e090e90..83823ea 100644
      def derive(self, **kwargs):
          """Create a derived URLGrabberOptions instance.
          This method creates a new instance and overrides the
-@@ -796,25 +914,31 @@ class URLGrabberOptions:
+@@ -791,30 +924,37 @@ class URLGrabberOptions:
+         provided here.
+         """
+         self.progress_obj = None
++        self.multi_progress_obj = None
+         self.throttle = 1.0
+         self.bandwidth = 0
          self.retry = None
          self.retrycodes = [-1,2,4,5,6,7]
          self.checkfunc = None
@@ -542,7 +567,7 @@ index e090e90..83823ea 100644
          self.ssl_ca_cert = None # sets SSL_CAINFO - path to certdb
          self.ssl_context = None # no-op in pycurl
         self.ssl_verify_peer = True # check peer's cert for authenticity
-@@ -827,6 +951,12 @@ class URLGrabberOptions:
+@@ -827,6 +967,12 @@ class URLGrabberOptions:
          self.size = None # if we know how big the thing we're getting is going
                           # to be. this is ultimately a MAXIMUM size for the file
          self.max_header_size = 2097152 #2mb seems reasonable for maximum header size
@@ -555,7 +580,7 @@ index e090e90..83823ea 100644
          
      def __repr__(self):
          return self.format()
-@@ -846,7 +976,18 @@ class URLGrabberOptions:
+@@ -846,7 +992,18 @@ class URLGrabberOptions:
          s = s + indent + '}'
          return s
  
@@ -575,7 +600,7 @@ index e090e90..83823ea 100644
      """Provides easy opening of URLs with a variety of options.
      
      All options are specified as kwargs. Options may be specified when
-@@ -872,7 +1013,6 @@ class URLGrabber:
+@@ -872,7 +1029,6 @@ class URLGrabber:
              # beware of infinite loops :)
              tries = tries + 1
              exception = None
@@ -583,7 +608,7 @@ index e090e90..83823ea 100644
              callback  = None
              if DEBUG: DEBUG.info('attempt %i/%s: %s',
                                   tries, opts.retry, args[0])
-@@ -883,23 +1023,24 @@ class URLGrabber:
+@@ -883,54 +1039,62 @@ class URLGrabber:
              except URLGrabError, e:
                  exception = e
                  callback = opts.failure_callback
@@ -611,24 +636,36 @@ index e090e90..83823ea 100644
              if (retrycode is not None) and (retrycode not in opts.retrycodes):
                  if DEBUG: DEBUG.info('retrycode (%i) not in list %s, re-raising',
                                       retrycode, opts.retrycodes)
-@@ -912,9 +1053,11 @@ class URLGrabber:
+                 raise
+     
+-    def urlopen(self, url, **kwargs):
++    def urlopen(self, url, opts=None, **kwargs):
+         """open the url and return a file object
+         If a progress object or throttle value specified when this 
+         object was created, then  a special file object will be 
          returned that supports them. The file object can be treated 
          like any other file object.
          """
+-        opts = self.opts.derive(**kwargs)
 +        url = _to_utf8(url)
-         opts = self.opts.derive(**kwargs)
++        opts = (opts or self.opts).derive(**kwargs)
          if DEBUG: DEBUG.debug('combined options: %s' % repr(opts))
          (url,parts) = opts.urlparser.parse(url, opts) 
 +        opts.find_proxy(url, parts[0])
          def retryfunc(opts, url):
              return PyCurlFileObject(url, filename=None, opts=opts)
          return self._retry(opts, retryfunc, url)
-@@ -925,12 +1068,17 @@ class URLGrabber:
+     
+-    def urlgrab(self, url, filename=None, **kwargs):
++    def urlgrab(self, url, filename=None, opts=None, **kwargs):
+         """grab the file at <url> and make a local copy at <filename>
+         If filename is none, the basename of the url is used.
          urlgrab returns the filename of the local file, which may be 
          different from the passed-in filename if copy_local == 0.
          """
+-        opts = self.opts.derive(**kwargs)
 +        url = _to_utf8(url)
-         opts = self.opts.derive(**kwargs)
++        opts = (opts or self.opts).derive(**kwargs)
          if DEBUG: DEBUG.debug('combined options: %s' % repr(opts))
          (url,parts) = opts.urlparser.parse(url, opts) 
          (scheme, host, path, parm, query, frag) = parts
@@ -641,7 +678,7 @@ index e090e90..83823ea 100644
          if scheme == 'file' and not opts.copy_local:
              # just return the name of the local file - don't make a 
              # copy currently
-@@ -950,30 +1098,36 @@ class URLGrabber:
+@@ -950,41 +1114,49 @@ class URLGrabber:
  
              elif not opts.range:
                  if not opts.checkfunc is None:
@@ -689,21 +726,24 @@ index e090e90..83823ea 100644
 +            opts.exception = e
 +            return _run_callback(opts.failfunc, opts)
      
-     def urlread(self, url, limit=None, **kwargs):
+-    def urlread(self, url, limit=None, **kwargs):
++    def urlread(self, url, limit=None, opts=None, **kwargs):
          """read the url into a string, up to 'limit' bytes
-@@ -982,9 +1136,11 @@ class URLGrabber:
+         If the limit is exceeded, an exception will be thrown.  Note
+         that urlread is NOT intended to be used as a way of saying 
          "I want the first N bytes" but rather 'read the whole file 
          into memory, but don't use too much'
          """
+-        opts = self.opts.derive(**kwargs)
 +        url = _to_utf8(url)
-         opts = self.opts.derive(**kwargs)
++        opts = (opts or self.opts).derive(**kwargs)
          if DEBUG: DEBUG.debug('combined options: %s' % repr(opts))
          (url,parts) = opts.urlparser.parse(url, opts) 
 +        opts.find_proxy(url, parts[0])
          if limit is not None:
              limit = limit + 1
              
-@@ -1000,12 +1156,8 @@ class URLGrabber:
+@@ -1000,12 +1172,8 @@ class URLGrabber:
                  else: s = fo.read(limit)
  
                  if not opts.checkfunc is None:
@@ -718,7 +758,7 @@ index e090e90..83823ea 100644
              finally:
                  fo.close()
              return s
-@@ -1020,6 +1172,7 @@ class URLGrabber:
+@@ -1020,6 +1188,7 @@ class URLGrabber:
          return s
          
      def _make_callback(self, callback_obj):
@@ -726,7 +766,7 @@ index e090e90..83823ea 100644
          if callable(callback_obj):
              return callback_obj, (), {}
          else:
-@@ -1030,7 +1183,7 @@ class URLGrabber:
+@@ -1030,7 +1199,7 @@ class URLGrabber:
  default_grabber = URLGrabber()
  
  
@@ -735,7 +775,7 @@ index e090e90..83823ea 100644
      def __init__(self, url, filename, opts):
          self.fo = None
          self._hdr_dump = ''
-@@ -1052,10 +1205,11 @@ class PyCurlFileObject():
+@@ -1052,10 +1221,11 @@ class PyCurlFileObject():
          self._reget_length = 0
          self._prog_running = False
          self._error = (None, None)
@@ -749,7 +789,7 @@ index e090e90..83823ea 100644
      def __getattr__(self, name):
          """This effectively allows us to wrap at the instance level.
          Any attribute not found in _this_ object will be searched for
-@@ -1085,9 +1239,14 @@ class PyCurlFileObject():
+@@ -1085,9 +1255,14 @@ class PyCurlFileObject():
              return -1
              
      def _hdr_retrieve(self, buf):
@@ -765,7 +805,7 @@ index e090e90..83823ea 100644
          try:
              self._hdr_dump += buf
              # we have to get the size before we do the progress obj start
-@@ -1104,7 +1263,17 @@ class PyCurlFileObject():
+@@ -1104,7 +1279,17 @@ class PyCurlFileObject():
                      s = parse150(buf)
                  if s:
                      self.size = int(s)
@@ -784,7 +824,7 @@ index e090e90..83823ea 100644
              return len(buf)
          except KeyboardInterrupt:
              return pycurl.READFUNC_ABORT
-@@ -1113,8 +1282,10 @@ class PyCurlFileObject():
+@@ -1113,8 +1298,10 @@ class PyCurlFileObject():
          if self._parsed_hdr:
              return self._parsed_hdr
          statusend = self._hdr_dump.find('\n')
@@ -795,7 +835,7 @@ index e090e90..83823ea 100644
          self._parsed_hdr =  mimetools.Message(hdrfp)
          return self._parsed_hdr
      
-@@ -1127,6 +1298,9 @@ class PyCurlFileObject():
+@@ -1127,6 +1314,9 @@ class PyCurlFileObject():
          if not opts:
              opts = self.opts
  
@@ -805,7 +845,7 @@ index e090e90..83823ea 100644
  
          # defaults we're always going to set
          self.curl_obj.setopt(pycurl.NOPROGRESS, False)
-@@ -1136,11 +1310,21 @@ class PyCurlFileObject():
+@@ -1136,11 +1326,21 @@ class PyCurlFileObject():
          self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
          self.curl_obj.setopt(pycurl.FAILONERROR, True)
          self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
@@ -827,7 +867,7 @@ index e090e90..83823ea 100644
          
          # maybe to be options later
          self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
-@@ -1148,9 +1332,11 @@ class PyCurlFileObject():
+@@ -1148,9 +1348,11 @@ class PyCurlFileObject():
          
          # timeouts
          timeout = 300
@@ -842,7 +882,7 @@ index e090e90..83823ea 100644
  
          # ssl options
          if self.scheme == 'https':
-@@ -1158,13 +1344,16 @@ class PyCurlFileObject():
+@@ -1158,13 +1360,16 @@ class PyCurlFileObject():
                  self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert)
                  self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert)
              self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer)
@@ -860,7 +900,7 @@ index e090e90..83823ea 100644
              if opts.ssl_cert_type:                
                  self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type)
              if opts.ssl_key_pass:
-@@ -1187,28 +1376,26 @@ class PyCurlFileObject():
+@@ -1187,28 +1392,26 @@ class PyCurlFileObject():
          if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
              self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
              
@@ -905,7 +945,7 @@ index e090e90..83823ea 100644
              
          # our url
          self.curl_obj.setopt(pycurl.URL, self.url)
-@@ -1228,12 +1415,14 @@ class PyCurlFileObject():
+@@ -1228,12 +1431,14 @@ class PyCurlFileObject():
              
              code = self.http_code
              errcode = e.args[0]
@@ -922,7 +962,7 @@ index e090e90..83823ea 100644
                  
                  # this is probably wrong but ultimately this is what happens
                  # we have a legit http code and a pycurl 'writer failed' code
-@@ -1244,23 +1433,23 @@ class PyCurlFileObject():
+@@ -1244,23 +1449,23 @@ class PyCurlFileObject():
                  raise KeyboardInterrupt
              
              elif errcode == 28:
@@ -953,7 +993,7 @@ index e090e90..83823ea 100644
                  # this is probably wrong but ultimately this is what happens
                  # we have a legit http code and a pycurl 'writer failed' code
                  # which almost always means something aborted it from outside
-@@ -1272,33 +1461,94 @@ class PyCurlFileObject():
+@@ -1272,33 +1477,94 @@ class PyCurlFileObject():
              elif errcode == 58:
                  msg = _("problem with the local client certificate")
                  err = URLGrabError(14, msg)
@@ -1055,7 +1095,7 @@ index e090e90..83823ea 100644
  
      def _do_open(self):
          self.curl_obj = _curl_cache
-@@ -1333,7 +1583,11 @@ class PyCurlFileObject():
+@@ -1333,7 +1599,11 @@ class PyCurlFileObject():
                  
          if self.opts.range:
              rt = self.opts.range
@@ -1068,7 +1108,7 @@ index e090e90..83823ea 100644
  
          if rt:
              header = range_tuple_to_header(rt)
-@@ -1434,21 +1688,46 @@ class PyCurlFileObject():
+@@ -1434,21 +1704,46 @@ class PyCurlFileObject():
              #fh, self._temp_name = mkstemp()
              #self.fo = open(self._temp_name, 'wb')
  
@@ -1122,7 +1162,7 @@ index e090e90..83823ea 100644
          else:
              #self.fo = open(self._temp_name, 'r')
              self.fo.seek(0)
-@@ -1526,17 +1805,20 @@ class PyCurlFileObject():
+@@ -1526,17 +1821,20 @@ class PyCurlFileObject():
              if self._prog_running:
                  downloaded += self._reget_length
                  self.opts.progress_obj.update(downloaded)
@@ -1148,7 +1188,7 @@ index e090e90..83823ea 100644
  
              msg = _("Downloaded more than max size for %s: %s > %s") \
                          % (self.url, cur, max_size)
-@@ -1544,13 +1826,6 @@ class PyCurlFileObject():
+@@ -1544,13 +1842,6 @@ class PyCurlFileObject():
              return True
          return False
          
@@ -1162,7 +1202,7 @@ index e090e90..83823ea 100644
      def read(self, amt=None):
          self._fill_buffer(amt)
          if amt is None:
-@@ -1582,9 +1857,21 @@ class PyCurlFileObject():
+@@ -1582,9 +1873,21 @@ class PyCurlFileObject():
              self.opts.progress_obj.end(self._amount_read)
          self.fo.close()
          
@@ -1185,7 +1225,7 @@ index e090e90..83823ea 100644
  
  #####################################################################
  # DEPRECATED FUNCTIONS
-@@ -1621,6 +1908,442 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
+@@ -1621,6 +1924,460 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0,
  
          
  #####################################################################
@@ -1309,7 +1349,7 @@ index e090e90..83823ea 100644
 +            v = getattr(opts, k)
 +            if v is None: continue
 +            arg.append('%s=%s' % (k, _dumps(v)))
-+        if opts.progress_obj:
++        if opts.progress_obj and opts.multi_progress_obj:
 +            arg.append('progress_obj=True')
 +        arg = ' '.join(arg)
 +        if DEBUG: DEBUG.info('attempt %i/%s: %s', opts.tries, opts.retry, opts.url)
@@ -1329,7 +1369,7 @@ index e090e90..83823ea 100644
 +            line = line.split(' ', 5)
 +            _id, size = map(int, line[:2])
 +            if len(line) == 2:
-+                self.running[_id].progress_obj.update(size)
++                self.running[_id]._progress.update(size)
 +                continue
 +            # job done
 +            opts = self.running.pop(_id)
@@ -1368,6 +1408,9 @@ index e090e90..83823ea 100644
 +    def perform(self):
 +        ret = []
 +        for fd, event in self.epoll.poll():
++            if event & select.EPOLLHUP:
++                if DEBUG: DEBUG.info('downloader died')
++                raise KeyboardInterrupt
 +            assert event & select.EPOLLIN
 +            done = self.running[fd].perform()
 +            if not done: continue
@@ -1395,19 +1438,20 @@ index e090e90..83823ea 100644
 +
 +_async_queue = []
 +
-+def parallel_wait(meter = 'text'):
++def parallel_wait(meter=None):
 +    '''Process queued requests in parallel.
 +    '''
 +
-+    if meter:
-+        count = total = 0
-+        for opts in _async_queue:
-+            if opts.progress_obj:
-+                count += 1
-+                total += opts.size
-+        if meter == 'text':
-+            from progress import TextMultiFileMeter
-+            meter = TextMultiFileMeter()
++    # calculate total sizes
++    meters = {}
++    for opts in _async_queue:
++        if opts.progress_obj and opts.multi_progress_obj:
++            count, total = meters.get(opts.multi_progress_obj) or (0, 0)
++            meters[opts.multi_progress_obj] = count + 1, total + opts.size
++
++    # start multi-file meters
++    for meter in meters:
++        count, total = meters[meter]
 +        meter.start(count, total)
 +
 +    dl = _ExternalDownloaderPool()
@@ -1417,11 +1461,12 @@ index e090e90..83823ea 100644
 +        key, limit = opts.async
 +        host_con[key] = host_con.get(key, 0) + 1
 +        opts.tries = tries
-+        if meter and opts.progress_obj:
-+            opts.progress_obj = meter.newMeter()
-+            opts.progress_obj.start(text=opts.text, basename=os.path.basename(opts.filename))
-+        else:
-+            opts.progress_obj = None
++        if opts.progress_obj:
++            if opts.multi_progress_obj:
++                opts._progress = opts.multi_progress_obj.newMeter()
++                opts._progress.start(text=opts.text)
++            else:
++                opts._progress = time.time() # no updates
 +        if DEBUG: DEBUG.info('attempt %i/%s: %s', opts.tries, opts.retry, opts.url)
 +        dl.start(opts)
 +
@@ -1429,15 +1474,16 @@ index e090e90..83823ea 100644
 +        for opts, size, ug_err in dl.perform():
 +            key, limit = opts.async
 +            host_con[key] -= 1
-+            m = opts.progress_obj
-+            if m:
-+                if ug_err:
-+                    m.failure(ug_err.args[1])
++            if opts.progress_obj:
++                if opts.multi_progress_obj:
++                    opts.multi_progress_obj.re.total += size - opts.size # correct totals
++                    opts._progress.end(size)
++                    opts.multi_progress_obj.removeMeter(opts._progress)
 +                else:
-+                    # file size might have changed
-+                    meter.re.total += size - opts.size
-+                    m.end(size)
-+                meter.removeMeter(m)
++                    opts.progress_obj.start(text=opts.text, now=opts._progress)
++                    opts.progress_obj.update(size)
++                    opts.progress_obj.end(size)
++                del opts._progress
 +
 +            if ug_err is None:
 +                if opts.checkfunc:
@@ -1457,13 +1503,15 @@ index e090e90..83823ea 100644
 +                continue
 +
 +            if opts.mirror_group:
-+                mg, failed, removed = opts.mirror_group
++                mg, errors, failed, removed = opts.mirror_group
++                errors.append((opts.url, str(ug_err)))
 +                failed[key] = failed.get(key, 0) + 1
 +                opts.mirror = key
 +                opts.exception = ug_err
 +                action = mg.default_action or {}
 +                if mg.failure_callback:
-+                    opts.tries = sum(failed.values())
++                    opts.tries = len(errors)
++                    action = dict(action) # update only the copy
 +                    action.update(_run_callback(mg.failure_callback, opts))
 +                if not action.get('fail', 0):
 +                    # mask this mirror and retry
@@ -1471,6 +1519,8 @@ index e090e90..83823ea 100644
 +                        removed.add(key)
 +                    _async_queue.append(opts)
 +                    continue
++                # fail=1 from callback
++                ug_err.errors = errors
 +
 +            # urlgrab failed
 +            opts.exception = ug_err
@@ -1491,11 +1541,11 @@ index e090e90..83823ea 100644
 +            idx += 1
 +
 +            # check global limit
-+            while len(dl.running) >= opts.max_connections:
++            while len(dl.running) >= default_grabber.opts.max_connections:
 +                perform()
 +
 +            if opts.mirror_group:
-+                mg, failed, removed = opts.mirror_group
++                mg, errors, failed, removed = opts.mirror_group
 +
 +                # find the best mirror
 +                best = None
@@ -1516,9 +1566,14 @@ index e090e90..83823ea 100644
 +
 +                if best is None:
 +                    opts.exception = URLGrabError(256, _('No more mirrors to try.'))
++                    opts.exception.errors = errors
 +                    _run_callback(opts.failfunc, opts)
 +                    continue
 +
++                # update the grabber object, apply mirror kwargs
++                grabber = best.get('grabber') or mg.grabber
++                opts.delegate = grabber.opts.derive(**best.get('kwargs', {}))
++
 +                # update the current mirror and limit
 +                key = best['mirror']
 +                limit = best.get('kwargs', {}).get('max_connections', 2)
@@ -1541,7 +1596,8 @@ index e090e90..83823ea 100644
 +
 +    finally:
 +        dl.abort()
-+        if meter: meter.end()
++        for meter in meters:
++            meter.end()
 +        del _async_queue[:]
 +        _TH.save()
 +
@@ -1560,7 +1616,7 @@ index e090e90..83823ea 100644
 +        if filename and _TH.dirty is None:
 +            try:
 +                for line in open(filename):
-+                    host, speed, fail, ts = line.split(' ', 3)
++                    host, speed, fail, ts = line.rsplit(' ', 3)
 +                    _TH.hosts[host] = int(speed), int(fail), int(ts)
 +            except IOError: pass
 +            _TH.dirty = False
@@ -1581,17 +1637,19 @@ index e090e90..83823ea 100644
 +
 +    @staticmethod
 +    def update(url, dl_size, dl_time, ug_err, baseurl=None):
-+        _TH.load()
-+
 +        # Use hostname from URL.  If it's a file:// URL, use baseurl.
 +        # If no baseurl, do not update timedhosts.
 +        host = urlparse.urlsplit(url).netloc.split('@')[-1] or baseurl
 +        if not host: return
 +
++        _TH.load()
 +        speed, fail, ts = _TH.hosts.get(host) or (0, 0, 0)
 +        now = time.time()
 +
 +        if ug_err is None:
++            # defer first update if the file was small.  BZ 851178.
++            if not ts and dl_size < 1e6: return
++
 +            # k1: the older, the less useful
 +            # k2: <500ms readings are less reliable
 +            # speeds vary, use 10:1 smoothing
@@ -1629,20 +1687,24 @@ index e090e90..83823ea 100644
  def _main_test():
      try: url, filename = sys.argv[1:3]
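
Taken together, the grabber.py changes above define the asynchronous
API: urlgrab() with an async=(key, limit) option queues the request,
and parallel_wait() drains the queue through the external downloader
pool, firing checkfunc/failfunc callbacks as results arrive.  A usage
sketch (the host, file names and callback are illustrative, not part
of the patch):

    import urlgrabber.grabber as grabber
    from urlgrabber.progress import TextMeter, TextMultiFileMeter

    def failfunc(obj):
        # called instead of raising; obj.url and obj.exception are set
        print 'FAILED %s: %s' % (obj.url, obj.exception)

    g = grabber.URLGrabber(progress_obj=TextMeter(),
                           multi_progress_obj=TextMultiFileMeter())
    for name in ('a.rpm', 'b.rpm', 'c.rpm'):
        # async=(key, limit): at most 'limit' concurrent connections
        # are opened per 'key' (usually the host name)
        g.urlgrab('http://mirror.example/' + name, name,
                  async=('mirror.example', 2), failfunc=failfunc)
    grabber.parallel_wait()  # process the queue, firing callbacks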
 diff --git a/urlgrabber/mirror.py b/urlgrabber/mirror.py
-index dad410b..ac78b34 100644
+index dad410b..b17be17 100644
 --- a/urlgrabber/mirror.py
 +++ b/urlgrabber/mirror.py
 @@ -76,6 +76,9 @@ CUSTOMIZATION
         'grabber' is omitted, the default grabber will be used.  If
         kwargs are omitted, then (duh) they will not be used.
  
-+       kwarg 'max_connections' is used to store the max connection
-+       limit of this mirror.
++       kwarg 'max_connections' limits the number of concurrent
++       connections to this mirror.
 +
      3) Pass keyword arguments when instantiating the mirror group.
         See, for example, the failure_callback argument.
  
-@@ -90,7 +93,8 @@ CUSTOMIZATION
+@@ -87,10 +90,12 @@ CUSTOMIZATION
+ """
+ 
+ 
++import sys
  import random
  import thread  # needed for locking to make this threadsafe
  
@@ -1652,7 +1714,28 @@ index dad410b..ac78b34 100644
  
  def _(st): 
      return st
-@@ -184,6 +188,7 @@ class MirrorGroup:
+@@ -126,7 +131,9 @@ class MirrorGroup:
+         files)
+ 
+       * if the local list is ever exhausted, a URLGrabError will be
+-        raised (errno=256, no more mirrors)
++        raised (errno=256, No more mirrors).  The 'errors' attribute
++        holds a list of (full_url, errmsg) tuples.  This contains
++        all URLs tried and the corresponding error messages.
+ 
+     OPTIONS
+ 
+@@ -153,7 +160,8 @@ class MirrorGroup:
+ 
+         The 'fail' option will cause immediate failure by re-raising
+         the exception and no further attempts to get the current
+-        download.
++        download.  As in the "No more mirrors" case, the 'errors'
++        attribute is set in the exception object.
+ 
+         This dict can be set at instantiation time,
+           mg = MirrorGroup(grabber, mirrors, default_action={'fail':1})
+@@ -184,6 +192,7 @@ class MirrorGroup:
  
             obj.exception    = < exception that was raised >
             obj.mirror       = < the mirror that was tried >
@@ -1660,7 +1743,7 @@ index dad410b..ac78b34 100644
             obj.relative_url = < url relative to the mirror >
             obj.url          = < full url that failed >
                                # .url is just the combination of .mirror
-@@ -263,7 +268,8 @@ class MirrorGroup:
+@@ -263,7 +272,8 @@ class MirrorGroup:
      def _parse_mirrors(self, mirrors):
          parsed_mirrors = []
          for m in mirrors:
@@ -1670,7 +1753,35 @@ index dad410b..ac78b34 100644
              parsed_mirrors.append(m)
          return parsed_mirrors
      
-@@ -382,7 +388,9 @@ class MirrorGroup:
+@@ -280,7 +290,9 @@ class MirrorGroup:
+         #   return a random mirror so that multiple mirrors get used
+         #   even without failures.
+         if not gr.mirrors:
+-            raise URLGrabError(256, _('No more mirrors to try.'))
++            e = URLGrabError(256, _('No more mirrors to try.'))
++            e.errors = gr.errors
++            raise e
+         return gr.mirrors[gr._next]
+ 
+     def _failure(self, gr, cb_obj):
+@@ -307,7 +319,9 @@ class MirrorGroup:
+         a.update(action)
+         action = a
+         self.increment_mirror(gr, action)
+-        if action and action.get('fail', 0): raise
++        if action and action.get('fail', 0):
++            sys.exc_info()[1].errors = gr.errors
++            raise
+ 
+     def increment_mirror(self, gr, action={}):
+         """Tell the mirror object increment the mirror index
+@@ -377,35 +391,50 @@ class MirrorGroup:
+         gr.url  = url
+         gr.kw   = dict(kw)
+         self._load_gr(gr)
++        gr.errors = []
+ 
+         for k in self.options:
              try: del kw[k]
              except KeyError: pass
  
@@ -1679,8 +1790,21 @@ index dad410b..ac78b34 100644
 +            tries += 1
              mirrorchoice = self._get_mirror(gr)
              fullurl = self._join_url(mirrorchoice['mirror'], gr.url)
-             kwargs = dict(mirrorchoice.get('kwargs', {}))
-@@ -399,13 +407,24 @@ class MirrorGroup:
+-            kwargs = dict(mirrorchoice.get('kwargs', {}))
+-            kwargs.update(kw)
+             grabber = mirrorchoice.get('grabber') or self.grabber
++            # apply mirrorchoice kwargs on top of grabber.opts
++            opts = grabber.opts.derive(**mirrorchoice.get('kwargs', {}))
+             func_ref = getattr(grabber, func)
+             if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl)
+             try:
+-                return func_ref( *(fullurl,), **kwargs )
++                return func_ref( *(fullurl,), opts=opts, **kw )
+             except URLGrabError, e:
+                 if DEBUG: DEBUG.info('MIRROR: failed')
++                gr.errors.append((fullurl, str(e)))
+                 obj = CallbackObject()
+                 obj.exception = e
                  obj.mirror = mirrorchoice['mirror']
                  obj.relative_url = gr.url
                  obj.url = fullurl
@@ -1692,7 +1816,7 @@ index dad410b..ac78b34 100644
          kw['filename'] = filename
 +        if kw.get('async'):
 +            # enable mirror failovers in async path
-+            kw['mirror_group'] = self, {}, set()
++            kw['mirror_group'] = self, [], {}, set()
 +            kw['relative_url'] = url
 +        else:
 +            kw.pop('failfunc', None)
@@ -1707,7 +1831,7 @@ index dad410b..ac78b34 100644
      def urlopen(self, url, **kwargs):
          kw = dict(kwargs)
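
The mirror.py changes also apply in the synchronous path: when every
mirror has been tried, the URLGrabError carries the new 'errors'
list, and per-mirror kwargs (including max_connections) are layered
onto grabber.opts via the new opts= parameter shown above.  A short
sketch (mirror URLs are invented):

    from urlgrabber.grabber import URLGrabError, default_grabber
    from urlgrabber.mirror import MirrorGroup

    mirrors = ['http://m1.example/repo',
               # per-mirror kwargs, e.g. the max_connections limit
               {'mirror': 'http://m2.example/repo',
                'kwargs': {'max_connections': 4}}]
    mg = MirrorGroup(default_grabber, mirrors)
    try:
        mg.urlgrab('packages/foo.rpm', filename='foo.rpm')
    except URLGrabError, e:
        # errno 256 == "No more mirrors to try"; e.errors holds a
        # (full_url, error_message) tuple for every failed attempt
        for url, msg in getattr(e, 'errors', []):
            print '%s: %s' % (url, msg)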
 diff --git a/urlgrabber/progress.py b/urlgrabber/progress.py
-index dd07c6a..4c126c5 100644
+index dd07c6a..ad57dbc 100644
 --- a/urlgrabber/progress.py
 +++ b/urlgrabber/progress.py
 @@ -211,6 +211,21 @@ def text_meter_total_size(size, downloaded=0):
@@ -1796,7 +1920,7 @@ index dd07c6a..4c126c5 100644
          self._do_end(now)
          
      def _do_end(self, now):
-@@ -466,11 +483,20 @@ class MultiFileMeter:
+@@ -466,11 +483,21 @@ class MultiFileMeter:
  
  
  class TextMultiFileMeter(MultiFileMeter):
@@ -1805,6 +1929,7 @@ index dd07c6a..4c126c5 100644
          self.fo = fo
 -        MultiFileMeter.__init__(self)
 +        MultiFileMeter.__init__(self, threaded)
++        self.index_time = self.index = 0
  
      # files: ###/### ###%  data: ######/###### ###%  time: ##:##:##/##:##:##
 +# New output, like TextMeter output...
@@ -1819,7 +1944,7 @@ index dd07c6a..4c126c5 100644
      def _do_update_meter(self, meter, now):
          self._lock.acquire()
          try:
-@@ -480,7 +506,7 @@ class TextMultiFileMeter(MultiFileMeter):
+@@ -480,7 +507,7 @@ class TextMultiFileMeter(MultiFileMeter):
              tf = self.numfiles or 1
              pf = 100 * float(df)/tf + 0.49
              dd = self.re.last_amount_read
@@ -1828,7 +1953,7 @@ index dd07c6a..4c126c5 100644
              pd = 100 * (self.re.fraction_read() or 0) + 0.49
              dt = self.re.elapsed_time()
              rt = self.re.remaining_time()
-@@ -491,9 +517,33 @@ class TextMultiFileMeter(MultiFileMeter):
+@@ -491,9 +518,41 @@ class TextMultiFileMeter(MultiFileMeter):
              ftd = format_number(td) + 'B'
              fdt = format_time(dt, 1)
              ftt = format_time(tt, 1)
@@ -1838,9 +1963,17 @@ index dd07c6a..4c126c5 100644
 +
 +            frac = self.re.fraction_read() or 0
 +            ave_dl = format_number(self.re.average_rate())
++
++            # cycle through active meters
++            if now > self.index_time:
++                self.index_time = now + 1.0
++                self.index += 1
++            if self.index >= len(self.meters):
++                self.index = 0
++            meter = self.meters[self.index]
 +            text = meter.text or meter.basename
 +            if tf > 1:
-+                text = '(%u/%u): %s' % (df+1, tf, text)
++                text = '(%u/%u): %s' % (df+1+self.index, tf, text)
 +
 +            # Include text + ui_rate in minimal
 +            tl = TerminalLine(8, 8+1+8)
@@ -1865,7 +1998,7 @@ index dd07c6a..4c126c5 100644
              self.fo.flush()
          finally:
              self._lock.release()
-@@ -502,18 +552,30 @@ class TextMultiFileMeter(MultiFileMeter):
+@@ -502,18 +561,30 @@ class TextMultiFileMeter(MultiFileMeter):
          self._lock.acquire()
          try:
              format = "%-30.30s %6.6s    %8.8s    %9.9s"
@@ -1902,7 +2035,7 @@ index dd07c6a..4c126c5 100644
  
      def _do_failure_meter(self, meter, message, now):
          self._lock.acquire()
-@@ -536,15 +598,6 @@ class TextMultiFileMeter(MultiFileMeter):
+@@ -536,15 +607,6 @@ class TextMultiFileMeter(MultiFileMeter):
              pass
          finally:
              self._lock.release()
@@ -1918,7 +2051,7 @@ index dd07c6a..4c126c5 100644
          
  ######################################################################
  # support classes and functions
-@@ -658,6 +711,8 @@ def format_time(seconds, use_hours=0):
+@@ -658,6 +720,8 @@ def format_time(seconds, use_hours=0):
      if seconds is None or seconds < 0:
          if use_hours: return '--:--:--'
          else:         return '--:--'


More information about the scm-commits mailing list