[python-urlgrabber] Process mirror retries before other queued requests.

Zdeněk Pavlas zpavlas at fedoraproject.org
Mon Dec 9 15:54:59 UTC 2013


commit 64057fc6755efd65890637e3ddb44bd8138d0b50
Author: Zdenek Pavlas <zpavlas at redhat.com>
Date:   Mon Dec 9 16:54:58 2013 +0100

    Process mirror retries before other queued requests.
    
    - Tell curl to return immediately on ctrl-c. BZ 1017491

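(As a rough illustration of the second bullet, not code from urlgrabber: with CURL_GLOBAL_ACK_EINTR, libcurl stops restarting system calls that a signal interrupted, so a blocked perform() returns promptly and the pending ctrl-c can be delivered instead of waiting out the connect timeout. The URL and timeout below are made up, and depending on where the interrupt lands the call may surface it as KeyboardInterrupt or as pycurl.error.)

    import pycurl

    # Ask libcurl to return from interrupted waits instead of retrying them;
    # without this, ctrl-c during a stalled transfer is only noticed after
    # the blocking call times out on its own.
    pycurl.global_init(pycurl.GLOBAL_DEFAULT | pycurl.GLOBAL_ACK_EINTR)

    c = pycurl.Curl()
    c.setopt(pycurl.URL, 'http://mirror.example.com/repodata/primary.xml.gz')
    c.setopt(pycurl.CONNECTTIMEOUT, 300)
    c.setopt(pycurl.WRITEFUNCTION, lambda chunk: None)  # discard the body
    try:
        c.perform()   # ctrl-c now interrupts this call almost immediately
    except (KeyboardInterrupt, pycurl.error) as e:
        print('transfer interrupted:', e)
    finally:
        c.close()
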
 python-urlgrabber.spec |    8 +++++-
 urlgrabber-HEAD.patch  |   71 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+), 1 deletions(-)
---
diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec
index d48abe1..f026a56 100644
--- a/python-urlgrabber.spec
+++ b/python-urlgrabber.spec
@@ -3,8 +3,9 @@
 Summary: A high-level cross-protocol url-grabber
 Name: python-urlgrabber
 Version: 3.10
-Release: 0%{?dist}
+Release: 1%{?dist}
 Source0: http://urlgrabber.baseurl.org/download/urlgrabber-%{version}.tar.gz
+Patch1: urlgrabber-HEAD.patch
 
 License: LGPLv2+
 Group: Development/Libraries
@@ -22,6 +23,7 @@ authentication, proxies and more.
 
 %prep
 %setup -q -n urlgrabber-%{version}
+%patch1 -p1
 
 %build
 python setup.py build
@@ -42,6 +44,10 @@ rm -rf $RPM_BUILD_ROOT
 %attr(0755,root,root) %{_libexecdir}/urlgrabber-ext-down
 
 %changelog
+* Mon Dec  9 2013 Zdenek Pavlas <zpavlas at redhat.com> - 3.10-1
+- Process mirror retries before other queued requests.
+- Tell curl to return immediately on ctrl-c. BZ 1017491
+
 * Wed Oct  9 2013 Zdenek Pavlas <zpavlas at redhat.com> - 3.10-0
 - Update to latest HEAD.
 - clamp timestamps from the future.  BZ 894630, 1013733
diff --git a/urlgrabber-HEAD.patch b/urlgrabber-HEAD.patch
new file mode 100644
index 0000000..e2203c4
--- /dev/null
+++ b/urlgrabber-HEAD.patch
@@ -0,0 +1,71 @@
+diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
+index b004f4d..d19ca01 100644
+--- a/urlgrabber/grabber.py
++++ b/urlgrabber/grabber.py
+@@ -1893,6 +1893,8 @@ class PyCurlFileObject(object):
+             urllib.addinfourl, via. urllib.URLopener.* """
+         return self.url
+         
++# tell curl to return immediately on ctrl-c
++pycurl.global_init(pycurl.GLOBAL_DEFAULT | pycurl.GLOBAL_ACK_EINTR)
+ _curl_cache = pycurl.Curl() # make one and reuse it over and over and over
+ 
+ def reset_curl_obj():
+@@ -2170,6 +2172,7 @@ def parallel_wait(meter=None):
+     dl = _ExternalDownloaderPool()
+     host_con = {} # current host connection counts
+     single = set() # hosts in single connection mode
++    retry_queue = []
+ 
+     def start(opts, tries):
+         opts.tries = tries
+@@ -2221,6 +2224,9 @@ def parallel_wait(meter=None):
+                 # Turn on the max_connections=1 override. BZ 853432
+                 if DEBUG: DEBUG.info('max_connections(%s) %s => 1', key, limit)
+                 single.add(key)
++                # When using multi-downloader the parent's _curl_cache
++                # object is idle. Kill it, as it might use keepalive=1.
++                reset_curl_obj()
+ 
+             retry = opts.retry or 0
+             if opts.failure_callback:
+@@ -2247,7 +2253,7 @@ def parallel_wait(meter=None):
+                     # mask this mirror and retry
+                     if action.get('remove', 1):
+                         removed.add(key)
+-                    _async_queue.append(opts)
++                    retry_queue.append(opts)
+                     continue
+                 # fail=1 from callback
+                 ug_err.errors = errors
+@@ -2257,19 +2263,22 @@ def parallel_wait(meter=None):
+             _run_callback(opts.failfunc, opts)
+ 
+     try:
+-        idx = 0
++        retry_idx = idx = 0
+         while True:
+-            if idx >= len(_async_queue):
+-                # the queue is empty
++            if retry_idx < len(retry_queue):
++                # retries first
++                opts = retry_queue[retry_idx]
++                retry_idx += 1
++            elif idx < len(_async_queue):
++                # handle next request
++                opts = _async_queue[idx]
++                idx += 1
++            else:
++                # both queues are empty
+                 if not dl.running: break
+-                # pending dl may extend it
+                 perform()
+                 continue
+ 
+-            # handle next request
+-            opts = _async_queue[idx]
+-            idx += 1
+-
+             # check global limit
+             while len(dl.running) >= default_grabber.opts.max_connections:
+                 perform()

