[python-urlgrabber] fix file:// profiling. BZ 822632.
Zdeněk Pavlas
zpavlas at fedoraproject.org
Fri May 18 13:55:36 UTC 2012
commit 88126d0a238225cda023a8a3835ff773fc0cd152
Author: Zdeněk Pavlas <zpavlas at redhat.com>
Date: Fri May 18 15:55:18 2012 +0200
fix file:// profiling. BZ 822632.
file-url-profiling.patch | 65 ++++++++++++++++++++++++++++++++++++++++++++++
python-urlgrabber.spec | 7 ++++-
2 files changed, 71 insertions(+), 1 deletions(-)
---
diff --git a/file-url-profiling.patch b/file-url-profiling.patch
new file mode 100644
index 0000000..2fd1f3c
--- /dev/null
+++ b/file-url-profiling.patch
@@ -0,0 +1,65 @@
+commit 7c74f526dd761b647d6bb6a7b7d6c285fe78bdb8
+Author: Zdeněk Pavlas <zpavlas at redhat.com>
+Date: Fri May 18 15:38:44 2012 +0200
+
+ timedhosts: fix file:// profiling. BZ 822632.
+
+ - Do not profile absolute file:// URLs.
+ - Give a hint to _TH.update() which baseurl was used
+ so we may profile file:// mirrors, too.
+ - Strip username and password from stored hostnames.
+
+diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
+index 094be77..be85f92 100644
+--- a/urlgrabber/grabber.py
++++ b/urlgrabber/grabber.py
+@@ -2060,7 +2060,7 @@ class _ExternalDownloader:
+ else:
+ ug_err = URLGrabError(int(line[4]), line[5])
+ if DEBUG: DEBUG.info('failure: %s', err)
+- _TH.update(opts.url, int(line[2]), float(line[3]), ug_err)
++ _TH.update(opts.url, int(line[2]), float(line[3]), ug_err, opts.async[0])
+ ret.append((opts, size, ug_err))
+ return ret
+
+@@ -2268,7 +2268,7 @@ class _TH:
+ if filename and _TH.dirty is None:
+ try:
+ for line in open(filename):
+- host, speed, fail, ts = line.split()
++ host, speed, fail, ts = line.split(' ', 3)
+ _TH.hosts[host] = int(speed), int(fail), int(ts)
+ except IOError: pass
+ _TH.dirty = False
+@@ -2288,9 +2288,14 @@ class _TH:
+ _TH.dirty = False
+
+ @staticmethod
+- def update(url, dl_size, dl_time, ug_err):
++ def update(url, dl_size, dl_time, ug_err, baseurl=None):
+ _TH.load()
+- host = urlparse.urlsplit(url).netloc
++
++ # Use hostname from URL. If it's a file:// URL, use baseurl.
++ # If no baseurl, do not update timedhosts.
++ host = urlparse.urlsplit(url).netloc.split('@')[-1] or baseurl
++ if not host: return
++
+ speed, fail, ts = _TH.hosts.get(host) or (0, 0, 0)
+ now = time.time()
+
+@@ -2311,9 +2316,12 @@ class _TH:
+ _TH.dirty = True
+
+ @staticmethod
+- def estimate(url):
++ def estimate(baseurl):
+ _TH.load()
+- host = urlparse.urlsplit(url).netloc
++
++ # Use just the hostname, unless it's a file:// baseurl.
++ host = urlparse.urlsplit(baseurl).netloc.split('@')[-1] or baseurl
++
+ default_speed = default_grabber.opts.default_speed
+ try: speed, fail, ts = _TH.hosts[host]
+ except KeyError: return default_speed
diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec
index 31094ae..fc9909e 100644
--- a/python-urlgrabber.spec
+++ b/python-urlgrabber.spec
@@ -3,10 +3,11 @@
Summary: A high-level cross-protocol url-grabber
Name: python-urlgrabber
Version: 3.9.1
-Release: 12%{?dist}
+Release: 13%{?dist}
Source0: urlgrabber-%{version}.tar.gz
Patch1: urlgrabber-HEAD.patch
Patch2: multi-downloader.patch
+Patch3: file-url-profiling.patch
License: LGPLv2+
Group: Development/Libraries
@@ -26,6 +27,7 @@ authentication, proxies and more.
%setup -q -n urlgrabber-%{version}
%patch1 -p1
%patch2 -p1
+%patch3 -p1
%build
python setup.py build
@@ -46,6 +48,9 @@ rm -rf $RPM_BUILD_ROOT
%attr(0755,root,root) /usr/libexec/urlgrabber-ext-down
%changelog
+* Fri May 18 2012 Zdeněk Pavlas <zpavlas at redhat.com> - 3.9.1-13
+- fix file:// profiling. BZ 822632.
+
* Mon May 14 2012 Zdeněk Pavlas <zpavlas at redhat.com> - 3.9.1-12
- Update to latest HEAD
- Merge multi-downloader patches
More information about the scm-commits mailing list