Changes from v1: * pass stream=True, to requests so that the file is not downloaded to memory first (oops!) * call flush() on the file object so that it does not swamp the cache too much * log the start and end of the image download operation
Martin Kolman (1): Replace python-urlgrabber with python-requests (#1141242)
anaconda.spec.in | 5 ++- pyanaconda/kickstart.py | 13 ++++--- pyanaconda/packaging/__init__.py | 36 ++++++++++---------- pyanaconda/packaging/livepayload.py | 67 ++++++++++++++++++++----------------- 4 files changed, 65 insertions(+), 56 deletions(-)
Python urlgrabber is Python 2 only and kinda crufty. Replace it with the Python 3 compatible python-requests module, which provides a much nicer interface, is in active development and is very widely used.
Signed-off-by: Martin Kolman mkolman@redhat.com --- anaconda.spec.in | 5 ++- pyanaconda/kickstart.py | 13 ++++--- pyanaconda/packaging/__init__.py | 36 ++++++++++---------- pyanaconda/packaging/livepayload.py | 67 ++++++++++++++++++++----------------- 4 files changed, 65 insertions(+), 56 deletions(-)
diff --git a/anaconda.spec.in b/anaconda.spec.in index 6a81c97..964bee3 100644 --- a/anaconda.spec.in +++ b/anaconda.spec.in @@ -33,7 +33,6 @@ Source0: %{name}-%{version}.tar.bz2 %define mehver 0.23-1 %define sckeyboardver 1.3.1 %define firewalldver 0.3.5-1 -%define pythonurlgrabberver 3.9.1-5 %define utillinuxver 2.15.1 %define dracutver 034-7 %define isomd5sum 1.0.10 @@ -64,7 +63,7 @@ BuildRequires: pykickstart >= %{pykickstartver} BuildRequires: python-bugzilla %endif BuildRequires: python-devel -BuildRequires: python-urlgrabber >= %{pythonurlgrabberver} +BuildRequires: python-requests BuildRequires: python-nose BuildRequires: systemd BuildRequires: yum >= %{yumver} @@ -100,7 +99,7 @@ Requires: rpm-python >= %{rpmver} Requires: parted >= %{partedver} Requires: pyparted >= %{pypartedver} Requires: yum >= %{yumver} -Requires: python-urlgrabber >= %{pythonurlgrabberver} +Requires: python-requests Requires: pykickstart >= %{pykickstartver} Requires: langtable-data >= %{langtablever} Requires: langtable-python >= %{langtablever} diff --git a/pyanaconda/kickstart.py b/pyanaconda/kickstart.py index 885bcd6..49c8613 100644 --- a/pyanaconda/kickstart.py +++ b/pyanaconda/kickstart.py @@ -44,8 +44,8 @@ import tempfile from pyanaconda.flags import flags, can_touch_runtime_system from pyanaconda.constants import ADDON_PATHS, IPMI_ABORTED import shlex +import requests import sys -import urlgrabber import pykickstart.commands as commands from pyanaconda import keyboard from pyanaconda import ntp @@ -159,15 +159,18 @@ def getEscrowCertificate(escrowCerts, url): log.info("escrow: downloading %s", url)
try: - f = urlgrabber.urlopen(url) - except urlgrabber.grabber.URLGrabError as e: + request = requests.get(url, verify=True) + except requests.exceptions.SSLError as e: + msg = _("SSL error while downloading the escrow certificate:\n\n%s") % e + raise KickstartError(msg) + except requests.exceptions.RequestException as e: msg = _("The following error was encountered while downloading the escrow certificate:\n\n%s") % e raise KickstartError(msg)
try: - escrowCerts[url] = f.read() + escrowCerts[url] = request.content finally: - f.close() + request.close()
return escrowCerts[url]
diff --git a/pyanaconda/packaging/__init__.py b/pyanaconda/packaging/__init__.py index 2438b6f..4a0c28e 100644 --- a/pyanaconda/packaging/__init__.py +++ b/pyanaconda/packaging/__init__.py @@ -29,8 +29,7 @@ """ from __future__ import print_function import os, sys -from urlgrabber.grabber import URLGrabber -from urlgrabber.grabber import URLGrabError +import requests import ConfigParser import shutil import time @@ -69,8 +68,7 @@ import blivet.arch from blivet.platform import platform
from pyanaconda.product import productName, productVersion -import urlgrabber -urlgrabber.grabber.default_grabber.opts.user_agent = "%s (anaconda)/%s" %(productName, productVersion) +USER_AGENT = "%s (anaconda)/%s" %(productName, productVersion)
from distutils.version import LooseVersion
@@ -340,9 +338,6 @@ class Payload(object): log.debug("retrieving treeinfo from %s (proxy: %s ; sslverify: %s)", url, proxy_url, sslverify)
- ugopts = {"ssl_verify_peer": sslverify, - "ssl_verify_host": sslverify} - proxies = {} if proxy_url: try: @@ -353,21 +348,26 @@ class Payload(object): log.info("Failed to parse proxy for _getTreeInfo %s: %s", proxy_url, e)
- ug = URLGrabber() + response = None + headers = {"user-agent": USER_AGENT} try: - treeinfo = ug.urlgrab("%s/.treeinfo" % url, - "/tmp/.treeinfo", copy_local=True, - proxies=proxies, **ugopts) - except URLGrabError as e: + response = requests.get("%s/.treeinfo" % url, headers=headers, proxies=proxies, verify=sslverify) + except requests.exceptions.RequestException as e: try: - treeinfo = ug.urlgrab("%s/treeinfo" % url, - "/tmp/.treeinfo", copy_local=True, - proxies=proxies, **ugopts) - except URLGrabError as e: + response = requests.get("%s/treeinfo" % url, headers=headers, proxies=proxies, verify=sslverify) + except requests.exceptions.RequestException as e: log.info("Error downloading treeinfo: %s", e) - treeinfo = None + response = None + + if response: + # write the local treeinfo file + with open("/tmp/.treeinfo", "w") as f: + f.write(response.text)
- return treeinfo + # and also return the treeinfo contents as a string + return response.text + else: + return None
def _getReleaseVersion(self, url): """ Return the release version of the tree at the specified URL. """ diff --git a/pyanaconda/packaging/livepayload.py b/pyanaconda/packaging/livepayload.py index 615bb61..bd71e94 100644 --- a/pyanaconda/packaging/livepayload.py +++ b/pyanaconda/packaging/livepayload.py @@ -33,8 +33,7 @@ import os import stat from time import sleep from threading import Lock -from urlgrabber.grabber import URLGrabber -from urlgrabber.grabber import URLGrabError +import requests from pyanaconda.iutil import ProxyString, ProxyStringError, lowerASCII import urllib import hashlib @@ -197,27 +196,19 @@ class LiveImagePayload(ImagePayload): def kernelVersionList(self): return self._kernelVersionList
-class URLGrabberProgress(object): - """ Provide methods for urlgrabber progress.""" - def start(self, filename, url, basename, size, text): - """ Start of urlgrabber download - - :param filename: path and file that download will be saved to - :type filename: string - :param url: url to download from - :type url: string - :param basename: file that it will be saved to - :type basename: string +class DownloadProgress(object): + """ Provide methods for download progress reporting.""" + + def start(self, url, size): + """ Start of download + + :param url: url of the download + :type url: str :param size: length of the file :type size: int - :param text: unknown - :type text: unknown """ - self.filename = filename self.url = url - self.basename = basename self.size = size - self.text = text self._pct = -1
def update(self, bytes_read): @@ -233,7 +224,6 @@ class URLGrabberProgress(object): if pct == self._pct: return self._pct = pct - progressQ.send_message(_("Downloading %(url)s (%(pct)d%%)") % \ {"url" : self.url, "pct" : pct})
@@ -326,20 +316,37 @@ class LiveImageKSPayload(LiveImagePayload): ImagePayload.unsetup(self)
def _preInstall_url_image(self): - """ Download the image using urlgrabber """ - # Setup urlgrabber and call back to download image to sysroot - progress = URLGrabberProgress() - ugopts = {"ssl_verify_peer": not self.data.method.noverifyssl, - "ssl_verify_host": not self.data.method.noverifyssl, - "proxies" : self._proxies, - "progress_obj" : progress, - "copy_local" : True} + """ Download the image using Requests with progress reporting"""
error = None + progress = DownloadProgress() try: - ug = URLGrabber() - ug.urlgrab(self.data.method.url, self.image_path, **ugopts) - except URLGrabError as e: + log.info("Starting image download") + with open(self.image_path, "wb") as f: + ssl_verify = not self.data.method.noverifyssl + response = requests.get(self.data.method.url, proxies=self._proxies, verify=ssl_verify, stream=True) + total_length = response.headers.get('content-length') + if total_length is None: # no content length header + # just download the file in one go and fake the progress reporting once done + log.warning("content-length header is missing for the installation image, " + "download progress reporting will not be available") + f.write(response.content) + size = f.tell() + progress.start(self.data.method.url, size) + progress.end(size) + else: + # requests return headers as strings, so convert total_length to int + progress.start(self.data.method.url, int(total_length)) + bytes_read = 0 + for buf in response.iter_content(1024 * 1024): # 1 MB chunks + if buf: + f.write(buf) + f.flush() + bytes_read += len(buf) + progress.update(bytes_read) + progress.end(bytes_read) + log.info("Image download finished") + except requests.exceptions.RequestException as e: log.error("Error downloading liveimg: %s", e) error = e else:
On Fri, Jan 23, 2015 at 08:38:56PM +0100, Martin Kolman wrote:
Changes from v1:
- pass stream=True, to requests so that the file is not downloaded to memory first (oops!)
- call flush() on the file object so that it does not swamp the cache too much
- log the start and end of the image download operation
Martin Kolman (1): Replace python-urlgrabber with python-requests (#1141242)
anaconda.spec.in | 5 ++- pyanaconda/kickstart.py | 13 ++++--- pyanaconda/packaging/__init__.py | 36 ++++++++++---------- pyanaconda/packaging/livepayload.py | 67 ++++++++++++++++++++----------------- 4 files changed, 65 insertions(+), 56 deletions(-)
Ack
anaconda-patches@lists.fedorahosted.org