[python-urlgrabber] update to latest urlgrabber head
Seth Vidal
skvidal at fedoraproject.org
Mon Aug 30 15:53:16 UTC 2010
commit 36ff3aaff05bd6eddd257e6e6d8657e7de5e05bc
Author: Seth Vidal <skvidal at fedoraproject.org>
Date: Mon Aug 30 11:53:16 2010 -0400
update to latest urlgrabber head
python-urlgrabber.spec | 5 +-
urlgrabber-HEAD.patch | 201 ++++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 188 insertions(+), 18 deletions(-)
---
diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec
index 8231d53..9b49ca5 100644
--- a/python-urlgrabber.spec
+++ b/python-urlgrabber.spec
@@ -3,7 +3,7 @@
Summary: A high-level cross-protocol url-grabber
Name: python-urlgrabber
Version: 3.9.1
-Release: 7%{?dist}
+Release: 8%{?dist}
Source0: urlgrabber-%{version}.tar.gz
Patch1: urlgrabber-HEAD.patch
@@ -43,6 +43,9 @@ rm -rf $RPM_BUILD_ROOT
%{_bindir}/urlgrabber
%changelog
+* Mon Aug 30 2010 Seth Vidal <skvidal at fedoraproject.org> - 3.9.1-8
+- update to latest head patches
+
* Thu Jul 22 2010 David Malcolm <dmalcolm at redhat.com> - 3.9.1-7
- Rebuilt for https://fedoraproject.org/wiki/Features/Python_2.7/MassRebuild
diff --git a/urlgrabber-HEAD.patch b/urlgrabber-HEAD.patch
index 885f3a1..6b97585 100644
--- a/urlgrabber-HEAD.patch
+++ b/urlgrabber-HEAD.patch
@@ -11,6 +11,66 @@ index 0000000..1ffe416
+*.kdev*
+*.kateproject
+ipython.log*
+diff --git a/scripts/urlgrabber b/scripts/urlgrabber
+index 518e512..09cd896 100644
+--- a/scripts/urlgrabber
++++ b/scripts/urlgrabber
+@@ -115,6 +115,7 @@ options:
+ including quotes in the case of strings.
+ e.g. --user_agent='"foobar/2.0"'
+
++ --output FILE
+ -o FILE write output to FILE, otherwise the basename of the
+ url will be used
+ -O print the names of saved files to STDOUT
+@@ -170,12 +171,17 @@ class client_options:
+ return ug_options, ug_defaults
+
+ def process_command_line(self):
+- short_options = 'vd:hoOpD'
++ short_options = 'vd:ho:OpD'
+ long_options = ['profile', 'repeat=', 'verbose=',
+- 'debug=', 'help', 'progress']
++ 'debug=', 'help', 'progress', 'output=']
+ ug_long = [ o + '=' for o in self.ug_options ]
+- optlist, args = getopt.getopt(sys.argv[1:], short_options,
+- long_options + ug_long)
++ try:
++ optlist, args = getopt.getopt(sys.argv[1:], short_options,
++ long_options + ug_long)
++ except getopt.GetoptError, e:
++ print >>sys.stderr, "Error:", e
++ self.help([], ret=1)
++
+ self.verbose = 0
+ self.debug = None
+ self.outputfile = None
+@@ -193,6 +199,7 @@ class client_options:
+ if o == '--verbose': self.verbose = v
+ if o == '-v': self.verbose += 1
+ if o == '-o': self.outputfile = v
++ if o == '--output': self.outputfile = v
+ if o == '-p' or o == '--progress': self.progress = 1
+ if o == '-d' or o == '--debug': self.debug = v
+ if o == '--profile': self.profile = 1
+@@ -222,7 +229,7 @@ class client_options:
+ print "ERROR: cannot use -o when grabbing multiple files"
+ sys.exit(1)
+
+- def help(self, args):
++ def help(self, args, ret=0):
+ if not args:
+ print MAINHELP
+ else:
+@@ -234,7 +241,7 @@ class client_options:
+ self.help_ug_option(a)
+ else:
+ print 'ERROR: no help on command "%s"' % a
+- sys.exit(0)
++ sys.exit(ret)
+
+ def help_doc(self):
+ print __doc__
diff --git a/test/base_test_code.py b/test/base_test_code.py
index 50c6348..5fb43f9 100644
--- a/test/base_test_code.py
@@ -24,7 +84,7 @@ index 50c6348..5fb43f9 100644
# set to a proftp server only. we're working around a couple of
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
-index e090e90..4797436 100644
+index e090e90..0c78857 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -68,14 +68,14 @@ GENERAL ARGUMENTS (kwargs)
@@ -49,7 +109,19 @@ index e090e90..4797436 100644
bandwidth = 0
-@@ -439,6 +439,12 @@ try:
+@@ -248,6 +248,11 @@ GENERAL ARGUMENTS (kwargs)
+
+ Maximum size (in bytes) of the headers.
+
++ self.ip_resolve = 'whatever'
++
++ What type of name-to-IP resolution to use; the default is to allow
++ both IPv4 and IPv6.
++
+
+ RETRY RELATED ARGUMENTS
+
+@@ -439,6 +444,12 @@ try:
except:
__version__ = '???'
@@ -62,7 +134,15 @@ index e090e90..4797436 100644
########################################################################
# functions for debugging output. These functions are here because they
# are also part of the module initialization.
-@@ -808,7 +814,7 @@ class URLGrabberOptions:
+@@ -800,6 +811,7 @@ class URLGrabberOptions:
+ self.close_connection = 0
+ self.range = None
+ self.user_agent = 'urlgrabber/%s' % __version__
++ self.ip_resolve = None
+ self.keepalive = 1
+ self.proxies = None
+ self.reget = None
+@@ -808,7 +820,7 @@ class URLGrabberOptions:
self.prefix = None
self.opener = None
self.cache_openers = True
@@ -71,7 +151,17 @@ index e090e90..4797436 100644
self.text = None
self.http_headers = None
self.ftp_headers = None
-@@ -1052,9 +1058,15 @@ class PyCurlFileObject():
+@@ -931,6 +943,9 @@ class URLGrabber:
+ (scheme, host, path, parm, query, frag) = parts
+ if filename is None:
+ filename = os.path.basename( urllib.unquote(path) )
++ if not filename:
++ # This is better than nothing.
++ filename = 'index.html'
+ if scheme == 'file' and not opts.copy_local:
+ # just return the name of the local file - don't make a
+ # copy currently
+@@ -1052,9 +1067,15 @@ class PyCurlFileObject():
self._reget_length = 0
self._prog_running = False
self._error = (None, None)
@@ -88,7 +178,7 @@ index e090e90..4797436 100644
def __getattr__(self, name):
"""This effectively allows us to wrap at the instance level.
-@@ -1085,9 +1097,14 @@ class PyCurlFileObject():
+@@ -1085,9 +1106,14 @@ class PyCurlFileObject():
return -1
def _hdr_retrieve(self, buf):
@@ -104,7 +194,7 @@ index e090e90..4797436 100644
try:
self._hdr_dump += buf
# we have to get the size before we do the progress obj start
-@@ -1104,7 +1121,17 @@ class PyCurlFileObject():
+@@ -1104,7 +1130,17 @@ class PyCurlFileObject():
s = parse150(buf)
if s:
self.size = int(s)
@@ -123,7 +213,7 @@ index e090e90..4797436 100644
return len(buf)
except KeyboardInterrupt:
return pycurl.READFUNC_ABORT
-@@ -1113,8 +1140,10 @@ class PyCurlFileObject():
+@@ -1113,8 +1149,10 @@ class PyCurlFileObject():
if self._parsed_hdr:
return self._parsed_hdr
statusend = self._hdr_dump.find('\n')
@@ -134,7 +224,7 @@ index e090e90..4797436 100644
self._parsed_hdr = mimetools.Message(hdrfp)
return self._parsed_hdr
-@@ -1136,6 +1165,7 @@ class PyCurlFileObject():
+@@ -1136,11 +1174,21 @@ class PyCurlFileObject():
self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
self.curl_obj.setopt(pycurl.FAILONERROR, True)
self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
@@ -142,7 +232,21 @@ index e090e90..4797436 100644
if DEBUG:
self.curl_obj.setopt(pycurl.VERBOSE, True)
-@@ -1148,9 +1178,11 @@ class PyCurlFileObject():
+ if opts.user_agent:
+ self.curl_obj.setopt(pycurl.USERAGENT, opts.user_agent)
++ if opts.ip_resolve:
++ # Default is: IPRESOLVE_WHATEVER
++ ipr = opts.ip_resolve.lower()
++ if ipr == 'whatever': # Do we need this?
++ self.curl_obj.setopt(pycurl.IPRESOLVE,pycurl.IPRESOLVE_WHATEVER)
++ if ipr == 'ipv4':
++ self.curl_obj.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
++ if ipr == 'ipv6':
++ self.curl_obj.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V6)
+
+ # maybe to be options later
+ self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
+@@ -1148,9 +1196,11 @@ class PyCurlFileObject():
# timeouts
timeout = 300
@@ -157,7 +261,7 @@ index e090e90..4797436 100644
# ssl options
if self.scheme == 'https':
-@@ -1276,7 +1308,7 @@ class PyCurlFileObject():
+@@ -1276,7 +1326,7 @@ class PyCurlFileObject():
raise err
elif errcode == 60:
@@ -166,7 +270,7 @@ index e090e90..4797436 100644
err = URLGrabError(14, msg)
err.url = self.url
raise err
-@@ -1291,7 +1323,12 @@ class PyCurlFileObject():
+@@ -1291,14 +1341,70 @@ class PyCurlFileObject():
raise err
elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it
@@ -178,9 +282,55 @@ index e090e90..4797436 100644
+ else:
+ msg = "Unknown Error: URL=%s , scheme=%s" % (self.url, self.scheme)
else:
- msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
+- msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
++ pyerr2str = { 5 : _("Couldn't resolve proxy"),
++ 6 : _("Couldn't resolve host"),
++ 7 : _("Couldn't connect"),
++ 8 : _("Bad reply to FTP server"),
++ 9 : _("Access denied"),
++ 11 : _("Bad reply to FTP pass"),
++ 13 : _("Bad reply to FTP pasv"),
++ 14 : _("Bad reply to FTP 227"),
++ 15 : _("Couldn't get FTP host"),
++ 17 : _("Couldn't set FTP type"),
++ 18 : _("Partial file"),
++ 19 : _("FTP RETR command failed"),
++ 22 : _("HTTP returned error"),
++ 23 : _("Write error"),
++ 25 : _("Upload failed"),
++ 26 : _("Read error"),
++ 27 : _("Out of Memory"),
++ 28 : _("Operation timed out"),
++ 30 : _("FTP PORT command failed"),
++ 31 : _("FTP REST command failed"),
++ 33 : _("Range failed"),
++ 34 : _("HTTP POST failed"),
++ 35 : _("SSL CONNECT failed"),
++ 36 : _("Couldn't resume download"),
++ 37 : _("Couldn't read file"),
++ 42 : _("Aborted by callback"),
++ 47 : _("Too many redirects"),
++ 51 : _("Peer certificate failed verification"),
++ 53 : _("SSL engine not found"),
++ 54 : _("SSL engine set failed"),
++ 55 : _("Network error send()"),
++ 56 : _("Network error recv()"),
++ 58 : _("Local certificate failed"),
++ 59 : _("SSL set cipher failed"),
++ 60 : _("Local CA certificate failed"),
++ 61 : _("HTTP bad transfer encoding"),
++ 63 : _("Maximum file size exceeded"),
++ 64 : _("FTP SSL failed"),
++ 67 : _("Authentication failure"),
++ 70 : _("Out of disk space on server"),
++ 73 : _("Remote file exists"),
++ }
++ errstr = str(e.args[1])
++ if not errstr:
++ errstr = pyerr2str.get(errcode, '<Unknown>')
++ msg = 'curl#%s - "%s"' % (errcode, errstr)
code = errcode
-@@ -1299,6 +1336,12 @@ class PyCurlFileObject():
+ err = URLGrabError(14, msg)
err.code = code
err.exception = e
raise err
@@ -193,7 +343,24 @@ index e090e90..4797436 100644
def _do_open(self):
self.curl_obj = _curl_cache
-@@ -1446,9 +1489,23 @@ class PyCurlFileObject():
+@@ -1434,9 +1540,13 @@ class PyCurlFileObject():
+ #fh, self._temp_name = mkstemp()
+ #self.fo = open(self._temp_name, 'wb')
+
+-
+- self._do_perform()
+-
++ try:
++ self._do_perform()
++ except URLGrabError, e:
++ self.fo.flush()
++ self.fo.close()
++ raise e
++
+
+
+ if _was_filename:
+@@ -1446,9 +1556,23 @@ class PyCurlFileObject():
# set the time
mod_time = self.curl_obj.getinfo(pycurl.INFO_FILETIME)
if mod_time != -1:
@@ -203,7 +370,7 @@ index e090e90..4797436 100644
+ except OSError, e:
+ err = URLGrabError(16, _(\
+ 'error setting timestamp on file %s from %s, OSError: %s')
++ % (self.filename, self.url, e))
++ % (self.filename, self.url, e))
+ err.url = self.url
+ raise err
# re open it
@@ -219,7 +386,7 @@ index e090e90..4797436 100644
else:
#self.fo = open(self._temp_name, 'r')
self.fo.seek(0)
-@@ -1532,11 +1589,14 @@ class PyCurlFileObject():
+@@ -1532,11 +1656,14 @@ class PyCurlFileObject():
def _over_max_size(self, cur, max_size=None):
if not max_size:
@@ -238,7 +405,7 @@ index e090e90..4797436 100644
msg = _("Downloaded more than max size for %s: %s > %s") \
% (self.url, cur, max_size)
-@@ -1582,9 +1642,21 @@ class PyCurlFileObject():
+@@ -1582,9 +1709,21 @@ class PyCurlFileObject():
self.opts.progress_obj.end(self._amount_read)
self.fo.close()
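
A minimal usage sketch (not part of the patch itself; it assumes the stock
urlgrabber.urlgrab() entry point, and the URL and filename are placeholders)
of the ip_resolve keyword added above, which accepts 'whatever', 'ipv4' or
'ipv6':

    from urlgrabber import urlgrab
    from urlgrabber.grabber import URLGrabError

    try:
        # Force IPv4-only name resolution for this download (hypothetical URL).
        urlgrab('http://example.com/some/file.rpm', 'file.rpm', ip_resolve='ipv4')
    except URLGrabError, e:
        # With this patch, empty pycurl messages are mapped through pyerr2str,
        # so the error reads 'curl#<code> - "<reason>"'.
        print 'download failed: %s' % e

The scripts/urlgrabber hunk above adds the matching long option on the command
line, so --output FILE behaves the same as -o FILE.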