[python-urlgrabber] update to latest urlgrabber head
Seth Vidal
skvidal at fedoraproject.org
Mon Aug 30 15:53:16 UTC 2010
commit 36ff3aaff05bd6eddd257e6e6d8657e7de5e05bc
Author: Seth Vidal <skvidal at fedoraproject.org>
Date: Mon Aug 30 11:53:16 2010 -0400
update to latest urlgrabber head
python-urlgrabber.spec | 5 +-
urlgrabber-HEAD.patch | 201 ++++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 188 insertions(+), 18 deletions(-)
---
diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec
index 8231d53..9b49ca5 100644
--- a/python-urlgrabber.spec
+++ b/python-urlgrabber.spec
@@ -3,7 +3,7 @@
Summary: A high-level cross-protocol url-grabber
Name: python-urlgrabber
Version: 3.9.1
-Release: 7%{?dist}
+Release: 8%{?dist}
Source0: urlgrabber-%{version}.tar.gz
Patch1: urlgrabber-HEAD.patch
@@ -43,6 +43,9 @@ rm -rf $RPM_BUILD_ROOT
%{_bindir}/urlgrabber
%changelog
+* Mon Aug 30 2010 Seth Vidal <skvidal at fedoraproject.org> - 3.9.1-8
+- update to latest head patches
+
* Thu Jul 22 2010 David Malcolm <dmalcolm at redhat.com> - 3.9.1-7
- Rebuilt for https://fedoraproject.org/wiki/Features/Python_2.7/MassRebuild
diff --git a/urlgrabber-HEAD.patch b/urlgrabber-HEAD.patch
index 885f3a1..6b97585 100644
--- a/urlgrabber-HEAD.patch
+++ b/urlgrabber-HEAD.patch
@@ -11,6 +11,66 @@ index 0000000..1ffe416
+*.kdev*
+*.kateproject
+ipython.log*
+diff --git a/scripts/urlgrabber b/scripts/urlgrabber
+index 518e512..09cd896 100644
+--- a/scripts/urlgrabber
++++ b/scripts/urlgrabber
+@@ -115,6 +115,7 @@ options:
+ including quotes in the case of strings.
+ e.g. --user_agent='"foobar/2.0"'
+
++ --output FILE
+ -o FILE write output to FILE, otherwise the basename of the
+ url will be used
+ -O print the names of saved files to STDOUT
+@@ -170,12 +171,17 @@ class client_options:
+ return ug_options, ug_defaults
+
+ def process_command_line(self):
+- short_options = 'vd:hoOpD'
++ short_options = 'vd:ho:OpD'
+ long_options = ['profile', 'repeat=', 'verbose=',
+- 'debug=', 'help', 'progress']
++ 'debug=', 'help', 'progress', 'output=']
+ ug_long = [ o + '=' for o in self.ug_options ]
+- optlist, args = getopt.getopt(sys.argv[1:], short_options,
+- long_options + ug_long)
++ try:
++ optlist, args = getopt.getopt(sys.argv[1:], short_options,
++ long_options + ug_long)
++ except getopt.GetoptError, e:
++ print >>sys.stderr, "Error:", e
++ self.help([], ret=1)
++
+ self.verbose = 0
+ self.debug = None
+ self.outputfile = None
+@@ -193,6 +199,7 @@ class client_options:
+ if o == '--verbose': self.verbose = v
+ if o == '-v': self.verbose += 1
+ if o == '-o': self.outputfile = v
++ if o == '--output': self.outputfile = v
+ if o == '-p' or o == '--progress': self.progress = 1
+ if o == '-d' or o == '--debug': self.debug = v
+ if o == '--profile': self.profile = 1
+@@ -222,7 +229,7 @@ class client_options:
+ print "ERROR: cannot use -o when grabbing multiple files"
+ sys.exit(1)
+
+- def help(self, args):
++ def help(self, args, ret=0):
+ if not args:
+ print MAINHELP
+ else:
+@@ -234,7 +241,7 @@ class client_options:
+ self.help_ug_option(a)
+ else:
+ print 'ERROR: no help on command "%s"' % a
+- sys.exit(0)
++ sys.exit(ret)
+
+ def help_doc(self):
+ print __doc__
diff --git a/test/base_test_code.py b/test/base_test_code.py
index 50c6348..5fb43f9 100644
--- a/test/base_test_code.py
@@ -24,7 +84,7 @@ index 50c6348..5fb43f9 100644
# set to a proftp server only. we're working around a couple of
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
-index e090e90..4797436 100644
+index e090e90..0c78857 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -68,14 +68,14 @@ GENERAL ARGUMENTS (kwargs)
@@ -49,7 +109,19 @@ index e090e90..4797436 100644
bandwidth = 0
-@@ -439,6 +439,12 @@ try:
+@@ -248,6 +248,11 @@ GENERAL ARGUMENTS (kwargs)
+
+ Maximum size (in bytes) of the headers.
+
++ self.ip_resolve = 'whatever'
++
++ What type of name-to-IP resolution to use; the default is to allow
++ both IPv4 and IPv6.
++
+
+ RETRY RELATED ARGUMENTS
+
+@@ -439,6 +444,12 @@ try:
except:
__version__ = '???'
@@ -62,7 +134,15 @@ index e090e90..4797436 100644
########################################################################
# functions for debugging output. These functions are here because they
# are also part of the module initialization.
-@@ -808,7 +814,7 @@ class URLGrabberOptions:
+@@ -800,6 +811,7 @@ class URLGrabberOptions:
+ self.close_connection = 0
+ self.range = None
+ self.user_agent = 'urlgrabber/%s' % __version__
++ self.ip_resolve = None
+ self.keepalive = 1
+ self.proxies = None
+ self.reget = None
+@@ -808,7 +820,7 @@ class URLGrabberOptions:
self.prefix = None
self.opener = None
self.cache_openers = True
@@ -71,7 +151,17 @@ index e090e90..4797436 100644
self.text = None
self.http_headers = None
self.ftp_headers = None
-@@ -1052,9 +1058,15 @@ class PyCurlFileObject():
+@@ -931,6 +943,9 @@ class URLGrabber:
+ (scheme, host, path, parm, query, frag) = parts
+ if filename is None:
+ filename = os.path.basename( urllib.unquote(path) )
++ if not filename:
++ # This is better than nothing.
++ filename = 'index.html'
+ if scheme == 'file' and not opts.copy_local:
+ # just return the name of the local file - don't make a
+ # copy currently
+@@ -1052,9 +1067,15 @@ class PyCurlFileObject():
self._reget_length = 0
self._prog_running = False
self._error = (None, None)
@@ -88,7 +178,7 @@ index e090e90..4797436 100644
def __getattr__(self, name):
"""This effectively allows us to wrap at the instance level.
-@@ -1085,9 +1097,14 @@ class PyCurlFileObject():
+@@ -1085,9 +1106,14 @@ class PyCurlFileObject():
return -1
def _hdr_retrieve(self, buf):
@@ -104,7 +194,7 @@ index e090e90..4797436 100644
try:
self._hdr_dump += buf
# we have to get the size before we do the progress obj start
-@@ -1104,7 +1121,17 @@ class PyCurlFileObject():
+@@ -1104,7 +1130,17 @@ class PyCurlFileObject():
s = parse150(buf)
if s:
self.size = int(s)
@@ -123,7 +213,7 @@ index e090e90..4797436 100644
return len(buf)
except KeyboardInterrupt:
return pycurl.READFUNC_ABORT
-@@ -1113,8 +1140,10 @@ class PyCurlFileObject():
+@@ -1113,8 +1149,10 @@ class PyCurlFileObject():
if self._parsed_hdr:
return self._parsed_hdr
statusend = self._hdr_dump.find('\n')
@@ -134,7 +224,7 @@ index e090e90..4797436 100644
self._parsed_hdr = mimetools.Message(hdrfp)
return self._parsed_hdr
-@@ -1136,6 +1165,7 @@ class PyCurlFileObject():
+@@ -1136,11 +1174,21 @@ class PyCurlFileObject():
self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
self.curl_obj.setopt(pycurl.FAILONERROR, True)
self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
@@ -142,7 +232,21 @@ index e090e90..4797436 100644
if DEBUG:
self.curl_obj.setopt(pycurl.VERBOSE, True)
-@@ -1148,9 +1178,11 @@ class PyCurlFileObject():
+ if opts.user_agent:
+ self.curl_obj.setopt(pycurl.USERAGENT, opts.user_agent)
++ if opts.ip_resolve:
++ # Default is: IPRESOLVE_WHATEVER
++ ipr = opts.ip_resolve.lower()
++ if ipr == 'whatever': # Do we need this?
++ self.curl_obj.setopt(pycurl.IPRESOLVE,pycurl.IPRESOLVE_WHATEVER)
++ if ipr == 'ipv4':
++ self.curl_obj.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
++ if ipr == 'ipv6':
++ self.curl_obj.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V6)
+
+ # maybe to be options later
+ self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
+@@ -1148,9 +1196,11 @@ class PyCurlFileObject():
# timeouts
timeout = 300
@@ -157,7 +261,7 @@ index e090e90..4797436 100644
# ssl options
if self.scheme == 'https':
-@@ -1276,7 +1308,7 @@ class PyCurlFileObject():
+@@ -1276,7 +1326,7 @@ class PyCurlFileObject():
raise err
elif errcode == 60:
@@ -166,7 +270,7 @@ index e090e90..4797436 100644
err = URLGrabError(14, msg)
err.url = self.url
raise err
-@@ -1291,7 +1323,12 @@ class PyCurlFileObject():
+@@ -1291,14 +1341,70 @@ class PyCurlFileObject():
raise err
elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it
@@ -178,9 +282,55 @@ index e090e90..4797436 100644
+ else:
+ msg = "Unknown Error: URL=%s , scheme=%s" % (self.url, self.scheme)
else:
- msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
+- msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
++ pyerr2str = { 5 : _("Couldn't resolve proxy"),
++ 6 : _("Couldn't resolve host"),
++ 7 : _("Couldn't connect"),
++ 8 : _("Bad reply to FTP server"),
++ 9 : _("Access denied"),
++ 11 : _("Bad reply to FTP pass"),
++ 13 : _("Bad reply to FTP pasv"),
++ 14 : _("Bad reply to FTP 227"),
++ 15 : _("Couldn't get FTP host"),
++ 17 : _("Couldn't set FTP type"),
++ 18 : _("Partial file"),
++ 19 : _("FTP RETR command failed"),
++ 22 : _("HTTP returned error"),
++ 23 : _("Write error"),
++ 25 : _("Upload failed"),
++ 26 : _("Read error"),
++ 27 : _("Out of Memory"),
++ 28 : _("Operation timed out"),
++ 30 : _("FTP PORT command failed"),
++ 31 : _("FTP REST command failed"),
++ 33 : _("Range failed"),
++ 34 : _("HTTP POST failed"),
++ 35 : _("SSL CONNECT failed"),
++ 36 : _("Couldn't resume download"),
++ 37 : _("Couldn't read file"),
++ 42 : _("Aborted by callback"),
++ 47 : _("Too many redirects"),
++ 51 : _("Peer certificate failed verification"),
++ 53 : _("SSL engine not found"),
++ 54 : _("SSL engine set failed"),
++ 55 : _("Network error send()"),
++ 56 : _("Network error recv()"),
++ 58 : _("Local certificate failed"),
++ 59 : _("SSL set cipher failed"),
++ 60 : _("Local CA certificate failed"),
++ 61 : _("HTTP bad transfer encoding"),
++ 63 : _("Maximum file size exceeded"),
++ 64 : _("FTP SSL failed"),
++ 67 : _("Authentication failure"),
++ 70 : _("Out of disk space on server"),
++ 73 : _("Remote file exists"),
++ }
++ errstr = str(e.args[1])
++ if not errstr:
++ errstr = pyerr2str.get(errcode, '<Unknown>')
++ msg = 'curl#%s - "%s"' % (errcode, errstr)
code = errcode
-@@ -1299,6 +1336,12 @@ class PyCurlFileObject():
+ err = URLGrabError(14, msg)
err.code = code
err.exception = e
raise err
@@ -193,7 +343,24 @@ index e090e90..4797436 100644
def _do_open(self):
self.curl_obj = _curl_cache
-@@ -1446,9 +1489,23 @@ class PyCurlFileObject():
+@@ -1434,9 +1540,13 @@ class PyCurlFileObject():
+ #fh, self._temp_name = mkstemp()
+ #self.fo = open(self._temp_name, 'wb')
+
+-
+- self._do_perform()
+-
++ try:
++ self._do_perform()
++ except URLGrabError, e:
++ self.fo.flush()
++ self.fo.close()
++ raise e
++
+
+
+ if _was_filename:
+@@ -1446,9 +1556,23 @@ class PyCurlFileObject():
# set the time
mod_time = self.curl_obj.getinfo(pycurl.INFO_FILETIME)
if mod_time != -1:
@@ -203,7 +370,7 @@ index e090e90..4797436 100644
+ except OSError, e:
+ err = URLGrabError(16, _(\
+ 'error setting timestamp on file %s from %s, OSError: %s')
++ % (self.filename, self.url, e))
++ % (self.filename, self.url, e))
+ err.url = self.url
+ raise err
# re open it
@@ -219,7 +386,7 @@ index e090e90..4797436 100644
else:
#self.fo = open(self._temp_name, 'r')
self.fo.seek(0)
-@@ -1532,11 +1589,14 @@ class PyCurlFileObject():
+@@ -1532,11 +1656,14 @@ class PyCurlFileObject():
def _over_max_size(self, cur, max_size=None):
if not max_size:
@@ -238,7 +405,7 @@ index e090e90..4797436 100644
msg = _("Downloaded more than max size for %s: %s > %s") \
% (self.url, cur, max_size)
-@@ -1582,9 +1642,21 @@ class PyCurlFileObject():
+@@ -1582,9 +1709,21 @@ class PyCurlFileObject():
self.opts.progress_obj.end(self._amount_read)
self.fo.close()
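
A minimal usage sketch (not part of the patch itself; it assumes the stock
urlgrabber.urlgrab() entry point, and the URL and filename are placeholders)
of the ip_resolve keyword added above, which accepts 'whatever', 'ipv4' or
'ipv6':

    from urlgrabber import urlgrab
    from urlgrabber.grabber import URLGrabError

    try:
        # Force IPv4-only name resolution for this download (hypothetical URL).
        urlgrab('http://example.com/some/file.rpm', 'file.rpm', ip_resolve='ipv4')
    except URLGrabError, e:
        # With this patch, empty pycurl messages are mapped through pyerr2str,
        # so the error reads 'curl#<code> - "<reason>"'.
        print 'download failed: %s' % e

The scripts/urlgrabber hunk above adds the matching long option on the command
line, so --output FILE behaves the same as -o FILE.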