Author: tmckay
Date: 2011-08-05 20:49:12 +0000 (Fri, 05 Aug 2011)
New Revision: 4909
Modified:
trunk/sage/python/sage/util.py
Log:
Change URL parsing routines to use regular expressions and to allow an optional scheme, user, password, and path in addition to host and port.
Modified: trunk/sage/python/sage/util.py
===================================================================
--- trunk/sage/python/sage/util.py 2011-08-05 18:59:48 UTC (rev 4908)
+++ trunk/sage/python/sage/util.py 2011-08-05 20:49:12 UTC (rev 4909)
@@ -1,5 +1,8 @@
from time import time, sleep
from threading import Thread, Lock
+import re
+import copy
+import string
class MethodResult(object):
'''
@@ -201,78 +204,81 @@
'''
raise Exception("Not implemented")
-def host_port(hostname):
- '''
- Returns a tuple containing 'host' and 'port' strings from hostname.
+class sage_URL(object):
+ def __init__(self, scheme, user, password, host, port, path):
+ self.scheme = scheme
+ self.user = user
+ self.password = password
+ self.host = host
+ self.port = port
+ self.path = path
- Strings are split at the first colon to produce host and port strings.
- A string containing only digits will result in a tuple with the host
- value set to None and the port value set to the entire string. A string
- containing non-digits but no colon will result in a tuple with the port
- value set to None and the host value set to the entire string.
- '''
- assert type(hostname) in (str, unicode)
- import string
+ def __repr__(self):
+ return "sage_URL(%r)" % str(self)
- host = None
- port = None
+ def __str__(self):
+ s = ""
+ if self.scheme:
+ s += "%s://" % self.scheme
+ if self.user:
+ s += self.user
+ if self.password:
+ s += "/%s" % self.password
+ s += "@"
+ s += self.host
+ if self.port:
+ s += ":%s" % self.port
+ if self.path:
+ s += "/%s" % self.path
+ return s
- info = string.split(hostname, ":", maxsplit=1)
- if len(info) == 1:
- # All digits, assume it was just a port number
- if info[0].isdigit():
- port = info[0]
- else:
- host = info[0]
- else:
- host = info[0]
- port = info[1]
- return host, port
+def parse_URL(hoststring):
-def host_port_list(netlocs, default_port=None):
- '''
- Parses a list of network locations and returns
- a dictionary keyed by host containing sets of ports for each host.
+ RE = re.compile(r"""
+ # [ <scheme>:// ] [ <user> [ / <password> ] @] <host> [ :<port> ] [ <path> ]
+ ^ (?: ([^:/@]+)://)? (?: ([^:/@]+) (?: / ([^:/@]+) )? @)? ([^@:/]+) (?: :([0-9]+))? (?: / (.*))?$
+ """, re.X)
- Uses sage.util.host_port() to parse each item in the list.
+ scheme = user = password = host = port = path = None
+ match = RE.match(hoststring)
+ if match is not None:
+ scheme, user, password, host, port, path = match.groups()
+ return sage_URL(scheme, user, password, host, port, path)
- netlocs -- comma-separated list of network locations. A network location
- may have one of the following forms: 'host', 'host:port', or 'port'.
- If the 'port' form is used, the 'host' value is assumed to be the last
- host encountered in the list or "localhost" if no host has been
- encountered. If the 'host' form is used, an entry for the host is made
- in the dictionary with an port list.
+def host_list(netlocs, default_scheme=None, default_port=None, default_path=None):
+ tokens = string.split(netlocs, ",")
- default_port -- port value for hosts in the dictionary which contain an
- empty port set after 'netlocs' is fully parsed. Ignored if equal to None.
- '''
- assert type(netlocs) in (str, unicode)
- import string
+ hosts = dict()
+ last_url = None
+ last_port_set = False
- # A dictionary of sets of ports keyed
- # by hostname
- hosts = dict()
- tokens = string.split(netlocs, ",")
- lasthost = "localhost"
- for name in tokens:
- host, port = host_port(string.strip(name))
- if host is None:
- host = lasthost
+ for loc in tokens:
+ url = None
+ loc = string.strip(loc)
+ if loc.isdigit():
+ # Allow just a port number to be specified if the previous
+ # url explicitly set a port. Shorthand for port list.
+ # Copy all information from the previous token except port.
+ if last_url is not None and last_port_set:
+ url = copy.copy(last_url)
+ url.port = loc
else:
- lasthost = host
- if host not in hosts:
- if port is None:
- hosts[host] = set()
+ url = parse_URL(loc)
+ if url.scheme is None:
+ url.scheme = default_scheme
+ if url.path is None:
+ url.path = default_path
+ if url.port is None:
+ url.port = str(default_port)
+ last_port_set = False
else:
- hosts[host] = set([port])
- elif port is not None:
- hosts[host].add(port)
+ last_port_set = True
+ last_url = url
+ if url is not None and url.host is not None and url.port is not None:
+ if url.host not in hosts:
+ hosts[url.host] = list()
+ hosts[url.host].append(url)
+
+ return hosts
- # Fill in default ports for hosts with no
- # ports assigned
- if default_port is not None:
- for host, ports in hosts.iteritems():
- if len(ports) == 0:
- ports.add(default_port)
- return hosts