moksha/connector/__init__.py | 3 moksha/connector/connector.py | 146 ++++++------------------ moksha/connector/utils.py | 247 +++++++++++++++++++++++++++++++++++++++++ moksha/middleware/connector.py | 2 4 files changed, 293 insertions(+), 105 deletions(-)
New commits: commit 44141d6c892403e6f28b14cc3276b568c425e019 Merge: dfb3f82... 23cce5f... Author: John (J5) Palmieri johnp@redhat.com Date: Mon Mar 23 18:03:12 2009 -0400
Merge branch 'master' of git+ssh://johnp@git.fedorahosted.org/git/moksha
commit dfb3f82c39e1001fe51aef1195f8ee6eca23f567 Author: John (J5) Palmieri johnp@redhat.com Date: Mon Mar 23 18:02:31 2009 -0400
add search to the connector interface
diff --git a/moksha/connector/__init__.py b/moksha/connector/__init__.py index d789d84..76f4e31 100644 --- a/moksha/connector/__init__.py +++ b/moksha/connector/__init__.py @@ -1 +1,2 @@ -from connector import IConnector, ICall, IQuery, IFeed, INotify, ParamFilter \ No newline at end of file +from connector import IConnector, ICall, IQuery, IFeed, INotify, ISearch +from utils import ParamFilter \ No newline at end of file diff --git a/moksha/connector/connector.py b/moksha/connector/connector.py index 4045e1c..c8cbfc5 100644 --- a/moksha/connector/connector.py +++ b/moksha/connector/connector.py @@ -16,6 +16,9 @@ # Copyright 2008, Red Hat, Inc. # Authors: John (J5) Palmieri johnp@redhat.com
+from utils import QueryPath, QueryCol, ParamFilter, WeightedSearch +from beaker.cache import Cache + """ Data Connector Interfaces
A Data Connector is an object which translate Moksha data requests to the native @@ -30,50 +33,6 @@ implemented (e.g. sorting in the ITable interface) must raise NotImplementedError if the value is set to anything but None """
-class QueryCol(dict): - def __init__(self, - column, - default_visible, - can_sort, - can_filter_wildcards): - super(QueryCol, self).__init__(column = column, - default_visible = default_visible, - can_sort = can_sort, - can_filter_wildcards = can_filter_wildcards) - -class QueryPath(dict): - def __init__(self, - path, - query_func, - primary_key_col, - default_sort_col, - default_sort_order, - can_paginate): - super(QueryPath, self).__init__( - path = path, - query_func = query_func, - primary_key_col = primary_key_col, - default_sort_col = default_sort_col, - default_sort_order = default_sort_order, - can_paginate = can_paginate, - columns={}) - - def register_column(self, - column, - default_visible = True, - can_sort = False, - can_filter_wildcards = False): - - self["columns"][column] = QueryCol( - column = column, - default_visible = default_visible, - can_sort = can_sort, - can_filter_wildcards = can_filter_wildcards - ) - - def get_query(self): - return self['query_func'] - class IConnector(object): """ Data connector interface
@@ -350,64 +309,43 @@ class INotify(object): def register_listener(self, listener_cb): pass
-class ParamFilter(object): - """Helper class for filtering query arguments""" - - def __init__(self): - self._translation_table = {} - self._param_table = {} - - def add_filter(self, param, args=[], cast=None, allow_none=True, filter_func=None): - pf = {} - if cast: - assert(isinstance(cast, type), - "cast should be of type <type> not cast %s" % str(type(cast))) - - pf['cast'] = cast - - pf['allow_none'] = allow_none - pf['filter_func'] = filter_func - - self._param_table[param] = pf - args.append(param) - for a in args: - assert(not(a in self._translation_table), - '''The argument %s has been registered for more than - one parameter translation''' % (a) - ) - - self._translation_table[a] = param - - def filter(self, d, conn=None): - results = {} - for k, v in d.iteritems(): - if k in self._translation_table: - param = self._translation_table[k] - allow_none = True - assign = True - if param in self._param_table: - pf = self._param_table[param] - cast = pf.get('cast') - if cast == bool: - if isinstance(v, basestring): - lv = v.lower() - if lv in ('t', 'y', 'true', 'yes'): - v = True - else: - v = False - elif not isinstance(v, bool): - v = False - elif cast: - v = cast(v) - - allow_none = pf['allow_none'] - - ff = pf['filter_func'] - if ff: - ff(conn, results, k, v, allow_none) - assign = False - - if (allow_none or (v != None)) and assign: - results[param] = v +class ISearch(IQuery): + filters = ParamFilter() + filters.add_filter('search', ['s'])
- return results + @classmethod + def register_search_path(cls, + path, + search_func, + primary_key_col = None, + default_sort_col = None, + default_sort_order = None, + can_paginate = True): + + cls._search_cache = fas_cache = Cache('moksha_search_cache_ ' + path) + + def query_func(conn=None, + start_row=0, + rows_per_page=10, + order=-1, + sort_col=None, + filters={}, + **params): + + s = WeightedSearch(lambda search_term: search_func(conn, search_term), + cls._paths[path]['columns'], + cls._search_cache) + search_string = cls.filters.filter(filters).get('search') + results = s.search(search_string) + + + return (len(results), results[start_row:start_row + rows_per_page]) + + qpath = cls.register_path(path = path, + query_func = query_func, + primary_key_col = primary_key_col, + default_sort_col = default_sort_col, + default_sort_order = default_sort_order, + can_paginate = can_paginate) + + return qpath \ No newline at end of file diff --git a/moksha/connector/utils.py b/moksha/connector/utils.py index 5826942..b30f82c 100644 --- a/moksha/connector/utils.py +++ b/moksha/connector/utils.py @@ -17,6 +17,8 @@ # Authors: John (J5) Palmieri johnp@redhat.com
from datetime import datetime, timedelta +from UserDict import DictMixin + import bisect
class DateTimeDisplay(object): @@ -95,3 +97,248 @@ class DateTimeDisplay(object):
return {'time':time, 'date':date , 'when':when, 'should_hide_time':should_hide_time}
+class odict(DictMixin): + + def __init__(self): + self._keys = [] + self._data = {} + + def index(self, i): + k = self._keys[i] + return self._data[k] + + def key_index(self, i): + return self._keys[i] + + def __setitem__(self, key, value): + if key not in self._data: + self._keys.append(key) + + self._data[key] = value + + def __getitem__(self, key): + return self._data[key] + + def __delitem__(self, key): + del self._data[key] + self._keys.remove(key) + + def __iter__(self): + for key in self._keys: + yield key + + def keys(self): + return list(self._keys) + + def copy(self): + copyDict = odict() + copyDict._data = self._data.copy() + copyDict._keys = self._keys[:] + return copyDict + + def __repr__(self): + result = [] + for key in self._keys: + result.append('(%s, %s)' % (repr(key), repr(self._data[key]))) + return ''.join(['OrderedDict', '([', ', '.join(result), '])']) + +class QueryCol(dict): + def __init__(self, + column, + default_visible, + can_sort, + can_filter_wildcards): + super(QueryCol, self).__init__(column = column, + default_visible = default_visible, + can_sort = can_sort, + can_filter_wildcards = can_filter_wildcards) + +class QueryPath(dict): + def __init__(self, + path, + query_func, + primary_key_col, + default_sort_col, + default_sort_order, + can_paginate): + super(QueryPath, self).__init__( + path = path, + query_func = query_func, + primary_key_col = primary_key_col, + default_sort_col = default_sort_col, + default_sort_order = default_sort_order, + can_paginate = can_paginate, + columns=odict()) + + def register_column(self, + column, + default_visible = True, + can_sort = False, + can_filter_wildcards = False): + + self["columns"][column] = QueryCol( + column = column, + default_visible = default_visible, + can_sort = can_sort, + can_filter_wildcards = can_filter_wildcards + ) + + def get_query(self): + return self['query_func'] + +class ParamFilter(object): + """Helper class for filtering query arguments""" + + def __init__(self): + self._translation_table = {} + self._param_table = {} + + def add_filter(self, param, args=[], cast=None, allow_none=True, filter_func=None): + pf = {} + if cast: + assert(isinstance(cast, type), + "cast should be of type <type> not cast %s" % str(type(cast))) + + pf['cast'] = cast + + pf['allow_none'] = allow_none + pf['filter_func'] = filter_func + + self._param_table[param] = pf + args.append(param) + for a in args: + assert(not(a in self._translation_table), + '''The argument %s has been registered for more than + one parameter translation''' % (a) + ) + + self._translation_table[a] = param + + def filter(self, d, conn=None): + results = {} + for k, v in d.iteritems(): + if k in self._translation_table: + param = self._translation_table[k] + allow_none = True + assign = True + if param in self._param_table: + pf = self._param_table[param] + cast = pf.get('cast') + if cast == bool: + if isinstance(v, basestring): + lv = v.lower() + if lv in ('t', 'y', 'true', 'yes'): + v = True + else: + v = False + elif not isinstance(v, bool): + v = False + elif cast: + v = cast(v) + + allow_none = pf['allow_none'] + + ff = pf['filter_func'] + if ff: + ff(conn, results, k, v, allow_none) + assign = False + + if (allow_none or (v != None)) and assign: + results[param] = v + + return results + +class WeightedSearch(object): + # FIXME: Need to dial in the weighting algorithm + CACHE_EXPIRE_TIME = 30 * 60 + LIGHT_WEIGHT = 10 + MEDIUM_WEIGHT = 30 + HEAVY_WEIGHT = 100 + + def __init__(self, search_func, cols, cache=None): + self.search_func = search_func + self.cache = cache + self.cols = cols + + def weigh(self, search_term, weighted_hash): + search_term_len = len(search_term) + l = len(self.cols) + + # each field gets a decelerating percentage of it's calculated weight + # e.g. each consecutive field is an order of magnatude less important + # than the previous field + factor = 1/sum(xrange(l)) + + r = weighted_hash[0] + for i, result_field in enumerate(self.cols): + x = l - i + weight_factor = x * factor + + l_result_field = r[result_field].lower() + index = l_result_field.find(search_term) + + while(index != -1): + weighted_hash[1] += self.LIGHT_WEIGHT * weight_factor + if index == 0: + # in front + weighted_hash[1] += self.MEDIUM_WEIGHT * weight_factor + if search_term_len == len(l_result_field): + weighted_hash[1] += self.HEAVY_WEIGHT + + if index + search_term_len == l: + # in back + weighted_hash[1] += self.MEDIUM_WEIGHT * weight_factor + + index = l_result_field.find(search_term, index + 1) + + def weighted_sort(self, a, b): + result = 0 + (a_val, a_weight) = a + (b_val, b_weight) = b + + result = -cmp(a_weight, b_weight) + if result == 0: + result = cmp(a_val[self.cols.key_index(0)], b_val[self.cols.key_index(0)]) + return result + + def search(self, search_string): + if not search_string: + return [] + + search = search_string.lower().replace(',', ' ').split() + + weighted_results = {} + for s in search: + results = self.cache.get_value(key = s, + createfunc=lambda : self.search_func(s), + type="memory", + expiretime=self.CACHE_EXPIRE_TIME) + + for r in results: + rkey = r[self.cols.key_index(0)] + + # if we have already weighted this result get the + # weighted hash to add weight to + # else we create a new weighted hash + if rkey in weighted_results: + weighted_hash = weighted_results[rkey] + else: + weighted_hash = [r, 0] + + + self.weigh(s, weighted_hash) + weighted_results[rkey] = weighted_hash + + sorted_list = weighted_results.values() + sorted_list.sort(self.weighted_sort) + + for i, v in enumerate(sorted_list): + sorted_list[i] = v[0] + + return sorted_list + + + + + + diff --git a/moksha/middleware/connector.py b/moksha/middleware/connector.py index b69c2f9..d868554 100644 --- a/moksha/middleware/connector.py +++ b/moksha/middleware/connector.py @@ -65,6 +65,8 @@ class MokshaConnectorMiddleware(object):
if k not in params: params[k] = p.getall(k) + if params[k] and len(params[k]) == 1: + params[k] = params[k][0]
response = self._run_connector(environ, request, s[0], s[1], *s[2:],
moksha-commits@lists.fedorahosted.org