2 commits - moksha/connector moksha/middleware - Moksha-commits - Fedora mailing-lists

23 Mar 2009

moksha/connector/__init__.py   |    3 
 moksha/connector/connector.py  |  146 ++++++------------------
 moksha/connector/utils.py      |  247 +++++++++++++++++++++++++++++++++++++++++
 moksha/middleware/connector.py |    2 
 4 files changed, 293 insertions(+), 105 deletions(-)
New commits:
commit 44141d6c892403e6f28b14cc3276b568c425e019
Merge: dfb3f82... 23cce5f...
Author: John (J5) Palmieri johnp@redhat.com
Date:   Mon Mar 23 18:03:12 2009 -0400
Merge branch 'master' of git+ssh://johnp@git.fedorahosted.org/git/moksha
commit dfb3f82c39e1001fe51aef1195f8ee6eca23f567
Author: John (J5) Palmieri johnp@redhat.com
Date:   Mon Mar 23 18:02:31 2009 -0400
add search to the connector interface

diff --git a/moksha/connector/__init__.py b/moksha/connector/__init__.py
index d789d84..76f4e31 100644
--- a/moksha/connector/__init__.py
+++ b/moksha/connector/__init__.py
@@ -1 +1,2 @@
-from connector import IConnector, ICall, IQuery, IFeed, INotify, ParamFilter
\ No newline at end of file
+from connector import IConnector, ICall, IQuery, IFeed, INotify, ISearch
+from utils import ParamFilter
\ No newline at end of file
diff --git a/moksha/connector/connector.py b/moksha/connector/connector.py
index 4045e1c..c8cbfc5 100644
--- a/moksha/connector/connector.py
+++ b/moksha/connector/connector.py
@@ -16,6 +16,9 @@
 # Copyright 2008, Red Hat, Inc.
 # Authors: John (J5) Palmieri johnp@redhat.com
+from utils import QueryPath, QueryCol, ParamFilter, WeightedSearch
+from beaker.cache import Cache
+
 """ Data Connector Interfaces
A Data Connector is an object which translate Moksha data requests to the native
@@ -30,50 +33,6 @@ implemented (e.g. sorting in the ITable interface) must
 raise NotImplementedError if the value is set to anything but None
 """
-class QueryCol(dict):
-    def __init__(self,
-                 column,
-                 default_visible,
-                 can_sort,
-                 can_filter_wildcards):
-        super(QueryCol, self).__init__(column = column,
-                                       default_visible = default_visible,
-                                       can_sort = can_sort,
-                                       can_filter_wildcards = can_filter_wildcards)
-
-class QueryPath(dict):
-    def __init__(self,
-                 path,
-                 query_func,
-                 primary_key_col,
-                 default_sort_col,
-                 default_sort_order,
-                 can_paginate):
-        super(QueryPath, self).__init__(
-                         path = path,
-                         query_func = query_func,
-                         primary_key_col = primary_key_col,
-                         default_sort_col = default_sort_col,
-                         default_sort_order = default_sort_order,
-                         can_paginate = can_paginate,
-                         columns={})
-
-    def register_column(self,
-                        column,
-                        default_visible = True,
-                        can_sort = False,
-                        can_filter_wildcards = False):
-
-        self["columns"][column] = QueryCol(
-                column = column,
-                default_visible = default_visible,
-                can_sort = can_sort,
-                can_filter_wildcards = can_filter_wildcards
-              )
-
-    def get_query(self):
-        return self['query_func']
-
 class IConnector(object):
     """ Data connector interface
@@ -350,64 +309,43 @@ class INotify(object):
     def register_listener(self, listener_cb):
         pass
-class ParamFilter(object):
-    """Helper class for filtering query arguments"""
-
-    def __init__(self):
-        self._translation_table = {}
-        self._param_table = {}
-
-    def add_filter(self, param, args=[], cast=None, allow_none=True, filter_func=None):
-        pf = {}
-        if cast:
-            assert(isinstance(cast, type),
-                   "cast should be of type <type> not cast %s" % str(type(cast)))
-
-            pf['cast'] = cast
-
-        pf['allow_none'] = allow_none
-        pf['filter_func'] = filter_func
-
-        self._param_table[param] = pf
-        args.append(param)
-        for a in args:
-            assert(not(a in self._translation_table),
-                   '''The argument %s has been registered for more than
-                   one parameter translation''' % (a)
-                   )
-
-            self._translation_table[a] = param
-
-    def filter(self, d, conn=None):
-        results = {}
-        for k, v in d.iteritems():
-            if k in self._translation_table:
-                param = self._translation_table[k]
-                allow_none = True
-                assign = True
-                if param in self._param_table:
-                    pf = self._param_table[param]
-                    cast = pf.get('cast')
-                    if cast == bool:
-                        if isinstance(v, basestring):
-                            lv = v.lower()
-                            if lv in ('t', 'y', 'true', 'yes'):
-                                v = True
-                            else:
-                                v = False
-                        elif not isinstance(v, bool):
-                            v = False
-                    elif cast:
-                        v = cast(v)
-
-                    allow_none = pf['allow_none']
-
-                    ff = pf['filter_func']
-                    if ff:
-                        ff(conn, results, k, v, allow_none)
-                        assign = False
-
-                    if (allow_none or (v != None)) and assign:
-                        results[param] = v
+class ISearch(IQuery):
+    filters = ParamFilter()
+    filters.add_filter('search', ['s'])
-        return results
+    @classmethod
+    def register_search_path(cls,
+                             path,
+                             search_func,
+                             primary_key_col = None,
+                             default_sort_col = None,
+                             default_sort_order = None,
+                             can_paginate = True):
+
+        cls._search_cache = fas_cache = Cache('moksha_search_cache_ ' + path)
+
+        def query_func(conn=None,
+                       start_row=0,
+                       rows_per_page=10,
+                       order=-1,
+                       sort_col=None,
+                       filters={},
+                       **params):
+
+            s = WeightedSearch(lambda search_term: search_func(conn, search_term),
+                               cls._paths[path]['columns'],
+                               cls._search_cache)
+            search_string = cls.filters.filter(filters).get('search')
+            results = s.search(search_string)
+
+
+            return (len(results), results[start_row:start_row + rows_per_page])
+
+        qpath = cls.register_path(path = path,
+                          query_func = query_func,
+                          primary_key_col = primary_key_col,
+                          default_sort_col = default_sort_col,
+                          default_sort_order = default_sort_order,
+                          can_paginate = can_paginate)
+
+        return qpath
\ No newline at end of file
diff --git a/moksha/connector/utils.py b/moksha/connector/utils.py
index 5826942..b30f82c 100644
--- a/moksha/connector/utils.py
+++ b/moksha/connector/utils.py
@@ -17,6 +17,8 @@
 # Authors: John (J5) Palmieri johnp@redhat.com
from datetime import datetime, timedelta
+from UserDict import DictMixin
+
 import bisect
class DateTimeDisplay(object):
@@ -95,3 +97,248 @@ class DateTimeDisplay(object):
return {'time':time, 'date':date , 'when':when, 'should_hide_time':should_hide_time}
+class odict(DictMixin):
+
+    def __init__(self):
+        self._keys = []
+        self._data = {}
+
+    def index(self, i):
+        k = self._keys[i]
+        return self._data[k]
+
+    def key_index(self, i):
+        return self._keys[i]
+
+    def __setitem__(self, key, value):
+        if key not in self._data:
+            self._keys.append(key)
+
+        self._data[key] = value
+
+    def __getitem__(self, key):
+        return self._data[key]
+
+    def __delitem__(self, key):
+        del self._data[key]
+        self._keys.remove(key)
+
+    def __iter__(self):
+        for key in self._keys:
+            yield key
+
+    def keys(self):
+        return list(self._keys)
+
+    def copy(self):
+        copyDict = odict()
+        copyDict._data = self._data.copy()
+        copyDict._keys = self._keys[:]
+        return copyDict
+
+    def __repr__(self):
+        result = []
+        for key in self._keys:
+            result.append('(%s, %s)' % (repr(key), repr(self._data[key])))
+        return ''.join(['OrderedDict', '([', ', '.join(result), '])'])
+
+class QueryCol(dict):
+    def __init__(self,
+                 column,
+                 default_visible,
+                 can_sort,
+                 can_filter_wildcards):
+        super(QueryCol, self).__init__(column = column,
+                                       default_visible = default_visible,
+                                       can_sort = can_sort,
+                                       can_filter_wildcards = can_filter_wildcards)
+
+class QueryPath(dict):
+    def __init__(self,
+                 path,
+                 query_func,
+                 primary_key_col,
+                 default_sort_col,
+                 default_sort_order,
+                 can_paginate):
+        super(QueryPath, self).__init__(
+                         path = path,
+                         query_func = query_func,
+                         primary_key_col = primary_key_col,
+                         default_sort_col = default_sort_col,
+                         default_sort_order = default_sort_order,
+                         can_paginate = can_paginate,
+                         columns=odict())
+
+    def register_column(self,
+                        column,
+                        default_visible = True,
+                        can_sort = False,
+                        can_filter_wildcards = False):
+
+        self["columns"][column] = QueryCol(
+                column = column,
+                default_visible = default_visible,
+                can_sort = can_sort,
+                can_filter_wildcards = can_filter_wildcards
+              )
+
+    def get_query(self):
+        return self['query_func']
+
+class ParamFilter(object):
+    """Helper class for filtering query arguments"""
+
+    def __init__(self):
+        self._translation_table = {}
+        self._param_table = {}
+
+    def add_filter(self, param, args=[], cast=None, allow_none=True, filter_func=None):
+        pf = {}
+        if cast:
+            assert(isinstance(cast, type),
+                   "cast should be of type <type> not cast %s" % str(type(cast)))
+
+            pf['cast'] = cast
+
+        pf['allow_none'] = allow_none
+        pf['filter_func'] = filter_func
+
+        self._param_table[param] = pf
+        args.append(param)
+        for a in args:
+            assert(not(a in self._translation_table),
+                   '''The argument %s has been registered for more than
+                   one parameter translation''' % (a)
+                   )
+
+            self._translation_table[a] = param
+
+    def filter(self, d, conn=None):
+        results = {}
+        for k, v in d.iteritems():
+            if k in self._translation_table:
+                param = self._translation_table[k]
+                allow_none = True
+                assign = True
+                if param in self._param_table:
+                    pf = self._param_table[param]
+                    cast = pf.get('cast')
+                    if cast == bool:
+                        if isinstance(v, basestring):
+                            lv = v.lower()
+                            if lv in ('t', 'y', 'true', 'yes'):
+                                v = True
+                            else:
+                                v = False
+                        elif not isinstance(v, bool):
+                            v = False
+                    elif cast:
+                        v = cast(v)
+
+                    allow_none = pf['allow_none']
+
+                    ff = pf['filter_func']
+                    if ff:
+                        ff(conn, results, k, v, allow_none)
+                        assign = False
+
+                    if (allow_none or (v != None)) and assign:
+                        results[param] = v
+
+        return results
+
+class WeightedSearch(object):
+    # FIXME: Need to dial in the weighting algorithm
+    CACHE_EXPIRE_TIME = 30 * 60
+    LIGHT_WEIGHT = 10
+    MEDIUM_WEIGHT = 30
+    HEAVY_WEIGHT = 100
+
+    def __init__(self, search_func, cols, cache=None):
+            self.search_func = search_func
+            self.cache = cache
+            self.cols = cols
+
+    def weigh(self, search_term, weighted_hash):
+        search_term_len = len(search_term)
+        l = len(self.cols)
+
+        # each field gets a decelerating percentage of its calculated weight
+        # e.g. each consecutive field is an order of magnitude less important
+        # than the previous field
+        factor = 1/sum(xrange(l))
+
+        r = weighted_hash[0]
+        for i, result_field in enumerate(self.cols):
+            x = l - i
+            weight_factor = x * factor
+
+            l_result_field = r[result_field].lower()
+            index = l_result_field.find(search_term)
+
+            while(index != -1):
+                weighted_hash[1] += self.LIGHT_WEIGHT * weight_factor
+                if index == 0:
+                    # in front
+                    weighted_hash[1] += self.MEDIUM_WEIGHT * weight_factor
+                    if search_term_len == len(l_result_field):
+                        weighted_hash[1] += self.HEAVY_WEIGHT
+
+                if index + search_term_len == l:
+                    # in back
+                    weighted_hash[1] += self.MEDIUM_WEIGHT * weight_factor
+
+                index = l_result_field.find(search_term, index + 1)
+
+    def weighted_sort(self, a, b):
+        result = 0
+        (a_val, a_weight) = a
+        (b_val, b_weight) = b
+
+        result = -cmp(a_weight, b_weight)
+        if result == 0:
+            result = cmp(a_val[self.cols.key_index(0)], b_val[self.cols.key_index(0)])
+        return result
+
+    def search(self, search_string):
+        if not search_string:
+            return []
+
+        search = search_string.lower().replace(',', ' ').split()
+
+        weighted_results = {}
+        for s in search:
+            results = self.cache.get_value(key = s,
+                               createfunc=lambda : self.search_func(s),
+                               type="memory",
+                               expiretime=self.CACHE_EXPIRE_TIME)
+
+            for r in results:
+                rkey = r[self.cols.key_index(0)]
+
+                # if we have already weighted this result get the
+                # weighted hash to add weight to
+                # else we create a new weighted hash
+                if rkey in weighted_results:
+                    weighted_hash = weighted_results[rkey]
+                else:
+                    weighted_hash = [r, 0]
+
+
+                self.weigh(s, weighted_hash)
+                weighted_results[rkey] = weighted_hash
+
+        sorted_list = weighted_results.values()
+        sorted_list.sort(self.weighted_sort)
+
+        for i, v in enumerate(sorted_list):
+            sorted_list[i] = v[0]
+
+        return sorted_list
+
+
+
+
+
+
diff --git a/moksha/middleware/connector.py b/moksha/middleware/connector.py
index b69c2f9..d868554 100644
--- a/moksha/middleware/connector.py
+++ b/moksha/middleware/connector.py
@@ -65,6 +65,8 @@ class MokshaConnectorMiddleware(object):
if k not in params:
                     params[k] = p.getall(k)
+                    if params[k] and len(params[k]) == 1:
+                        params[k] = params[k][0]
response = self._run_connector(environ, request,
                                            s[0], s[1], *s[2:],