aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Six <unknown> 2011-05-03 14:51:26 -0400
committer Six <unknown> 2011-05-03 14:51:26 -0400
commit 5093e3b212ee692a3cc0f4f6273b87417200241d (patch)
tree d9bc35dac712b0850ce5994baa0b7d7807357d36
parent 3e4ea9e87eb78555cba374c725e3fc04e6dc7ae4 (diff)
download d2-5093e3b212ee692a3cc0f4f6273b87417200241d.tar.bz2
d2-5093e3b212ee692a3cc0f4f6273b87417200241d.tar.xz
d2-5093e3b212ee692a3cc0f4f6273b87417200241d.zip
Lowercased the email address imported from LDAP and added search term lookup to better handle queries.
-rw-r--r-- lib/d2/app/adapters/search.py 51
-rw-r--r-- lib/d2/bin/d2_data_merge.py 2
2 files changed, 50 insertions, 3 deletions
diff --git a/lib/d2/app/adapters/search.py b/lib/d2/app/adapters/search.py
index 14df59f..e507bc1 100644
--- a/lib/d2/app/adapters/search.py
+++ b/lib/d2/app/adapters/search.py
@@ -12,6 +12,7 @@ from whoosh.fields import Schema, TEXT, NUMERIC, ID
12from whoosh.index import create_in 12from whoosh.index import create_in
13from whoosh.index import open_dir 13from whoosh.index import open_dir
14from whoosh.qparser import QueryParser 14from whoosh.qparser import QueryParser
15from whoosh.spelling import SpellChecker
15import os 16import os
16import stat 17import stat
17 18
@@ -83,6 +84,12 @@ class SearchAdapter(BaseAdapter):
83 for row in results: 84 for row in results:
84 writer.add_document(**row) 85 writer.add_document(**row)
85 writer.commit() 86 writer.commit()
87 self._speller(results)
88
89 def _speller(self, data):
90 ix = open_dir(self.index_directory)
91 speller = SpellChecker(ix.storage)
92 speller.add_field(ix, "context")
86 93
87 def search_index_data(self, date_start=None): 94 def search_index_data(self, date_start=None):
88 hold = {u'plot':{}, u'occupant':{}} 95 hold = {u'plot':{}, u'occupant':{}}
@@ -128,11 +135,51 @@ class SearchAdapter(BaseAdapter):
128 out['context'] = u' '.join(hold) 135 out['context'] = u' '.join(hold)
129 return out 136 return out
130 137
131 def search(self, search_): 138 def _all_terms(self, user_query):
139 out = []
140 terms = set()
141 user_query.all_terms(terms)
142 for key, val in terms:
143 if key == 'context':
144 out.append(val)
145 return out
146
147 def _missing_terms(self, user_query):
148 out = []
149 terms = set()
150 user_query.existing_terms(self.ix.reader(), terms, reverse=True)
151 for key, val in terms:
152 if key == 'context':
153 out.append(val)
154 return out
155
156 def _build_query(self, user_query_string):
157 parser = QueryParser("context", self.ix.schema)
158 user_query = parser.parse(user_query_string)
159 all_terms = self._all_terms(user_query)
160 missing_terms = self._missing_terms(user_query)
161 if missing_terms:
162 out = []
163 speller = SpellChecker(self.ix.storage)
164 for term in all_terms:
165 if term in missing_terms:
166 suggestions = speller.suggest(term)
167 if suggestions:
168 out.append(suggestions[0])
169 else:
170 out.append(term)
171 else:
172 out.append(term)
173 return u' '.join(out)
174 else:
175 return u' '.join(all_terms)
176
177 def search(self, user_query_string):
132 out = [] 178 out = []
179 user_query_string = self._build_query(user_query_string)
133 with self.ix.searcher() as searcher: 180 with self.ix.searcher() as searcher:
134 query = QueryParser("context", self.ix.schema) 181 query = QueryParser("context", self.ix.schema)
135 myquery = query.parse(search_) 182 myquery = query.parse(user_query_string)
136 results = searcher.search(myquery) 183 results = searcher.search(myquery)
137 for row in results[0:len(results)]: 184 for row in results[0:len(results)]:
138 if row[u'id_type'] == 'plot': 185 if row[u'id_type'] == 'plot':
diff --git a/lib/d2/bin/d2_data_merge.py b/lib/d2/bin/d2_data_merge.py
index 01c1c5e..2270830 100644
--- a/lib/d2/bin/d2_data_merge.py
+++ b/lib/d2/bin/d2_data_merge.py
@@ -335,7 +335,7 @@ class DbEmail(BaseDb):
335 335
336 def _grab_data(self, occupant): 336 def _grab_data(self, occupant):
337 if 'mail' in occupant.ldap_data.keys(): 337 if 'mail' in occupant.ldap_data.keys():
338 return unicode(occupant.ldap_data['mail']) 338 return unicode(occupant.ldap_data['mail']).lower()
339 339
340 340
341class DbDepartment(BaseDb): 341class DbDepartment(BaseDb):