diff options
author | Six <unknown> | 2011-05-03 14:51:26 -0400 |
---|---|---|
committer | Six <unknown> | 2011-05-03 14:51:26 -0400 |
commit | 5093e3b212ee692a3cc0f4f6273b87417200241d (patch) | |
tree | d9bc35dac712b0850ce5994baa0b7d7807357d36 | |
parent | 3e4ea9e87eb78555cba374c725e3fc04e6dc7ae4 (diff) | |
download | d2-5093e3b212ee692a3cc0f4f6273b87417200241d.tar.bz2 d2-5093e3b212ee692a3cc0f4f6273b87417200241d.tar.xz d2-5093e3b212ee692a3cc0f4f6273b87417200241d.zip |
Lowercased the email address imported from LDAP and added search-term lookup (spelling suggestions for terms missing from the index) to handle queries better
-rw-r--r-- | lib/d2/app/adapters/search.py | 51 | ||||
-rw-r--r-- | lib/d2/bin/d2_data_merge.py | 2 |
2 files changed, 50 insertions, 3 deletions
diff --git a/lib/d2/app/adapters/search.py b/lib/d2/app/adapters/search.py index 14df59f..e507bc1 100644 --- a/lib/d2/app/adapters/search.py +++ b/lib/d2/app/adapters/search.py | |||
@@ -12,6 +12,7 @@ from whoosh.fields import Schema, TEXT, NUMERIC, ID | |||
12 | from whoosh.index import create_in | 12 | from whoosh.index import create_in |
13 | from whoosh.index import open_dir | 13 | from whoosh.index import open_dir |
14 | from whoosh.qparser import QueryParser | 14 | from whoosh.qparser import QueryParser |
15 | from whoosh.spelling import SpellChecker | ||
15 | import os | 16 | import os |
16 | import stat | 17 | import stat |
17 | 18 | ||
@@ -83,6 +84,12 @@ class SearchAdapter(BaseAdapter): | |||
83 | for row in results: | 84 | for row in results: |
84 | writer.add_document(**row) | 85 | writer.add_document(**row) |
85 | writer.commit() | 86 | writer.commit() |
87 | self._speller(results) | ||
88 | |||
89 | def _speller(self, data): | ||
90 | ix = open_dir(self.index_directory) | ||
91 | speller = SpellChecker(ix.storage) | ||
92 | speller.add_field(ix, "context") | ||
86 | 93 | ||
87 | def search_index_data(self, date_start=None): | 94 | def search_index_data(self, date_start=None): |
88 | hold = {u'plot':{}, u'occupant':{}} | 95 | hold = {u'plot':{}, u'occupant':{}} |
@@ -128,11 +135,51 @@ class SearchAdapter(BaseAdapter): | |||
128 | out['context'] = u' '.join(hold) | 135 | out['context'] = u' '.join(hold) |
129 | return out | 136 | return out |
130 | 137 | ||
131 | def search(self, search_): | 138 | def _all_terms(self, user_query): |
139 | out = [] | ||
140 | terms = set() | ||
141 | user_query.all_terms(terms) | ||
142 | for key, val in terms: | ||
143 | if key == 'context': | ||
144 | out.append(val) | ||
145 | return out | ||
146 | |||
147 | def _missing_terms(self, user_query): | ||
148 | out = [] | ||
149 | terms = set() | ||
150 | user_query.existing_terms(self.ix.reader(), terms, reverse=True) | ||
151 | for key, val in terms: | ||
152 | if key == 'context': | ||
153 | out.append(val) | ||
154 | return out | ||
155 | |||
156 | def _build_query(self, user_query_string): | ||
157 | parser = QueryParser("context", self.ix.schema) | ||
158 | user_query = parser.parse(user_query_string) | ||
159 | all_terms = self._all_terms(user_query) | ||
160 | missing_terms = self._missing_terms(user_query) | ||
161 | if missing_terms: | ||
162 | out = [] | ||
163 | speller = SpellChecker(self.ix.storage) | ||
164 | for term in all_terms: | ||
165 | if term in missing_terms: | ||
166 | suggestions = speller.suggest(term) | ||
167 | if suggestions: | ||
168 | out.append(suggestions[0]) | ||
169 | else: | ||
170 | out.append(term) | ||
171 | else: | ||
172 | out.append(term) | ||
173 | return u' '.join(out) | ||
174 | else: | ||
175 | return u' '.join(all_terms) | ||
176 | |||
177 | def search(self, user_query_string): | ||
132 | out = [] | 178 | out = [] |
179 | user_query_string = self._build_query(user_query_string) | ||
133 | with self.ix.searcher() as searcher: | 180 | with self.ix.searcher() as searcher: |
134 | query = QueryParser("context", self.ix.schema) | 181 | query = QueryParser("context", self.ix.schema) |
135 | myquery = query.parse(search_) | 182 | myquery = query.parse(user_query_string) |
136 | results = searcher.search(myquery) | 183 | results = searcher.search(myquery) |
137 | for row in results[0:len(results)]: | 184 | for row in results[0:len(results)]: |
138 | if row[u'id_type'] == 'plot': | 185 | if row[u'id_type'] == 'plot': |
diff --git a/lib/d2/bin/d2_data_merge.py b/lib/d2/bin/d2_data_merge.py index 01c1c5e..2270830 100644 --- a/lib/d2/bin/d2_data_merge.py +++ b/lib/d2/bin/d2_data_merge.py | |||
@@ -335,7 +335,7 @@ class DbEmail(BaseDb): | |||
335 | 335 | ||
336 | def _grab_data(self, occupant): | 336 | def _grab_data(self, occupant): |
337 | if 'mail' in occupant.ldap_data.keys(): | 337 | if 'mail' in occupant.ldap_data.keys(): |
338 | return unicode(occupant.ldap_data['mail']) | 338 | return unicode(occupant.ldap_data['mail']).lower() |
339 | 339 | ||
340 | 340 | ||
341 | class DbDepartment(BaseDb): | 341 | class DbDepartment(BaseDb): |