From b4b6e6723a3b4fe107e607ce0ec5b81f72e72a5a Mon Sep 17 00:00:00 2001 From: Elena ``of Valhalla'' Grandi Date: Fri, 6 Jan 2017 20:42:43 +0100 Subject: Changed search interface to be closer to xapian expectations --- lesana/collection.py | 28 ++++++++++++++++++++++++---- lesana/command.py | 6 ++++-- tests/test_collection.py | 23 ++++++++++++++++++++++- 3 files changed, 50 insertions(+), 7 deletions(-) diff --git a/lesana/collection.py b/lesana/collection.py index 81b7a66..e4898f6 100644 --- a/lesana/collection.py +++ b/lesana/collection.py @@ -91,6 +91,7 @@ class Collection(object): self.stemmer = xapian.Stem('english') else: self.stemmer = xapian.Stem('english') + self._enquire = None # This selects whether to load all other yaml files with # safe_load or load + RoundTripLoader self.safe = True @@ -207,7 +208,10 @@ class Collection(object): ) return cache - def search(self, querystring, offset=0, pagesize=12): + def start_search(self, querystring): + """ + Prepare a search for querystring. + """ cache = self._get_cache() queryparser = xapian.QueryParser() queryparser.set_stemmer(self.stemmer) @@ -217,12 +221,28 @@ class Collection(object): query = queryparser.parse_query(querystring) - enquire = xapian.Enquire(cache) - enquire.set_query(query) + self._enquire = xapian.Enquire(cache) + self._enquire.set_query(query) - for match in enquire.get_mset(offset, pagesize): + def get_search_results(self, offset=0, pagesize=12): + if not self._enquire: + return + for match in self._enquire.get_mset(offset, pagesize): yield self._match_to_entry(match) + def get_all_search_results(self): + if not self._enquire: + return + offset = 0 + pagesize = 100 + while True: + mset = self._enquire.get_mset(offset, pagesize) + if mset.size() == 0: + break + for match in mset: + yield self._match_to_entry(match) + offset += pagesize + def _match_to_entry(self, match): fname = match.document.get_value(0).decode('utf-8') if self.safe: diff --git a/lesana/command.py b/lesana/command.py index 9101e8e..9425bd2 100644 --- a/lesana/command.py +++ b/lesana/command.py @@ -142,8 +142,10 @@ class Search(guacamole.Command): help='Am', )), (['--offset'], dict( + type=int, )), (['--pagesize'], dict( + type=int, )), (['query'], dict( help='Xapian query to search in the collection', @@ -170,9 +172,9 @@ class Search(guacamole.Command): offset = ctx.args.offset or 0 pagesize = ctx.args.pagesize or 12 collection = Collection(ctx.args.collection) + collection.start_search(' '.join(ctx.args.query)) # TODO: pass the entries to a proper template - for entry in collection.search( - ' '.join(ctx.args.query), + for entry in collection.get_search_results( offset, pagesize): print(entry) diff --git a/tests/test_collection.py b/tests/test_collection.py index d8190ca..a2c1a87 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -61,14 +61,31 @@ class testCollectionLoading(unittest.TestCase): self.collection.safe = False self.collection.update_cache() + def test_full_search(self): + self.collection = lesana.Collection('tests/data/simple') + self.collection.start_search('Item') + res = self.collection.get_all_search_results() + matches = list(res) + self.assertEqual(len(matches), 2) + for m in matches: + self.assertIsInstance(m, lesana.Entry) + def test_search(self): self.collection = lesana.Collection('tests/data/simple') - res = self.collection.search('Item') + self.collection.start_search('Item') + res = self.collection.get_search_results() matches = list(res) self.assertEqual(len(matches), 2) for m in matches: self.assertIsInstance(m, lesana.Entry) + def test_search_non_init(self): + self.collection = lesana.Collection('tests/data/simple') + matches = list(self.collection.get_search_results()) + self.assertEqual(matches, []) + matches = list(self.collection.get_all_search_results()) + self.assertEqual(matches, []) + def test_entry_from_uid(self): self.collection = lesana.Collection('tests/data/simple') entry = self.collection.entry_from_uid( @@ -197,3 +214,7 @@ class testCollectionCreation(unittest.TestCase): self.assertTrue(os.path.isfile(os.path.join(tmpdir, 'settings.yaml'))) self.assertFalse(os.path.isfile(os.path.join(tmpdir, '.gitignore'))) shutil.rmtree(tmpdir) + + +if __name__ == '__main__': + unittest.main() -- cgit v1.2.3