summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lesana/collection.py28
-rw-r--r--lesana/command.py6
-rw-r--r--tests/test_collection.py23
3 files changed, 50 insertions, 7 deletions
diff --git a/lesana/collection.py b/lesana/collection.py
index 81b7a66..e4898f6 100644
--- a/lesana/collection.py
+++ b/lesana/collection.py
@@ -91,6 +91,7 @@ class Collection(object):
self.stemmer = xapian.Stem('english')
else:
self.stemmer = xapian.Stem('english')
+ self._enquire = None
# This selects whether to load all other yaml files with
# safe_load or load + RoundTripLoader
self.safe = True
@@ -207,7 +208,10 @@ class Collection(object):
)
return cache
- def search(self, querystring, offset=0, pagesize=12):
+ def start_search(self, querystring):
+ """
+ Prepare a search for querystring.
+ """
cache = self._get_cache()
queryparser = xapian.QueryParser()
queryparser.set_stemmer(self.stemmer)
@@ -217,12 +221,28 @@ class Collection(object):
query = queryparser.parse_query(querystring)
- enquire = xapian.Enquire(cache)
- enquire.set_query(query)
+ self._enquire = xapian.Enquire(cache)
+ self._enquire.set_query(query)
- for match in enquire.get_mset(offset, pagesize):
+ def get_search_results(self, offset=0, pagesize=12):
+ if not self._enquire:
+ return
+ for match in self._enquire.get_mset(offset, pagesize):
yield self._match_to_entry(match)
+ def get_all_search_results(self):
+ if not self._enquire:
+ return
+ offset = 0
+ pagesize = 100
+ while True:
+ mset = self._enquire.get_mset(offset, pagesize)
+ if mset.size() == 0:
+ break
+ for match in mset:
+ yield self._match_to_entry(match)
+ offset += pagesize
+
def _match_to_entry(self, match):
fname = match.document.get_value(0).decode('utf-8')
if self.safe:
diff --git a/lesana/command.py b/lesana/command.py
index 9101e8e..9425bd2 100644
--- a/lesana/command.py
+++ b/lesana/command.py
@@ -142,8 +142,10 @@ class Search(guacamole.Command):
help='Am',
)),
(['--offset'], dict(
+ type=int,
)),
(['--pagesize'], dict(
+ type=int,
)),
(['query'], dict(
help='Xapian query to search in the collection',
@@ -170,9 +172,9 @@ class Search(guacamole.Command):
offset = ctx.args.offset or 0
pagesize = ctx.args.pagesize or 12
collection = Collection(ctx.args.collection)
+ collection.start_search(' '.join(ctx.args.query))
# TODO: pass the entries to a proper template
- for entry in collection.search(
- ' '.join(ctx.args.query),
+ for entry in collection.get_search_results(
offset,
pagesize):
print(entry)
diff --git a/tests/test_collection.py b/tests/test_collection.py
index d8190ca..a2c1a87 100644
--- a/tests/test_collection.py
+++ b/tests/test_collection.py
@@ -61,14 +61,31 @@ class testCollectionLoading(unittest.TestCase):
self.collection.safe = False
self.collection.update_cache()
+ def test_full_search(self):
+ self.collection = lesana.Collection('tests/data/simple')
+ self.collection.start_search('Item')
+ res = self.collection.get_all_search_results()
+ matches = list(res)
+ self.assertEqual(len(matches), 2)
+ for m in matches:
+ self.assertIsInstance(m, lesana.Entry)
+
def test_search(self):
self.collection = lesana.Collection('tests/data/simple')
- res = self.collection.search('Item')
+ self.collection.start_search('Item')
+ res = self.collection.get_search_results()
matches = list(res)
self.assertEqual(len(matches), 2)
for m in matches:
self.assertIsInstance(m, lesana.Entry)
+ def test_search_non_init(self):
+ self.collection = lesana.Collection('tests/data/simple')
+ matches = list(self.collection.get_search_results())
+ self.assertEqual(matches, [])
+ matches = list(self.collection.get_all_search_results())
+ self.assertEqual(matches, [])
+
def test_entry_from_uid(self):
self.collection = lesana.Collection('tests/data/simple')
entry = self.collection.entry_from_uid(
@@ -197,3 +214,7 @@ class testCollectionCreation(unittest.TestCase):
self.assertTrue(os.path.isfile(os.path.join(tmpdir, 'settings.yaml')))
self.assertFalse(os.path.isfile(os.path.join(tmpdir, '.gitignore')))
shutil.rmtree(tmpdir)
+
+
+if __name__ == '__main__':
+ unittest.main()