about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- lesana/collection.py 91
-rw-r--r-- lesana/command.py 42
-rwxr-xr-x scripts/lesana 3
-rw-r--r-- tests/data/simple/items/085682ed-6792-499d-a3ab-9aebd683c011.yaml 2
-rw-r--r-- tests/test_collection.py 21
5 files changed, 125 insertions, 34 deletions
diff --git a/lesana/collection.py b/lesana/collection.py
index 2e63ee7..3af1cd4 100644
--- a/lesana/collection.py
+++ b/lesana/collection.py
@@ -19,21 +19,6 @@ class Entry(object):
self.uid = uuid.uuid4().hex
self.fname = self.uid + '.yaml'
- @property
- def indexed_fields(self):
- fields = []
- for field in self.collection.settings['fields']:
- if field['index'] in ['free', 'field']:
- prefix = field.get('prefix', 'X'+field['name'].upper())
- fields.append({
- 'value': self.data.get(field['name']),
- 'prefix': prefix,
- 'name': field['name'],
- 'free_search': field['index'] == 'free',
- })
-
- return fields
-
def empty_data(self):
data = {}
for field in self.collection.settings['fields']:
@@ -62,7 +47,6 @@ class Collection(object):
def __init__(self, directory=None, itemdir='items'):
self.basedir = directory or os.getcwd()
self.itemdir = os.path.join(self.basedir, itemdir)
- self.cache = None
try:
with open(os.path.join(self.basedir, 'settings.yaml')) as fp:
self.settings = ruamel.yaml.load(
@@ -87,38 +71,55 @@ class Collection(object):
# safe_load or load + RoundTripLoader
self.safe = True
- def _index_file(self, fname):
+ def _index_file(self, fname, cache):
with open(os.path.join(self.itemdir, fname)) as fp:
if self.safe:
data = ruamel.yaml.safe_load(fp)
else:
data = ruamel.yaml.load(fp, ruamel.yaml.RoundTripLoader)
- entry = Entry(self, data)
+ entry = Entry(self, data, fname)
doc = xapian.Document()
self.indexer.set_document(doc)
# Fields with prefix, for field search
- for field in entry.indexed_fields:
+ for field in self.indexed_fields:
try:
- self.indexer.index_text(field['value'], 1, field['prefix'])
+ self.indexer.index_text(
+ entry.data.get(field['name']),
+ 1,
+ field['prefix'])
except ValueError as e:
logging.info("Not indexing empty? value {}: {}".format(
- field['value'],
+ entry.data.get(field['name']),
str(e)))
# unprefixed fields, for full text search
- for field in entry.indexed_fields:
+ for field in self.indexed_fields:
if field.get('free_search', False):
try:
- self.indexer.index_text(field['value'])
+ self.indexer.index_text(entry.data.get(field['name']))
self.indexer.increase_termpos()
except ValueError as e:
# probably already logged earlier
pass
doc.set_data(entry.yaml_data)
doc.add_boolean_term(entry.idterm)
+ doc.add_value(0, entry.fname)
- self.cache.replace_document(entry.idterm, doc)
+ cache.replace_document(entry.idterm, doc)
+
+ @property
+ def indexed_fields(self):
+ fields = []
+ for field in self.settings['fields']:
+ if field['index'] in ['free', 'field']:
+ prefix = field.get('prefix', 'X'+field['name'].upper())
+ fields.append({
+ 'prefix': prefix,
+ 'name': field['name'],
+ 'free_search': field['index'] == 'free',
+ })
+ return fields
def update_cache(self, fnames=None):
"""
@@ -130,7 +131,7 @@ class Collection(object):
Return the number of files that have been added to the cache.
"""
- self.cache = xapian.WritableDatabase(
+ cache = xapian.WritableDatabase(
os.path.join(self.basedir, '.lesana/xapian'),
xapian.DB_CREATE_OR_OPEN
)
@@ -148,7 +149,7 @@ class Collection(object):
updated = 0
for fname in fnames:
try:
- self._index_file(fname)
+ self._index_file(fname, cache)
except IOError as e:
logging.warning("Could not load file {}: {}".format(
fname,
@@ -168,3 +169,41 @@ class Collection(object):
)
with open(complete_name, 'w') as fp:
fp.write(e.yaml_data)
+
+ def search(self, querystring, offset=0, pagesize=12):
+ try:
+ cache = xapian.Database(
+ os.path.join(self.basedir, '.lesana/xapian'),
+ )
+ except xapian.DatabaseOpeningError:
+ logging.info("No database found, indexing entries.")
+ self.update_cache()
+ cache = xapian.Database(
+ os.path.join(self.basedir, '.lesana/xapian'),
+ )
+ queryparser = xapian.QueryParser()
+ queryparser.set_stemmer(self.stemmer)
+
+ for field in self.indexed_fields:
+ queryparser.add_prefix(field['name'], field['prefix'])
+
+ query = queryparser.parse_query(querystring)
+
+ enquire = xapian.Enquire(cache)
+ enquire.set_query(query)
+
+ for match in enquire.get_mset(offset, pagesize):
+ fname = match.document.get_value(0)
+ if self.safe:
+ data = ruamel.yaml.safe_load(match.document.get_data())
+ else:
+ data = ruamel.yaml.load(
+ match.document.get_data(),
+ ruamel.yaml.RoundTripLoader
+ )
+ entry = Entry(
+ self,
+ data=data,
+ fname=fname,
+ )
+ yield entry
diff --git a/lesana/command.py b/lesana/command.py
index 2f24fa0..7f7f116 100644
--- a/lesana/command.py
+++ b/lesana/command.py
@@ -54,3 +54,45 @@ class Index(gadona.Command):
files = None
indexed = collection.update_cache(fnames=files)
print("Found and indexed {} entries".format(indexed))
+
+
+class Search(gadona.Command):
+ name = 'search'
+ arguments = [
+ (['--collection', '-c'], dict(
+ help='The collection to work on (default .)'
+ )),
+ (['--template', '-t'], dict(
+ help='Am',
+ )),
+ (['--offset'], dict(
+ )),
+ (['--pagesize'], dict(
+ )),
+ (['query'], dict(
+ help='Xapian query to search in the collection',
+ nargs='+'
+ )),
+ ]
+
+ def main(self):
+ # TODO: implement "searching" for everything
+ if self.settings.offset:
+ logging.warning(
+ "offset exposes an internal knob and MAY BE" +
+ "REMOVED from a future release of lesana"
+ )
+ if self.settings.pagesize:
+ logging.warning(
+ "pagesize exposes an internal knob and MAY BE" +
+ "REMOVED from a future release of lesana"
+ )
+ offset = self.settings.offset or 0
+ pagesize = self.settings.pagesize or 12
+ collection = Collection(self.settings.collection)
+ #TODO: pass the entries to a proper template
+ for entry in collection.search(
+ ' '.join(self.settings.query),
+ offset,
+ pagesize):
+ print(entry.fname)
diff --git a/scripts/lesana b/scripts/lesana
index 3886f52..c909888 100755
--- a/scripts/lesana
+++ b/scripts/lesana
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
import gadona
-from lesana.command import New, Index
+from lesana.command import New, Index, Search
if __name__ == '__main__':
app = gadona.App()
@@ -9,5 +9,6 @@ if __name__ == '__main__':
app.commands = [
New(),
Index(),
+ Search(),
]
app.main()
diff --git a/tests/data/simple/items/085682ed-6792-499d-a3ab-9aebd683c011.yaml b/tests/data/simple/items/085682ed-6792-499d-a3ab-9aebd683c011.yaml
index 16d6917..082128c 100644
--- a/tests/data/simple/items/085682ed-6792-499d-a3ab-9aebd683c011.yaml
+++ b/tests/data/simple/items/085682ed-6792-499d-a3ab-9aebd683c011.yaml
@@ -1,6 +1,6 @@
name: One Item
description: |
- This is a long block of test
+ This is a long block of text
that spans multiple lines.
position: somewhere
uid: 085682ed6792499da3ab9aebd683c011
diff --git a/tests/test_collection.py b/tests/test_collection.py
index 68c29f3..ccd535f 100644
--- a/tests/test_collection.py
+++ b/tests/test_collection.py
@@ -17,7 +17,6 @@ class testCollectionLoading(unittest.TestCase):
self.assertEqual(self.collection.settings, {})
self.collection.update_cache()
- self.assertIsNotNone(self.collection.cache)
self.assertIsNotNone(self.collection.stemmer)
def test_simple(self):
@@ -28,9 +27,9 @@ class testCollectionLoading(unittest.TestCase):
"Simple lesana collection"
)
self.assertEqual(len(self.collection.settings['fields']), 4)
+ self.assertEqual(len(self.collection.indexed_fields), 2)
self.collection.update_cache()
- self.assertIsNotNone(self.collection.cache)
self.assertIsNotNone(self.collection.stemmer)
def test_wrong_language(self):
@@ -42,7 +41,6 @@ class testCollectionLoading(unittest.TestCase):
# The collection will default to english, but should still work.
self.collection.update_cache()
self.assertIsNotNone(self.collection.settings)
- self.assertIsNotNone(self.collection.cache)
self.assertIsNotNone(self.collection.stemmer)
def test_unsafe(self):
@@ -50,26 +48,37 @@ class testCollectionLoading(unittest.TestCase):
self.collection.safe = False
self.collection.update_cache()
+ def test_search(self):
+ self.collection = lesana.Collection('tests/data/simple')
+ res = self.collection.search('Item')
+ matches = list(res)
+ self.assertEqual(len(matches), 2)
+ for m in matches:
+ self.assertIsInstance(m, lesana.Entry)
+
+
class testEntries(unittest.TestCase):
def setUp(self):
self.collection = lesana.Collection('tests/data/simple')
self.basepath = 'tests/data/simple/items'
+ def tearDown(self):
+ shutil.rmtree(os.path.join(self.collection.basedir, '.lesana'))
+
+
def test_simple(self):
fname = '085682ed-6792-499d-a3ab-9aebd683c011.yaml'
with open(os.path.join(self.basepath, fname)) as fp:
data = ruamel.yaml.load(fp)
entry = lesana.Entry(self.collection, data=data, fname=fname)
self.assertEqual(entry.idterm, 'Q'+data['uid'])
- self.assertEqual(len(entry.indexed_fields), 2)
fname = '11189ee47ddf4796b718a483b379f976.yaml'
uid = '11189ee47ddf4796b718a483b379f976'
with open(os.path.join(self.basepath, fname)) as fp:
data = ruamel.yaml.load(fp)
entry = lesana.Entry(self.collection, data=data, fname=fname)
self.assertEqual(entry.idterm, 'Q'+uid)
- self.assertEqual(len(entry.indexed_fields), 2)
def test_write_new(self):
new_entry = lesana.Entry(self.collection)
@@ -99,7 +108,7 @@ class testComplexCollection(unittest.TestCase):
)
self.assertEqual(len(self.collection.settings['fields']), 3)
self.assertIsNotNone(self.collection.stemmer)
+ self.assertEqual(len(self.collection.indexed_fields), 2)
def test_index(self):
self.collection.update_cache()
- self.assertIsNotNone(self.collection.cache)