about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- lesana/collection.py | 79
-rw-r--r-- tests/test_collection.py | 14
2 files changed, 65 insertions, 28 deletions
diff --git a/lesana/collection.py b/lesana/collection.py
index 9b44b54..64b4246 100644
--- a/lesana/collection.py
+++ b/lesana/collection.py
@@ -10,18 +10,16 @@ class Entry(object):
self.collection = collection
self.data = data
- def index(self):
- # TODO: maybe add the ability to add a language specific to the
- # entry, and change the stemmer
- doc = xapian.Document()
- indexer = xapian.TermGenerator()
- indexer.set_stemmer(self.collection.stemmer)
- # FIXME: this is obviously wrong, actually index the right
- # things
- doc.set_data(str(self.data))
- indexer.set_document(doc)
- indexer.index_text(str(self.data))
- self.collection.cache.add_document(doc)
+ def indexed_fields(self):
+ return []
+
+ @property
+ def yaml_data(self):
+ return ""
+
+ @property
+ def idterm(self):
+ return "Q"
@@ -37,10 +35,6 @@ class Collection(object):
except FileNotFoundError:
self.schema = ruamel.yaml.load("{}")
os.makedirs(os.path.join(self.basedir, '.lesana'), exist_ok=True)
- self.cache = xapian.WritableDatabase(
- os.path.join(self.basedir, '.lesana/xapian'),
- xapian.DB_CREATE_OR_OPEN
- )
if 'lang' in self.schema:
try:
self.stemmer = xapian.Stem(self.schema['lang'])
@@ -56,12 +50,46 @@ class Collection(object):
# safe_load or load + RoundTripLoader
self.safe = True
+ def _index_file(self, fname):
+ with open(os.path.join(self.basedir, 'items', fname)) as fp:
+ if self.safe:
+ data = ruamel.yaml.safe_load(fp)
+ else:
+ data = ruamel.yaml.load(fp, ruamel.yaml.RoundTripLoader)
+ entry = Entry(self, data)
+
+ doc = xapian.Document()
+ self.indexer.set_document(doc)
+ # FIXME: this is obviously wrong, actually index the right
+ # things
+
+ # Fields with prefix, for field search
+ for field in entry.indexed_fields():
+ self.indexer.index_text(field['value'], 1, field['prefix'])
+ # unprefixed fields, for full text search
+ for field in entry.indexed_fields():
+ if field.get('free_search', False):
+ self.indexer.index_text(field['value'])
+ self.indexer.increase_termpos()
+ doc.set_data(entry.yaml_data)
+ doc.add_boolean_term(entry.idterm)
+
+ self.cache.replace_document(entry.idterm, doc)
+
def update_cache(self, files=None):
"""
Update the xapian db with the data in files.
If no files have been passed, add everything.
+
+ Return the number of files that have been added to the cache.
"""
+ self.cache = xapian.WritableDatabase(
+ os.path.join(self.basedir, '.lesana/xapian'),
+ xapian.DB_CREATE_OR_OPEN
+ )
+ self.indexer = xapian.TermGenerator()
+ self.indexer.set_stemmer(self.stemmer)
if not files:
try:
files = os.listdir(os.path.join(self.basedir, 'items'))
@@ -69,12 +97,15 @@ class Collection(object):
logging.warning(
"No such file or directory: %s, not updating cache",
os.path.join(self.basedir, 'items'))
- return False
+ return 0
+ updated = 0
for fname in files:
- with open(os.path.join(self.basedir, 'items', fname)) as fp:
- if self.safe:
- data = ruamel.yaml.safe_load(fp)
- else:
- data = ruamel.yaml.load(fp, ruamel.yaml.RoundTripLoader)
- entry = Entry(self, data)
- entry.index()
+ try:
+ self._index_file(fname)
+ except IOError as e:
+ logging.warning("Could not load file {}: {}".format(
+ fname,
+ str(e)))
+ else:
+ updated += 1
+ return updated
diff --git a/tests/test_collection.py b/tests/test_collection.py
index d0a9864..b70c85f 100644
--- a/tests/test_collection.py
+++ b/tests/test_collection.py
@@ -13,20 +13,20 @@ class testCollectionLoading(unittest.TestCase):
def test_empty(self):
self.collection = lesana.Collection('tests/data/empty')
self.assertEqual(self.collection.schema, {})
- self.assertIsNotNone(self.collection.cache)
- self.assertIsNotNone(self.collection.stemmer)
self.collection.update_cache()
+ self.assertIsNotNone(self.collection.cache)
+ self.assertIsNotNone(self.collection.stemmer)
def test_simple(self):
self.collection = lesana.Collection('tests/data/simple')
self.assertIsNotNone(self.collection.schema)
self.assertEqual(self.collection.schema['name'], "Simple lesana collection")
self.assertEqual(len(self.collection.schema['fields']), 3)
- self.assertIsNotNone(self.collection.cache)
- self.assertIsNotNone(self.collection.stemmer)
self.collection.update_cache()
+ self.assertIsNotNone(self.collection.cache)
+ self.assertIsNotNone(self.collection.stemmer)
def test_wrong_language(self):
# This loads a collection with an invalid value in lang
@@ -35,6 +35,12 @@ class testCollectionLoading(unittest.TestCase):
self.assertEqual(len(cm.output), 1)
self.assertIn("Invalid language", cm.output[0])
# The collection will default to english, but should still work.
+ self.collection.update_cache()
self.assertIsNotNone(self.collection.schema)
self.assertIsNotNone(self.collection.cache)
self.assertIsNotNone(self.collection.stemmer)
+
+ def test_unsafe(self):
+ self.collection = lesana.Collection('tests/data/simple')
+ self.collection.safe = False
+ self.collection.update_cache()