diff options
author | Elena ``of Valhalla'' Grandi <valhalla@trueelena.org> | 2016-12-11 21:06:26 +0100 |
---|---|---|
committer | Elena ``of Valhalla'' Grandi <valhalla@trueelena.org> | 2016-12-11 21:06:26 +0100 |
commit | 277c063e32e52106956b570e5d965d663969c79f (patch) | |
tree | 319948df294f6800bc4f1fd4964625cdfcc3d7df | |
parent | ad04af4812bbdebe47ce58333cc48332197e61b1 (diff) |
Load some data into xapian
-rw-r--r-- | lesana/collection.py | 53 | ||||
-rw-r--r-- | tests/data/simple/items/085682ed-6792-499d-a3ab-9aebd683c011.yaml | 6 | ||||
-rw-r--r-- | tests/data/simple/items/11189ee47ddf4796b718a483b379f976.yaml | 3 | ||||
-rw-r--r-- | tests/data/wrong/schema.yaml | 12 | ||||
-rw-r--r-- | tests/test_collection.py | 20 |
5 files changed, 92 insertions, 2 deletions
diff --git a/lesana/collection.py b/lesana/collection.py index df38e89..9b44b54 100644 --- a/lesana/collection.py +++ b/lesana/collection.py @@ -1,9 +1,30 @@ +import logging import os import ruamel.yaml import xapian +class Entry(object): + def __init__(self, collection, data={}): + self.collection = collection + self.data = data + + def index(self): + # TODO: maybe add the ability to add a language specific to the + # entry, and change the stemmer + doc = xapian.Document() + indexer = xapian.TermGenerator() + indexer.set_stemmer(self.collection.stemmer) + # FIXME: this is obviously wrong, actually index the right + # things + doc.set_data(str(self.data)) + indexer.set_document(doc) + indexer.index_text(str(self.data)) + self.collection.cache.add_document(doc) + + + class Collection(object): """ """ @@ -14,12 +35,26 @@ class Collection(object): with open(os.path.join(self.basedir, 'schema.yaml')) as fp: self.schema = ruamel.yaml.load(fp, ruamel.yaml.RoundTripLoader) except FileNotFoundError: - self.schema = ruamel.yaml.load("") + self.schema = ruamel.yaml.load("{}") os.makedirs(os.path.join(self.basedir, '.lesana'), exist_ok=True) self.cache = xapian.WritableDatabase( os.path.join(self.basedir, '.lesana/xapian'), xapian.DB_CREATE_OR_OPEN ) + if 'lang' in self.schema: + try: + self.stemmer = xapian.Stem(self.schema['lang']) + except xapian.InvalidArgumentError: + logging.warning( + "Invalid language %s, in schema.yaml: using english.", + self.schema['lang'] + ) + self.stemmer = xapian.Stem('english') + else: + self.stemmer = xapian.Stem('english') + # This selects whether to load all other yaml files with + # safe_load or load + RoundTripLoader + self.safe = True def update_cache(self, files=None): """ @@ -27,3 +62,19 @@ class Collection(object): If no files have been passed, add everything. """ + if not files: + try: + files = os.listdir(os.path.join(self.basedir, 'items')) + except FileNotFoundError: + logging.warning( + "No such file or directory: %s, not updating cache", + os.path.join(self.basedir, 'items')) + return False + for fname in files: + with open(os.path.join(self.basedir, 'items', fname)) as fp: + if self.safe: + data = ruamel.yaml.safe_load(fp) + else: + data = ruamel.yaml.load(fp, ruamel.yaml.RoundTripLoader) + entry = Entry(self, data) + entry.index() diff --git a/tests/data/simple/items/085682ed-6792-499d-a3ab-9aebd683c011.yaml b/tests/data/simple/items/085682ed-6792-499d-a3ab-9aebd683c011.yaml new file mode 100644 index 0000000..a66bb67 --- /dev/null +++ b/tests/data/simple/items/085682ed-6792-499d-a3ab-9aebd683c011.yaml @@ -0,0 +1,6 @@ +name: One Item +description: | + This is a long block of test + that spans multiple lines. +position: somewhere +id: 085682ed6792499da3ab9aebd683c011 diff --git a/tests/data/simple/items/11189ee47ddf4796b718a483b379f976.yaml b/tests/data/simple/items/11189ee47ddf4796b718a483b379f976.yaml new file mode 100644 index 0000000..e1f7fc1 --- /dev/null +++ b/tests/data/simple/items/11189ee47ddf4796b718a483b379f976.yaml @@ -0,0 +1,3 @@ +name: Another item +description: with just a short description +position: somewhere diff --git a/tests/data/wrong/schema.yaml b/tests/data/wrong/schema.yaml new file mode 100644 index 0000000..56031e4 --- /dev/null +++ b/tests/data/wrong/schema.yaml @@ -0,0 +1,12 @@ +name: "Lesana collection with certain errors" +lang: 'somethingish' +fields: + - name: name + type: string + index: free + - name: description + type: text + index: free + - name: position + type: string + index: facet diff --git a/tests/test_collection.py b/tests/test_collection.py index 64dd293..d0a9864 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -1,3 +1,4 @@ +import logging import os.path import shutil import unittest @@ -11,8 +12,11 @@ class testCollectionLoading(unittest.TestCase): def test_empty(self): self.collection = lesana.Collection('tests/data/empty') - self.assertIsNone(self.collection.schema) + self.assertEqual(self.collection.schema, {}) self.assertIsNotNone(self.collection.cache) + self.assertIsNotNone(self.collection.stemmer) + + self.collection.update_cache() def test_simple(self): self.collection = lesana.Collection('tests/data/simple') @@ -20,3 +24,17 @@ class testCollectionLoading(unittest.TestCase): self.assertEqual(self.collection.schema['name'], "Simple lesana collection") self.assertEqual(len(self.collection.schema['fields']), 3) self.assertIsNotNone(self.collection.cache) + self.assertIsNotNone(self.collection.stemmer) + + self.collection.update_cache() + + def test_wrong_language(self): + # This loads a collection with an invalid value in lang + with self.assertLogs(level=logging.WARNING) as cm: + self.collection = lesana.Collection('tests/data/wrong') + self.assertEqual(len(cm.output), 1) + self.assertIn("Invalid language", cm.output[0]) + # The collection will default to english, but should still work. + self.assertIsNotNone(self.collection.schema) + self.assertIsNotNone(self.collection.cache) + self.assertIsNotNone(self.collection.stemmer) |