summary refs log tree commit diff
diff options
context:
space:
mode:
author Elena ``of Valhalla'' Grandi <valhalla@trueelena.org> 2016-12-11 21:06:26 +0100
committer Elena ``of Valhalla'' Grandi <valhalla@trueelena.org> 2016-12-11 21:06:26 +0100
commit 277c063e32e52106956b570e5d965d663969c79f (patch)
tree 319948df294f6800bc4f1fd4964625cdfcc3d7df
parent ad04af4812bbdebe47ce58333cc48332197e61b1 (diff)
Load some data into xapian
-rw-r--r-- lesana/collection.py 53
-rw-r--r-- tests/data/simple/items/085682ed-6792-499d-a3ab-9aebd683c011.yaml 6
-rw-r--r-- tests/data/simple/items/11189ee47ddf4796b718a483b379f976.yaml 3
-rw-r--r-- tests/data/wrong/schema.yaml 12
-rw-r--r-- tests/test_collection.py 20
5 files changed, 92 insertions, 2 deletions
diff --git a/lesana/collection.py b/lesana/collection.py
index df38e89..9b44b54 100644
--- a/lesana/collection.py
+++ b/lesana/collection.py
@@ -1,9 +1,30 @@
+import logging
import os
import ruamel.yaml
import xapian
+class Entry(object):
+ def __init__(self, collection, data={}):
+ self.collection = collection
+ self.data = data
+
+ def index(self):
+ # TODO: maybe add the ability to add a language specific to the
+ # entry, and change the stemmer
+ doc = xapian.Document()
+ indexer = xapian.TermGenerator()
+ indexer.set_stemmer(self.collection.stemmer)
+ # FIXME: this is obviously wrong, actually index the right
+ # things
+ doc.set_data(str(self.data))
+ indexer.set_document(doc)
+ indexer.index_text(str(self.data))
+ self.collection.cache.add_document(doc)
+
+
+
class Collection(object):
"""
"""
@@ -14,12 +35,26 @@ class Collection(object):
with open(os.path.join(self.basedir, 'schema.yaml')) as fp:
self.schema = ruamel.yaml.load(fp, ruamel.yaml.RoundTripLoader)
except FileNotFoundError:
- self.schema = ruamel.yaml.load("")
+ self.schema = ruamel.yaml.load("{}")
os.makedirs(os.path.join(self.basedir, '.lesana'), exist_ok=True)
self.cache = xapian.WritableDatabase(
os.path.join(self.basedir, '.lesana/xapian'),
xapian.DB_CREATE_OR_OPEN
)
+ if 'lang' in self.schema:
+ try:
+ self.stemmer = xapian.Stem(self.schema['lang'])
+ except xapian.InvalidArgumentError:
+ logging.warning(
+ "Invalid language %s, in schema.yaml: using english.",
+ self.schema['lang']
+ )
+ self.stemmer = xapian.Stem('english')
+ else:
+ self.stemmer = xapian.Stem('english')
+ # This selects whether to load all other yaml files with
+ # safe_load or load + RoundTripLoader
+ self.safe = True
def update_cache(self, files=None):
"""
@@ -27,3 +62,19 @@ class Collection(object):
If no files have been passed, add everything.
"""
+ if not files:
+ try:
+ files = os.listdir(os.path.join(self.basedir, 'items'))
+ except FileNotFoundError:
+ logging.warning(
+ "No such file or directory: %s, not updating cache",
+ os.path.join(self.basedir, 'items'))
+ return False
+ for fname in files:
+ with open(os.path.join(self.basedir, 'items', fname)) as fp:
+ if self.safe:
+ data = ruamel.yaml.safe_load(fp)
+ else:
+ data = ruamel.yaml.load(fp, ruamel.yaml.RoundTripLoader)
+ entry = Entry(self, data)
+ entry.index()
diff --git a/tests/data/simple/items/085682ed-6792-499d-a3ab-9aebd683c011.yaml b/tests/data/simple/items/085682ed-6792-499d-a3ab-9aebd683c011.yaml
new file mode 100644
index 0000000..a66bb67
--- /dev/null
+++ b/tests/data/simple/items/085682ed-6792-499d-a3ab-9aebd683c011.yaml
@@ -0,0 +1,6 @@
+name: One Item
+description: |
+ This is a long block of test
+ that spans multiple lines.
+position: somewhere
+id: 085682ed6792499da3ab9aebd683c011
diff --git a/tests/data/simple/items/11189ee47ddf4796b718a483b379f976.yaml b/tests/data/simple/items/11189ee47ddf4796b718a483b379f976.yaml
new file mode 100644
index 0000000..e1f7fc1
--- /dev/null
+++ b/tests/data/simple/items/11189ee47ddf4796b718a483b379f976.yaml
@@ -0,0 +1,3 @@
+name: Another item
+description: with just a short description
+position: somewhere
diff --git a/tests/data/wrong/schema.yaml b/tests/data/wrong/schema.yaml
new file mode 100644
index 0000000..56031e4
--- /dev/null
+++ b/tests/data/wrong/schema.yaml
@@ -0,0 +1,12 @@
+name: "Lesana collection with certain errors"
+lang: 'somethingish'
+fields:
+ - name: name
+ type: string
+ index: free
+ - name: description
+ type: text
+ index: free
+ - name: position
+ type: string
+ index: facet
diff --git a/tests/test_collection.py b/tests/test_collection.py
index 64dd293..d0a9864 100644
--- a/tests/test_collection.py
+++ b/tests/test_collection.py
@@ -1,3 +1,4 @@
+import logging
import os.path
import shutil
import unittest
@@ -11,8 +12,11 @@ class testCollectionLoading(unittest.TestCase):
def test_empty(self):
self.collection = lesana.Collection('tests/data/empty')
- self.assertIsNone(self.collection.schema)
+ self.assertEqual(self.collection.schema, {})
self.assertIsNotNone(self.collection.cache)
+ self.assertIsNotNone(self.collection.stemmer)
+
+ self.collection.update_cache()
def test_simple(self):
self.collection = lesana.Collection('tests/data/simple')
@@ -20,3 +24,17 @@ class testCollectionLoading(unittest.TestCase):
self.assertEqual(self.collection.schema['name'], "Simple lesana collection")
self.assertEqual(len(self.collection.schema['fields']), 3)
self.assertIsNotNone(self.collection.cache)
+ self.assertIsNotNone(self.collection.stemmer)
+
+ self.collection.update_cache()
+
+ def test_wrong_language(self):
+ # This loads a collection with an invalid value in lang
+ with self.assertLogs(level=logging.WARNING) as cm:
+ self.collection = lesana.Collection('tests/data/wrong')
+ self.assertEqual(len(cm.output), 1)
+ self.assertIn("Invalid language", cm.output[0])
+ # The collection will default to english, but should still work.
+ self.assertIsNotNone(self.collection.schema)
+ self.assertIsNotNone(self.collection.cache)
+ self.assertIsNotNone(self.collection.stemmer)