summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Elena ``of Valhalla'' Grandi <valhalla@trueelena.org> 2017-11-21 16:56:21 +0100
committer Elena ``of Valhalla'' Grandi <valhalla@trueelena.org> 2017-11-21 16:56:21 +0100
commit bb14bc77bd21284136ed7cc092b93c1a8d6026c1 (patch)
tree 4fb57b42da2e5cc24367ad9dbca682ab8be5a80b
parent 043e99bf5416e0899c02490fa600be4cfd049da1 (diff)
parent 851914e5da250e6a38e83bccf182ab8af1db0d32 (diff)
Merge branch 'master' into tellico
-rw-r--r-- README.rst 4
-rw-r--r-- lesana/collection.py 110
-rw-r--r-- tests/data/complex/items/28b15099c84b41ab892133cd64876a32.yaml 6
-rw-r--r-- tests/data/complex/items/73097121f1874a6ea2f927db7dc4f11e.yaml 10
-rw-r--r-- tests/data/complex/settings.yaml 7
-rw-r--r-- tests/data/wrong/items/b9a832309c984ada9f267471660c1313.yaml 5
-rw-r--r-- tests/data/wrong/settings.yaml 3
-rw-r--r-- tests/test_collection.py 48
8 files changed, 169 insertions, 24 deletions
diff --git a/README.rst b/README.rst
index f62f4a1..fe67833 100644
--- a/README.rst
+++ b/README.rst
@@ -45,6 +45,10 @@ Under debian (and derivatives), the packages to install are::
(some of those are only available on stretch+ because earlier
versions lacked python3 support.)
+lesana can be run in place from the git checkout / extracted tarball; to
+use ``setup.py`` you will also need setuptools (e.g. from the
+``python3-setuptools`` package under debian and derivatives).
+
License
-------
diff --git a/lesana/collection.py b/lesana/collection.py
index 2ac99aa..fbdb935 100644
--- a/lesana/collection.py
+++ b/lesana/collection.py
@@ -64,10 +64,49 @@ class Entry(object):
def idterm(self):
return "Q"+self.uid
+ def validate(self):
+ errors = []
+ valid = True
+ for field in self.collection.settings['fields']:
+ value = self.data.get(field['name'], None)
+ if not value:
+ # empty fields are always fine
+ continue
+ t = field['type']
+ if t == 'integer':
+ try:
+ int(value)
+ except ValueError:
+ valid = False
+ errors.append({
+ 'field': field['name'],
+ 'error': 'Invalid value for integer field: {}'.format(
+ value
+ ),
+ })
+ elif t == 'float':
+ try:
+ float(value)
+ except ValueError:
+ valid = False
+ errors.append({
+ 'field': field['name'],
+ 'error': 'Invalid value for float field: {}'.format(
+ value
+ ),
+ })
+ return valid, errors
+
class Collection(object):
"""
"""
+ PARSER_FLAGS = (
+ xapian.QueryParser.FLAG_BOOLEAN |
+ xapian.QueryParser.FLAG_PHRASE |
+ xapian.QueryParser.FLAG_LOVEHATE |
+ xapian.QueryParser.FLAG_WILDCARD
+ )
def __init__(self, directory=None, itemdir='items'):
self.basedir = directory or os.getcwd()
@@ -96,6 +135,7 @@ class Collection(object):
# This selects whether to load all other yaml files with
# safe_load or load + RoundTripLoader
self.safe = False
+ self.entry_class = Entry
def _index_file(self, fname, cache):
with open(os.path.join(self.itemdir, fname)) as fp:
@@ -103,36 +143,55 @@ class Collection(object):
data = ruamel.yaml.safe_load(fp)
else:
data = ruamel.yaml.load(fp, ruamel.yaml.RoundTripLoader)
- entry = Entry(self, data, fname)
+ entry = self.entry_class(self, data, fname)
+ valid, errors = entry.validate()
+ if not valid:
+ logging.warning(
+ "Not indexing {fname}: invalid data".format(fname=fname)
+ )
+ return False, errors
doc = xapian.Document()
self.indexer.set_document(doc)
# Fields with prefix, for field search
for field in self.indexed_fields:
- try:
- self.indexer.index_text(
- entry.data.get(field['name']),
- 1,
- field['prefix'])
- except ValueError as e:
- logging.info("Not indexing empty? value {}: {}".format(
- entry.data.get(field['name']),
- str(e)))
+ if field['multi']:
+ values = entry.data.get(field['name'])
+ else:
+ values = [entry.data.get(field['name'])]
+ if not values:
+ values = []
+ for v in values:
+ try:
+ self.indexer.index_text(
+ v,
+ 1,
+ field['prefix'])
+ except ValueError as e:
+ logging.info("Not indexing empty? value {}: {}".format(
+ entry.data.get(field['name']),
+ str(e)))
# unprefixed fields, for full text search
for field in self.indexed_fields:
if field.get('free_search', False):
- try:
- self.indexer.index_text(entry.data.get(field['name']))
- self.indexer.increase_termpos()
- except ValueError as e:
- # probably already logged earlier
- pass
+ if field['multi']:
+ values = entry.data.get(field['name'])
+ else:
+ values = [entry.data.get(field['name'])]
+ for v in values:
+ try:
+ self.indexer.index_text(v)
+ self.indexer.increase_termpos()
+ except ValueError as e:
+ # probably already logged earlier
+ pass
doc.set_data(entry.yaml_data)
doc.add_boolean_term(entry.idterm)
doc.add_value(0, entry.fname.encode('utf-8'))
cache.replace_document(entry.idterm, doc)
+ return True, []
@property
def indexed_fields(self):
@@ -144,6 +203,7 @@ class Collection(object):
'prefix': prefix,
'name': field['name'],
'free_search': field['index'] == 'free',
+ 'multi': field['type'] in ['list']
})
return fields
@@ -175,14 +235,21 @@ class Collection(object):
updated = 0
for fname in fnames:
try:
- self._index_file(fname, cache)
+ valid, errors = self._index_file(fname, cache)
except IOError as e:
logging.warning("Could not load file {}: {}".format(
fname,
str(e))
)
else:
- updated += 1
+ if valid:
+ updated += 1
+ else:
+ logging.warning(
+ "File {fname} could not be indexed: {errors}".format(
+ fname=fname,
+ errors=errors)
+ )
return updated
def save_entries(self, entries=[]):
@@ -235,11 +302,14 @@ class Collection(object):
cache = self._get_cache()
queryparser = xapian.QueryParser()
queryparser.set_stemmer(self.stemmer)
+ queryparser.set_database(cache)
for field in self.indexed_fields:
queryparser.add_prefix(field['name'], field['prefix'])
- query = queryparser.parse_query(querystring)
+ query = queryparser.parse_query(
+ querystring,
+ self.PARSER_FLAGS)
self._enquire = xapian.Enquire(cache)
self._enquire.set_query(query)
@@ -282,7 +352,7 @@ class Collection(object):
doc.get_data(),
ruamel.yaml.RoundTripLoader
)
- entry = Entry(
+ entry = self.entry_class(
self,
data=data,
fname=fname,
diff --git a/tests/data/complex/items/28b15099c84b41ab892133cd64876a32.yaml b/tests/data/complex/items/28b15099c84b41ab892133cd64876a32.yaml
new file mode 100644
index 0000000..58b84bb
--- /dev/null
+++ b/tests/data/complex/items/28b15099c84b41ab892133cd64876a32.yaml
@@ -0,0 +1,6 @@
+name: 'A tagless item'
+description: |
+ .
+position: 'somewhere'
+something: ''
+tags:
diff --git a/tests/data/complex/items/73097121f1874a6ea2f927db7dc4f11e.yaml b/tests/data/complex/items/73097121f1874a6ea2f927db7dc4f11e.yaml
new file mode 100644
index 0000000..1c7070c
--- /dev/null
+++ b/tests/data/complex/items/73097121f1874a6ea2f927db7dc4f11e.yaml
@@ -0,0 +1,10 @@
+name: 'An item'
+description: |
+ multi
+ line
+ description
+position: 'over there'
+something: ''
+tags:
+ - this
+ - that
diff --git a/tests/data/complex/settings.yaml b/tests/data/complex/settings.yaml
index 57a1773..bd2179c 100644
--- a/tests/data/complex/settings.yaml
+++ b/tests/data/complex/settings.yaml
@@ -1,5 +1,6 @@
name: "Fully featured lesana collection"
lang: 'english'
+entry_label: '{{ uid}}: {{ name }} ({{ tags }})'
fields:
- name: name
type: string
@@ -11,6 +12,10 @@ fields:
index: free
- name: position
type: string
- index: facet
+ index: field
- name: something
type: yaml
+ - name: tags
+ type: list
+ list: string
+ index: field
diff --git a/tests/data/wrong/items/b9a832309c984ada9f267471660c1313.yaml b/tests/data/wrong/items/b9a832309c984ada9f267471660c1313.yaml
new file mode 100644
index 0000000..ec44b7c
--- /dev/null
+++ b/tests/data/wrong/items/b9a832309c984ada9f267471660c1313.yaml
@@ -0,0 +1,5 @@
+name: 'Problematic entry'
+description: |
+ .
+position: 'somewhere'
+number: 'four'
diff --git a/tests/data/wrong/settings.yaml b/tests/data/wrong/settings.yaml
index 9871421..ef9ab74 100644
--- a/tests/data/wrong/settings.yaml
+++ b/tests/data/wrong/settings.yaml
@@ -9,3 +9,6 @@ fields:
- name: position
type: string
index: facet
+ - name: number
+ type: integer
+ help: "Enter an integer here"
diff --git a/tests/test_collection.py b/tests/test_collection.py
index 869828a..84f0a7a 100644
--- a/tests/test_collection.py
+++ b/tests/test_collection.py
@@ -54,7 +54,7 @@ class testCollection(unittest.TestCase):
self.assertIsNotNone(self.collection.settings)
self.assertIsNotNone(self.collection.stemmer)
# Fields with no "index" entry are not indexed
- self.assertEqual(len(self.collection.settings['fields']), 3)
+ self.assertEqual(len(self.collection.settings['fields']), 4)
self.assertEqual(len(self.collection.indexed_fields), 1)
def test_load_safe(self):
@@ -80,6 +80,15 @@ class testCollection(unittest.TestCase):
for m in matches:
self.assertIsInstance(m, lesana.Entry)
+ def test_search_wildcard(self):
+ self.collection = lesana.Collection('tests/data/simple')
+ self.collection.start_search('Ite*')
+ res = self.collection.get_search_results()
+ matches = list(res)
+ self.assertEqual(len(matches), 2)
+ for m in matches:
+ self.assertIsInstance(m, lesana.Entry)
+
def test_search_non_init(self):
self.collection = lesana.Collection('tests/data/simple')
matches = list(self.collection.get_search_results())
@@ -232,13 +241,46 @@ class testComplexCollection(unittest.TestCase):
self.collection.settings['name'],
"Fully featured lesana collection"
)
- self.assertEqual(len(self.collection.settings['fields']), 4)
+ self.assertEqual(len(self.collection.settings['fields']), 5)
self.assertIsNotNone(self.collection.stemmer)
- self.assertEqual(len(self.collection.indexed_fields), 2)
+ self.assertEqual(len(self.collection.indexed_fields), 4)
def test_index(self):
self.collection.update_cache()
+ def test_indexing_list(self):
+ self.collection.update_cache(['73097121f1874a6ea2f927db7dc4f11e.yaml'])
+ self.collection.start_search('tags:this')
+ res = self.collection.get_search_results()
+ matches = list(res)
+ self.assertEqual(len(matches), 1)
+ for m in matches:
+ self.assertIsInstance(m, lesana.Entry)
+
+
+class testCollectionWithErrors(unittest.TestCase):
+ @classmethod
+ def setUpClass(self):
+ self.collection = lesana.Collection('tests/data/wrong')
+
+ @classmethod
+ def tearDownClass(self):
+ shutil.rmtree(os.path.join(self.collection.basedir, '.lesana'))
+
+ def test_init(self):
+ self.assertIsNotNone(self.collection.settings)
+ self.assertEqual(
+ self.collection.settings['name'],
+ "Lesana collection with certain errors"
+ )
+ self.assertEqual(len(self.collection.settings['fields']), 4)
+ self.assertIsNotNone(self.collection.stemmer)
+ self.assertEqual(len(self.collection.indexed_fields), 1)
+
+ def test_index(self):
+ loaded = self.collection.update_cache()
+ self.assertEqual(loaded, 0)
+
class testCollectionCreation(unittest.TestCase):
def test_init(self):