about summary refs log tree commit diff
diff options
context:
space:
mode:
author Elena ``of Valhalla'' Grandi <valhalla@trueelena.org> 2017-08-17 22:27:53 +0200
committer Elena ``of Valhalla'' Grandi <valhalla@trueelena.org> 2017-08-17 22:27:53 +0200
commit b368e0744d440cb2ba5fde8593325071b8389240 (patch)
tree 6290af3826d99b6293dfed30f1cc0d56d59f606b
parent d4d1b774cd487afaeaf3dee9f9c22e8261503c08 (diff)
Start validating entries before indexing them.
-rw-r--r-- lesana/collection.py 54
-rw-r--r-- tests/data/wrong/items/b9a832309c984ada9f267471660c1313.yaml 5
-rw-r--r-- tests/data/wrong/settings.yaml 3
-rw-r--r-- tests/test_collection.py 26
4 files changed, 84 insertions, 4 deletions
diff --git a/lesana/collection.py b/lesana/collection.py
index 738f6ad..8f76ea7 100644
--- a/lesana/collection.py
+++ b/lesana/collection.py
@@ -64,6 +64,40 @@ class Entry(object):
def idterm(self):
return "Q"+self.uid
+ def validate(self):
+ print("validating", self)
+ errors = []
+ valid = True
+ for field in self.collection.settings['fields']:
+ value = self.data.get(field['name'], None)
+ if not value:
+ # empty fields are always fine
+ continue
+ t = field['type']
+ if t == 'integer':
+ try:
+ int(value)
+ except ValueError:
+ valid = False
+ errors.append({
+ 'field': field['name'],
+ 'error': 'Invalid value for integer field: {}'.format(
+ value
+ ),
+ })
+ elif t == 'float':
+ try:
+ float(value)
+ except ValueError:
+ valid = False
+ errors.append({
+ 'field': field['name'],
+ 'error': 'Invalid value for float field: {}'.format(
+ value
+ ),
+ })
+ return valid, errors
+
class Collection(object):
"""
@@ -104,7 +138,13 @@ class Collection(object):
data = ruamel.yaml.safe_load(fp)
else:
data = ruamel.yaml.load(fp, ruamel.yaml.RoundTripLoader)
- entry = self.entry_class(self, data, fname)
+ entry = self.entry_class(self, data, fname)
+ valid, errors = entry.validate()
+ if not valid:
+ logging.warning(
+ "Not indexing {fname}: invalid data".format(fname=fname)
+ )
+ return False, errors
doc = xapian.Document()
self.indexer.set_document(doc)
@@ -134,6 +174,7 @@ class Collection(object):
doc.add_value(0, entry.fname.encode('utf-8'))
cache.replace_document(entry.idterm, doc)
+ return True, []
@property
def indexed_fields(self):
@@ -176,14 +217,21 @@ class Collection(object):
updated = 0
for fname in fnames:
try:
- self._index_file(fname, cache)
+ valid, errors = self._index_file(fname, cache)
except IOError as e:
logging.warning("Could not load file {}: {}".format(
fname,
str(e))
)
else:
- updated += 1
+ if valid:
+ updated += 1
+ else:
+ logging.warning(
+ "File {fname} could not be indexed: {errors}".format(
+ fname=fname,
+ errors=errors)
+ )
return updated
def save_entries(self, entries=[]):
diff --git a/tests/data/wrong/items/b9a832309c984ada9f267471660c1313.yaml b/tests/data/wrong/items/b9a832309c984ada9f267471660c1313.yaml
new file mode 100644
index 0000000..ec44b7c
--- /dev/null
+++ b/tests/data/wrong/items/b9a832309c984ada9f267471660c1313.yaml
@@ -0,0 +1,5 @@
+name: 'Problematic entry'
+description: |
+ .
+position: 'somewhere'
+number: 'four'
diff --git a/tests/data/wrong/settings.yaml b/tests/data/wrong/settings.yaml
index 9871421..ef9ab74 100644
--- a/tests/data/wrong/settings.yaml
+++ b/tests/data/wrong/settings.yaml
@@ -9,3 +9,6 @@ fields:
- name: position
type: string
index: facet
+ - name: number
+ type: integer
+ help: "Enter an integer here"
diff --git a/tests/test_collection.py b/tests/test_collection.py
index 869828a..9de681a 100644
--- a/tests/test_collection.py
+++ b/tests/test_collection.py
@@ -54,7 +54,7 @@ class testCollection(unittest.TestCase):
self.assertIsNotNone(self.collection.settings)
self.assertIsNotNone(self.collection.stemmer)
# Fields with no "index" entry are not indexed
- self.assertEqual(len(self.collection.settings['fields']), 3)
+ self.assertEqual(len(self.collection.settings['fields']), 4)
self.assertEqual(len(self.collection.indexed_fields), 1)
def test_load_safe(self):
@@ -240,6 +240,30 @@ class testComplexCollection(unittest.TestCase):
self.collection.update_cache()
+class testCollectionWithErrors(unittest.TestCase):
+ @classmethod
+ def setUpClass(self):
+ self.collection = lesana.Collection('tests/data/wrong')
+
+ @classmethod
+ def tearDownClass(self):
+ shutil.rmtree(os.path.join(self.collection.basedir, '.lesana'))
+
+ def test_init(self):
+ self.assertIsNotNone(self.collection.settings)
+ self.assertEqual(
+ self.collection.settings['name'],
+ "Lesana collection with certain errors"
+ )
+ self.assertEqual(len(self.collection.settings['fields']), 4)
+ self.assertIsNotNone(self.collection.stemmer)
+ self.assertEqual(len(self.collection.indexed_fields), 1)
+
+ def test_index(self):
+ loaded = self.collection.update_cache()
+ self.assertEqual(loaded, 0)
+
+
class testCollectionCreation(unittest.TestCase):
def test_init(self):
tmpdir = tempfile.mkdtemp()