summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG.rst6
-rw-r--r--docs/field_types.rst4
-rw-r--r--docs/source/reference/lesana.rst1
-rw-r--r--docs/source/reference/lesana.types.rst7
-rw-r--r--lesana/collection.py97
-rw-r--r--lesana/types.py238
-rw-r--r--setup.py1
-rw-r--r--tests/data/wrong/settings.yaml3
-rw-r--r--tests/test_collection.py24
-rw-r--r--tests/test_types.py258
10 files changed, 587 insertions, 52 deletions
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 93f6654..bd87fab 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -5,6 +5,12 @@
Unreleased
==========
+* Validation of field contents has been made stricter: invalid contents
+ that were accepted in the past may now cause an indexing error.
+* The timestamp field type is now expected to contain a unix timestamp
+ (a yaml datetime is accepted, but may be converted to a unix
+ timestamp) and the types datetime and date have been added.
+
0.5.1
=====
diff --git a/docs/field_types.rst b/docs/field_types.rst
index baf2c5e..81e86ab 100644
--- a/docs/field_types.rst
+++ b/docs/field_types.rst
@@ -14,6 +14,10 @@ decimal:
.
timestamp:
.
+datetime:
+ .
+date:
+ .
boolean:
.
file:
diff --git a/docs/source/reference/lesana.rst b/docs/source/reference/lesana.rst
index bab9655..3dec184 100644
--- a/docs/source/reference/lesana.rst
+++ b/docs/source/reference/lesana.rst
@@ -22,3 +22,4 @@ Submodules
lesana.collection
lesana.command
+ lesana.types
diff --git a/docs/source/reference/lesana.types.rst b/docs/source/reference/lesana.types.rst
new file mode 100644
index 0000000..bbc6a91
--- /dev/null
+++ b/docs/source/reference/lesana.types.rst
@@ -0,0 +1,7 @@
+lesana.types module
+===================
+
+.. automodule:: lesana.types
+ :members:
+ :undoc-members:
+ :show-inheritance:
diff --git a/lesana/collection.py b/lesana/collection.py
index b265b4d..5b7b809 100644
--- a/lesana/collection.py
+++ b/lesana/collection.py
@@ -1,4 +1,3 @@
-import decimal
import logging
import os
import uuid
@@ -8,6 +7,7 @@ import xapian
import jinja2
from pkg_resources import resource_string
+from . import types
try:
import git
@@ -72,7 +72,14 @@ class Entry(object):
@property
def yaml_data(self):
-        return ruamel.yaml.dump(self.data, Dumper=ruamel.yaml.RoundTripDumper)
+        # Work on a shallow copy, so that self.data keeps its own values.
+        to_dump = self.data.copy()
+        # Decimal fields can't be represented by
+        # ruamel.yaml.RoundTripDumper, but transforming them to strings
+        # should be enough for all cases that we need.
+        for field in self.collection.settings['fields']:
+            if field['type'] != 'decimal':
+                continue
+            value = to_dump.get(field['name'], '')
+            # An empty (None) field is left alone: str(None) would write
+            # the text "None", which is not a valid decimal when the entry
+            # is loaded again.
+            if value is not None:
+                to_dump[field['name']] = str(value)
+        return ruamel.yaml.dump(to_dump, Dumper=ruamel.yaml.RoundTripDumper)
@property
def idterm(self):
@@ -85,60 +92,28 @@ class Entry(object):
def validate(self):
errors = []
valid = True
- for field in self.collection.settings['fields']:
- value = self.data.get(field['name'], None)
- t = field['type']
+ for name, field in self.collection.fields.items():
+ value = self.data.get(name, None)
+ t = field.name
+ try:
+ self.data[name] = field.load(value)
+ except types.LesanaValueError as e:
+ errors.append(
+ {
+ 'field': name,
+ 'error': e,
+ }
+ )
+
if t != 'list' and not value:
# empty fields are always fine except for lists
continue
- if t == 'integer':
- try:
- int(value)
- except ValueError:
- valid = False
- errors.append(
- {
- 'field': field['name'],
- 'error':
- 'Invalid value for integer field: {}'.format(
- value
- ),
- }
- )
- elif t == 'float':
- try:
- float(value)
- except ValueError:
- valid = False
- errors.append(
- {
- 'field': field['name'],
- 'error':
- 'Invalid value for float field: {}'.format(
- value
- ),
- }
- )
- elif t == 'decimal':
- try:
- decimal.Decimal(value)
- except decimal.InvalidOperation:
- valid = False
- errors.append(
- {
- 'field': field['name'],
- 'error':
- 'Invalid value for decimal field: {}'.format(
- value
- ),
- }
- )
elif t == 'list':
if not hasattr(value, '__iter__'):
valid = False
errors.append(
{
- 'field': field['name'],
+ 'field': name,
'error': 'Invalid value for list field: {}'.format(
value
),
@@ -175,6 +150,7 @@ class Collection(object):
)
except FileNotFoundError:
self.settings = ruamel.yaml.safe_load("{}")
+ self.fields = self.load_field_types()
os.makedirs(os.path.join(self.basedir, '.lesana'), exist_ok=True)
if 'lang' in self.settings:
try:
@@ -193,6 +169,31 @@ class Collection(object):
self.safe = False
self.entry_class = Entry
+    def _get_subsubclasses(self, cls):
+        """
+        Yield every direct and indirect subclass of ``cls``.
+
+        Each direct subclass is yielded first, immediately followed by
+        its own descendants.
+        """
+        for child in cls.__subclasses__():
+            yield child
+            for descendant in self._get_subsubclasses(child):
+                yield descendant
+
+    def load_field_types(self):
+        """
+        Build the type checkers for the fields listed in the settings.
+
+        Returns a dict that maps each field name to an instance of the
+        LesanaType subclass whose ``name`` matches the field type.
+        """
+        loaders = {
+            cls.name: cls
+            for cls in self._get_subsubclasses(types.LesanaType)
+        }
+        loaded = {}
+        for spec in self.settings.get('fields', []):
+            try:
+                checker = loaders[spec['type']]()
+            except KeyError:
+                # unknown fields are treated as if they were
+                # (unvalidated) generic YAML to support working with
+                # collections based on lesana derivatives
+                logging.warning(
+                    "Unknown field type %s in field %s",
+                    spec['type'],
+                    spec['name'],
+                )
+                checker = types.LesanaYAML()
+            loaded[spec['name']] = checker
+        return loaded
+
def _index_file(self, fname, cache):
with open(os.path.join(self.itemdir, fname)) as fp:
if self.safe:
diff --git a/lesana/types.py b/lesana/types.py
new file mode 100644
index 0000000..a252830
--- /dev/null
+++ b/lesana/types.py
@@ -0,0 +1,238 @@
+"""
+Type checkers for lesana fields.
+
+Warning: this part of the code is still in flux and it may change
+significantly in a future release.
+"""
+import datetime
+import decimal
+
+import dateutil.parser
+
+
+class LesanaType:
+    """
+    Common interface of all lesana field types.
+
+    Both methods only raise NotImplementedError here: the concrete
+    types are expected to override them.
+    """
+    def empty(self):
+        """Return the empty value for this type."""
+        raise NotImplementedError
+
+    def load(self, data):
+        """Check ``data`` and return it as a value of this type."""
+        raise NotImplementedError
+
+
+class LesanaString(LesanaType):
+    """
+    A string of unicode text
+    """
+    name = 'string'
+
+    def load(self, data):
+        """Return ``data`` as a str; falsy values are returned as they are."""
+        return str(data) if data else data
+
+    def empty(self):
+        """Return the empty string."""
+        return ""
+
+
+class LesanaText(LesanaString):
+    """
+    A longer block of unicode text.
+
+    Loading and the empty value are inherited from LesanaString.
+    """
+    name = 'text'
+
+
+class LesanaInt(LesanaType):
+    """
+    An integer number
+    """
+    name = "integer"
+
+    def load(self, data):
+        """
+        Convert ``data`` to an int.
+
+        Falsy values (None, "", 0, ...) are returned unchanged.
+
+        Raises LesanaValueError if ``data`` can't be converted.
+        """
+        if not data:
+            return data
+        try:
+            return int(data)
+        # int() raises TypeError for unsupported types (e.g. a list) and
+        # OverflowError for infinite floats, not just ValueError.
+        except (TypeError, ValueError, OverflowError) as e:
+            raise LesanaValueError(
+                "Invalid value for integer field: {}".format(data)
+            ) from e
+
+    def empty(self):
+        """Return 0."""
+        return 0
+
+
+class LesanaFloat(LesanaType):
+    """
+    A floating point number
+    """
+    name = "float"
+
+    def load(self, data):
+        """
+        Convert ``data`` to a float.
+
+        Falsy values (None, "", 0, ...) are returned unchanged.
+
+        Raises LesanaValueError if ``data`` can't be converted.
+        """
+        if not data:
+            return data
+        try:
+            return float(data)
+        # float() raises TypeError for unsupported types (e.g. a list)
+        # and OverflowError for integers that are too large.
+        except (TypeError, ValueError, OverflowError) as e:
+            raise LesanaValueError(
+                "Invalid value for float field: {}".format(data)
+            ) from e
+
+    def empty(self):
+        """Return 0.0."""
+        return 0.0
+
+
+class LesanaDecimal(LesanaType):
+    """
+    A decimal number
+    """
+    name = "decimal"
+
+    def load(self, data):
+        """
+        Convert ``data`` to a decimal.Decimal.
+
+        Falsy values (None, "", 0, ...) are returned unchanged.
+
+        Raises LesanaValueError if ``data`` can't be converted.
+        """
+        if not data:
+            return data
+        try:
+            return decimal.Decimal(data)
+        # Decimal() raises InvalidOperation for malformed strings, but
+        # TypeError or ValueError for unsupported types and malformed
+        # tuples.
+        except (decimal.InvalidOperation, TypeError, ValueError) as e:
+            raise LesanaValueError(
+                "Invalid value for decimal field: {}".format(data)
+            ) from e
+
+    def empty(self):
+        """Return a Decimal zero."""
+        return decimal.Decimal(0)
+
+
+class LesanaTimestamp(LesanaType):
+    """
+    A unix timestamp
+    """
+    name = "timestamp"
+
+    def load(self, data):
+        """
+        Convert a unix timestamp to a datetime.
+
+        Falsy values and datetime.datetime instances are returned
+        unchanged; anything else is converted to an int and then to a
+        naive datetime in the local timezone.
+
+        Raises LesanaValueError if ``data`` can't be converted.
+        """
+        if not data:
+            return data
+        if isinstance(data, datetime.datetime):
+            return data
+        try:
+            return datetime.datetime.fromtimestamp(int(data))
+        # fromtimestamp() raises OverflowError or OSError for timestamps
+        # that are out of the range supported by the platform.
+        except (TypeError, ValueError, OverflowError, OSError) as e:
+            raise LesanaValueError(
+                "Invalid value for timestamp field: {}".format(data)
+            ) from e
+
+    def empty(self):
+        """Return None."""
+        return None
+
+
+class LesanaDatetime(LesanaType):
+    """
+    A datetime
+    """
+    name = "datetime"
+
+    def load(self, data):
+        """
+        Convert ``data`` to a datetime.
+
+        Falsy values and datetime.datetime instances are returned
+        unchanged, a datetime.date is converted to midnight of that
+        day and anything else is parsed with dateutil.
+
+        Raises LesanaValueError if ``data`` can't be parsed.
+        """
+        if not data:
+            return data
+        if isinstance(data, datetime.datetime):
+            return data
+        if isinstance(data, datetime.date):
+            return datetime.datetime(data.year, data.month, data.day)
+        try:
+            return dateutil.parser.parse(data)
+        # ParserError only exists in dateutil >= 2.8.1, where it is a
+        # subclass of ValueError; older versions raise ValueError.
+        # TypeError is raised for input that is not a string and
+        # OverflowError for values that are too big.
+        except (TypeError, ValueError, OverflowError) as e:
+            raise LesanaValueError(
+                "Invalid value for datetime field: {}".format(data)
+            ) from e
+
+    def empty(self):
+        """Return None."""
+        return None
+
+
+class LesanaDate(LesanaType):
+    """
+    A date
+    """
+    name = "date"
+
+    def load(self, data):
+        """
+        Validate ``data`` as a date.
+
+        Falsy values and datetime.date instances (which include the
+        datetime.datetime ones) are returned unchanged; anything else
+        is parsed with dateutil and the result is returned as it is.
+
+        Raises LesanaValueError if ``data`` can't be parsed.
+        """
+        if not data:
+            return data
+        if isinstance(data, datetime.date):
+            return data
+        try:
+            return dateutil.parser.parse(data)
+        # ParserError only exists in dateutil >= 2.8.1, where it is a
+        # subclass of ValueError; older versions raise ValueError.
+        # TypeError is raised for input that is not a string and
+        # OverflowError for values that are too big.
+        except (TypeError, ValueError, OverflowError) as e:
+            raise LesanaValueError(
+                "Invalid value for date field: {}".format(data)
+            ) from e
+
+    def empty(self):
+        """Return None."""
+        return None
+
+
+class LesanaBoolean(LesanaType):
+    """
+    A boolean value
+    """
+    name = 'boolean'
+
+    def load(self, data):
+        """
+        Return ``data`` if it is falsy or a bool.
+
+        Raises LesanaValueError for any other value.
+        """
+        if not data or isinstance(data, bool):
+            return data
+        raise LesanaValueError(
+            "Invalid value for boolean field: {}".format(data)
+        )
+
+    def empty(self):
+        """Return None."""
+        return None
+
+
+class LesanaFile(LesanaString):
+    """
+    A path to a local file.
+
+    A relative path is assumed to start from the base lesana directory
+    (i.e. the one where .lesana lives).
+
+    Loading and the empty value are inherited from LesanaString.
+    """
+    name = 'file'
+
+
+class LesanaURL(LesanaString):
+    """
+    A URL.
+
+    Loading and the empty value are inherited from LesanaString.
+    """
+    name = 'url'
+
+
+class LesanaYAML(LesanaType):
+    """
+    Free YAML contents (no structure is enforced)
+    """
+    name = 'yaml'
+
+    def empty(self):
+        """Return None."""
+        return None
+
+    def load(self, data):
+        """Return ``data`` unchanged: nothing is validated."""
+        return data
+
+
+class LesanaList(LesanaYAML):
+    """
+    A list of other values.
+
+    Loading and the empty value are inherited from LesanaYAML.
+    """
+
+    # Temporary definition so that tests aren't broken in the current
+    # commit
+
+    name = 'list'
+
+
+class LesanaValueError(ValueError):
+    """
+    Error raised when a value is not valid for its field type.
+    """
diff --git a/setup.py b/setup.py
index ffc79b9..b016fb3 100644
--- a/setup.py
+++ b/setup.py
@@ -20,6 +20,7 @@ setup(
# 'xapian >= 1.4',
'ruamel.yaml',
'jinja2',
+        'python-dateutil',
],
python_requires='>=3',
# Metadata
diff --git a/tests/data/wrong/settings.yaml b/tests/data/wrong/settings.yaml
index 83a542b..bf79572 100644
--- a/tests/data/wrong/settings.yaml
+++ b/tests/data/wrong/settings.yaml
@@ -23,3 +23,6 @@ fields:
type: list
list: string
index: field
+ - name: cloud
+ type: cloud
+ help: 'There is no cloud type'
diff --git a/tests/test_collection.py b/tests/test_collection.py
index 832f421..f7ddf6d 100644
--- a/tests/test_collection.py
+++ b/tests/test_collection.py
@@ -305,6 +305,22 @@ class testComplexCollection(unittest.TestCase):
self.assertIn('with_default', entry.yaml_data)
self.assertIn('amount: 0', entry.yaml_data)
+    def test_load_field_loaders(self):
+        # Every field must have been given a loader of the expected type
+        expected = {
+            'name': lesana.types.LesanaString,
+            'description': lesana.types.LesanaText,
+            'position': lesana.types.LesanaString,
+            'something': lesana.types.LesanaYAML,
+            'tags': lesana.types.LesanaList,
+            'keywords': lesana.types.LesanaList,
+            'exists': lesana.types.LesanaBoolean,
+            'with_default': lesana.types.LesanaString,
+            'amount': lesana.types.LesanaInt,
+        }
+        for field_name, loader_class in expected.items():
+            loader = self.collection.fields[field_name]
+            self.assertIsInstance(loader, loader_class)
+
class testCollectionWithErrors(unittest.TestCase):
def setUp(self):
@@ -320,8 +336,8 @@ class testCollectionWithErrors(unittest.TestCase):
# check that the log contains a warning.
with self.assertLogs(level=logging.WARNING) as cm:
self.collection = lesana.Collection(self.tmpdir)
- self.assertEqual(len(cm.output), 1)
- self.assertIn("Invalid language", cm.output[0])
+ self.assertEqual(len(cm.output), 2)
+ self.assertIn("Invalid language", cm.output[1])
# The collection will default to english, but should still work.
self.collection.update_cache()
self.assertIsNotNone(self.collection.settings)
@@ -334,7 +350,7 @@ class testCollectionWithErrors(unittest.TestCase):
self.assertIsNotNone(self.collection.settings)
self.assertIsNotNone(self.collection.stemmer)
# Fields with no "index" entry are not indexed
- self.assertEqual(len(self.collection.settings['fields']), 7)
+ self.assertEqual(len(self.collection.settings['fields']), 8)
self.assertEqual(len(self.collection.indexed_fields), 3)
def test_init(self):
@@ -343,7 +359,7 @@ class testCollectionWithErrors(unittest.TestCase):
self.collection.settings['name'],
"Lesana collection with certain errors",
)
- self.assertEqual(len(self.collection.settings['fields']), 7)
+ self.assertEqual(len(self.collection.settings['fields']), 8)
self.assertIsNotNone(self.collection.stemmer)
self.assertEqual(len(self.collection.indexed_fields), 3)
diff --git a/tests/test_types.py b/tests/test_types.py
new file mode 100644
index 0000000..f27089d
--- /dev/null
+++ b/tests/test_types.py
@@ -0,0 +1,258 @@
+import datetime
+import decimal
+import unittest
+
+from lesana import types
+
+
+class testTypes(unittest.TestCase):
+    """
+    Check the empty value and the loading rules of each field type.
+    """
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def test_base(self):
+        checker = types.LesanaType()
+
+        # The base class does not implement empty nor load
+        with self.assertRaises(NotImplementedError):
+            checker.empty()
+
+        with self.assertRaises(NotImplementedError):
+            checker.load("")
+
+    def test_string(self):
+        checker = types.LesanaString()
+
+        s = checker.empty()
+        self.assertEqual(s, "")
+
+        s = checker.load("Hello World!")
+        self.assertEqual(s, "Hello World!")
+
+        s = checker.load(None)
+        self.assertEqual(s, None)
+
+    def test_text(self):
+        checker = types.LesanaText()
+
+        s = checker.empty()
+        self.assertEqual(s, "")
+
+        s = checker.load("Hello World!")
+        self.assertEqual(s, "Hello World!")
+
+        s = checker.load(None)
+        self.assertEqual(s, None)
+
+    def test_int(self):
+        checker = types.LesanaInt()
+
+        v = checker.empty()
+        self.assertEqual(v, 0)
+
+        v = checker.load("10")
+        self.assertEqual(v, 10)
+
+        v = checker.load(10.5)
+        self.assertEqual(v, 10)
+
+        for d in ("ten", "10.5"):
+            with self.assertRaises(types.LesanaValueError):
+                checker.load(d)
+
+        v = checker.load(None)
+        self.assertEqual(v, None)
+
+    def test_float(self):
+        checker = types.LesanaFloat()
+
+        v = checker.empty()
+        self.assertEqual(v, 0.0)
+
+        v = checker.load("10")
+        self.assertEqual(v, 10)
+
+        v = checker.load(10.5)
+        self.assertEqual(v, 10.5)
+
+        v = checker.load("10.5")
+        self.assertEqual(v, 10.5)
+
+        # The trailing comma matters: ("ten") is just the string "ten",
+        # and the loop would run on its single characters.
+        for d in ("ten",):
+            with self.assertRaises(types.LesanaValueError):
+                checker.load(d)
+
+        v = checker.load(None)
+        self.assertEqual(v, None)
+
+    def test_decimal(self):
+        checker = types.LesanaDecimal()
+
+        v = checker.empty()
+        self.assertEqual(v, decimal.Decimal(0))
+
+        v = checker.load("10")
+        self.assertEqual(v, decimal.Decimal(10))
+
+        v = checker.load(10.5)
+        self.assertEqual(v, decimal.Decimal(10.5))
+
+        v = checker.load("10.5")
+        self.assertEqual(v, decimal.Decimal(10.5))
+
+        # The trailing comma matters: ("ten") is just the string "ten",
+        # and the loop would run on its single characters.
+        for d in ("ten",):
+            with self.assertRaises(types.LesanaValueError):
+                checker.load(d)
+
+        v = checker.load(None)
+        self.assertEqual(v, None)
+
+    def test_timestamp(self):
+        checker = types.LesanaTimestamp()
+
+        v = checker.empty()
+        self.assertEqual(v, None)
+
+        now = datetime.datetime.now()
+        v = checker.load(now)
+        self.assertEqual(v, now)
+
+        # The expected value is computed in the local timezone, as the
+        # checker does, so that the test does not depend on the timezone
+        # of the machine where it is run.
+        v = checker.load("1600000000")
+        self.assertEqual(v, datetime.datetime.fromtimestamp(1600000000))
+
+        today = datetime.date.today()
+        for d in (
+            today,
+            "today",
+            "2020-13-01", "2020-01-01",
+            "2020-01-01 10:00"
+        ):
+            with self.assertRaises(types.LesanaValueError):
+                checker.load(d)
+
+        v = checker.load(None)
+        self.assertEqual(v, None)
+
+    def test_datetime(self):
+        checker = types.LesanaDatetime()
+
+        v = checker.empty()
+        self.assertEqual(v, None)
+
+        now = datetime.datetime.now()
+        v = checker.load(now)
+        self.assertEqual(v, now)
+
+        today = datetime.date.today()
+        v = checker.load(today)
+        self.assertIsInstance(v, datetime.datetime)
+        for part in ('year', 'month', 'day'):
+            self.assertEqual(getattr(v, part), getattr(today, part))
+
+        v = checker.load("2020-01-01")
+        self.assertEqual(v, datetime.datetime(2020, 1, 1))
+
+        v = checker.load("2020-01-01 10:00")
+        self.assertEqual(v, datetime.datetime(2020, 1, 1, 10, 0))
+
+        for d in ("today", "2020-13-01"):
+            with self.assertRaises(types.LesanaValueError):
+                checker.load(d)
+
+        v = checker.load(None)
+        self.assertEqual(v, None)
+
+    def test_date(self):
+        checker = types.LesanaDate()
+
+        v = checker.empty()
+        self.assertEqual(v, None)
+
+        now = datetime.datetime.now()
+        v = checker.load(now)
+        self.assertIsInstance(v, datetime.date)
+        for part in ('year', 'month', 'day'):
+            self.assertEqual(getattr(v, part), getattr(now, part))
+
+        today = datetime.date.today()
+        v = checker.load(today)
+        self.assertEqual(v, today)
+
+        v = checker.load("2020-01-01")
+        self.assertEqual(v, datetime.datetime(2020, 1, 1))
+
+        v = checker.load("2020-01-01 10:00")
+        self.assertEqual(v, datetime.datetime(2020, 1, 1, 10, 0))
+
+        for d in ("today", "2020-13-01"):
+            with self.assertRaises(types.LesanaValueError):
+                checker.load(d)
+
+        v = checker.load(None)
+        self.assertEqual(v, None)
+
+    def test_boolean(self):
+        checker = types.LesanaBoolean()
+
+        v = checker.empty()
+        self.assertEqual(v, None)
+
+        v = checker.load(True)
+        self.assertEqual(v, True)
+
+        for d in ("maybe", "yes", "no"):
+            with self.assertRaises(types.LesanaValueError):
+                checker.load(d)
+
+        v = checker.load(None)
+        self.assertEqual(v, None)
+
+    def test_file(self):
+        checker = types.LesanaFile()
+
+        v = checker.empty()
+        self.assertEqual(v, "")
+
+        v = checker.load("relative/path/to/file")
+        self.assertEqual(v, "relative/path/to/file")
+
+        v = checker.load(None)
+        self.assertEqual(v, None)
+
+        # TODO: check for invalid file paths
+
+    def test_url(self):
+        checker = types.LesanaURL()
+
+        v = checker.empty()
+        self.assertEqual(v, "")
+
+        v = checker.load("http://example.org")
+        self.assertEqual(v, "http://example.org")
+
+        v = checker.load(None)
+        self.assertEqual(v, None)
+
+        # TODO: check for invalid URLs
+
+    def test_yaml(self):
+        checker = types.LesanaYAML()
+
+        v = checker.empty()
+        self.assertEqual(v, None)
+
+        some_data = {
+            'anything': 'goes',
+            'everything': 42
+        }
+        v = checker.load(some_data)
+        self.assertEqual(v, some_data)
+
+        v = checker.load(None)
+        self.assertEqual(v, None)
+
+
+if __name__ == '__main__':
+ unittest.main()