diff options
-rw-r--r-- | CHANGELOG.rst | 6 | ||||
-rw-r--r-- | docs/field_types.rst | 4 | ||||
-rw-r--r-- | docs/source/reference/lesana.rst | 1 | ||||
-rw-r--r-- | docs/source/reference/lesana.types.rst | 7 | ||||
-rw-r--r-- | lesana/collection.py | 97 | ||||
-rw-r--r-- | lesana/types.py | 238 | ||||
-rw-r--r-- | setup.py | 1 | ||||
-rw-r--r-- | tests/data/wrong/settings.yaml | 3 | ||||
-rw-r--r-- | tests/test_collection.py | 24 | ||||
-rw-r--r-- | tests/test_types.py | 258 |
10 files changed, 587 insertions, 52 deletions
diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 93f6654..bd87fab 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,12 @@ Unreleased ========== +* Validation of field contents has been made stricter: invalid contents + that were accepted in the past may now cause an indexing error. +* The timestamp field type is now expected to contain a unix timestamp + (a yaml datetime is accepted, but may be converted to a unix + timestamp) and the types datetime and date have been added. + 0.5.1 ===== diff --git a/docs/field_types.rst b/docs/field_types.rst index baf2c5e..81e86ab 100644 --- a/docs/field_types.rst +++ b/docs/field_types.rst @@ -14,6 +14,10 @@ decimal: . timestamp: . +datetime: + . +date: + . boolean: . file: diff --git a/docs/source/reference/lesana.rst b/docs/source/reference/lesana.rst index bab9655..3dec184 100644 --- a/docs/source/reference/lesana.rst +++ b/docs/source/reference/lesana.rst @@ -22,3 +22,4 @@ Submodules lesana.collection lesana.command + lesana.types diff --git a/docs/source/reference/lesana.types.rst b/docs/source/reference/lesana.types.rst new file mode 100644 index 0000000..bbc6a91 --- /dev/null +++ b/docs/source/reference/lesana.types.rst @@ -0,0 +1,7 @@ +lesana.types module +=================== + +.. automodule:: lesana.types + :members: + :undoc-members: + :show-inheritance: diff --git a/lesana/collection.py b/lesana/collection.py index b265b4d..5b7b809 100644 --- a/lesana/collection.py +++ b/lesana/collection.py @@ -1,4 +1,3 @@ -import decimal import logging import os import uuid @@ -8,6 +7,7 @@ import xapian import jinja2 from pkg_resources import resource_string +from . import types try: import git @@ -72,7 +72,14 @@ class Entry(object): @property def yaml_data(self): - return ruamel.yaml.dump(self.data, Dumper=ruamel.yaml.RoundTripDumper) + to_dump = self.data.copy() + # Decimal fields can't be represented by + # ruamel.yaml.RoundTripDumper, but transforming them to strings + # should be enough for all cases that we need. 
+ for field in self.collection.settings['fields']: + if field['type'] == 'decimal': + to_dump[field['name']] = str(to_dump.get(field['name'], '')) + return ruamel.yaml.dump(to_dump, Dumper=ruamel.yaml.RoundTripDumper) @property def idterm(self): @@ -85,60 +92,28 @@ class Entry(object): def validate(self): errors = [] valid = True - for field in self.collection.settings['fields']: - value = self.data.get(field['name'], None) - t = field['type'] + for name, field in self.collection.fields.items(): + value = self.data.get(name, None) + t = field.name + try: + self.data[name] = field.load(value) + except types.LesanaValueError as e: + errors.append( + { + 'field': name, + 'error': e, + } + ) + if t != 'list' and not value: # empty fields are always fine except for lists continue - if t == 'integer': - try: - int(value) - except ValueError: - valid = False - errors.append( - { - 'field': field['name'], - 'error': - 'Invalid value for integer field: {}'.format( - value - ), - } - ) - elif t == 'float': - try: - float(value) - except ValueError: - valid = False - errors.append( - { - 'field': field['name'], - 'error': - 'Invalid value for float field: {}'.format( - value - ), - } - ) - elif t == 'decimal': - try: - decimal.Decimal(value) - except decimal.InvalidOperation: - valid = False - errors.append( - { - 'field': field['name'], - 'error': - 'Invalid value for decimal field: {}'.format( - value - ), - } - ) elif t == 'list': if not hasattr(value, '__iter__'): valid = False errors.append( { - 'field': field['name'], + 'field': name, 'error': 'Invalid value for list field: {}'.format( value ), @@ -175,6 +150,7 @@ class Collection(object): ) except FileNotFoundError: self.settings = ruamel.yaml.safe_load("{}") + self.fields = self.load_field_types() os.makedirs(os.path.join(self.basedir, '.lesana'), exist_ok=True) if 'lang' in self.settings: try: @@ -193,6 +169,31 @@ class Collection(object): self.safe = False self.entry_class = Entry + def _get_subsubclasses(self, 
cls): + for c in cls.__subclasses__(): + yield c + yield from self._get_subsubclasses(c) + + def load_field_types(self): + type_loaders = {} + for t in self._get_subsubclasses(types.LesanaType): + type_loaders[t.name] = t + fields = {} + for field in self.settings.get('fields', []): + try: + fields[field['name']] = type_loaders[field['type']]() + except KeyError: + # unknown fields are treated as if they were + # (unvalidated) generic YAML to support working with + # collections based on lesana derivatives + logging.warning( + "Unknown field type %s in field %s", + field['type'], + field['name'], + ) + fields[field['name']] = types.LesanaYAML() + return fields + def _index_file(self, fname, cache): with open(os.path.join(self.itemdir, fname)) as fp: if self.safe: diff --git a/lesana/types.py b/lesana/types.py new file mode 100644 index 0000000..a252830 --- /dev/null +++ b/lesana/types.py @@ -0,0 +1,238 @@ +""" +Type checkers for lesana fields. + +Warning: this part of the code is still in flux and it may change +significantly in a future release. +""" +import datetime +import decimal + +import dateutil.parser + + +class LesanaType: + """ + Base class for lesana field types. 
+ """ + def load(self, data): + raise NotImplementedError + + def empty(self): + raise NotImplementedError + + +class LesanaString(LesanaType): + """ + A string of unicode text + """ + name = 'string' + + def load(self, data): + if not data: + return data + return str(data) + + def empty(self): + return "" + + +class LesanaText(LesanaString): + """ + A longer block of unicode text + """ + name = 'text' + + +class LesanaInt(LesanaType): + """ + An integer number + """ + name = "integer" + + def load(self, data): + if not data: + return data + try: + return int(data) + except ValueError: + raise LesanaValueError( + "Invalid value for integer field: {}".format(data) + ) + + def empty(self): + return 0 + + +class LesanaFloat(LesanaType): + """ + A floating point number + """ + name = "float" + + def load(self, data): + if not data: + return data + try: + return float(data) + except ValueError: + raise LesanaValueError( + "Invalid value for float field: {}".format(data) + ) + + def empty(self): + return 0.0 + + +class LesanaDecimal(LesanaType): + """ + A floating point number + """ + name = "decimal" + + def load(self, data): + if not data: + return data + try: + return decimal.Decimal(data) + except decimal.InvalidOperation: + raise LesanaValueError( + "Invalid value for float field: {}".format(data) + ) + + def empty(self): + return decimal.Decimal(0) + + +class LesanaTimestamp(LesanaType): + """ + A unix timestamp + """ + name = "timestamp" + + def load(self, data): + if not data: + return data + if isinstance(data, datetime.datetime): + return data + try: + return datetime.datetime.fromtimestamp(int(data)) + except (TypeError, ValueError): + raise LesanaValueError( + "Invalid value for timestamp field: {}".format(data) + ) + + def empty(self): + return None + + +class LesanaDatetime(LesanaType): + """ + A datetime + """ + name = "datetime" + + def load(self, data): + if not data: + return data + if isinstance(data, datetime.datetime): + return data + if 
isinstance(data, datetime.date): + return datetime.datetime(data.year, data.month, data.day) + try: + return dateutil.parser.parse(data) + except dateutil.parser.ParserError: + raise LesanaValueError( + "Invalid value for datetime field: {}".format(data) + ) + + def empty(self): + return None + + +class LesanaDate(LesanaType): + """ + A date + """ + name = "date" + + def load(self, data): + if not data: + return data + if isinstance(data, datetime.date): + return data + try: + return dateutil.parser.parse(data) + except dateutil.parser.ParserError: + raise LesanaValueError( + "Invalid value for date field: {}".format(data) + ) + + def empty(self): + return None + + +class LesanaBoolean(LesanaType): + """ + A boolean value + """ + name = 'boolean' + + def load(self, data): + if not data: + return data + if isinstance(data, bool): + return data + else: + raise LesanaValueError( + "Invalid value for boolean field: {}".format(data) + ) + + def empty(self): + return None + + +class LesanaFile(LesanaString): + """ + A path to a local file. + + Relative paths are assumed to be relative to the base lesana + directory (i.e. where .lesana lives) + """ + name = 'file' + + +class LesanaURL(LesanaString): + """ + An URL + """ + name = 'url' + + +class LesanaYAML(LesanaType): + """ + Free YAML contents (no structure is enforced) + """ + name = 'yaml' + + def load(self, data): + return data + + def empty(self): + return None + + +class LesanaList(LesanaYAML): + """ + A list of other values + """ + + name = 'list' + + # Temporary definition so that tests aren't broken in the current + # commit + + +class LesanaValueError(ValueError): + """ + Raised in case of validation errors. 
+ """ @@ -20,6 +20,7 @@ setup( # 'xapian >= 1.4', 'ruamel.yaml', 'jinja2', + 'dateutil', ], python_requires='>=3', # Metadata diff --git a/tests/data/wrong/settings.yaml b/tests/data/wrong/settings.yaml index 83a542b..bf79572 100644 --- a/tests/data/wrong/settings.yaml +++ b/tests/data/wrong/settings.yaml @@ -23,3 +23,6 @@ fields: type: list list: string index: field + - name: cloud + type: cloud + help: 'There is no cloud type' diff --git a/tests/test_collection.py b/tests/test_collection.py index 832f421..f7ddf6d 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -305,6 +305,22 @@ class testComplexCollection(unittest.TestCase): self.assertIn('with_default', entry.yaml_data) self.assertIn('amount: 0', entry.yaml_data) + def test_load_field_loaders(self): + # Check that all fields have been loaded, with the right types + to_test = ( + ('name', lesana.types.LesanaString), + ('description', lesana.types.LesanaText), + ('position', lesana.types.LesanaString), + ('something', lesana.types.LesanaYAML), + ('tags', lesana.types.LesanaList), + ('keywords', lesana.types.LesanaList), + ('exists', lesana.types.LesanaBoolean), + ('with_default', lesana.types.LesanaString), + ('amount', lesana.types.LesanaInt), + ) + for f in to_test: + self.assertIsInstance(self.collection.fields[f[0]], f[1]) + class testCollectionWithErrors(unittest.TestCase): def setUp(self): @@ -320,8 +336,8 @@ class testCollectionWithErrors(unittest.TestCase): # check that the log contains a warning. with self.assertLogs(level=logging.WARNING) as cm: self.collection = lesana.Collection(self.tmpdir) - self.assertEqual(len(cm.output), 1) - self.assertIn("Invalid language", cm.output[0]) + self.assertEqual(len(cm.output), 2) + self.assertIn("Invalid language", cm.output[1]) # The collection will default to english, but should still work. 
self.collection.update_cache() self.assertIsNotNone(self.collection.settings) @@ -334,7 +350,7 @@ class testCollectionWithErrors(unittest.TestCase): self.assertIsNotNone(self.collection.settings) self.assertIsNotNone(self.collection.stemmer) # Fields with no "index" entry are not indexed - self.assertEqual(len(self.collection.settings['fields']), 7) + self.assertEqual(len(self.collection.settings['fields']), 8) self.assertEqual(len(self.collection.indexed_fields), 3) def test_init(self): @@ -343,7 +359,7 @@ class testCollectionWithErrors(unittest.TestCase): self.collection.settings['name'], "Lesana collection with certain errors", ) - self.assertEqual(len(self.collection.settings['fields']), 7) + self.assertEqual(len(self.collection.settings['fields']), 8) self.assertIsNotNone(self.collection.stemmer) self.assertEqual(len(self.collection.indexed_fields), 3) diff --git a/tests/test_types.py b/tests/test_types.py new file mode 100644 index 0000000..f27089d --- /dev/null +++ b/tests/test_types.py @@ -0,0 +1,258 @@ +import datetime +import decimal +import unittest + +from lesana import types + + +class testTypes(unittest.TestCase): + def setUp(self): + pass + + def tearDown(self): + pass + + def test_base(self): + checker = types.LesanaType() + + # The base class does not implement empty nor load + with self.assertRaises(NotImplementedError): + checker.empty() + + with self.assertRaises(NotImplementedError): + checker.load("") + + def test_string(self): + checker = types.LesanaString() + + s = checker.empty() + self.assertEqual(s, "") + + s = checker.load("Hello World!") + self.assertEqual(s, "Hello World!") + + s = checker.load(None) + self.assertEqual(s, None) + + def test_text(self): + checker = types.LesanaText() + + s = checker.empty() + self.assertEqual(s, "") + + s = checker.load("Hello World!") + self.assertEqual(s, "Hello World!") + + s = checker.load(None) + self.assertEqual(s, None) + + def test_int(self): + checker = types.LesanaInt() + + v = 
checker.empty() + self.assertEqual(v, 0) + + v = checker.load("10") + self.assertEqual(v, 10) + + v = checker.load(10.5) + self.assertEqual(v, 10) + + for d in ("ten", "10.5"): + with self.assertRaises(types.LesanaValueError): + checker.load(d) + + v = checker.load(None) + self.assertEqual(v, None) + + def test_float(self): + checker = types.LesanaFloat() + + v = checker.empty() + self.assertEqual(v, 0.0) + + v = checker.load("10") + self.assertEqual(v, 10) + + v = checker.load(10.5) + self.assertEqual(v, 10.5) + + v = checker.load("10.5") + self.assertEqual(v, 10.5) + + for d in ("ten"): + with self.assertRaises(types.LesanaValueError): + checker.load(d) + + v = checker.load(None) + self.assertEqual(v, None) + + def test_decimal(self): + checker = types.LesanaDecimal() + + v = checker.empty() + self.assertEqual(v, decimal.Decimal(0)) + + v = checker.load("10") + self.assertEqual(v, decimal.Decimal(10)) + + v = checker.load(10.5) + self.assertEqual(v, decimal.Decimal(10.5)) + + v = checker.load("10.5") + self.assertEqual(v, decimal.Decimal(10.5)) + + for d in ("ten"): + with self.assertRaises(types.LesanaValueError): + checker.load(d) + + v = checker.load(None) + self.assertEqual(v, None) + + def test_timestamp(self): + checker = types.LesanaTimestamp() + + v = checker.empty() + self.assertEqual(v, None) + + now = datetime.datetime.now() + v = checker.load(now) + self.assertEqual(v, now) + + v = checker.load("1600000000") + self.assertEqual(v, datetime.datetime(2020, 9, 13, 14, 26, 40)) + + today = datetime.date.today() + for d in ( + today, + "today", + "2020-13-01", "2020-01-01", + "2020-01-01 10:00" + ): + with self.assertRaises(types.LesanaValueError): + checker.load(d) + + v = checker.load(None) + self.assertEqual(v, None) + + def test_datetime(self): + checker = types.LesanaDatetime() + + v = checker.empty() + self.assertEqual(v, None) + + now = datetime.datetime.now() + v = checker.load(now) + self.assertEqual(v, now) + + today = datetime.date.today() + v = 
checker.load(today) + self.assertIsInstance(v, datetime.datetime) + for part in ('year', 'month', 'day'): + self.assertEqual(getattr(v, part), getattr(today, part)) + + v = checker.load("2020-01-01") + self.assertEqual(v, datetime.datetime(2020, 1, 1)) + + v = checker.load("2020-01-01 10:00") + self.assertEqual(v, datetime.datetime(2020, 1, 1, 10, 0)) + + for d in ("today", "2020-13-01"): + with self.assertRaises(types.LesanaValueError): + checker.load(d) + + v = checker.load(None) + self.assertEqual(v, None) + + def test_date(self): + checker = types.LesanaDate() + + v = checker.empty() + self.assertEqual(v, None) + + now = datetime.datetime.now() + v = checker.load(now) + self.assertIsInstance(v, datetime.date) + for part in ('year', 'month', 'day'): + self.assertEqual(getattr(v, part), getattr(now, part)) + + today = datetime.date.today() + v = checker.load(today) + self.assertEqual(v, today) + + v = checker.load("2020-01-01") + self.assertEqual(v, datetime.datetime(2020, 1, 1)) + + v = checker.load("2020-01-01 10:00") + self.assertEqual(v, datetime.datetime(2020, 1, 1, 10, 0)) + + for d in ("today", "2020-13-01"): + with self.assertRaises(types.LesanaValueError): + checker.load(d) + + v = checker.load(None) + self.assertEqual(v, None) + + def test_boolean(self): + checker = types.LesanaBoolean() + + v = checker.empty() + self.assertEqual(v, None) + + v = checker.load(True) + self.assertEqual(v, True) + + for d in ("maybe", "yes", "no"): + with self.assertRaises(types.LesanaValueError): + checker.load(d) + + v = checker.load(None) + self.assertEqual(v, None) + + def test_file(self): + checker = types.LesanaFile() + + v = checker.empty() + self.assertEqual(v, "") + + v = checker.load("relative/path/to/file") + self.assertEqual(v, "relative/path/to/file") + + v = checker.load(None) + self.assertEqual(v, None) + + # TODO: check for invalid file paths + + def test_url(self): + checker = types.LesanaURL() + + v = checker.empty() + self.assertEqual(v, "") + + v = 
checker.load("http://example.org") + self.assertEqual(v, "http://example.org") + + v = checker.load(None) + self.assertEqual(v, None) + + # TODO: check for invalid URLs + + def test_yaml(self): + checker = types.LesanaYAML() + + v = checker.empty() + self.assertEqual(v, None) + + some_data = { + 'anything': 'goes', + 'everything': 42 + } + v = checker.load(some_data) + self.assertEqual(v, some_data) + + v = checker.load(None) + self.assertEqual(v, None) + + +if __name__ == '__main__': + unittest.main() |