diff options
| -rw-r--r-- | docs/source/user/settings.rst | 4 | ||||
| -rw-r--r-- | lesana/collection.py | 6 | ||||
| -rw-r--r-- | lesana/types.py | 49 | ||||
| -rw-r--r-- | tests/data/complex/settings.yaml | 2 | ||||
| -rw-r--r-- | tests/test_collection.py | 2 | ||||
| -rw-r--r-- | tests/test_types.py | 93 | 
6 files changed, 147 insertions, 9 deletions
| diff --git a/docs/source/user/settings.rst b/docs/source/user/settings.rst index 68c4a0d..0d2eec9 100644 --- a/docs/source/user/settings.rst +++ b/docs/source/user/settings.rst @@ -36,6 +36,10 @@ Field definitions     fields that should be available in the free text search and ``field``     for fields that should only be available by specifying the field name     in the search. +``sortable``: +   boolean; whether this field is sortable. Sortable fields enable +   sorting the results and search by ranges, but having too many +   sortable fields makes the search more resource intensive.  ``help``:     a description for the field; this is e.g. added to new entries as a     comment. diff --git a/lesana/collection.py b/lesana/collection.py index 77023f7..7a4ff11 100644 --- a/lesana/collection.py +++ b/lesana/collection.py @@ -166,11 +166,15 @@ class Collection(object):          for t in self._get_subsubclasses(types.LesanaType):              type_loaders[t.name] = t          fields = {} -        for field in self.settings.get('fields', []): +        for i, field in enumerate(self.settings.get('fields', [])):              try:                  fields[field['name']] = type_loaders[field['type']](                      field,                      type_loaders, +                    # value slot 0 is used to store the filename, and we +                    # reserve a few more slots just in case they are +                    # needed by lesana or some derivative +                    value_index=i + 16,                  )              except KeyError:                  # unknown fields are treated as if they were diff --git a/lesana/types.py b/lesana/types.py index b013a48..ce15b69 100644 --- a/lesana/types.py +++ b/lesana/types.py @@ -10,13 +10,16 @@ import logging  import dateutil.parser +import xapian +  class LesanaType:      """      Base class for lesana field types.      
""" -    def __init__(self, field, types): +    def __init__(self, field, types, value_index=None):          self.field = field +        self.value_index = value_index      def load(self, data):          raise NotImplementedError @@ -24,6 +27,18 @@ class LesanaType:      def empty(self):          raise NotImplementedError +    def _to_index_text(self, value): +        """ +        Prepare a value for indexing. +        """ +        return str(value) + +    def _to_value(self, value): +        """ +        Prepare a value for indexing in a value slot +        """ +        return str(value) +      def index(self, doc, indexer, value):          """          Index a value for this field type. @@ -35,16 +50,24 @@ class LesanaType:          """          to_index = self.field.get('index', False)          if not to_index: -            return False +            return          if not value:              logging.info(                  "Not indexing empty value {}".format(value)              ) +            return          prefix = self.field.get('prefix', 'X' + self.field['name'].upper()) -        indexer.index_text(str(value), 1, prefix) +        indexer.index_text(self._to_index_text(value), 1, prefix)          if to_index == 'free': -            indexer.index_text(str(value)) +            indexer.index_text(self._to_index_text(value))              indexer.increase_termpos() +        if self.field.get('sortable', False): +            if self.value_index and self.value_index >= 16: +                doc.add_value(self.value_index, self._to_value(value)) +            else: +                logging.debug( +                    "Index values up to 8 are reserved for internal use" +                )  class LesanaString(LesanaType): @@ -88,6 +111,18 @@ class LesanaInt(LesanaType):      def empty(self):          return 0 +    def _to_index_text(self, value): +        """ +        Prepare a value for indexing. 
+        """ +        return str(value) + +    def _to_value(self, value): +        """ +        Prepare a value for indexing in a value slot +        """ +        return xapian.sortable_serialise(value) +  class LesanaFloat(LesanaType):      """ @@ -257,8 +292,8 @@ class LesanaList(LesanaType):      name = 'list' -    def __init__(self, field, types): -        super().__init__(field, types) +    def __init__(self, field, types, value_index=None): +        super().__init__(field, types, value_index)          try:              self.sub_type = types[field['list']](field, types)          except KeyError: @@ -285,7 +320,7 @@ class LesanaList(LesanaType):      def index(self, doc, indexer, value):          for v in value: -            super().index(doc, indexer, v) +            self.sub_type.index(doc, indexer, v)  class LesanaValueError(ValueError): diff --git a/tests/data/complex/settings.yaml b/tests/data/complex/settings.yaml index 51f313f..e6781b5 100644 --- a/tests/data/complex/settings.yaml +++ b/tests/data/complex/settings.yaml @@ -31,3 +31,5 @@ fields:        default: 'default value'      - name: amount        type: integer +      index: field +      sortable: true diff --git a/tests/test_collection.py b/tests/test_collection.py index bbc35ba..f3e06da 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -272,7 +272,7 @@ class testComplexCollection(unittest.TestCase):          )          self.assertEqual(len(self.collection.settings['fields']), 9)          self.assertIsNotNone(self.collection.stemmer) -        self.assertEqual(len(self.collection.indexed_fields), 6) +        self.assertEqual(len(self.collection.indexed_fields), 7)      def test_index(self):          self.collection.update_cache() diff --git a/tests/test_types.py b/tests/test_types.py index 90f6482..6f0c33e 100644 --- a/tests/test_types.py +++ b/tests/test_types.py @@ -2,6 +2,8 @@ import datetime  import decimal  import unittest +import xapian +  from lesana import types @@ 
-308,5 +310,96 @@ class testTypes(unittest.TestCase):                  checker.load(d) +class testTypeIndexing(unittest.TestCase): +    def setUp(self): +        self.doc = xapian.Document() +        self.indexer = xapian.TermGenerator() + +    def _get_field_def(self, type_name): +        return { +            'type': type_name, +            'name': 'test_field', +            'index': 'field', +            'sortable': True, +        } + +    def test_base(self): +        checker = types.LesanaType(self._get_field_def('base'), {}, 16) + +        checker.index(self.doc, self.indexer, "some string") + +    def test_base_value_index_too_low(self): +        checker = types.LesanaType(self._get_field_def('base'), {}, 1) + +        checker.index(self.doc, self.indexer, "some string") + +        # TODO: check that the string has not been indexed + +    def test_string(self): +        checker = types.LesanaString(self._get_field_def('string'), {}, 16) + +        checker.index(self.doc, self.indexer, "some string") + +    def test_text(self): +        checker = types.LesanaText(self._get_field_def('text'), {}, 16) + +        checker.index(self.doc, self.indexer, "some string") + +    def test_int(self): +        checker = types.LesanaInt(self._get_field_def('integer'), {}, 16) + +        checker.index(self.doc, self.indexer, 1) + +    def test_float(self): +        checker = types.LesanaFloat(self._get_field_def('float'), {}, 16) + +        checker.index(self.doc, self.indexer, 1.5) + +    def test_decimal(self): +        checker = types.LesanaDecimal(self._get_field_def('decimal'), {}, 16) + +        checker.index(self.doc, self.indexer, decimal.Decimal('1.0')) + +    def test_timestamp(self): +        checker = types.LesanaTimestamp( +            self._get_field_def('timestamp'), {}, 16 +        ) + +        checker.index(self.doc, self.indexer, 1600000000) + +    def test_datetime(self): +        checker = types.LesanaDatetime(self._get_field_def('datetime'), {}, 16) + + 
       checker.index(self.doc, self.indexer, datetime.datetime.now()) + +    def test_date(self): +        checker = types.LesanaDate(self._get_field_def('date'), {}, 16) + +        checker.index(self.doc, self.indexer, datetime.date.today()) + +    def test_boolean(self): +        checker = types.LesanaBoolean(self._get_field_def('boolean'), {}, 16) + +        checker.index(self.doc, self.indexer, True) + +    def test_url(self): +        checker = types.LesanaURL(self._get_field_def('url'), {}, 16) + +        checker.index(self.doc, self.indexer, "http://example.org") + +    def test_yaml(self): +        checker = types.LesanaYAML(self._get_field_def('yaml'), {}, 16) + +        checker.index(self.doc, self.indexer, {'a': 1, 'b': 2}) + +    def test_list(self): +        field_def = self._get_field_def('yaml') +        # we use one type that is easy to check for correct validation +        field_def['list'] = 'int' +        checker = types.LesanaList(field_def, {'int': types.LesanaInt}, 16) + +        checker.index(self.doc, self.indexer, ["some", "thing"]) + +  if __name__ == '__main__':      unittest.main() | 
