aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/source/user/settings.rst4
-rw-r--r--lesana/collection.py6
-rw-r--r--lesana/types.py49
-rw-r--r--tests/data/complex/settings.yaml2
-rw-r--r--tests/test_collection.py2
-rw-r--r--tests/test_types.py93
6 files changed, 147 insertions, 9 deletions
diff --git a/docs/source/user/settings.rst b/docs/source/user/settings.rst
index 68c4a0d..0d2eec9 100644
--- a/docs/source/user/settings.rst
+++ b/docs/source/user/settings.rst
@@ -36,6 +36,10 @@ Field definitions
fields that should be available in the free text search and ``field``
for fields that should only be available by specifying the field name
in the search.
+``sortable``:
+ boolean; whether this field is sortable. Sortable fields enable
+ sorting the results and searching by ranges, but having too many
+ sortable fields makes the search more resource intensive.
``help``:
a description for the field; this is e.g. added to new entries as a
comment.
diff --git a/lesana/collection.py b/lesana/collection.py
index 77023f7..7a4ff11 100644
--- a/lesana/collection.py
+++ b/lesana/collection.py
@@ -166,11 +166,15 @@ class Collection(object):
for t in self._get_subsubclasses(types.LesanaType):
type_loaders[t.name] = t
fields = {}
- for field in self.settings.get('fields', []):
+ for i, field in enumerate(self.settings.get('fields', [])):
try:
fields[field['name']] = type_loaders[field['type']](
field,
type_loaders,
+ # value slot 0 is used to store the filename, and we
+ # reserve a few more slots just in case they are
+ # needed by lesana or some derivative
+ value_index=i + 16,
)
except KeyError:
# unknown fields are treated as if they were
diff --git a/lesana/types.py b/lesana/types.py
index b013a48..ce15b69 100644
--- a/lesana/types.py
+++ b/lesana/types.py
@@ -10,13 +10,16 @@ import logging
import dateutil.parser
+import xapian
+
class LesanaType:
"""
Base class for lesana field types.
"""
- def __init__(self, field, types):
+ def __init__(self, field, types, value_index=None):
self.field = field
+ self.value_index = value_index
def load(self, data):
raise NotImplementedError
@@ -24,6 +27,18 @@ class LesanaType:
def empty(self):
raise NotImplementedError
+ def _to_index_text(self, value):
+ """
+ Prepare a value for indexing.
+ """
+ return str(value)
+
+ def _to_value(self, value):
+ """
+ Prepare a value for indexing in a value slot
+ """
+ return str(value)
+
def index(self, doc, indexer, value):
"""
Index a value for this field type.
@@ -35,16 +50,24 @@ class LesanaType:
"""
to_index = self.field.get('index', False)
if not to_index:
- return False
+ return
if not value:
logging.info(
"Not indexing empty value {}".format(value)
)
+ return
prefix = self.field.get('prefix', 'X' + self.field['name'].upper())
- indexer.index_text(str(value), 1, prefix)
+ indexer.index_text(self._to_index_text(value), 1, prefix)
if to_index == 'free':
- indexer.index_text(str(value))
+ indexer.index_text(self._to_index_text(value))
indexer.increase_termpos()
+ if self.field.get('sortable', False):
+ if self.value_index and self.value_index >= 16:
+ doc.add_value(self.value_index, self._to_value(value))
+ else:
+ logging.debug(
+ "Index values up to 8 are reserved for internal use"
+ )
class LesanaString(LesanaType):
@@ -88,6 +111,18 @@ class LesanaInt(LesanaType):
def empty(self):
return 0
+ def _to_index_text(self, value):
+ """
+ Prepare a value for indexing.
+ """
+ return str(value)
+
+ def _to_value(self, value):
+ """
+ Prepare a value for indexing in a value slot
+ """
+ return xapian.sortable_serialise(value)
+
class LesanaFloat(LesanaType):
"""
@@ -257,8 +292,8 @@ class LesanaList(LesanaType):
name = 'list'
- def __init__(self, field, types):
- super().__init__(field, types)
+ def __init__(self, field, types, value_index=None):
+ super().__init__(field, types, value_index)
try:
self.sub_type = types[field['list']](field, types)
except KeyError:
@@ -285,7 +320,7 @@ class LesanaList(LesanaType):
def index(self, doc, indexer, value):
for v in value:
- super().index(doc, indexer, v)
+ self.sub_type.index(doc, indexer, v)
class LesanaValueError(ValueError):
diff --git a/tests/data/complex/settings.yaml b/tests/data/complex/settings.yaml
index 51f313f..e6781b5 100644
--- a/tests/data/complex/settings.yaml
+++ b/tests/data/complex/settings.yaml
@@ -31,3 +31,5 @@ fields:
default: 'default value'
- name: amount
type: integer
+ index: field
+ sortable: true
diff --git a/tests/test_collection.py b/tests/test_collection.py
index bbc35ba..f3e06da 100644
--- a/tests/test_collection.py
+++ b/tests/test_collection.py
@@ -272,7 +272,7 @@ class testComplexCollection(unittest.TestCase):
)
self.assertEqual(len(self.collection.settings['fields']), 9)
self.assertIsNotNone(self.collection.stemmer)
- self.assertEqual(len(self.collection.indexed_fields), 6)
+ self.assertEqual(len(self.collection.indexed_fields), 7)
def test_index(self):
self.collection.update_cache()
diff --git a/tests/test_types.py b/tests/test_types.py
index 90f6482..6f0c33e 100644
--- a/tests/test_types.py
+++ b/tests/test_types.py
@@ -2,6 +2,8 @@ import datetime
import decimal
import unittest
+import xapian
+
from lesana import types
@@ -308,5 +310,96 @@ class testTypes(unittest.TestCase):
checker.load(d)
+class testTypeIndexing(unittest.TestCase):
+ def setUp(self):
+ self.doc = xapian.Document()
+ self.indexer = xapian.TermGenerator()
+
+ def _get_field_def(self, type_name):
+ return {
+ 'type': type_name,
+ 'name': 'test_field',
+ 'index': 'field',
+ 'sortable': True,
+ }
+
+ def test_base(self):
+ checker = types.LesanaType(self._get_field_def('base'), {}, 16)
+
+ checker.index(self.doc, self.indexer, "some string")
+
+ def test_base_value_index_too_low(self):
+ checker = types.LesanaType(self._get_field_def('base'), {}, 1)
+
+ checker.index(self.doc, self.indexer, "some string")
+
+ # TODO: check that the string has not been indexed
+
+ def test_string(self):
+ checker = types.LesanaString(self._get_field_def('string'), {}, 16)
+
+ checker.index(self.doc, self.indexer, "some string")
+
+ def test_text(self):
+ checker = types.LesanaText(self._get_field_def('text'), {}, 16)
+
+ checker.index(self.doc, self.indexer, "some string")
+
+ def test_int(self):
+ checker = types.LesanaInt(self._get_field_def('integer'), {}, 16)
+
+ checker.index(self.doc, self.indexer, 1)
+
+ def test_float(self):
+ checker = types.LesanaFloat(self._get_field_def('float'), {}, 16)
+
+ checker.index(self.doc, self.indexer, 1.5)
+
+ def test_decimal(self):
+ checker = types.LesanaDecimal(self._get_field_def('decimal'), {}, 16)
+
+ checker.index(self.doc, self.indexer, decimal.Decimal('1.0'))
+
+ def test_timestamp(self):
+ checker = types.LesanaTimestamp(
+ self._get_field_def('timestamp'), {}, 16
+ )
+
+ checker.index(self.doc, self.indexer, 1600000000)
+
+ def test_datetime(self):
+ checker = types.LesanaDatetime(self._get_field_def('datetime'), {}, 16)
+
+ checker.index(self.doc, self.indexer, datetime.datetime.now())
+
+ def test_date(self):
+ checker = types.LesanaDate(self._get_field_def('date'), {}, 16)
+
+ checker.index(self.doc, self.indexer, datetime.date.today())
+
+ def test_boolean(self):
+ checker = types.LesanaBoolean(self._get_field_def('boolean'), {}, 16)
+
+ checker.index(self.doc, self.indexer, True)
+
+ def test_url(self):
+ checker = types.LesanaURL(self._get_field_def('url'), {}, 16)
+
+ checker.index(self.doc, self.indexer, "http://example.org")
+
+ def test_yaml(self):
+ checker = types.LesanaYAML(self._get_field_def('yaml'), {}, 16)
+
+ checker.index(self.doc, self.indexer, {'a': 1, 'b': 2})
+
+ def test_list(self):
+ field_def = self._get_field_def('yaml')
+ # we use one type that is easy to check for correct validation
+ field_def['list'] = 'int'
+ checker = types.LesanaList(field_def, {'int': types.LesanaInt}, 16)
+
+ checker.index(self.doc, self.indexer, ["some", "thing"])
+
+
if __name__ == '__main__':
unittest.main()