diff options
| author | Elena ``of Valhalla'' Grandi <valhalla@trueelena.org> | 2021-03-25 09:51:23 +0100 | 
|---|---|---|
| committer | Elena ``of Valhalla'' Grandi <valhalla@trueelena.org> | 2021-03-25 09:51:23 +0100 | 
| commit | f556ea9ad72af80c17a456bcc8248e57f2afe0e5 (patch) | |
| tree | 2dde9b74d7a1059d51646b5b50420bc73f3548a9 | |
| parent | 3abed981be50f84dad531b370d43e13fa093d4da (diff) | |
Start getting the list and frequency of values for a field.
| -rw-r--r-- | lesana/collection.py | 36 | ||||
| -rw-r--r-- | lesana/types.py | 2 | ||||
| -rw-r--r-- | tests/data/complex/settings.yaml | 1 | ||||
| -rw-r--r-- | tests/test_collection.py | 19 | 
4 files changed, 57 insertions, 1 deletion
diff --git a/lesana/collection.py b/lesana/collection.py index cbef6ee..c148890 100644 --- a/lesana/collection.py +++ b/lesana/collection.py @@ -1,3 +1,4 @@ +import collections  import io  import logging  import os @@ -402,6 +403,41 @@ class Collection(object):              doc = cache.get_document(post.docid)              yield self._doc_to_entry(doc) +    def get_field_values(self, field, querystring='*'): +        field = self.fields[field] +        if field.field.get('sortable', False): +            self.start_search(querystring) + +            spy = xapian.ValueCountMatchSpy(field.value_index) +            self._enquire.add_matchspy(spy) + +            cache = self._get_cache() +            self._enquire.get_mset(0, cache.get_doccount()) + +            for v in spy.values(): +                yield { +                    'value': v.term, +                    'frequency': v.termfreq, +                } + +        else: +            logging.info( +                "Trying to get the list of values for a non sortable field." +            ) +            logging.info( +                "This is going to be pretty inefficient." 
+            ) +            values = ( +                e.data[field.field['name']] +                for e in self.get_all_documents() +            ) +            counter = collections.Counter(values) +            for v in counter.most_common(): +                yield { +                    'value': v[0], +                    'frequency': v[1], +                } +      def _match_to_entry(self, match):          return self._doc_to_entry(match.document) diff --git a/lesana/types.py b/lesana/types.py index 83d12ba..6e5d21d 100644 --- a/lesana/types.py +++ b/lesana/types.py @@ -76,7 +76,7 @@ class LesanaType:                  doc.add_value(self.value_index, self._to_value(value))              else:                  logging.debug( -                    "Index values up to 8 are reserved for internal use" +                    "Index values up to 15 are reserved for internal use"                  ) diff --git a/tests/data/complex/settings.yaml b/tests/data/complex/settings.yaml index 7aaf47b..671a9b0 100644 --- a/tests/data/complex/settings.yaml +++ b/tests/data/complex/settings.yaml @@ -15,6 +15,7 @@ fields:      - name: position        type: string        index: field +      sortable: true      - name: something        type: yaml      - name: tags diff --git a/tests/test_collection.py b/tests/test_collection.py index 2af4cfe..d05415f 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -285,6 +285,15 @@ class testSimpleCollection(unittest.TestCase):          with open(fname, 'r') as fp:              self.assertEqual(entry.yaml_data, fp.read()) +    def test_list_values(self): +        values = self.collection.get_field_values('position') +        values = list(values) +        self.assertEqual(len(values), 2) +        self.assertEqual(values, [ +            {'value': 'somewhere', 'frequency': 2}, +            {'value': None, 'frequency': 1}, +        ]) +  class testComplexCollection(unittest.TestCase):      def setUp(self): @@ -455,6 +464,16 @@ 
class testComplexCollection(unittest.TestCase):          self.assertEqual(entry.data['updated'].year, now.year)          self.assertEqual(entry.data['version'], 2) +    def test_list_values(self): +        values = self.collection.get_field_values('position') +        values = list(values) +        self.assertEqual(values, [ +            {'value': b'Somewhere', 'frequency': 1}, +            {'value': b'over there', 'frequency': 1}, +            {'value': b'somewhere', 'frequency': 1}, +            {'value': b'there', 'frequency': 1}, +        ]) +  class testCollectionWithErrors(unittest.TestCase):      def setUp(self):  | 
