From f556ea9ad72af80c17a456bcc8248e57f2afe0e5 Mon Sep 17 00:00:00 2001
From: Elena ``of Valhalla'' Grandi <valhalla@trueelena.org>
Date: Thu, 25 Mar 2021 09:51:23 +0100
Subject: Start getting the list and frequency of values for a field.

---
 lesana/collection.py             | 36 ++++++++++++++++++++++++++++++++++++
 lesana/types.py                  |  2 +-
 tests/data/complex/settings.yaml |  1 +
 tests/test_collection.py         | 19 +++++++++++++++++++
 4 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/lesana/collection.py b/lesana/collection.py
index cbef6ee..c148890 100644
--- a/lesana/collection.py
+++ b/lesana/collection.py
@@ -1,3 +1,4 @@
+import collections
 import io
 import logging
 import os
@@ -402,6 +403,41 @@ class Collection(object):
             doc = cache.get_document(post.docid)
             yield self._doc_to_entry(doc)
 
+    def get_field_values(self, field, querystring='*'):
+        """Yield a ``{'value': ..., 'frequency': ...}`` dict per field value.
+
+        Sortable fields are counted by a xapian match spy run over the
+        results of ``querystring``. Any other field is counted by reading
+        every document of the collection, and ``querystring`` is ignored.
+        """
+        definition = self.fields[field]
+        if not definition.field.get('sortable', False):
+            logging.info(
+                "Trying to get the list of values for a non sortable field."
+            )
+            logging.info(
+                "This is going to be pretty inefficient."
+            )
+            tally = collections.Counter(
+                entry.data[definition.field['name']]
+                for entry in self.get_all_documents()
+            )
+            # most_common() lists the values from most to least frequent.
+            for value, frequency in tally.most_common():
+                yield {'value': value, 'frequency': frequency}
+            return
+
+        self.start_search(querystring)
+        spy = xapian.ValueCountMatchSpy(definition.value_index)
+        self._enquire.add_matchspy(spy)
+
+        # The spy is read only after the match set has been requested; ask
+        # for up to get_doccount() results and discard the match set itself.
+        doc_total = self._get_cache().get_doccount()
+        self._enquire.get_mset(0, doc_total)
+        for item in spy.values():
+            yield {'value': item.term, 'frequency': item.termfreq}
+
     def _match_to_entry(self, match):
         return self._doc_to_entry(match.document)
 
diff --git a/lesana/types.py b/lesana/types.py
index 83d12ba..6e5d21d 100644
--- a/lesana/types.py
+++ b/lesana/types.py
@@ -76,7 +76,7 @@ class LesanaType:
                 doc.add_value(self.value_index, self._to_value(value))
             else:
                 logging.debug(
-                    "Index values up to 8 are reserved for internal use"
+                    "Index values up to 15 are reserved for internal use"
                 )
 
 
diff --git a/tests/data/complex/settings.yaml b/tests/data/complex/settings.yaml
index 7aaf47b..671a9b0 100644
--- a/tests/data/complex/settings.yaml
+++ b/tests/data/complex/settings.yaml
@@ -15,6 +15,7 @@ fields:
     - name: position
       type: string
       index: field
+      sortable: true
     - name: something
       type: yaml
     - name: tags
diff --git a/tests/test_collection.py b/tests/test_collection.py
index 2af4cfe..d05415f 100644
--- a/tests/test_collection.py
+++ b/tests/test_collection.py
@@ -285,6 +285,15 @@ class testSimpleCollection(unittest.TestCase):
         with open(fname, 'r') as fp:
             self.assertEqual(entry.yaml_data, fp.read())
 
+    def test_list_values(self):
+        # One dict per distinct 'position' value, with its frequency.
+        found = list(self.collection.get_field_values('position'))
+        self.assertEqual(len(found), 2)
+        self.assertEqual(found, [
+            {'value': 'somewhere', 'frequency': 2},
+            {'value': None, 'frequency': 1},
+        ])
+
 
 class testComplexCollection(unittest.TestCase):
     def setUp(self):
@@ -455,6 +464,16 @@ class testComplexCollection(unittest.TestCase):
         self.assertEqual(entry.data['updated'].year, now.year)
         self.assertEqual(entry.data['version'], 2)
 
+    def test_list_values(self):
+        # Here every value is reported as bytes and occurs exactly once.
+        found = list(self.collection.get_field_values('position'))
+        self.assertEqual(found, [
+            {'value': b'Somewhere', 'frequency': 1},
+            {'value': b'over there', 'frequency': 1},
+            {'value': b'somewhere', 'frequency': 1},
+            {'value': b'there', 'frequency': 1},
+        ])
+
 
 class testCollectionWithErrors(unittest.TestCase):
     def setUp(self):
-- 
cgit v1.2.3