summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Elena ``of Valhalla'' Grandi <valhalla@trueelena.org>, 2021-03-25 09:51:23 +0100
committer: Elena ``of Valhalla'' Grandi <valhalla@trueelena.org>, 2021-03-25 09:51:23 +0100
commit: f556ea9ad72af80c17a456bcc8248e57f2afe0e5 (patch)
tree: 2dde9b74d7a1059d51646b5b50420bc73f3548a9
parent: 3abed981be50f84dad531b370d43e13fa093d4da (diff)
Start getting the list and frequency of values for a field.
-rw-r--r-- lesana/collection.py 36
-rw-r--r-- lesana/types.py 2
-rw-r--r-- tests/data/complex/settings.yaml 1
-rw-r--r-- tests/test_collection.py 19
4 files changed, 57 insertions, 1 deletions
diff --git a/lesana/collection.py b/lesana/collection.py
index cbef6ee..c148890 100644
--- a/lesana/collection.py
+++ b/lesana/collection.py
@@ -1,3 +1,4 @@
+import collections
import io
import logging
import os
@@ -402,6 +403,41 @@ class Collection(object):
doc = cache.get_document(post.docid)
yield self._doc_to_entry(doc)
+ def get_field_values(self, field, querystring='*'):
+ field = self.fields[field]
+ if field.field.get('sortable', False):
+ self.start_search(querystring)
+
+ spy = xapian.ValueCountMatchSpy(field.value_index)
+ self._enquire.add_matchspy(spy)
+
+ cache = self._get_cache()
+ self._enquire.get_mset(0, cache.get_doccount())
+
+ for v in spy.values():
+ yield {
+ 'value': v.term,
+ 'frequency': v.termfreq,
+ }
+
+ else:
+ logging.info(
+ "Trying to get the list of values for a non sortable field."
+ )
+ logging.info(
+ "This is going to be pretty inefficient."
+ )
+ values = (
+ e.data[field.field['name']]
+ for e in self.get_all_documents()
+ )
+ counter = collections.Counter(values)
+ for v in counter.most_common():
+ yield {
+ 'value': v[0],
+ 'frequency': v[1],
+ }
+
def _match_to_entry(self, match):
return self._doc_to_entry(match.document)
diff --git a/lesana/types.py b/lesana/types.py
index 83d12ba..6e5d21d 100644
--- a/lesana/types.py
+++ b/lesana/types.py
@@ -76,7 +76,7 @@ class LesanaType:
doc.add_value(self.value_index, self._to_value(value))
else:
logging.debug(
- "Index values up to 8 are reserved for internal use"
+ "Index values up to 15 are reserved for internal use"
)
diff --git a/tests/data/complex/settings.yaml b/tests/data/complex/settings.yaml
index 7aaf47b..671a9b0 100644
--- a/tests/data/complex/settings.yaml
+++ b/tests/data/complex/settings.yaml
@@ -15,6 +15,7 @@ fields:
- name: position
type: string
index: field
+ sortable: true
- name: something
type: yaml
- name: tags
diff --git a/tests/test_collection.py b/tests/test_collection.py
index 2af4cfe..d05415f 100644
--- a/tests/test_collection.py
+++ b/tests/test_collection.py
@@ -285,6 +285,15 @@ class testSimpleCollection(unittest.TestCase):
with open(fname, 'r') as fp:
self.assertEqual(entry.yaml_data, fp.read())
+ def test_list_values(self):
+ values = self.collection.get_field_values('position')
+ values = list(values)
+ self.assertEqual(len(values), 2)
+ self.assertEqual(values, [
+ {'value': 'somewhere', 'frequency': 2},
+ {'value': None, 'frequency': 1},
+ ])
+
class testComplexCollection(unittest.TestCase):
def setUp(self):
@@ -455,6 +464,16 @@ class testComplexCollection(unittest.TestCase):
self.assertEqual(entry.data['updated'].year, now.year)
self.assertEqual(entry.data['version'], 2)
+ def test_list_values(self):
+ values = self.collection.get_field_values('position')
+ values = list(values)
+ self.assertEqual(values, [
+ {'value': b'Somewhere', 'frequency': 1},
+ {'value': b'over there', 'frequency': 1},
+ {'value': b'somewhere', 'frequency': 1},
+ {'value': b'there', 'frequency': 1},
+ ])
+
class testCollectionWithErrors(unittest.TestCase):
def setUp(self):