From b368e0744d440cb2ba5fde8593325071b8389240 Mon Sep 17 00:00:00 2001 From: Elena ``of Valhalla'' Grandi Date: Thu, 17 Aug 2017 22:27:53 +0200 Subject: Start validating entries before indexing them. --- .../items/b9a832309c984ada9f267471660c1313.yaml | 5 +++++ tests/data/wrong/settings.yaml | 3 +++ tests/test_collection.py | 26 +++++++++++++++++++++- 3 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 tests/data/wrong/items/b9a832309c984ada9f267471660c1313.yaml (limited to 'tests') diff --git a/tests/data/wrong/items/b9a832309c984ada9f267471660c1313.yaml b/tests/data/wrong/items/b9a832309c984ada9f267471660c1313.yaml new file mode 100644 index 0000000..ec44b7c --- /dev/null +++ b/tests/data/wrong/items/b9a832309c984ada9f267471660c1313.yaml @@ -0,0 +1,5 @@ +name: 'Problematic entry' +description: | + . +position: 'somewhere' +number: 'four' diff --git a/tests/data/wrong/settings.yaml b/tests/data/wrong/settings.yaml index 9871421..ef9ab74 100644 --- a/tests/data/wrong/settings.yaml +++ b/tests/data/wrong/settings.yaml @@ -9,3 +9,6 @@ fields: - name: position type: string index: facet + - name: number + type: integer + help: "Enter an integer here" diff --git a/tests/test_collection.py b/tests/test_collection.py index 869828a..9de681a 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -54,7 +54,7 @@ class testCollection(unittest.TestCase): self.assertIsNotNone(self.collection.settings) self.assertIsNotNone(self.collection.stemmer) # Fields with no "index" entry are not indexed - self.assertEqual(len(self.collection.settings['fields']), 3) + self.assertEqual(len(self.collection.settings['fields']), 4) self.assertEqual(len(self.collection.indexed_fields), 1) def test_load_safe(self): @@ -240,6 +240,30 @@ class testComplexCollection(unittest.TestCase): self.collection.update_cache() +class testCollectionWithErrors(unittest.TestCase): + @classmethod + def setUpClass(self): + self.collection = lesana.Collection('tests/data/wrong') + + @classmethod + def tearDownClass(self): + shutil.rmtree(os.path.join(self.collection.basedir, '.lesana')) + + def test_init(self): + self.assertIsNotNone(self.collection.settings) + self.assertEqual( + self.collection.settings['name'], + "Lesana collection with certain errors" + ) + self.assertEqual(len(self.collection.settings['fields']), 4) + self.assertIsNotNone(self.collection.stemmer) + self.assertEqual(len(self.collection.indexed_fields), 1) + + def test_index(self): + loaded = self.collection.update_cache() + self.assertEqual(loaded, 0) + + class testCollectionCreation(unittest.TestCase): def test_init(self): tmpdir = tempfile.mkdtemp() -- cgit v1.2.3 From 8e15a0763e3476e333b1e71b70015cf3bd757160 Mon Sep 17 00:00:00 2001 From: Elena ``of Valhalla'' Grandi Date: Sat, 19 Aug 2017 17:12:24 +0200 Subject: Support indexing list fields --- lesana/collection.py | 41 ++++++++++++++-------- .../items/73097121f1874a6ea2f927db7dc4f11e.yaml | 10 ++++++ tests/data/complex/settings.yaml | 7 +++- tests/test_collection.py | 13 +++++-- 4 files changed, 53 insertions(+), 18 deletions(-) create mode 100644 tests/data/complex/items/73097121f1874a6ea2f927db7dc4f11e.yaml (limited to 'tests') diff --git a/lesana/collection.py b/lesana/collection.py index 8f76ea7..cbdd3a7 100644 --- a/lesana/collection.py +++ b/lesana/collection.py @@ -151,24 +151,34 @@ class Collection(object): # Fields with prefix, for field search for field in self.indexed_fields: - try: - self.indexer.index_text( - entry.data.get(field['name']), - 1, - field['prefix']) - except ValueError as e: - logging.info("Not indexing empty? value {}: {}".format( - entry.data.get(field['name']), - str(e))) + if field['multi']: + values = entry.data.get(field['name']) + else: + values = [entry.data.get(field['name'])] + for v in values: + try: + self.indexer.index_text( + v, + 1, + field['prefix']) + except ValueError as e: + logging.info("Not indexing empty? value {}: {}".format( + entry.data.get(field['name']), + str(e))) # unprefixed fields, for full text search for field in self.indexed_fields: if field.get('free_search', False): - try: - self.indexer.index_text(entry.data.get(field['name'])) - self.indexer.increase_termpos() - except ValueError as e: - # probably already logged earlier - pass + if field['multi']: + values = entry.data.get(field['name']) + else: + values = [entry.data.get(field['name'])] + for v in values: + try: + self.indexer.index_text(v) + self.indexer.increase_termpos() + except ValueError as e: + # probably already logged earlier + pass doc.set_data(entry.yaml_data) doc.add_boolean_term(entry.idterm) doc.add_value(0, entry.fname.encode('utf-8')) @@ -186,6 +196,7 @@ class Collection(object): 'prefix': prefix, 'name': field['name'], 'free_search': field['index'] == 'free', + 'multi': field['type'] in ['list'] }) return fields diff --git a/tests/data/complex/items/73097121f1874a6ea2f927db7dc4f11e.yaml b/tests/data/complex/items/73097121f1874a6ea2f927db7dc4f11e.yaml new file mode 100644 index 0000000..1c7070c --- /dev/null +++ b/tests/data/complex/items/73097121f1874a6ea2f927db7dc4f11e.yaml @@ -0,0 +1,10 @@ +name: 'An item' +description: | + multi + line + description +position: 'over there' +something: '' +tags: + - this + - that diff --git a/tests/data/complex/settings.yaml b/tests/data/complex/settings.yaml index 57a1773..bd2179c 100644 --- a/tests/data/complex/settings.yaml +++ b/tests/data/complex/settings.yaml @@ -1,5 +1,6 @@ name: "Fully featured lesana collection" lang: 'english' +entry_label: '{{ uid}}: {{ name }} ({{ tags }})' fields: - name: name type: string @@ -11,6 +12,10 @@ fields: index: free - name: position type: string - index: facet + index: field - name: something type: yaml + - name: tags + type: list + list: string + index: field diff --git a/tests/test_collection.py b/tests/test_collection.py index 9de681a..875a35a 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -232,13 +232,22 @@ class testComplexCollection(unittest.TestCase): self.collection.settings['name'], "Fully featured lesana collection" ) - self.assertEqual(len(self.collection.settings['fields']), 4) + self.assertEqual(len(self.collection.settings['fields']), 5) self.assertIsNotNone(self.collection.stemmer) - self.assertEqual(len(self.collection.indexed_fields), 2) + self.assertEqual(len(self.collection.indexed_fields), 4) def test_index(self): self.collection.update_cache() + def test_indexing_list(self): + self.collection.update_cache(['73097121f1874a6ea2f927db7dc4f11e.yaml']) + self.collection.start_search('tags:this') + res = self.collection.get_search_results() + matches = list(res) + self.assertEqual(len(matches), 1) + for m in matches: + self.assertIsInstance(m, lesana.Entry) + class testCollectionWithErrors(unittest.TestCase): @classmethod -- cgit v1.2.3 From a2c8c2617e3bfc95b4d6c396c56afabeee59dadf Mon Sep 17 00:00:00 2001 From: Elena ``of Valhalla'' Grandi Date: Sat, 19 Aug 2017 18:17:45 +0200 Subject: Allow empty list fields --- lesana/collection.py | 2 ++ tests/data/complex/items/28b15099c84b41ab892133cd64876a32.yaml | 6 ++++++ 2 files changed, 8 insertions(+) create mode 100644 tests/data/complex/items/28b15099c84b41ab892133cd64876a32.yaml (limited to 'tests') diff --git a/lesana/collection.py b/lesana/collection.py index cbdd3a7..50ed21e 100644 --- a/lesana/collection.py +++ b/lesana/collection.py @@ -155,6 +155,8 @@ class Collection(object): values = entry.data.get(field['name']) else: values = [entry.data.get(field['name'])] + if not values: + values = [] for v in values: try: self.indexer.index_text( diff --git a/tests/data/complex/items/28b15099c84b41ab892133cd64876a32.yaml b/tests/data/complex/items/28b15099c84b41ab892133cd64876a32.yaml new file mode 100644 index 0000000..58b84bb --- /dev/null +++ b/tests/data/complex/items/28b15099c84b41ab892133cd64876a32.yaml @@ -0,0 +1,6 @@ +name: 'A tagless item' +description: | + . +position: 'somewhere' +something: '' +tags: -- cgit v1.2.3 From 851914e5da250e6a38e83bccf182ab8af1db0d32 Mon Sep 17 00:00:00 2001 From: Elena ``of Valhalla'' Grandi Date: Sat, 19 Aug 2017 18:50:29 +0200 Subject: Enable wildcard searches --- lesana/collection.py | 11 ++++++++++- tests/test_collection.py | 9 +++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'tests') diff --git a/lesana/collection.py b/lesana/collection.py index 7c654d8..fbdb935 100644 --- a/lesana/collection.py +++ b/lesana/collection.py @@ -101,6 +101,12 @@ class Entry(object): class Collection(object): """ """ + PARSER_FLAGS = ( + xapian.QueryParser.FLAG_BOOLEAN | + xapian.QueryParser.FLAG_PHRASE | + xapian.QueryParser.FLAG_LOVEHATE | + xapian.QueryParser.FLAG_WILDCARD + ) def __init__(self, directory=None, itemdir='items'): self.basedir = directory or os.getcwd() @@ -296,11 +302,14 @@ class Collection(object): cache = self._get_cache() queryparser = xapian.QueryParser() queryparser.set_stemmer(self.stemmer) + queryparser.set_database(cache) for field in self.indexed_fields: queryparser.add_prefix(field['name'], field['prefix']) - query = queryparser.parse_query(querystring) + query = queryparser.parse_query( + querystring, + self.PARSER_FLAGS) self._enquire = xapian.Enquire(cache) self._enquire.set_query(query) diff --git a/tests/test_collection.py b/tests/test_collection.py index 875a35a..84f0a7a 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -80,6 +80,15 @@ class testCollection(unittest.TestCase): for m in matches: self.assertIsInstance(m, lesana.Entry) + def test_search_wildcard(self): + self.collection = lesana.Collection('tests/data/simple') + self.collection.start_search('Ite*') + res = self.collection.get_search_results() + matches = list(res) + self.assertEqual(len(matches), 2) + for m in matches: + self.assertIsInstance(m, lesana.Entry) + def test_search_non_init(self): self.collection = lesana.Collection('tests/data/simple') matches = list(self.collection.get_search_results()) -- cgit v1.2.3