diff options
-rw-r--r-- | README.rst | 4 | ||||
-rw-r--r-- | lesana/collection.py | 110 | ||||
-rw-r--r-- | tests/data/complex/items/28b15099c84b41ab892133cd64876a32.yaml | 6 | ||||
-rw-r--r-- | tests/data/complex/items/73097121f1874a6ea2f927db7dc4f11e.yaml | 10 | ||||
-rw-r--r-- | tests/data/complex/settings.yaml | 7 | ||||
-rw-r--r-- | tests/data/wrong/items/b9a832309c984ada9f267471660c1313.yaml | 5 | ||||
-rw-r--r-- | tests/data/wrong/settings.yaml | 3 | ||||
-rw-r--r-- | tests/test_collection.py | 48 |
8 files changed, 169 insertions, 24 deletions
@@ -45,6 +45,10 @@ Under debian (and derivatives), the packages to install are:: (some of those are only available on stretch+ because earlier versions lacked python3 support.) +lesana can be run in place from the git checkout / extracted tarball; to +use ``setup.py`` you will also need setuptools (e.g. from the +``python3-setuptools`` package under debian and derivatives). + License ------- diff --git a/lesana/collection.py b/lesana/collection.py index 2ac99aa..fbdb935 100644 --- a/lesana/collection.py +++ b/lesana/collection.py @@ -64,10 +64,49 @@ class Entry(object): def idterm(self): return "Q"+self.uid + def validate(self): + errors = [] + valid = True + for field in self.collection.settings['fields']: + value = self.data.get(field['name'], None) + if not value: + # empty fields are always fine + continue + t = field['type'] + if t == 'integer': + try: + int(value) + except ValueError: + valid = False + errors.append({ + 'field': field['name'], + 'error': 'Invalid value for integer field: {}'.format( + value + ), + }) + elif t == 'float': + try: + float(value) + except ValueError: + valid = False + errors.append({ + 'field': field['name'], + 'error': 'Invalid value for float field: {}'.format( + value + ), + }) + return valid, errors + class Collection(object): """ """ + PARSER_FLAGS = ( + xapian.QueryParser.FLAG_BOOLEAN | + xapian.QueryParser.FLAG_PHRASE | + xapian.QueryParser.FLAG_LOVEHATE | + xapian.QueryParser.FLAG_WILDCARD + ) def __init__(self, directory=None, itemdir='items'): self.basedir = directory or os.getcwd() @@ -96,6 +135,7 @@ class Collection(object): # This selects whether to load all other yaml files with # safe_load or load + RoundTripLoader self.safe = False + self.entry_class = Entry def _index_file(self, fname, cache): with open(os.path.join(self.itemdir, fname)) as fp: @@ -103,36 +143,55 @@ class Collection(object): data = ruamel.yaml.safe_load(fp) else: data = ruamel.yaml.load(fp, ruamel.yaml.RoundTripLoader) - entry = Entry(self, data, fname) + entry = self.entry_class(self, data, fname) + valid, errors = entry.validate() + if not valid: + logging.warning( + "Not indexing {fname}: invalid data".format(fname=fname) + ) + return False, errors doc = xapian.Document() self.indexer.set_document(doc) # Fields with prefix, for field search for field in self.indexed_fields: - try: - self.indexer.index_text( - entry.data.get(field['name']), - 1, - field['prefix']) - except ValueError as e: - logging.info("Not indexing empty? value {}: {}".format( - entry.data.get(field['name']), - str(e))) + if field['multi']: + values = entry.data.get(field['name']) + else: + values = [entry.data.get(field['name'])] + if not values: + values = [] + for v in values: + try: + self.indexer.index_text( + v, + 1, + field['prefix']) + except ValueError as e: + logging.info("Not indexing empty? value {}: {}".format( + entry.data.get(field['name']), + str(e))) # unprefixed fields, for full text search for field in self.indexed_fields: if field.get('free_search', False): - try: - self.indexer.index_text(entry.data.get(field['name'])) - self.indexer.increase_termpos() - except ValueError as e: - # probably already logged earlier - pass + if field['multi']: + values = entry.data.get(field['name']) + else: + values = [entry.data.get(field['name'])] + for v in values: + try: + self.indexer.index_text(v) + self.indexer.increase_termpos() + except ValueError as e: + # probably already logged earlier + pass doc.set_data(entry.yaml_data) doc.add_boolean_term(entry.idterm) doc.add_value(0, entry.fname.encode('utf-8')) cache.replace_document(entry.idterm, doc) + return True, [] @property def indexed_fields(self): @@ -144,6 +203,7 @@ class Collection(object): 'prefix': prefix, 'name': field['name'], 'free_search': field['index'] == 'free', + 'multi': field['type'] in ['list'] }) return fields @@ -175,14 +235,21 @@ class Collection(object): updated = 0 for fname in fnames: try: - self._index_file(fname, cache) + valid, errors = self._index_file(fname, cache) except IOError as e: logging.warning("Could not load file {}: {}".format( fname, str(e)) ) else: - updated += 1 + if valid: + updated += 1 + else: + logging.warning( + "File {fname} could not be indexed: {errors}".format( + fname=fname, + errors=errors) + ) return updated def save_entries(self, entries=[]): @@ -235,11 +302,14 @@ class Collection(object): cache = self._get_cache() queryparser = xapian.QueryParser() queryparser.set_stemmer(self.stemmer) + queryparser.set_database(cache) for field in self.indexed_fields: queryparser.add_prefix(field['name'], field['prefix']) - query = queryparser.parse_query(querystring) + query = queryparser.parse_query( + querystring, + self.PARSER_FLAGS) self._enquire = xapian.Enquire(cache) self._enquire.set_query(query) @@ -282,7 +352,7 @@ class Collection(object): doc.get_data(), ruamel.yaml.RoundTripLoader ) - entry = Entry( + entry = self.entry_class( self, data=data, fname=fname, diff --git a/tests/data/complex/items/28b15099c84b41ab892133cd64876a32.yaml b/tests/data/complex/items/28b15099c84b41ab892133cd64876a32.yaml new file mode 100644 index 0000000..58b84bb --- /dev/null +++ b/tests/data/complex/items/28b15099c84b41ab892133cd64876a32.yaml @@ -0,0 +1,6 @@ +name: 'A tagless item' +description: | + . +position: 'somewhere' +something: '' +tags: diff --git a/tests/data/complex/items/73097121f1874a6ea2f927db7dc4f11e.yaml b/tests/data/complex/items/73097121f1874a6ea2f927db7dc4f11e.yaml new file mode 100644 index 0000000..1c7070c --- /dev/null +++ b/tests/data/complex/items/73097121f1874a6ea2f927db7dc4f11e.yaml @@ -0,0 +1,10 @@ +name: 'An item' +description: | + multi + line + description +position: 'over there' +something: '' +tags: + - this + - that diff --git a/tests/data/complex/settings.yaml b/tests/data/complex/settings.yaml index 57a1773..bd2179c 100644 --- a/tests/data/complex/settings.yaml +++ b/tests/data/complex/settings.yaml @@ -1,5 +1,6 @@ name: "Fully featured lesana collection" lang: 'english' +entry_label: '{{ uid}}: {{ name }} ({{ tags }})' fields: - name: name type: string @@ -11,6 +12,10 @@ fields: index: free - name: position type: string - index: facet + index: field - name: something type: yaml + - name: tags + type: list + list: string + index: field diff --git a/tests/data/wrong/items/b9a832309c984ada9f267471660c1313.yaml b/tests/data/wrong/items/b9a832309c984ada9f267471660c1313.yaml new file mode 100644 index 0000000..ec44b7c --- /dev/null +++ b/tests/data/wrong/items/b9a832309c984ada9f267471660c1313.yaml @@ -0,0 +1,5 @@ +name: 'Problematic entry' +description: | + . +position: 'somewhere' +number: 'four' diff --git a/tests/data/wrong/settings.yaml b/tests/data/wrong/settings.yaml index 9871421..ef9ab74 100644 --- a/tests/data/wrong/settings.yaml +++ b/tests/data/wrong/settings.yaml @@ -9,3 +9,6 @@ fields: - name: position type: string index: facet + - name: number + type: integer + help: "Enter an integer here" diff --git a/tests/test_collection.py b/tests/test_collection.py index 869828a..84f0a7a 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -54,7 +54,7 @@ class testCollection(unittest.TestCase): self.assertIsNotNone(self.collection.settings) self.assertIsNotNone(self.collection.stemmer) # Fields with no "index" entry are not indexed - self.assertEqual(len(self.collection.settings['fields']), 3) + self.assertEqual(len(self.collection.settings['fields']), 4) self.assertEqual(len(self.collection.indexed_fields), 1) def test_load_safe(self): @@ -80,6 +80,15 @@ class testCollection(unittest.TestCase): for m in matches: self.assertIsInstance(m, lesana.Entry) + def test_search_wildcard(self): + self.collection = lesana.Collection('tests/data/simple') + self.collection.start_search('Ite*') + res = self.collection.get_search_results() + matches = list(res) + self.assertEqual(len(matches), 2) + for m in matches: + self.assertIsInstance(m, lesana.Entry) + def test_search_non_init(self): self.collection = lesana.Collection('tests/data/simple') matches = list(self.collection.get_search_results()) @@ -232,13 +241,46 @@ class testComplexCollection(unittest.TestCase): self.collection.settings['name'], "Fully featured lesana collection" ) - self.assertEqual(len(self.collection.settings['fields']), 4) + self.assertEqual(len(self.collection.settings['fields']), 5) self.assertIsNotNone(self.collection.stemmer) - self.assertEqual(len(self.collection.indexed_fields), 2) + self.assertEqual(len(self.collection.indexed_fields), 4) def test_index(self): self.collection.update_cache() + def test_indexing_list(self): + self.collection.update_cache(['73097121f1874a6ea2f927db7dc4f11e.yaml']) + self.collection.start_search('tags:this') + res = self.collection.get_search_results() + matches = list(res) + self.assertEqual(len(matches), 1) + for m in matches: + self.assertIsInstance(m, lesana.Entry) + + +class testCollectionWithErrors(unittest.TestCase): + @classmethod + def setUpClass(self): + self.collection = lesana.Collection('tests/data/wrong') + + @classmethod + def tearDownClass(self): + shutil.rmtree(os.path.join(self.collection.basedir, '.lesana')) + + def test_init(self): + self.assertIsNotNone(self.collection.settings) + self.assertEqual( + self.collection.settings['name'], + "Lesana collection with certain errors" + ) + self.assertEqual(len(self.collection.settings['fields']), 4) + self.assertIsNotNone(self.collection.stemmer) + self.assertEqual(len(self.collection.indexed_fields), 1) + + def test_index(self): + loaded = self.collection.update_cache() + self.assertEqual(loaded, 0) + class testCollectionCreation(unittest.TestCase): def test_init(self): |