From 864048b9dd320f9ab5fd4b2312c89df0986bb636 Mon Sep 17 00:00:00 2001 From: Elena ``of Valhalla'' Grandi Date: Mon, 7 Dec 2020 12:01:32 +0100 Subject: Add --reset to delete the xapian cache before indexing --- docs/source/man/lesana-index.rst | 2 ++ lesana/collection.py | 8 +++++++- lesana/command.py | 12 +++++++++++- tests/test_collection.py | 3 +++ 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/docs/source/man/lesana-index.rst b/docs/source/man/lesana-index.rst index 0ba52f1..a4a7ae4 100644 --- a/docs/source/man/lesana-index.rst +++ b/docs/source/man/lesana-index.rst @@ -20,4 +20,6 @@ OPTIONS Prints an help message and exits. --collection COLLECTION, -c COLLECTION The collection to work on. Default is ``.`` +--reset + Delete the existing xapian cache before indexing. diff --git a/lesana/collection.py b/lesana/collection.py index af3977d..efd329e 100644 --- a/lesana/collection.py +++ b/lesana/collection.py @@ -1,6 +1,7 @@ import io import logging import os +import shutil import uuid import ruamel.yaml @@ -228,7 +229,7 @@ class Collection(object): ) return fields - def update_cache(self, fnames=None): + def update_cache(self, fnames=None, reset=False): """ Update the xapian db with the data in files. @@ -236,8 +237,13 @@ class Collection(object): If no files have been passed, add everything. + If ``reset`` is true, the existing xapian db is deleted before indexing. + Return the number of files that have been added to the cache. 
""" + if reset: + shutil.rmtree(os.path.join(self.basedir, '.lesana')) + os.makedirs(os.path.join(self.basedir, '.lesana'), exist_ok=True) cache = xapian.WritableDatabase( os.path.join(self.basedir, '.lesana/xapian'), xapian.DB_CREATE_OR_OPEN, diff --git a/lesana/command.py b/lesana/command.py index 31a452b..c3d1ba9 100644 --- a/lesana/command.py +++ b/lesana/command.py @@ -213,6 +213,13 @@ class Index(Command): ['--collection', '-c'], dict(help='The collection to work on (default .)'), ), + ( + ['--reset'], + dict( + action='store_true', + help='Delete the existing index and reindex from scratch.', + ), + ), ( ['files'], dict( @@ -229,7 +236,10 @@ class Index(Command): files = (os.path.basename(f) for f in self.args.files) else: files = None - indexed = collection.update_cache(fnames=files) + indexed = collection.update_cache( + fnames=files, + reset=self.args.reset + ) print("Found and indexed {} entries".format(indexed)) diff --git a/tests/test_collection.py b/tests/test_collection.py index 8d4cf88..6a65ff3 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -219,6 +219,9 @@ class testSimpleCollection(unittest.TestCase): self.assertEqual(len(cm.output), 1) self.assertIn("non_existing_file", cm.output[0]) + def test_index_reset(self): + self.collection.update_cache(reset=True) + def test_get_entry_missing_eid(self): entry = self.collection.entry_from_eid('this is not an eid') self.assertIsNone(entry) -- cgit v1.2.3