diff options
author | Elena ``of Valhalla'' Grandi <valhalla@trueelena.org> | 2020-12-07 12:01:32 +0100 |
---|---|---|
committer | Elena ``of Valhalla'' Grandi <valhalla@trueelena.org> | 2020-12-07 12:01:32 +0100 |
commit | 864048b9dd320f9ab5fd4b2312c89df0986bb636 (patch) | |
tree | 72559e69eff19f88b0464eff725b7a64d777fef9 | |
parent | d4ee3a366faed3ba56bb05e45b0883495a61d719 (diff) |
Add --reset to delete the xapian cache before indexing
-rw-r--r-- | docs/source/man/lesana-index.rst | 2 | ||||
-rw-r--r-- | lesana/collection.py | 8 | ||||
-rw-r--r-- | lesana/command.py | 12 | ||||
-rw-r--r-- | tests/test_collection.py | 3 |
4 files changed, 23 insertions, 2 deletions
diff --git a/docs/source/man/lesana-index.rst b/docs/source/man/lesana-index.rst index 0ba52f1..a4a7ae4 100644 --- a/docs/source/man/lesana-index.rst +++ b/docs/source/man/lesana-index.rst @@ -20,4 +20,6 @@ OPTIONS Prints an help message and exits. --collection COLLECTION, -c COLLECTION The collection to work on. Default is ``.`` +--reset + Delete the existing xapian cache before indexing. diff --git a/lesana/collection.py b/lesana/collection.py index af3977d..efd329e 100644 --- a/lesana/collection.py +++ b/lesana/collection.py @@ -1,6 +1,7 @@ import io import logging import os +import shutil import uuid import ruamel.yaml @@ -228,7 +229,7 @@ class Collection(object): ) return fields - def update_cache(self, fnames=None): + def update_cache(self, fnames=None, reset=False): """ Update the xapian db with the data in files. @@ -236,8 +237,13 @@ class Collection(object): If no files have been passed, add everything. + if ``reset`` the existing xapian db is deleted before indexing + Return the number of files that have been added to the cache. """ + if reset: + shutil.rmtree(os.path.join(self.basedir, '.lesana')) + os.makedirs(os.path.join(self.basedir, '.lesana'), exist_ok=True) cache = xapian.WritableDatabase( os.path.join(self.basedir, '.lesana/xapian'), xapian.DB_CREATE_OR_OPEN, diff --git a/lesana/command.py b/lesana/command.py index 31a452b..c3d1ba9 100644 --- a/lesana/command.py +++ b/lesana/command.py @@ -214,6 +214,13 @@ class Index(Command): dict(help='The collection to work on (default .)'), ), ( + ['--reset'], + dict( + action='store_true', + help='Delete the existing index and reindex from scratch.', + ), + ), + ( ['files'], dict( help='List of files to index (default: everything)', @@ -229,7 +236,10 @@ class Index(Command): files = (os.path.basename(f) for f in self.args.files) else: files = None - indexed = collection.update_cache(fnames=files) + indexed = collection.update_cache( + fnames=files, + reset=self.args.reset + ) print("Found and indexed {} entries".format(indexed)) diff --git a/tests/test_collection.py b/tests/test_collection.py index 8d4cf88..6a65ff3 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -219,6 +219,9 @@ class testSimpleCollection(unittest.TestCase): self.assertEqual(len(cm.output), 1) self.assertIn("non_existing_file", cm.output[0]) + def test_index_reset(self): + self.collection.update_cache(reset=True) + def test_get_entry_missing_eid(self): entry = self.collection.entry_from_eid('this is not an eid') self.assertIsNone(entry) |