summary refs log tree commit diff
diff options
context:
space:
mode:
author Elena ``of Valhalla'' Grandi <valhalla@trueelena.org> 2020-12-07 12:01:32 +0100
committer Elena ``of Valhalla'' Grandi <valhalla@trueelena.org> 2020-12-07 12:01:32 +0100
commit 864048b9dd320f9ab5fd4b2312c89df0986bb636 (patch)
tree 72559e69eff19f88b0464eff725b7a64d777fef9
parent d4ee3a366faed3ba56bb05e45b0883495a61d719 (diff)
Add --reset to delete the xapian cache before indexing
-rw-r--r-- docs/source/man/lesana-index.rst 2
-rw-r--r-- lesana/collection.py 8
-rw-r--r-- lesana/command.py 12
-rw-r--r-- tests/test_collection.py 3
4 files changed, 23 insertions, 2 deletions
diff --git a/docs/source/man/lesana-index.rst b/docs/source/man/lesana-index.rst
index 0ba52f1..a4a7ae4 100644
--- a/docs/source/man/lesana-index.rst
+++ b/docs/source/man/lesana-index.rst
@@ -20,4 +20,6 @@ OPTIONS
Prints a help message and exits.
--collection COLLECTION, -c COLLECTION
The collection to work on. Default is ``.``
+--reset
+ Delete the existing xapian cache before indexing.
diff --git a/lesana/collection.py b/lesana/collection.py
index af3977d..efd329e 100644
--- a/lesana/collection.py
+++ b/lesana/collection.py
@@ -1,6 +1,7 @@
import io
import logging
import os
+import shutil
import uuid
import ruamel.yaml
@@ -228,7 +229,7 @@ class Collection(object):
)
return fields
- def update_cache(self, fnames=None):
+ def update_cache(self, fnames=None, reset=False):
"""
Update the xapian db with the data in files.
@@ -236,8 +237,13 @@ class Collection(object):
If no files have been passed, add everything.
+ If ``reset`` is true, the existing xapian db is deleted before indexing.
+
Return the number of files that have been added to the cache.
"""
+ if reset:
+ shutil.rmtree(os.path.join(self.basedir, '.lesana'))
+ os.makedirs(os.path.join(self.basedir, '.lesana'), exist_ok=True)
cache = xapian.WritableDatabase(
os.path.join(self.basedir, '.lesana/xapian'),
xapian.DB_CREATE_OR_OPEN,
diff --git a/lesana/command.py b/lesana/command.py
index 31a452b..c3d1ba9 100644
--- a/lesana/command.py
+++ b/lesana/command.py
@@ -214,6 +214,13 @@ class Index(Command):
dict(help='The collection to work on (default .)'),
),
(
+ ['--reset'],
+ dict(
+ action='store_true',
+ help='Delete the existing index and reindex from scratch.',
+ ),
+ ),
+ (
['files'],
dict(
help='List of files to index (default: everything)',
@@ -229,7 +236,10 @@ class Index(Command):
files = (os.path.basename(f) for f in self.args.files)
else:
files = None
- indexed = collection.update_cache(fnames=files)
+ indexed = collection.update_cache(
+ fnames=files,
+ reset=self.args.reset
+ )
print("Found and indexed {} entries".format(indexed))
diff --git a/tests/test_collection.py b/tests/test_collection.py
index 8d4cf88..6a65ff3 100644
--- a/tests/test_collection.py
+++ b/tests/test_collection.py
@@ -219,6 +219,9 @@ class testSimpleCollection(unittest.TestCase):
self.assertEqual(len(cm.output), 1)
self.assertIn("non_existing_file", cm.output[0])
+ def test_index_reset(self):
+ self.collection.update_cache(reset=True)
+
def test_get_entry_missing_eid(self):
entry = self.collection.entry_from_eid('this is not an eid')
self.assertIsNone(entry)