diff --git a/README.md b/README.md index 88c1673..b38d6d3 100755 --- a/README.md +++ b/README.md @@ -41,7 +41,17 @@ It also comes with a dryrun option. ckan -c /var/www/ckan/development.ini ogdch cleanup_resources ``` -## Command to clean up the package extra table. +### Command to cleanup resource-files from the filestore. +When a resource gets deleted, it is only marked as deleted in the database, and its associated file in the CKAN-FileStore is not deleted. +This command finds these orphaned files by checking whether their corresponding resource still exists. +It is meant to be run regularly by a cronjob. +It also comes with a dryrun option. + +```bash +paster --plugin=ckanext-ogdchcommands ogdch cleanup_filestore -c /var/www/ckan/development.ini +``` + +## Command to cleanup the package extra table. When a key is no longer needed in the package_extra table, since it is no longer part of the dataset, then after the data have been migrated that old key can be removed from the package_extra table and from the dependent table package_extra_revision. diff --git a/ckanext/ogdchcommands/logic.py b/ckanext/ogdchcommands/logic.py index 463304c..7d83bc1 100644 --- a/ckanext/ogdchcommands/logic.py +++ b/ckanext/ogdchcommands/logic.py @@ -233,10 +233,10 @@ def ogdch_cleanup_filestore(context, data_dict): filepaths = [] errors = [] - for subdir, dirs, files in os.walk(storage_path): + for subdir, dirs, files in os.walk(resource_path): for file in files: fullpath = os.path.join(subdir, file) - relpath = os.path.relpath(fullpath, storage_path) + relpath = os.path.relpath(fullpath, resource_path) resource_id = get_resource_id(relpath) tk.check_access("resource_show", context, {"id": resource_id})