Skip to content

Commit

Permalink
Don't unnecessarily download remote sources to cache
Browse files Browse the repository at this point in the history
  • Loading branch information
lkubb authored and dwoz committed May 13, 2024
1 parent cf9ef70 commit 1976763
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 3 deletions.
1 change: 1 addition & 0 deletions changelog/66342.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Made `file.managed` skip download of a remote source if the managed file already exists with the correct hash
61 changes: 59 additions & 2 deletions salt/states/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -2501,8 +2501,12 @@ def managed(
Set to ``False`` to discard the cached copy of the source file once the
state completes. This can be useful for larger files to keep them from
taking up space in minion cache. However, keep in mind that discarding
the source file will result in the state needing to re-download the
source file if the state is run again.
the source file might result in the state needing to re-download the
source file if the state is run again. If the source is not a local or
``salt://`` one, the source hash is known, ``skip_verify`` is not true
and the managed file exists with the correct hash and is not templated,
this is not the case (i.e. remote downloads are avoided if the local hash
matches the expected one).
.. versionadded:: 2017.7.3
Expand Down Expand Up @@ -3220,6 +3224,59 @@ def managed(
if defaults and not isinstance(defaults, dict):
return _error(ret, "Defaults must be formed as a dict")

# If we're pulling from a remote source untemplated and we have a source hash,
# check early if the local file exists with the correct hash and skip
# managing contents if so. This avoids a lot of overhead.
if (
contents is None
and not template
and source
and not skip_verify
and os.path.exists(name)
and replace
):
try:
# If the source is a list, find the first existing file.
# We're doing this after basic checks to not slow down
# runs where it does not matter.
source, source_hash = __salt__["file.source_list"](
source, source_hash, __env__
)
source_sum = None
if (
source
and source_hash
and urllib.parse.urlparse(source).scheme
not in (
"salt",
"file",
)
and not os.path.isabs(source)
):
source_sum = __salt__["file.get_source_sum"](
name,
source,
source_hash,
source_hash_name,
__env__,
verify_ssl=verify_ssl,
source_hash_sig=source_hash_sig,
signed_by_any=signed_by_any,
signed_by_all=signed_by_all,
keyring=keyring,
gnupghome=gnupghome,
)
hsum = __salt__["file.get_hash"](name, source_sum["hash_type"])
except (CommandExecutionError, OSError) as err:
log.error(
"Failed checking existing file's hash against specified source_hash: %s",
err,
exc_info_on_loglevel=logging.DEBUG,
)
else:
if source_sum and source_sum["hsum"] == hsum:
replace = False

if not replace and os.path.exists(name):
ret_perms = {}
# Check and set the permissions if necessary
Expand Down
2 changes: 1 addition & 1 deletion tests/pytests/functional/states/file/test_managed.py
Original file line number Diff line number Diff line change
Expand Up @@ -1057,7 +1057,7 @@ def test_file_managed_remote_source_does_not_refetch_existing_file_with_correct_
Issue #64373
"""
name = tmp_path / "scene33"
name.write_text(grail_scene33_file.read_text())
name.write_bytes(grail_scene33_file.read_bytes())
ret = file.managed(
str(name),
source="http://127.0.0.1:1337/does/not/exist",
Expand Down

0 comments on commit 1976763

Please sign in to comment.