From 9d6202773d2a8336f4a92325ad3a050eab6d0504 Mon Sep 17 00:00:00 2001 From: Sakari Ikonen Date: Fri, 25 Oct 2024 16:28:46 +0300 Subject: [PATCH 1/4] update secrets deco docstring --- metaflow/plugins/secrets/secrets_decorator.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/metaflow/plugins/secrets/secrets_decorator.py b/metaflow/plugins/secrets/secrets_decorator.py index 85ee372748e..60ff838bc33 100644 --- a/metaflow/plugins/secrets/secrets_decorator.py +++ b/metaflow/plugins/secrets/secrets_decorator.py @@ -188,6 +188,8 @@ class SecretsDecorator(StepDecorator): ---------- sources : List[Union[str, Dict[str, Any]]], default: [] List of secret specs, defining how the secrets are to be retrieved + role : str, optional + Role to use for fetching secrets """ name = "secrets" From fd4cbd5e39d5412615c2ed7ee770712766d4c374 Mon Sep 17 00:00:00 2001 From: Sakari Ikonen Date: Fri, 25 Oct 2024 16:29:24 +0300 Subject: [PATCH 2/4] introduce function for fetching secrets in user code --- metaflow/plugins/secrets/__init__.py | 3 ++ metaflow/plugins/secrets/secrets_func.py | 61 ++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 metaflow/plugins/secrets/secrets_func.py diff --git a/metaflow/plugins/secrets/__init__.py b/metaflow/plugins/secrets/__init__.py index 544cf1ad220..56d6f9fdfe2 100644 --- a/metaflow/plugins/secrets/__init__.py +++ b/metaflow/plugins/secrets/__init__.py @@ -9,3 +9,6 @@ class SecretsProvider(abc.ABC): def get_secret_as_dict(self, secret_id, options={}, role=None) -> Dict[str, str]: """Retrieve the secret from secrets backend, and return a dictionary of environment variables.""" + + +from .secrets_func import get_secrets diff --git a/metaflow/plugins/secrets/secrets_func.py b/metaflow/plugins/secrets/secrets_func.py new file mode 100644 index 00000000000..74b1f123ade --- /dev/null +++ b/metaflow/plugins/secrets/secrets_func.py @@ -0,0 +1,61 @@ +from typing import Any, Dict, List, Union + +from metaflow.exception import MetaflowException +from metaflow.plugins.secrets.secrets_decorator import ( + SecretSpec, + get_secrets_backend_provider, +) + + +def get_secrets( + sources: List[Union[str, Dict[str, Any]]] = [], role: str = None +) -> Dict[SecretSpec, Dict[str, str]]: + """ + Get secrets from sources + + Parameters + ---------- + sources : List[Union[str, Dict[str, Any]]], default: [] + List of secret specs, defining how the secrets are to be retrieved + role : str, optional + Role to use for fetching secrets + """ + if role is None: + role = DEFAULT_SECRETS_ROLE + + # List of pairs (secret_spec, env_vars_from_this_spec) + all_secrets = [] + secret_specs = [] + + for secret_spec_str_or_dict in sources: + if isinstance(secret_spec_str_or_dict, str): + secret_specs.append( + SecretSpec.secret_spec_from_str(secret_spec_str_or_dict, role=role) + ) + elif isinstance(secret_spec_str_or_dict, dict): + secret_specs.append( + SecretSpec.secret_spec_from_dict(secret_spec_str_or_dict, role=role) + ) + else: + raise MetaflowException( + "get_secrets sources items must be either a string or a dict" + ) + + for secret_spec in secret_specs: + secrets_backend_provider = get_secrets_backend_provider( + secret_spec.secrets_backend_type + ) + try: + dict_for_secret = secrets_backend_provider.get_secret_as_dict( + secret_spec.secret_id, + options=secret_spec.options, + role=secret_spec.role, + ) + except Exception as e: + raise MetaflowException( + "Failed to retrieve secret '%s': %s" % (secret_spec.secret_id, e) + ) + + all_secrets.append((secret_spec, dict_for_secret)) + + return all_secrets From ec68d3bbd137816c0876d937c2c6b63e84dae391 Mon Sep 17 00:00:00 2001 From: Sakari Ikonen Date: Mon, 28 Oct 2024 13:39:05 +0200 Subject: [PATCH 3/4] refactor secrets plugin --- metaflow/plugins/secrets/secrets_decorator.py | 180 +----------------- metaflow/plugins/secrets/secrets_func.py | 13 +- metaflow/plugins/secrets/secrets_spec.py | 101 ++++++++++ metaflow/plugins/secrets/utils.py | 74 +++++++ 4 files changed, 188 insertions(+), 180 deletions(-) create mode 100644 metaflow/plugins/secrets/secrets_spec.py create mode 100644 metaflow/plugins/secrets/utils.py diff --git a/metaflow/plugins/secrets/secrets_decorator.py b/metaflow/plugins/secrets/secrets_decorator.py index 60ff838bc33..16c2ea03d1a 100644 --- a/metaflow/plugins/secrets/secrets_decorator.py +++ b/metaflow/plugins/secrets/secrets_decorator.py @@ -1,183 +1,17 @@ import os -import re from metaflow.exception import MetaflowException from metaflow.decorators import StepDecorator from metaflow.metaflow_config import DEFAULT_SECRETS_ROLE +from metaflow.plugins.secrets.secrets_spec import SecretSpec +from metaflow.plugins.secrets.utils import ( + get_secrets_backend_provider, + validate_env_vars, + validate_env_vars_across_secrets, + validate_env_vars_vs_existing_env, +) from metaflow.unbounded_foreach import UBF_TASK -from typing import Any, Dict, List, Union - -DISALLOWED_SECRETS_ENV_VAR_PREFIXES = ["METAFLOW_"] - - -def get_default_secrets_backend_type(): - from metaflow.metaflow_config import DEFAULT_SECRETS_BACKEND_TYPE - - if DEFAULT_SECRETS_BACKEND_TYPE is None: - raise MetaflowException( - "No default secrets backend type configured, but needed by @secrets. " - "Set METAFLOW_DEFAULT_SECRETS_BACKEND_TYPE." - ) - return DEFAULT_SECRETS_BACKEND_TYPE - - -class SecretSpec: - def __init__(self, secrets_backend_type, secret_id, options={}, role=None): - self._secrets_backend_type = secrets_backend_type - self._secret_id = secret_id - self._options = options - self._role = role - - @property - def secrets_backend_type(self): - return self._secrets_backend_type - - @property - def secret_id(self): - return self._secret_id - - @property - def options(self): - return self._options - - @property - def role(self): - return self._role - - def to_json(self): - """Mainly used for testing... not the same as the input dict in secret_spec_from_dict()!""" - return { - "secrets_backend_type": self.secrets_backend_type, - "secret_id": self.secret_id, - "options": self.options, - "role": self.role, - } - - def __str__(self): - return "%s (%s)" % (self._secret_id, self._secrets_backend_type) - - @staticmethod - def secret_spec_from_str(secret_spec_str, role): - # "." may be used in secret_id one day (provider specific). HOWEVER, it provides the best UX for - # non-conflicting cases (i.e. for secret ids that don't contain "."). This is true for all AWS - # Secrets Manager secrets. - # - # So we skew heavily optimize for best upfront UX for the present (1/2023). - # - # If/when a certain secret backend supports "." secret names, we can figure out a solution at that time. - # At a minimum, dictionary style secret spec may be used with no code changes (see secret_spec_from_dict()). - # Other options could be: - # - accept and document that "." secret_ids don't work in Metaflow (across all possible providers) - # - add a Metaflow config variable that specifies the separator (default ".") - # - smarter spec parsing, that errors on secrets that look ambiguous. "aws-secrets-manager.XYZ" could mean: - # + secret_id "XYZ" in aws-secrets-manager backend, OR - # + secret_id "aws-secrets-manager.XYZ" default backend (if it is defined). - # + in this case, user can simply set "azure-key-vault.aws-secrets-manager.XYZ" instead! - parts = secret_spec_str.split(".", maxsplit=1) - if len(parts) == 1: - secrets_backend_type = get_default_secrets_backend_type() - secret_id = parts[0] - else: - secrets_backend_type = parts[0] - secret_id = parts[1] - return SecretSpec( - secrets_backend_type, secret_id=secret_id, options={}, role=role - ) - - @staticmethod - def secret_spec_from_dict(secret_spec_dict, role): - if "type" not in secret_spec_dict: - secrets_backend_type = get_default_secrets_backend_type() - else: - secrets_backend_type = secret_spec_dict["type"] - if not isinstance(secrets_backend_type, str): - raise MetaflowException( - "Bad @secrets specification - 'type' must be a string - found %s" - % type(secrets_backend_type) - ) - secret_id = secret_spec_dict.get("id") - if not isinstance(secret_id, str): - raise MetaflowException( - "Bad @secrets specification - 'id' must be a string - found %s" - % type(secret_id) - ) - options = secret_spec_dict.get("options", {}) - if not isinstance(options, dict): - raise MetaflowException( - "Bad @secrets specification - 'option' must be a dict - found %s" - % type(options) - ) - role_for_source = secret_spec_dict.get("role", None) - if role_for_source is not None: - if not isinstance(role_for_source, str): - raise MetaflowException( - "Bad @secrets specification - 'role' must be a str - found %s" - % type(role_for_source) - ) - role = role_for_source - return SecretSpec( - secrets_backend_type, secret_id=secret_id, options=options, role=role - ) - - -def validate_env_vars_across_secrets(all_secrets_env_vars): - vars_injected_by = {} - for secret_spec, env_vars in all_secrets_env_vars: - for k in env_vars: - if k in vars_injected_by: - raise MetaflowException( - "Secret '%s' will inject '%s' as env var, and it is also added by '%s'" - % (secret_spec, k, vars_injected_by[k]) - ) - vars_injected_by[k] = secret_spec - - -def validate_env_vars_vs_existing_env(all_secrets_env_vars): - for secret_spec, env_vars in all_secrets_env_vars: - for k in env_vars: - if k in os.environ: - raise MetaflowException( - "Secret '%s' will inject '%s' as env var, but it already exists in env" - % (secret_spec, k) - ) - - -def validate_env_vars(env_vars): - for k, v in env_vars.items(): - if not isinstance(k, str): - raise MetaflowException("Found non string key %s (%s)" % (str(k), type(k))) - if not isinstance(v, str): - raise MetaflowException( - "Found non string value %s (%s)" % (str(v), type(v)) - ) - if not re.fullmatch("[a-zA-Z_][a-zA-Z0-9_]*", k): - raise MetaflowException("Found invalid env var name '%s'." % k) - for disallowed_prefix in DISALLOWED_SECRETS_ENV_VAR_PREFIXES: - if k.startswith(disallowed_prefix): - raise MetaflowException( - "Found disallowed env var name '%s' (starts with '%s')." - % (k, disallowed_prefix) - ) - - -def get_secrets_backend_provider(secrets_backend_type): - from metaflow.plugins import SECRETS_PROVIDERS - - try: - provider_cls = [ - pc for pc in SECRETS_PROVIDERS if pc.TYPE == secrets_backend_type - ][0] - return provider_cls() - except IndexError: - raise MetaflowException( - "Unknown secrets backend type %s (available types: %s)" - % ( - secrets_backend_type, - ", ".join(pc.TYPE for pc in SECRETS_PROVIDERS if pc.TYPE != "inline"), - ) - ) - class SecretsDecorator(StepDecorator): """ diff --git a/metaflow/plugins/secrets/secrets_func.py b/metaflow/plugins/secrets/secrets_func.py index 74b1f123ade..4d9d5aa810d 100644 --- a/metaflow/plugins/secrets/secrets_func.py +++ b/metaflow/plugins/secrets/secrets_func.py @@ -1,14 +1,13 @@ -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Optional, Union +from metaflow.metaflow_config import DEFAULT_SECRETS_ROLE from metaflow.exception import MetaflowException -from metaflow.plugins.secrets.secrets_decorator import ( - SecretSpec, - get_secrets_backend_provider, -) +from metaflow.plugins.secrets.secrets_spec import SecretSpec +from metaflow.plugins.secrets.utils import get_secrets_backend_provider def get_secrets( - sources: List[Union[str, Dict[str, Any]]] = [], role: str = None + sources: List[Union[str, Dict[str, Any]]] = [], role: Optional[str] = None ) -> Dict[SecretSpec, Dict[str, str]]: """ Get secrets from sources @@ -23,7 +22,7 @@ def get_secrets( if role is None: role = DEFAULT_SECRETS_ROLE - # List of pairs (secret_spec, env_vars_from_this_spec) + # List of pairs (secret_spec, dict_of_secrets) all_secrets = [] secret_specs = [] diff --git a/metaflow/plugins/secrets/secrets_spec.py b/metaflow/plugins/secrets/secrets_spec.py new file mode 100644 index 00000000000..bbdd90be29f --- /dev/null +++ b/metaflow/plugins/secrets/secrets_spec.py @@ -0,0 +1,101 @@ +from metaflow.exception import MetaflowException +from metaflow.plugins.secrets.utils import get_default_secrets_backend_type + + +class SecretSpec: + def __init__(self, secrets_backend_type, secret_id, options={}, role=None): + self._secrets_backend_type = secrets_backend_type + self._secret_id = secret_id + self._options = options + self._role = role + + @property + def secrets_backend_type(self): + return self._secrets_backend_type + + @property + def secret_id(self): + return self._secret_id + + @property + def options(self): + return self._options + + @property + def role(self): + return self._role + + def to_json(self): + """Mainly used for testing... not the same as the input dict in secret_spec_from_dict()!""" + return { + "secrets_backend_type": self.secrets_backend_type, + "secret_id": self.secret_id, + "options": self.options, + "role": self.role, + } + + def __str__(self): + return "%s (%s)" % (self._secret_id, self._secrets_backend_type) + + @staticmethod + def secret_spec_from_str(secret_spec_str, role): + # "." may be used in secret_id one day (provider specific). HOWEVER, it provides the best UX for + # non-conflicting cases (i.e. for secret ids that don't contain "."). This is true for all AWS + # Secrets Manager secrets. + # + # So we skew heavily optimize for best upfront UX for the present (1/2023). + # + # If/when a certain secret backend supports "." secret names, we can figure out a solution at that time. + # At a minimum, dictionary style secret spec may be used with no code changes (see secret_spec_from_dict()). + # Other options could be: + # - accept and document that "." secret_ids don't work in Metaflow (across all possible providers) + # - add a Metaflow config variable that specifies the separator (default ".") + # - smarter spec parsing, that errors on secrets that look ambiguous. "aws-secrets-manager.XYZ" could mean: + # + secret_id "XYZ" in aws-secrets-manager backend, OR + # + secret_id "aws-secrets-manager.XYZ" default backend (if it is defined). + # + in this case, user can simply set "azure-key-vault.aws-secrets-manager.XYZ" instead! + parts = secret_spec_str.split(".", maxsplit=1) + if len(parts) == 1: + secrets_backend_type = get_default_secrets_backend_type() + secret_id = parts[0] + else: + secrets_backend_type = parts[0] + secret_id = parts[1] + return SecretSpec( + secrets_backend_type, secret_id=secret_id, options={}, role=role + ) + + @staticmethod + def secret_spec_from_dict(secret_spec_dict, role): + if "type" not in secret_spec_dict: + secrets_backend_type = get_default_secrets_backend_type() + else: + secrets_backend_type = secret_spec_dict["type"] + if not isinstance(secrets_backend_type, str): + raise MetaflowException( + "Bad @secrets specification - 'type' must be a string - found %s" + % type(secrets_backend_type) + ) + secret_id = secret_spec_dict.get("id") + if not isinstance(secret_id, str): + raise MetaflowException( + "Bad @secrets specification - 'id' must be a string - found %s" + % type(secret_id) + ) + options = secret_spec_dict.get("options", {}) + if not isinstance(options, dict): + raise MetaflowException( + "Bad @secrets specification - 'option' must be a dict - found %s" + % type(options) + ) + role_for_source = secret_spec_dict.get("role", None) + if role_for_source is not None: + if not isinstance(role_for_source, str): + raise MetaflowException( + "Bad @secrets specification - 'role' must be a str - found %s" + % type(role_for_source) + ) + role = role_for_source + return SecretSpec( + secrets_backend_type, secret_id=secret_id, options=options, role=role + ) diff --git a/metaflow/plugins/secrets/utils.py b/metaflow/plugins/secrets/utils.py new file mode 100644 index 00000000000..279654042a3 --- /dev/null +++ b/metaflow/plugins/secrets/utils.py @@ -0,0 +1,74 @@ +import os +import re +from metaflow.exception import MetaflowException + +DISALLOWED_SECRETS_ENV_VAR_PREFIXES = ["METAFLOW_"] + + +def get_default_secrets_backend_type(): + from metaflow.metaflow_config import DEFAULT_SECRETS_BACKEND_TYPE + + if DEFAULT_SECRETS_BACKEND_TYPE is None: + raise MetaflowException( + "No default secrets backend type configured, but needed by @secrets. " + "Set METAFLOW_DEFAULT_SECRETS_BACKEND_TYPE." + ) + return DEFAULT_SECRETS_BACKEND_TYPE + + +def validate_env_vars_across_secrets(all_secrets_env_vars): + vars_injected_by = {} + for secret_spec, env_vars in all_secrets_env_vars: + for k in env_vars: + if k in vars_injected_by: + raise MetaflowException( + "Secret '%s' will inject '%s' as env var, and it is also added by '%s'" + % (secret_spec, k, vars_injected_by[k]) + ) + vars_injected_by[k] = secret_spec + + +def validate_env_vars_vs_existing_env(all_secrets_env_vars): + for secret_spec, env_vars in all_secrets_env_vars: + for k in env_vars: + if k in os.environ: + raise MetaflowException( + "Secret '%s' will inject '%s' as env var, but it already exists in env" + % (secret_spec, k) + ) + + +def validate_env_vars(env_vars): + for k, v in env_vars.items(): + if not isinstance(k, str): + raise MetaflowException("Found non string key %s (%s)" % (str(k), type(k))) + if not isinstance(v, str): + raise MetaflowException( + "Found non string value %s (%s)" % (str(v), type(v)) + ) + if not re.fullmatch("[a-zA-Z_][a-zA-Z0-9_]*", k): + raise MetaflowException("Found invalid env var name '%s'." % k) + for disallowed_prefix in DISALLOWED_SECRETS_ENV_VAR_PREFIXES: + if k.startswith(disallowed_prefix): + raise MetaflowException( + "Found disallowed env var name '%s' (starts with '%s')." + % (k, disallowed_prefix) + ) + + +def get_secrets_backend_provider(secrets_backend_type): + from metaflow.plugins import SECRETS_PROVIDERS + + try: + provider_cls = [ + pc for pc in SECRETS_PROVIDERS if pc.TYPE == secrets_backend_type + ][0] + return provider_cls() + except IndexError: + raise MetaflowException( + "Unknown secrets backend type %s (available types: %s)" + % ( + secrets_backend_type, + ", ".join(pc.TYPE for pc in SECRETS_PROVIDERS if pc.TYPE != "inline"), + ) + ) From 252012d5f4bc46cb21e842be4b098ace7a4915d9 Mon Sep 17 00:00:00 2001 From: Sakari Ikonen Date: Mon, 28 Oct 2024 14:21:16 +0200 Subject: [PATCH 4/4] fix docstring for stubs --- metaflow/plugins/secrets/secrets_decorator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metaflow/plugins/secrets/secrets_decorator.py b/metaflow/plugins/secrets/secrets_decorator.py index 16c2ea03d1a..ca01adb8083 100644 --- a/metaflow/plugins/secrets/secrets_decorator.py +++ b/metaflow/plugins/secrets/secrets_decorator.py @@ -22,7 +22,7 @@ class SecretsDecorator(StepDecorator): ---------- sources : List[Union[str, Dict[str, Any]]], default: [] List of secret specs, defining how the secrets are to be retrieved - role : str, optional + role : str, optional, default: None Role to use for fetching secrets """