Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: Introduce secrets function #2122

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions metaflow/plugins/secrets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@ class SecretsProvider(abc.ABC):
def get_secret_as_dict(self, secret_id, options={}, role=None) -> Dict[str, str]:
    """Retrieve the secret from secrets backend, and return a dictionary of
    environment variables.

    Parameters
    ----------
    secret_id : str
        Identifier of the secret to retrieve.
    options : dict, default: {}
        Options for the retrieval.
        NOTE(review): presumably backend specific - confirm against the
        concrete providers. The default is one dict object shared by every
        call, so implementations should treat it as read-only.
    role : str, optional, default: None
        Role to use for fetching the secret.

    Returns
    -------
    Dict[str, str]
        The secret as a dictionary of environment variables (presumably
        variable name -> value; confirm against the concrete providers).
    """


from .secrets_func import get_secrets
182 changes: 9 additions & 173 deletions metaflow/plugins/secrets/secrets_decorator.py
Original file line number Diff line number Diff line change
@@ -1,183 +1,17 @@
import os
import re

from metaflow.exception import MetaflowException
from metaflow.decorators import StepDecorator
from metaflow.metaflow_config import DEFAULT_SECRETS_ROLE
from metaflow.plugins.secrets.secrets_spec import SecretSpec
from metaflow.plugins.secrets.utils import (
get_secrets_backend_provider,
validate_env_vars,
validate_env_vars_across_secrets,
validate_env_vars_vs_existing_env,
)
from metaflow.unbounded_foreach import UBF_TASK

from typing import Any, Dict, List, Union

DISALLOWED_SECRETS_ENV_VAR_PREFIXES = ["METAFLOW_"]


def get_default_secrets_backend_type():
    """
    Return the default secrets backend type from the Metaflow configuration.

    Returns
    -------
    The value of `DEFAULT_SECRETS_BACKEND_TYPE` from `metaflow.metaflow_config`.

    Raises
    ------
    MetaflowException
        If no default secrets backend type is configured (the value is None).
    """
    # The configuration value is imported at call time, not at module import time.
    from metaflow.metaflow_config import DEFAULT_SECRETS_BACKEND_TYPE as backend_type

    if backend_type is not None:
        return backend_type

    raise MetaflowException(
        "No default secrets backend type configured, but needed by @secrets. "
        "Set METAFLOW_DEFAULT_SECRETS_BACKEND_TYPE."
    )


class SecretSpec:
    """
    Description of one secret to fetch: backend type, secret id, options and role.

    Parameters
    ----------
    secrets_backend_type : str
        Type identifier of the secrets backend that holds the secret.
    secret_id : str
        Identifier of the secret within that backend.
    options : dict, optional, default: None
        Options passed along when the secret is retrieved. `None` is stored
        as a new, empty dict.
    role : str, optional, default: None
        Role to use for fetching the secret.
    """

    def __init__(self, secrets_backend_type, secret_id, options=None, role=None):
        self._secrets_backend_type = secrets_backend_type
        self._secret_id = secret_id
        # Create the empty dict per instance: a `{}` default in the signature is
        # a single object shared by every spec built without explicit options,
        # so mutating one spec's options would silently change all the others.
        self._options = {} if options is None else options
        self._role = role

    @property
    def secrets_backend_type(self):
        """Type identifier of the secrets backend."""
        return self._secrets_backend_type

    @property
    def secret_id(self):
        """Identifier of the secret within the backend."""
        return self._secret_id

    @property
    def options(self):
        """Options dict given at construction (empty dict if none were given)."""
        return self._options

    @property
    def role(self):
        """Role to use for fetching the secret, or None."""
        return self._role

    def to_json(self):
        """Mainly used for testing... not the same as the input dict in secret_spec_from_dict()!"""
        return {
            "secrets_backend_type": self.secrets_backend_type,
            "secret_id": self.secret_id,
            "options": self.options,
            "role": self.role,
        }

    def __str__(self):
        """Render as "<secret_id> (<secrets_backend_type>)"."""
        return "%s (%s)" % (self._secret_id, self._secrets_backend_type)

    @staticmethod
    def secret_spec_from_str(secret_spec_str, role):
        """
        Build a SecretSpec from "<backend_type>.<secret_id>" or a bare "<secret_id>".

        Only the first "." separates the backend type from the secret id, so
        the secret id itself may contain further dots. Without any ".", the
        backend type comes from get_default_secrets_backend_type().

        Parameters
        ----------
        secret_spec_str : str
            The string form of the secret spec.
        role : str or None
            Role stored on the resulting spec.

        Returns
        -------
        SecretSpec
            Spec with empty options and the given role.
        """
        # "." may be used in secret_id one day (provider specific). HOWEVER, it provides the best UX for
        # non-conflicting cases (i.e. for secret ids that don't contain "."). This is true for all AWS
        # Secrets Manager secrets.
        #
        # So we heavily optimize for the best upfront UX for the present (1/2023).
        #
        # If/when a certain secret backend supports "." secret names, we can figure out a solution at that time.
        # At a minimum, dictionary style secret spec may be used with no code changes (see secret_spec_from_dict()).
        # Other options could be:
        #  - accept and document that "." secret_ids don't work in Metaflow (across all possible providers)
        #  - add a Metaflow config variable that specifies the separator (default ".")
        #  - smarter spec parsing, that errors on secrets that look ambiguous. "aws-secrets-manager.XYZ" could mean:
        #    + secret_id "XYZ" in aws-secrets-manager backend, OR
        #    + secret_id "aws-secrets-manager.XYZ" default backend (if it is defined).
        #    + in this case, user can simply set "azure-key-vault.aws-secrets-manager.XYZ" instead!
        parts = secret_spec_str.split(".", maxsplit=1)
        if len(parts) == 1:
            secrets_backend_type = get_default_secrets_backend_type()
            secret_id = parts[0]
        else:
            secrets_backend_type = parts[0]
            secret_id = parts[1]
        return SecretSpec(
            secrets_backend_type, secret_id=secret_id, options={}, role=role
        )

    @staticmethod
    def secret_spec_from_dict(secret_spec_dict, role):
        """
        Build a SecretSpec from a dict with keys "type", "id", "options" and "role".

        Parameters
        ----------
        secret_spec_dict : dict
            "id" is required and must be a string. "type" (string) falls back
            to get_default_secrets_backend_type() when absent. "options"
            (dict) defaults to an empty dict. "role" (string), when present
            and not None, takes precedence over the `role` argument.
        role : str or None
            Role used when the dict does not carry its own "role".

        Returns
        -------
        SecretSpec

        Raises
        ------
        MetaflowException
            If any of the entries has the wrong type.
        """
        if "type" not in secret_spec_dict:
            secrets_backend_type = get_default_secrets_backend_type()
        else:
            secrets_backend_type = secret_spec_dict["type"]
            if not isinstance(secrets_backend_type, str):
                raise MetaflowException(
                    "Bad @secrets specification - 'type' must be a string - found %s"
                    % type(secrets_backend_type)
                )
        secret_id = secret_spec_dict.get("id")
        if not isinstance(secret_id, str):
            raise MetaflowException(
                "Bad @secrets specification - 'id' must be a string - found %s"
                % type(secret_id)
            )
        options = secret_spec_dict.get("options", {})
        if not isinstance(options, dict):
            # The message names the key exactly as the user has to spell it.
            raise MetaflowException(
                "Bad @secrets specification - 'options' must be a dict - found %s"
                % type(options)
            )
        role_for_source = secret_spec_dict.get("role", None)
        if role_for_source is not None:
            if not isinstance(role_for_source, str):
                raise MetaflowException(
                    "Bad @secrets specification - 'role' must be a str - found %s"
                    % type(role_for_source)
                )
            # A role given on the individual source wins over the caller-wide role.
            role = role_for_source
        return SecretSpec(
            secrets_backend_type, secret_id=secret_id, options=options, role=role
        )


def validate_env_vars_across_secrets(all_secrets_env_vars):
    """
    Ensure that no env var name is injected by more than one secret.

    Parameters
    ----------
    all_secrets_env_vars : iterable of (secret_spec, env_vars) pairs
        `env_vars` is iterated for its env var names.

    Raises
    ------
    MetaflowException
        On the first env var name that has already been seen.
    """
    # Maps each env var name to the secret spec that first injected it.
    first_injector = {}
    for spec, injected in all_secrets_env_vars:
        for name in injected:
            if name not in first_injector:
                first_injector[name] = spec
                continue
            raise MetaflowException(
                "Secret '%s' will inject '%s' as env var, and it is also added by '%s'"
                % (spec, name, first_injector[name])
            )


def validate_env_vars_vs_existing_env(all_secrets_env_vars):
    """
    Ensure that no secret would inject an env var that is already set.

    Parameters
    ----------
    all_secrets_env_vars : iterable of (secret_spec, env_vars) pairs
        `env_vars` is iterated for its env var names.

    Raises
    ------
    MetaflowException
        On the first env var name that is already present in `os.environ`.
    """
    # Lazily flatten to (spec, name) pairs, keeping the original visiting order.
    candidates = (
        (spec, name) for spec, injected in all_secrets_env_vars for name in injected
    )
    for spec, name in candidates:
        if name in os.environ:
            raise MetaflowException(
                "Secret '%s' will inject '%s' as env var, but it already exists in env"
                % (spec, name)
            )


def validate_env_vars(env_vars):
    """
    Validate the env vars produced by a single secret.

    For every item, in this order: the key must be a string, the value must
    be a string, the key must match `[a-zA-Z_][a-zA-Z0-9_]*`, and the key must
    not start with any prefix in DISALLOWED_SECRETS_ENV_VAR_PREFIXES.

    Parameters
    ----------
    env_vars : dict
        Mapping of env var name to value.

    Raises
    ------
    MetaflowException
        On the first check that fails.
    """
    for name, value in env_vars.items():
        if not isinstance(name, str):
            raise MetaflowException(
                "Found non string key %s (%s)" % (str(name), type(name))
            )
        if not isinstance(value, str):
            raise MetaflowException(
                "Found non string value %s (%s)" % (str(value), type(value))
            )
        if re.fullmatch("[a-zA-Z_][a-zA-Z0-9_]*", name) is None:
            raise MetaflowException("Found invalid env var name '%s'." % name)
        # First disallowed prefix that the name starts with, if any.
        bad_prefix = next(
            (
                prefix
                for prefix in DISALLOWED_SECRETS_ENV_VAR_PREFIXES
                if name.startswith(prefix)
            ),
            None,
        )
        if bad_prefix is not None:
            raise MetaflowException(
                "Found disallowed env var name '%s' (starts with '%s')."
                % (name, bad_prefix)
            )


def get_secrets_backend_provider(secrets_backend_type):
    """
    Instantiate the secrets provider registered for `secrets_backend_type`.

    Parameters
    ----------
    secrets_backend_type : str
        Backend type, compared against the `TYPE` attribute of every class
        in `metaflow.plugins.SECRETS_PROVIDERS`.

    Returns
    -------
    A new instance of the first provider class whose `TYPE` matches.

    Raises
    ------
    MetaflowException
        If no provider class has a matching `TYPE`.
    """
    from metaflow.plugins import SECRETS_PROVIDERS

    provider_cls = next(
        (pc for pc in SECRETS_PROVIDERS if pc.TYPE == secrets_backend_type), None
    )
    if provider_cls is None:
        raise MetaflowException(
            "Unknown secrets backend type %s (available types: %s)"
            % (
                secrets_backend_type,
                # "inline" is deliberately left out of the advertised types.
                # NOTE(review): presumably an internal/testing provider - confirm.
                ", ".join(pc.TYPE for pc in SECRETS_PROVIDERS if pc.TYPE != "inline"),
            )
        )
    # Instantiate outside of any error handling: an exception raised by the
    # provider's constructor (even an IndexError) must propagate unchanged
    # instead of being reported as an unknown backend type.
    return provider_cls()


class SecretsDecorator(StepDecorator):
"""
Expand All @@ -188,6 +22,8 @@ class SecretsDecorator(StepDecorator):
----------
sources : List[Union[str, Dict[str, Any]]], default: []
List of secret specs, defining how the secrets are to be retrieved
role : str, optional, default: None
Role to use for fetching secrets
"""

name = "secrets"
Expand Down
60 changes: 60 additions & 0 deletions metaflow/plugins/secrets/secrets_func.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from typing import Any, Dict, List, Optional, Tuple, Union

from metaflow.metaflow_config import DEFAULT_SECRETS_ROLE
from metaflow.exception import MetaflowException
from metaflow.plugins.secrets.secrets_spec import SecretSpec
from metaflow.plugins.secrets.utils import get_secrets_backend_provider


def get_secrets(
    sources: Optional[List[Union[str, Dict[str, Any]]]] = None,
    role: Optional[str] = None,
) -> List[Tuple["SecretSpec", Dict[str, str]]]:
    """
    Get secrets from sources

    Parameters
    ----------
    sources : List[Union[str, Dict[str, Any]]], optional, default: None
        List of secret specs, defining how the secrets are to be retrieved.
        Strings are parsed with `SecretSpec.secret_spec_from_str`, dicts with
        `SecretSpec.secret_spec_from_dict`. `None` is treated as an empty list.
    role : str, optional, default: None
        Role to use for fetching secrets. When None, `DEFAULT_SECRETS_ROLE`
        from the Metaflow configuration is used.

    Returns
    -------
    List[Tuple[SecretSpec, Dict[str, str]]]
        One `(secret_spec, secret_dict)` pair per source, in the order of
        `sources`.

    Raises
    ------
    MetaflowException
        If an item of `sources` is neither a string nor a dict, or if
        retrieving a secret from its backend fails (the original exception is
        attached as the cause).
    """
    if role is None:
        role = DEFAULT_SECRETS_ROLE

    # Parse every spec before fetching anything, so that a malformed source
    # is reported before any secrets backend is contacted.
    secret_specs = []
    for secret_spec_str_or_dict in sources or []:
        if isinstance(secret_spec_str_or_dict, str):
            secret_specs.append(
                SecretSpec.secret_spec_from_str(secret_spec_str_or_dict, role=role)
            )
        elif isinstance(secret_spec_str_or_dict, dict):
            secret_specs.append(
                SecretSpec.secret_spec_from_dict(secret_spec_str_or_dict, role=role)
            )
        else:
            raise MetaflowException(
                "get_secrets sources items must be either a string or a dict"
            )

    # List of pairs (secret_spec, dict_of_secrets)
    all_secrets = []
    for secret_spec in secret_specs:
        secrets_backend_provider = get_secrets_backend_provider(
            secret_spec.secrets_backend_type
        )
        try:
            dict_for_secret = secrets_backend_provider.get_secret_as_dict(
                secret_spec.secret_id,
                options=secret_spec.options,
                role=secret_spec.role,
            )
        except Exception as e:
            # Chain explicitly so the provider's traceback stays attached.
            raise MetaflowException(
                "Failed to retrieve secret '%s': %s" % (secret_spec.secret_id, e)
            ) from e

        all_secrets.append((secret_spec, dict_for_secret))

    return all_secrets
101 changes: 101 additions & 0 deletions metaflow/plugins/secrets/secrets_spec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from metaflow.exception import MetaflowException
from metaflow.plugins.secrets.utils import get_default_secrets_backend_type


class SecretSpec:
    """
    Description of one secret to fetch: backend type, secret id, options and role.

    Parameters
    ----------
    secrets_backend_type : str
        Type identifier of the secrets backend that holds the secret.
    secret_id : str
        Identifier of the secret within that backend.
    options : dict, optional, default: None
        Options passed along when the secret is retrieved. `None` is stored
        as a new, empty dict.
    role : str, optional, default: None
        Role to use for fetching the secret.
    """

    def __init__(self, secrets_backend_type, secret_id, options=None, role=None):
        self._secrets_backend_type = secrets_backend_type
        self._secret_id = secret_id
        # Create the empty dict per instance: a `{}` default in the signature is
        # a single object shared by every spec built without explicit options,
        # so mutating one spec's options would silently change all the others.
        self._options = {} if options is None else options
        self._role = role

    @property
    def secrets_backend_type(self):
        """Type identifier of the secrets backend."""
        return self._secrets_backend_type

    @property
    def secret_id(self):
        """Identifier of the secret within the backend."""
        return self._secret_id

    @property
    def options(self):
        """Options dict given at construction (empty dict if none were given)."""
        return self._options

    @property
    def role(self):
        """Role to use for fetching the secret, or None."""
        return self._role

    def to_json(self):
        """Mainly used for testing... not the same as the input dict in secret_spec_from_dict()!"""
        return {
            "secrets_backend_type": self.secrets_backend_type,
            "secret_id": self.secret_id,
            "options": self.options,
            "role": self.role,
        }

    def __str__(self):
        """Render as "<secret_id> (<secrets_backend_type>)"."""
        return "%s (%s)" % (self._secret_id, self._secrets_backend_type)

    @staticmethod
    def secret_spec_from_str(secret_spec_str, role):
        """
        Build a SecretSpec from "<backend_type>.<secret_id>" or a bare "<secret_id>".

        Only the first "." separates the backend type from the secret id, so
        the secret id itself may contain further dots. Without any ".", the
        backend type comes from get_default_secrets_backend_type().

        Parameters
        ----------
        secret_spec_str : str
            The string form of the secret spec.
        role : str or None
            Role stored on the resulting spec.

        Returns
        -------
        SecretSpec
            Spec with empty options and the given role.
        """
        # "." may be used in secret_id one day (provider specific). HOWEVER, it provides the best UX for
        # non-conflicting cases (i.e. for secret ids that don't contain "."). This is true for all AWS
        # Secrets Manager secrets.
        #
        # So we heavily optimize for the best upfront UX for the present (1/2023).
        #
        # If/when a certain secret backend supports "." secret names, we can figure out a solution at that time.
        # At a minimum, dictionary style secret spec may be used with no code changes (see secret_spec_from_dict()).
        # Other options could be:
        #  - accept and document that "." secret_ids don't work in Metaflow (across all possible providers)
        #  - add a Metaflow config variable that specifies the separator (default ".")
        #  - smarter spec parsing, that errors on secrets that look ambiguous. "aws-secrets-manager.XYZ" could mean:
        #    + secret_id "XYZ" in aws-secrets-manager backend, OR
        #    + secret_id "aws-secrets-manager.XYZ" default backend (if it is defined).
        #    + in this case, user can simply set "azure-key-vault.aws-secrets-manager.XYZ" instead!
        parts = secret_spec_str.split(".", maxsplit=1)
        if len(parts) == 1:
            secrets_backend_type = get_default_secrets_backend_type()
            secret_id = parts[0]
        else:
            secrets_backend_type = parts[0]
            secret_id = parts[1]
        return SecretSpec(
            secrets_backend_type, secret_id=secret_id, options={}, role=role
        )

    @staticmethod
    def secret_spec_from_dict(secret_spec_dict, role):
        """
        Build a SecretSpec from a dict with keys "type", "id", "options" and "role".

        Parameters
        ----------
        secret_spec_dict : dict
            "id" is required and must be a string. "type" (string) falls back
            to get_default_secrets_backend_type() when absent. "options"
            (dict) defaults to an empty dict. "role" (string), when present
            and not None, takes precedence over the `role` argument.
        role : str or None
            Role used when the dict does not carry its own "role".

        Returns
        -------
        SecretSpec

        Raises
        ------
        MetaflowException
            If any of the entries has the wrong type.
        """
        if "type" not in secret_spec_dict:
            secrets_backend_type = get_default_secrets_backend_type()
        else:
            secrets_backend_type = secret_spec_dict["type"]
            if not isinstance(secrets_backend_type, str):
                raise MetaflowException(
                    "Bad @secrets specification - 'type' must be a string - found %s"
                    % type(secrets_backend_type)
                )
        secret_id = secret_spec_dict.get("id")
        if not isinstance(secret_id, str):
            raise MetaflowException(
                "Bad @secrets specification - 'id' must be a string - found %s"
                % type(secret_id)
            )
        options = secret_spec_dict.get("options", {})
        if not isinstance(options, dict):
            # The message names the key exactly as the user has to spell it.
            raise MetaflowException(
                "Bad @secrets specification - 'options' must be a dict - found %s"
                % type(options)
            )
        role_for_source = secret_spec_dict.get("role", None)
        if role_for_source is not None:
            if not isinstance(role_for_source, str):
                raise MetaflowException(
                    "Bad @secrets specification - 'role' must be a str - found %s"
                    % type(role_for_source)
                )
            # A role given on the individual source wins over the caller-wide role.
            role = role_for_source
        return SecretSpec(
            secrets_backend_type, secret_id=secret_id, options=options, role=role
        )
Loading
Loading