From d438422bed2379c70057499cde15d713ecca77f1 Mon Sep 17 00:00:00 2001 From: ewezy Date: Fri, 6 Sep 2024 15:58:43 +0800 Subject: [PATCH 01/32] Add multi keychain when checking if images exist --- api/pkg/imagebuilder/imagebuilder.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/api/pkg/imagebuilder/imagebuilder.go b/api/pkg/imagebuilder/imagebuilder.go index f80136371..5296f5e92 100644 --- a/api/pkg/imagebuilder/imagebuilder.go +++ b/api/pkg/imagebuilder/imagebuilder.go @@ -402,20 +402,25 @@ func getGCPSubDomains() []string { // https://github.com/google/go-containerregistry/blob/master/cmd/crane/README.md // https://github.com/google/go-containerregistry/blob/master/pkg/v1/google/README.md func (c *imageBuilder) imageRefExists(imageName, imageTag string) (bool, error) { - var keychain authn.Keychain - keychain = authn.DefaultKeychain + // The DefaultKeychain will use credentials as described in the Docker config file whose location is specified by + // the DOCKER_CONFIG environment variable, if set. + keychains := []authn.Keychain{ + authn.DefaultKeychain, + } for _, domain := range getGCPSubDomains() { if strings.Contains(c.config.DockerRegistry, domain) { - keychain = google.Keychain + keychains = append(keychains, google.Keychain) } } + multiKeychain := authn.NewMultiKeychain(keychains...) + repo, err := name.NewRepository(imageName) if err != nil { return false, fmt.Errorf("unable to parse docker repository %s: %w", imageName, err) } - tags, err := remote.List(repo, remote.WithAuthFromKeychain(keychain)) + tags, err := remote.List(repo, remote.WithAuthFromKeychain(multiKeychain)) if err != nil { var terr *transport.Error if errors.As(err, &terr) { From ec94d9fd7780cdd6a5c641c8d155ff75628bf4ac Mon Sep 17 00:00:00 2001 From: ewezy Date: Thu, 12 Sep 2024 11:40:56 +0800 Subject: [PATCH 02/32] Refactor API methods to make GCP dependencies optional --- api/cmd/api/setup.go | 81 +-- api/config/config.go | 28 +- api/pkg/imagebuilder/config.go | 4 + api/pkg/imagebuilder/imagebuilder.go | 90 +++- api/pkg/imagebuilder/imagebuilder_test.go | 624 ++++++++++++++-------- 5 files changed, 539 insertions(+), 288 deletions(-) diff --git a/api/cmd/api/setup.go b/api/cmd/api/setup.go index 26c5b501b..a23dac719 100644 --- a/api/cmd/api/setup.go +++ b/api/cmd/api/setup.go @@ -6,7 +6,6 @@ import ( "net/http" "time" - gcs "cloud.google.com/go/storage" "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/client/clientset/versioned" "github.com/caraml-dev/merlin/webhook" "github.com/caraml-dev/mlp/api/pkg/artifact" @@ -106,54 +105,60 @@ func initImageBuilder(cfg *config.Config) (webserviceBuilder imagebuilder.ImageB var artifactService artifact.Service if cfg.ImageBuilderConfig.ArtifactServiceType == "gcs" { - gcsClient, err := gcs.NewClient(context.Background()) - if err != nil { - log.Panicf("%s,failed initializing gcs for mlflow delete package", err.Error()) - } - artifactService = artifact.NewGcsArtifactClient(gcsClient) + artifactService, err = artifact.NewGcsArtifactClient() + } else if cfg.ImageBuilderConfig.ArtifactServiceType == "s3" { + artifactService, err = artifact.NewS3ArtifactClient() } else if cfg.ImageBuilderConfig.ArtifactServiceType == "nop" { artifactService = artifact.NewNopArtifactClient() } else { log.Panicf("invalid artifact service type %s", cfg.ImageBuilderConfig.ArtifactServiceType) } + if err != nil { + log.Panicf("%s,failed initializing mlflow artifact service", err.Error()) + } + webServiceConfig := imagebuilder.Config{ - BaseImage: cfg.ImageBuilderConfig.BaseImage, - BuildNamespace: cfg.ImageBuilderConfig.BuildNamespace, - DockerRegistry: cfg.ImageBuilderConfig.DockerRegistry, - BuildTimeoutDuration: timeout, - KanikoImage: cfg.ImageBuilderConfig.KanikoImage, - KanikoServiceAccount: cfg.ImageBuilderConfig.KanikoServiceAccount, - KanikoAdditionalArgs: cfg.ImageBuilderConfig.KanikoAdditionalArgs, - DefaultResources: cfg.ImageBuilderConfig.DefaultResources, - Tolerations: cfg.ImageBuilderConfig.Tolerations, - NodeSelectors: cfg.ImageBuilderConfig.NodeSelectors, - MaximumRetry: cfg.ImageBuilderConfig.MaximumRetry, - SafeToEvict: cfg.ImageBuilderConfig.SafeToEvict, - ClusterName: cfg.ImageBuilderConfig.ClusterName, - GcpProject: cfg.ImageBuilderConfig.GcpProject, - Environment: cfg.Environment, - SupportedPythonVersions: cfg.ImageBuilderConfig.SupportedPythonVersions, + BaseImage: cfg.ImageBuilderConfig.BaseImage, + BuildNamespace: cfg.ImageBuilderConfig.BuildNamespace, + DockerRegistry: cfg.ImageBuilderConfig.DockerRegistry, + BuildTimeoutDuration: timeout, + KanikoImage: cfg.ImageBuilderConfig.KanikoImage, + KanikoPushRegistryType: cfg.ImageBuilderConfig.KanikoPushRegistryType, + KanikoDockerCredentialSecretName: cfg.ImageBuilderConfig.KanikoDockerCredentialSecretName, + KanikoServiceAccount: cfg.ImageBuilderConfig.KanikoServiceAccount, + KanikoAdditionalArgs: cfg.ImageBuilderConfig.KanikoAdditionalArgs, + DefaultResources: cfg.ImageBuilderConfig.DefaultResources, + Tolerations: cfg.ImageBuilderConfig.Tolerations, + NodeSelectors: cfg.ImageBuilderConfig.NodeSelectors, + MaximumRetry: cfg.ImageBuilderConfig.MaximumRetry, + SafeToEvict: cfg.ImageBuilderConfig.SafeToEvict, + ClusterName: cfg.ImageBuilderConfig.ClusterName, + GcpProject: cfg.ImageBuilderConfig.GcpProject, + Environment: cfg.Environment, + SupportedPythonVersions: cfg.ImageBuilderConfig.SupportedPythonVersions, } webserviceBuilder = imagebuilder.NewModelServiceImageBuilder(kubeClient, webServiceConfig, artifactService) predJobConfig := imagebuilder.Config{ - BaseImage: cfg.ImageBuilderConfig.PredictionJobBaseImage, - BuildNamespace: cfg.ImageBuilderConfig.BuildNamespace, - DockerRegistry: cfg.ImageBuilderConfig.DockerRegistry, - BuildTimeoutDuration: timeout, - KanikoImage: cfg.ImageBuilderConfig.KanikoImage, - KanikoServiceAccount: cfg.ImageBuilderConfig.KanikoServiceAccount, - KanikoAdditionalArgs: cfg.ImageBuilderConfig.KanikoAdditionalArgs, - DefaultResources: cfg.ImageBuilderConfig.DefaultResources, - Tolerations: cfg.ImageBuilderConfig.Tolerations, - NodeSelectors: cfg.ImageBuilderConfig.NodeSelectors, - MaximumRetry: cfg.ImageBuilderConfig.MaximumRetry, - SafeToEvict: cfg.ImageBuilderConfig.SafeToEvict, - ClusterName: cfg.ImageBuilderConfig.ClusterName, - GcpProject: cfg.ImageBuilderConfig.GcpProject, - Environment: cfg.Environment, - SupportedPythonVersions: cfg.ImageBuilderConfig.SupportedPythonVersions, + BaseImage: cfg.ImageBuilderConfig.PredictionJobBaseImage, + BuildNamespace: cfg.ImageBuilderConfig.BuildNamespace, + DockerRegistry: cfg.ImageBuilderConfig.DockerRegistry, + BuildTimeoutDuration: timeout, + KanikoImage: cfg.ImageBuilderConfig.KanikoImage, + KanikoPushRegistryType: cfg.ImageBuilderConfig.KanikoPushRegistryType, + KanikoDockerCredentialSecretName: cfg.ImageBuilderConfig.KanikoDockerCredentialSecretName, + KanikoServiceAccount: cfg.ImageBuilderConfig.KanikoServiceAccount, + KanikoAdditionalArgs: cfg.ImageBuilderConfig.KanikoAdditionalArgs, + DefaultResources: cfg.ImageBuilderConfig.DefaultResources, + Tolerations: cfg.ImageBuilderConfig.Tolerations, + NodeSelectors: cfg.ImageBuilderConfig.NodeSelectors, + MaximumRetry: cfg.ImageBuilderConfig.MaximumRetry, + SafeToEvict: cfg.ImageBuilderConfig.SafeToEvict, + ClusterName: cfg.ImageBuilderConfig.ClusterName, + GcpProject: cfg.ImageBuilderConfig.GcpProject, + Environment: cfg.Environment, + SupportedPythonVersions: cfg.ImageBuilderConfig.SupportedPythonVersions, } predJobBuilder = imagebuilder.NewPredictionJobImageBuilder(kubeClient, predJobConfig, artifactService) diff --git a/api/config/config.go b/api/config/config.go index 4119c93a4..be2ce6870 100644 --- a/api/config/config.go +++ b/api/config/config.go @@ -104,7 +104,7 @@ type BaseImageConfig struct { ImageName string `validate:"required" json:"imageName"` // Dockerfile Path within the build context DockerfilePath string `validate:"required" json:"dockerfilePath"` - // GCS URL Containing build context + // GCS/S3 URL Containing build context BuildContextURI string `validate:"required" json:"buildContextURI"` // Path to sub folder which is intended to build instead of using root folder BuildContextSubPath string `json:"buildContextSubPath"` @@ -209,18 +209,20 @@ type ClusterConfig struct { } type ImageBuilderConfig struct { - ClusterName string `validate:"required"` - GcpProject string - ArtifactServiceType string - BaseImage BaseImageConfig `validate:"required"` - PredictionJobBaseImage BaseImageConfig `validate:"required"` - BuildNamespace string `validate:"required" default:"mlp"` - DockerRegistry string `validate:"required"` - BuildTimeout string `validate:"required" default:"10m"` - KanikoImage string `validate:"required" default:"gcr.io/kaniko-project/executor:v1.6.0"` - KanikoServiceAccount string - KanikoAdditionalArgs []string - DefaultResources ResourceRequestsLimits `validate:"required"` + ClusterName string `validate:"required"` + GcpProject string + ArtifactServiceType string + BaseImage BaseImageConfig `validate:"required"` + PredictionJobBaseImage BaseImageConfig `validate:"required"` + BuildNamespace string `validate:"required" default:"mlp"` + DockerRegistry string `validate:"required"` + BuildTimeout string `validate:"required" default:"10m"` + KanikoImage string `validate:"required" default:"gcr.io/kaniko-project/executor:v1.6.0"` + KanikoServiceAccount string + KanikoPushRegistryType string `validate:"required"` + KanikoDockerCredentialSecretName string + KanikoAdditionalArgs []string + DefaultResources ResourceRequestsLimits `validate:"required"` // How long to keep the image building job resource in the Kubernetes cluster. Default: 2 days (48 hours). Retention time.Duration `validate:"required" default:"48h"` Tolerations Tolerations diff --git a/api/pkg/imagebuilder/config.go b/api/pkg/imagebuilder/config.go index c0a52063b..e4cd99675 100644 --- a/api/pkg/imagebuilder/config.go +++ b/api/pkg/imagebuilder/config.go @@ -36,6 +36,10 @@ type Config struct { BuildTimeoutDuration time.Duration // Kaniko docker image KanikoImage string + // Kaniko push registry type + KanikoPushRegistryType string + // Kaniko docker credential secret name + KanikoDockerCredentialSecretName string // Kaniko kubernetes service account KanikoServiceAccount string // Kaniko additional args diff --git a/api/pkg/imagebuilder/imagebuilder.go b/api/pkg/imagebuilder/imagebuilder.go index 5296f5e92..33baf21ed 100644 --- a/api/pkg/imagebuilder/imagebuilder.go +++ b/api/pkg/imagebuilder/imagebuilder.go @@ -15,16 +15,17 @@ package imagebuilder import ( + "cloud.google.com/go/storage" "context" "crypto/sha256" "errors" "fmt" "net/http" + "os" "sort" "strings" "time" - "cloud.google.com/go/storage" "github.com/caraml-dev/mlp/api/pkg/artifact" backoff "github.com/cenkalti/backoff/v4" "github.com/google/go-containerregistry/pkg/authn" @@ -108,6 +109,9 @@ const ( gacEnvKey = "GOOGLE_APPLICATION_CREDENTIALS" saFilePath = "/secret/kaniko-secret.json" + dockerCredentialSecretVolume = "docker-registry-credentials" + dockerCredentialConfigPath = "/kaniko/.docker" + baseImageEnvKey = "BASE_IMAGE" modelDependenciesUrlEnvKey = "MODEL_DEPENDENCIES_URL" modelArtifactsUrlEnvKey = "MODEL_ARTIFACTS_URL" @@ -159,7 +163,7 @@ func (c *imageBuilder) getHashedModelDependenciesUrl(ctx context.Context, versio return "", err } - condaEnvUrl := fmt.Sprintf("gs://%s/%s/model/conda.yaml", artifactURL.Bucket, artifactURL.Object) + condaEnvUrl := fmt.Sprintf("%s://%s/%s/model/conda.yaml", c.artifactService.GetURLScheme(), artifactURL.Bucket, artifactURL.Object) condaEnv, err := c.artifactService.ReadArtifact(ctx, condaEnvUrl) if err != nil { @@ -170,7 +174,7 @@ func (c *imageBuilder) getHashedModelDependenciesUrl(ctx context.Context, versio hash.Write([]byte(condaEnv)) hashEnv := hash.Sum(nil) - hashedDependenciesUrl := fmt.Sprintf("gs://%s%s/%x", artifactURL.Bucket, modelDependenciesPath, hashEnv) + hashedDependenciesUrl := fmt.Sprintf("%s://%s%s/%x", c.artifactService.GetURLScheme(), artifactURL.Bucket, modelDependenciesPath, hashEnv) _, err = c.artifactService.ReadArtifact(ctx, hashedDependenciesUrl) if err == nil { @@ -620,11 +624,24 @@ func (c *imageBuilder) createKanikoJobSpec( fmt.Sprintf("--dockerfile=%s", baseImageTag.DockerfilePath), fmt.Sprintf("--context=%s", baseImageTag.BuildContextURI), fmt.Sprintf("--build-arg=%s=%s", baseImageEnvKey, baseImageTag.ImageName), + fmt.Sprintf("--build-arg=%s=%s", "MLFLOW_ARTIFACT_STORAGE_TYPE", c.artifactService.GetType()), fmt.Sprintf("--build-arg=%s=%s", modelDependenciesUrlEnvKey, modelDependenciesUrl), fmt.Sprintf("--build-arg=%s=%s/model", modelArtifactsUrlEnvKey, version.ArtifactURI), fmt.Sprintf("--destination=%s", imageRef), } + if artifactType := c.artifactService.GetType(); artifactType == "gcs" { + //kanikoArgs = append(kanikoArgs, fmt.Sprintf("--build-arg=%s=%s", "", modelDependenciesUrl)) + } else if c.artifactService.GetType() == "s3" { + kanikoArgs = append( + kanikoArgs, + // TODO: To refactor env var values into configs? + fmt.Sprintf("--build-arg=%s=%s", "AWS_ACCESS_KEY_ID", os.Getenv("AWS_ACCESS_KEY_ID")), + fmt.Sprintf("--build-arg=%s=%s", "AWS_SECRET_ACCESS_KEY", os.Getenv("AWS_SECRET_ACCESS_KEY")), + fmt.Sprintf("--build-arg=%s=%s", "AWS_DEFAULT_REGION", os.Getenv("AWS_DEFAULT_REGION")), + fmt.Sprintf("--build-arg=%s=%s", "AWS_ENDPOINT_URL", os.Getenv("AWS_ENDPOINT_URL")), + ) + } if c.config.BaseImage.BuildContextSubPath != "" { kanikoArgs = append(kanikoArgs, fmt.Sprintf("--context-sub-path=%s", c.config.BaseImage.BuildContextSubPath)) } @@ -632,36 +649,51 @@ func (c *imageBuilder) createKanikoJobSpec( kanikoArgs = append(kanikoArgs, c.config.KanikoAdditionalArgs...) activeDeadlineSeconds := int64(c.config.BuildTimeoutDuration / time.Second) - var volume []v1.Volume - var volumeMount []v1.VolumeMount + var volumes []v1.Volume + var volumeMounts []v1.VolumeMount var envVar []v1.EnvVar - // If kaniko service account is not set, use kaniko secret - if c.config.KanikoServiceAccount == "" { - kanikoArgs = append(kanikoArgs, - fmt.Sprintf("--build-arg=%s=%s", gacEnvKey, saFilePath)) - volume = []v1.Volume{ - { - Name: kanikoSecretName, - VolumeSource: v1.VolumeSource{ - Secret: &v1.SecretVolumeSource{ - SecretName: kanikoSecretName, + // Configure additional authentication requirements for specific image registries + if c.config.KanikoPushRegistryType == "gcr" { + if c.config.KanikoServiceAccount == "" { + kanikoArgs = append(kanikoArgs, + fmt.Sprintf("--build-arg=%s=%s", gacEnvKey, saFilePath)) + volumes = []v1.Volume{ + { + Name: kanikoSecretName, + VolumeSource: v1.VolumeSource{ + Secret: &v1.SecretVolumeSource{ + SecretName: kanikoSecretName, + }, }, }, - }, - } - volumeMount = []v1.VolumeMount{ - { - Name: kanikoSecretName, - MountPath: "/secret", - }, + } + volumeMounts = []v1.VolumeMount{ + { + Name: kanikoSecretName, + MountPath: "/secret", + }, + } + envVar = []v1.EnvVar{ + { + Name: gacEnvKey, + Value: saFilePath, + }, + } } - envVar = []v1.EnvVar{ - { - Name: gacEnvKey, - Value: saFilePath, + } else if c.config.KanikoPushRegistryType == "docker" { + volumes = append(volumes, v1.Volume{ + Name: kanikoSecretName, + VolumeSource: v1.VolumeSource{ + Secret: &v1.SecretVolumeSource{ + SecretName: c.config.KanikoDockerCredentialSecretName, + }, }, - } + }) + volumeMounts = append(volumeMounts, v1.VolumeMount{ + Name: kanikoSecretName, + MountPath: dockerCredentialConfigPath, + }) } var resourceRequirements RequestLimitResources @@ -718,13 +750,13 @@ func (c *imageBuilder) createKanikoJobSpec( Name: containerName, Image: c.config.KanikoImage, Args: kanikoArgs, - VolumeMounts: volumeMount, + VolumeMounts: volumeMounts, Env: envVar, Resources: resourceRequirements.Build(), TerminationMessagePolicy: v1.TerminationMessageFallbackToLogsOnError, }, }, - Volumes: volume, + Volumes: volumes, Tolerations: c.config.Tolerations, NodeSelector: c.config.NodeSelectors, }, diff --git a/api/pkg/imagebuilder/imagebuilder_test.go b/api/pkg/imagebuilder/imagebuilder_test.go index 1706ba56c..585e3e44b 100644 --- a/api/pkg/imagebuilder/imagebuilder_test.go +++ b/api/pkg/imagebuilder/imagebuilder_test.go @@ -50,13 +50,13 @@ import ( ) const ( - testEnvironmentName = "dev" - testOrchestratorName = "merlin" - testProjectName = "test-project" - testModelName = "mymodel" - testArtifactURI = "gs://bucket-name/mlflow/11/68eb8538374c4053b3ecad99a44170bd/artifacts" - testCondaEnvUrl = testArtifactURI + "/model/conda.yaml" - testCondaEnvContent = `dependencies: + testEnvironmentName = "dev" + testOrchestratorName = "merlin" + testProjectName = "test-project" + testModelName = "mymodel" + testArtifactURISuffix = "://bucket-name/mlflow/11/68eb8538374c4053b3ecad99a44170bd/artifacts" + testCondaEnvUrlSuffix = testArtifactURISuffix + "/model/conda.yaml" + testCondaEnvContent = `dependencies: - python=3.8.* - pip: - mlflow @@ -65,9 +65,10 @@ const ( - scikit-learn - xgboost` - testBuildContextURL = "gs://bucket/build.tar.gz" - testBuildNamespace = "mynamespace" - testDockerRegistry = "ghcr.io" + testGCSBuildContextURL = "gs://bucket/build.tar.gz" + testS3BuildContextURL = "s3://bucket/build.tar.gz" + testBuildNamespace = "mynamespace" + testDockerRegistry = "ghcr.io" ) var ( @@ -92,9 +93,18 @@ var ( Name: testModelName, } - modelVersion = &models.Version{ + modelVersionWithGCSArtifact = &models.Version{ ID: models.ID(1), - ArtifactURI: testArtifactURI, + ArtifactURI: fmt.Sprintf("gs%s", testArtifactURISuffix), + PythonVersion: "3.10.*", + Labels: models.KV{ + "test": "true", + }, + } + + modelVersionWithS3Artifact = &models.Version{ + ID: models.ID(1), + ArtifactURI: fmt.Sprintf("s3%s", testArtifactURISuffix), PythonVersion: "3.10.*", Labels: models.KV{ "test": "true", @@ -114,11 +124,11 @@ var ( defaultSupportedPythonVersions = []string{"3.8.*", "3.9.*", "3.10.*"} - config = Config{ + configWithGCRPushRegistry = Config{ BuildNamespace: testBuildNamespace, BaseImage: cfg.BaseImageConfig{ ImageName: "gojek/base-image:1", - BuildContextURI: testBuildContextURL, + BuildContextURI: testGCSBuildContextURL, BuildContextSubPath: "python/pyfunc-server", DockerfilePath: "./Dockerfile", }, @@ -127,6 +137,7 @@ var ( ClusterName: "my-cluster", GcpProject: "test-project", Environment: testEnvironmentName, + KanikoPushRegistryType: "gcr", KanikoImage: "gcr.io/kaniko-project/executor:v1.1.0", KanikoAdditionalArgs: defaultKanikoAdditionalArgs, SupportedPythonVersions: defaultSupportedPythonVersions, @@ -153,11 +164,52 @@ var ( }, MaximumRetry: jobBackOffLimit, } + configWithDockerPushRegistry = Config{ + BuildNamespace: testBuildNamespace, + BaseImage: cfg.BaseImageConfig{ + ImageName: "gojek/base-image:1", + BuildContextURI: testS3BuildContextURL, + BuildContextSubPath: "python/pyfunc-server", + DockerfilePath: "./Dockerfile", + }, + DockerRegistry: testDockerRegistry, + BuildTimeoutDuration: timeout, + ClusterName: "my-cluster", + GcpProject: "test-project", + Environment: testEnvironmentName, + KanikoPushRegistryType: "docker", + KanikoImage: "gcr.io/kaniko-project/executor:v1.1.0", + KanikoAdditionalArgs: defaultKanikoAdditionalArgs, + KanikoDockerCredentialSecretName: "docker-secret", + SupportedPythonVersions: defaultSupportedPythonVersions, + DefaultResources: cfg.ResourceRequestsLimits{ + Requests: cfg.Resource{ + CPU: "500m", + Memory: "1Gi", + }, + Limits: cfg.Resource{ + CPU: "500m", + Memory: "1Gi", + }, + }, + Tolerations: []v1.Toleration{ + { + Key: "image-build-job", + Value: "true", + Operator: v1.TolerationOpEqual, + Effect: v1.TaintEffectNoSchedule, + }, + }, + NodeSelectors: map[string]string{ + "cloud.google.com/gke-nodepool": "image-building-job-node-pool", + }, + MaximumRetry: jobBackOffLimit, + } configWithSa = Config{ BuildNamespace: testBuildNamespace, BaseImage: cfg.BaseImageConfig{ ImageName: "gojek/base-image:1", - BuildContextURI: testBuildContextURL, + BuildContextURI: testGCSBuildContextURL, BuildContextSubPath: "python/pyfunc-server", DockerfilePath: "./Dockerfile", }, @@ -264,7 +316,7 @@ func TestBuildImage(t *testing.T) { hash := sha256.New() hash.Write([]byte(testCondaEnvContent)) hashEnv := hash.Sum(nil) - modelDependenciesURL := fmt.Sprintf("gs://%s/merlin/model_dependencies/%x", testArtifactGsutilURL.Bucket, hashEnv) + modelDependenciesURLSuffix := fmt.Sprintf("://%s/merlin/model_dependencies/%x", testArtifactGsutilURL.Bucket, hashEnv) type args struct { project mlp.Project @@ -275,30 +327,32 @@ func TestBuildImage(t *testing.T) { } tests := []struct { - name string - args args - existingJob *batchv1.Job - wantCreateJob *batchv1.Job - wantDeleteJobName string - wantImageRef string - config Config + name string + args args + existingJob *batchv1.Job + wantCreateJob *batchv1.Job + wantDeleteJobName string + wantImageRef string + config Config + artifactServiceType string + artifactServiceURLScheme string }{ { - name: "success: no existing job", + name: "success: gcs artifact storage + gcr push registry; no existing job", args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, existingJob: nil, wantCreateJob: &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - Namespace: config.BuildNamespace, + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + Namespace: configWithGCRPushRegistry.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -319,7 +373,7 @@ func TestBuildImage(t *testing.T) { Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -337,13 +391,14 @@ func TestBuildImage(t *testing.T) { Name: containerName, Image: "gcr.io/kaniko-project/executor:v1.1.0", Args: []string{ - fmt.Sprintf("--dockerfile=%s", config.BaseImage.DockerfilePath), - fmt.Sprintf("--context=%s", config.BaseImage.BuildContextURI), - fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=%s", modelDependenciesURL), - fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=%s/model", testArtifactURI), - fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)), - fmt.Sprintf("--context-sub-path=%s", config.BaseImage.BuildContextSubPath), + fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "gcs"), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), + fmt.Sprintf("--context-sub-path=%s", configWithGCRPushRegistry.BaseImage.BuildContextSubPath), "--cache=true", "--compressed-caching=false", "--snapshot-mode=redo", @@ -392,28 +447,143 @@ func TestBuildImage(t *testing.T) { }, Status: batchv1.JobStatus{}, }, - wantDeleteJobName: "", - wantImageRef: fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID), - config: config, + wantDeleteJobName: "", + wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), + config: configWithGCRPushRegistry, + artifactServiceType: "gcs", + artifactServiceURLScheme: "gs", }, { - name: "success: no existing job, use K8s Service account", + name: "success: s3 artifact storage + docker push registry; no existing job", args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithS3Artifact, }, existingJob: nil, wantCreateJob: &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - Namespace: config.BuildNamespace, + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithS3Artifact.ID), + Namespace: configWithDockerPushRegistry.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, + "gojek.com/component": models.ComponentImageBuilder, + "gojek.com/environment": configWithDockerPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, - "gojek.com/environment": config.Environment, + "sample": "true", + "test": "true", + }, + Annotations: map[string]string{ + "cluster-autoscaler.kubernetes.io/safe-to-evict": "false", + }, + }, + Spec: batchv1.JobSpec{ + Completions: &jobCompletions, + BackoffLimit: &jobBackOffLimit, + TTLSecondsAfterFinished: &jobTTLSecondAfterComplete, + ActiveDeadlineSeconds: &timeoutInSecond, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "gojek.com/app": model.Name, + "gojek.com/component": models.ComponentImageBuilder, + "gojek.com/environment": configWithDockerPushRegistry.Environment, + "gojek.com/orchestrator": testOrchestratorName, + "gojek.com/stream": project.Stream, + "gojek.com/team": project.Team, + "sample": "true", + "test": "true", + }, + Annotations: map[string]string{ + "cluster-autoscaler.kubernetes.io/safe-to-evict": "false", + }, + }, + Spec: v1.PodSpec{ + RestartPolicy: v1.RestartPolicyNever, + Containers: []v1.Container{ + { + Name: containerName, + Image: "gcr.io/kaniko-project/executor:v1.1.0", + Args: []string{ + fmt.Sprintf("--dockerfile=%s", configWithDockerPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithDockerPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithDockerPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "s3"), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=s3%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=s3%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithDockerPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithS3Artifact.ID)), + fmt.Sprintf("--build-arg=AWS_ACCESS_KEY_ID=%s", ""), + fmt.Sprintf("--build-arg=AWS_SECRET_ACCESS_KEY=%s", ""), + fmt.Sprintf("--build-arg=AWS_DEFAULT_REGION=%s", ""), + fmt.Sprintf("--build-arg=AWS_ENDPOINT_URL=%s", ""), + fmt.Sprintf("--context-sub-path=%s", configWithDockerPushRegistry.BaseImage.BuildContextSubPath), + "--cache=true", + "--compressed-caching=false", + "--snapshot-mode=redo", + "--use-new-run", + }, + VolumeMounts: []v1.VolumeMount{ + { + Name: kanikoSecretName, + MountPath: "/kaniko/.docker", + }, + }, + Resources: defaultResourceRequests.Build(), + TerminationMessagePolicy: v1.TerminationMessageFallbackToLogsOnError, + }, + }, + Volumes: []v1.Volume{ + { + Name: kanikoSecretName, + VolumeSource: v1.VolumeSource{ + Secret: &v1.SecretVolumeSource{ + SecretName: configWithDockerPushRegistry.KanikoDockerCredentialSecretName, + }, + }, + }, + }, + Tolerations: []v1.Toleration{ + { + Key: "image-build-job", + Operator: v1.TolerationOpEqual, + Value: "true", + Effect: v1.TaintEffectNoSchedule, + }, + }, + NodeSelector: map[string]string{ + "cloud.google.com/gke-nodepool": "image-building-job-node-pool", + }, + }, + }, + }, + Status: batchv1.JobStatus{}, + }, + wantDeleteJobName: "", + wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithDockerPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithS3Artifact.ID), + config: configWithDockerPushRegistry, + artifactServiceType: "s3", + artifactServiceURLScheme: "s3", + }, + { + name: "success: gcs artifact storage + gcr push registry; no existing job, use K8s Service account", + args: args{ + project: project, + model: model, + version: modelVersionWithGCSArtifact, + }, + existingJob: nil, + wantCreateJob: &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + Namespace: configWithGCRPushRegistry.BuildNamespace, + Labels: map[string]string{ + "gojek.com/app": model.Name, + "gojek.com/orchestrator": testOrchestratorName, + "gojek.com/stream": project.Stream, + "gojek.com/team": project.Team, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/component": "image-builder", "sample": "true", "test": "true", @@ -434,7 +604,7 @@ func TestBuildImage(t *testing.T) { "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/component": "image-builder", "sample": "true", "test": "true", @@ -450,13 +620,14 @@ func TestBuildImage(t *testing.T) { Name: containerName, Image: "gcr.io/kaniko-project/executor:v1.1.0", Args: []string{ - fmt.Sprintf("--dockerfile=%s", config.BaseImage.DockerfilePath), - fmt.Sprintf("--context=%s", config.BaseImage.BuildContextURI), - fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=%s", modelDependenciesURL), - fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=%s/model", testArtifactURI), - fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)), - fmt.Sprintf("--context-sub-path=%s", config.BaseImage.BuildContextSubPath), + fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "gcs"), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), + fmt.Sprintf("--context-sub-path=%s", configWithGCRPushRegistry.BaseImage.BuildContextSubPath), "--cache=true", "--compressed-caching=false", "--snapshot-mode=redo", @@ -483,26 +654,28 @@ func TestBuildImage(t *testing.T) { }, Status: batchv1.JobStatus{}, }, - wantDeleteJobName: "", - wantImageRef: fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID), - config: configWithSa, + wantDeleteJobName: "", + wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), + config: configWithSa, + artifactServiceType: "gcs", + artifactServiceURLScheme: "gs", }, { - name: "success: no existing job, tolerations is not set", + name: "success: gcs artifact storage + gcr push registry; no existing job, tolerations is not set", args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, existingJob: nil, wantCreateJob: &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - Namespace: config.BuildNamespace, + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + Namespace: configWithGCRPushRegistry.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -523,7 +696,7 @@ func TestBuildImage(t *testing.T) { Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -541,13 +714,14 @@ func TestBuildImage(t *testing.T) { Name: containerName, Image: "gcr.io/kaniko-project/executor:v1.1.0", Args: []string{ - fmt.Sprintf("--dockerfile=%s", config.BaseImage.DockerfilePath), - fmt.Sprintf("--context=%s", config.BaseImage.BuildContextURI), - fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=%s", modelDependenciesURL), - fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=%s/model", testArtifactURI), - fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)), - fmt.Sprintf("--context-sub-path=%s", config.BaseImage.BuildContextSubPath), + fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "gcs"), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), + fmt.Sprintf("--context-sub-path=%s", configWithGCRPushRegistry.BaseImage.BuildContextSubPath), "--cache=true", "--compressed-caching=false", "--snapshot-mode=redo", @@ -589,12 +763,12 @@ func TestBuildImage(t *testing.T) { Status: batchv1.JobStatus{}, }, wantDeleteJobName: "", - wantImageRef: fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID), + wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), config: Config{ BuildNamespace: testBuildNamespace, BaseImage: cfg.BaseImageConfig{ ImageName: "gojek/base-image:1", - BuildContextURI: testBuildContextURL, + BuildContextURI: testGCSBuildContextURL, BuildContextSubPath: "python/pyfunc-server", DockerfilePath: "./Dockerfile", }, @@ -604,31 +778,34 @@ func TestBuildImage(t *testing.T) { GcpProject: "test-project", Environment: testEnvironmentName, KanikoImage: "gcr.io/kaniko-project/executor:v1.1.0", + KanikoPushRegistryType: "gcr", KanikoAdditionalArgs: defaultKanikoAdditionalArgs, SupportedPythonVersions: defaultSupportedPythonVersions, - DefaultResources: config.DefaultResources, + DefaultResources: configWithGCRPushRegistry.DefaultResources, NodeSelectors: map[string]string{ "cloud.google.com/gke-nodepool": "image-building-job-node-pool", }, MaximumRetry: jobBackOffLimit, }, + artifactServiceType: "gcs", + artifactServiceURLScheme: "gs", }, { - name: "success: no existing job, node selectors is not set", + name: "success: gcs artifact storage + gcr push registry; no existing job, node selectors is not set", args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, existingJob: nil, wantCreateJob: &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - Namespace: config.BuildNamespace, + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + Namespace: configWithGCRPushRegistry.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -649,7 +826,7 @@ func TestBuildImage(t *testing.T) { Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -667,13 +844,14 @@ func TestBuildImage(t *testing.T) { Name: containerName, Image: "gcr.io/kaniko-project/executor:v1.1.0", Args: []string{ - fmt.Sprintf("--dockerfile=%s", config.BaseImage.DockerfilePath), - fmt.Sprintf("--context=%s", config.BaseImage.BuildContextURI), - fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=%s", modelDependenciesURL), - fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=%s/model", testArtifactURI), - fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)), - fmt.Sprintf("--context-sub-path=%s", config.BaseImage.BuildContextSubPath), + fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "gcs"), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), + fmt.Sprintf("--context-sub-path=%s", configWithGCRPushRegistry.BaseImage.BuildContextSubPath), "--cache=true", "--compressed-caching=false", "--snapshot-mode=redo", @@ -720,12 +898,12 @@ func TestBuildImage(t *testing.T) { Status: batchv1.JobStatus{}, }, wantDeleteJobName: "", - wantImageRef: fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID), + wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), config: Config{ BuildNamespace: testBuildNamespace, BaseImage: cfg.BaseImageConfig{ ImageName: "gojek/base-image:1", - BuildContextURI: testBuildContextURL, + BuildContextURI: testGCSBuildContextURL, BuildContextSubPath: "python/pyfunc-server", DockerfilePath: "./Dockerfile", }, @@ -735,9 +913,10 @@ func TestBuildImage(t *testing.T) { GcpProject: "test-project", Environment: testEnvironmentName, KanikoImage: "gcr.io/kaniko-project/executor:v1.1.0", + KanikoPushRegistryType: "gcr", KanikoAdditionalArgs: defaultKanikoAdditionalArgs, SupportedPythonVersions: defaultSupportedPythonVersions, - DefaultResources: config.DefaultResources, + DefaultResources: configWithGCRPushRegistry.DefaultResources, Tolerations: []v1.Toleration{ { Key: "image-build-job", @@ -748,23 +927,25 @@ func TestBuildImage(t *testing.T) { }, MaximumRetry: jobBackOffLimit, }, + artifactServiceType: "gcs", + artifactServiceURLScheme: "gs", }, { - name: "success: no existing job, not using context sub path", + name: "success: gcs artifact storage + gcr push registry; no existing job, not using context sub path", args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, existingJob: nil, wantCreateJob: &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - Namespace: config.BuildNamespace, + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + Namespace: configWithGCRPushRegistry.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -785,7 +966,7 @@ func TestBuildImage(t *testing.T) { Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -803,12 +984,13 @@ func TestBuildImage(t *testing.T) { Name: containerName, Image: "gcr.io/kaniko-project/executor:v1.1.0", Args: []string{ - fmt.Sprintf("--dockerfile=%s", config.BaseImage.DockerfilePath), - fmt.Sprintf("--context=%s", config.BaseImage.BuildContextURI), - fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=%s", modelDependenciesURL), - fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=%s/model", testArtifactURI), - fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)), + fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "gcs"), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), "--cache=true", "--compressed-caching=false", "--snapshot-mode=redo", @@ -858,43 +1040,46 @@ func TestBuildImage(t *testing.T) { Status: batchv1.JobStatus{}, }, wantDeleteJobName: "", - wantImageRef: fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID), + wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), config: Config{ - BuildNamespace: config.BuildNamespace, + BuildNamespace: configWithGCRPushRegistry.BuildNamespace, BaseImage: cfg.BaseImageConfig{ ImageName: "gojek/base-image:1", - BuildContextURI: testBuildContextURL, + BuildContextURI: testGCSBuildContextURL, DockerfilePath: "./Dockerfile", }, - DockerRegistry: config.DockerRegistry, - BuildTimeoutDuration: config.BuildTimeoutDuration, - ClusterName: config.ClusterName, - GcpProject: config.GcpProject, - Environment: config.Environment, - KanikoImage: config.KanikoImage, + DockerRegistry: configWithGCRPushRegistry.DockerRegistry, + BuildTimeoutDuration: configWithGCRPushRegistry.BuildTimeoutDuration, + ClusterName: configWithGCRPushRegistry.ClusterName, + GcpProject: configWithGCRPushRegistry.GcpProject, + Environment: configWithGCRPushRegistry.Environment, + KanikoImage: configWithGCRPushRegistry.KanikoImage, + KanikoPushRegistryType: configWithGCRPushRegistry.KanikoPushRegistryType, KanikoAdditionalArgs: defaultKanikoAdditionalArgs, SupportedPythonVersions: defaultSupportedPythonVersions, - DefaultResources: config.DefaultResources, - MaximumRetry: config.MaximumRetry, - NodeSelectors: config.NodeSelectors, - Tolerations: config.Tolerations, + DefaultResources: configWithGCRPushRegistry.DefaultResources, + MaximumRetry: configWithGCRPushRegistry.MaximumRetry, + NodeSelectors: configWithGCRPushRegistry.NodeSelectors, + Tolerations: configWithGCRPushRegistry.Tolerations, }, + artifactServiceType: "gcs", + artifactServiceURLScheme: "gs", }, { - name: "success: existing job is running", + name: "success: gcs artifact storage + gcr push registry; existing job is running", args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, existingJob: &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - Namespace: config.BuildNamespace, + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + Namespace: configWithGCRPushRegistry.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -915,7 +1100,7 @@ func TestBuildImage(t *testing.T) { Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -933,13 +1118,14 @@ func TestBuildImage(t *testing.T) { Name: containerName, Image: "gcr.io/kaniko-project/executor:v1.1.0", Args: []string{ - fmt.Sprintf("--dockerfile=%s", config.BaseImage.DockerfilePath), - fmt.Sprintf("--context=%s", config.BaseImage.BuildContextURI), - fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=%s", modelDependenciesURL), - fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=%s/model", testArtifactURI), - fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)), - fmt.Sprintf("--context-sub-path=%s", config.BaseImage.BuildContextSubPath), + fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "gcs"), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), + fmt.Sprintf("--context-sub-path=%s", configWithGCRPushRegistry.BaseImage.BuildContextSubPath), "--cache=true", "--compressed-caching=false", "--snapshot-mode=redo", @@ -988,25 +1174,27 @@ func TestBuildImage(t *testing.T) { }, Status: batchv1.JobStatus{}, }, - wantCreateJob: nil, - wantImageRef: fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID), - config: config, + wantCreateJob: nil, + wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), + config: configWithGCRPushRegistry, + artifactServiceType: "gcs", + artifactServiceURLScheme: "gs", }, { - name: "success: existing job already successful", + name: "success: gcs artifact storage + gcr push registry; existing job already successful", args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, existingJob: &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - Namespace: config.BuildNamespace, + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + Namespace: configWithGCRPushRegistry.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -1027,7 +1215,7 @@ func TestBuildImage(t *testing.T) { Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -1045,13 +1233,14 @@ func TestBuildImage(t *testing.T) { Name: containerName, Image: "gcr.io/kaniko-project/executor:v1.1.0", Args: []string{ - fmt.Sprintf("--dockerfile=%s", config.BaseImage.DockerfilePath), - fmt.Sprintf("--context=%s", config.BaseImage.BuildContextURI), - fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=%s", modelDependenciesURL), - fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=%s/model", testArtifactURI), - fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)), - fmt.Sprintf("--context-sub-path=%s", config.BaseImage.BuildContextSubPath), + fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "gcs"), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), + fmt.Sprintf("--context-sub-path=%s", configWithGCRPushRegistry.BaseImage.BuildContextSubPath), "--cache=true", "--compressed-caching=false", "--snapshot-mode=redo", @@ -1102,26 +1291,28 @@ func TestBuildImage(t *testing.T) { Succeeded: 1, }, }, - wantCreateJob: nil, - wantDeleteJobName: "", - wantImageRef: fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID), - config: config, + wantCreateJob: nil, + wantDeleteJobName: "", + wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), + config: configWithGCRPushRegistry, + artifactServiceType: "gcs", + artifactServiceURLScheme: "gs", }, { - name: "success: existing job failed", + name: "success: gcs artifact storage + gcr push registry; existing job failed", args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, existingJob: &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - Namespace: config.BuildNamespace, + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + Namespace: configWithGCRPushRegistry.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -1142,7 +1333,7 @@ func TestBuildImage(t *testing.T) { Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -1160,13 +1351,14 @@ func TestBuildImage(t *testing.T) { Name: containerName, Image: "gcr.io/kaniko-project/executor:v1.1.0", Args: []string{ - fmt.Sprintf("--dockerfile=%s", config.BaseImage.DockerfilePath), - fmt.Sprintf("--context=%s", config.BaseImage.BuildContextURI), - fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=%s", modelDependenciesURL), - fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=%s/model", testArtifactURI), - fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)), - fmt.Sprintf("--context-sub-path=%s", config.BaseImage.BuildContextSubPath), + fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "gcs"), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), + fmt.Sprintf("--context-sub-path=%s", configWithGCRPushRegistry.BaseImage.BuildContextSubPath), "--cache=true", "--compressed-caching=false", "--snapshot-mode=redo", @@ -1219,12 +1411,12 @@ func TestBuildImage(t *testing.T) { }, wantCreateJob: &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - Namespace: config.BuildNamespace, + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + Namespace: configWithGCRPushRegistry.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -1245,7 +1437,7 @@ func TestBuildImage(t *testing.T) { Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -1263,13 +1455,14 @@ func TestBuildImage(t *testing.T) { Name: containerName, Image: "gcr.io/kaniko-project/executor:v1.1.0", Args: []string{ - fmt.Sprintf("--dockerfile=%s", config.BaseImage.DockerfilePath), - fmt.Sprintf("--context=%s", config.BaseImage.BuildContextURI), - fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=%s", modelDependenciesURL), - fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=%s/model", testArtifactURI), - fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)), - fmt.Sprintf("--context-sub-path=%s", config.BaseImage.BuildContextSubPath), + fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "gcs"), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), + fmt.Sprintf("--context-sub-path=%s", configWithGCRPushRegistry.BaseImage.BuildContextSubPath), "--cache=true", "--compressed-caching=false", "--snapshot-mode=redo", @@ -1318,16 +1511,18 @@ func TestBuildImage(t *testing.T) { }, Status: batchv1.JobStatus{}, }, - wantDeleteJobName: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - wantImageRef: fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID), - config: config, + wantDeleteJobName: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), + config: configWithGCRPushRegistry, + artifactServiceType: "gcs", + artifactServiceURLScheme: "gs", }, { - name: "success: with custom resource request", + name: "success: gcs artifact storage + gcr push registry; with custom resource request", args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, resourceRequest: &models.ResourceRequest{ CPURequest: resource.MustParse("2"), MemoryRequest: resource.MustParse("4Gi"), @@ -1336,12 +1531,12 @@ func TestBuildImage(t *testing.T) { existingJob: nil, wantCreateJob: &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), - Namespace: config.BuildNamespace, + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), + Namespace: configWithGCRPushRegistry.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -1362,7 +1557,7 @@ func TestBuildImage(t *testing.T) { Labels: map[string]string{ "gojek.com/app": model.Name, "gojek.com/component": models.ComponentImageBuilder, - "gojek.com/environment": config.Environment, + "gojek.com/environment": configWithGCRPushRegistry.Environment, "gojek.com/orchestrator": testOrchestratorName, "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, @@ -1380,13 +1575,14 @@ func TestBuildImage(t *testing.T) { Name: containerName, Image: "gcr.io/kaniko-project/executor:v1.1.0", Args: []string{ - fmt.Sprintf("--dockerfile=%s", config.BaseImage.DockerfilePath), - fmt.Sprintf("--context=%s", config.BaseImage.BuildContextURI), - fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=%s", modelDependenciesURL), - fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=%s/model", testArtifactURI), - fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)), - fmt.Sprintf("--context-sub-path=%s", config.BaseImage.BuildContextSubPath), + fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), + fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), + fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "gcs"), + fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), + fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), + fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), + fmt.Sprintf("--context-sub-path=%s", configWithGCRPushRegistry.BaseImage.BuildContextSubPath), "--cache=true", "--compressed-caching=false", "--snapshot-mode=redo", @@ -1435,9 +1631,11 @@ func TestBuildImage(t *testing.T) { }, Status: batchv1.JobStatus{}, }, - wantDeleteJobName: "", - wantImageRef: fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID), - config: config, + wantDeleteJobName: "", + wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), + config: configWithGCRPushRegistry, + artifactServiceType: "gcs", + artifactServiceURLScheme: "gs", }, } @@ -1516,9 +1714,15 @@ func TestBuildImage(t *testing.T) { imageBuilderCfg := tt.config artifactServiceMock := &mocks.Service{} - artifactServiceMock.On("ParseURL", testArtifactURI).Return(testArtifactGsutilURL, nil) - artifactServiceMock.On("ReadArtifact", mock.Anything, testCondaEnvUrl).Return([]byte(testCondaEnvContent), nil) - artifactServiceMock.On("ReadArtifact", mock.Anything, modelDependenciesURL).Return([]byte(testCondaEnvContent), nil) + artifactServiceMock.On("ParseURL", fmt.Sprintf("%s%s", + tt.artifactServiceURLScheme, testArtifactURISuffix)).Return(testArtifactGsutilURL, nil) + artifactServiceMock.On("GetURLScheme").Return(tt.artifactServiceURLScheme) + artifactServiceMock.On("GetURLScheme").Return(tt.artifactServiceURLScheme) + artifactServiceMock.On("GetType").Return(tt.artifactServiceType) + artifactServiceMock.On("ReadArtifact", mock.Anything, + fmt.Sprintf("%s%s", tt.artifactServiceURLScheme, testCondaEnvUrlSuffix)).Return([]byte(testCondaEnvContent), nil) + artifactServiceMock.On("ReadArtifact", mock.Anything, + fmt.Sprintf("%s%s", tt.artifactServiceURLScheme, modelDependenciesURLSuffix)).Return([]byte(testCondaEnvContent), nil) c := NewModelServiceImageBuilder(kubeClient, imageBuilderCfg, artifactServiceMock) @@ -1558,7 +1762,7 @@ func TestGetContainers(t *testing.T) { } modelVersion := &models.Version{ ID: models.ID(1), - ArtifactURI: testArtifactURI, + ArtifactURI: testArtifactURISuffix, } type args struct { @@ -1621,7 +1825,7 @@ func TestGetContainers(t *testing.T) { artifaceServiceMock := &mocks.Service{} - c := NewModelServiceImageBuilder(kubeClient, config, artifaceServiceMock) + c := NewModelServiceImageBuilder(kubeClient, configWithGCRPushRegistry, artifaceServiceMock) containers, err := c.GetContainers(context.Background(), tt.args.project, tt.args.model, tt.args.version) if !tt.wantError { @@ -1823,11 +2027,13 @@ func Test_imageBuilder_getHashedModelDependenciesUrl(t *testing.T) { name: "hash dependencies is already exist", args: args{ ctx: context.Background(), - version: modelVersion, + version: modelVersionWithGCSArtifact, }, artifactServiceMock: func(artifactServiceMock *mocks.Service) { - artifactServiceMock.On("ParseURL", testArtifactURI).Return(testArtifactGsutilURL, nil) - artifactServiceMock.On("ReadArtifact", mock.Anything, testCondaEnvUrl).Return([]byte(testCondaEnvContent), nil) + artifactServiceMock.On("ParseURL", fmt.Sprintf("gs%s", testArtifactURISuffix)).Return(testArtifactGsutilURL, nil) + artifactServiceMock.On("GetURLScheme").Return("gs") + artifactServiceMock.On("GetURLScheme").Return("gs") + artifactServiceMock.On("ReadArtifact", mock.Anything, fmt.Sprintf("gs%s", testCondaEnvUrlSuffix)).Return([]byte(testCondaEnvContent), nil) artifactServiceMock.On("ReadArtifact", mock.Anything, modelDependenciesURL).Return([]byte(testCondaEnvContent), nil) }, want: modelDependenciesURL, @@ -1837,11 +2043,13 @@ func Test_imageBuilder_getHashedModelDependenciesUrl(t *testing.T) { name: "hash dependencies is not exist yet", args: args{ ctx: context.Background(), - version: modelVersion, + version: modelVersionWithGCSArtifact, }, artifactServiceMock: func(artifactServiceMock *mocks.Service) { - artifactServiceMock.On("ParseURL", testArtifactURI).Return(testArtifactGsutilURL, nil) - artifactServiceMock.On("ReadArtifact", mock.Anything, testCondaEnvUrl).Return([]byte(testCondaEnvContent), nil) + artifactServiceMock.On("ParseURL", fmt.Sprintf("gs%s", testArtifactURISuffix)).Return(testArtifactGsutilURL, nil) + artifactServiceMock.On("GetURLScheme").Return("gs") + artifactServiceMock.On("GetURLScheme").Return("gs") + artifactServiceMock.On("ReadArtifact", mock.Anything, fmt.Sprintf("gs%s", testCondaEnvUrlSuffix)).Return([]byte(testCondaEnvContent), nil) artifactServiceMock.On("ReadArtifact", mock.Anything, modelDependenciesURL).Return(nil, storage.ErrObjectNotExist) artifactServiceMock.On("WriteArtifact", mock.Anything, modelDependenciesURL, []byte(testCondaEnvContent)).Return(nil) }, @@ -1895,12 +2103,12 @@ func Test_imageBuilder_GetImageBuildingJobStatus(t *testing.T) { args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, mockGetJob: func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { job := &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), Namespace: config.BuildNamespace, }, Status: batchv1.JobStatus{ @@ -1919,12 +2127,12 @@ func Test_imageBuilder_GetImageBuildingJobStatus(t *testing.T) { args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, mockGetJob: func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { job := &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), Namespace: config.BuildNamespace, }, Status: batchv1.JobStatus{ @@ -1943,12 +2151,12 @@ func Test_imageBuilder_GetImageBuildingJobStatus(t *testing.T) { args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, mockGetJob: func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { job := &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), + Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), Namespace: config.BuildNamespace, }, Status: batchv1.JobStatus{ @@ -1968,9 +2176,9 @@ func Test_imageBuilder_GetImageBuildingJobStatus(t *testing.T) { mockListPods: func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { pod := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s-1", project.Name, model.Name, modelVersion.ID), + Name: fmt.Sprintf("%s-%s-%s-1", project.Name, model.Name, modelVersionWithGCSArtifact.ID), Labels: map[string]string{ - "job-name": fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID), + "job-name": fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), }, }, Status: v1.PodStatus{ @@ -2021,7 +2229,7 @@ CondaEnvException: Pip failed`, args: args{ project: project, model: model, - version: modelVersion, + version: modelVersionWithGCSArtifact, }, mockGetJob: func(action ktesting.Action) (handled bool, ret runtime.Object, err error) { job := &batchv1.Job{} From ea8c5b3c52aa4eff1fe706779db16650ff244736 Mon Sep 17 00:00:00 2001 From: ewezy Date: Thu, 12 Sep 2024 11:41:12 +0800 Subject: [PATCH 03/32] Update go mod file with temp dependencies --- api/go.mod | 19 +++++++++++++++++++ api/go.sum | 45 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 2 deletions(-) diff --git a/api/go.mod b/api/go.mod index 1f9c67111..b9f9ce185 100644 --- a/api/go.mod +++ b/api/go.mod @@ -247,6 +247,24 @@ require ( require ( github.com/avast/retry-go/v4 v4.6.0 // indirect + github.com/aws/aws-sdk-go-v2 v1.30.6-0.20240906182417-827d25db0048 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.4 // indirect + github.com/aws/aws-sdk-go-v2/config v1.17.8 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.12.21 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.17 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.3.24 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.17 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.4 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.3.19 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.19 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.17.17 // indirect + github.com/aws/aws-sdk-go-v2/service/s3 v1.61.2 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.11.23 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.13.6 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.16.19 // indirect + github.com/aws/smithy-go v1.20.4 // indirect golang.org/x/time v0.5.0 // indirect k8s.io/klog/v2 v2.120.1 // indirect k8s.io/kube-openapi v0.0.0-20231113174909-778a5567bc1e // indirect @@ -255,6 +273,7 @@ require ( replace ( github.com/caraml-dev/merlin-pyspark-app => ../python/batch-predictor + github.com/caraml-dev/mlp => github.com/deadlycoconuts/mlp v0.0.0-20240911060740-6b280fe7b5e9 github.com/go-gota/gota => github.com/gojekfarm/gota v0.12.1-0.20230221101638-6cd9260bd598 github.com/googleapis/gnostic => github.com/google/gnostic v0.5.5 github.com/prometheus/tsdb => github.com/prometheus/tsdb v0.3.1 diff --git a/api/go.sum b/api/go.sum index f608b5513..4a034b1f8 100644 --- a/api/go.sum +++ b/api/go.sum @@ -142,6 +142,47 @@ github.com/avast/retry-go/v4 v4.6.0/go.mod h1:gvWlPhBVsvBbLkVGDg/KwvBv0bEkCOLRRS github.com/aws/aws-sdk-go v1.17.7/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/aws/aws-sdk-go v1.50.0 h1:HBtrLeO+QyDKnc3t1+5DR1RxodOHCGr8ZcrHudpv7jI= github.com/aws/aws-sdk-go v1.50.0/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk= +github.com/aws/aws-sdk-go-v2 v1.16.16/go.mod h1:SwiyXi/1zTUZ6KIAmLK5V5ll8SiURNUYOqTerZPaF9k= +github.com/aws/aws-sdk-go-v2 v1.30.6-0.20240906182417-827d25db0048 h1:wXvkIvYQ3EPVO5MhCoEv2u5LDwfWp+kLTQMIGyyvi/0= +github.com/aws/aws-sdk-go-v2 v1.30.6-0.20240906182417-827d25db0048/go.mod h1:CT+ZPWXbYrci8chcARI3OmI/qgd+f6WtuLOoaIA8PR0= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.4 h1:70PVAiL15/aBMh5LThwgXdSQorVr91L127ttckI9QQU= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.4/go.mod h1:/MQxMqci8tlqDH+pjmoLu1i0tbWCUP1hhyMRuFxpQCw= +github.com/aws/aws-sdk-go-v2/config v1.17.8 h1:b9LGqNnOdg9vR4Q43tBTVWk4J6F+W774MSchvKJsqnE= +github.com/aws/aws-sdk-go-v2/config v1.17.8/go.mod h1:UkCI3kb0sCdvtjiXYiU4Zx5h07BOpgBTtkPu/49r+kA= +github.com/aws/aws-sdk-go-v2/credentials v1.12.21 h1:4tjlyCD0hRGNQivh5dN8hbP30qQhMLBE/FgQR1vHHWM= +github.com/aws/aws-sdk-go-v2/credentials v1.12.21/go.mod h1:O+4XyAt4e+oBAoIwNUYkRg3CVMscaIJdmZBOcPgJ8D8= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.17 h1:r08j4sbZu/RVi+BNxkBJwPMUYY3P8mgSDuKkZ/ZN1lE= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.17/go.mod h1:yIkQcCDYNsZfXpd5UX2Cy+sWA1jPgIhGTw9cOBzfVnQ= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.23/go.mod h1:2DFxAQ9pfIRy0imBCJv+vZ2X6RKxves6fbnEuSry6b4= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17 h1:pI7Bzt0BJtYA0N/JEC6B8fJ4RBrEMi1LBrkMdFYNSnQ= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.17/go.mod h1:Dh5zzJYMtxfIjYW+/evjQ8uj2OyR/ve2KROHGHlSFqE= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.17/go.mod h1:pRwaTYCJemADaqCbUAxltMoHKata7hmB5PjEXeu0kfg= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17 h1:Mqr/V5gvrhA2gvgnF42Zh5iMiQNcOYthFYwCyrnuWlc= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.17/go.mod h1:aLJpZlCmjE+V+KtN1q1uyZkfnUWpQGpbsn89XPKyzfU= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.24 h1:wj5Rwc05hvUSvKuOF29IYb9QrCLjU+rHAy/x/o0DK2c= +github.com/aws/aws-sdk-go-v2/internal/ini v1.3.24/go.mod h1:jULHjqqjDlbyTa7pfM7WICATnOv+iOhjletM3N0Xbu8= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.17 h1:Roo69qTpfu8OlJ2Tb7pAYVuF0CpuUMB0IYWwYP/4DZM= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.17/go.mod h1:NcWPxQzGM1USQggaTVwz6VpqMZPX1CvDJLDh6jnOCa4= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.4 h1:KypMCbLPPHEmf9DgMGw51jMj77VfGPAN2Kv4cfhlfgI= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.4/go.mod h1:Vz1JQXliGcQktFTN/LN6uGppAIRoLBR2bMvIMP0gOjc= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.3.19 h1:FLMkfEiRjhgeDTCjjLoc3URo/TBkgeQbocA78lfkzSI= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.3.19/go.mod h1:Vx+GucNSsdhaxs3aZIKfSUjKVGsxN25nX2SRcdhuw08= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.17/go.mod h1:4nYOrY41Lrbk2170/BGkcJKBhws9Pfn8MG3aGqjjeFI= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.19 h1:rfprUlsdzgl7ZL2KlXiUAoJnI/VxfHCvDFr2QDFj6u4= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.19/go.mod h1:SCWkEdRq8/7EK60NcvvQ6NXKuTcchAD4ROAsC37VEZE= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.17.17 h1:u+EfGmksnJc/x5tq3A+OD7LrMbSSR/5TrKLvkdy/fhY= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.17.17/go.mod h1:VaMx6302JHax2vHJWgRo+5n9zvbacs3bLU/23DNQrTY= +github.com/aws/aws-sdk-go-v2/service/s3 v1.61.2 h1:Kp6PWAlXwP1UvIflkIP6MFZYBNDCa4mFCGtxrpICVOg= +github.com/aws/aws-sdk-go-v2/service/s3 v1.61.2/go.mod h1:5FmD/Dqq57gP+XwaUnd5WFPipAuzrf0HmupX27Gvjvc= +github.com/aws/aws-sdk-go-v2/service/sso v1.11.23 h1:pwvCchFUEnlceKIgPUouBJwK81aCkQ8UDMORfeFtW10= +github.com/aws/aws-sdk-go-v2/service/sso v1.11.23/go.mod h1:/w0eg9IhFGjGyyncHIQrXtU8wvNsTJOP0R6PPj0wf80= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.13.6 h1:OwhhKc1P9ElfWbMKPIbMMZBV6hzJlL2JKD76wNNVzgQ= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.13.6/go.mod h1:csZuQY65DAdFBt1oIjO5hhBR49kQqop4+lcuCjf2arA= +github.com/aws/aws-sdk-go-v2/service/sts v1.16.19 h1:9pPi0PsFNAGILFfPCk8Y0iyEBGc6lu6OQ97U7hmdesg= +github.com/aws/aws-sdk-go-v2/service/sts v1.16.19/go.mod h1:h4J3oPZQbxLhzGnk+j9dfYHi5qIOVJ5kczZd658/ydM= +github.com/aws/smithy-go v1.13.3/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= +github.com/aws/smithy-go v1.20.4 h1:2HK1zBdPgRbjFOHlfeQZfpC4r72MOb9bZkiFwggKO+4= +github.com/aws/smithy-go v1.20.4/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 h1:6df1vn4bBlDDo4tARvBm7l6KA9iVMnE3NWizDeWSrps= github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3/go.mod h1:CIWtjkly68+yqLPbvwwR/fjNJA/idrtULjZWh2v1ys0= github.com/bboughton/gcp-helpers v0.1.0 h1:oCl8Hu1twCcyafW3UU4F+opi2fwrma0NlviZwnDFq5Q= @@ -161,8 +202,6 @@ github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dR github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs= github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= -github.com/caraml-dev/mlp v1.13.2-rc2 h1:Zmyoy3OTPv2fU+42rxMwUt9erS9J6QA0nlZQy/xCPtk= -github.com/caraml-dev/mlp v1.13.2-rc2/go.mod h1:jKfnUEpCcARv/aJF6qH7vT7VMKICDVOq/pDFvj6V3vQ= github.com/caraml-dev/protopath v0.1.0 h1:hjJ/U9RGD6QZ0Ee9SIYbVmwPugps4S5EpL6R+5ZrBe0= github.com/caraml-dev/protopath v0.1.0/go.mod h1:hVA2HkTrMYv+Q57gtrzu9/P7EXlNtBUcTz43z6EE010= github.com/caraml-dev/universal-prediction-interface v1.0.0 h1:3Z6adv1XZnBVRzFIeCu3mPcPnJrdB5IByYfdD9K/atI= @@ -226,6 +265,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/deadlycoconuts/mlp v0.0.0-20240911060740-6b280fe7b5e9 h1:HNDZJJbL+FF13lY2pO/NGoftPQIfPnTCvH9pC07V6S0= +github.com/deadlycoconuts/mlp v0.0.0-20240911060740-6b280fe7b5e9/go.mod h1:9kPooDSYsVu5q/z2K4T9uu08RGyiFNbCAFnQVBMJxOk= github.com/denisenkom/go-mssqldb v0.0.0-20190515213511-eb9f6a1743f3/go.mod h1:zAg7JM8CkOJ43xKXIj7eRO9kmWm/TW578qo+oDO6tuM= github.com/dennwc/varint v1.0.0 h1:kGNFFSSw8ToIy3obO/kKr8U9GZYUAxQEVuix4zfDWzE= github.com/dennwc/varint v1.0.0/go.mod h1:hnItb35rvZvJrbTALZtY/iQfDs48JKRG1RPpgziApxA= From 110adf74d6695c7dc9e2682af0cdaf2d30d81bc4 Mon Sep 17 00:00:00 2001 From: ewezy Date: Thu, 12 Sep 2024 11:41:26 +0800 Subject: [PATCH 04/32] Update pyfunc server dockerfiles --- python/pyfunc-server/docker/Dockerfile | 28 ++++++++++++++++++--- python/pyfunc-server/docker/base.Dockerfile | 3 +++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/python/pyfunc-server/docker/Dockerfile b/python/pyfunc-server/docker/Dockerfile index 42d40f90d..3e5a095db 100644 --- a/python/pyfunc-server/docker/Dockerfile +++ b/python/pyfunc-server/docker/Dockerfile @@ -13,16 +13,38 @@ # limitations under the License. ARG BASE_IMAGE + FROM ${BASE_IMAGE} +ARG MLFLOW_ARTIFACT_STORAGE_TYPE + ARG GOOGLE_APPLICATION_CREDENTIALS -RUN if [ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]; \ - then gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} > gcloud-auth.txt; \ - else echo "GOOGLE_APPLICATION_CREDENTIALS is not set. Skipping gcloud auth command." > gcloud-auth.txt; \ + +ARG AWS_ACCESS_KEY_ID +ARG AWS_SECRET_ACCESS_KEY +ARG AWS_DEFAULT_REGION +ARG AWS_ENDPOINT_URL + +RUN if [[ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" == "gcs" ]]; then \ + if [ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]; then \ + gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} > gcloud-auth.txt; \ + else echo "GOOGLE_APPLICATION_CREDENTIALS is not set. Skipping gcloud auth command." > gcloud-auth.txt; \ + fi \ + elif [[ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" == "s3" ]]; then \ + echo "S3 credentials used" \ + else \ + echo "No credentials are used" \ fi # Download and install user model dependencies ARG MODEL_DEPENDENCIES_URL +RUN if [[ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" == "gcs" ]]; then \ + gsutil cp ${MODEL_DEPENDENCIES_URL} conda.yaml \ + elif [[ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" == "s3" ]]; then \ + aws s3api get-object --bucket ${MODEL_DEPENDENCIES_S3_BUCKET} --key ${MODEL_DEPENDENCIES_URL}/conda.yaml conda.yaml \ + else \ + echo "No credentials are used" \ + fi RUN gsutil cp ${MODEL_DEPENDENCIES_URL} conda.yaml ARG MERLIN_DEP_CONSTRAINT diff --git a/python/pyfunc-server/docker/base.Dockerfile b/python/pyfunc-server/docker/base.Dockerfile index 83fdddbc2..c7db5b0f6 100644 --- a/python/pyfunc-server/docker/base.Dockerfile +++ b/python/pyfunc-server/docker/base.Dockerfile @@ -16,6 +16,9 @@ FROM condaforge/miniforge3:23.11.0-0 ENV GCLOUD_VERSION=405.0.1 RUN wget -qO- https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-${GCLOUD_VERSION}-linux-x86_64.tar.gz | tar xzf - +# Install aws CLI +RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && unzip awscliv2.zip && sudo ./aws/install + ENV PATH=$PATH:/google-cloud-sdk/bin ENV GRPC_HEALTH_PROBE_VERSION=v0.4.4 From c217220997910eb82afefcd8861fb26a969c5ba2 Mon Sep 17 00:00:00 2001 From: ewezy Date: Thu, 12 Sep 2024 12:16:21 +0800 Subject: [PATCH 05/32] Replace curl with wget --- python/pyfunc-server/docker/base.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyfunc-server/docker/base.Dockerfile b/python/pyfunc-server/docker/base.Dockerfile index c7db5b0f6..19aab86a2 100644 --- a/python/pyfunc-server/docker/base.Dockerfile +++ b/python/pyfunc-server/docker/base.Dockerfile @@ -17,7 +17,7 @@ FROM condaforge/miniforge3:23.11.0-0 ENV GCLOUD_VERSION=405.0.1 RUN wget -qO- https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-${GCLOUD_VERSION}-linux-x86_64.tar.gz | tar xzf - # Install aws CLI -RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && unzip awscliv2.zip && sudo ./aws/install +RUN wget -qO- https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip && unzip awscli-exe-linux-x86_64.zip && sudo ./aws/install ENV PATH=$PATH:/google-cloud-sdk/bin From 510f03e579a8042771cff0eadfd94f8d656ce23c Mon Sep 17 00:00:00 2001 From: ewezy Date: Thu, 12 Sep 2024 12:21:45 +0800 Subject: [PATCH 06/32] Rearrange dependencies --- api/pkg/imagebuilder/imagebuilder.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/pkg/imagebuilder/imagebuilder.go b/api/pkg/imagebuilder/imagebuilder.go index 33baf21ed..af04120b8 100644 --- a/api/pkg/imagebuilder/imagebuilder.go +++ b/api/pkg/imagebuilder/imagebuilder.go @@ -15,7 +15,6 @@ package imagebuilder import ( - "cloud.google.com/go/storage" "context" "crypto/sha256" "errors" @@ -26,6 +25,7 @@ import ( "strings" "time" + "cloud.google.com/go/storage" "github.com/caraml-dev/mlp/api/pkg/artifact" backoff "github.com/cenkalti/backoff/v4" "github.com/google/go-containerregistry/pkg/authn" From e341e5d2bc0c12e3af1a0b9279b0e7c40cc2da92 Mon Sep 17 00:00:00 2001 From: ewezy Date: Thu, 12 Sep 2024 12:48:21 +0800 Subject: [PATCH 07/32] Add step to install unzip --- python/pyfunc-server/docker/base.Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/pyfunc-server/docker/base.Dockerfile b/python/pyfunc-server/docker/base.Dockerfile index 19aab86a2..fa07f70af 100644 --- a/python/pyfunc-server/docker/base.Dockerfile +++ b/python/pyfunc-server/docker/base.Dockerfile @@ -14,6 +14,8 @@ FROM condaforge/miniforge3:23.11.0-0 +RUN apt-get install unzip + ENV GCLOUD_VERSION=405.0.1 RUN wget -qO- https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-${GCLOUD_VERSION}-linux-x86_64.tar.gz | tar xzf - # Install aws CLI From 9502cbede9c1837512e253552ec56d31ff10a65f Mon Sep 17 00:00:00 2001 From: ewezy Date: Thu, 12 Sep 2024 13:03:34 +0800 Subject: [PATCH 08/32] Fix apt-get installation --- python/pyfunc-server/docker/base.Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyfunc-server/docker/base.Dockerfile b/python/pyfunc-server/docker/base.Dockerfile index fa07f70af..dd0041b2d 100644 --- a/python/pyfunc-server/docker/base.Dockerfile +++ b/python/pyfunc-server/docker/base.Dockerfile @@ -14,12 +14,12 @@ FROM condaforge/miniforge3:23.11.0-0 -RUN apt-get install unzip +RUN apt-get update && apt-get install unzip ENV GCLOUD_VERSION=405.0.1 RUN wget -qO- https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-${GCLOUD_VERSION}-linux-x86_64.tar.gz | tar xzf - # Install aws CLI -RUN wget -qO- https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip && unzip awscli-exe-linux-x86_64.zip && sudo ./aws/install +RUN wget -q https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip && unzip awscli-exe-linux-x86_64.zip && ./aws/install ENV PATH=$PATH:/google-cloud-sdk/bin From 0000fcbe86cf91d6b79fcc712a228104b36d6851 Mon Sep 17 00:00:00 2001 From: ewezy Date: Thu, 12 Sep 2024 13:16:02 +0800 Subject: [PATCH 09/32] Update pyfunc server dockerfile --- python/pyfunc-server/docker/Dockerfile | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/python/pyfunc-server/docker/Dockerfile b/python/pyfunc-server/docker/Dockerfile index 3e5a095db..044057a11 100644 --- a/python/pyfunc-server/docker/Dockerfile +++ b/python/pyfunc-server/docker/Dockerfile @@ -45,7 +45,6 @@ RUN if [[ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" == "gcs" ]]; then \ else \ echo "No credentials are used" \ fi -RUN gsutil cp ${MODEL_DEPENDENCIES_URL} conda.yaml ARG MERLIN_DEP_CONSTRAINT RUN process_conda_env.sh conda.yaml "merlin-pyfunc-server" "${MERLIN_DEP_CONSTRAINT}" @@ -53,7 +52,13 @@ RUN conda env create --name merlin-model --file conda.yaml # Download and dry-run user model artifacts and code ARG MODEL_ARTIFACTS_URL -RUN gsutil -m cp -r ${MODEL_ARTIFACTS_URL} . +RUN if [[ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" == "gcs" ]]; then \ + gsutil -m cp -r ${MODEL_ARTIFACTS_URL} . \ + elif [[ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" == "s3" ]]; then \ + aws s3 cp ${MODEL_ARTIFACTS_URL} . --recursive \ + else \ + echo "No credentials are used" \ + fi RUN /bin/bash -c ". activate merlin-model && merlin-pyfunc-server --model_dir model --dry_run" CMD ["run.sh"] From 980dc74fa38cad931765fa80f2699a0b31c50567 Mon Sep 17 00:00:00 2001 From: ewezy Date: Fri, 13 Sep 2024 14:57:23 +0800 Subject: [PATCH 10/32] Fix image builder error parsing --- api/go.mod | 5 +++-- api/go.sum | 4 ++-- api/pkg/imagebuilder/imagebuilder.go | 9 +++++---- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/api/go.mod b/api/go.mod index b9f9ce185..c0ab2bdc6 100644 --- a/api/go.mod +++ b/api/go.mod @@ -9,6 +9,7 @@ require ( github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5 github.com/antihax/optional v1.0.0 github.com/antonmedv/expr v1.12.5 + github.com/aws/aws-sdk-go-v2/service/s3 v1.61.2 github.com/bboughton/gcp-helpers v0.1.0 github.com/buger/jsonparser v1.1.1 github.com/caraml-dev/merlin-pyspark-app v0.0.3 @@ -260,7 +261,6 @@ require ( github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.3.19 // indirect github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.19 // indirect github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.17.17 // indirect - github.com/aws/aws-sdk-go-v2/service/s3 v1.61.2 // indirect github.com/aws/aws-sdk-go-v2/service/sso v1.11.23 // indirect github.com/aws/aws-sdk-go-v2/service/ssooidc v1.13.6 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.16.19 // indirect @@ -269,11 +269,12 @@ require ( k8s.io/klog/v2 v2.120.1 // indirect k8s.io/kube-openapi v0.0.0-20231113174909-778a5567bc1e // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect + ) replace ( github.com/caraml-dev/merlin-pyspark-app => ../python/batch-predictor - github.com/caraml-dev/mlp => github.com/deadlycoconuts/mlp v0.0.0-20240911060740-6b280fe7b5e9 + github.com/caraml-dev/mlp => github.com/deadlycoconuts/mlp v0.0.0-20240912080549-b0402448c404 github.com/go-gota/gota => github.com/gojekfarm/gota v0.12.1-0.20230221101638-6cd9260bd598 github.com/googleapis/gnostic => github.com/google/gnostic v0.5.5 github.com/prometheus/tsdb => github.com/prometheus/tsdb v0.3.1 diff --git a/api/go.sum b/api/go.sum index 4a034b1f8..3c6f83cb5 100644 --- a/api/go.sum +++ b/api/go.sum @@ -265,8 +265,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/deadlycoconuts/mlp v0.0.0-20240911060740-6b280fe7b5e9 h1:HNDZJJbL+FF13lY2pO/NGoftPQIfPnTCvH9pC07V6S0= -github.com/deadlycoconuts/mlp v0.0.0-20240911060740-6b280fe7b5e9/go.mod h1:9kPooDSYsVu5q/z2K4T9uu08RGyiFNbCAFnQVBMJxOk= +github.com/deadlycoconuts/mlp v0.0.0-20240912080549-b0402448c404 h1:JDEVb5kgRPK8nFYwmKSOxsivGjk+BDx06lG3ERBYe2U= +github.com/deadlycoconuts/mlp v0.0.0-20240912080549-b0402448c404/go.mod h1:9kPooDSYsVu5q/z2K4T9uu08RGyiFNbCAFnQVBMJxOk= github.com/denisenkom/go-mssqldb v0.0.0-20190515213511-eb9f6a1743f3/go.mod h1:zAg7JM8CkOJ43xKXIj7eRO9kmWm/TW578qo+oDO6tuM= github.com/dennwc/varint v1.0.0 h1:kGNFFSSw8ToIy3obO/kKr8U9GZYUAxQEVuix4zfDWzE= github.com/dennwc/varint v1.0.0/go.mod h1:hnItb35rvZvJrbTALZtY/iQfDs48JKRG1RPpgziApxA= diff --git a/api/pkg/imagebuilder/imagebuilder.go b/api/pkg/imagebuilder/imagebuilder.go index af04120b8..3627f1fcc 100644 --- a/api/pkg/imagebuilder/imagebuilder.go +++ b/api/pkg/imagebuilder/imagebuilder.go @@ -19,13 +19,14 @@ import ( "crypto/sha256" "errors" "fmt" + "github.com/aws/aws-sdk-go-v2/service/s3/types" + "github.com/prometheus/client_golang/prometheus/promauto" "net/http" "os" "sort" "strings" "time" - "cloud.google.com/go/storage" "github.com/caraml-dev/mlp/api/pkg/artifact" backoff "github.com/cenkalti/backoff/v4" "github.com/google/go-containerregistry/pkg/authn" @@ -34,7 +35,6 @@ import ( "github.com/google/go-containerregistry/pkg/v1/remote" "github.com/google/go-containerregistry/pkg/v1/remote/transport" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" batchv1 "k8s.io/api/batch/v1" v1 "k8s.io/api/core/v1" kerrors "k8s.io/apimachinery/pkg/api/errors" @@ -171,7 +171,7 @@ func (c *imageBuilder) getHashedModelDependenciesUrl(ctx context.Context, versio } hash := sha256.New() - hash.Write([]byte(condaEnv)) + hash.Write(condaEnv) hashEnv := hash.Sum(nil) hashedDependenciesUrl := fmt.Sprintf("%s://%s%s/%x", c.artifactService.GetURLScheme(), artifactURL.Bucket, modelDependenciesPath, hashEnv) @@ -181,7 +181,8 @@ func (c *imageBuilder) getHashedModelDependenciesUrl(ctx context.Context, versio return hashedDependenciesUrl, nil } - if !errors.Is(err, storage.ErrObjectNotExist) { + var nsk *types.NoSuchKey + if !errors.As(err, &nsk) { return "", err } From 66bd797b943592a4156f86facf36b3538dbb271a Mon Sep 17 00:00:00 2001 From: ewezy Date: Fri, 13 Sep 2024 14:58:16 +0800 Subject: [PATCH 11/32] Fix pyfunc-server dockerfile --- python/pyfunc-server/docker/Dockerfile | 29 +++++++++++++------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/python/pyfunc-server/docker/Dockerfile b/python/pyfunc-server/docker/Dockerfile index 044057a11..afe9c7e6d 100644 --- a/python/pyfunc-server/docker/Dockerfile +++ b/python/pyfunc-server/docker/Dockerfile @@ -25,25 +25,26 @@ ARG AWS_SECRET_ACCESS_KEY ARG AWS_DEFAULT_REGION ARG AWS_ENDPOINT_URL -RUN if [[ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" == "gcs" ]]; then \ +RUN if [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "gcs" ]; then \ if [ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]; then \ gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} > gcloud-auth.txt; \ else echo "GOOGLE_APPLICATION_CREDENTIALS is not set. Skipping gcloud auth command." > gcloud-auth.txt; \ fi \ - elif [[ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" == "s3" ]]; then \ - echo "S3 credentials used" \ + elif [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "s3" ]; then \ + echo "S3 credentials used"; \ else \ - echo "No credentials are used" \ + echo "No credentials are used"; \ fi # Download and install user model dependencies ARG MODEL_DEPENDENCIES_URL -RUN if [[ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" == "gcs" ]]; then \ - gsutil cp ${MODEL_DEPENDENCIES_URL} conda.yaml \ - elif [[ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" == "s3" ]]; then \ - aws s3api get-object --bucket ${MODEL_DEPENDENCIES_S3_BUCKET} --key ${MODEL_DEPENDENCIES_URL}/conda.yaml conda.yaml \ +RUN if [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "gcs" ]; then \ + gsutil cp ${MODEL_DEPENDENCIES_URL} conda.yaml; \ + elif [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "s3" ]; then \ + S3_KEY=${MODEL_DEPENDENCIES_URL##*s3://}; \ + aws s3api get-object --bucket ${S3_KEY%%/*} --key ${S3_KEY#*/} conda.yaml; \ else \ - echo "No credentials are used" \ + echo "No credentials are used"; \ fi ARG MERLIN_DEP_CONSTRAINT @@ -52,12 +53,12 @@ RUN conda env create --name merlin-model --file conda.yaml # Download and dry-run user model artifacts and code ARG MODEL_ARTIFACTS_URL -RUN if [[ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" == "gcs" ]]; then \ - gsutil -m cp -r ${MODEL_ARTIFACTS_URL} . \ - elif [[ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" == "s3" ]]; then \ - aws s3 cp ${MODEL_ARTIFACTS_URL} . --recursive \ +RUN if [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "gcs" ]; then \ + gsutil -m cp -r ${MODEL_ARTIFACTS_URL} .; \ + elif [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "s3" ]; then \ + aws s3 cp ${MODEL_ARTIFACTS_URL} . --recursive; \ else \ - echo "No credentials are used" \ + echo "No credentials are used"; \ fi RUN /bin/bash -c ". activate merlin-model && merlin-pyfunc-server --model_dir model --dry_run" From 36c5d54e45bd7c036684b339a5cc090faa4e689a Mon Sep 17 00:00:00 2001 From: ewezy Date: Fri, 13 Sep 2024 15:40:57 +0800 Subject: [PATCH 12/32] Fix s3 cp destination in dockerfile --- python/pyfunc-server/docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyfunc-server/docker/Dockerfile b/python/pyfunc-server/docker/Dockerfile index afe9c7e6d..ae362256f 100644 --- a/python/pyfunc-server/docker/Dockerfile +++ b/python/pyfunc-server/docker/Dockerfile @@ -56,7 +56,7 @@ ARG MODEL_ARTIFACTS_URL RUN if [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "gcs" ]; then \ gsutil -m cp -r ${MODEL_ARTIFACTS_URL} .; \ elif [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "s3" ]; then \ - aws s3 cp ${MODEL_ARTIFACTS_URL} . --recursive; \ + aws s3 cp ${MODEL_ARTIFACTS_URL} model --recursive; \ else \ echo "No credentials are used"; \ fi From d3b9979d4cdc05bbfb611793a4e0631b85e9ed43 Mon Sep 17 00:00:00 2001 From: ewezy Date: Mon, 16 Sep 2024 10:47:40 +0800 Subject: [PATCH 13/32] Update batch predictor dockerfiles --- python/batch-predictor/docker/app.Dockerfile | 37 ++++++++++++++++--- python/batch-predictor/docker/base.Dockerfile | 3 ++ 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/python/batch-predictor/docker/app.Dockerfile b/python/batch-predictor/docker/app.Dockerfile index d1e518683..39d33a8b4 100644 --- a/python/batch-predictor/docker/app.Dockerfile +++ b/python/batch-predictor/docker/app.Dockerfile @@ -15,15 +15,36 @@ ARG BASE_IMAGE FROM ${BASE_IMAGE} +ARG MLFLOW_ARTIFACT_STORAGE_TYPE + ARG GOOGLE_APPLICATION_CREDENTIALS -RUN if [ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]; \ - then gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} > gcloud-auth.txt; \ - else echo "GOOGLE_APPLICATION_CREDENTIALS is not set. Skipping gcloud auth command." > gcloud-auth.txt; \ + +ARG AWS_ACCESS_KEY_ID +ARG AWS_SECRET_ACCESS_KEY +ARG AWS_DEFAULT_REGION +ARG AWS_ENDPOINT_URL + +RUN if [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "gcs" ]; then \ + if [ ! -z "${GOOGLE_APPLICATION_CREDENTIALS}" ]; then \ + gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS} > gcloud-auth.txt; \ + else echo "GOOGLE_APPLICATION_CREDENTIALS is not set. Skipping gcloud auth command." > gcloud-auth.txt; \ + fi \ + elif [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "s3" ]; then \ + echo "S3 credentials used"; \ + else \ + echo "No credentials are used"; \ fi # Download and install user model dependencies ARG MODEL_DEPENDENCIES_URL -RUN gsutil cp ${MODEL_DEPENDENCIES_URL} conda.yaml +RUN if [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "gcs" ]; then \ + gsutil cp ${MODEL_DEPENDENCIES_URL} conda.yaml; \ + elif [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "s3" ]; then \ + S3_KEY=${MODEL_DEPENDENCIES_URL##*s3://}; \ + aws s3api get-object --bucket ${S3_KEY%%/*} --key ${S3_KEY#*/} conda.yaml; \ + else \ + echo "No credentials are used"; \ + fi ARG MERLIN_DEP_CONSTRAINT RUN process_conda_env.sh conda.yaml "merlin-batch-predictor" "${MERLIN_DEP_CONSTRAINT}" @@ -31,7 +52,13 @@ RUN conda env create --name merlin-model --file conda.yaml # Download and dry-run user model artifacts and code ARG MODEL_ARTIFACTS_URL -RUN gsutil -m cp -r ${MODEL_ARTIFACTS_URL} . +RUN if [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "gcs" ]; then \ + gsutil -m cp -r ${MODEL_ARTIFACTS_URL} .; \ + elif [ "${MLFLOW_ARTIFACT_STORAGE_TYPE}" = "s3" ]; then \ + aws s3 cp ${MODEL_ARTIFACTS_URL} model --recursive; \ + else \ + echo "No credentials are used"; \ + fi RUN /bin/bash -c ". activate merlin-model && merlin-batch-predictor --dry-run-model ${HOME}/model" ENTRYPOINT ["merlin_entrypoint.sh"] diff --git a/python/batch-predictor/docker/base.Dockerfile b/python/batch-predictor/docker/base.Dockerfile index 07237e67a..32f9dc522 100644 --- a/python/batch-predictor/docker/base.Dockerfile +++ b/python/batch-predictor/docker/base.Dockerfile @@ -59,6 +59,9 @@ ENV PATH=$PATH:/google-cloud-sdk/bin ENV GCLOUD_VERSION=405.0.1 RUN wget -qO- https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-${GCLOUD_VERSION}-linux-x86_64.tar.gz | tar xzf - -C / +# Install aws CLI +RUN wget -q https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip && unzip awscli-exe-linux-x86_64.zip && ./aws/install + # Configure non-root user ENV USER spark ENV UID 185 From cafc55479cc88c064c52142a66dea964a16d9656 Mon Sep 17 00:00:00 2001 From: ewezy Date: Mon, 16 Sep 2024 15:32:59 +0800 Subject: [PATCH 14/32] Fix broken model dependencies hash check --- api/go.mod | 2 +- api/go.sum | 4 ++-- api/pkg/imagebuilder/imagebuilder.go | 6 ++---- api/pkg/imagebuilder/imagebuilder_test.go | 3 +-- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/api/go.mod b/api/go.mod index c0ab2bdc6..6782595d7 100644 --- a/api/go.mod +++ b/api/go.mod @@ -274,7 +274,7 @@ require ( replace ( github.com/caraml-dev/merlin-pyspark-app => ../python/batch-predictor - github.com/caraml-dev/mlp => github.com/deadlycoconuts/mlp v0.0.0-20240912080549-b0402448c404 + github.com/caraml-dev/mlp => github.com/deadlycoconuts/mlp v0.0.0-20240916072420-232c6a0805a5 github.com/go-gota/gota => github.com/gojekfarm/gota v0.12.1-0.20230221101638-6cd9260bd598 github.com/googleapis/gnostic => github.com/google/gnostic v0.5.5 github.com/prometheus/tsdb => github.com/prometheus/tsdb v0.3.1 diff --git a/api/go.sum b/api/go.sum index 3c6f83cb5..53f2e6dc8 100644 --- a/api/go.sum +++ b/api/go.sum @@ -265,8 +265,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/deadlycoconuts/mlp v0.0.0-20240912080549-b0402448c404 h1:JDEVb5kgRPK8nFYwmKSOxsivGjk+BDx06lG3ERBYe2U= -github.com/deadlycoconuts/mlp v0.0.0-20240912080549-b0402448c404/go.mod h1:9kPooDSYsVu5q/z2K4T9uu08RGyiFNbCAFnQVBMJxOk= +github.com/deadlycoconuts/mlp v0.0.0-20240916072420-232c6a0805a5 h1:m1CjgOBHIoo0E0QHxSsfzBskHptKdR25dxLEbcYSxn8= +github.com/deadlycoconuts/mlp v0.0.0-20240916072420-232c6a0805a5/go.mod h1:9kPooDSYsVu5q/z2K4T9uu08RGyiFNbCAFnQVBMJxOk= github.com/denisenkom/go-mssqldb v0.0.0-20190515213511-eb9f6a1743f3/go.mod h1:zAg7JM8CkOJ43xKXIj7eRO9kmWm/TW578qo+oDO6tuM= github.com/dennwc/varint v1.0.0 h1:kGNFFSSw8ToIy3obO/kKr8U9GZYUAxQEVuix4zfDWzE= github.com/dennwc/varint v1.0.0/go.mod h1:hnItb35rvZvJrbTALZtY/iQfDs48JKRG1RPpgziApxA= diff --git a/api/pkg/imagebuilder/imagebuilder.go b/api/pkg/imagebuilder/imagebuilder.go index 3627f1fcc..e44c6097c 100644 --- a/api/pkg/imagebuilder/imagebuilder.go +++ b/api/pkg/imagebuilder/imagebuilder.go @@ -19,8 +19,6 @@ import ( "crypto/sha256" "errors" "fmt" - "github.com/aws/aws-sdk-go-v2/service/s3/types" - "github.com/prometheus/client_golang/prometheus/promauto" "net/http" "os" "sort" @@ -35,6 +33,7 @@ import ( "github.com/google/go-containerregistry/pkg/v1/remote" "github.com/google/go-containerregistry/pkg/v1/remote/transport" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" batchv1 "k8s.io/api/batch/v1" v1 "k8s.io/api/core/v1" kerrors "k8s.io/apimachinery/pkg/api/errors" @@ -181,8 +180,7 @@ func (c *imageBuilder) getHashedModelDependenciesUrl(ctx context.Context, versio return hashedDependenciesUrl, nil } - var nsk *types.NoSuchKey - if !errors.As(err, &nsk) { + if !errors.Is(err, artifact.ErrObjectNotExist) { return "", err } diff --git a/api/pkg/imagebuilder/imagebuilder_test.go b/api/pkg/imagebuilder/imagebuilder_test.go index 585e3e44b..e32f41220 100644 --- a/api/pkg/imagebuilder/imagebuilder_test.go +++ b/api/pkg/imagebuilder/imagebuilder_test.go @@ -25,7 +25,6 @@ import ( "testing" "time" - "cloud.google.com/go/storage" "github.com/caraml-dev/merlin/cluster/labeller" cfg "github.com/caraml-dev/merlin/config" "github.com/caraml-dev/merlin/mlp" @@ -2050,7 +2049,7 @@ func Test_imageBuilder_getHashedModelDependenciesUrl(t *testing.T) { artifactServiceMock.On("GetURLScheme").Return("gs") artifactServiceMock.On("GetURLScheme").Return("gs") artifactServiceMock.On("ReadArtifact", mock.Anything, fmt.Sprintf("gs%s", testCondaEnvUrlSuffix)).Return([]byte(testCondaEnvContent), nil) - artifactServiceMock.On("ReadArtifact", mock.Anything, modelDependenciesURL).Return(nil, storage.ErrObjectNotExist) + artifactServiceMock.On("ReadArtifact", mock.Anything, modelDependenciesURL).Return(nil, artifact.ErrObjectNotExist) artifactServiceMock.On("WriteArtifact", mock.Anything, modelDependenciesURL, []byte(testCondaEnvContent)).Return(nil) }, want: modelDependenciesURL, From 8a44fa7057e8eccfd9baf4b6a2ba5117133c8f7e Mon Sep 17 00:00:00 2001 From: ewezy Date: Mon, 16 Sep 2024 15:37:38 +0800 Subject: [PATCH 15/32] Add step to install unzip --- python/batch-predictor/docker/base.Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/batch-predictor/docker/base.Dockerfile b/python/batch-predictor/docker/base.Dockerfile index 32f9dc522..f5b5ad47e 100644 --- a/python/batch-predictor/docker/base.Dockerfile +++ b/python/batch-predictor/docker/base.Dockerfile @@ -23,6 +23,8 @@ ENV PATH /opt/conda/bin:$PATH ENV SPARK_OPERATOR_VERSION=v1beta2-1.3.7-3.1.1 ENV SPARK_BQ_CONNECTOR_VERSION=0.27.0 +RUN apt-get update && apt-get install unzip + # Setup dependencies for Google Cloud Storage access. RUN rm $SPARK_HOME/jars/guava-14.0.1.jar ADD https://repo1.maven.org/maven2/com/google/guava/guava/23.0/guava-23.0.jar \ From 9f84f6d626db172399b94cbd6c456a62f0b439d9 Mon Sep 17 00:00:00 2001 From: ewezy Date: Tue, 17 Sep 2024 15:56:20 +0800 Subject: [PATCH 16/32] Add kaniko push registry type to e2e test configs --- api/cmd/api/setup.go | 5 +++++ api/config/config.go | 4 ++-- config.yaml | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/api/cmd/api/setup.go b/api/cmd/api/setup.go index a23dac719..87e9931dd 100644 --- a/api/cmd/api/setup.go +++ b/api/cmd/api/setup.go @@ -114,6 +114,11 @@ func initImageBuilder(cfg *config.Config) (webserviceBuilder imagebuilder.ImageB log.Panicf("invalid artifact service type %s", cfg.ImageBuilderConfig.ArtifactServiceType) } + if cfg.ImageBuilderConfig.KanikoPushRegistryType != "gcs" && + cfg.ImageBuilderConfig.KanikoPushRegistryType != "docker" { + log.Panicf("invalid kaniko push registry type %s", cfg.ImageBuilderConfig.KanikoPushRegistryType) + } + if err != nil { log.Panicf("%s,failed initializing mlflow artifact service", err.Error()) } diff --git a/api/config/config.go b/api/config/config.go index be2ce6870..174fd0a8f 100644 --- a/api/config/config.go +++ b/api/config/config.go @@ -211,7 +211,7 @@ type ClusterConfig struct { type ImageBuilderConfig struct { ClusterName string `validate:"required"` GcpProject string - ArtifactServiceType string + ArtifactServiceType string `validate:"required"` BaseImage BaseImageConfig `validate:"required"` PredictionJobBaseImage BaseImageConfig `validate:"required"` BuildNamespace string `validate:"required" default:"mlp"` @@ -219,7 +219,7 @@ type ImageBuilderConfig struct { BuildTimeout string `validate:"required" default:"10m"` KanikoImage string `validate:"required" default:"gcr.io/kaniko-project/executor:v1.6.0"` KanikoServiceAccount string - KanikoPushRegistryType string `validate:"required"` + KanikoPushRegistryType string `validate:"required" default:"docker"` KanikoDockerCredentialSecretName string KanikoAdditionalArgs []string DefaultResources ResourceRequestsLimits `validate:"required"` diff --git a/config.yaml b/config.yaml index 4d34b1db4..4125753c3 100644 --- a/config.yaml +++ b/config.yaml @@ -51,6 +51,7 @@ ImageBuilderConfig: command: gke-gcloud-auth-plugin interactiveMode: IfAvailable provideClusterInfo: true + KanikoPushRegistryType: docker KanikoAdditionalArgs: - "--cache=true" - "--compressed-caching=false" From 5bbdcac071f6439a48f687cfcded6c9437437036 Mon Sep 17 00:00:00 2001 From: ewezy Date: Tue, 17 Sep 2024 16:08:33 +0800 Subject: [PATCH 17/32] Fix unit test --- api/config/config_test.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/api/config/config_test.go b/api/config/config_test.go index 265be1470..c6e5eba9c 100644 --- a/api/config/config_test.go +++ b/api/config/config_test.go @@ -403,12 +403,13 @@ func TestLoad(t *testing.T) { BuildContextSubPath: "python", MainAppPath: "/home/spark/merlin-spark-app/main.py", }, - BuildNamespace: "caraml", - DockerRegistry: "test-docker.pkg.dev/test/caraml-registry", - BuildTimeout: "30m", - KanikoImage: "gcr.io/kaniko-project/executor:v1.21.0", - KanikoServiceAccount: "kaniko-merlin", - KanikoAdditionalArgs: []string{"--test=true", "--no-logs=false"}, + BuildNamespace: "caraml", + DockerRegistry: "test-docker.pkg.dev/test/caraml-registry", + BuildTimeout: "30m", + KanikoImage: "gcr.io/kaniko-project/executor:v1.21.0", + KanikoServiceAccount: "kaniko-merlin", + KanikoPushRegistryType: "docker", + KanikoAdditionalArgs: []string{"--test=true", "--no-logs=false"}, DefaultResources: ResourceRequestsLimits{ Requests: Resource{ CPU: "1", From 07bbfdfb896eda11a98f830e5384255795f73d79 Mon Sep 17 00:00:00 2001 From: ewezy Date: Tue, 17 Sep 2024 17:27:06 +0800 Subject: [PATCH 18/32] Update go mod files --- api/go.mod | 6 +++--- api/go.sum | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/api/go.mod b/api/go.mod index 6782595d7..fba7c923c 100644 --- a/api/go.mod +++ b/api/go.mod @@ -4,12 +4,10 @@ go 1.22 require ( cloud.google.com/go/bigtable v1.11.0 - cloud.google.com/go/storage v1.39.0 github.com/GoogleCloudPlatform/spark-on-k8s-operator v0.0.0-20221025152940-c261df66a006 github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5 github.com/antihax/optional v1.0.0 github.com/antonmedv/expr v1.12.5 - github.com/aws/aws-sdk-go-v2/service/s3 v1.61.2 github.com/bboughton/gcp-helpers v0.1.0 github.com/buger/jsonparser v1.1.1 github.com/caraml-dev/merlin-pyspark-app v0.0.3 @@ -247,6 +245,7 @@ require ( ) require ( + cloud.google.com/go/storage v1.39.0 // indirect github.com/avast/retry-go/v4 v4.6.0 // indirect github.com/aws/aws-sdk-go-v2 v1.30.6-0.20240906182417-827d25db0048 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.4 // indirect @@ -261,6 +260,7 @@ require ( github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.3.19 // indirect github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.19 // indirect github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.17.17 // indirect + github.com/aws/aws-sdk-go-v2/service/s3 v1.61.2 // indirect github.com/aws/aws-sdk-go-v2/service/sso v1.11.23 // indirect github.com/aws/aws-sdk-go-v2/service/ssooidc v1.13.6 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.16.19 // indirect @@ -274,7 +274,7 @@ require ( replace ( github.com/caraml-dev/merlin-pyspark-app => ../python/batch-predictor - github.com/caraml-dev/mlp => github.com/deadlycoconuts/mlp v0.0.0-20240916072420-232c6a0805a5 + github.com/caraml-dev/mlp => github.com/deadlycoconuts/mlp v0.0.0-20240917090435-d94d92572eac github.com/go-gota/gota => github.com/gojekfarm/gota v0.12.1-0.20230221101638-6cd9260bd598 github.com/googleapis/gnostic => github.com/google/gnostic v0.5.5 github.com/prometheus/tsdb => github.com/prometheus/tsdb v0.3.1 diff --git a/api/go.sum b/api/go.sum index 53f2e6dc8..afec65406 100644 --- a/api/go.sum +++ b/api/go.sum @@ -265,8 +265,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/deadlycoconuts/mlp v0.0.0-20240916072420-232c6a0805a5 h1:m1CjgOBHIoo0E0QHxSsfzBskHptKdR25dxLEbcYSxn8= -github.com/deadlycoconuts/mlp v0.0.0-20240916072420-232c6a0805a5/go.mod h1:9kPooDSYsVu5q/z2K4T9uu08RGyiFNbCAFnQVBMJxOk= +github.com/deadlycoconuts/mlp v0.0.0-20240917090435-d94d92572eac h1:M6dR1d3O+xY4suQPumkbNO1Ie70M6WoYFaoW+cjKT+8= +github.com/deadlycoconuts/mlp v0.0.0-20240917090435-d94d92572eac/go.mod h1:9kPooDSYsVu5q/z2K4T9uu08RGyiFNbCAFnQVBMJxOk= github.com/denisenkom/go-mssqldb v0.0.0-20190515213511-eb9f6a1743f3/go.mod h1:zAg7JM8CkOJ43xKXIj7eRO9kmWm/TW578qo+oDO6tuM= github.com/dennwc/varint v1.0.0 h1:kGNFFSSw8ToIy3obO/kKr8U9GZYUAxQEVuix4zfDWzE= github.com/dennwc/varint v1.0.0/go.mod h1:hnItb35rvZvJrbTALZtY/iQfDs48JKRG1RPpgziApxA= From 57aa17229841a74240bdab2632d70ec9d566710e Mon Sep 17 00:00:00 2001 From: ewezy Date: Wed, 18 Sep 2024 11:58:35 +0800 Subject: [PATCH 19/32] Clean up image builder --- api/pkg/imagebuilder/imagebuilder.go | 51 ++++++++++++++-------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/api/pkg/imagebuilder/imagebuilder.go b/api/pkg/imagebuilder/imagebuilder.go index e44c6097c..8df76c049 100644 --- a/api/pkg/imagebuilder/imagebuilder.go +++ b/api/pkg/imagebuilder/imagebuilder.go @@ -108,12 +108,12 @@ const ( gacEnvKey = "GOOGLE_APPLICATION_CREDENTIALS" saFilePath = "/secret/kaniko-secret.json" - dockerCredentialSecretVolume = "docker-registry-credentials" - dockerCredentialConfigPath = "/kaniko/.docker" + dockerCredentialConfigPath = "/kaniko/.docker" - baseImageEnvKey = "BASE_IMAGE" - modelDependenciesUrlEnvKey = "MODEL_DEPENDENCIES_URL" - modelArtifactsUrlEnvKey = "MODEL_ARTIFACTS_URL" + baseImageEnvKey = "BASE_IMAGE" + modelArtifactStorageTypeKey = "MLFLOW_ARTIFACT_STORAGE_TYPE" + modelDependenciesUrlEnvKey = "MODEL_DEPENDENCIES_URL" + modelArtifactsUrlEnvKey = "MODEL_ARTIFACTS_URL" modelDependenciesPath = "/merlin/model_dependencies" ) @@ -407,16 +407,16 @@ func getGCPSubDomains() []string { func (c *imageBuilder) imageRefExists(imageName, imageTag string) (bool, error) { // The DefaultKeychain will use credentials as described in the Docker config file whose location is specified by // the DOCKER_CONFIG environment variable, if set. - keychains := []authn.Keychain{ - authn.DefaultKeychain, - } + var keychain authn.Keychain + keychain = authn.DefaultKeychain + for _, domain := range getGCPSubDomains() { if strings.Contains(c.config.DockerRegistry, domain) { - keychains = append(keychains, google.Keychain) + keychain = google.Keychain } } - multiKeychain := authn.NewMultiKeychain(keychains...) + multiKeychain := authn.NewMultiKeychain(keychain) repo, err := name.NewRepository(imageName) if err != nil { @@ -623,18 +623,15 @@ func (c *imageBuilder) createKanikoJobSpec( fmt.Sprintf("--dockerfile=%s", baseImageTag.DockerfilePath), fmt.Sprintf("--context=%s", baseImageTag.BuildContextURI), fmt.Sprintf("--build-arg=%s=%s", baseImageEnvKey, baseImageTag.ImageName), - fmt.Sprintf("--build-arg=%s=%s", "MLFLOW_ARTIFACT_STORAGE_TYPE", c.artifactService.GetType()), + fmt.Sprintf("--build-arg=%s=%s", modelArtifactStorageTypeKey, c.artifactService.GetType()), fmt.Sprintf("--build-arg=%s=%s", modelDependenciesUrlEnvKey, modelDependenciesUrl), fmt.Sprintf("--build-arg=%s=%s/model", modelArtifactsUrlEnvKey, version.ArtifactURI), fmt.Sprintf("--destination=%s", imageRef), } - if artifactType := c.artifactService.GetType(); artifactType == "gcs" { - //kanikoArgs = append(kanikoArgs, fmt.Sprintf("--build-arg=%s=%s", "", modelDependenciesUrl)) - } else if c.artifactService.GetType() == "s3" { + if c.artifactService.GetType() == "s3" { kanikoArgs = append( kanikoArgs, - // TODO: To refactor env var values into configs? fmt.Sprintf("--build-arg=%s=%s", "AWS_ACCESS_KEY_ID", os.Getenv("AWS_ACCESS_KEY_ID")), fmt.Sprintf("--build-arg=%s=%s", "AWS_SECRET_ACCESS_KEY", os.Getenv("AWS_SECRET_ACCESS_KEY")), fmt.Sprintf("--build-arg=%s=%s", "AWS_DEFAULT_REGION", os.Getenv("AWS_DEFAULT_REGION")), @@ -681,18 +678,22 @@ func (c *imageBuilder) createKanikoJobSpec( } } } else if c.config.KanikoPushRegistryType == "docker" { - volumes = append(volumes, v1.Volume{ - Name: kanikoSecretName, - VolumeSource: v1.VolumeSource{ - Secret: &v1.SecretVolumeSource{ - SecretName: c.config.KanikoDockerCredentialSecretName, + volumes = []v1.Volume{ + { + Name: kanikoSecretName, + VolumeSource: v1.VolumeSource{ + Secret: &v1.SecretVolumeSource{ + SecretName: c.config.KanikoDockerCredentialSecretName, + }, }, }, - }) - volumeMounts = append(volumeMounts, v1.VolumeMount{ - Name: kanikoSecretName, - MountPath: dockerCredentialConfigPath, - }) + } + volumeMounts = []v1.VolumeMount{ + { + Name: kanikoSecretName, + MountPath: dockerCredentialConfigPath, + }, + } } var resourceRequirements RequestLimitResources From f6b3ff0f1bdd4adcaceb7314204b4f5b42b5d90b Mon Sep 17 00:00:00 2001 From: ewezy Date: Fri, 20 Sep 2024 11:37:24 +0800 Subject: [PATCH 20/32] Remove the redundant usage of multikeychains --- api/pkg/imagebuilder/imagebuilder.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/api/pkg/imagebuilder/imagebuilder.go b/api/pkg/imagebuilder/imagebuilder.go index 8df76c049..786cffbd9 100644 --- a/api/pkg/imagebuilder/imagebuilder.go +++ b/api/pkg/imagebuilder/imagebuilder.go @@ -409,21 +409,18 @@ func (c *imageBuilder) imageRefExists(imageName, imageTag string) (bool, error) // the DOCKER_CONFIG environment variable, if set. var keychain authn.Keychain keychain = authn.DefaultKeychain - for _, domain := range getGCPSubDomains() { if strings.Contains(c.config.DockerRegistry, domain) { keychain = google.Keychain } } - multiKeychain := authn.NewMultiKeychain(keychain) - repo, err := name.NewRepository(imageName) if err != nil { return false, fmt.Errorf("unable to parse docker repository %s: %w", imageName, err) } - tags, err := remote.List(repo, remote.WithAuthFromKeychain(multiKeychain)) + tags, err := remote.List(repo, remote.WithAuthFromKeychain(keychain)) if err != nil { var terr *transport.Error if errors.As(err, &terr) { From c0ec6201daf9167aa49863babf92433384680e78 Mon Sep 17 00:00:00 2001 From: ewezy Date: Fri, 20 Sep 2024 13:02:56 +0800 Subject: [PATCH 21/32] Remove redundant config value --- api/cmd/api/setup.go | 8 ++++---- api/config/config.go | 7 +++---- api/config/config_test.go | 5 ++--- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/api/cmd/api/setup.go b/api/cmd/api/setup.go index 87e9931dd..8d7e3eba1 100644 --- a/api/cmd/api/setup.go +++ b/api/cmd/api/setup.go @@ -104,14 +104,14 @@ func initImageBuilder(cfg *config.Config) (webserviceBuilder imagebuilder.ImageB } var artifactService artifact.Service - if cfg.ImageBuilderConfig.ArtifactServiceType == "gcs" { + if cfg.MlflowConfig.ArtifactServiceType == "gcs" { artifactService, err = artifact.NewGcsArtifactClient() - } else if cfg.ImageBuilderConfig.ArtifactServiceType == "s3" { + } else if cfg.MlflowConfig.ArtifactServiceType == "s3" { artifactService, err = artifact.NewS3ArtifactClient() - } else if cfg.ImageBuilderConfig.ArtifactServiceType == "nop" { + } else if cfg.MlflowConfig.ArtifactServiceType == "nop" { artifactService = artifact.NewNopArtifactClient() } else { - log.Panicf("invalid artifact service type %s", cfg.ImageBuilderConfig.ArtifactServiceType) + log.Panicf("invalid artifact service type %s", cfg.MlflowConfig.ArtifactServiceType) } if cfg.ImageBuilderConfig.KanikoPushRegistryType != "gcs" && diff --git a/api/config/config.go b/api/config/config.go index 174fd0a8f..0bad14011 100644 --- a/api/config/config.go +++ b/api/config/config.go @@ -211,7 +211,6 @@ type ClusterConfig struct { type ImageBuilderConfig struct { ClusterName string `validate:"required"` GcpProject string - ArtifactServiceType string `validate:"required"` BaseImage BaseImageConfig `validate:"required"` PredictionJobBaseImage BaseImageConfig `validate:"required"` BuildNamespace string `validate:"required" default:"mlp"` @@ -219,7 +218,7 @@ type ImageBuilderConfig struct { BuildTimeout string `validate:"required" default:"10m"` KanikoImage string `validate:"required" default:"gcr.io/kaniko-project/executor:v1.6.0"` KanikoServiceAccount string - KanikoPushRegistryType string `validate:"required" default:"docker"` + KanikoPushRegistryType string `validate:"required,oneof=docker gcr" default:"docker"` KanikoDockerCredentialSecretName string KanikoAdditionalArgs []string DefaultResources ResourceRequestsLimits `validate:"required"` @@ -455,8 +454,8 @@ type JaegerConfig struct { } type MlflowConfig struct { - TrackingURL string `validate:"required"` - ArtifactServiceType string `validate:"required"` + TrackingURL string `validate:"required_if=ArtifactServiceType gcs ArtifactServiceType s3"` + ArtifactServiceType string `validate:"required,oneof=nop gcs s3"` } func (cfg *Config) Validate() error { diff --git a/api/config/config_test.go b/api/config/config_test.go index c6e5eba9c..8e1abadfe 100644 --- a/api/config/config_test.go +++ b/api/config/config_test.go @@ -387,9 +387,8 @@ func TestLoad(t *testing.T) { EnvironmentConfigs: []*EnvironmentConfig{}, }, ImageBuilderConfig: ImageBuilderConfig{ - ClusterName: "test-cluster", - GcpProject: "test-project", - ArtifactServiceType: "gcs", + ClusterName: "test-cluster", + GcpProject: "test-project", BaseImage: BaseImageConfig{ ImageName: "ghcr.io/caraml-dev/merlin/merlin-pyfunc-base:0.0.0", DockerfilePath: "pyfunc-server/docker/Dockerfile", From f0d8b6d95ec3c8b1492c1691acaec69e484948cd Mon Sep 17 00:00:00 2001 From: ewezy Date: Thu, 26 Sep 2024 15:39:40 +0800 Subject: [PATCH 22/32] Fix incorrect kaniko push registry type specified in app startup --- api/cmd/api/setup.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/cmd/api/setup.go b/api/cmd/api/setup.go index 8d7e3eba1..1bcb81298 100644 --- a/api/cmd/api/setup.go +++ b/api/cmd/api/setup.go @@ -114,7 +114,7 @@ func initImageBuilder(cfg *config.Config) (webserviceBuilder imagebuilder.ImageB log.Panicf("invalid artifact service type %s", cfg.MlflowConfig.ArtifactServiceType) } - if cfg.ImageBuilderConfig.KanikoPushRegistryType != "gcs" && + if cfg.ImageBuilderConfig.KanikoPushRegistryType != "gcr" && cfg.ImageBuilderConfig.KanikoPushRegistryType != "docker" { log.Panicf("invalid kaniko push registry type %s", cfg.ImageBuilderConfig.KanikoPushRegistryType) } From 659940d8c7ed03fe3e4db8ec6c68919363f23853 Mon Sep 17 00:00:00 2001 From: ewezy Date: Mon, 30 Sep 2024 18:23:34 +0800 Subject: [PATCH 23/32] Refactor code to improve code readability --- api/cmd/api/setup.go | 7 +- api/pkg/imagebuilder/imagebuilder.go | 106 +++++++++++++++------------ 2 files changed, 64 insertions(+), 49 deletions(-) diff --git a/api/cmd/api/setup.go b/api/cmd/api/setup.go index 1bcb81298..99d58e28b 100644 --- a/api/cmd/api/setup.go +++ b/api/cmd/api/setup.go @@ -113,16 +113,15 @@ func initImageBuilder(cfg *config.Config) (webserviceBuilder imagebuilder.ImageB } else { log.Panicf("invalid artifact service type %s", cfg.MlflowConfig.ArtifactServiceType) } + if err != nil { + log.Panicf("%s,failed initializing mlflow artifact service", err.Error()) + } if cfg.ImageBuilderConfig.KanikoPushRegistryType != "gcr" && cfg.ImageBuilderConfig.KanikoPushRegistryType != "docker" { log.Panicf("invalid kaniko push registry type %s", cfg.ImageBuilderConfig.KanikoPushRegistryType) } - if err != nil { - log.Panicf("%s,failed initializing mlflow artifact service", err.Error()) - } - webServiceConfig := imagebuilder.Config{ BaseImage: cfg.ImageBuilderConfig.BaseImage, BuildNamespace: cfg.ImageBuilderConfig.BuildNamespace, diff --git a/api/pkg/imagebuilder/imagebuilder.go b/api/pkg/imagebuilder/imagebuilder.go index 786cffbd9..fc85f3f5a 100644 --- a/api/pkg/imagebuilder/imagebuilder.go +++ b/api/pkg/imagebuilder/imagebuilder.go @@ -647,51 +647,9 @@ func (c *imageBuilder) createKanikoJobSpec( var envVar []v1.EnvVar // Configure additional authentication requirements for specific image registries - if c.config.KanikoPushRegistryType == "gcr" { - if c.config.KanikoServiceAccount == "" { - kanikoArgs = append(kanikoArgs, - fmt.Sprintf("--build-arg=%s=%s", gacEnvKey, saFilePath)) - volumes = []v1.Volume{ - { - Name: kanikoSecretName, - VolumeSource: v1.VolumeSource{ - Secret: &v1.SecretVolumeSource{ - SecretName: kanikoSecretName, - }, - }, - }, - } - volumeMounts = []v1.VolumeMount{ - { - Name: kanikoSecretName, - MountPath: "/secret", - }, - } - envVar = []v1.EnvVar{ - { - Name: gacEnvKey, - Value: saFilePath, - }, - } - } - } else if c.config.KanikoPushRegistryType == "docker" { - volumes = []v1.Volume{ - { - Name: kanikoSecretName, - VolumeSource: v1.VolumeSource{ - Secret: &v1.SecretVolumeSource{ - SecretName: c.config.KanikoDockerCredentialSecretName, - }, - }, - }, - } - volumeMounts = []v1.VolumeMount{ - { - Name: kanikoSecretName, - MountPath: dockerCredentialConfigPath, - }, - } - } + kanikoArgs = c.configureKanikoArgsForKanikoPushRegistry(kanikoArgs) + volumes, volumeMounts = c.configureVolumesAndVolumeMountsForKanikoPushRegistry(volumes, volumeMounts) + envVar = c.configureEnvVarsForKanikoPushRegistry(envVar) var resourceRequirements RequestLimitResources cpuRequest := resource.MustParse(c.config.DefaultResources.Requests.CPU) @@ -766,6 +724,64 @@ func (c *imageBuilder) createKanikoJobSpec( return job, nil } +func (c *imageBuilder) configureVolumesAndVolumeMountsForKanikoPushRegistry( + volumes []v1.Volume, + volumeMounts []v1.VolumeMount, +) ([]v1.Volume, []v1.VolumeMount) { + if c.config.KanikoPushRegistryType == "gcr" { + if c.config.KanikoServiceAccount == "" { + volumes = append(volumes, v1.Volume{ + Name: kanikoSecretName, + VolumeSource: v1.VolumeSource{ + Secret: &v1.SecretVolumeSource{ + SecretName: kanikoSecretName, + }, + }, + }) + volumeMounts = append(volumeMounts, v1.VolumeMount{ + Name: kanikoSecretName, + MountPath: "/secret", + }) + } + } else if c.config.KanikoPushRegistryType == "docker" { + volumes = append(volumes, v1.Volume{ + Name: kanikoSecretName, + VolumeSource: v1.VolumeSource{ + Secret: &v1.SecretVolumeSource{ + SecretName: c.config.KanikoDockerCredentialSecretName, + }, + }, + }) + volumeMounts = append(volumeMounts, v1.VolumeMount{ + Name: kanikoSecretName, + MountPath: dockerCredentialConfigPath, + }) + } + return volumes, volumeMounts +} + +func (c *imageBuilder) configureEnvVarsForKanikoPushRegistry(envVar []v1.EnvVar) []v1.EnvVar { + if c.config.KanikoPushRegistryType == "gcr" { + if c.config.KanikoServiceAccount == "" { + envVar = append(envVar, v1.EnvVar{ + Name: gacEnvKey, + Value: saFilePath, + }) + } + } + return envVar +} + +func (c *imageBuilder) configureKanikoArgsForKanikoPushRegistry(kanikoArgs []string) []string { + if c.config.KanikoPushRegistryType == "gcr" { + if c.config.KanikoServiceAccount == "" { + kanikoArgs = append(kanikoArgs, + fmt.Sprintf("--build-arg=%s=%s", gacEnvKey, saFilePath)) + } + } + return kanikoArgs +} + func (c *imageBuilder) GetImageBuildingJobStatus(ctx context.Context, project mlp.Project, model *models.Model, version *models.Version) (status models.ImageBuildingJobStatus) { status.State = models.ImageBuildingJobStateUnknown From 6bd7a4d492a791998a5010fcd5870bc773594b79 Mon Sep 17 00:00:00 2001 From: ewezy Date: Thu, 10 Oct 2024 12:57:47 +0800 Subject: [PATCH 24/32] Update go mod files --- api/go.mod | 3 +-- api/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/api/go.mod b/api/go.mod index fba7c923c..deb3aedff 100644 --- a/api/go.mod +++ b/api/go.mod @@ -11,7 +11,7 @@ require ( github.com/bboughton/gcp-helpers v0.1.0 github.com/buger/jsonparser v1.1.1 github.com/caraml-dev/merlin-pyspark-app v0.0.3 - github.com/caraml-dev/mlp v1.13.2-rc2 + github.com/caraml-dev/mlp v1.13.2 github.com/caraml-dev/protopath v0.1.0 github.com/caraml-dev/universal-prediction-interface v1.0.0 github.com/cenkalti/backoff/v4 v4.2.1 @@ -274,7 +274,6 @@ require ( replace ( github.com/caraml-dev/merlin-pyspark-app => ../python/batch-predictor - github.com/caraml-dev/mlp => github.com/deadlycoconuts/mlp v0.0.0-20240917090435-d94d92572eac github.com/go-gota/gota => github.com/gojekfarm/gota v0.12.1-0.20230221101638-6cd9260bd598 github.com/googleapis/gnostic => github.com/google/gnostic v0.5.5 github.com/prometheus/tsdb => github.com/prometheus/tsdb v0.3.1 diff --git a/api/go.sum b/api/go.sum index afec65406..f05153cb9 100644 --- a/api/go.sum +++ b/api/go.sum @@ -202,6 +202,8 @@ github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dR github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs= github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= +github.com/caraml-dev/mlp v1.13.2 h1:N3lk+ToQ281duZImQLTQ28uJtmoc9Zkxx1CR94rS15U= +github.com/caraml-dev/mlp v1.13.2/go.mod h1:9kPooDSYsVu5q/z2K4T9uu08RGyiFNbCAFnQVBMJxOk= github.com/caraml-dev/protopath v0.1.0 h1:hjJ/U9RGD6QZ0Ee9SIYbVmwPugps4S5EpL6R+5ZrBe0= github.com/caraml-dev/protopath v0.1.0/go.mod h1:hVA2HkTrMYv+Q57gtrzu9/P7EXlNtBUcTz43z6EE010= github.com/caraml-dev/universal-prediction-interface v1.0.0 h1:3Z6adv1XZnBVRzFIeCu3mPcPnJrdB5IByYfdD9K/atI= @@ -265,8 +267,6 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/deadlycoconuts/mlp v0.0.0-20240917090435-d94d92572eac h1:M6dR1d3O+xY4suQPumkbNO1Ie70M6WoYFaoW+cjKT+8= -github.com/deadlycoconuts/mlp v0.0.0-20240917090435-d94d92572eac/go.mod h1:9kPooDSYsVu5q/z2K4T9uu08RGyiFNbCAFnQVBMJxOk= github.com/denisenkom/go-mssqldb v0.0.0-20190515213511-eb9f6a1743f3/go.mod h1:zAg7JM8CkOJ43xKXIj7eRO9kmWm/TW578qo+oDO6tuM= github.com/dennwc/varint v1.0.0 h1:kGNFFSSw8ToIy3obO/kKr8U9GZYUAxQEVuix4zfDWzE= github.com/dennwc/varint v1.0.0/go.mod h1:hnItb35rvZvJrbTALZtY/iQfDs48JKRG1RPpgziApxA= From acccc988c237ac4361456be502077fe5852be29d Mon Sep 17 00:00:00 2001 From: ewezy Date: Sun, 20 Oct 2024 18:31:32 +0800 Subject: [PATCH 25/32] Remove code to parse env vars into kaniko arguments --- api/config/config.go | 9 +++++++-- api/pkg/imagebuilder/imagebuilder.go | 10 ---------- api/pkg/imagebuilder/imagebuilder_test.go | 4 ---- 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/api/config/config.go b/api/config/config.go index 0bad14011..10cd733f7 100644 --- a/api/config/config.go +++ b/api/config/config.go @@ -142,7 +142,7 @@ func (docs *Documentations) Decode(value string) error { type DatabaseConfig struct { Host string `validate:"required"` - Port int `validate:"required" default:"5432"` + Port int `validate:"required" default:"5431"` User string `validate:"required"` Password string `validate:"required"` Database string `validate:"required" default:"mlp"` @@ -454,7 +454,12 @@ type JaegerConfig struct { } type MlflowConfig struct { - TrackingURL string `validate:"required_if=ArtifactServiceType gcs ArtifactServiceType s3"` + TrackingURL string `validate:"required_if=ArtifactServiceType gcs ArtifactServiceType s3"` + // Note that the Kaniko image builder needs to be configured correctly to have the necessary credentials to download + // the artifacts from the blob storage tool depending on the artifact service type selected (gcs/s3). For gcs, the + // credentials can be provided via a k8s service account or a secret but for s3, the credentials can be provided via + // additional arguments in the config KanikoAdditionalArgs e.g. + // --build-arg=[AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY/AWS_DEFAULT_REGION/AWS_ENDPOINT_URL]=xxx ArtifactServiceType string `validate:"required,oneof=nop gcs s3"` } diff --git a/api/pkg/imagebuilder/imagebuilder.go b/api/pkg/imagebuilder/imagebuilder.go index fc85f3f5a..b870f9572 100644 --- a/api/pkg/imagebuilder/imagebuilder.go +++ b/api/pkg/imagebuilder/imagebuilder.go @@ -20,7 +20,6 @@ import ( "errors" "fmt" "net/http" - "os" "sort" "strings" "time" @@ -626,15 +625,6 @@ func (c *imageBuilder) createKanikoJobSpec( fmt.Sprintf("--destination=%s", imageRef), } - if c.artifactService.GetType() == "s3" { - kanikoArgs = append( - kanikoArgs, - fmt.Sprintf("--build-arg=%s=%s", "AWS_ACCESS_KEY_ID", os.Getenv("AWS_ACCESS_KEY_ID")), - fmt.Sprintf("--build-arg=%s=%s", "AWS_SECRET_ACCESS_KEY", os.Getenv("AWS_SECRET_ACCESS_KEY")), - fmt.Sprintf("--build-arg=%s=%s", "AWS_DEFAULT_REGION", os.Getenv("AWS_DEFAULT_REGION")), - fmt.Sprintf("--build-arg=%s=%s", "AWS_ENDPOINT_URL", os.Getenv("AWS_ENDPOINT_URL")), - ) - } if c.config.BaseImage.BuildContextSubPath != "" { kanikoArgs = append(kanikoArgs, fmt.Sprintf("--context-sub-path=%s", c.config.BaseImage.BuildContextSubPath)) } diff --git a/api/pkg/imagebuilder/imagebuilder_test.go b/api/pkg/imagebuilder/imagebuilder_test.go index e32f41220..22660999a 100644 --- a/api/pkg/imagebuilder/imagebuilder_test.go +++ b/api/pkg/imagebuilder/imagebuilder_test.go @@ -513,10 +513,6 @@ func TestBuildImage(t *testing.T) { fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=s3%s", modelDependenciesURLSuffix), fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=s3%s/model", testArtifactURISuffix), fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithDockerPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithS3Artifact.ID)), - fmt.Sprintf("--build-arg=AWS_ACCESS_KEY_ID=%s", ""), - fmt.Sprintf("--build-arg=AWS_SECRET_ACCESS_KEY=%s", ""), - fmt.Sprintf("--build-arg=AWS_DEFAULT_REGION=%s", ""), - fmt.Sprintf("--build-arg=AWS_ENDPOINT_URL=%s", ""), fmt.Sprintf("--context-sub-path=%s", configWithDockerPushRegistry.BaseImage.BuildContextSubPath), "--cache=true", "--compressed-caching=false", From 81155724833cd485d3a8afe022057d0f4cadee47 Mon Sep 17 00:00:00 2001 From: ewezy Date: Mon, 21 Oct 2024 01:50:36 +0800 Subject: [PATCH 26/32] Remove codecov github action and configs temporarily --- .github/workflows/codecov-config/codecov.yml | 5 ----- .github/workflows/merlin.yml | 14 -------------- 2 files changed, 19 deletions(-) delete mode 100644 .github/workflows/codecov-config/codecov.yml diff --git a/.github/workflows/codecov-config/codecov.yml b/.github/workflows/codecov-config/codecov.yml deleted file mode 100644 index 8901728c4..000000000 --- a/.github/workflows/codecov-config/codecov.yml +++ /dev/null @@ -1,5 +0,0 @@ -coverage: - status: - patch: - default: - threshold: 0.03% \ No newline at end of file diff --git a/.github/workflows/merlin.yml b/.github/workflows/merlin.yml index 422df7312..420649e69 100644 --- a/.github/workflows/merlin.yml +++ b/.github/workflows/merlin.yml @@ -137,13 +137,6 @@ jobs: E2E_USE_GOOGLE_OAUTH: false working-directory: ./python/sdk run: make unit-test - - uses: codecov/codecov-action@v4 - with: - flags: sdk-test-${{ matrix.python-version }} - name: sdk-test-${{ matrix.python-version }} - token: ${{ secrets.CODECOV_TOKEN }} - working-directory: ./python/sdk - codecov_yml_path: ../../.github/workflows/codecov-config/codecov.yml lint-api: runs-on: ubuntu-latest @@ -188,13 +181,6 @@ jobs: POSTGRES_USER: ${{ secrets.DB_USERNAME }} POSTGRES_PASSWORD: ${{ secrets.DB_PASSWORD }} run: make it-test-api-ci - - uses: codecov/codecov-action@v4 - with: - flags: api-test - name: api-test - token: ${{ secrets.CODECOV_TOKEN }} - working-directory: ./api - codecov_yml_path: ../.github/workflows/codecov-config/codecov.yml test-observation-publisher: runs-on: ubuntu-latest From e2dd955ecefe353cfd9d829c5a695c737311d4f5 Mon Sep 17 00:00:00 2001 From: ewezy Date: Mon, 21 Oct 2024 04:12:56 +0800 Subject: [PATCH 27/32] Fix bug whereby gcs credentials are not passed to kaniko when gcs is configured as an artifact strore --- api/pkg/imagebuilder/imagebuilder.go | 36 ++++++++++++++-------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/api/pkg/imagebuilder/imagebuilder.go b/api/pkg/imagebuilder/imagebuilder.go index b870f9572..00ea5f827 100644 --- a/api/pkg/imagebuilder/imagebuilder.go +++ b/api/pkg/imagebuilder/imagebuilder.go @@ -636,10 +636,10 @@ func (c *imageBuilder) createKanikoJobSpec( var volumeMounts []v1.VolumeMount var envVar []v1.EnvVar - // Configure additional authentication requirements for specific image registries - kanikoArgs = c.configureKanikoArgsForKanikoPushRegistry(kanikoArgs) - volumes, volumeMounts = c.configureVolumesAndVolumeMountsForKanikoPushRegistry(volumes, volumeMounts) - envVar = c.configureEnvVarsForKanikoPushRegistry(envVar) + // Configure additional credentials for specific image registries and artifact services + kanikoArgs = c.configureKanikoArgsToAddCredentials(kanikoArgs) + volumes, volumeMounts = c.configureVolumesAndVolumeMountsToAddCredentials(volumes, volumeMounts) + envVar = c.configureEnvVarsToAddCredentials(envVar) var resourceRequirements RequestLimitResources cpuRequest := resource.MustParse(c.config.DefaultResources.Requests.CPU) @@ -714,11 +714,21 @@ func (c *imageBuilder) createKanikoJobSpec( return job, nil } -func (c *imageBuilder) configureVolumesAndVolumeMountsForKanikoPushRegistry( +func (c *imageBuilder) configureKanikoArgsToAddCredentials(kanikoArgs []string) []string { + if c.config.KanikoPushRegistryType == "gcr" || c.artifactService.GetType() == "gcs" { + if c.config.KanikoServiceAccount == "" { + kanikoArgs = append(kanikoArgs, + fmt.Sprintf("--build-arg=%s=%s", gacEnvKey, saFilePath)) + } + } + return kanikoArgs +} + +func (c *imageBuilder) configureVolumesAndVolumeMountsToAddCredentials( volumes []v1.Volume, volumeMounts []v1.VolumeMount, ) ([]v1.Volume, []v1.VolumeMount) { - if c.config.KanikoPushRegistryType == "gcr" { + if c.config.KanikoPushRegistryType == "gcr" || c.artifactService.GetType() == "gcs" { if c.config.KanikoServiceAccount == "" { volumes = append(volumes, v1.Volume{ Name: kanikoSecretName, @@ -750,8 +760,8 @@ func (c *imageBuilder) configureVolumesAndVolumeMountsForKanikoPushRegistry( return volumes, volumeMounts } -func (c *imageBuilder) configureEnvVarsForKanikoPushRegistry(envVar []v1.EnvVar) []v1.EnvVar { - if c.config.KanikoPushRegistryType == "gcr" { +func (c *imageBuilder) configureEnvVarsToAddCredentials(envVar []v1.EnvVar) []v1.EnvVar { + if c.config.KanikoPushRegistryType == "gcr" || c.artifactService.GetType() == "gcs" { if c.config.KanikoServiceAccount == "" { envVar = append(envVar, v1.EnvVar{ Name: gacEnvKey, @@ -762,16 +772,6 @@ func (c *imageBuilder) configureEnvVarsForKanikoPushRegistry(envVar []v1.EnvVar) return envVar } -func (c *imageBuilder) configureKanikoArgsForKanikoPushRegistry(kanikoArgs []string) []string { - if c.config.KanikoPushRegistryType == "gcr" { - if c.config.KanikoServiceAccount == "" { - kanikoArgs = append(kanikoArgs, - fmt.Sprintf("--build-arg=%s=%s", gacEnvKey, saFilePath)) - } - } - return kanikoArgs -} - func (c *imageBuilder) GetImageBuildingJobStatus(ctx context.Context, project mlp.Project, model *models.Model, version *models.Version) (status models.ImageBuildingJobStatus) { status.State = models.ImageBuildingJobStateUnknown From ea7901e3a5576bef3b871a7e2d061915d1e77404 Mon Sep 17 00:00:00 2001 From: ewezy Date: Mon, 21 Oct 2024 04:17:14 +0800 Subject: [PATCH 28/32] Undo unnecessary default db port change --- api/config/config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/config/config.go b/api/config/config.go index 10cd733f7..15dc76784 100644 --- a/api/config/config.go +++ b/api/config/config.go @@ -142,7 +142,7 @@ func (docs *Documentations) Decode(value string) error { type DatabaseConfig struct { Host string `validate:"required"` - Port int `validate:"required" default:"5431"` + Port int `validate:"required" default:"5432"` User string `validate:"required"` Password string `validate:"required"` Database string `validate:"required" default:"mlp"` From 35ad11ec93db40093d792027e78bc23b3256618b Mon Sep 17 00:00:00 2001 From: ewezy Date: Thu, 7 Nov 2024 13:05:42 +0800 Subject: [PATCH 29/32] Add additional steps to download merlin artifacts from s3 storage locally --- python/sdk/merlin/model.py | 4 +-- python/sdk/merlin/util.py | 56 +++++++++++++++++++++++++++----------- 2 files changed, 42 insertions(+), 18 deletions(-) diff --git a/python/sdk/merlin/model.py b/python/sdk/merlin/model.py index 175cc4e70..2150305a0 100644 --- a/python/sdk/merlin/model.py +++ b/python/sdk/merlin/model.py @@ -58,7 +58,7 @@ from merlin.transformer import Transformer from merlin.util import ( autostr, - download_files_from_gcs, + download_files_from_blob_storage, extract_optional_value_with_default, guess_mlp_ui_url, valid_name_check, @@ -956,7 +956,7 @@ def download_artifact(self, destination_path): if artifact_uri is None or artifact_uri == "": raise Exception("There is no artifact uri for this model version") - download_files_from_gcs(artifact_uri, destination_path) + download_files_from_blob_storage(artifact_uri, destination_path) def log_artifacts(self, local_dir, artifact_path=None): """ diff --git a/python/sdk/merlin/util.py b/python/sdk/merlin/util.py index 2b01e36be..399570f16 100644 --- a/python/sdk/merlin/util.py +++ b/python/sdk/merlin/util.py @@ -14,6 +14,7 @@ import re import os +import boto3 from urllib.parse import urlparse from google.cloud import storage from os.path import dirname @@ -66,6 +67,11 @@ def valid_name_check(input_name: str) -> bool: return matching_group == input_name +def get_blob_storage_schema(artifact_uri: str) -> str: + parsed_result = urlparse(artifact_uri) + return parsed_result.scheme + + def get_bucket_name(gcs_uri: str) -> str: parsed_result = urlparse(gcs_uri) return parsed_result.netloc @@ -76,24 +82,42 @@ def get_gcs_path(gcs_uri: str) -> str: return parsed_result.path.strip("/") -def download_files_from_gcs(gcs_uri: str, destination_path: str): +def download_files_from_blob_storage(artifact_uri: str, destination_path: str): makedirs(destination_path, exist_ok=True) - client = storage.Client() - bucket_name = get_bucket_name(gcs_uri) - path = get_gcs_path(gcs_uri) - - bucket = client.get_bucket(bucket_name) - blobs = bucket.list_blobs(prefix=path) - for blob in blobs: - # Get only the path after .../artifacts/model - # E.g. - # Some blob looks like this mlflow/3/ad8f15a4023f461796955f71e1152bac/artifacts/model/1/saved_model.pb - # we only want to extract 1/saved_model.pb - artifact_path = os.path.join(*blob.name.split("/")[5:]) - dir = os.path.join(destination_path, dirname(artifact_path)) - makedirs(dir, exist_ok=True) - blob.download_to_filename(os.path.join(destination_path, artifact_path)) + storage_schema = get_blob_storage_schema(artifact_uri) + bucket_name = get_bucket_name(artifact_uri) + path = get_gcs_path(artifact_uri) + + if storage_schema == "gs": + client = storage.Client() + bucket = client.get_bucket(bucket_name) + blobs = bucket.list_blobs(prefix=path) + for blob in blobs: + # Get only the path after .../artifacts/model + # E.g. + # Some blob looks like this mlflow/3/ad8f15a4023f461796955f71e1152bac/artifacts/model/1/saved_model.pb + # we only want to extract 1/saved_model.pb + artifact_path = os.path.join(*blob.name.split("/")[5:]) + dir = os.path.join(destination_path, dirname(artifact_path)) + makedirs(dir, exist_ok=True) + blob.download_to_filename(os.path.join(destination_path, artifact_path)) + elif storage_schema == "s3": + client = boto3.client("s3") + bucket = client.list_objects_v2(Prefix=path, Bucket=bucket_name)["Contents"] + for s3_object in bucket: + # we do this because the list_objects_v2 method lists all subdirectories in addition to files + if not s3_object['Key'].endswith('/'): + # Get only the path after .../artifacts/model + # E.g. + # Some blob looks like this mlflow/3/ad8f15a4023f461796955f71e1152bac/artifacts/model/1/saved_model.pb + # we only want to extract 1/saved_model.pb + object_paths = s3_object['Key'].split("/")[5:] + if len(object_paths) != 0: + artifact_path = os.path.join(*object_paths) + os.makedirs(os.path.join(destination_path, dirname(artifact_path)), exist_ok=True) + client.download_file(bucket_name, s3_object['Key'], os.path.join(destination_path, artifact_path)) + def extract_optional_value_with_default(opt: Optional[Any], default: Any) -> Any: if opt is not None: From fe5de0daba9cd76b37592b6bda1cb9ce66caa8a0 Mon Sep 17 00:00:00 2001 From: ewezy Date: Thu, 7 Nov 2024 13:18:57 +0800 Subject: [PATCH 30/32] Introduce new constants for commonly used artifact and registry types and fix incorrect if-else logic --- api/pkg/imagebuilder/imagebuilder.go | 16 ++++++-- api/pkg/imagebuilder/imagebuilder_test.go | 46 +++++++++++------------ 2 files changed, 35 insertions(+), 27 deletions(-) diff --git a/api/pkg/imagebuilder/imagebuilder.go b/api/pkg/imagebuilder/imagebuilder.go index 00ea5f827..78156e297 100644 --- a/api/pkg/imagebuilder/imagebuilder.go +++ b/api/pkg/imagebuilder/imagebuilder.go @@ -104,6 +104,10 @@ const ( // jobDeletionTickDurationMilliseconds is the interval at which the API server checks if a job has been deleted jobDeletionTickDurationMilliseconds = 100 + dockerRegistryPushRegistryType = "docker" + googleCloudRegistryPushRegistryType = "gcr" + googleCloudStorageArtifactServiceType = "gcs" + gacEnvKey = "GOOGLE_APPLICATION_CREDENTIALS" saFilePath = "/secret/kaniko-secret.json" @@ -715,7 +719,8 @@ func (c *imageBuilder) createKanikoJobSpec( } func (c *imageBuilder) configureKanikoArgsToAddCredentials(kanikoArgs []string) []string { - if c.config.KanikoPushRegistryType == "gcr" || c.artifactService.GetType() == "gcs" { + if c.config.KanikoPushRegistryType == googleCloudRegistryPushRegistryType || + c.artifactService.GetType() == googleCloudStorageArtifactServiceType { if c.config.KanikoServiceAccount == "" { kanikoArgs = append(kanikoArgs, fmt.Sprintf("--build-arg=%s=%s", gacEnvKey, saFilePath)) @@ -728,7 +733,8 @@ func (c *imageBuilder) configureVolumesAndVolumeMountsToAddCredentials( volumes []v1.Volume, volumeMounts []v1.VolumeMount, ) ([]v1.Volume, []v1.VolumeMount) { - if c.config.KanikoPushRegistryType == "gcr" || c.artifactService.GetType() == "gcs" { + if c.config.KanikoPushRegistryType == googleCloudRegistryPushRegistryType || + c.artifactService.GetType() == googleCloudStorageArtifactServiceType { if c.config.KanikoServiceAccount == "" { volumes = append(volumes, v1.Volume{ Name: kanikoSecretName, @@ -743,7 +749,8 @@ func (c *imageBuilder) configureVolumesAndVolumeMountsToAddCredentials( MountPath: "/secret", }) } - } else if c.config.KanikoPushRegistryType == "docker" { + } + if c.config.KanikoPushRegistryType == dockerRegistryPushRegistryType { volumes = append(volumes, v1.Volume{ Name: kanikoSecretName, VolumeSource: v1.VolumeSource{ @@ -761,7 +768,8 @@ func (c *imageBuilder) configureVolumesAndVolumeMountsToAddCredentials( } func (c *imageBuilder) configureEnvVarsToAddCredentials(envVar []v1.EnvVar) []v1.EnvVar { - if c.config.KanikoPushRegistryType == "gcr" || c.artifactService.GetType() == "gcs" { + if c.config.KanikoPushRegistryType == googleCloudRegistryPushRegistryType || + c.artifactService.GetType() == googleCloudStorageArtifactServiceType { if c.config.KanikoServiceAccount == "" { envVar = append(envVar, v1.EnvVar{ Name: gacEnvKey, diff --git a/api/pkg/imagebuilder/imagebuilder_test.go b/api/pkg/imagebuilder/imagebuilder_test.go index 22660999a..5675a4aa2 100644 --- a/api/pkg/imagebuilder/imagebuilder_test.go +++ b/api/pkg/imagebuilder/imagebuilder_test.go @@ -136,7 +136,7 @@ var ( ClusterName: "my-cluster", GcpProject: "test-project", Environment: testEnvironmentName, - KanikoPushRegistryType: "gcr", + KanikoPushRegistryType: googleCloudRegistryPushRegistryType, KanikoImage: "gcr.io/kaniko-project/executor:v1.1.0", KanikoAdditionalArgs: defaultKanikoAdditionalArgs, SupportedPythonVersions: defaultSupportedPythonVersions, @@ -176,7 +176,7 @@ var ( ClusterName: "my-cluster", GcpProject: "test-project", Environment: testEnvironmentName, - KanikoPushRegistryType: "docker", + KanikoPushRegistryType: dockerRegistryPushRegistryType, KanikoImage: "gcr.io/kaniko-project/executor:v1.1.0", KanikoAdditionalArgs: defaultKanikoAdditionalArgs, KanikoDockerCredentialSecretName: "docker-secret", @@ -393,7 +393,7 @@ func TestBuildImage(t *testing.T) { fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "gcs"), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", googleCloudStorageArtifactServiceType), fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), @@ -449,7 +449,7 @@ func TestBuildImage(t *testing.T) { wantDeleteJobName: "", wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), config: configWithGCRPushRegistry, - artifactServiceType: "gcs", + artifactServiceType: googleCloudStorageArtifactServiceType, artifactServiceURLScheme: "gs", }, { @@ -618,7 +618,7 @@ func TestBuildImage(t *testing.T) { fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "gcs"), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", googleCloudStorageArtifactServiceType), fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), @@ -652,7 +652,7 @@ func TestBuildImage(t *testing.T) { wantDeleteJobName: "", wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), config: configWithSa, - artifactServiceType: "gcs", + artifactServiceType: googleCloudStorageArtifactServiceType, artifactServiceURLScheme: "gs", }, { @@ -712,7 +712,7 @@ func TestBuildImage(t *testing.T) { fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "gcs"), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", googleCloudStorageArtifactServiceType), fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), @@ -773,7 +773,7 @@ func TestBuildImage(t *testing.T) { GcpProject: "test-project", Environment: testEnvironmentName, KanikoImage: "gcr.io/kaniko-project/executor:v1.1.0", - KanikoPushRegistryType: "gcr", + KanikoPushRegistryType: googleCloudRegistryPushRegistryType, KanikoAdditionalArgs: defaultKanikoAdditionalArgs, SupportedPythonVersions: defaultSupportedPythonVersions, DefaultResources: configWithGCRPushRegistry.DefaultResources, @@ -782,7 +782,7 @@ func TestBuildImage(t *testing.T) { }, MaximumRetry: jobBackOffLimit, }, - artifactServiceType: "gcs", + artifactServiceType: googleCloudStorageArtifactServiceType, artifactServiceURLScheme: "gs", }, { @@ -842,7 +842,7 @@ func TestBuildImage(t *testing.T) { fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "gcs"), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", googleCloudStorageArtifactServiceType), fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), @@ -908,7 +908,7 @@ func TestBuildImage(t *testing.T) { GcpProject: "test-project", Environment: testEnvironmentName, KanikoImage: "gcr.io/kaniko-project/executor:v1.1.0", - KanikoPushRegistryType: "gcr", + KanikoPushRegistryType: googleCloudRegistryPushRegistryType, KanikoAdditionalArgs: defaultKanikoAdditionalArgs, SupportedPythonVersions: defaultSupportedPythonVersions, DefaultResources: configWithGCRPushRegistry.DefaultResources, @@ -922,7 +922,7 @@ func TestBuildImage(t *testing.T) { }, MaximumRetry: jobBackOffLimit, }, - artifactServiceType: "gcs", + artifactServiceType: googleCloudStorageArtifactServiceType, artifactServiceURLScheme: "gs", }, { @@ -982,7 +982,7 @@ func TestBuildImage(t *testing.T) { fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "gcs"), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", googleCloudStorageArtifactServiceType), fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), @@ -1057,7 +1057,7 @@ func TestBuildImage(t *testing.T) { NodeSelectors: configWithGCRPushRegistry.NodeSelectors, Tolerations: configWithGCRPushRegistry.Tolerations, }, - artifactServiceType: "gcs", + artifactServiceType: googleCloudStorageArtifactServiceType, artifactServiceURLScheme: "gs", }, { @@ -1116,7 +1116,7 @@ func TestBuildImage(t *testing.T) { fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "gcs"), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", googleCloudStorageArtifactServiceType), fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), @@ -1172,7 +1172,7 @@ func TestBuildImage(t *testing.T) { wantCreateJob: nil, wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), config: configWithGCRPushRegistry, - artifactServiceType: "gcs", + artifactServiceType: googleCloudStorageArtifactServiceType, artifactServiceURLScheme: "gs", }, { @@ -1231,7 +1231,7 @@ func TestBuildImage(t *testing.T) { fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "gcs"), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", googleCloudStorageArtifactServiceType), fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), @@ -1290,7 +1290,7 @@ func TestBuildImage(t *testing.T) { wantDeleteJobName: "", wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), config: configWithGCRPushRegistry, - artifactServiceType: "gcs", + artifactServiceType: googleCloudStorageArtifactServiceType, artifactServiceURLScheme: "gs", }, { @@ -1349,7 +1349,7 @@ func TestBuildImage(t *testing.T) { fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "gcs"), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", googleCloudStorageArtifactServiceType), fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), @@ -1453,7 +1453,7 @@ func TestBuildImage(t *testing.T) { fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "gcs"), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", googleCloudStorageArtifactServiceType), fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), @@ -1509,7 +1509,7 @@ func TestBuildImage(t *testing.T) { wantDeleteJobName: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersionWithGCSArtifact.ID), wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), config: configWithGCRPushRegistry, - artifactServiceType: "gcs", + artifactServiceType: googleCloudStorageArtifactServiceType, artifactServiceURLScheme: "gs", }, { @@ -1573,7 +1573,7 @@ func TestBuildImage(t *testing.T) { fmt.Sprintf("--dockerfile=%s", configWithGCRPushRegistry.BaseImage.DockerfilePath), fmt.Sprintf("--context=%s", configWithGCRPushRegistry.BaseImage.BuildContextURI), fmt.Sprintf("--build-arg=BASE_IMAGE=%s", configWithGCRPushRegistry.BaseImage.ImageName), - fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", "gcs"), + fmt.Sprintf("--build-arg=MLFLOW_ARTIFACT_STORAGE_TYPE=%s", googleCloudStorageArtifactServiceType), fmt.Sprintf("--build-arg=MODEL_DEPENDENCIES_URL=gs%s", modelDependenciesURLSuffix), fmt.Sprintf("--build-arg=MODEL_ARTIFACTS_URL=gs%s/model", testArtifactURISuffix), fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID)), @@ -1629,7 +1629,7 @@ func TestBuildImage(t *testing.T) { wantDeleteJobName: "", wantImageRef: fmt.Sprintf("%s/%s-%s:%s", configWithGCRPushRegistry.DockerRegistry, project.Name, model.Name, modelVersionWithGCSArtifact.ID), config: configWithGCRPushRegistry, - artifactServiceType: "gcs", + artifactServiceType: googleCloudStorageArtifactServiceType, artifactServiceURLScheme: "gs", }, } From 163f60baffbcaa8923840b4bf563bddf90b16e80 Mon Sep 17 00:00:00 2001 From: ewezy Date: Thu, 7 Nov 2024 13:31:25 +0800 Subject: [PATCH 31/32] Fix leftover gcs naming --- python/sdk/merlin/util.py | 14 +++++++------- python/sdk/test/utils_unit_test.py | 23 ++++++++++++++++------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/python/sdk/merlin/util.py b/python/sdk/merlin/util.py index 399570f16..0efea1c52 100644 --- a/python/sdk/merlin/util.py +++ b/python/sdk/merlin/util.py @@ -67,27 +67,27 @@ def valid_name_check(input_name: str) -> bool: return matching_group == input_name -def get_blob_storage_schema(artifact_uri: str) -> str: +def get_blob_storage_scheme(artifact_uri: str) -> str: parsed_result = urlparse(artifact_uri) return parsed_result.scheme -def get_bucket_name(gcs_uri: str) -> str: - parsed_result = urlparse(gcs_uri) +def get_bucket_name(artifact_uri: str) -> str: + parsed_result = urlparse(artifact_uri) return parsed_result.netloc -def get_gcs_path(gcs_uri: str) -> str: - parsed_result = urlparse(gcs_uri) +def get_artifact_path(artifact_uri: str) -> str: + parsed_result = urlparse(artifact_uri) return parsed_result.path.strip("/") def download_files_from_blob_storage(artifact_uri: str, destination_path: str): makedirs(destination_path, exist_ok=True) - storage_schema = get_blob_storage_schema(artifact_uri) + storage_schema = get_blob_storage_scheme(artifact_uri) bucket_name = get_bucket_name(artifact_uri) - path = get_gcs_path(artifact_uri) + path = get_artifact_path(artifact_uri) if storage_schema == "gs": client = storage.Client() diff --git a/python/sdk/test/utils_unit_test.py b/python/sdk/test/utils_unit_test.py index ba3843f1f..9763b8c67 100644 --- a/python/sdk/test/utils_unit_test.py +++ b/python/sdk/test/utils_unit_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from merlin.util import guess_mlp_ui_url, valid_name_check, get_bucket_name, get_gcs_path +from merlin.util import guess_mlp_ui_url, valid_name_check, get_bucket_name, get_artifact_path import pytest @@ -34,19 +34,28 @@ def test_name_check(): assert result is True +@pytest.mark.unit +def test_get_blob_storage_scheme(): + gcs_artifact_uri = 'gs://some-bucket/mlflow/81/ddd' + assert test_get_blob_storage_scheme(gcs_artifact_uri) == 'gs' + + s3_artifact_uri = 's3://some-bucket/mlflow/81/ddd' + assert test_get_blob_storage_scheme(s3_artifact_uri) == 's3' + + @pytest.mark.unit def test_get_bucket_name(): - gcs_uri = 'gs://some-bucket/mlflow/81/ddd' - assert get_bucket_name(gcs_uri) == 'some-bucket' + artifact_uri = 'gs://some-bucket/mlflow/81/ddd' + assert get_bucket_name(artifact_uri) == 'some-bucket' @pytest.mark.unit -def test_get_gcs_path(): - gcs_uri = 'gs://some-bucket/mlflow/81/ddd' - assert get_gcs_path(gcs_uri) == 'mlflow/81/ddd' +def test_get_artifact_path(): + artifact_uri = 'gs://some-bucket/mlflow/81/ddd' + assert get_artifact_path(artifact_uri) == 'mlflow/81/ddd' double_slash_uri_path = 'gs://some-bucket//mlflow/81/ddd' - assert get_gcs_path(double_slash_uri_path) == 'mlflow/81/ddd' + assert get_artifact_path(double_slash_uri_path) == 'mlflow/81/ddd' @pytest.mark.parametrize("mlp_api_url,expected", [ From 76ecdc5315c50773ef6bd6a781bc1061c2ceffa0 Mon Sep 17 00:00:00 2001 From: ewezy Date: Thu, 7 Nov 2024 15:08:48 +0800 Subject: [PATCH 32/32] Fix incorrect function name called in unit test --- python/sdk/test/utils_unit_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/sdk/test/utils_unit_test.py b/python/sdk/test/utils_unit_test.py index 9763b8c67..712bbc272 100644 --- a/python/sdk/test/utils_unit_test.py +++ b/python/sdk/test/utils_unit_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from merlin.util import guess_mlp_ui_url, valid_name_check, get_bucket_name, get_artifact_path +from merlin.util import guess_mlp_ui_url, valid_name_check, get_blob_storage_scheme, get_bucket_name, get_artifact_path import pytest @@ -37,10 +37,10 @@ def test_name_check(): @pytest.mark.unit def test_get_blob_storage_scheme(): gcs_artifact_uri = 'gs://some-bucket/mlflow/81/ddd' - assert test_get_blob_storage_scheme(gcs_artifact_uri) == 'gs' + assert get_blob_storage_scheme(gcs_artifact_uri) == 'gs' s3_artifact_uri = 's3://some-bucket/mlflow/81/ddd' - assert test_get_blob_storage_scheme(s3_artifact_uri) == 's3' + assert get_blob_storage_scheme(s3_artifact_uri) == 's3' @pytest.mark.unit