diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100644 index 0000000..80f7286 --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,76 @@ +name: Docker + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +on: workflow_dispatch + +env: + REGISTRY: ghcr.io + # github.repository as / + IMAGE_NAME: ${{ github.repository }} + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: '1.22' + + - name: Install Protoc + run: | + PROTOC_ZIP=protoc-28.2-linux-x86_64.zip + curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v28.2/$PROTOC_ZIP + sudo unzip -o $PROTOC_ZIP -d /usr/local bin/protoc + sudo unzip -o $PROTOC_ZIP -d /usr/local 'include/*' + rm -f $PROTOC_ZIP + + - name: Verify Protoc Version + run: protoc --version + + - name: Build + run: | + make init-proto + make build + + # Login against a Docker registry except on PR + # https://github.com/docker/login-action + - name: Log into registry ${{ env.REGISTRY }} + uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + # Extract metadata (tags, labels) for Docker + # https://github.com/docker/metadata-action + - name: Extract Docker metadata + id: meta + uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934 # v5.0.0 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=sha,priority=100,prefix=,suffix=,format=short + + + # Build and push Docker image with Buildx (don't push on PR) + # https://github.com/docker/build-push-action + - name: Build and push Docker image + id: build-and-push + uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09 # v5.0.0 + with: + context: . + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml new file mode 100644 index 0000000..a9989e3 --- /dev/null +++ b/.github/workflows/go.yml @@ -0,0 +1,81 @@ +# This workflow will build a golang project +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go + +name: Go + +on: + pull_request: + types: + - opened + - synchronize + - reopened + branches: + - main + push: + branches: + - main + +jobs: + check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: '1.22' + + - name: Lint + uses: golangci/golangci-lint-action@v5 + with: + version: 'v1.58.0' + args: -v --timeout 5m + skip-cache: true + + - name: vet + run: + make vet + + - name: fmt + run: + test -z "$(go fmt ./...)" + + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: '1.22' + + - name: Test + run: go test -v ./... + + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: '1.22' + + - name: Install Protoc + run: | + PROTOC_ZIP=protoc-28.2-linux-x86_64.zip + curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v28.2/$PROTOC_ZIP + sudo unzip -o $PROTOC_ZIP -d /usr/local bin/protoc + sudo unzip -o $PROTOC_ZIP -d /usr/local 'include/*' + rm -f $PROTOC_ZIP + + - name: Verify Protoc Version + run: protoc --version + + - name: Build + run: | + make init-proto + make build diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..4734b7e --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,24 @@ +run: + timeout: 5m + +linters: + enable: + - errcheck + - goimports + - gosec + - gosimple + - govet + - ineffassign + - staticcheck + - unused + - gosimple + - typecheck + - unused + +issues: + exclude-rules: + - severity: "warning" + - severity: "info" + exclude-use-default: false + max-issues-per-linter: 0 + max-same-issues: 0 diff --git a/cmd/node-observer/main.go b/cmd/node-observer/main.go index da506bf..53e02b4 100644 --- a/cmd/node-observer/main.go +++ b/cmd/node-observer/main.go @@ -28,7 +28,7 @@ import ( "k8s.io/client-go/rest" "k8s.io/klog/v2" - "github.com/NVIDIA/topograph/pkg/state_observer" + "github.com/NVIDIA/topograph/pkg/node_observer" ) var GitTag string @@ -55,7 +55,7 @@ func main() { } func mainInternal(c string) error { - cfg, err := state_observer.NewConfigFromFile(c) + cfg, err := node_observer.NewConfigFromFile(c) if err != nil { return err } @@ -73,7 +73,7 @@ func mainInternal(c string) error { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - controller, err := state_observer.NewController(ctx, kubeClient, cfg) + controller, err := node_observer.NewController(ctx, kubeClient, cfg) if err != nil { return err } diff --git a/cmd/topograph/main.go b/cmd/topograph/main.go index ea94c49..d615c97 100644 --- a/cmd/topograph/main.go +++ b/cmd/topograph/main.go @@ -59,7 +59,9 @@ func mainInternal(c string) error { return err } - cfg.UpdateEnv() + if err = cfg.UpdateEnv(); err != nil { + return err + } ctx, cancel := context.WithCancel(context.Background()) defer cancel() diff --git a/pkg/aws/imds.go b/pkg/aws/imds.go index 4bbe9c9..dedaed3 100644 --- a/pkg/aws/imds.go +++ b/pkg/aws/imds.go @@ -52,7 +52,7 @@ func getMetadata(path string) ([]byte, error) { if err != nil { return nil, err } - defer resp.Body.Close() + defer func() { _ = resp.Body.Close() }() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("HTTP status: %s", resp.Status) diff --git a/pkg/config/config.go b/pkg/config/config.go index 2c38491..a94d8dd 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -99,15 +99,20 @@ func (cfg *Config) validate() error { return cfg.readCredentials() } -func (cfg *Config) UpdateEnv() { +func (cfg *Config) UpdateEnv() (err error) { for env, val := range cfg.Env { if env == "PATH" { // special case for PATH env var - os.Setenv("PATH", fmt.Sprintf("%s:%s", os.Getenv("PATH"), val)) + err = os.Setenv("PATH", fmt.Sprintf("%s:%s", os.Getenv("PATH"), val)) } else { - os.Setenv(env, val) + err = os.Setenv(env, val) + } + if err != nil { + return fmt.Errorf("failed to set %q environment variable: %v", env, err) } klog.Infof("Updated env %s=%s", env, os.Getenv(env)) } + + return } func (cfg *Config) readCredentials() error { @@ -122,7 +127,7 @@ func (cfg *Config) readCredentials() error { if err != nil { return err } - defer file.Close() + defer func() { _ = file.Close() }() data, err := io.ReadAll(file) if err != nil { diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 5445aac..1cfba53 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -42,23 +42,23 @@ env: func TestConfig(t *testing.T) { file, err := os.CreateTemp("", "test-cfg-*.yml") require.NoError(t, err) - defer os.Remove(file.Name()) - defer file.Close() + defer func() { _ = os.Remove(file.Name()) }() + defer func() { _ = file.Close() }() cert, err := os.CreateTemp("", "test-cert-*.yml") require.NoError(t, err) - defer os.Remove(cert.Name()) - defer cert.Close() + defer func() { _ = os.Remove(cert.Name()) }() + defer func() { _ = cert.Close() }() key, err := os.CreateTemp("", "test-key-*.yml") require.NoError(t, err) - defer os.Remove(key.Name()) - defer key.Close() + defer func() { _ = os.Remove(key.Name()) }() + defer func() { _ = key.Close() }() caCert, err := os.CreateTemp("", "test-ca-cert-*.yml") require.NoError(t, err) - defer os.Remove(caCert.Name()) - defer caCert.Close() + defer func() { _ = os.Remove(caCert.Name()) }() + defer func() { _ = caCert.Close() }() _, err = file.WriteString(fmt.Sprintf(configTemplate, cert.Name(), key.Name(), caCert.Name())) require.NoError(t, err) @@ -84,32 +84,34 @@ func TestConfig(t *testing.T) { } require.Equal(t, expected, cfg) - var oldPath, newPath = os.Getenv("PATH"), "" - if len(oldPath) == 0 { - newPath = oldPath + var path string + if path = os.Getenv("PATH"); len(path) == 0 { + path = "/a/b/c" } else { - newPath = oldPath + ":" + "/a/b/c" + path = path + ":" + "/a/b/c" } - cfg.UpdateEnv() + err = cfg.UpdateEnv() + require.NoError(t, err) + require.Equal(t, "/etc/slurm/config.yaml", os.Getenv("SLURM_CONF")) - require.Equal(t, newPath, os.Getenv("PATH")) + require.Equal(t, path, os.Getenv("PATH")) } func TestValidate(t *testing.T) { cert, err := os.CreateTemp("", "test-cert-*.yml") require.NoError(t, err) - defer os.Remove(cert.Name()) - defer cert.Close() + defer func() { _ = os.Remove(cert.Name()) }() + defer func() { _ = cert.Close() }() key, err := os.CreateTemp("", "test-key-*.yml") require.NoError(t, err) - defer os.Remove(key.Name()) - defer key.Close() + defer func() { _ = os.Remove(key.Name()) }() + defer func() { _ = key.Close() }() caCert, err := os.CreateTemp("", "test-ca-cert-*.yml") require.NoError(t, err) - defer os.Remove(caCert.Name()) - defer caCert.Close() + defer func() { _ = os.Remove(caCert.Name()) }() + defer func() { _ = caCert.Close() }() testCases := []struct { name string diff --git a/pkg/gcp/instance_topology.go b/pkg/gcp/instance_topology.go index 061807d..4d960fc 100644 --- a/pkg/gcp/instance_topology.go +++ b/pkg/gcp/instance_topology.go @@ -45,7 +45,7 @@ func GenerateInstanceTopology(ctx context.Context, _ interface{}, instanceToNode if err != nil { return nil, fmt.Errorf("unable to get zones client: %s", err.Error()) } - projectID, err := metadata.ProjectID() + projectID, err := metadata.ProjectIDWithContext(ctx) if err != nil { return nil, fmt.Errorf("unable to get project ID: %s", err.Error()) } diff --git a/pkg/state_observer/config.go b/pkg/node_observer/config.go similarity index 98% rename from pkg/state_observer/config.go rename to pkg/node_observer/config.go index 5a9dea7..5be9715 100644 --- a/pkg/state_observer/config.go +++ b/pkg/node_observer/config.go @@ -14,7 +14,7 @@ * limitations under the License. */ -package state_observer +package node_observer import ( "fmt" diff --git a/pkg/state_observer/controller.go b/pkg/node_observer/controller.go similarity index 98% rename from pkg/state_observer/controller.go rename to pkg/node_observer/controller.go index 51a36a5..a1dd5a8 100644 --- a/pkg/state_observer/controller.go +++ b/pkg/node_observer/controller.go @@ -14,7 +14,7 @@ * limitations under the License. */ -package state_observer +package node_observer import ( "context" diff --git a/pkg/state_observer/node_informer.go b/pkg/node_observer/node_informer.go similarity index 95% rename from pkg/state_observer/node_informer.go rename to pkg/node_observer/node_informer.go index 1382247..00b7107 100644 --- a/pkg/state_observer/node_informer.go +++ b/pkg/node_observer/node_informer.go @@ -14,7 +14,7 @@ * limitations under the License. */ -package state_observer +package node_observer import ( "context" @@ -56,7 +56,7 @@ func (n *NodeInformer) Start() error { informer := n.factory.Core().V1().Nodes().Informer() - informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + _, err := informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { node := obj.(*v1.Node) klog.V(4).Infof("Node informer added node %s", node.Name) @@ -72,6 +72,9 @@ func (n *NodeInformer) Start() error { n.SendRequest() }, }) + if err != nil { + return err + } informer.Run(n.ctx.Done()) diff --git a/pkg/server/grpc_client.go b/pkg/server/grpc_client.go index 7fa4007..6fea126 100644 --- a/pkg/server/grpc_client.go +++ b/pkg/server/grpc_client.go @@ -34,7 +34,7 @@ func forwardRequest(ctx context.Context, tr *TopologyRequest, url string, cis [] if err != nil { return nil, fmt.Errorf("failed to connect to %s: %v", url, err) } - defer conn.Close() + defer func() { _ = conn.Close() }() client := pb.NewTopologyServiceClient(conn) diff --git a/pkg/server/http_server.go b/pkg/server/http_server.go index 8ea77fc..c33cd99 100644 --- a/pkg/server/http_server.go +++ b/pkg/server/http_server.go @@ -91,7 +91,7 @@ func (s *HttpServer) Stop(err error) { func healthz(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) - w.Write([]byte("OK\n")) + _, _ = w.Write([]byte("OK\n")) } func generate(w http.ResponseWriter, r *http.Request) { @@ -103,7 +103,7 @@ func generate(w http.ResponseWriter, r *http.Request) { uid := srv.async.queue.Submit(tr) w.WriteHeader(http.StatusAccepted) - w.Write([]byte(uid)) + _, _ = w.Write([]byte(uid)) } func readRequest(w http.ResponseWriter, r *http.Request) *TopologyRequest { @@ -117,7 +117,7 @@ func readRequest(w http.ResponseWriter, r *http.Request) *TopologyRequest { http.Error(w, "Unable to read request body", http.StatusInternalServerError) return nil } - defer r.Body.Close() + defer func() { _ = r.Body.Close() }() tr := &TopologyRequest{} @@ -205,6 +205,6 @@ func getresult(w http.ResponseWriter, r *http.Request) { http.Error(w, res.Message, res.Status) } else { w.WriteHeader(res.Status) - w.Write(res.Ret.([]byte)) + _, _ = w.Write(res.Ret.([]byte)) } } diff --git a/pkg/translate/output.go b/pkg/translate/output.go index 3b4d40f..f77b260 100644 --- a/pkg/translate/output.go +++ b/pkg/translate/output.go @@ -55,7 +55,9 @@ func ToSLURM(wr io.Writer, root *common.Vertex) error { for _, sw := range parents { if _, ok := leaves[sw.ID]; !ok { - writeSwitch(wr, sw) + if err := writeSwitch(wr, sw); err != nil { + return err + } } } diff --git a/pkg/utils/http.go b/pkg/utils/http.go index 370cf73..54bd072 100644 --- a/pkg/utils/http.go +++ b/pkg/utils/http.go @@ -48,7 +48,7 @@ func HttpRequest(req *http.Request) (*http.Response, []byte, error) { if err != nil { return nil, nil, fmt.Errorf("failed to send HTTP request: %v", err) } - defer resp.Body.Close() + defer func() { _ = resp.Body.Close() }() body, err := io.ReadAll(resp.Body) if err != nil { diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index 8a6c3db..49cd957 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -36,7 +36,7 @@ func CreateFile(path string, data []byte) error { if err != nil { return fmt.Errorf("failed to create %q: %v", path, err) } - defer file.Close() + defer func() { _ = file.Close() }() _, err = file.Write(data) if err != nil { diff --git a/pkg/utils/utils_test.go b/pkg/utils/utils_test.go index 4d47368..95df540 100644 --- a/pkg/utils/utils_test.go +++ b/pkg/utils/utils_test.go @@ -55,8 +55,8 @@ func TestValidateFile(t *testing.T) { if tc.generate { f, err := os.CreateTemp("", "test-*") require.NoError(t, err) - defer os.Remove(f.Name()) - defer f.Close() + defer func() { _ = os.Remove(f.Name()) }() + defer func() { _ = f.Close() }() tc.fname = f.Name() } err := ValidateFile(tc.fname, tc.descr)