-
Notifications
You must be signed in to change notification settings - Fork 295
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
🌱 test: fix collector for machines not having an IP in status and change ignition ssh user to capv #3010
🌱 test: fix collector for machines not having an IP in status and change ignition ssh user to capv #3010
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
- op: add | ||
path: /spec/kubeadmConfigSpec/users/0/name | ||
value: "capv" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
- op: add | ||
path: /spec/template/spec/users/0/name | ||
value: "capv" |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,37 +22,42 @@ import ( | |
"context" | ||
"fmt" | ||
"io" | ||
"net" | ||
"os" | ||
"path/filepath" | ||
"strings" | ||
"sync" | ||
|
||
"github.com/pkg/errors" | ||
"github.com/vmware/govmomi" | ||
"github.com/vmware/govmomi/find" | ||
"github.com/vmware/govmomi/vim25/mo" | ||
"golang.org/x/crypto/ssh" | ||
kerrors "k8s.io/apimachinery/pkg/util/errors" | ||
"k8s.io/klog/v2" | ||
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" | ||
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" | ||
"sigs.k8s.io/controller-runtime/pkg/client" | ||
kinderrors "sigs.k8s.io/kind/pkg/errors" | ||
) | ||
|
||
// Constants used by the log collector in this file.
const ( | ||
// DefaultUserName is the string "capv".
// NOTE(review): presumably the SSH user name used when connecting to machines — confirm against the SSH helper, which is not visible here.
DefaultUserName = "capv" | ||
// VSpherePrivateKeyFilePath is the string "VSPHERE_SSH_PRIVATE_KEY".
// NOTE(review): looks like the name of an environment variable that points at the SSH private key file — confirm against readPrivateKey.
VSpherePrivateKeyFilePath = "VSPHERE_SSH_PRIVATE_KEY" | ||
) | ||
|
||
type MachineLogCollector struct{} | ||
// MachineLogCollector collects log files (for example kubelet.log and
// cloud-init-output.log) from machines by running remote commands against
// one of the machine's IP addresses.
type MachineLogCollector struct { | ||
// Client is the govmomi client used to retrieve virtual machine properties
// when a Machine's IP addresses have to be looked up.
Client *govmomi.Client | ||
// Finder is used to look up a virtual machine by the Machine's name.
Finder *find.Finder | ||
} | ||
|
||
func (collector MachineLogCollector) CollectMachinePoolLog(_ context.Context, _ client.Client, _ *expv1.MachinePool, _ string) error { | ||
// CollectMachinePoolLog is a no-op: it ignores all of its arguments and always returns nil.
func (c *MachineLogCollector) CollectMachinePoolLog(_ context.Context, _ client.Client, _ *expv1.MachinePool, _ string) error { | ||
return nil | ||
} | ||
|
||
func (collector MachineLogCollector) CollectMachineLog(_ context.Context, _ client.Client, m *clusterv1.Machine, outputPath string) error { | ||
var hostIPAddr string | ||
for _, address := range m.Status.Addresses { | ||
if address.Type != clusterv1.MachineExternalIP { | ||
continue | ||
} | ||
hostIPAddr = address.Address | ||
break | ||
func (c *MachineLogCollector) CollectMachineLog(ctx context.Context, _ client.Client, m *clusterv1.Machine, outputPath string) error { | ||
machineIPAddresses, err := c.machineIPAddresses(ctx, m) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
captureLogs := func(hostFileName, command string, args ...string) func() error { | ||
|
@@ -62,11 +67,24 @@ func (collector MachineLogCollector) CollectMachineLog(_ context.Context, _ clie | |
return err | ||
} | ||
defer f.Close() | ||
return executeRemoteCommand(f, hostIPAddr, command, args...) | ||
var errs []error | ||
// Try with all available IPs unless it succeeded. | ||
for _, machineIPAddress := range machineIPAddresses { | ||
if err := executeRemoteCommand(f, machineIPAddress, command, args...); err != nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Q: Does this imply that we expect SSH connectivity from the Prow container to the machine? Is this a new constraint introduced by this PR, or does it already exist for something else? (Thinking about constraints for the new test environment.) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We have already been doing this for a while to get logs. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This PR just fixes the usage where we try to pull data via SSH, to make it work in more failure cases. Before, we tried but often did not get any data because no IP address had been set yet on the Machine's status object. |
||
errs = append(errs, err) | ||
continue | ||
} | ||
return nil | ||
} | ||
|
||
if err := kerrors.NewAggregate(errs); err != nil { | ||
return errors.Wrapf(err, "failed to run command %s for machine %s on ips [%s]", command, klog.KObj(m), strings.Join(machineIPAddresses, ", ")) | ||
} | ||
return nil | ||
} | ||
} | ||
|
||
return kinderrors.AggregateConcurrent([]func() error{ | ||
return aggregateConcurrent( | ||
captureLogs("kubelet.log", | ||
"sudo journalctl", "--no-pager", "--output=short-precise", "-u", "kubelet.service"), | ||
captureLogs("containerd.log", | ||
|
@@ -75,13 +93,56 @@ func (collector MachineLogCollector) CollectMachineLog(_ context.Context, _ clie | |
"sudo", "cat", "/var/log/cloud-init.log"), | ||
captureLogs("cloud-init-output.log", | ||
"sudo", "cat", "/var/log/cloud-init-output.log"), | ||
}) | ||
) | ||
} | ||
|
||
func (collector MachineLogCollector) CollectInfrastructureLogs(_ context.Context, _ client.Client, _ *clusterv1.Cluster, _ string) error { | ||
// CollectInfrastructureLogs is a no-op: it ignores all of its arguments and always returns nil.
func (c *MachineLogCollector) CollectInfrastructureLogs(_ context.Context, _ client.Client, _ *clusterv1.Cluster, _ string) error { | ||
return nil | ||
} | ||
|
||
func (c *MachineLogCollector) machineIPAddresses(ctx context.Context, m *clusterv1.Machine) ([]string, error) { | ||
for _, address := range m.Status.Addresses { | ||
if address.Type != clusterv1.MachineExternalIP { | ||
continue | ||
} | ||
return []string{address.Address}, nil | ||
} | ||
|
||
vmObj, err := c.Finder.VirtualMachine(ctx, m.GetName()) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
var vm mo.VirtualMachine | ||
|
||
if err := c.Client.RetrieveOne(ctx, vmObj.Reference(), []string{"guest.net"}, &vm); err != nil { | ||
// We cannot get the properties e.g. when the vm already got deleted or is getting deleted. | ||
return nil, errors.Errorf("error retrieving properties for machine %s", klog.KObj(m)) | ||
} | ||
|
||
addresses := []string{} | ||
|
||
// Return all IPs so we can try each of them until one succeeded. | ||
for _, nic := range vm.Guest.Net { | ||
if nic.IpConfig == nil { | ||
continue | ||
} | ||
for _, ip := range nic.IpConfig.IpAddress { | ||
netIP := net.ParseIP(ip.IpAddress) | ||
ipv4 := netIP.To4() | ||
if ipv4 != nil { | ||
addresses = append(addresses, ip.IpAddress) | ||
} | ||
} | ||
} | ||
|
||
if len(addresses) == 0 { | ||
return nil, errors.Errorf("unable to find IP Addresses for Machine %s", klog.KObj(m)) | ||
} | ||
|
||
return addresses, nil | ||
} | ||
|
||
func createOutputFile(path string) (*os.File, error) { | ||
if err := os.MkdirAll(filepath.Dir(path), os.ModePerm); err != nil { | ||
return nil, err | ||
|
@@ -155,3 +216,28 @@ func readPrivateKey() ([]byte, error) { | |
|
||
return os.ReadFile(filepath.Clean(privateKeyFilePath)) | ||
} | ||
|
||
// aggregateConcurrent runs fns concurrently, returning aggregated errors. | ||
func aggregateConcurrent(funcs ...func() error) error { | ||
// run all fns concurrently | ||
ch := make(chan error, len(funcs)) | ||
var wg sync.WaitGroup | ||
for _, f := range funcs { | ||
f := f | ||
wg.Add(1) | ||
go func() { | ||
defer wg.Done() | ||
ch <- f() | ||
}() | ||
} | ||
wg.Wait() | ||
close(ch) | ||
// collect up and return errors | ||
errs := []error{} | ||
for err := range ch { | ||
if err != nil { | ||
errs = append(errs, err) | ||
} | ||
} | ||
return kerrors.NewAggregate(errs) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Q: Is it possible to change it in cluster-template-ignition.yaml instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should keep the core user there. It's sort of standard with Ignition I think? (IIRC)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agree, `core` is the standard in Ignition.