From 373c3187d33f2768f347d0fe41373077951c9d44 Mon Sep 17 00:00:00 2001 From: Christophe Fergeau Date: Wed, 7 Feb 2024 17:33:45 +0100 Subject: [PATCH 1/6] test: Add NVME test This adds a NVME test to TestPCIIDs. Signed-off-by: Christophe Fergeau --- test/vm_test.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/test/vm_test.go b/test/vm_test.go index 0ef0892b..dcd476a0 100644 --- a/test/vm_test.go +++ b/test/vm_test.go @@ -228,6 +228,20 @@ var pciidMacOS13Tests = map[string]pciidTest{ }, } +var pciidMacOS14Tests = map[string]pciidTest{ + "nvm-express": { + vendorID: 0x106b, // Apple + deviceID: 0x1a09, + createDev: func(t *testing.T) (config.VirtioDevice, error) { + diskimg := filepath.Join(t.TempDir(), "nvmexpress.img") + f, err := os.Create(diskimg) + require.NoError(t, err) + require.NoError(t, f.Close()) + return config.NVMExpressControllerNew(diskimg) + }, + }, +} + func testPCIId(t *testing.T, test pciidTest, provider OsProvider) { vm := NewTestVM(t, provider) defer vm.Close(t) @@ -266,6 +280,16 @@ func TestPCIIds(t *testing.T) { } else { t.Log("Skipping macOS 13 tests") } + + if err := macOSAvailable(14); err == nil { + for name, test := range pciidMacOS14Tests { + t.Run(name, func(t *testing.T) { + testPCIId(t, test, puipuiProvider) + }) + } + } else { + t.Log("Skipping macOS 14 tests") + } } func checkPCIDevice(t *testing.T, vm *testVM, vendorID, deviceID int) { From a318dee6544d9d261ff42666237dd3370696d17b Mon Sep 17 00:00:00 2001 From: Christophe Fergeau Date: Wed, 7 Feb 2024 17:37:19 +0100 Subject: [PATCH 2/6] test: Factor macOS version specific test code The loops over the macOS13-only tests and macOS14-only tests are exactly the same, this commit refactors them so that we have a single loop. Signed-off-by: Christophe Fergeau --- test/vm_test.go | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/test/vm_test.go b/test/vm_test.go index dcd476a0..75155325 100644 --- a/test/vm_test.go +++ b/test/vm_test.go @@ -242,6 +242,11 @@ var pciidMacOS14Tests = map[string]pciidTest{ }, } +var pciidVersionedTests = map[int]map[string]pciidTest{ + 13: pciidMacOS13Tests, + 14: pciidMacOS14Tests, +} + func testPCIId(t *testing.T, test pciidTest, provider OsProvider) { vm := NewTestVM(t, provider) defer vm.Close(t) @@ -271,24 +276,16 @@ func TestPCIIds(t *testing.T) { }) } - if err := macOSAvailable(13); err == nil { - for name, test := range pciidMacOS13Tests { - t.Run(name, func(t *testing.T) { - testPCIId(t, test, puipuiProvider) - }) - } - } else { - t.Log("Skipping macOS 13 tests") - } - - if err := macOSAvailable(14); err == nil { - for name, test := range pciidMacOS14Tests { - t.Run(name, func(t *testing.T) { - testPCIId(t, test, puipuiProvider) - }) + for macosVersion, tests := range pciidVersionedTests { + if err := macOSAvailable(float64(macosVersion)); err == nil { + for name, test := range tests { + t.Run(name, func(t *testing.T) { + testPCIId(t, test, puipuiProvider) + }) + } + } else { + t.Logf("Skipping macOS %d tests", macosVersion) } - } else { - t.Log("Skipping macOS 14 tests") } } From 0253d868d863a9af807c79a9c18328fc31f9bd15 Mon Sep 17 00:00:00 2001 From: Christophe Fergeau Date: Thu, 8 Feb 2024 12:58:39 +0100 Subject: [PATCH 3/6] test: Refactor ssh code This introduces a retrySSHDial and reworks the API used to connect to the VM over vsock/tcp. This will be useful in subsequent commits which will add a way to error out from the test if the vfkit process unexpectedly stopped. Signed-off-by: Christophe Fergeau --- test/vm_helpers.go | 39 +++++++++++++++------------------------ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/test/vm_helpers.go b/test/vm_helpers.go index 4b87dec4..28bb69e7 100644 --- a/test/vm_helpers.go +++ b/test/vm_helpers.go @@ -17,7 +17,7 @@ import ( "golang.org/x/crypto/ssh" ) -func sshOverTCP(macAddress string, sshPort int, sshConfig *ssh.ClientConfig) (*ssh.Client, error) { +func retryIPFromMAC(macAddress string) (string, error) { var ( err error ip string @@ -32,36 +32,25 @@ func sshOverTCP(macAddress string, sshPort int, sshConfig *ssh.ClientConfig) (*s time.Sleep(time.Second) } if err != nil { - return nil, err + return "", err } log.Infof("found IP address %s for MAC %s", ip, macAddress) - var sshClient *ssh.Client - for i := 0; i < 10; i++ { - sshClient, err = ssh.Dial("tcp", net.JoinHostPort(ip, strconv.Itoa(sshPort)), sshConfig) - if err == nil { - break - } - time.Sleep(time.Second) - } - log.Infof("established SSH connection to %s:%d over TCP", ip, sshPort) - return sshClient, err + return ip, nil } -func sshOverVsock(unixSocket string, sshConfig *ssh.ClientConfig) (*ssh.Client, error) { - var ( - sshClient *ssh.Client - err error - ) - for i := 0; i < 100; i++ { - sshClient, err = ssh.Dial("unix", unixSocket, sshConfig) +func retrySSHDial(scheme string, address string, sshConfig *ssh.ClientConfig) (*ssh.Client, error) { + for i := 0; i < 10; i++ { + log.Debugf("ssh dial try #%d", i) + sshClient, err := ssh.Dial(scheme, address, sshConfig) if err == nil { - break + log.Infof("established SSH connection to %s over %s", address, scheme) + return sshClient, nil } + log.Debugf("ssh failed: %v", err) time.Sleep(time.Second) } - log.Infof("established SSH connection over Unix socket %s", unixSocket) - return sshClient, err + return nil, fmt.Errorf("timeout waiting for SSH") } type vfkitRunner struct { @@ -202,10 +191,12 @@ func (vm *testVM) WaitForSSH(t *testing.T) { ) switch vm.sshNetwork { case "tcp": - sshClient, err = sshOverTCP(vm.macAddress, vm.port, vm.provider.SSHConfig()) + ip, err := retryIPFromMAC(vm.macAddress) + require.NoError(t, err) + sshClient, err = retrySSHDial("tcp", net.JoinHostPort(ip, strconv.Itoa(vm.port)), vm.provider.SSHConfig()) require.NoError(t, err) case "vsock": - sshClient, err = sshOverVsock(vm.vsockPath, vm.provider.SSHConfig()) + sshClient, err = retrySSHDial("unix", vm.vsockPath, vm.provider.SSHConfig()) require.NoError(t, err) default: t.FailNow() From 70b7406c64c638bcef85068cd5e3a590264a8cd0 Mon Sep 17 00:00:00 2001 From: Christophe Fergeau Date: Thu, 8 Feb 2024 13:07:12 +0100 Subject: [PATCH 4/6] test: Use select{} in retry loops Instead of a for{} loop with a time.Sleep, this commit switches to using select{} for retry loops. This is in preparation for catching unexpected exits of the vfkit process while running the test Signed-off-by: Christophe Fergeau --- test/vm_helpers.go | 47 +++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/test/vm_helpers.go b/test/vm_helpers.go index 28bb69e7..b78bda8a 100644 --- a/test/vm_helpers.go +++ b/test/vm_helpers.go @@ -23,34 +23,39 @@ func retryIPFromMAC(macAddress string) (string, error) { ip string ) - for i := 0; i < 100; i++ { - ip, err = vfkithelpers.GetIPAddressByMACAddress(macAddress) - if err == nil { - break + for { + select { + case <-time.After(1 * time.Second): + ip, err = vfkithelpers.GetIPAddressByMACAddress(macAddress) + if err == nil { + log.Infof("found IP address %s for MAC %s", ip, macAddress) + return ip, nil + } + case <-time.After(10 * time.Second): + return "", fmt.Errorf("timeout getting IP from MAC: %w", err) } - - time.Sleep(time.Second) - } - if err != nil { - return "", err } - log.Infof("found IP address %s for MAC %s", ip, macAddress) - - return ip, nil } func retrySSHDial(scheme string, address string, sshConfig *ssh.ClientConfig) (*ssh.Client, error) { - for i := 0; i < 10; i++ { - log.Debugf("ssh dial try #%d", i) - sshClient, err := ssh.Dial(scheme, address, sshConfig) - if err == nil { - log.Infof("established SSH connection to %s over %s", address, scheme) - return sshClient, nil + var ( + sshClient *ssh.Client + err error + ) + for { + select { + case <-time.After(1 * time.Second): + log.Debugf("trying ssh dial") + sshClient, err = ssh.Dial(scheme, address, sshConfig) + if err == nil { + log.Infof("established SSH connection to %s over %s", address, scheme) + return sshClient, nil + } + log.Debugf("ssh failed: %v", err) + case <-time.After(10 * time.Second): + return nil, fmt.Errorf("timeout waiting for SSH: %w", err) } - log.Debugf("ssh failed: %v", err) - time.Sleep(time.Second) } - return nil, fmt.Errorf("timeout waiting for SSH") } type vfkitRunner struct { From a38d84c167f97a520f859e17283c40c299e68e03 Mon Sep 17 00:00:00 2001 From: Christophe Fergeau Date: Thu, 8 Feb 2024 12:38:35 +0100 Subject: [PATCH 5/6] test: Catch vfkit process exiting unexpectedly This commit an a go routine which will `Wait()` for the vfkit process. When it exits, it will push the corresponding error (or nil) to a channel. The code waiting for the VM to startup can then check for errors in their select{} retry loop, and fail acccordingly. This will make debugging test failures easier, as currently if the vfkit process dies early, the test appears to be taking a very long time without any indication as to what's going on. Signed-off-by: Christophe Fergeau --- test/vm_helpers.go | 27 +++++++++++++++++++++------ test/vm_test.go | 23 +++++++++++++++++++++++ 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/test/vm_helpers.go b/test/vm_helpers.go index b78bda8a..9ddbb3a8 100644 --- a/test/vm_helpers.go +++ b/test/vm_helpers.go @@ -17,7 +17,7 @@ import ( "golang.org/x/crypto/ssh" ) -func retryIPFromMAC(macAddress string) (string, error) { +func retryIPFromMAC(errCh chan error, macAddress string) (string, error) { var ( err error ip string @@ -25,6 +25,8 @@ func retryIPFromMAC(macAddress string) (string, error) { for { select { + case err := <-errCh: + return "", err case <-time.After(1 * time.Second): ip, err = vfkithelpers.GetIPAddressByMACAddress(macAddress) if err == nil { @@ -37,13 +39,15 @@ func retryIPFromMAC(macAddress string) (string, error) { } } -func retrySSHDial(scheme string, address string, sshConfig *ssh.ClientConfig) (*ssh.Client, error) { +func retrySSHDial(errCh chan error, scheme string, address string, sshConfig *ssh.ClientConfig) (*ssh.Client, error) { var ( sshClient *ssh.Client err error ) for { select { + case err := <-errCh: + return nil, err case <-time.After(1 * time.Second): log.Debugf("trying ssh dial") sshClient, err = ssh.Dial(scheme, address, sshConfig) @@ -60,6 +64,7 @@ func retrySSHDial(scheme string, address string, sshConfig *ssh.ClientConfig) (* type vfkitRunner struct { *exec.Cmd + errCh chan error gracefullyShutdown bool } @@ -75,15 +80,25 @@ func startVfkit(t *testing.T, vm *config.VirtualMachine) *vfkitRunner { err = vfkitCmd.Start() require.NoError(t, err) + errCh := make(chan error) + go func() { + err := vfkitCmd.Wait() + if err != nil { + log.Infof("vfkitCmd.Wait() returned %v", err) + } + errCh <- err + close(errCh) + }() return &vfkitRunner{ vfkitCmd, + errCh, false, } } func (cmd *vfkitRunner) Wait(t *testing.T) { - err := cmd.Cmd.Wait() + err := <-cmd.errCh require.NoError(t, err) cmd.gracefullyShutdown = true } @@ -196,12 +211,12 @@ func (vm *testVM) WaitForSSH(t *testing.T) { ) switch vm.sshNetwork { case "tcp": - ip, err := retryIPFromMAC(vm.macAddress) + ip, err := retryIPFromMAC(vm.vfkitCmd.errCh, vm.macAddress) require.NoError(t, err) - sshClient, err = retrySSHDial("tcp", net.JoinHostPort(ip, strconv.Itoa(vm.port)), vm.provider.SSHConfig()) + sshClient, err = retrySSHDial(vm.vfkitCmd.errCh, "tcp", net.JoinHostPort(ip, strconv.Itoa(vm.port)), vm.provider.SSHConfig()) require.NoError(t, err) case "vsock": - sshClient, err = retrySSHDial("unix", vm.vsockPath, vm.provider.SSHConfig()) + sshClient, err = retrySSHDial(vm.vfkitCmd.errCh, "unix", vm.vsockPath, vm.provider.SSHConfig()) require.NoError(t, err) default: t.FailNow() diff --git a/test/vm_test.go b/test/vm_test.go index 75155325..b57c25dd 100644 --- a/test/vm_test.go +++ b/test/vm_test.go @@ -16,6 +16,29 @@ import ( "github.com/stretchr/testify/require" ) +func TestFailedVfkitStart(t *testing.T) { + puipuiProvider := NewPuipuiProvider() + log.Info("fetching os image") + err := puipuiProvider.Fetch(t.TempDir()) + require.NoError(t, err) + + vm := NewTestVM(t, puipuiProvider) + defer vm.Close(t) + require.NotNil(t, vm) + + vm.AddSSH(t, "vsock") + + dev, err := config.NVMExpressControllerNew("/a/b") + require.NoError(t, err) + vm.AddDevice(t, dev) + + vm.Start(t) + + log.Infof("waiting for SSH") + _, err = retrySSHDial(vm.vfkitCmd.errCh, "unix", vm.vsockPath, vm.provider.SSHConfig()) + require.Error(t, err) +} + func testSSHAccess(t *testing.T, vm *testVM, network string) { log.Infof("testing SSH access over %s", network) vm.AddSSH(t, network) From ae317e41f1bef1aa7d7dbb7e414a49fc69d91a58 Mon Sep 17 00:00:00 2001 From: Christophe Fergeau Date: Thu, 8 Feb 2024 13:24:16 +0100 Subject: [PATCH 6/6] test: Log vfkit output to a file This captures vfkit stdout/stderr to a file to help understand some test failures. This file is currently removed after the test ends, its path needs to be changed if one needs to look at it after. Signed-off-by: Christophe Fergeau --- test/vm_helpers.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/vm_helpers.go b/test/vm_helpers.go index 9ddbb3a8..a414fc62 100644 --- a/test/vm_helpers.go +++ b/test/vm_helpers.go @@ -3,9 +3,11 @@ package test import ( "fmt" "net" + "os" "os/exec" "path/filepath" "strconv" + "strings" "testing" "time" @@ -71,12 +73,19 @@ type vfkitRunner struct { func startVfkit(t *testing.T, vm *config.VirtualMachine) *vfkitRunner { const vfkitRelativePath = "../out/vfkit" + logFilePath := filepath.Join(t.TempDir(), fmt.Sprintf("%s.log", strings.ReplaceAll(t.Name(), "/", ""))) + logFile, err := os.Create(logFilePath) + require.NoError(t, err) + log.Infof("vfkit log file: %s", logFilePath) + binaryPath, err := exec.LookPath(vfkitRelativePath) require.NoError(t, err) log.Infof("starting %s", binaryPath) vfkitCmd, err := vm.Cmd(binaryPath) require.NoError(t, err) + vfkitCmd.Stdout = logFile + vfkitCmd.Stderr = logFile err = vfkitCmd.Start() require.NoError(t, err)