Skip to content

Commit

Permalink
tests(e2e): minor e2e test fixups (#941)
Browse files Browse the repository at this point in the history
Some low-hanging fruit I noticed as we got some activity on the E2E test
workflow:

### Deprovisioning can happen from multiple initial states

Our current model only allows for one initial state and one target
state. Some commands such as the deprovisioning one, can work
incrementally for the client/AD. For example, if we manage to provision
the client but fail to provision the AD, the script should still release
any client resources. As of now, it only works if the AD was
successfully provisioned.

There are multiple ways to handle this, but I've opted for the less
intrusive one of making states variadic arguments, where all but the
last are considered the "initial states", and the last is the state that
is being transitioned to. We still sacrifice some readability but
existing code will work, and I've made sure to document the new
intricacies of the `WithStateTransition` function.

### Add simple retry mechanism for SSH connections

As seen in some failing CI runs, sometimes we fail to establish SSH
connection to either a client or the AD controller. I believe this is a
transient failure that could be mitigated if we retry the operation
multiple times.

This implements a simple retry mechanism that for now attempts to
connect a maximum of 5 times at an interval of 3 seconds.

### Update apt package list before realm join

It seems that realm join installs some packages on its own, but doesn't
run `apt update` first, resulting in errors such as:

* Installing necessary packages: sssd-tools adcli sssd libnss-sss
libpam-sss
    Password for localadmin:
! E: http://azure.archive.ubuntu.com/ubuntu focal-updates/main amd64
libc-ares2 amd64 1.15.0-1ubuntu0.4 is not (yet) available (404 Not Found
[IP: 20.54.144.51 80])

We already run `apt update` just after, before installing adsys, so this
is a case of moving the update bit above `realm join`. In addition,
upgrade packages to avoid Ubuntu running unattended-upgr on its own and
breaking our use of `apt`.

### Move realm join after installing all packages

There's a pesky unattended upgrade that hits us often on focal... let's
group all apt commands before joining the realm so there's less chance
of it starting between our apt runs.

If it becomes a bigger problem we can stop and disable the unattended
upgrade service.

### Add a sleep before checking machine scripts

Once a while we seem to have issues asserting the existence of the
startup script. Let's see if we can rule this out with a sleep after
reboot.

### Increase max timeout for client reboot

1 minute may not be enough, as per
https://github.com/ubuntu/adsys/actions/runs/8195833180/job/22414854516

-------------

Passing CI run: https://github.com/ubuntu/adsys/actions/runs/8201760903
  • Loading branch information
GabrielNagy authored Mar 8, 2024
2 parents d45043e + 1df84dd commit 2cef786
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 51 deletions.
14 changes: 10 additions & 4 deletions e2e/cmd/run_tests/01_provision_client/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,14 +176,14 @@ func action(ctx context.Context, cmd *command.Command) error {
}
}

log.Infof("Joining VM to domain...")
_, err = client.Run(ctx, fmt.Sprintf("realm join warthogs.biz -U localadmin -v --unattended <<<'%s'", adPassword))
log.Infof("Upgrading packages...")
_, err = client.Run(ctx, "apt-get -y update && DEBIAN_FRONTEND=noninteractive apt-get -y upgrade")
if err != nil {
return fmt.Errorf("failed to join VM to domain: %w", err)
return fmt.Errorf("failed to update package list: %w", err)
}

log.Infof("Installing adsys package...")
_, err = client.Run(ctx, "apt-get -y update && DEBIAN_FRONTEND=noninteractive apt-get install -y /debs/*.deb")
_, err = client.Run(ctx, "DEBIAN_FRONTEND=noninteractive apt-get install -y /debs/*.deb")
if err != nil {
return fmt.Errorf("failed to install adsys package: %w", err)
}
Expand All @@ -195,6 +195,12 @@ func action(ctx context.Context, cmd *command.Command) error {
log.Warningf("Some packages failed to install: %v", err)
}

log.Infof("Joining VM to domain...")
_, err = client.Run(ctx, fmt.Sprintf("realm join warthogs.biz -U localadmin -v --unattended <<<'%s'", adPassword))
if err != nil {
return fmt.Errorf("failed to join VM to domain: %w", err)
}

cmd.Inventory.IP = ipAddress
cmd.Inventory.VMID = id
cmd.Inventory.UUID = uuid
Expand Down
4 changes: 4 additions & 0 deletions e2e/cmd/run_tests/04_test_pro_managers/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,10 @@ ftp_proxy="http://127.0.0.1:8080"`); err != nil {
if err := client.Reboot(); err != nil {
return err
}

// Sleep a few seconds to ensure the machine startup script has time to run
time.Sleep(10 * time.Second)

if err := client.RequireFileExists(ctx, "/etc/created-by-adsys-machine-shutdown-script"); err != nil {
return err
}
Expand Down
31 changes: 18 additions & 13 deletions e2e/cmd/run_tests/99_deprovision/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func main() {
func run() int {
cmd := command.New(action,
command.WithValidateFunc(validate),
command.WithStateTransition(inventory.ADProvisioned, inventory.Deprovisioned),
command.WithStateTransition(inventory.ClientProvisioned, inventory.ADProvisioned, inventory.Deprovisioned),
)
cmd.Usage = fmt.Sprintf(`go run ./%s [options]
Expand Down Expand Up @@ -66,6 +66,23 @@ func action(ctx context.Context, cmd *command.Command) error {
return fmt.Errorf("failed to leave domain: %w", err)
}

// Destroy the client VM
log.Infof("Destroying client VM %q", cmd.Inventory.VMName)
_, _, err = az.RunCommand(ctx, "vm", "delete",
"--resource-group", "AD",
"--name", cmd.Inventory.VMName,
"--force-deletion", "true",
"--yes",
)
if err != nil {
return err
}

// Return early if we don't need to deprovision AD resources
if cmd.Inventory.State != inventory.ADProvisioned {
return nil
}

// Connect to the domain controller
client, err = remote.NewClient(inventory.DomainControllerIP, "localadmin", sshKey)
if err != nil {
Expand All @@ -82,17 +99,5 @@ func action(ctx context.Context, cmd *command.Command) error {
return fmt.Errorf("error running the PowerShell script: %w", err)
}

// Destroy the client VM
log.Infof("Destroying client VM %q", cmd.Inventory.VMName)
_, _, err = az.RunCommand(ctx, "vm", "delete",
"--resource-group", "AD",
"--name", cmd.Inventory.VMName,
"--force-deletion", "true",
"--yes",
)
if err != nil {
return err
}

return nil
}
71 changes: 46 additions & 25 deletions e2e/internal/command/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,62 +40,75 @@ type Command struct {

fSet *flag.FlagSet

fromState inventory.State
toState inventory.State
fromStates []inventory.State
toState inventory.State
}

// WithStateTransition sets the expected state transition for the command.
func WithStateTransition(from, to inventory.State) func(*options) {
return func(a *options) {
a.fromState = from
a.toState = to
// WithStateTransition sets the expected state transition for the command,
// allowing for one or more initial states, and a final state.
func WithStateTransition(states ...inventory.State) func(*options) error {
return func(a *options) error {
if len(states) < 2 {
return errors.New("expected at least two states")
}

a.fromStates = states[:len(states)-1]
a.toState = states[len(states)-1]

return nil
}
}

// WithRequiredState ensures that the inventory file is in the given state,
// without a transition being performed.
func WithRequiredState(state inventory.State) func(*options) {
return func(a *options) {
a.fromState = state
func WithRequiredState(state inventory.State) func(*options) error {
return func(a *options) error {
a.fromStates = []inventory.State{state}
a.toState = state

return nil
}
}

// WithValidateFunc sets the validation function for the command.
func WithValidateFunc(validate cmdFunc) func(*options) {
return func(a *options) {
func WithValidateFunc(validate cmdFunc) func(*options) error {
return func(a *options) error {
a.validate = validate

return nil
}
}

type options struct {
validate cmdFunc
fromState inventory.State
toState inventory.State
validate cmdFunc
fromStates []inventory.State
toState inventory.State
}

// Option is a function that configures the command.
type Option func(*options)
type Option func(*options) error

// New creates a new command.
func New(action cmdFunc, args ...Option) *Command {
// Apply given options
opts := options{
fromState: inventory.Null,
toState: inventory.Null,
fromStates: []inventory.State{inventory.Null},
toState: inventory.Null,
}

for _, f := range args {
f(&opts)
if err := f(&opts); err != nil {
log.Fatalf("Failed to apply option: %s", err)
}
}

return &Command{
action: action,
validate: opts.validate,

fSet: flag.NewFlagSet("", flag.ContinueOnError),
fromState: opts.fromState,
toState: opts.toState,
fSet: flag.NewFlagSet("", flag.ContinueOnError),
fromStates: opts.fromStates,
toState: opts.toState,
}
}

Expand Down Expand Up @@ -195,8 +208,16 @@ func (c *Command) Execute(ctx context.Context) int {
return 1
}

if c.Inventory.State != c.fromState {
log.Errorf("Inventory file is not in the expected state: %s", c.fromState)
// Allow at least one of the expected states
found := false
for _, s := range c.fromStates {
if c.Inventory.State == s {
found = true
break
}
}
if !found {
log.Errorf("Inventory file is not in any of the expected initial states: %v", c.fromStates)
return 1
}
}
Expand Down Expand Up @@ -227,7 +248,7 @@ func (c *Command) Execute(ctx context.Context) int {
}

func (c *Command) requireInventory() bool {
return c.fromState != inventory.Null
return c.fromStates[0] != inventory.Null
}

func (c *Command) installSignalHandler(cancel context.CancelFunc) func() {
Expand Down
17 changes: 12 additions & 5 deletions e2e/internal/command/command_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@ func TestAddFlags(t *testing.T) {

func TestInventory(t *testing.T) {
tests := map[string]struct {
fromState inventory.State
toState inventory.State
fromState inventory.State
toState inventory.State
additionalFromState inventory.State

existingInventory string

Expand All @@ -47,9 +48,11 @@ func TestInventory(t *testing.T) {
"From null state doesn't require existing data": {toState: inventory.BaseVMCreated},
"From existing state requires existing data": {fromState: inventory.BaseVMCreated, toState: inventory.TemplateCreated, existingInventory: "inventory_from_template_created"},
"To null state doesn't write data": {toState: inventory.Null, wantNoFile: true},
"Multiple from states requires at least one": {fromState: inventory.ClientProvisioned, toState: inventory.TemplateCreated, additionalFromState: inventory.BaseVMCreated, existingInventory: "inventory_from_template_created"},

"Error if inventory file is required and doesn't exist": {fromState: inventory.TemplateCreated, wantErr: true},
"Error if inventory state does not match expected state": {fromState: inventory.TemplateCreated, existingInventory: "inventory_from_template_created", wantErr: true},
"Error if inventory file is required and doesn't exist": {fromState: inventory.TemplateCreated, wantErr: true},
"Error if inventory state does not match expected state": {fromState: inventory.TemplateCreated, existingInventory: "inventory_from_template_created", wantErr: true},
"Error if inventory state does not match any expected state": {fromState: inventory.TemplateCreated, additionalFromState: inventory.Null, existingInventory: "inventory_from_template_created", wantErr: true},
}

for name, tc := range tests {
Expand All @@ -73,7 +76,11 @@ func TestInventory(t *testing.T) {
inventoryPath := filepath.Join(tempDir, "inventory", tc.existingInventory)
os.Args = append(args, "--inventory-file", inventoryPath)

cmd := command.New(mockAction, command.WithStateTransition(tc.fromState, tc.toState))
states := []inventory.State{tc.fromState}
states = append(states, tc.additionalFromState)
states = append(states, tc.toState)

cmd := command.New(mockAction, command.WithStateTransition(states...))
ret := cmd.Execute(context.Background())

if tc.wantErr {
Expand Down
21 changes: 17 additions & 4 deletions e2e/internal/remote/remote.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,22 @@ func NewClient(host string, username string, secret string) (Client, error) {
Timeout: 10 * time.Second,
}

client, err := ssh.Dial("tcp", host+":22", config)
var client *ssh.Client

interval := 3 * time.Second
retries := 10

for i := 1; i <= retries; i++ {
log.Debugf("Establishing SSH connection to %q (attempt %d/%d)", host, i, retries)
client, err = ssh.Dial("tcp", host+":22", config)
if err == nil {
break
}
log.Warningf("Failed to connect to %q: %v (attempt %d/%d)", host, err, i, retries)
time.Sleep(interval)
}
if err != nil {
return Client{}, fmt.Errorf("failed to establish connection to remote host: %w", err)
return Client{}, fmt.Errorf("failed to connect to %q: %w", host, err)
}

return Client{
Expand Down Expand Up @@ -218,7 +231,7 @@ func (c Client) Upload(localPath string, remotePath string) error {
// Reboot reboots the remote host and waits for it to come back online, then
// reestablishes the SSH connection.
// It first waits for the host to go offline, then returns an error if the host
// does not come back online within 1 minute.
// does not come back online within 3 minutes.
func (c *Client) Reboot() error {
log.Infof("Rebooting host %q", c.client.RemoteAddr().String())
_, _ = c.Run(context.Background(), "reboot")
Expand All @@ -238,7 +251,7 @@ func (c *Client) Reboot() error {
return fmt.Errorf("host did not go offline in time")
}

ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
defer cancel()

// Wait for the host to come back online
Expand Down

0 comments on commit 2cef786

Please sign in to comment.