Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Healthcheck plugins on dispense, re-instantiating broken ones #931

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 45 additions & 17 deletions plugins/manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,25 +187,45 @@ func (pm *PluginManager) killPluginLocked(pID plugins.PluginID) {
// Dispense returns a PluginInstance for use by safely obtaining the
// PluginInstance from storage if we have it.
func (pm *PluginManager) Dispense(name, pluginType string) (PluginInstance, error) {

// Measure the time taken to dispense a plugin. This helps identify
// contention and pressure obtaining plugin client handles.
labels := []metrics.Label{{Name: "plugin_name", Value: name}, {Name: "plugin_type", Value: pluginType}}
defer metrics.MeasureSinceWithLabels([]string{"plugin", "manager", "access_ms"}, time.Now(), labels)

pm.pluginInstancesLock.RLock()
defer pm.pluginInstancesLock.RUnlock()
pID := plugins.PluginID{Name: name, PluginType: pluginType}

// Attempt to pull our plugin instance from the store and pass this to the
// caller.
//
// TODO(jrasell) if we do not find the instance, we should probably try and
// dispense the plugin. We should also check the plugin instance has not
// exited.
inst, ok := pm.pluginInstances[plugins.PluginID{Name: name, PluginType: pluginType}]
// caller. If the plugin isn't in the store, try to instantiate it; if it
// is, do a health check and attempt to re-instantiate if it fails.
pm.pluginInstancesLock.Lock()
inst, exists := pm.pluginInstances[pID]

if exists {
if _, err := pm.pluginInfo(pID, inst); err == nil {
// Plugin is fine, return it
pm.pluginInstancesLock.Unlock()
return inst, nil
}

// The plugin exists, but is broken. Remove it.
pm.logger.Warn("plugin failed healthcheck", "plugin_name", pID.Name)
pm.killPluginLocked(pID)
} else {
pm.logger.Warn("plugin not in store", "plugin_name", pID.Name)
}
pm.pluginInstancesLock.Unlock()

if err := pm.dispensePlugins(); err != nil {
return nil, fmt.Errorf("failed to dispense plugin: %q of type %q: %w", name, pluginType, err)
}

pm.pluginInstancesLock.RLock()
inst, ok := pm.pluginInstances[pID]
pm.pluginInstancesLock.RUnlock()
if !ok {
return nil, fmt.Errorf("failed to dispense plugin: %q of type %q is not stored", name, pluginType)
}

return inst, nil
}

Expand Down Expand Up @@ -322,7 +342,23 @@ func (pm *PluginManager) launchExternalPlugin(id plugins.PluginID, info *pluginI
}

func (pm *PluginManager) pluginLaunchCheck(id plugins.PluginID, info *pluginInfo, raw interface{}) (*base.PluginInfo, error) {
pluginInfo, err := pm.pluginInfo(id, raw)
if err != nil {
return nil, err
}

// If the plugin name, or plugin do not match it means the executed plugin
// has returned its metadata that is different to the configured. This is a
// problem, particularly in the PluginType sense as it means it will be
// unable to fulfill its role.
if pluginInfo.Name != info.driver || pluginInfo.PluginType != id.PluginType {
return nil, fmt.Errorf("plugin %s remote info doesn't match local config: %v", id.Name, err)
}

return pluginInfo, nil
}

func (pm *PluginManager) pluginInfo(id plugins.PluginID, raw interface{}) (*base.PluginInfo, error) {
// Check that the plugin implements the base plugin interface. As these are
// external plugins we need to check this safely, otherwise an incorrect
// plugin can cause the core application to panic.
Expand All @@ -336,14 +372,6 @@ func (pm *PluginManager) pluginLaunchCheck(id plugins.PluginID, info *pluginInfo
return nil, fmt.Errorf("failed to call PluginInfo on %s: %v", id.Name, err)
}

// If the plugin name, or plugin do not match it means the executed plugin
// has returned its metadata that is different to the configured. This is a
// problem, particularly in the PluginType sense as it means it will be
// unable to fulfill its role.
if pluginInfo.Name != info.driver || pluginInfo.PluginType != id.PluginType {
return nil, fmt.Errorf("plugin %s remote info doesn't match local config: %v", id.Name, err)
}

return pluginInfo, nil
}

Expand Down
76 changes: 56 additions & 20 deletions plugins/manager/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ import (
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad-autoscaler/agent/config"
"github.com/hashicorp/nomad-autoscaler/plugins/apm"
"github.com/hashicorp/nomad-autoscaler/plugins/base"
"github.com/hashicorp/nomad-autoscaler/plugins/strategy"
"github.com/hashicorp/nomad-autoscaler/plugins/target"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestLoad(t *testing.T) {
Expand Down Expand Up @@ -153,26 +155,7 @@ func TestDispense(t *testing.T) {
{
name: "external plugin",
pluginDir: "../test/bin",
cfg: map[string][]*config.Plugin{
"apm": {
&config.Plugin{
Name: "noop",
Driver: "noop-apm",
},
},
"strategy": {
&config.Plugin{
Name: "noop",
Driver: "noop-strategy",
},
},
"target": {
&config.Plugin{
Name: "noop",
Driver: "noop-target",
},
},
},
cfg: externalPlugins(),
},
}

Expand Down Expand Up @@ -205,3 +188,56 @@ func TestDispense(t *testing.T) {
})
}
}

func TestExternalPluginDies(t *testing.T) {
logger := hclog.NewNullLogger()
pm := NewPluginManager(logger, "../test/bin", externalPlugins())
defer pm.KillPlugins()

err := pm.Load()
require.NoError(t, err)
pi, err := pm.Dispense("noop", "target")
require.NoError(t, err)

// Verify we can talk to the plugin
client := pi.Plugin().(base.Base)
_, err = client.PluginInfo()
require.NoError(t, err)

// Kill the plugin without the manager noticing
pi.Kill()
_, err = client.PluginInfo()
require.Error(t, err)

// Now, re-dispense. The manager should recover
pi, err = pm.Dispense("noop", "target")
require.NoError(t, err)

// Verify that the returned plugin works
client = pi.Plugin().(base.Base)
_, err = client.PluginInfo()
require.NoError(t, err)
}

func externalPlugins() map[string][]*config.Plugin {
return map[string][]*config.Plugin{
"apm": {
&config.Plugin{
Name: "noop",
Driver: "noop-apm",
},
},
"strategy": {
&config.Plugin{
Name: "noop",
Driver: "noop-strategy",
},
},
"target": {
&config.Plugin{
Name: "noop",
Driver: "noop-target",
},
},
}
}