Skip to content

Commit

Permalink
Healthcheck plugins on dispense, re-instantiating broken ones
Browse files Browse the repository at this point in the history
  • Loading branch information
nick-kentik committed Jul 2, 2024
1 parent db858bb commit 35129e0
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 37 deletions.
62 changes: 45 additions & 17 deletions plugins/manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,25 +187,45 @@ func (pm *PluginManager) killPluginLocked(pID plugins.PluginID) {
// Dispense returns a PluginInstance for use by safely obtaining the
// PluginInstance from storage if we have it.
func (pm *PluginManager) Dispense(name, pluginType string) (PluginInstance, error) {

// Measure the time taken to dispense a plugin. This helps identify
// contention and pressure obtaining plugin client handles.
labels := []metrics.Label{{Name: "plugin_name", Value: name}, {Name: "plugin_type", Value: pluginType}}
defer metrics.MeasureSinceWithLabels([]string{"plugin", "manager", "access_ms"}, time.Now(), labels)

pm.pluginInstancesLock.RLock()
defer pm.pluginInstancesLock.RUnlock()
pID := plugins.PluginID{Name: name, PluginType: pluginType}

// Attempt to pull our plugin instance from the store and pass this to the
// caller.
//
// TODO(jrasell) if we do not find the instance, we should probably try and
// dispense the plugin. We should also check the plugin instance has not
// exited.
inst, ok := pm.pluginInstances[plugins.PluginID{Name: name, PluginType: pluginType}]
// caller. If the plugin isn't in the store, try to instantiate it; if it
// is, do a health check and attempt to re-instantiate if it fails.
pm.pluginInstancesLock.Lock()
inst, exists := pm.pluginInstances[pID]

if exists {
if _, err := pm.pluginInfo(pID, inst); err == nil {
// Plugin is fine, return it
pm.pluginInstancesLock.Unlock()
return inst, nil
}

// The plugin exists, but is broken. Remove it.
pm.logger.Warn("plugin failed healthcheck", "plugin_name", pID.Name)
pm.killPluginLocked(pID)
} else {
pm.logger.Warn("plugin not in store", "plugin_name", pID.Name)
}
pm.pluginInstancesLock.Unlock()

if err := pm.dispensePlugins(); err != nil {
return nil, fmt.Errorf("failed to dispense plugin: %q of type %q: %w", name, pluginType, err)
}

pm.pluginInstancesLock.RLock()
inst, ok := pm.pluginInstances[pID]
pm.pluginInstancesLock.RUnlock()
if !ok {
return nil, fmt.Errorf("failed to dispense plugin: %q of type %q is not stored", name, pluginType)
}

return inst, nil
}

Expand Down Expand Up @@ -322,7 +342,23 @@ func (pm *PluginManager) launchExternalPlugin(id plugins.PluginID, info *pluginI
}

func (pm *PluginManager) pluginLaunchCheck(id plugins.PluginID, info *pluginInfo, raw interface{}) (*base.PluginInfo, error) {
pluginInfo, err := pm.pluginInfo(id, raw)
if err != nil {
return nil, err
}

// If the plugin name, or plugin do not match it means the executed plugin
// has returned its metadata that is different to the configured. This is a
// problem, particularly in the PluginType sense as it means it will be
// unable to fulfill its role.
if pluginInfo.Name != info.driver || pluginInfo.PluginType != id.PluginType {
return nil, fmt.Errorf("plugin %s remote info doesn't match local config: %v", id.Name, err)
}

return pluginInfo, nil
}

func (pm *PluginManager) pluginInfo(id plugins.PluginID, raw interface{}) (*base.PluginInfo, error) {
// Check that the plugin implements the base plugin interface. As these are
// external plugins we need to check this safely, otherwise an incorrect
// plugin can cause the core application to panic.
Expand All @@ -336,14 +372,6 @@ func (pm *PluginManager) pluginLaunchCheck(id plugins.PluginID, info *pluginInfo
return nil, fmt.Errorf("failed to call PluginInfo on %s: %v", id.Name, err)
}

// If the plugin name, or plugin do not match it means the executed plugin
// has returned its metadata that is different to the configured. This is a
// problem, particularly in the PluginType sense as it means it will be
// unable to fulfill its role.
if pluginInfo.Name != info.driver || pluginInfo.PluginType != id.PluginType {
return nil, fmt.Errorf("plugin %s remote info doesn't match local config: %v", id.Name, err)
}

return pluginInfo, nil
}

Expand Down
81 changes: 61 additions & 20 deletions plugins/manager/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ import (
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad-autoscaler/agent/config"
"github.com/hashicorp/nomad-autoscaler/plugins/apm"
"github.com/hashicorp/nomad-autoscaler/plugins/base"
"github.com/hashicorp/nomad-autoscaler/plugins/strategy"
"github.com/hashicorp/nomad-autoscaler/plugins/target"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestLoad(t *testing.T) {
Expand Down Expand Up @@ -153,26 +155,7 @@ func TestDispense(t *testing.T) {
{
name: "external plugin",
pluginDir: "../test/bin",
cfg: map[string][]*config.Plugin{
"apm": {
&config.Plugin{
Name: "noop",
Driver: "noop-apm",
},
},
"strategy": {
&config.Plugin{
Name: "noop",
Driver: "noop-strategy",
},
},
"target": {
&config.Plugin{
Name: "noop",
Driver: "noop-target",
},
},
},
cfg: externalPlugins(),
},
}

Expand Down Expand Up @@ -205,3 +188,61 @@ func TestDispense(t *testing.T) {
})
}
}

func TestExternalPluginDies(t *testing.T) {
logger := hclog.NewNullLogger()
pm := NewPluginManager(logger, "../test/bin", externalPlugins())
defer pm.KillPlugins()

err := pm.Load()
require.NoError(t, err)

// Verify we can talk to the external target plugin
pi, err := pm.Dispense("noop", "target")
require.NoError(t, err)

// Verify we can talk to the plugin
client := pi.Plugin().(base.Base)

info, err := client.PluginInfo()
require.NoError(t, err)
t.Logf("%#+v", info)

// Make the plugin process dead
pi.Kill()

// Verify that we get an error trying to call it
info, err = client.PluginInfo()
require.Error(t, err)

// Now, re-dispense. The manager should recover
pi, err = pm.Dispense("noop", "target")
require.NoError(t, err)

client = pi.Plugin().(base.Base)
info, err = client.PluginInfo()
require.NoError(t, err)
}

func externalPlugins() map[string][]*config.Plugin {
return map[string][]*config.Plugin{
"apm": {
&config.Plugin{
Name: "noop",
Driver: "noop-apm",
},
},
"strategy": {
&config.Plugin{
Name: "noop",
Driver: "noop-strategy",
},
},
"target": {
&config.Plugin{
Name: "noop",
Driver: "noop-target",
},
},
}
}

0 comments on commit 35129e0

Please sign in to comment.