diff --git a/plugins/manager/manager.go b/plugins/manager/manager.go index f8bda866..02ea8652 100644 --- a/plugins/manager/manager.go +++ b/plugins/manager/manager.go @@ -187,25 +187,45 @@ func (pm *PluginManager) killPluginLocked(pID plugins.PluginID) { // Dispense returns a PluginInstance for use by safely obtaining the // PluginInstance from storage if we have it. func (pm *PluginManager) Dispense(name, pluginType string) (PluginInstance, error) { - // Measure the time taken to dispense a plugin. This helps identify // contention and pressure obtaining plugin client handles. labels := []metrics.Label{{Name: "plugin_name", Value: name}, {Name: "plugin_type", Value: pluginType}} defer metrics.MeasureSinceWithLabels([]string{"plugin", "manager", "access_ms"}, time.Now(), labels) - pm.pluginInstancesLock.RLock() - defer pm.pluginInstancesLock.RUnlock() + pID := plugins.PluginID{Name: name, PluginType: pluginType} // Attempt to pull our plugin instance from the store and pass this to the - // caller. - // - // TODO(jrasell) if we do not find the instance, we should probably try and - // dispense the plugin. We should also check the plugin instance has not - // exited. - inst, ok := pm.pluginInstances[plugins.PluginID{Name: name, PluginType: pluginType}] + // caller. If the plugin isn't in the store, try to instantiate it; if it + // is, do a health check and attempt to re-instantiate if it fails. + pm.pluginInstancesLock.Lock() + inst, exists := pm.pluginInstances[pID] + + if exists { + if _, err := pm.pluginInfo(pID, inst); err == nil { + // Plugin is fine, return it + pm.pluginInstancesLock.Unlock() + return inst, nil + } + + // The plugin exists, but is broken. Remove it. + pm.logger.Warn("plugin failed healthcheck", "plugin_name", pID.Name) + pm.killPluginLocked(pID) + } else { + pm.logger.Warn("plugin not in store", "plugin_name", pID.Name) + } + pm.pluginInstancesLock.Unlock() + + if err := pm.dispensePlugins(); err != nil { + return nil, fmt.Errorf("failed to dispense plugin: %q of type %q: %w", name, pluginType, err) + } + + pm.pluginInstancesLock.RLock() + inst, ok := pm.pluginInstances[pID] + pm.pluginInstancesLock.RUnlock() if !ok { return nil, fmt.Errorf("failed to dispense plugin: %q of type %q is not stored", name, pluginType) } + return inst, nil } @@ -322,7 +342,23 @@ func (pm *PluginManager) launchExternalPlugin(id plugins.PluginID, info *pluginI } func (pm *PluginManager) pluginLaunchCheck(id plugins.PluginID, info *pluginInfo, raw interface{}) (*base.PluginInfo, error) { + pluginInfo, err := pm.pluginInfo(id, raw) + if err != nil { + return nil, err + } + + // If the plugin name, or plugin do not match it means the executed plugin + // has returned its metadata that is different to the configured. This is a + // problem, particularly in the PluginType sense as it means it will be + // unable to fulfill its role. + if pluginInfo.Name != info.driver || pluginInfo.PluginType != id.PluginType { + return nil, fmt.Errorf("plugin %s remote info doesn't match local config: %v", id.Name, err) + } + + return pluginInfo, nil +} +func (pm *PluginManager) pluginInfo(id plugins.PluginID, raw interface{}) (*base.PluginInfo, error) { // Check that the plugin implements the base plugin interface. As these are // external plugins we need to check this safely, otherwise an incorrect // plugin can cause the core application to panic. @@ -336,14 +372,6 @@ func (pm *PluginManager) pluginLaunchCheck(id plugins.PluginID, info *pluginInfo return nil, fmt.Errorf("failed to call PluginInfo on %s: %v", id.Name, err) } - // If the plugin name, or plugin do not match it means the executed plugin - // has returned its metadata that is different to the configured. This is a - // problem, particularly in the PluginType sense as it means it will be - // unable to fulfill its role. - if pluginInfo.Name != info.driver || pluginInfo.PluginType != id.PluginType { - return nil, fmt.Errorf("plugin %s remote info doesn't match local config: %v", id.Name, err) - } - return pluginInfo, nil } diff --git a/plugins/manager/manager_test.go b/plugins/manager/manager_test.go index 51b0d4dd..3a5880e9 100644 --- a/plugins/manager/manager_test.go +++ b/plugins/manager/manager_test.go @@ -9,9 +9,11 @@ import ( "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad-autoscaler/agent/config" "github.com/hashicorp/nomad-autoscaler/plugins/apm" + "github.com/hashicorp/nomad-autoscaler/plugins/base" "github.com/hashicorp/nomad-autoscaler/plugins/strategy" "github.com/hashicorp/nomad-autoscaler/plugins/target" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestLoad(t *testing.T) { @@ -153,26 +155,7 @@ func TestDispense(t *testing.T) { { name: "external plugin", pluginDir: "../test/bin", - cfg: map[string][]*config.Plugin{ - "apm": { - &config.Plugin{ - Name: "noop", - Driver: "noop-apm", - }, - }, - "strategy": { - &config.Plugin{ - Name: "noop", - Driver: "noop-strategy", - }, - }, - "target": { - &config.Plugin{ - Name: "noop", - Driver: "noop-target", - }, - }, - }, + cfg: externalPlugins(), }, } @@ -205,3 +188,56 @@ func TestDispense(t *testing.T) { }) } } + +func TestExternalPluginDies(t *testing.T) { + logger := hclog.NewNullLogger() + pm := NewPluginManager(logger, "../test/bin", externalPlugins()) + defer pm.KillPlugins() + + err := pm.Load() + require.NoError(t, err) + pi, err := pm.Dispense("noop", "target") + require.NoError(t, err) + + // Verify we can talk to the plugin + client := pi.Plugin().(base.Base) + _, err = client.PluginInfo() + require.NoError(t, err) + + // Kill the plugin without the manager noticing + pi.Kill() + _, err = client.PluginInfo() + require.Error(t, err) + + // Now, re-dispense. The manager should recover + pi, err = pm.Dispense("noop", "target") + require.NoError(t, err) + + // Verify that the returned plugin works + client = pi.Plugin().(base.Base) + _, err = client.PluginInfo() + require.NoError(t, err) +} + +func externalPlugins() map[string][]*config.Plugin { + return map[string][]*config.Plugin{ + "apm": { + &config.Plugin{ + Name: "noop", + Driver: "noop-apm", + }, + }, + "strategy": { + &config.Plugin{ + Name: "noop", + Driver: "noop-strategy", + }, + }, + "target": { + &config.Plugin{ + Name: "noop", + Driver: "noop-target", + }, + }, + } +}