Skip to content

Commit

Permalink
Customize results cleaning (using smuggled interface) (#3235)
Browse files Browse the repository at this point in the history
We have identified some cases in which the results "cleaning" logic (the logic that eliminates superfluous results) should not run. In order to allow this, we need to expose the cleaning logic to the engine. This PR does so by doing these things:

- Create a CustomResultsCleaner interface that can be implemented by detectors that want to use custom cleaning logic
- Implement this interface for the aws and awssessionkey detectors (and remove their previous invocation of their custom cleaning logic)
- Modify the engine to invoke this logic (conditionally)

This PR also removes the "custom" cleaning logic for the opsgenie, razorpay, and twilio detectors, because it was added erroneously.

This is an alternative implementation of #3233.
  • Loading branch information
rosecodym authored Aug 21, 2024
1 parent a0400c1 commit f39a525
Show file tree
Hide file tree
Showing 8 changed files with 116 additions and 11 deletions.
9 changes: 7 additions & 2 deletions pkg/detectors/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ func WithSkipIDs(skipIDs []string) func(*scanner) {
// Ensure the scanner satisfies the interface at compile time.
var _ detectors.Detector = (*scanner)(nil)
var _ detectors.MultiPartCredentialProvider = (*scanner)(nil)
var _ detectors.CustomResultsCleaner = (*scanner)(nil)

var (
defaultVerificationClient = common.SaneHttpClient()
Expand Down Expand Up @@ -245,7 +246,11 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result
}
}
}
return awsCustomCleanResults(results), nil
return results, nil
}

func (s scanner) ShouldCleanResultsIrrespectiveOfConfiguration() bool {
return true
}

func (s scanner) verifyMatch(ctx context.Context, resIDMatch, resSecretMatch string, retryOn403 bool) (bool, map[string]string, error) {
Expand Down Expand Up @@ -399,7 +404,7 @@ func (s scanner) verifyCanary(resIDMatch, resSecretMatch string) (bool, string,
}
}

func awsCustomCleanResults(results []detectors.Result) []detectors.Result {
func (s scanner) CleanResults(results []detectors.Result) []detectors.Result {
if len(results) == 0 {
return results
}
Expand Down
9 changes: 7 additions & 2 deletions pkg/detectors/awssessionkeys/awssessionkey.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ func WithSkipIDs(skipIDs []string) func(*scanner) {

// Ensure the scanner satisfies the interface at compile time.
var _ detectors.Detector = (*scanner)(nil)
var _ detectors.CustomResultsCleaner = (*scanner)(nil)

var (
defaultVerificationClient = common.SaneHttpClient()
Expand Down Expand Up @@ -167,7 +168,11 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result
}
}
}
return awsCustomCleanResults(results), nil
return results, nil
}

func (s scanner) ShouldCleanResultsIrrespectiveOfConfiguration() bool {
return true
}

func (s scanner) verifyMatch(ctx context.Context, resIDMatch, resSecretMatch string, resSessionMatch string, retryOn403 bool) (bool, map[string]string, error) {
Expand Down Expand Up @@ -283,7 +288,7 @@ func (s scanner) verifyMatch(ctx context.Context, resIDMatch, resSecretMatch str
}
}

func awsCustomCleanResults(results []detectors.Result) []detectors.Result {
func (s scanner) CleanResults(results []detectors.Result) []detectors.Result {
if len(results) == 0 {
return results
}
Expand Down
16 changes: 16 additions & 0 deletions pkg/detectors/detectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,22 @@ type Detector interface {
Type() detectorspb.DetectorType
}

// CustomResultsCleaner is an optional interface that a detector can implement to customize how its generated results
// are "cleaned," which is defined as removing superfluous results from those found in a given chunk. The default
// implementation of this logic removes all unverified results if there are any verified results, and all unverified
// results except for one otherwise, but this interface allows a detector to specify different logic. (This logic must
// be implemented outside results generation because there are circumstances under which the engine should not execute
// it.)
type CustomResultsCleaner interface {
// CleanResults removes "superfluous" results from a result set (where the definition of "superfluous" is detector-
// specific).
CleanResults(results []Result) []Result
// ShouldCleanResultsIrrespectiveOfConfiguration allows a custom cleaner to instruct the engine to ignore
// user-provided configuration that controls whether results are cleaned. (User-provided configuration is not the
// only factor that determines whether the engine runs cleaning logic.)
ShouldCleanResultsIrrespectiveOfConfiguration() bool
}

// Versioner is an optional interface that a detector can implement to
// differentiate instances of the same detector type.
type Versioner interface {
Expand Down
2 changes: 1 addition & 1 deletion pkg/detectors/opsgenie/opsgenie.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,5 +94,5 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
results = append(results, s1)
}

return detectors.CleanResults(results), nil
return results, nil
}
7 changes: 4 additions & 3 deletions pkg/detectors/razorpay/razorpay.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,17 @@ package razorpay
import (
"context"
"encoding/json"
regexp "github.com/wasilibs/go-re2"
"io"
"net/http"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

type Scanner struct{
type Scanner struct {
detectors.DefaultMultiPartCredentialProvider
}

Expand Down Expand Up @@ -77,7 +78,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result

}

return detectors.CleanResults(results), nil
return results, nil
}

func (s Scanner) Type() detectorspb.DetectorType {
Expand Down
2 changes: 1 addition & 1 deletion pkg/detectors/twilio/twilio.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
}
}

return detectors.CleanResults(results), nil
return results, nil
}

func (s Scanner) Type() detectorspb.DetectorType {
Expand Down
13 changes: 11 additions & 2 deletions pkg/engine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -1060,15 +1060,24 @@ func (e *Engine) filterResults(
detector *ahocorasick.DetectorMatch,
results []detectors.Result,
) []detectors.Result {
if e.filterUnverified {
results = detectors.CleanResults(results)
clean := detectors.CleanResults
ignoreConfig := false
if cleaner, ok := detector.Detector.(detectors.CustomResultsCleaner); ok {
clean = cleaner.CleanResults
ignoreConfig = cleaner.ShouldCleanResultsIrrespectiveOfConfiguration()
}
if e.filterUnverified || ignoreConfig {
results = clean(results)
}

if !e.retainFalsePositives {
results = detectors.FilterKnownFalsePositives(ctx, detector.Detector, results)
}

if e.filterEntropy != 0 {
results = detectors.FilterResultsWithEntropy(ctx, results, e.filterEntropy, e.retainFalsePositives)
}

return results
}

Expand Down
69 changes: 69 additions & 0 deletions pkg/engine/engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,75 @@ func TestLikelyDuplicate(t *testing.T) {
}
}

type customCleaner struct {
ignoreConfig bool
}

var _ detectors.CustomResultsCleaner = (*customCleaner)(nil)
var _ detectors.Detector = (*customCleaner)(nil)

func (c customCleaner) FromData(aCtx.Context, bool, []byte) ([]detectors.Result, error) {
return []detectors.Result{}, nil
}

func (c customCleaner) Keywords() []string { return []string{} }
func (c customCleaner) Type() detectorspb.DetectorType { return detectorspb.DetectorType(-1) }

func (c customCleaner) CleanResults([]detectors.Result) []detectors.Result {
return []detectors.Result{}
}
func (c customCleaner) ShouldCleanResultsIrrespectiveOfConfiguration() bool { return c.ignoreConfig }

func TestFilterResults_CustomCleaner(t *testing.T) {
testCases := []struct {
name string
cleaningConfigured bool
ignoreConfig bool
resultsToClean []detectors.Result
wantResults []detectors.Result
}{
{
name: "respect config to clean",
cleaningConfigured: true,
ignoreConfig: false,
resultsToClean: []detectors.Result{{}},
wantResults: []detectors.Result{},
},
{
name: "respect config to not clean",
cleaningConfigured: false,
ignoreConfig: false,
resultsToClean: []detectors.Result{{}},
wantResults: []detectors.Result{{}},
},
{
name: "clean irrespective of config",
cleaningConfigured: false,
ignoreConfig: true,
resultsToClean: []detectors.Result{{}},
wantResults: []detectors.Result{},
},
}

for _, tt := range testCases {
t.Run(tt.name, func(t *testing.T) {
match := ahocorasick.DetectorMatch{
Detector: customCleaner{
ignoreConfig: tt.ignoreConfig,
},
}
engine := Engine{
filterUnverified: tt.cleaningConfigured,
retainFalsePositives: true,
}

cleaned := engine.filterResults(context.Background(), &match, tt.resultsToClean)

assert.ElementsMatch(t, tt.wantResults, cleaned)
})
}
}

func BenchmarkPopulateMatchingDetectors(b *testing.B) {
allDetectors := DefaultDetectors()
ac := ahocorasick.NewAhoCorasickCore(allDetectors)
Expand Down

0 comments on commit f39a525

Please sign in to comment.