Skip to content

[Feat] Added New AccuWeather Detector Version #4114

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (
)

type Scanner struct {
client *http.Client
Client *http.Client
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might have overlooked something, but I could not understand why this needs to be made public?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The v2 integration tests include test cases that require Client to be exported, e.g.:

name: "found, real secrets, verification error due to timeout",
s: Scanner{Scanner: v1.Scanner{Client: common.SaneHttpClientTimeOut(1 * time.Microsecond)}},

}

const accuweatherURL = "https://dataservice.accuweather.com"
Expand All @@ -35,27 +35,24 @@ func (s Scanner) Keywords() []string {
return []string{"accuweather"}
}

// Version reports the version number of this detector implementation, which is 1.
func (s Scanner) Version() int {
	return 1
}

func (s Scanner) getClient() *http.Client {
if s.client != nil {
return s.client
if s.Client != nil {
return s.Client
}
return defaultClient
}

// FromData will find and optionally verify Accuweather secrets in a given set of bytes.
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
dataStr := string(data)

matches := make(map[string]struct{})
for _, match := range keyPat.FindAllStringSubmatch(dataStr, -1) {
k := match[1]
if detectors.StringShannonEntropy(k) < requiredShannonEntropy {
continue
}
matches[k] = struct{}{}
}
allMatches := keyPat.FindAllStringSubmatch(string(data), -1)
return s.VerifyAllMatches(ctx, allMatches, verify)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we call it processMatches? I believe this function processes and then verifies the findings.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, thanks. That does seem like a more suitable name. Since it needs to be exported, I can rename it to ProcessMatches.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

}

for key := range matches {
func (s Scanner) VerifyAllMatches(ctx context.Context, allMatches [][]string, verify bool) (results []detectors.Result, err error) {
uniqueMatches := getUniqueMatches(allMatches)
for key := range uniqueMatches {
s1 := detectors.Result{
DetectorType: detectorspb.DetectorType_Accuweather,
Raw: []byte(key),
Expand All @@ -71,7 +68,19 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
results = append(results, s1)
}

return results, nil
return
}

// getUniqueMatches collects the distinct values of the first capture group
// across allMatches, dropping any candidate whose Shannon entropy is below
// requiredShannonEntropy.
//
// Each element of allMatches is expected to be a submatch slice in the shape
// produced by FindAllStringSubmatch (index 0 is the whole match, index 1 the
// captured key). Elements that carry no capture group are skipped instead of
// being indexed out of range, because the slices arrive through the exported
// VerifyAllMatches and may be supplied by other packages.
//
// The returned map is never nil; its keys are the surviving candidates.
func getUniqueMatches(allMatches [][]string) map[string]struct{} {
	uniqueMatches := make(map[string]struct{}, len(allMatches))
	for _, match := range allMatches {
		if len(match) < 2 {
			// No capture group present; nothing to extract.
			continue
		}
		k := match[1]
		if detectors.StringShannonEntropy(k) < requiredShannonEntropy {
			continue
		}
		uniqueMatches[k] = struct{}{}
	}
	return uniqueMatches
}

func verifyAccuweather(ctx context.Context, client *http.Client, key string) (bool, error) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func TestAccuweather_FromChunk(t *testing.T) {
},
{
name: "found, real secrets, verification error due to timeout",
s: Scanner{client: common.SaneHttpClientTimeOut(1 * time.Microsecond)},
s: Scanner{Client: common.SaneHttpClientTimeOut(1 * time.Microsecond)},
args: args{
ctx: context.Background(),
data: []byte(fmt.Sprintf("You can find a accuweather secret %s within", secret)),
Expand All @@ -75,7 +75,7 @@ func TestAccuweather_FromChunk(t *testing.T) {
},
{
name: "found, real secrets, verification error due to unexpected api surface",
s: Scanner{client: common.ConstantResponseHttpClient(500, "{}")},
s: Scanner{Client: common.ConstantResponseHttpClient(500, "{}")},
args: args{
ctx: context.Background(),
data: []byte(fmt.Sprintf("You can find a accuweather secret %s within", secret)),
Expand Down
30 changes: 30 additions & 0 deletions pkg/detectors/accuweather/v2/accuweather.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package accuweather

import (
"context"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
v1 "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/accuweather/v1"
)

// Scanner is the version 2 AccuWeather detector. It embeds v1.Scanner, so the
// v1 scanner's exported fields and methods are promoted onto this type.
type Scanner struct {
v1.Scanner
}

// Version reports the version number of this detector implementation, which is 2.
func (s Scanner) Version() int {
	return 2
}

var (
// Ensure the Scanner satisfies the interface at compile time.
_ detectors.Detector = (*Scanner)(nil)

// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"accuweather"}) + `\b([a-zA-Z0-9]{32})\b`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my opinion, the pattern difference between V1 and V2 isn’t significant, and both can be validated against the same API. Could we loosen the V1 pattern slightly to accommodate V2 as well?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey @kashifkhan0771. I believe the pattern would become ([a-z0-9A-Z\%]{35})|\b([a-zA-Z0-9]{32})\b, correct?

Other than avoiding the extra computational cost of the regex, I believe having a newer version of the detector may make it simpler for us to end support for the older version if we ever need to.

There are other examples of new versions being added solely to accommodate a pattern difference, such as the netlify v1 and v2 detectors.

What are your thoughts on this?

)

// FromData applies the v2 key pattern to data and passes every submatch to
// VerifyAllMatches, which builds the results and verifies them when verify is true.
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
	return s.VerifyAllMatches(ctx, keyPat.FindAllStringSubmatch(string(data), -1), verify)
}
169 changes: 169 additions & 0 deletions pkg/detectors/accuweather/v2/accuweather_integration_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
//go:build detectors
// +build detectors

package accuweather

import (
"context"
"fmt"
"testing"
"time"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
v1 "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/accuweather/v1"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

// TestAccuweather_FromChunk exercises the v2 scanner's FromData against test
// secrets loaded through common.GetSecret. It covers a verified key, an
// unverified (inactive) key, two verification-error scenarios driven by an
// injected HTTP client, and input that contains no key at all.
func TestAccuweather_FromChunk(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
	defer cancel()
	testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5")
	if err != nil {
		t.Fatalf("could not get test secrets from GCP: %s", err)
	}
	secret := testSecrets.MustGetField("ACCUWEATHER")
	inactiveSecret := testSecrets.MustGetField("ACCUWEATHER_INACTIVE")

	type args struct {
		ctx    context.Context
		data   []byte
		verify bool
	}
	tests := []struct {
		name    string
		s       Scanner
		args    args
		want    []detectors.Result
		wantErr bool
	}{
		{
			name: "found, verified",
			s:    Scanner{},
			args: args{
				ctx:    context.Background(),
				data:   []byte(fmt.Sprintf("You can find a accuweather secret %s within but verified", secret)),
				verify: true,
			},
			want: []detectors.Result{
				{
					DetectorType: detectorspb.DetectorType_Accuweather,
					Verified:     true,
				},
			},
			wantErr: false,
		},
		{
			name: "found, real secrets, verification error due to timeout",
			// The client lives on the embedded v1 scanner, so it is set through it.
			s: Scanner{Scanner: v1.Scanner{Client: common.SaneHttpClientTimeOut(1 * time.Microsecond)}},
			args: args{
				ctx:    context.Background(),
				data:   []byte(fmt.Sprintf("You can find a accuweather secret %s within", secret)),
				verify: true,
			},
			want: func() []detectors.Result {
				r := detectors.Result{
					DetectorType: detectorspb.DetectorType_Accuweather,
					Verified:     false,
				}
				r.SetVerificationError(context.DeadlineExceeded)
				return []detectors.Result{r}
			}(),
			wantErr: false,
		},
		{
			name: "found, real secrets, verification error due to unexpected api surface",
			s:    Scanner{Scanner: v1.Scanner{Client: common.ConstantResponseHttpClient(500, "{}")}},
			args: args{
				ctx:    context.Background(),
				data:   []byte(fmt.Sprintf("You can find a accuweather secret %s within", secret)),
				verify: true,
			},
			want: func() []detectors.Result {
				r := detectors.Result{
					DetectorType: detectorspb.DetectorType_Accuweather,
					Verified:     false,
				}
				r.SetVerificationError(fmt.Errorf("unexpected HTTP response status 500"))
				return []detectors.Result{r}
			}(),
			wantErr: false,
		},
		{
			name: "found, unverified",
			s:    Scanner{},
			args: args{
				ctx:    context.Background(),
				data:   []byte(fmt.Sprintf("You can find a accuweather secret %s within but verified", inactiveSecret)),
				verify: true,
			},
			want: []detectors.Result{
				{
					DetectorType: detectorspb.DetectorType_Accuweather,
					Verified:     false,
				},
			},
			wantErr: false,
		},
		{
			name: "not found",
			s:    Scanner{},
			args: args{
				ctx:    context.Background(),
				data:   []byte("You cannot find the secret within"),
				verify: true,
			},
			want:    nil,
			wantErr: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
			if (err != nil) != tt.wantErr {
				t.Errorf("Accuweather.FromData() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			// The loop below indexes tt.want by the position in got, so a
			// result-count mismatch must fail cleanly here rather than panic
			// with an index out of range.
			if len(got) != len(tt.want) {
				t.Fatalf("Accuweather.FromData() returned %d results, want %d", len(got), len(tt.want))
			}
			for i := range got {
				if len(got[i].Raw) == 0 {
					t.Fatalf("no raw secret present: \n %+v", got[i])
				}
				// Verification errors are compared by message, since the
				// field is excluded from the cmp.Diff below.
				gotErr := ""
				if got[i].VerificationError() != nil {
					gotErr = got[i].VerificationError().Error()
				}
				wantErr := ""
				if tt.want[i].VerificationError() != nil {
					wantErr = tt.want[i].VerificationError().Error()
				}
				if gotErr != wantErr {
					t.Fatalf("wantVerificationError = %v, verification error = %v", tt.want[i].VerificationError(), got[i].VerificationError())
				}
				got[i].Raw = nil
			}
			ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "RawV2", "verificationError")
			if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" {
				t.Errorf("Accuweather.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
			}
		})
	}
}

// BenchmarkFromData times FromData, with verification disabled, once per
// sample returned by detectors.MustGetBenchmarkData. Each sample runs as its
// own sub-benchmark named after the sample's key.
func BenchmarkFromData(benchmark *testing.B) {
	var (
		scanner = Scanner{}
		bgCtx   = context.Background()
	)
	for label, payload := range detectors.MustGetBenchmarkData() {
		benchmark.Run(label, func(b *testing.B) {
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				if _, err := scanner.FromData(bgCtx, false, payload); err != nil {
					b.Fatal(err)
				}
			}
		})
	}
}
86 changes: 86 additions & 0 deletions pkg/detectors/accuweather/v2/accuweather_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package accuweather

import (
"context"
"fmt"
"testing"

"github.com/google/go-cmp/cmp"

"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick"
)

// Fixtures for TestAccuWeather_Pattern.
var (
// validPattern is a 32-character string made up only of ASCII letters and digits.
validPattern = "dqftwc490oPcxae67sBSF741M56d091a"
// invalidPattern is also 32 characters long but contains a '%', which is not a letter or digit.
invalidPattern = "dqftwc49%oPcxae67sBSF741M56f091a"
)

// TestAccuWeather_Pattern checks which inputs the v2 scanner reports as
// candidate keys when verification is off. Every case must first trigger the
// detector through the Aho-Corasick keyword matcher; the values returned by
// FromData are then compared, as a set, against the expected keys.
func TestAccuWeather_Pattern(t *testing.T) {
	scanner := Scanner{}
	core := ahocorasick.NewAhoCorasickCore([]detectors.Detector{scanner})

	cases := []struct {
		name  string
		input string
		want  []string
	}{
		{
			name:  "valid pattern",
			input: fmt.Sprintf("accuweather token = '%s'", validPattern),
			want:  []string{validPattern},
		},
		{
			name:  "valid pattern - out of prefix range",
			input: fmt.Sprintf("accuweather token keyword is not close to the real token = '%s'", validPattern),
			want:  nil,
		},
		{
			name:  "invalid pattern",
			input: fmt.Sprintf("accuweather = '%s'", invalidPattern),
			want:  nil,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// The keyword pre-filter has to select this detector before FromData is worth calling.
			if hits := core.FindDetectorMatches([]byte(tc.input)); len(hits) == 0 {
				t.Errorf("keywords '%v' not matched by: %s", scanner.Keywords(), tc.input)
				return
			}

			results, err := scanner.FromData(context.Background(), false, []byte(tc.input))
			if err != nil {
				t.Errorf("error = %v", err)
				return
			}

			if gotCount, wantCount := len(results), len(tc.want); gotCount != wantCount {
				switch gotCount {
				case 0:
					t.Errorf("did not receive result")
				default:
					t.Errorf("expected %d results, only received %d", wantCount, gotCount)
				}
				return
			}

			// Compare as sets so the order of results does not matter.
			expected := make(map[string]struct{}, len(tc.want))
			for _, key := range tc.want {
				expected[key] = struct{}{}
			}
			actual := make(map[string]struct{}, len(results))
			for _, res := range results {
				// Prefer RawV2 when the result carries one; otherwise fall back to Raw.
				found := res.Raw
				if len(res.RawV2) > 0 {
					found = res.RawV2
				}
				actual[string(found)] = struct{}{}
			}

			if diff := cmp.Diff(expected, actual); diff != "" {
				t.Errorf("%s diff: (-want +got)\n%s", tc.name, diff)
			}
		})
	}
}
6 changes: 4 additions & 2 deletions pkg/engine/defaults/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/abuseipdb"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/abyssale"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/accuweather"
accuweatherv1 "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/accuweather/v1"
accuweatherv2 "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/accuweather/v2"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/adafruitio"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/adzuna"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/aeroworkflow"
Expand Down Expand Up @@ -841,7 +842,8 @@ func buildDetectorList() []detectors.Detector {
&abyssale.Scanner{},
// &abstract.Scanner{},
&abuseipdb.Scanner{},
&accuweather.Scanner{},
&accuweatherv1.Scanner{},
&accuweatherv2.Scanner{},
&adafruitio.Scanner{},
// &adobeio.Scanner{},
&adzuna.Scanner{},
Expand Down
Loading