Skip to content

Commit 426d08b

Browse files
Add xAI detector (#4117)
1 parent 869df44 commit 426d08b

File tree

6 files changed

+376
-6
lines changed

6 files changed

+376
-6
lines changed

pkg/detectors/xai/xai.go

+124
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
package xai
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"fmt"
7+
"io"
8+
"net/http"
9+
"strings"
10+
11+
regexp "github.com/wasilibs/go-re2"
12+
13+
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
14+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
15+
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
16+
)
17+
18+
// Scanner detects xAI API keys in raw data and can optionally verify them
// against the xAI API.
type Scanner struct {
	// client is the HTTP client used for verification requests. When it is nil,
	// FromData falls back to the package-level defaultClient.
	client *http.Client
}
21+
22+
// Ensure the Scanner satisfies the interface at compile time.
23+
var _ detectors.Detector = (*Scanner)(nil)
24+
25+
var (
26+
defaultClient = common.SaneHttpClient()
27+
// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
28+
keyPat = regexp.MustCompile(`\b(xai-[0-9a-zA-Z_]{80})\b`)
29+
)
30+
31+
// Keywords are used for efficiently pre-filtering chunks.
32+
// Use identifiers in the secret preferably, or the provider name.
33+
func (s Scanner) Keywords() []string {
34+
return []string{"xai-"}
35+
}
36+
37+
// FromData will find and optionally verify Xai secrets in a given set of bytes.
38+
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
39+
dataStr := string(data)
40+
41+
keyMatches := make(map[string]struct{})
42+
for _, match := range keyPat.FindAllStringSubmatch(dataStr, -1) {
43+
keyMatches[match[1]] = struct{}{}
44+
}
45+
46+
for match := range keyMatches {
47+
s1 := detectors.Result{
48+
DetectorType: detectorspb.DetectorType_XAI,
49+
Raw: []byte(match),
50+
}
51+
52+
if verify {
53+
client := s.client
54+
if client == nil {
55+
client = defaultClient
56+
}
57+
58+
isVerified, extraData, verificationErr := verifyMatch(ctx, client, match)
59+
s1.Verified = isVerified
60+
s1.ExtraData = extraData
61+
s1.SetVerificationError(verificationErr, match)
62+
}
63+
64+
results = append(results, s1)
65+
}
66+
67+
return
68+
}
69+
70+
// verifyMatch checks apiKey against the xAI API by requesting the metadata of
// the key itself, sending the key as a Bearer token through client.
//
// The three return values are the verification verdict, optional extra data,
// and a verification error:
//   - HTTP 200 with a decodable body: (true, {"name", "acls"}, nil), where
//     "acls" is the comma-joined ACL list from the response.
//   - HTTP 200 with an undecodable body: (true, nil, err); the key is still
//     verified, only the metadata is missing.
//   - HTTP 400 or 401: (false, nil, nil); the key is determinately invalid.
//   - Any other status, a transport failure, or a failure to build the
//     request: (false, nil, err); the outcome is indeterminate.
func verifyMatch(ctx context.Context, client *http.Client, apiKey string) (bool, map[string]string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.x.ai/v1/api-key", nil)
	if err != nil {
		// A request that could not be built says nothing about the key, so
		// report the error instead of a silent "not verified" verdict.
		return false, nil, err
	}

	req.Header.Add("Content-Type", "application/json")
	req.Header.Add("Authorization", "Bearer "+apiKey)

	res, err := client.Do(req)
	if err != nil {
		return false, nil, err
	}
	defer func() {
		// Drain and close the body so the transport can reuse the connection.
		_, _ = io.Copy(io.Discard, res.Body)
		_ = res.Body.Close()
	}()

	switch res.StatusCode {
	case http.StatusOK:
		// Parse the API response for useful information like name and ACLs.
		var data struct {
			Name string   `json:"name"`
			Acls []string `json:"acls"`
		}
		if err := json.NewDecoder(res.Body).Decode(&data); err != nil {
			// The API key is still verified, but the metadata could not be
			// parsed. Hence, return true for verified along with the error.
			return true, nil, fmt.Errorf("failed to decode response: %w", err)
		}

		// Flatten the relevant fields into the string map used for extra data.
		result := map[string]string{
			"name": data.Name,
			"acls": strings.Join(data.Acls, ","),
		}

		return true, result, nil
	case http.StatusBadRequest, http.StatusUnauthorized:
		// The secret is determinately not verified (nothing to do).
		return false, nil, nil
	default:
		return false, nil, fmt.Errorf("unexpected HTTP response status %d", res.StatusCode)
	}
}
117+
118+
func (s Scanner) Type() detectorspb.DetectorType {
119+
return detectorspb.DetectorType_XAI
120+
}
121+
122+
func (s Scanner) Description() string {
123+
return "xAI is an AI company with the mission of advancing scientific discovery and gaining a deeper understanding of our universe."
124+
}
+161
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
//go:build detectors
2+
// +build detectors
3+
4+
package xai
5+
6+
import (
7+
"context"
8+
"fmt"
9+
"testing"
10+
"time"
11+
12+
"github.com/google/go-cmp/cmp"
13+
"github.com/google/go-cmp/cmp/cmpopts"
14+
15+
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
16+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
17+
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
18+
)
19+
20+
func TestXai_FromChunk(t *testing.T) {
21+
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
22+
defer cancel()
23+
testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5")
24+
if err != nil {
25+
t.Fatalf("could not get test secrets from GCP: %s", err)
26+
}
27+
secret := testSecrets.MustGetField("XAI")
28+
inactiveSecret := testSecrets.MustGetField("XAI_INACTIVE")
29+
30+
type args struct {
31+
ctx context.Context
32+
data []byte
33+
verify bool
34+
}
35+
tests := []struct {
36+
name string
37+
s Scanner
38+
args args
39+
want []detectors.Result
40+
wantErr bool
41+
wantVerificationErr bool
42+
}{
43+
{
44+
name: "found, verified",
45+
s: Scanner{},
46+
args: args{
47+
ctx: context.Background(),
48+
data: []byte(fmt.Sprintf("You can find a xai secret %s within", secret)),
49+
verify: true,
50+
},
51+
want: []detectors.Result{
52+
{
53+
DetectorType: detectorspb.DetectorType_XAI,
54+
Verified: true,
55+
},
56+
},
57+
wantErr: false,
58+
wantVerificationErr: false,
59+
},
60+
{
61+
name: "found, unverified",
62+
s: Scanner{},
63+
args: args{
64+
ctx: context.Background(),
65+
data: []byte(fmt.Sprintf("You can find a xai secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation
66+
verify: true,
67+
},
68+
want: []detectors.Result{
69+
{
70+
DetectorType: detectorspb.DetectorType_XAI,
71+
Verified: false,
72+
},
73+
},
74+
wantErr: false,
75+
wantVerificationErr: false,
76+
},
77+
{
78+
name: "not found",
79+
s: Scanner{},
80+
args: args{
81+
ctx: context.Background(),
82+
data: []byte("You cannot find the secret within"),
83+
verify: true,
84+
},
85+
want: nil,
86+
wantErr: false,
87+
wantVerificationErr: false,
88+
},
89+
{
90+
name: "found, would be verified if not for timeout",
91+
s: Scanner{client: common.SaneHttpClientTimeOut(1 * time.Microsecond)},
92+
args: args{
93+
ctx: context.Background(),
94+
data: []byte(fmt.Sprintf("You can find a xai secret %s within", secret)),
95+
verify: true,
96+
},
97+
want: []detectors.Result{
98+
{
99+
DetectorType: detectorspb.DetectorType_XAI,
100+
Verified: false,
101+
},
102+
},
103+
wantErr: false,
104+
wantVerificationErr: true,
105+
},
106+
{
107+
name: "found, verified but unexpected api surface",
108+
s: Scanner{client: common.ConstantResponseHttpClient(404, "")},
109+
args: args{
110+
ctx: context.Background(),
111+
data: []byte(fmt.Sprintf("You can find a xai secret %s within", secret)),
112+
verify: true,
113+
},
114+
want: []detectors.Result{
115+
{
116+
DetectorType: detectorspb.DetectorType_XAI,
117+
Verified: false,
118+
},
119+
},
120+
wantErr: false,
121+
wantVerificationErr: true,
122+
},
123+
}
124+
for _, tt := range tests {
125+
t.Run(tt.name, func(t *testing.T) {
126+
got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
127+
if (err != nil) != tt.wantErr {
128+
t.Errorf("Xai.FromData() error = %v, wantErr %v", err, tt.wantErr)
129+
return
130+
}
131+
for i := range got {
132+
if len(got[i].Raw) == 0 {
133+
t.Fatalf("no raw secret present: \n %+v", got[i])
134+
}
135+
if (got[i].VerificationError() != nil) != tt.wantVerificationErr {
136+
t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError())
137+
}
138+
}
139+
ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "verificationError", "ExtraData")
140+
if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" {
141+
t.Errorf("Xai.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
142+
}
143+
})
144+
}
145+
}
146+
147+
func BenchmarkFromData(benchmark *testing.B) {
148+
ctx := context.Background()
149+
s := Scanner{}
150+
for name, data := range detectors.MustGetBenchmarkData() {
151+
benchmark.Run(name, func(b *testing.B) {
152+
b.ResetTimer()
153+
for n := 0; n < b.N; n++ {
154+
_, err := s.FromData(ctx, false, data)
155+
if err != nil {
156+
b.Fatal(err)
157+
}
158+
}
159+
})
160+
}
161+
}

pkg/detectors/xai/xai_test.go

+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
package xai
2+
3+
import (
4+
"context"
5+
"github.com/google/go-cmp/cmp"
6+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
7+
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick"
8+
"testing"
9+
)
10+
11+
func TestXai_Pattern(t *testing.T) {
12+
d := Scanner{}
13+
ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d})
14+
tests := []struct {
15+
name string
16+
input string
17+
want []string
18+
}{
19+
{
20+
name: "typical pattern",
21+
input: "xai_token = 'xai-W5zbfUkzlXedo7qD42AbBLlRSsyJrOW5zbfUkzlXedo7qD42AbBLlRSsyJrOW5zbfUkzlXedo7qD42Ab'",
22+
want: []string{"xai-W5zbfUkzlXedo7qD42AbBLlRSsyJrOW5zbfUkzlXedo7qD42AbBLlRSsyJrOW5zbfUkzlXedo7qD42Ab"},
23+
},
24+
{
25+
name: "finds all matches",
26+
input: `grok_token1 = 'xai-W5zbfUkzlXedo7qD42AbBLlRSsyJrOW5zbfUkzlXedo7qD42AbBLlRSsyJrOW5zbfUkzlXedo7qD42Ab'
27+
xai_token2 = 'xai-W5zbfUkzlXedo7qD42AbBLlRSsyJr1W5zbfUkzlXedo7qD42AbBLlRSsyJrOW5zbfUkzlXedo7qD42Ab'`,
28+
want: []string{"xai-W5zbfUkzlXedo7qD42AbBLlRSsyJrOW5zbfUkzlXedo7qD42AbBLlRSsyJrOW5zbfUkzlXedo7qD42Ab", "xai-W5zbfUkzlXedo7qD42AbBLlRSsyJr1W5zbfUkzlXedo7qD42AbBLlRSsyJrOW5zbfUkzlXedo7qD42Ab"},
29+
},
30+
{
31+
name: "invalid pattern",
32+
input: "xai_token = 'xai-W5zbfUkzlXedo7qD42AbBLlRSsyJrOW5zbfUkzlXe'",
33+
want: []string{},
34+
},
35+
}
36+
37+
for _, test := range tests {
38+
t.Run(test.name, func(t *testing.T) {
39+
matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input))
40+
if len(matchedDetectors) == 0 {
41+
t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input)
42+
return
43+
}
44+
45+
results, err := d.FromData(context.Background(), false, []byte(test.input))
46+
if err != nil {
47+
t.Errorf("error = %v", err)
48+
return
49+
}
50+
51+
if len(results) != len(test.want) {
52+
if len(results) == 0 {
53+
t.Errorf("did not receive result")
54+
} else {
55+
t.Errorf("expected %d results, only received %d", len(test.want), len(results))
56+
}
57+
return
58+
}
59+
60+
actual := make(map[string]struct{}, len(results))
61+
for _, r := range results {
62+
if len(r.RawV2) > 0 {
63+
actual[string(r.RawV2)] = struct{}{}
64+
} else {
65+
actual[string(r.Raw)] = struct{}{}
66+
}
67+
}
68+
expected := make(map[string]struct{}, len(test.want))
69+
for _, v := range test.want {
70+
expected[v] = struct{}{}
71+
}
72+
73+
if diff := cmp.Diff(expected, actual); diff != "" {
74+
t.Errorf("%s diff: (-want +got)\n%s", test.name, diff)
75+
}
76+
})
77+
}
78+
}

pkg/engine/defaults/defaults.go

+2
Original file line numberDiff line numberDiff line change
@@ -812,6 +812,7 @@ import (
812812
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/worldcoinindex"
813813
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/worldweather"
814814
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/wrike"
815+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/xai"
815816
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/yandex"
816817
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/yelp"
817818
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/youneedabudget"
@@ -1674,6 +1675,7 @@ func buildDetectorList() []detectors.Detector {
16741675
&workstack.Scanner{},
16751676
&worldcoinindex.Scanner{},
16761677
&worldweather.Scanner{},
1678+
&xai.Scanner{},
16771679
&wrike.Scanner{},
16781680
&yandex.Scanner{},
16791681
&yelp.Scanner{},

0 commit comments

Comments
 (0)