Skip to content

Commit f7836fb

Browse files
sec: fix s3 and gcs host checks (#512)
* sec: fix incorrect host checks for s3 and gcs * add more tests * resolve host detection using url * fix windows url parse * add more tests and trim http scheme * move to strings.Contains for better compatibility and ease * preserve error message
1 parent 7dddd13 commit f7836fb

File tree

7 files changed

+201
-15
lines changed

7 files changed

+201
-15
lines changed

detect_gcs.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ func (d *GCSDetector) Detect(src, _ string) (string, bool, error) {
1818
return "", false, nil
1919
}
2020

21-
if strings.Contains(src, "googleapis.com/") {
21+
if strings.Contains(src, ".googleapis.com/") {
2222
return d.detectHTTP(src)
2323
}
2424

detect_gcs_test.go

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ func TestGCSDetector(t *testing.T) {
2424
"www.googleapis.com/storage/v1/foo/bar.baz",
2525
"gcs::https://www.googleapis.com/storage/v1/foo/bar.baz",
2626
},
27+
{
28+
"www.googleapis.com/storage/v2/foo/bar/toor.baz",
29+
"gcs::https://www.googleapis.com/storage/v2/foo/bar/toor.baz",
30+
},
2731
}
2832

2933
pwd := "/pwd"
@@ -42,3 +46,58 @@ func TestGCSDetector(t *testing.T) {
4246
}
4347
}
4448
}
49+
50+
func TestGCSDetector_MalformedDetectHTTP(t *testing.T) {
51+
cases := []struct {
52+
Name string
53+
Input string
54+
Expected string
55+
Output string
56+
}{
57+
{
58+
"valid url",
59+
"www.googleapis.com/storage/v1/my-bucket/foo/bar",
60+
"",
61+
"gcs::https://www.googleapis.com/storage/v1/my-bucket/foo/bar",
62+
},
63+
{
64+
"empty url",
65+
"",
66+
"",
67+
"",
68+
},
69+
{
70+
"not valid url",
71+
"storage/v1/my-bucket/foo/bar",
72+
"error parsing GCS URL",
73+
"",
74+
},
75+
{
76+
"not valid url domain",
77+
"www.googleapis.com.invalid/storage/v1/",
78+
"URL is not a valid GCS URL",
79+
"",
80+
},
81+
{
82+
"not valid url length",
83+
"http://www.googleapis.com/storage",
84+
"URL is not a valid GCS URL",
85+
"",
86+
},
87+
}
88+
89+
pwd := "/pwd"
90+
f := new(GCSDetector)
91+
for _, tc := range cases {
92+
output, _, err := f.Detect(tc.Input, pwd)
93+
if err != nil {
94+
if err.Error() != tc.Expected {
95+
t.Fatalf("expected error %s, got %s for %s", tc.Expected, err.Error(), tc.Name)
96+
}
97+
}
98+
99+
if output != tc.Output {
100+
t.Fatalf("expected %s, got %s for %s", tc.Output, output, tc.Name)
101+
}
102+
}
103+
}

detect_s3_test.go

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,58 @@ func TestS3Detector(t *testing.T) {
9090
}
9191
}
9292
}
93+
94+
func TestS3Detector_MalformedDetectHTTP(t *testing.T) {
95+
cases := []struct {
96+
Name string
97+
Input string
98+
Expected string
99+
Output string
100+
}{
101+
{
102+
"valid url",
103+
"s3.amazonaws.com/bucket/foo/bar",
104+
"",
105+
"s3::https://s3.amazonaws.com/bucket/foo/bar",
106+
},
107+
{
108+
"empty url",
109+
"",
110+
"",
111+
"",
112+
},
113+
{
114+
"not valid url",
115+
"bucket/foo/bar",
116+
"error parsing S3 URL",
117+
"",
118+
},
119+
{
120+
"not valid url domain",
121+
"s3.amazonaws.com.invalid/bucket/foo/bar",
122+
"error parsing S3 URL",
123+
"",
124+
},
125+
{
126+
"not valid url lenght",
127+
"http://s3.amazonaws.com",
128+
"URL is not a valid S3 URL",
129+
"",
130+
},
131+
}
132+
133+
pwd := "/pwd"
134+
f := new(S3Detector)
135+
for _, tc := range cases {
136+
output, _, err := f.Detect(tc.Input, pwd)
137+
if err != nil {
138+
if err.Error() != tc.Expected {
139+
t.Fatalf("expected error %s, got %s for %s", tc.Expected, err.Error(), tc.Name)
140+
}
141+
}
142+
143+
if output != tc.Output {
144+
t.Fatalf("expected %s, got %s for %s", tc.Output, output, tc.Name)
145+
}
146+
}
147+
}

get_gcs.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ func (g *GCSGetter) getObject(ctx context.Context, client *storage.Client, dst,
193193
}
194194

195195
func (g *GCSGetter) parseURL(u *url.URL) (bucket, path, fragment string, err error) {
196-
if strings.Contains(u.Host, "googleapis.com") {
196+
if strings.HasSuffix(u.Host, ".googleapis.com") {
197197
hostParts := strings.Split(u.Host, ".")
198198
if len(hostParts) != 3 {
199199
err = fmt.Errorf("URL is not a valid GCS URL")
@@ -208,6 +208,8 @@ func (g *GCSGetter) parseURL(u *url.URL) (bucket, path, fragment string, err err
208208
bucket = pathParts[3]
209209
path = pathParts[4]
210210
fragment = u.Fragment
211+
} else {
212+
err = fmt.Errorf("URL is not a valid GCS URL")
211213
}
212214
return
213215
}

get_gcs_test.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,3 +233,59 @@ func TestGCSGetter_GetFile_OAuthAccessToken(t *testing.T) {
233233
}
234234
assertContents(t, dst, "# Main\n")
235235
}
236+
237+
func Test_GCSGetter_ParseUrl(t *testing.T) {
238+
tests := []struct {
239+
name string
240+
url string
241+
}{
242+
{
243+
name: "valid host",
244+
url: "https://www.googleapis.com/storage/v1/hc-go-getter-test/go-getter/foobar",
245+
},
246+
}
247+
for _, tt := range tests {
248+
t.Run(tt.name, func(t *testing.T) {
249+
g := new(GCSGetter)
250+
u, err := url.Parse(tt.url)
251+
if err != nil {
252+
t.Fatalf("unexpected error: %s", err)
253+
}
254+
_, _, _, err = g.parseURL(u)
255+
if err != nil {
256+
t.Fatalf("wasn't expecting error, got %s", err)
257+
}
258+
})
259+
}
260+
}
261+
func Test_GCSGetter_ParseUrl_Malformed(t *testing.T) {
262+
tests := []struct {
263+
name string
264+
url string
265+
}{
266+
{
267+
name: "invalid host suffix",
268+
url: "https://www.googleapis.com.invalid",
269+
},
270+
{
271+
name: "host suffix with a typo",
272+
url: "https://www.googleapi.com.",
273+
},
274+
}
275+
for _, tt := range tests {
276+
t.Run(tt.name, func(t *testing.T) {
277+
g := new(GCSGetter)
278+
u, err := url.Parse(tt.url)
279+
if err != nil {
280+
t.Fatalf("unexpected error: %s", err)
281+
}
282+
_, _, _, err = g.parseURL(u)
283+
if err == nil {
284+
t.Fatalf("expected error, got none")
285+
}
286+
if err.Error() != "URL is not a valid GCS URL" {
287+
t.Fatalf("expected error 'URL is not a valid GCS URL', got %s", err.Error())
288+
}
289+
})
290+
}
291+
}

get_s3.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ func (g *S3Getter) parseUrl(u *url.URL) (region, bucket, path, version string, c
252252
// This just check whether we are dealing with S3 or
253253
// any other S3 compliant service. S3 has a predictable
254254
// url as others do not
255-
if strings.Contains(u.Host, "amazonaws.com") {
255+
if strings.HasSuffix(u.Host, ".amazonaws.com") {
256256
// Amazon S3 supports both virtual-hosted–style and path-style URLs to access a bucket, although path-style is deprecated
257257
// In both cases few older regions supports dash-style region indication (s3-Region) even if AWS discourages their use.
258258
// The same bucket could be reached with:
@@ -304,7 +304,7 @@ func (g *S3Getter) parseUrl(u *url.URL) (region, bucket, path, version string, c
304304
path = pathParts[1]
305305

306306
}
307-
if len(hostParts) < 3 && len(hostParts) > 5 {
307+
if len(hostParts) < 3 || len(hostParts) > 5 {
308308
err = fmt.Errorf("URL is not a valid S3 URL")
309309
return
310310
}

get_s3_test.go

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -278,35 +278,49 @@ func TestS3Getter_Url(t *testing.T) {
278278

279279
func Test_S3Getter_ParseUrl_Malformed(t *testing.T) {
280280
tests := []struct {
281-
name string
282-
url string
281+
name string
282+
input string
283+
expected string
283284
}{
284285
{
285-
name: "path style",
286-
url: "https://s3.amazonaws.com/bucket",
286+
name: "path style",
287+
input: "https://s3.amazonaws.com/bucket",
288+
expected: "URL is not a valid S3 URL",
287289
},
288290
{
289-
name: "vhost-style, dash region indication",
290-
url: "https://bucket.s3-us-east-1.amazonaws.com",
291+
name: "vhost-style, dash region indication",
292+
input: "https://bucket.s3-us-east-1.amazonaws.com",
293+
expected: "URL is not a valid S3 URL",
291294
},
292295
{
293-
name: "vhost-style, dot region indication",
294-
url: "https://bucket.s3.us-east-1.amazonaws.com",
296+
name: "vhost-style, dot region indication",
297+
input: "https://bucket.s3.us-east-1.amazonaws.com",
298+
expected: "URL is not a valid S3 URL",
299+
},
300+
{
301+
name: "invalid host parts",
302+
input: "https://invalid.host.parts.lenght.s3.us-east-1.amazonaws.com",
303+
expected: "URL is not a valid S3 URL",
304+
},
305+
{
306+
name: "invalid host suffix",
307+
input: "https://bucket.s3.amazonaws.com.invalid",
308+
expected: "URL is not a valid S3 compliant URL",
295309
},
296310
}
297311
for _, tt := range tests {
298312
t.Run(tt.name, func(t *testing.T) {
299313
g := new(S3Getter)
300-
u, err := url.Parse(tt.url)
314+
u, err := url.Parse(tt.input)
301315
if err != nil {
302316
t.Fatalf("unexpected error: %s", err)
303317
}
304318
_, _, _, _, _, err = g.parseUrl(u)
305319
if err == nil {
306320
t.Fatalf("expected error, got none")
307321
}
308-
if err.Error() != "URL is not a valid S3 URL" {
309-
t.Fatalf("expected error 'URL is not a valid S3 URL', got %s", err.Error())
322+
if err.Error() != tt.expected {
323+
t.Fatalf("expected error '%s', got %s for %s", tt.expected, err.Error(), tt.name)
310324
}
311325
})
312326
}

0 commit comments

Comments
 (0)