Skip to content

Commit 8bd780f

Browse files
committed
syntax: add a LangVariant parameter to Quote
I had missed that $'' expansions are non-POSIX, and only implemented by Bash and mksh. So, in POSIX mode, we can't quote non-printable characters. Moreover, fuzzing uncovered that mksh implements \x differently, meaning that we require extra logic to follow its rules. Keep all the fuzz crashers that we found in the process. Since we've started having more edge cases that we can't quote, start returning an error in the API, with a QuoteError type. All it gives right now is a character position and a reason. Finally, document what versions of Bash and mksh we develop with. This matters, because some systems ship with very old versions, which can implement slightly different quoting or escaping rules. While at it, start using quicktest for the tests.
1 parent 1ee509b commit 8bd780f

16 files changed

+318
-131
lines changed

expand/param.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -263,11 +263,11 @@ func (cfg *Config) paramExp(pe *syntax.ParamExp) (string, error) {
263263
case syntax.OtherParamOps:
264264
switch arg {
265265
case "Q":
266-
var ok bool
267-
str, ok = syntax.Quote(str)
268-
if !ok {
269-
// Variables can't contain null bytes.
270-
panic("syntax.Quote should never fail on a variable")
266+
str, err = syntax.Quote(str, syntax.LangBash)
267+
if err != nil {
268+
// Is this even possible? If a user runs into this panic,
269+
// it's most likely a bug we need to fix.
270+
panic(err)
271271
}
272272
case "E":
273273
tail := str

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ go 1.16
44

55
require (
66
github.com/creack/pty v1.1.15
7+
github.com/frankban/quicktest v1.13.1
78
github.com/google/renameio v1.0.1
89
github.com/kr/pretty v0.3.0
910
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e

go.sum

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
22
github.com/creack/pty v1.1.15 h1:cKRCLMj3Ddm54bKSpemfQ8AtYFBhAI2MPmdys22fBdc=
33
github.com/creack/pty v1.1.15/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
4+
github.com/frankban/quicktest v1.13.1 h1:xVm/f9seEhZFL9+n5kv5XLrGwy6elc4V9v/XFY2vmd8=
5+
github.com/frankban/quicktest v1.13.1/go.mod h1:NeW+ay9A/U67EYXNFA1nPE8e/tnQv/09mUdL/ijj8og=
6+
github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ=
7+
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
48
github.com/google/renameio v1.0.1 h1:Lh/jXZmvZxb0BBeSY5VKEfidcbcbenKjZFzM/q0fSeU=
59
github.com/google/renameio v1.0.1/go.mod h1:t/HQoYBZSsWSNK35C6CO/TpPLDVWvxOHboWUAweKUpk=
610
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
@@ -22,6 +26,8 @@ golang.org/x/sys v0.0.0-20210925032602-92d5a993a665 h1:QOQNt6vCjMpXE7JSK5VvAzJC1
2226
golang.org/x/sys v0.0.0-20210925032602-92d5a993a665/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
2327
golang.org/x/term v0.0.0-20210916214954-140adaaadfaf h1:Ihq/mm/suC88gF8WFcVwk+OV6Tq+wyA1O0E5UEvDglI=
2428
golang.org/x/term v0.0.0-20210916214954-140adaaadfaf/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
29+
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
30+
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
2531
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
2632
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
2733
gopkg.in/errgo.v2 v2.1.0 h1:0vLT13EuvQ0hNvakwLuFZ/jYrLp5F3kcWHXdRggjCE8=

syntax/example_test.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,11 @@ func ExampleQuote() {
112112
"invalid-\xe2'",
113113
"nonprint-\x0b\x1b",
114114
} {
115-
quoted, ok := syntax.Quote(s)
116-
if !ok {
117-
fmt.Printf("%q cannot be quoted", s)
115+
// We assume Bash syntax here.
116+
// For general shell syntax quoting, use syntax.LangPOSIX.
117+
quoted, err := syntax.Quote(s, syntax.LangBash)
118+
if err != nil {
119+
fmt.Printf("%q cannot be quoted: %v\n", s, err)
118120
} else {
119121
fmt.Printf("Quote(%17q): %s\n", s, quoted)
120122
}

syntax/fuzz_test.go

Lines changed: 43 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
package syntax
88

99
import (
10+
"fmt"
1011
"io"
1112
"os/exec"
1213
"strings"
@@ -19,21 +20,46 @@ func FuzzQuote(f *testing.F) {
1920
}
2021

2122
// Keep in sync with ExampleQuote.
22-
f.Add("foo")
23-
f.Add("bar $baz")
24-
f.Add(`"won't"`)
25-
f.Add(`~/home`)
26-
f.Add("#1304")
27-
f.Add("name=value")
28-
f.Add(`glob-*`)
29-
f.Add("invalid-\xe2'")
30-
f.Add("nonprint-\x0b\x1b")
31-
f.Fuzz(func(t *testing.T, s string) {
32-
quoted, ok := Quote(s)
33-
if !ok {
34-
// Contains a null byte; not interesting.
23+
f.Add("foo", uint8(LangBash))
24+
f.Add("bar $baz", uint8(LangBash))
25+
f.Add(`"won't"`, uint8(LangBash))
26+
f.Add(`~/home`, uint8(LangBash))
27+
f.Add("#1304", uint8(LangBash))
28+
f.Add("name=value", uint8(LangBash))
29+
f.Add(`glob-*`, uint8(LangBash))
30+
f.Add("invalid-\xe2'", uint8(LangBash))
31+
f.Add("nonprint-\x0b\x1b", uint8(LangBash))
32+
f.Fuzz(func(t *testing.T, s string, langVariant uint8) {
33+
if langVariant > 3 {
34+
t.Skip() // lang variants are 0-3
35+
}
36+
lang := LangVariant(langVariant)
37+
quoted, err := Quote(s, lang)
38+
if err != nil {
39+
// Cannot be quoted; not interesting.
3540
t.Skip()
3641
}
42+
43+
var shellProgram string
44+
switch lang {
45+
case LangBash:
46+
hasBash51(t)
47+
shellProgram = "bash"
48+
case LangPOSIX:
49+
hasDash059(t)
50+
shellProgram = "dash"
51+
case LangMirBSDKorn:
52+
hasMksh59(t)
53+
shellProgram = "mksh"
54+
case LangBats:
55+
t.Skip() // bats has no shell and its syntax is just bash
56+
default:
57+
panic(fmt.Sprintf("unknown lang variant: %d", lang))
58+
}
59+
60+
// TODO: Also double-check with our parser.
61+
// That should allow us to fuzz Bats too, for instance.
62+
3763
// Beware that this might run arbitrary code
3864
// if Quote is too naive and allows ';' or '$'.
3965
//
@@ -43,13 +69,14 @@ func FuzzQuote(f *testing.F) {
4369
//
4470
// We could consider ways to fully sandbox the shell process,
4571
// but for now that feels overkill.
46-
out, err := exec.Command("bash", "-c", "printf %s "+quoted).CombinedOutput()
72+
out, err := exec.Command(shellProgram, "-c", "printf %s "+quoted).CombinedOutput()
4773
if err != nil {
48-
t.Fatalf("bash error on %q quoted as %s: %v: %s", s, quoted, err, out)
74+
t.Fatalf("%s error on %q quoted as %s: %v: %s", shellProgram, s, quoted, err, out)
4975
}
5076
want, got := s, string(out)
5177
if want != got {
52-
t.Fatalf("output mismatch on %q quoted as %s: got %q (len=%d)", want, quoted, got, len(got))
78+
t.Fatalf("%s output mismatch on %q quoted as %s: got %q (len=%d)",
79+
shellProgram, want, quoted, got, len(got))
5380
}
5481
})
5582
}

syntax/lexer.go

Lines changed: 0 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,7 @@ package syntax
55

66
import (
77
"bytes"
8-
"fmt"
98
"io"
10-
"strconv"
11-
"strings"
12-
"unicode"
139
"unicode/utf8"
1410
)
1511

@@ -1147,106 +1143,3 @@ func testBinaryOp(val string) BinTestOperator {
11471143
return 0
11481144
}
11491145
}
1150-
1151-
// Quote returns a quoted version of the input string,
1152-
// so that the quoted version is always expanded or interpreted
1153-
// as the original string.
1154-
//
1155-
// When the boolean result is false,
1156-
// the input string cannot be quoted to satisfy the rule above.
1157-
// For example, an expanded shell string can't contain a null byte.
1158-
//
1159-
// Quoting is necessary when using arbitrary literal strings
1160-
// as words in a shell script or command.
1161-
// Without quoting, one could run into syntax errors,
1162-
// as well as the possibility of running unintended code.
1163-
//
1164-
// The quoting strategy is chosen on a best-effort basis,
1165-
// to minimize the amount of extra bytes necessary.
1166-
//
1167-
// Some strings do not require any quoting and are returned unchanged.
1168-
// Those strings can be directly surrounded in single quotes.
1169-
func Quote(s string) (_ string, ok bool) {
1170-
shellChars := false
1171-
nonPrintable := false
1172-
for _, r := range s {
1173-
switch r {
1174-
// Like regOps; token characters.
1175-
case ';', '"', '\'', '(', ')', '$', '|', '&', '>', '<', '`',
1176-
// Whitespace; might result in multiple fields.
1177-
' ', '\t', '\r', '\n',
1178-
// Escape sequences would be expanded.
1179-
'\\',
1180-
// Would start a comment unless quoted.
1181-
'#',
1182-
// Might result in brace expansion.
1183-
'{',
1184-
// Might result in tilde expansion.
1185-
'~',
1186-
// Might result in globbing.
1187-
'*', '?', '[',
1188-
// Might result in an assignment.
1189-
'=':
1190-
shellChars = true
1191-
case '\x00':
1192-
// We can't quote null bytes.
1193-
return "", false
1194-
}
1195-
if r == utf8.RuneError || !unicode.IsPrint(r) {
1196-
nonPrintable = true
1197-
}
1198-
}
1199-
if !shellChars && !nonPrintable && !IsKeyword(s) {
1200-
// Nothing to quote; avoid allocating.
1201-
return s, true
1202-
}
1203-
1204-
// Single quotes are usually best,
1205-
// as they don't require any escaping of characters.
1206-
// If we have any invalid utf8 or non-printable runes,
1207-
// use $'' so that we can escape them.
1208-
// Note that we can't use double quotes for those.
1209-
var b strings.Builder
1210-
if nonPrintable {
1211-
b.WriteString("$'")
1212-
quoteBuf := make([]byte, 0, 16)
1213-
for rem := s; len(rem) > 0; {
1214-
r, size := utf8.DecodeRuneInString(rem)
1215-
switch {
1216-
case r == utf8.RuneError && size == 1:
1217-
fmt.Fprintf(&b, "\\x%x", rem[0])
1218-
case !unicode.IsPrint(r):
1219-
quoteBuf = quoteBuf[:0]
1220-
quoteBuf = strconv.AppendQuoteRuneToASCII(quoteBuf, r)
1221-
// We don't want the single quotes from strconv.
1222-
b.Write(quoteBuf[1 : len(quoteBuf)-1])
1223-
case r == '\'', r == '\\':
1224-
b.WriteByte('\\')
1225-
b.WriteRune(r)
1226-
default:
1227-
b.WriteRune(r)
1228-
}
1229-
rem = rem[size:]
1230-
}
1231-
b.WriteString("'")
1232-
return b.String(), true
1233-
}
1234-
1235-
// Single quotes without any need for escaping.
1236-
if !strings.Contains(s, "'") {
1237-
return "'" + s + "'", true
1238-
}
1239-
1240-
// The string contains single quotes,
1241-
// so fall back to double quotes.
1242-
b.WriteByte('"')
1243-
for _, r := range s {
1244-
switch r {
1245-
case '"', '\\', '`', '$':
1246-
b.WriteByte('\\')
1247-
}
1248-
b.WriteRune(r)
1249-
}
1250-
b.WriteByte('"')
1251-
return b.String(), true
1252-
}

syntax/parser.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ const (
3131
// LangBash corresponds to the GNU Bash language, as described in its
3232
// manual at https://www.gnu.org/software/bash/manual/bash.html.
3333
//
34+
// We currently follow Bash version 5.1.
35+
//
3436
// Its string representation is "bash".
3537
LangBash LangVariant = iota
3638

@@ -45,6 +47,8 @@ const (
4547
// Note that it shares some features with Bash, due to the shared
4648
// ancestry that is ksh.
4749
//
50+
// We currently follow mksh version 59.
51+
//
4852
// Its string representation is "mksh".
4953
LangMirBSDKorn
5054

0 commit comments

Comments
 (0)