Skip to content

Commit 7d0404a

Browse files
committed
cmd/cue: import non-UTF8 Go string constants as CUE bytes
Otherwise literal.String.Quote replaces invalid UTF-8 sequences with the replacement character \uFFFD, as can be seen by the test failure when the fix is reverted:

    > cmp decls/p_go_gen.cue decls/p_go_gen.cue.golden
    diff decls/p_go_gen.cue decls/p_go_gen.cue.golden
    --- decls/p_go_gen.cue
    +++ decls/p_go_gen.cue.golden
    @@ -21,4 +21,4 @@
     #UTF8: "012" // "\x30\x31\x32"
    -#NonUTF8: "a�b��c�" // "a\xffb\xC0\xAFc\xff"
    +#NonUTF8: 'a\xffb\xc0\xafc\xff' // "a\xffb\xC0\xAFc\xff"

Signed-off-by: Daniel Martí <[email protected]>
Change-Id: I36217582d6e47053d25d0a4eea7adea55b432027
Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/1211576
Unity-Result: CUE porcuepine <[email protected]>
Reviewed-by: Roger Peppe <[email protected]>
TryBot-Result: CUEcueckoo <[email protected]>
1 parent c65dfa3 commit 7d0404a

File tree

2 files changed

+37
-5
lines changed

2 files changed

+37
-5
lines changed

Diff for: cmd/cue/cmd/get_go.go

+11-3
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"strconv"
3131
"strings"
3232
"unicode"
33+
"unicode/utf8"
3334

3435
"github.com/spf13/cobra"
3536
"golang.org/x/tools/go/packages"
@@ -792,10 +793,17 @@ func (e *extractor) reportDecl(x *ast.GenDecl) (a []cueast.Decl) {
792793
var cv cueast.Expr
793794
switch v.Kind() {
794795
case constant.String:
795-
cv = &cueast.BasicLit{
796-
Kind: cuetoken.STRING,
797-
Value: literal.String.Quote(constant.StringVal(v)),
796+
s := constant.StringVal(v)
797+
bl := &cueast.BasicLit{Kind: cuetoken.STRING}
798+
// Go strings may contain any bytes, even invalid UTF-8.
799+
// CUE strings may only contain valid UTF-8, because it has
800+
// bytes values for anything that may not be valid UTF-8.
801+
if utf8.ValidString(s) {
802+
bl.Value = literal.String.Quote(s)
803+
} else {
804+
bl.Value = literal.Bytes.Quote(s)
798805
}
806+
cv = bl
799807

800808
default:
801809
// TODO(mvdan): replace this with switch cases for Bool/Int/Float

Diff for: cmd/cue/cmd/testdata/script/get_go_json_compat.txtar

+26-2
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,17 @@ language: version: "v0.9.0"
3131
-- decls/export.cue --
3232
package decls
3333

34+
import "strings"
35+
3436
#Root & {
3537
Duration: #ValueSecond
3638

3739
Strings: [
3840
#ErrorPrefix,
3941
#ResetColor,
42+
#UTF8,
4043
]
44+
NonUTF8ThirdByte: strings.ByteAt(#NonUTF8, 3)
4145
}
4246
-- decls/p.go --
4347
package decls
@@ -48,12 +52,20 @@ type Root struct {
4852
Duration time.Duration
4953

5054
Strings []string
55+
56+
// We can't roundtrip NonUTF8 directly because of https://cuelang.org/issue/3823.
57+
// Roundtrip its third byte for now, which does work.
58+
NonUTF8ThirdByte int
5159
}
5260

5361
const ValueSecond = 3 * time.Second
5462

5563
const ErrorPrefix = "\033[31mError:\033[0m"
5664
const ResetColor = "\u001b[0m"
65+
66+
const UTF8 = "\x30\x31\x32" // "012"
67+
// The third byte here should not be replaced by \uFFFD, the replacement character.
68+
const NonUTF8 = "a\xffb\xC0\xAFc\xff"
5769
-- decls/p_go_gen.cue.golden --
5870
// Code generated by cue get go. DO NOT EDIT.
5971

@@ -64,13 +76,21 @@ package decls
6476
#Root: {
6577
Duration: int @go(,time.Duration)
6678
Strings: [...string] @go(,[]string)
79+
80+
// We can't roundtrip NonUTF8 directly because of https://cuelang.org/issue/3823.
81+
// Roundtrip its third byte for now, which does work.
82+
NonUTF8ThirdByte: int
6783
}
6884

6985
#ValueSecond: int & 3000000000
7086

7187
#ErrorPrefix: "\u001b[31mError:\u001b[0m" // "\033[31mError:\033[0m"
7288

7389
#ResetColor: "\u001b[0m"
90+
91+
#UTF8: "012" // "\x30\x31\x32"
92+
93+
#NonUTF8: 'a\xffb\xc0\xafc\xff' // "a\xffb\xC0\xAFc\xff"
7494
-- marshal/main.go --
7595
package main
7696

@@ -88,7 +108,9 @@ func main() {
88108
Strings: []string{
89109
decls.ErrorPrefix,
90110
decls.ResetColor,
111+
decls.UTF8,
91112
},
113+
NonUTF8ThirdByte: int(decls.NonUTF8[3]),
92114
}
93115
// Mimic the formatting of `cue export` to be able to compare bytes.
94116
data, err := json.MarshalIndent(r, "", " ")
@@ -102,6 +124,8 @@ func main() {
102124
"Duration": 3000000000,
103125
"Strings": [
104126
"\u001b[31mError:\u001b[0m",
105-
"\u001b[0m"
106-
]
127+
"\u001b[0m",
128+
"012"
129+
],
130+
"NonUTF8ThirdByte": 192
107131
}

0 commit comments

Comments
 (0)