Skip to content

Commit f6a0d72

Browse files
committed
cmd/compile/internal/syntax: various cleanups following CL 221603
1) Introduced setLit method to uniformly set the scanner state for literals instead of directly manipulating the scanner fields. 2) Use a local variable 'ok' to track validity of literals instead of relying on the side-effect of error reporters setting s.bad. More code but clearer because it is local and explicit. 3) s/litname/baseName/ and use this function uniformly, also for escapes. Consequently we now always report "hexadecimal" and not "hex" (in the case of invalid escapes). 4) Added TestDirectives verifying that we get the correct directive string (even if that string contains '%'). Verified that lines/s parsing performance is unchanged by comparing go test -run StdLib -fast -skip "syntax/(scanner|scanner_test)\.go" before and after (no relevant difference). Change-Id: I143e4648fdaa31d1c365fb794a1cae4bc1c3f5ba Reviewed-on: https://go-review.googlesource.com/c/go/+/222258 Run-TryBot: Robert Griesemer <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Matthew Dempsky <[email protected]>
1 parent eafb4d8 commit f6a0d72

File tree

2 files changed

+127
-68
lines changed

2 files changed

+127
-68
lines changed

src/cmd/compile/internal/syntax/scanner.go

+79-62
Original file line numberDiff line numberDiff line change
@@ -50,16 +50,23 @@ func (s *scanner) init(src io.Reader, errh func(line, col uint, msg string), mod
5050

5151
// errorf reports an error at the most recently read character position.
5252
func (s *scanner) errorf(format string, args ...interface{}) {
53-
s.bad = true
5453
s.error(fmt.Sprintf(format, args...))
5554
}
5655

5756
// errorAtf reports an error at a byte column offset relative to the current token start.
5857
func (s *scanner) errorAtf(offset int, format string, args ...interface{}) {
59-
s.bad = true
6058
s.errh(s.line, s.col+uint(offset), fmt.Sprintf(format, args...))
6159
}
6260

61+
// setLit sets the scanner state for a recognized _Literal token.
62+
func (s *scanner) setLit(kind LitKind, ok bool) {
63+
s.nlsemi = true
64+
s.tok = _Literal
65+
s.lit = string(s.segment())
66+
s.bad = !ok
67+
s.kind = kind
68+
}
69+
6370
// next advances the scanner by reading the next token.
6471
//
6572
// If a read, source encoding, or lexical error occurs, next calls
@@ -461,16 +468,15 @@ func (s *scanner) digits(base int, invalid *int) (digsep int) {
461468
}
462469

463470
func (s *scanner) number(seenPoint bool) {
464-
s.bad = false
465-
471+
ok := true
472+
kind := IntLit
466473
base := 10 // number base
467474
prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
468475
digsep := 0 // bit 0: digit present, bit 1: '_' present
469476
invalid := -1 // index of invalid digit in literal, or < 0
470477

471478
// integer part
472479
if !seenPoint {
473-
s.kind = IntLit
474480
if s.ch == '0' {
475481
s.nextch()
476482
switch lower(s.ch) {
@@ -491,7 +497,8 @@ func (s *scanner) number(seenPoint bool) {
491497
digsep |= s.digits(base, &invalid)
492498
if s.ch == '.' {
493499
if prefix == 'o' || prefix == 'b' {
494-
s.errorf("invalid radix point in %s", litname(prefix))
500+
s.errorf("invalid radix point in %s literal", baseName(base))
501+
ok = false
495502
}
496503
s.nextch()
497504
seenPoint = true
@@ -500,68 +507,77 @@ func (s *scanner) number(seenPoint bool) {
500507

501508
// fractional part
502509
if seenPoint {
503-
s.kind = FloatLit
510+
kind = FloatLit
504511
digsep |= s.digits(base, &invalid)
505512
}
506513

507-
if digsep&1 == 0 && !s.bad {
508-
s.errorf("%s has no digits", litname(prefix))
514+
if digsep&1 == 0 && ok {
515+
s.errorf("%s literal has no digits", baseName(base))
516+
ok = false
509517
}
510518

511519
// exponent
512520
if e := lower(s.ch); e == 'e' || e == 'p' {
513-
if !s.bad {
521+
if ok {
514522
switch {
515523
case e == 'e' && prefix != 0 && prefix != '0':
516524
s.errorf("%q exponent requires decimal mantissa", s.ch)
525+
ok = false
517526
case e == 'p' && prefix != 'x':
518527
s.errorf("%q exponent requires hexadecimal mantissa", s.ch)
528+
ok = false
519529
}
520530
}
521531
s.nextch()
522-
s.kind = FloatLit
532+
kind = FloatLit
523533
if s.ch == '+' || s.ch == '-' {
524534
s.nextch()
525535
}
526536
digsep = s.digits(10, nil) | digsep&2 // don't lose sep bit
527-
if digsep&1 == 0 && !s.bad {
537+
if digsep&1 == 0 && ok {
528538
s.errorf("exponent has no digits")
539+
ok = false
529540
}
530-
} else if prefix == 'x' && s.kind == FloatLit && !s.bad {
541+
} else if prefix == 'x' && kind == FloatLit && ok {
531542
s.errorf("hexadecimal mantissa requires a 'p' exponent")
543+
ok = false
532544
}
533545

534546
// suffix 'i'
535547
if s.ch == 'i' {
536-
s.kind = ImagLit
548+
kind = ImagLit
537549
s.nextch()
538550
}
539551

540-
s.nlsemi = true
541-
s.lit = string(s.segment())
542-
s.tok = _Literal
552+
s.setLit(kind, ok) // do this now so we can use s.lit below
543553

544-
if s.kind == IntLit && invalid >= 0 && !s.bad {
545-
s.errorAtf(invalid, "invalid digit %q in %s", s.lit[invalid], litname(prefix))
554+
if kind == IntLit && invalid >= 0 && ok {
555+
s.errorAtf(invalid, "invalid digit %q in %s literal", s.lit[invalid], baseName(base))
556+
ok = false
546557
}
547558

548-
if digsep&2 != 0 && !s.bad {
559+
if digsep&2 != 0 && ok {
549560
if i := invalidSep(s.lit); i >= 0 {
550561
s.errorAtf(i, "'_' must separate successive digits")
562+
ok = false
551563
}
552564
}
565+
566+
s.bad = !ok // correct s.bad
553567
}
554568

555-
func litname(prefix rune) string {
556-
switch prefix {
557-
case 'x':
558-
return "hexadecimal literal"
559-
case 'o', '0':
560-
return "octal literal"
561-
case 'b':
562-
return "binary literal"
563-
}
564-
return "decimal literal"
569+
func baseName(base int) string {
570+
switch base {
571+
case 2:
572+
return "binary"
573+
case 8:
574+
return "octal"
575+
case 10:
576+
return "decimal"
577+
case 16:
578+
return "hexadecimal"
579+
}
580+
panic("invalid base")
565581
}
566582

567583
// invalidSep returns the index of the first invalid separator in x, or -1.
@@ -605,50 +621,53 @@ func invalidSep(x string) int {
605621
}
606622

607623
func (s *scanner) rune() {
608-
s.bad = false
624+
ok := true
609625
s.nextch()
610626

611627
n := 0
612628
for ; ; n++ {
613629
if s.ch == '\'' {
614-
if !s.bad {
630+
if ok {
615631
if n == 0 {
616632
s.errorf("empty rune literal or unescaped '")
633+
ok = false
617634
} else if n != 1 {
618635
s.errorAtf(0, "more than one character in rune literal")
636+
ok = false
619637
}
620638
}
621639
s.nextch()
622640
break
623641
}
624642
if s.ch == '\\' {
625643
s.nextch()
626-
s.escape('\'')
644+
if !s.escape('\'') {
645+
ok = false
646+
}
627647
continue
628648
}
629649
if s.ch == '\n' {
630-
if !s.bad {
650+
if ok {
631651
s.errorf("newline in rune literal")
652+
ok = false
632653
}
633654
break
634655
}
635656
if s.ch < 0 {
636-
if !s.bad {
657+
if ok {
637658
s.errorAtf(0, "rune literal not terminated")
659+
ok = false
638660
}
639661
break
640662
}
641663
s.nextch()
642664
}
643665

644-
s.nlsemi = true
645-
s.lit = string(s.segment())
646-
s.kind = RuneLit
647-
s.tok = _Literal
666+
s.setLit(RuneLit, ok)
648667
}
649668

650669
func (s *scanner) stdString() {
651-
s.bad = false
670+
ok := true
652671
s.nextch()
653672

654673
for {
@@ -658,28 +677,29 @@ func (s *scanner) stdString() {
658677
}
659678
if s.ch == '\\' {
660679
s.nextch()
661-
s.escape('"')
680+
if !s.escape('"') {
681+
ok = false
682+
}
662683
continue
663684
}
664685
if s.ch == '\n' {
665686
s.errorf("newline in string")
687+
ok = false
666688
break
667689
}
668690
if s.ch < 0 {
669691
s.errorAtf(0, "string not terminated")
692+
ok = false
670693
break
671694
}
672695
s.nextch()
673696
}
674697

675-
s.nlsemi = true
676-
s.lit = string(s.segment())
677-
s.kind = StringLit
678-
s.tok = _Literal
698+
s.setLit(StringLit, ok)
679699
}
680700

681701
func (s *scanner) rawString() {
682-
s.bad = false
702+
ok := true
683703
s.nextch()
684704

685705
for {
@@ -689,6 +709,7 @@ func (s *scanner) rawString() {
689709
}
690710
if s.ch < 0 {
691711
s.errorAtf(0, "string not terminated")
712+
ok = false
692713
break
693714
}
694715
s.nextch()
@@ -697,10 +718,7 @@ func (s *scanner) rawString() {
697718
// literal (even though they are not part of the literal
698719
// value).
699720

700-
s.nlsemi = true
701-
s.lit = string(s.segment())
702-
s.kind = StringLit
703-
s.tok = _Literal
721+
s.setLit(StringLit, ok)
704722
}
705723

706724
func (s *scanner) comment(text string) {
@@ -797,14 +815,14 @@ func (s *scanner) fullComment() {
797815
}
798816
}
799817

800-
func (s *scanner) escape(quote rune) {
818+
func (s *scanner) escape(quote rune) bool {
801819
var n int
802820
var base, max uint32
803821

804822
switch s.ch {
805823
case quote, 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\':
806824
s.nextch()
807-
return
825+
return true
808826
case '0', '1', '2', '3', '4', '5', '6', '7':
809827
n, base, max = 3, 8, 255
810828
case 'x':
@@ -818,16 +836,16 @@ func (s *scanner) escape(quote rune) {
818836
n, base, max = 8, 16, unicode.MaxRune
819837
default:
820838
if s.ch < 0 {
821-
return // complain in caller about EOF
839+
return true // complain in caller about EOF
822840
}
823841
s.errorf("unknown escape")
824-
return
842+
return false
825843
}
826844

827845
var x uint32
828846
for i := n; i > 0; i-- {
829847
if s.ch < 0 {
830-
return // complain in caller about EOF
848+
return true // complain in caller about EOF
831849
}
832850
d := base
833851
if isDecimal(s.ch) {
@@ -836,12 +854,8 @@ func (s *scanner) escape(quote rune) {
836854
d = uint32(lower(s.ch)) - 'a' + 10
837855
}
838856
if d >= base {
839-
kind := "hex"
840-
if base == 8 {
841-
kind = "octal"
842-
}
843-
s.errorf("invalid character %q in %s escape", s.ch, kind)
844-
return
857+
s.errorf("invalid character %q in %s escape", s.ch, baseName(int(base)))
858+
return false
845859
}
846860
// d < base
847861
x = x*base + d
@@ -850,10 +864,13 @@ func (s *scanner) escape(quote rune) {
850864

851865
if x > max && base == 8 {
852866
s.errorf("octal escape value %d > 255", x)
853-
return
867+
return false
854868
}
855869

856870
if x > max || 0xD800 <= x && x < 0xE000 /* surrogate range */ {
857871
s.errorf("escape is invalid Unicode code point %#U", x)
872+
return false
858873
}
874+
875+
return true
859876
}

0 commit comments

Comments
 (0)