-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.mll
75 lines (67 loc) · 2.12 KB
/
lexer.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
(* The first section of the lexer definition, called the *header*,
is the part that appears below between { and }. It is code
that will simply be copied literally into the generated lexer.ml. *)
{
open Parser
}
(* The second section of the lexer definition defines *identifiers*
that will be used later in the definition. Each identifier names
a *regular expression*. We won't go into details on how regular
expressions work.
Below, we define regular expressions for
- whitespace (nonempty runs of spaces and tabs),
- decimal integer literals (nonempty sequences of the digits 0 through 9;
  a leading minus sign is not part of the literal and is lexed as a
  separate token),
- letters (a through z, and A through Z), and
- identifiers (a letter followed by any number of letters or digits).
FYI, these aren't exactly the same as the OCaml definitions of integers and
identifiers. *)
(* One or more horizontal whitespace characters (space or tab). *)
let blank = (' ' | '\t')+
(* A single decimal digit. *)
let digit = ['0'-'9']
(* An unsigned decimal integer literal: one or more digits. *)
let decimal_literal = digit+
(* A single ASCII letter, either case. *)
let letter = ['A'-'Z' 'a'-'z']
(* An identifier: a letter followed by any mix of letters and digits. *)
let id = letter (letter | digit)*
(* The final section of the lexer definition defines how to parse a character
stream into a token stream. Each of the rules below has the form
| regexp { action }
If the lexer sees the regular expression [regexp], it produces the token
specified by the [action]. We won't go into details on how the actions
work. *)
(* [token lexbuf] reads and returns the next token from [lexbuf].
   ocamllex prefers the longest match and, when two rules match the same
   length, the rule listed first; the keyword rules therefore have to stay
   above the [id] rule, otherwise keywords would be lexed as identifiers.
   Raises [Failure] on a character that starts no token, and on an integer
   literal that [int_of_string] rejects. *)
rule token =
  parse
  (* Layout: skipped by recursing into the rule for the next token. *)
  | blank { token lexbuf }
  (* Line breaks (LF or CRLF) are skipped as well; [Lexing.new_line] keeps
     the line information in the lexbuf positions accurate. *)
  | '\r'? '\n' { Lexing.new_line lexbuf; token lexbuf }
  (* Keywords and built-in type names. *)
  | "fun" { FUN }
  | "if" { IF }
  | "then" { THEN }
  | "else" { ELSE }
  | "let" { LET }
  | "rec" { REC }
  | "in" { IN }
  | "not" { NOT }
  | "false" { BOOL false }
  | "true" { BOOL true }
  | "Int" { INTTYPE }
  | "Bool" { BOOLTYPE }
  (* Arithmetic operators. *)
  | "+" { PLUS }
  | "-" { MINUS }
  | "*" { MULT }
  | "/" { DIV }
  (* Boolean connectives. *)
  | "&&" { AND }
  | "||" { OR }
  (* Comparisons; longest match makes ">=" win over ">" followed by "=". *)
  | "<>" { NE }
  | ">=" { GE }
  | "<=" { LE }
  | ">" { GT }
  | "<" { LT }
  (* Punctuation. *)
  | "(" { LPAREN }
  | ")" { RPAREN }
  | '{' { LCURLY }
  | '}' { RCURLY }
  | ';' { SEMI }
  | '.' { DOT }
  | ':' { COLON }
  | "=" { EQUALS }
  | "->" { GOESTO }
  (* Identifiers and integer literals carry their matched text / value. *)
  | id { IDENT (Lexing.lexeme lexbuf) }
  | decimal_literal { INT (int_of_string (Lexing.lexeme lexbuf)) }
  | eof { EOF }
  (* Anything else is not part of the language: report which character it
     was instead of falling through to the generic "lexing: empty token". *)
  | _ as c { failwith (Printf.sprintf "Lexer: unexpected character %C" c) }
(* And that's the end of the lexer definition. *)