Commit | Line | Data |
---|---|---|
866e5b51 FC |
1 | lexer grammar CTFLexer; |
2 | ||
3 | options { | |
4 | language = Java; | |
5 | } | |
6 | ||
7 | @lexer::header { | |
8 | package org.eclipse.linuxtools.ctf.parser; | |
9 | } | |
10 | ||
11 | /* | |
12 | * Lexer grammar | |
13 | */ | |
14 | ||
15 | /* | |
16 | * Keywords: one token type per reserved word. These rules are declared before IDENTIFIER, whose pattern also matches every one of these spellings. | |
17 | */ | |
18 | ALIGNTOK : 'align' ; | |
19 | CONSTTOK : 'const' ; | |
20 | CHARTOK : 'char' ; | |
21 | DOUBLETOK : 'double' ; | |
22 | ENUMTOK : 'enum' ; | |
23 | EVENTTOK : 'event' ; | |
24 | FLOATINGPOINTTOK : 'floating_point' ; | |
25 | FLOATTOK : 'float' ; | |
26 | INTEGERTOK : 'integer' ; | |
27 | INTTOK : 'int' ; | |
28 | LONGTOK : 'long' ; | |
29 | SHORTTOK : 'short' ; | |
30 | SIGNEDTOK : 'signed' ; | |
31 | STREAMTOK : 'stream' ; | |
32 | STRINGTOK : 'string' ; | |
33 | STRUCTTOK : 'struct' ; | |
34 | TRACETOK : 'trace' ; | |
35 | TYPEALIASTOK : 'typealias' ; | |
36 | TYPEDEFTOK : 'typedef' ; | |
37 | UNSIGNEDTOK : 'unsigned' ; | |
38 | VARIANTTOK : 'variant' ; | |
39 | VOIDTOK : 'void' ; | |
40 | BOOLTOK : '_Bool' ; | |
41 | COMPLEXTOK : '_Complex' ; | |
42 | IMAGINARYTOK : '_Imaginary' ; | |
43 | ENVTOK : 'env' ; | |
44 | CLOCKTOK : 'clock' ; | |
45 | ||
46 | /* | |
47 | * Literal spellings for NaN and signed infinity. The specification for these is still to come. | |
48 | */ | |
49 | NANNUMBERTOK : 'NaN' ; | |
50 | INFINITYTOK : '+inf' ; | |
51 | NINFINITYTOK : '-inf' ; | |
52 | ||
53 | /* | |
54 | * Symbols: punctuation and operator tokens. BACKSLASH is a fragment, used only as a building block by other lexer rules. | |
55 | */ | |
56 | SEPARATOR : ',' ; | |
57 | COLON : ':' ; | |
58 | ELIPSES : '...' ; | |
59 | ASSIGNMENT : '=' ; | |
60 | TYPE_ASSIGNMENT : ':=' ; | |
61 | LT : '<' ; | |
62 | GT : '>' ; | |
63 | OPENBRAC : '[' ; | |
64 | CLOSEBRAC : ']' ; | |
65 | LPAREN : '(' ; | |
66 | RPAREN : ')' ; | |
67 | LCURL : '{' ; | |
68 | RCURL : '}' ; | |
69 | TERM : ';' ; | |
70 | POINTER : '*' ; | |
71 | SIGN : '+' | '-' ; | |
72 | ARROW : '->' ; | |
73 | DOT : '.' ; | |
74 | fragment BACKSLASH : '\\' ; | |
75 | ||
76 | /* | |
77 | * Boolean literals | |
78 | * - Better left as identifiers and numbers, so the rules below stay commented out. | |
79 | */ | |
80 | /*TRUE : 'true' | 'TRUE' ; | |
81 | FALSE : 'false' | 'FALSE' ; | |
82 | ZERO : '0' ; | |
83 | ONE : '1' ;*/ | |
84 | ||
85 | ||
86 | /* | |
87 | * Integer literals: octal, decimal and hexadecimal forms, each with an optional integer type suffix. | |
88 | */ | |
89 | OCTAL_LITERAL : '0' ('0'..'7')+ INTEGER_TYPES_SUFFIX? ; | |
90 | ||
91 | DECIMAL_LITERAL : DIGIT+ INTEGER_TYPES_SUFFIX? ; | |
92 | ||
93 | HEX_LITERAL : HEX_PREFIX HEX_DIGIT+ INTEGER_TYPES_SUFFIX? ; | |
94 | fragment HEX_DIGIT : DIGIT | ('a'..'f') | ('A'..'F') ; | |
95 | fragment HEX_PREFIX : '0' ('x' | 'X') ; | |
96 | ||
97 | /* Helpers for integer literals (NONZERO_DIGIT is not referenced by any rule visible in this grammar) */ | |
98 | fragment DIGIT : '0'..'9' ; | |
99 | fragment NONZERO_DIGIT : '1'..'9' ; | |
100 | ||
101 | ||
102 | /** | |
103 | * Integer suffix for long, long long and unsigned. | |
104 | * | |
105 | * Matches a long part (l, ll, L or LL), an unsigned part (u or U), or one of each in either order; the two letters of a long-long part must share the same case. | |
106 | */ | |
107 | fragment INTEGER_TYPES_SUFFIX : | |
108 | ('l' ('l')? | 'L' ('L')?) // l, ll, L, LL | |
109 | | ('u' | 'U') // u, U | |
110 | | ('u' | 'U') ('l' ('l')? | 'L' ('L')?) // u or U followed by l, ll, L or LL | |
111 | | ('l' ('l')? | 'L' ('L')?) ('u'| 'U') // l, ll, L or LL followed by u or U | |
112 | ; | |
113 | ||
114 | /** | |
115 | * Escape sequences: a backslash followed by one of the listed single characters, or an octal, Unicode or hexadecimal escape. | |
116 | */ | |
117 | fragment ESCAPE_SEQUENCE : | |
118 | BACKSLASH ('\'' | '"' | '?' | BACKSLASH | 'a' | 'b' | 'f' | 'n' | 'r' | 't' | 'v' ) | |
119 | | OCTAL_ESCAPE | |
120 | | UNICODE_ESCAPE | |
121 | | HEXADECIMAL_ESCAPE | |
122 | ; | |
123 | ||
124 | /** | |
125 | * Octal escape sequence: a backslash followed by one to three octal digits; the three-digit form must start with 0-3. | |
126 | */ | |
127 | fragment OCTAL_ESCAPE : | |
128 | BACKSLASH ('0'..'3') ('0'..'7') ('0'..'7') | |
129 | | BACKSLASH ('0'..'7') ('0'..'7') | |
130 | | BACKSLASH ('0'..'7') | |
131 | ; | |
132 | ||
133 | /** | |
134 | * Hexadecimal escape sequence: a backslash, the letter x, then one or more hexadecimal digits. | |
135 | */ | |
136 | fragment HEXADECIMAL_ESCAPE : BACKSLASH 'x' HEX_DIGIT+ ; | |
137 | ||
138 | /** | |
139 | * Unicode escape sequence: backslash and lower-case u with exactly four hexadecimal digits, or backslash and upper-case U with exactly eight. | |
140 | */ | |
141 | fragment UNICODE_ESCAPE : | |
142 | BACKSLASH 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT | |
143 | | BACKSLASH 'U' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT | |
144 | ; | |
145 | ||
146 | ||
147 | /* Optional 'L' prefix, used by both the character literal and the string literal rules */ | |
148 | fragment STRINGPREFIX : 'L'; | |
149 | ||
150 | /* | |
151 | * Character literal: optional L prefix, then one or more escape sequences or characters other than backslash and single quote, enclosed in single quotes. | |
152 | */ | |
153 | CHARACTER_LITERAL : STRINGPREFIX? SINGLEQUOTE CHAR_CONTENT+ SINGLEQUOTE ; | |
154 | fragment CHAR_CONTENT : (ESCAPE_SEQUENCE | ~(BACKSLASH | SINGLEQUOTE)) ; | |
155 | fragment SINGLEQUOTE : '\''; | |
156 | ||
157 | /* | |
158 | * String literal: optional L prefix, then zero or more escape sequences or characters other than backslash and double quote, enclosed in double quotes. | |
159 | */ | |
160 | STRING_LITERAL : STRINGPREFIX? DOUBLEQUOTE STRING_CONTENT* DOUBLEQUOTE ; | |
161 | fragment STRING_CONTENT : (ESCAPE_SEQUENCE | ~(BACKSLASH | DOUBLEQUOTE)) ; | |
162 | fragment DOUBLEQUOTE : '"' ; | |
163 | ||
164 | /** | |
165 | * Whitespace: a single space, carriage return, tab, form feed or line feed, sent to the hidden channel. | |
166 | */ | |
167 | WS : (' ' | '\r' | '\t' | '\u000C' | '\n') { $channel=HIDDEN; } ; | |
168 | ||
169 | /** | |
170 | * Multiline comment: everything from the comment opener to the comment closer, sent to the hidden channel. | |
171 | */ | |
172 | // About the greedy option: see page 100-101 of The Definitive ANTLR reference | |
173 | // COMMENT : '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;} ; | |
174 | COMMENT : COMMENT_OPEN .* COMMENT_CLOSE { $channel = HIDDEN; } ; | |
175 | fragment COMMENT_OPEN : '/*'; | |
176 | fragment COMMENT_CLOSE : '*/'; | |
177 | ||
178 | /** | |
179 | * Single line comment, sent to the hidden channel. The line terminator is optional so that a comment ending at end of input (no trailing newline) still matches. | |
180 | */ | |
181 | LINE_COMMENT : '//' ~('\n'|'\r')* ('\r'? '\n')? {$channel=HIDDEN;} ; | |
182 | ||
183 | /** | |
184 | * Identifiers: an ASCII letter or underscore followed by any number of ASCII letters, digits or underscores. | |
185 | */ | |
186 | IDENTIFIER : NONDIGIT (NONDIGIT | DIGIT)* ; | |
187 | fragment NONDIGIT : ('_') | ('A'..'Z') | ('a'..'z') ; |