Commit | Line | Data |
---|---|---|
970ed795 EL |
1 | /****************************************************************************** |
2 | * Copyright (c) 2000-2014 Ericsson Telecom AB | |
3 | * All rights reserved. This program and the accompanying materials | |
4 | * are made available under the terms of the Eclipse Public License v1.0 | |
5 | * which accompanies this distribution, and is available at | |
6 | * http://www.eclipse.org/legal/epl-v10.html | |
7 | ******************************************************************************/ | |
8 | ||
9 | /** | |
10 | * Lexical analyzer for TTCN-3 character patterns. | |
11 | * | |
12 | * \author Matyas Forstner (Matyas.Forstner@eth.ericsson.se) | |
13 | * | |
14 | * 20031121 | |
15 | */ | |
16 | ||
17 | %option nostack | |
18 | %option noyylineno | |
19 | %option noyywrap | |
20 | %option nounput | |
21 | %option never-interactive | |
22 | %option prefix="pattern_yy" | |
23 | ||
24 | %{ /* ****************** C declarations ***************** */ | |
25 | ||
26 | #include <ctype.h> | |
27 | #include <stddef.h> | |
28 | #include "pattern.hh" | |
29 | #include "pattern_p.hh" | |
30 | ||
31 | /* Access the semantic value of the bison parser. Usually this is done by | |
32 | * #defining yylval to the appropriate symbol whose name depends on the | |
33 | * %name-prefix of the parser, e.g. pattern_yylval or pattern_unilval. | |
34 | * | |
35 | * Because we need to be able to access either one or the other, | |
36 | * we keep a pointer which is set by the parser when it calls | |
37 | * init_pattern_yylex() */ | |
38 | static YYSTYPE *yylval_ptr; | |
39 | #define yylval (*yylval_ptr) | |
40 | ||
41 | static unsigned int nof_parentheses = 0; /* incremented by the "(" rule, decremented by a matched ")"; read through get_nof_parentheses() */ | |
42 | static bool meta = false; /* set to true by the rules for pattern metacharacters (e.g. `*', `(', `[', `#', `\d'); read through has_meta() */ | |
43 | ||
44 | %} /* ***************** definitions ***************** */ | |
45 | ||
46 | NUMBER 0|([1-9][0-9]*) | |
47 | ||
48 | /* start conditions */ | |
49 | %x SC_Set SC_Hash SC_HashParen SC_Quadruple SC_Quadruple_Set | |
50 | ||
51 | %% /* ***************** rules ************************* */ | |
52 | ||
53 | /* drop whitespace after `#' and between the tokens of #(...) and \q{...} constructs */ | |
54 | <SC_Hash,SC_HashParen,SC_Quadruple,SC_Quadruple_Set>[ \t\r\n\v\f]+ | |
55 | ||
56 | <SC_Set> | |
57 | { | |
58 | ||
59 | "]" { | |
60 | BEGIN(INITIAL); /* the set is closed: resume scanning in the default state */ | |
61 | return KW_Set_End; | |
62 | } | |
63 | ||
64 | "-]" { | |
65 | BEGIN(INITIAL); /* a dash directly before the closing bracket closes the set as well, with its own token */ | |
66 | return KW_Set_Dash_End; | |
67 | } | |
68 | ||
69 | "-" return '-'; /* any other dash inside a set is passed on as the '-' token */ | |
70 | ||
71 | } /* SC_Set */ | |
72 | ||
73 | <SC_Hash> | |
74 | { | |
75 | ||
76 | [0-9] { | |
77 | BEGIN(INITIAL); /* a single digit completes the construct started by `#' */ | |
78 | yylval.u = yytext[0] - '0'; /* numeric value of the digit character */ | |
79 | return TOK_Digit; | |
80 | } | |
81 | ||
82 | "(" { | |
83 | BEGIN(SC_HashParen); /* continue with the parenthesized form #( ... ) */ | |
84 | return '('; | |
85 | } | |
86 | ||
87 | } /* SC_Hash */ | |
88 | ||
89 | <SC_HashParen,SC_Quadruple,SC_Quadruple_Set> | |
90 | { | |
91 | ||
92 | {NUMBER} { | |
93 | errno = 0; /* cleared first so that a failure reported by strtoul() below can be told apart from a stale value */ | |
94 | yylval.u = strtoul(yytext, NULL, 10); /* NOTE(review): strtoul() yields unsigned long; the type of yylval.u is not visible here - confirm that no narrowing can silently drop high bits */ | |
95 | if (errno != 0) TTCN_pattern_error("Number `%s' is too large to be " | |
96 | "represented in memory. (%s)", yytext, strerror(errno)); | |
97 | return TOK_Number; /* also reached after the error report above, provided TTCN_pattern_error() returns */ | |
98 | } | |
99 | ||
100 | "," return ','; /* separator between numbers, passed on as the ',' token */ | |
101 | ||
102 | } /* SC_HashParen,SC_Quadruple,SC_Quadruple_Set */ | |
103 | ||
104 | <SC_HashParen>")" { | |
105 | BEGIN(INITIAL); /* end of #( ... ): back to the default state */ | |
106 | return ')'; | |
107 | } | |
108 | ||
109 | <SC_Quadruple,SC_Quadruple_Set> | |
110 | { | |
111 | ||
112 | "{" return '{'; /* opening brace after \q, passed on as the '{' token */ | |
113 | ||
114 | "}" { | |
115 | if (YY_START == SC_Quadruple) BEGIN(INITIAL); /* the \q was met in the default state: return there */ | |
116 | else BEGIN(SC_Set); /* the \q was met inside a set: resume scanning that set */ | |
117 | return '}'; | |
118 | } | |
119 | ||
120 | } /* SC_Quadruple,SC_Quadruple_Set */ | |
121 | ||
122 | "*" { /* each of these four one-character rules sets the meta flag and returns the character itself as the token */ meta = true; return '*'; } | |
123 | "+" { meta = true; return '+'; } | |
124 | "?" { meta = true; return '?'; } | |
125 | "|" { meta = true; return '|'; } | |
126 | ||
127 | "(" { | |
128 | nof_parentheses++; /* one more group is open */ | |
129 | meta = true; | |
130 | return KW_Group_Begin; | |
131 | } | |
132 | ||
133 | ")" { | |
134 | if (nof_parentheses > 0) { | |
135 | nof_parentheses--; /* closes one of the currently open groups */ | |
136 | return KW_Group_End; | |
137 | } else { | |
138 | TTCN_pattern_error("Unmatched `)'."); /* no group is open, so the counter is left untouched */ | |
139 | yylval.c = ')'; /* after the report the `)' is handed on as a TOK_Char */ | |
140 | return TOK_Char; | |
141 | } | |
142 | } | |
143 | ||
144 | "[" { | |
145 | BEGIN(SC_Set); /* what follows is scanned by the rules of the SC_Set start condition */ | |
146 | meta = true; | |
147 | return KW_Set_Begin; | |
148 | } | |
149 | ||
150 | "[^" { | |
151 | BEGIN(SC_Set); /* same as above, but reported with the token for an opener followed by `^' */ | |
152 | meta = true; | |
153 | return KW_Set_Begin_Neg; | |
154 | } | |
155 | ||
156 | "[]" { | |
157 | BEGIN(SC_Set); /* the `]' is consumed together with the opener, so it cannot match the set-closing rule */ | |
158 | meta = true; | |
159 | return KW_Set_Begin_Rsbrkt; | |
160 | } | |
161 | ||
162 | "[^]" { | |
163 | BEGIN(SC_Set); /* as for "[]": the `]' belongs to the opener token */ | |
164 | meta = true; | |
165 | return KW_Set_Begin_Neg_Rsbrkt; | |
166 | } | |
167 | ||
168 | "]" { | |
169 | TTCN_pattern_error("Unmatched `]'."); /* this rule is active only in the default state, i.e. when no set is open */ | |
170 | yylval.c = ']'; /* after the report the `]' is handed on as a TOK_Char */ | |
171 | return TOK_Char; | |
172 | } | |
173 | ||
174 | "#" { | |
175 | BEGIN(SC_Hash); /* the SC_Hash rules accept a single digit or `(' next */ | |
176 | meta = true; | |
177 | return '#'; | |
178 | } | |
179 | ||
180 | <INITIAL,SC_Set> | |
181 | { | |
182 | ||
183 | /* \metacharacters: backslash sequences that set the meta flag and are returned as keyword tokens */ | |
184 | "\\d" { meta = true; return KW_BS_d; } | |
185 | "\\w" { meta = true; return KW_BS_w; } | |
186 | "\\t" { meta = true; return KW_BS_t; } | |
187 | "\\n" { meta = true; return KW_BS_n; } | |
188 | "\\r" { meta = true; return KW_BS_r; } | |
189 | "\\s" { meta = true; return KW_BS_s; } | |
190 | "\\b" { meta = true; return KW_BS_b; } | |
191 | ||
192 | "\\q" { | |
193 | meta = true; | |
194 | if (YY_START == INITIAL) BEGIN(SC_Quadruple); /* \q in the default state */ | |
195 | else BEGIN(SC_Quadruple_Set); /* \q inside a set; the `}' rule uses this state to return to SC_Set */ | |
196 | return KW_BS_q; | |
197 | } | |
198 | ||
199 | /* escaped special characters: ? * \ [ ] - ^ | ( ) # + { } */ | |
200 | \\[][?*\\^|()#+{}-] { | |
201 | yylval.c = yytext[1]; /* the character that follows the backslash */ | |
202 | return TOK_Char; /* not meta */ | |
203 | } | |
204 | ||
205 | /* invalid escape sequences */ | |
206 | "\\"(.|"\n") { | |
207 | if (isprint((unsigned char)yytext[1])) /* printable characters are quoted literally, all others by their character code */ | |
208 | TTCN_pattern_warning("Use of unrecognized escape sequence `\\%c' is " | |
209 | "deprecated.", yytext[1]); | |
210 | else TTCN_pattern_warning("Use of unrecognized escape sequence `\\' + " | |
211 | "character code %u (0x%02X) is deprecated.", (unsigned char)yytext[1], | |
212 | (unsigned char)yytext[1]); | |
213 | yylval.c = yytext[1]; /* only a warning: the character after the backslash is still passed on as a TOK_Char */ | |
214 | return TOK_Char; | |
215 | } | |
216 | ||
217 | /* single backslash: can only match at the very end, because the rule above takes a backslash followed by any character */ | |
218 | \\ { | |
219 | TTCN_pattern_error("Invalid single backslash (`\\') character at the end " | |
220 | "of the pattern."); /* no return statement: this rule produces no token */ | |
221 | } | |
222 | ||
223 | .|"\n" { | |
224 | yylval.c = yytext[0]; /* any other character, newline included, is passed on as itself */ | |
225 | return TOK_Char; | |
226 | } | |
227 | ||
228 | } /* INITIAL, SC_Set */ | |
229 | ||
230 | /* erroneous characters */ | |
231 | ||
232 | <SC_Hash>.|\n { | |
233 | if (isprint((unsigned char)yytext[0])) /* printable characters are quoted literally, all others by their character code */ | |
234 | TTCN_pattern_error("A digit or `(' was expected after `#' instead of " | |
235 | "character `%c'.", yytext[0]); | |
236 | else TTCN_pattern_error("A digit or `(' was expected after `#' instead of " | |
237 | "character with code %u (0x%02X).", (unsigned char)yytext[0], | |
238 | (unsigned char)yytext[0]); /* no BEGIN and no return: the start condition is unchanged and no token is produced */ | |
239 | } | |
240 | ||
241 | <SC_HashParen>. { | |
242 | if (isprint((unsigned char)yytext[0])) /* same reporting scheme as above, for characters inside #( ... ) */ | |
243 | TTCN_pattern_error("A number, `,' or `)' was expected after `#(' instead " | |
244 | "of character `%c'.", yytext[0]); | |
245 | else TTCN_pattern_error("A number, `,' or `)' was expected after `#(' " | |
246 | "instead of character with code %u (0x%02X).", (unsigned char)yytext[0], | |
247 | (unsigned char)yytext[0]); /* no BEGIN and no return: the start condition is unchanged and no token is produced */ | |
248 | } | |
249 | ||
250 | <SC_Quadruple,SC_Quadruple_Set>. { | |
251 | if (isprint((unsigned char)yytext[0])) /* same reporting scheme as above, for characters after \q */ | |
252 | TTCN_pattern_error("A number, `,' or `}' was expected after `\\q{' " | |
253 | "instead of character `%c'.", yytext[0]); | |
254 | else TTCN_pattern_error("A number, `,' or `}' was expected after `\\q{' " | |
255 | "instead of character with code %u (0x%02X).", (unsigned char)yytext[0], | |
256 | (unsigned char)yytext[0]); /* no BEGIN and no return: the start condition is unchanged and no token is produced */ | |
257 | } | |
258 | ||
259 | %% | |
260 | ||
261 | unsigned int get_nof_parentheses() | |
262 | { /* Reports the counter that the "(" rule increments and a matched ")" decrements. */ | |
263 | const unsigned int open_groups = nof_parentheses; return open_groups; | |
264 | } | |
265 | ||
266 | bool has_meta() | |
267 | { /* Reports whether any rule that sets the meta flag has fired since the last init_pattern_yylex(). */ | |
268 | const bool saw_meta = meta; return saw_meta; | |
269 | } | |
270 | ||
271 | void init_pattern_yylex(YYSTYPE *sema_val) | |
272 | { /* Puts the scanner state back to its starting values; sema_val is where the rules will store semantic values. */ | |
273 | meta = false; /* no metacharacter seen yet */ | |
274 | nof_parentheses = 0; /* no group is open */ | |
275 | yylval_ptr = sema_val; /* the yylval macro dereferences this pointer */ | |
276 | BEGIN(INITIAL); /* scanning starts in the default start condition */ | |
277 | } |