Commit | Line | Data |
---|---|---|
970ed795 EL |
1 | /////////////////////////////////////////////////////////////////////////////// |
2 | // Copyright (c) 2000-2014 Ericsson Telecom AB | |
3 | // All rights reserved. This program and the accompanying materials | |
4 | // are made available under the terms of the Eclipse Public License v1.0 | |
5 | // which accompanies this distribution, and is available at | |
6 | // http://www.eclipse.org/legal/epl-v10.html | |
7 | /////////////////////////////////////////////////////////////////////////////// | |
8 | #include "TextAST.hh" | |
9 | #include <stdio.h> | |
10 | #include "common/pattern.hh" | |
11 | #include "common/pattern_p.hh" | |
12 | ||
13 | void TextAST::init_TextAST() | |
14 | { | |
15 | begin_val=NULL; | |
16 | end_val=NULL; | |
17 | separator_val=NULL; | |
18 | nof_field_params=0; | |
19 | field_params=NULL; | |
20 | true_params=NULL; | |
21 | false_params=NULL; | |
22 | coding_params.leading_zero=false; | |
23 | coding_params.repeatable=false; | |
24 | coding_params.min_length=-1; | |
25 | coding_params.max_length=-1; | |
26 | coding_params.convert=0; | |
27 | coding_params.just=1; | |
28 | decoding_params.leading_zero=false; | |
29 | decoding_params.repeatable=false; | |
30 | decoding_params.min_length=-1; | |
31 | decoding_params.max_length=-1; | |
32 | decoding_params.convert=0; | |
33 | decoding_params.just=1; | |
34 | decode_token=NULL; | |
35 | case_sensitive=true; | |
36 | } | |
37 | ||
38 | TextAST::TextAST(const TextAST *other_val) | |
39 | { | |
40 | init_TextAST(); | |
41 | if(other_val){ | |
42 | //printf("HALI\n\r"); | |
43 | //other_val->print_TextAST(); | |
44 | copy_textAST_matching_values(&begin_val,other_val->begin_val); | |
45 | copy_textAST_matching_values(&end_val,other_val->end_val); | |
46 | copy_textAST_matching_values(&separator_val,other_val->separator_val); | |
47 | ||
48 | nof_field_params=other_val->nof_field_params; | |
49 | if(nof_field_params) field_params= | |
50 | (textAST_enum_def**)Malloc(nof_field_params*sizeof(textAST_enum_def*)); | |
51 | for(int a=0;a<nof_field_params;a++){ | |
52 | if(other_val->field_params[a]){ | |
53 | field_params[a]=(textAST_enum_def*)Malloc(sizeof(textAST_enum_def)); | |
54 | field_params[a]->name= | |
55 | new Common::Identifier(*(other_val->field_params[a]->name)); | |
56 | textAST_matching_values *b=&(field_params[a]->value); | |
57 | copy_textAST_matching_values(&b,&other_val->field_params[a]->value); | |
58 | } else field_params[a]=NULL; | |
59 | } | |
60 | //print_TextAST(); | |
61 | ||
62 | copy_textAST_matching_values(&true_params,other_val->true_params); | |
63 | copy_textAST_matching_values(&false_params,other_val->false_params); | |
64 | ||
65 | coding_params.leading_zero=other_val->coding_params.leading_zero; | |
66 | coding_params.repeatable=other_val->coding_params.repeatable; | |
67 | coding_params.min_length=other_val->coding_params.min_length; | |
68 | coding_params.max_length=other_val->coding_params.max_length; | |
69 | coding_params.convert=other_val->coding_params.convert; | |
70 | coding_params.just=other_val->coding_params.just; | |
71 | decoding_params.leading_zero=other_val->decoding_params.leading_zero; | |
72 | decoding_params.repeatable=other_val->decoding_params.repeatable; | |
73 | decoding_params.min_length=other_val->decoding_params.min_length; | |
74 | decoding_params.max_length=other_val->decoding_params.max_length; | |
75 | decoding_params.convert=other_val->decoding_params.convert; | |
76 | decoding_params.just=other_val->decoding_params.just; | |
77 | ||
78 | if(other_val->decode_token) | |
79 | decode_token=mcopystr(other_val->decode_token); | |
80 | case_sensitive=other_val->case_sensitive; | |
81 | } | |
82 | } | |
83 | ||
84 | TextAST::~TextAST() | |
85 | { | |
86 | if(begin_val){ | |
87 | Free(begin_val->encode_token); | |
88 | Free(begin_val->decode_token); | |
89 | Free(begin_val); | |
90 | } | |
91 | if(end_val){ | |
92 | Free(end_val->encode_token); | |
93 | Free(end_val->decode_token); | |
94 | Free(end_val); | |
95 | } | |
96 | if(separator_val){ | |
97 | Free(separator_val->encode_token); | |
98 | Free(separator_val->decode_token); | |
99 | Free(separator_val); | |
100 | } | |
101 | if(field_params){ | |
102 | for(int a=0;a<nof_field_params;a++){ | |
103 | if(field_params[a]){ | |
104 | delete field_params[a]->name; | |
105 | Free(field_params[a]->value.encode_token); | |
106 | Free(field_params[a]->value.decode_token); | |
107 | } | |
108 | Free(field_params[a]); | |
109 | } | |
110 | Free(field_params); | |
111 | } | |
112 | if(true_params){ | |
113 | Free(true_params->encode_token); | |
114 | Free(true_params->decode_token); | |
115 | Free(true_params); | |
116 | } | |
117 | if(false_params){ | |
118 | Free(false_params->encode_token); | |
119 | Free(false_params->decode_token); | |
120 | Free(false_params); | |
121 | } | |
122 | Free(decode_token); | |
123 | } | |
124 | ||
125 | void TextAST::print_TextAST() const | |
126 | { | |
127 | printf("\n\rBegin:"); | |
128 | if(begin_val){ | |
129 | printf("\n\r Encode token:"); | |
130 | if(begin_val->encode_token) printf(" %s\n\r",begin_val->encode_token); | |
131 | else printf(" NULL\n\r"); | |
132 | printf(" Decode token:"); | |
133 | if(begin_val->decode_token) printf(" %s\n\r",begin_val->decode_token); | |
134 | else printf(" NULL\n\r"); | |
135 | if(begin_val->case_sensitive) printf(" case_sensitive\n\r"); | |
136 | else printf(" case_insensitive\n\r"); | |
137 | } | |
138 | else printf("NULL\n\r"); | |
139 | printf("End:"); | |
140 | if(end_val){ | |
141 | printf("\n\r Encode token:"); | |
142 | if(end_val->encode_token) printf(" %s\n\r",end_val->encode_token); | |
143 | else printf(" NULL\n\r"); | |
144 | printf(" Decode token:"); | |
145 | if(end_val->decode_token) printf(" %s\n\r",end_val->decode_token); | |
146 | else printf(" NULL\n\r"); | |
147 | if(end_val->case_sensitive) printf(" case_sensitive\n\r"); | |
148 | else printf(" case_insensitive\n\r"); | |
149 | } | |
150 | else printf("NULL\n\r"); | |
151 | printf("Separator:"); | |
152 | if(separator_val){ | |
153 | printf("\n\r Encode token:"); | |
154 | if(separator_val->encode_token) printf(" %s\n\r",separator_val->encode_token); | |
155 | else printf(" NULL\n\r"); | |
156 | printf(" Decode token:"); | |
157 | if(separator_val->decode_token) printf(" %s\n\r",separator_val->decode_token); | |
158 | else printf(" NULL\n\r"); | |
159 | if(separator_val->case_sensitive) printf(" case_sensitive\n\r"); | |
160 | else printf(" case_insensitive\n\r"); | |
161 | } | |
162 | else printf("NULL\n\r"); | |
163 | printf("Select token:"); | |
164 | if(decode_token) printf("%s",decode_token); | |
165 | if(case_sensitive) printf(" case_sensitive\n\r"); | |
166 | else printf(" case_insensitive\n\r"); | |
167 | printf("True:"); | |
168 | if(true_params){ | |
169 | printf("\n\r Encode token:"); | |
170 | if(true_params->encode_token) printf(" %s\n\r",true_params->encode_token); | |
171 | else printf(" NULL\n\r"); | |
172 | printf(" Decode token:"); | |
173 | if(true_params->decode_token) printf(" %s\n\r",true_params->decode_token); | |
174 | else printf(" NULL\n\r"); | |
175 | if(true_params->case_sensitive) printf(" case_sensitive\n\r"); | |
176 | else printf(" case_insensitive\n\r"); | |
177 | } | |
178 | else printf("NULL\n\r"); | |
179 | printf("False:"); | |
180 | if(true_params){ | |
181 | printf("\n\r Encode token:"); | |
182 | if(false_params->encode_token) printf(" %s\n\r",false_params->encode_token); | |
183 | else printf(" NULL\n\r"); | |
184 | printf(" Decode token:"); | |
185 | if(false_params->decode_token) printf(" %s\n\r",false_params->decode_token); | |
186 | else printf(" NULL\n\r"); | |
187 | if(false_params->case_sensitive) printf(" case_sensitive\n\r"); | |
188 | else printf(" case_insensitive\n\r"); | |
189 | } | |
190 | else printf("NULL\n\r"); | |
191 | printf("Number of fields:%i\n\r",nof_field_params); | |
192 | for(int a=0;a<nof_field_params;a++){ | |
193 | printf("Field %i:\n\r",a); | |
194 | if(field_params[a]){ | |
195 | printf(" Name: %s\n\r",field_params[a]->name->get_name().c_str()); | |
196 | printf(" Encode token:"); | |
197 | if(field_params[a]->value.encode_token) printf(" %s\n\r",field_params[a]->value.encode_token); | |
198 | else printf(" NULL\n\r"); | |
199 | printf(" Decode token:"); | |
200 | if(field_params[a]->value.decode_token) printf(" %s\n\r",field_params[a]->value.decode_token); | |
201 | else printf(" NULL\n\r"); | |
202 | if(field_params[a]->value.case_sensitive) printf(" case_sensitive\n\r"); | |
203 | else printf(" case_insensitive\n\r"); | |
204 | } else printf(" NULL\n\r"); | |
205 | } | |
206 | printf("Coding params:\n\r Leading 0:"); | |
207 | if(coding_params.leading_zero) printf(" true\n\r"); | |
208 | else printf(" false\n\r"); | |
209 | printf(" Length: %i - %i\n\r",coding_params.min_length,coding_params.max_length); | |
210 | printf(" Convert: %i\n\r Just:%i\n\r",coding_params.convert,coding_params.just); | |
211 | ||
212 | printf("Decoding params:\n\r Leading 0:"); | |
213 | if(decoding_params.leading_zero) printf(" true\n\r"); | |
214 | else printf(" false\n\r"); | |
215 | printf(" Length: %i - %i\n\r",decoding_params.min_length,decoding_params.max_length); | |
216 | printf(" Convert: %i\n\r Just:%i\n\r",decoding_params.convert,decoding_params.just); | |
217 | } | |
218 | ||
219 | int TextAST::get_field_param_index(const Common::Identifier *name) | |
220 | { | |
221 | for(int a=0;a<nof_field_params;a++){ | |
222 | if(*field_params[a]->name==*name) return a; | |
223 | } | |
224 | field_params=(textAST_enum_def **)Realloc(field_params,(nof_field_params+1)*sizeof(textAST_enum_def *)); | |
225 | field_params[nof_field_params]=(textAST_enum_def*)Malloc(sizeof(textAST_enum_def)); | |
226 | field_params[nof_field_params]->name= new Common::Identifier(*name); | |
227 | field_params[nof_field_params]->value.encode_token=NULL; | |
228 | field_params[nof_field_params]->value.decode_token=NULL; | |
229 | field_params[nof_field_params]->value.case_sensitive=true; | |
230 | nof_field_params++; | |
231 | return nof_field_params-1; | |
232 | } | |
233 | ||
234 | void copy_textAST_matching_values(textAST_matching_values **to, | |
235 | const textAST_matching_values *from) | |
236 | { | |
237 | if(from==NULL) return; | |
238 | if(*to==NULL) | |
239 | *to=(textAST_matching_values*)Malloc(sizeof(textAST_matching_values)); | |
240 | if(from->encode_token) (*to)->encode_token=mcopystr(from->encode_token); | |
241 | else (*to)->encode_token=NULL; | |
242 | if(from->decode_token && !from->generated_decode_token) | |
243 | (*to)->decode_token=mcopystr(from->decode_token); | |
244 | else (*to)->decode_token=NULL; | |
245 | (*to)->case_sensitive=from->case_sensitive; | |
246 | (*to)->generated_decode_token=false; | |
247 | } | |
248 | ||
249 | char *process_decode_token(const char *decode_token, | |
250 | const Common::Location& loc) | |
251 | { | |
252 | enum { INITIAL, BS, BS_N, BS_Q } state = INITIAL; | |
253 | // points to the last backslash found | |
254 | size_t bs_idx = 0; | |
255 | char *ret_val = memptystr(); | |
256 | for (size_t i = 0; decode_token[i] != '\0'; i++) { | |
257 | switch (state) { | |
258 | case INITIAL: | |
259 | switch (decode_token[i]) { | |
260 | case '\\': | |
261 | state = BS; | |
262 | bs_idx = i; | |
263 | break; | |
264 | case '{': | |
265 | // {reference}: find the matching bracket and skip the entire reference | |
266 | for (size_t j = i + 1; ; j++) { | |
267 | if (decode_token[j] == '\0') { | |
268 | loc.error("Invalid reference `%s' in matching pattern", | |
269 | decode_token + i); | |
270 | i = j - 1; | |
271 | break; | |
272 | } else if (decode_token[j] == '}') { | |
273 | loc.error("Reference `%s' is not allowed in matching pattern", | |
274 | string(j + 1 - i, decode_token + i).c_str()); | |
275 | i = j; | |
276 | break; | |
277 | } | |
278 | } | |
279 | break; | |
280 | case '\'': | |
281 | case '"': | |
282 | // replace '' -> ' and "" -> " | |
283 | ret_val = mputc(ret_val, decode_token[i]); | |
284 | if (decode_token[i + 1] == decode_token[i]) i++; | |
285 | break; | |
286 | default: | |
287 | ret_val = mputc(ret_val, decode_token[i]); | |
288 | } | |
289 | break; | |
290 | case BS: | |
291 | switch (decode_token[i]) { | |
292 | case 'N': | |
293 | state = BS_N; | |
294 | break; | |
295 | case 'q': | |
296 | state = BS_Q; | |
297 | break; | |
298 | case '\'': | |
299 | case '"': | |
300 | // replace \' -> ' and \" -> " | |
301 | ret_val = mputc(ret_val, decode_token[i]); | |
302 | state = INITIAL; | |
303 | break; | |
304 | default: | |
305 | // keep other escape sequences | |
306 | ret_val = mputc(ret_val, '\\'); | |
307 | ret_val = mputc(ret_val, decode_token[i]); | |
308 | state = INITIAL; | |
309 | } | |
310 | break; | |
311 | case BS_N: | |
312 | switch (decode_token[i]) { | |
313 | case ' ': | |
314 | case '\t': | |
315 | case '\r': | |
316 | case '\n': | |
317 | case '\v': | |
318 | case '\f': | |
319 | // ignore whitespace following \N | |
320 | break; | |
321 | case '{': | |
322 | // \N{reference}: find the matching bracket and skip the entire | |
323 | // reference | |
324 | for (size_t j = i + 1; ; j++) { | |
325 | if (decode_token[j] == '\0') { | |
326 | loc.error("Invalid character set reference `%s' in matching " | |
327 | "pattern", decode_token + bs_idx); | |
328 | i = j - 1; | |
329 | break; | |
330 | } else if (decode_token[j] == '}') { | |
331 | loc.error("Character set reference `%s' is not allowed in matching " | |
332 | "pattern", string(j + 1 - bs_idx, decode_token + bs_idx).c_str()); | |
333 | i = j; | |
334 | break; | |
335 | } | |
336 | } | |
337 | state = INITIAL; | |
338 | break; | |
339 | default: | |
340 | loc.error("Invalid character set reference `%s' in matching pattern", | |
341 | string(i + 1 - bs_idx, decode_token + bs_idx).c_str()); | |
342 | state = INITIAL; | |
343 | } | |
344 | break; | |
345 | case BS_Q: | |
346 | switch (decode_token[i]) { | |
347 | case ' ': | |
348 | case '\t': | |
349 | case '\r': | |
350 | case '\n': | |
351 | case '\v': | |
352 | case '\f': | |
353 | // ignore whitespace following \q | |
354 | break; | |
355 | case '{': | |
356 | // copy the rest of the quadruple literally | |
357 | ret_val = mputstr(ret_val, "\\q{"); | |
358 | state = INITIAL; | |
359 | break; | |
360 | default: | |
361 | loc.error("Invalid quadruple notation `%s' in matching pattern", | |
362 | string(i + 1 - bs_idx, decode_token + bs_idx).c_str()); | |
363 | state = INITIAL; | |
364 | } | |
365 | } | |
366 | } | |
367 | // final check of state | |
368 | switch (state) { | |
369 | case INITIAL: | |
370 | break; | |
371 | case BS: | |
372 | loc.error("Invalid trailing backslash in matching pattern"); | |
373 | break; | |
374 | case BS_N: | |
375 | loc.error("Invalid character set reference `%s' in matching pattern", | |
376 | decode_token + bs_idx); | |
377 | break; | |
378 | case BS_Q: | |
379 | loc.error("Invalid quadruple notation `%s' in matching pattern", | |
380 | decode_token + bs_idx); | |
381 | } | |
382 | return ret_val; | |
383 | } | |
384 | ||
385 | extern bool has_meta(); // in pattern_la.l | |
386 | ||
387 | extern int pattern_yylex(); | |
388 | extern void init_pattern_yylex(YYSTYPE *p); | |
389 | struct yy_buffer_state; | |
390 | extern yy_buffer_state* pattern_yy_scan_string(const char*); | |
391 | extern int pattern_yylex_destroy(); | |
392 | ||
393 | char *make_posix_str_code(const char *pat, bool cs) | |
394 | { | |
395 | if (pat == NULL || pat[0] == '\0') return mcopystr("I"); | |
396 | char *ret_val; | |
397 | ||
398 | ret_val = mcopystr(cs ? "I" : "N"); // Case sensitive? _I_gen / _N_em | |
399 | ||
400 | // Run the pattern lexer over the pattern to check for meta-characters. | |
401 | // TTCN_pattern_to_regexp() would be overkill. | |
402 | yy_buffer_state *flex_buffer = pattern_yy_scan_string(pat); | |
403 | if (flex_buffer == NULL) {} //a fatal error was already generated | |
404 | else { | |
405 | YYSTYPE yylval; | |
406 | init_pattern_yylex(&yylval); | |
407 | while (pattern_yylex()) {} // 0 means end-of-string | |
408 | pattern_yylex_destroy(); | |
409 | } | |
410 | ||
411 | // If the matching is case sensitive and there are no meta-characters, | |
412 | // matching for fixed strings can be done without regular expressions. | |
413 | const bool maybe_fixed = cs && !has_meta(); | |
414 | ||
415 | char *mod_token = mprintf("(%s)*", pat); | |
416 | // disable all warnings and "not supported" messages while translating the | |
417 | // pattern to POSIX regexp again (these messages were already reported | |
418 | // during semantic analysis) | |
419 | unsigned orig_verb_level = verb_level; | |
420 | verb_level &= ~(1|2); | |
421 | char *posix_str = TTCN_pattern_to_regexp(mod_token); | |
422 | verb_level = orig_verb_level; | |
423 | Free(mod_token); | |
424 | if (posix_str == NULL) FATAL_ERROR("make_posix_str_code()"); | |
425 | ||
426 | size_t len = mstrlen(posix_str), skip = 0; | |
427 | if (maybe_fixed) { // maybe we can do fixed strings | |
428 | *ret_val = 'F'; // "fixed" | |
429 | ||
430 | // posix_str contains: ^(pat).*$ but all we need is pat | |
431 | ret_val = mputstrn(ret_val, posix_str + 2, len - 6); | |
432 | } | |
433 | else { // no fixed-string optimization possible | |
434 | // TEXT decoder optimization | |
435 | if (len > 3) { | |
436 | if (!memcmp(posix_str + (len - 3), ".*$", 3)) { | |
437 | // The POSIX RE is set to match a bunch of characters before the end. | |
438 | // This happens if the TTCN pattern wanted to match anywhere, not just | |
439 | // at the end (TTCN patterns are anchored at the beginning and end). | |
440 | // Instead of looking for (0 or more) characters and ignoring them, | |
441 | // just don't look for them at all. RE are not anchored by default. | |
442 | posix_str = mtruncstr(posix_str, len -= 3); | |
443 | } | |
444 | ||
445 | if (len > 3) { // len might have changed while truncating, above | |
446 | // Un-anchor the cheap way, at the beginning. | |
447 | skip = memcmp(posix_str, "^.*", 3) ? 0 : 3; | |
448 | } | |
449 | } | |
450 | ||
451 | ret_val = mputstr(ret_val, posix_str + skip); | |
452 | } | |
453 | ||
454 | Free(posix_str); | |
455 | ||
456 | return ret_val; | |
457 | } | |
458 | ||
459 | char *convert_charstring_to_pattern(const char *str) | |
460 | { | |
461 | char *ret_val = memptystr(); | |
462 | for (size_t i = 0; str[i]; i++) { | |
463 | switch (str[i]) { | |
464 | case '?': | |
465 | case '*': | |
466 | case '\\': | |
467 | case '[': | |
468 | case ']': | |
469 | case '{': | |
470 | case '}': | |
471 | case '|': | |
472 | case '(': | |
473 | case ')': | |
474 | case '#': | |
475 | case '+': | |
476 | // special characters of TTCN-3 patterns; escape with a backslash | |
477 | ret_val = mputc(ret_val, '\\'); | |
478 | // no break | |
479 | default: | |
480 | ret_val = mputc(ret_val, str[i]); | |
481 | } | |
482 | } | |
483 | return ret_val; | |
484 | } | |
485 | ||
486 | void init_textAST_matching_values(textAST_matching_values *val){ | |
487 | val->encode_token=NULL; | |
488 | val->decode_token=NULL; | |
489 | val->case_sensitive=true; | |
490 | val->generated_decode_token=false; | |
491 | } |