Commit | Line | Data |
---|---|---|
d44e3c4f | 1 | /****************************************************************************** |
2 | * Copyright (c) 2000-2016 Ericsson Telecom AB | |
3 | * All rights reserved. This program and the accompanying materials | |
4 | * are made available under the terms of the Eclipse Public License v1.0 | |
5 | * which accompanies this distribution, and is available at | |
6 | * http://www.eclipse.org/legal/epl-v10.html | |
7 | * | |
8 | * Contributors: | |
9 | * Balasko, Jeno | |
10 | * Raduly, Csaba | |
11 | * Szabo, Janos Zoltan – initial implementation | |
12 | * Szalai, Gabor | |
13 | * | |
14 | ******************************************************************************/ | |
970ed795 EL |
15 | #include "TextAST.hh" |
16 | #include <stdio.h> | |
17 | #include "common/pattern.hh" | |
18 | #include "common/pattern_p.hh" | |
19 | ||
20 | void TextAST::init_TextAST() | |
21 | { | |
22 | begin_val=NULL; | |
23 | end_val=NULL; | |
24 | separator_val=NULL; | |
25 | nof_field_params=0; | |
26 | field_params=NULL; | |
27 | true_params=NULL; | |
28 | false_params=NULL; | |
29 | coding_params.leading_zero=false; | |
30 | coding_params.repeatable=false; | |
31 | coding_params.min_length=-1; | |
32 | coding_params.max_length=-1; | |
33 | coding_params.convert=0; | |
34 | coding_params.just=1; | |
35 | decoding_params.leading_zero=false; | |
36 | decoding_params.repeatable=false; | |
37 | decoding_params.min_length=-1; | |
38 | decoding_params.max_length=-1; | |
39 | decoding_params.convert=0; | |
40 | decoding_params.just=1; | |
41 | decode_token=NULL; | |
42 | case_sensitive=true; | |
43 | } | |
44 | ||
45 | TextAST::TextAST(const TextAST *other_val) | |
46 | { | |
47 | init_TextAST(); | |
48 | if(other_val){ | |
49 | //printf("HALI\n\r"); | |
50 | //other_val->print_TextAST(); | |
51 | copy_textAST_matching_values(&begin_val,other_val->begin_val); | |
52 | copy_textAST_matching_values(&end_val,other_val->end_val); | |
53 | copy_textAST_matching_values(&separator_val,other_val->separator_val); | |
54 | ||
55 | nof_field_params=other_val->nof_field_params; | |
56 | if(nof_field_params) field_params= | |
57 | (textAST_enum_def**)Malloc(nof_field_params*sizeof(textAST_enum_def*)); | |
58 | for(int a=0;a<nof_field_params;a++){ | |
59 | if(other_val->field_params[a]){ | |
60 | field_params[a]=(textAST_enum_def*)Malloc(sizeof(textAST_enum_def)); | |
61 | field_params[a]->name= | |
62 | new Common::Identifier(*(other_val->field_params[a]->name)); | |
63 | textAST_matching_values *b=&(field_params[a]->value); | |
64 | copy_textAST_matching_values(&b,&other_val->field_params[a]->value); | |
65 | } else field_params[a]=NULL; | |
66 | } | |
67 | //print_TextAST(); | |
68 | ||
69 | copy_textAST_matching_values(&true_params,other_val->true_params); | |
70 | copy_textAST_matching_values(&false_params,other_val->false_params); | |
71 | ||
72 | coding_params.leading_zero=other_val->coding_params.leading_zero; | |
73 | coding_params.repeatable=other_val->coding_params.repeatable; | |
74 | coding_params.min_length=other_val->coding_params.min_length; | |
75 | coding_params.max_length=other_val->coding_params.max_length; | |
76 | coding_params.convert=other_val->coding_params.convert; | |
77 | coding_params.just=other_val->coding_params.just; | |
78 | decoding_params.leading_zero=other_val->decoding_params.leading_zero; | |
79 | decoding_params.repeatable=other_val->decoding_params.repeatable; | |
80 | decoding_params.min_length=other_val->decoding_params.min_length; | |
81 | decoding_params.max_length=other_val->decoding_params.max_length; | |
82 | decoding_params.convert=other_val->decoding_params.convert; | |
83 | decoding_params.just=other_val->decoding_params.just; | |
84 | ||
85 | if(other_val->decode_token) | |
86 | decode_token=mcopystr(other_val->decode_token); | |
87 | case_sensitive=other_val->case_sensitive; | |
88 | } | |
89 | } | |
90 | ||
91 | TextAST::~TextAST() | |
92 | { | |
93 | if(begin_val){ | |
94 | Free(begin_val->encode_token); | |
95 | Free(begin_val->decode_token); | |
96 | Free(begin_val); | |
97 | } | |
98 | if(end_val){ | |
99 | Free(end_val->encode_token); | |
100 | Free(end_val->decode_token); | |
101 | Free(end_val); | |
102 | } | |
103 | if(separator_val){ | |
104 | Free(separator_val->encode_token); | |
105 | Free(separator_val->decode_token); | |
106 | Free(separator_val); | |
107 | } | |
108 | if(field_params){ | |
109 | for(int a=0;a<nof_field_params;a++){ | |
110 | if(field_params[a]){ | |
111 | delete field_params[a]->name; | |
112 | Free(field_params[a]->value.encode_token); | |
113 | Free(field_params[a]->value.decode_token); | |
114 | } | |
115 | Free(field_params[a]); | |
116 | } | |
117 | Free(field_params); | |
118 | } | |
119 | if(true_params){ | |
120 | Free(true_params->encode_token); | |
121 | Free(true_params->decode_token); | |
122 | Free(true_params); | |
123 | } | |
124 | if(false_params){ | |
125 | Free(false_params->encode_token); | |
126 | Free(false_params->decode_token); | |
127 | Free(false_params); | |
128 | } | |
129 | Free(decode_token); | |
130 | } | |
131 | ||
132 | void TextAST::print_TextAST() const | |
133 | { | |
134 | printf("\n\rBegin:"); | |
135 | if(begin_val){ | |
136 | printf("\n\r Encode token:"); | |
137 | if(begin_val->encode_token) printf(" %s\n\r",begin_val->encode_token); | |
138 | else printf(" NULL\n\r"); | |
139 | printf(" Decode token:"); | |
140 | if(begin_val->decode_token) printf(" %s\n\r",begin_val->decode_token); | |
141 | else printf(" NULL\n\r"); | |
142 | if(begin_val->case_sensitive) printf(" case_sensitive\n\r"); | |
143 | else printf(" case_insensitive\n\r"); | |
144 | } | |
145 | else printf("NULL\n\r"); | |
146 | printf("End:"); | |
147 | if(end_val){ | |
148 | printf("\n\r Encode token:"); | |
149 | if(end_val->encode_token) printf(" %s\n\r",end_val->encode_token); | |
150 | else printf(" NULL\n\r"); | |
151 | printf(" Decode token:"); | |
152 | if(end_val->decode_token) printf(" %s\n\r",end_val->decode_token); | |
153 | else printf(" NULL\n\r"); | |
154 | if(end_val->case_sensitive) printf(" case_sensitive\n\r"); | |
155 | else printf(" case_insensitive\n\r"); | |
156 | } | |
157 | else printf("NULL\n\r"); | |
158 | printf("Separator:"); | |
159 | if(separator_val){ | |
160 | printf("\n\r Encode token:"); | |
161 | if(separator_val->encode_token) printf(" %s\n\r",separator_val->encode_token); | |
162 | else printf(" NULL\n\r"); | |
163 | printf(" Decode token:"); | |
164 | if(separator_val->decode_token) printf(" %s\n\r",separator_val->decode_token); | |
165 | else printf(" NULL\n\r"); | |
166 | if(separator_val->case_sensitive) printf(" case_sensitive\n\r"); | |
167 | else printf(" case_insensitive\n\r"); | |
168 | } | |
169 | else printf("NULL\n\r"); | |
170 | printf("Select token:"); | |
171 | if(decode_token) printf("%s",decode_token); | |
172 | if(case_sensitive) printf(" case_sensitive\n\r"); | |
173 | else printf(" case_insensitive\n\r"); | |
174 | printf("True:"); | |
175 | if(true_params){ | |
176 | printf("\n\r Encode token:"); | |
177 | if(true_params->encode_token) printf(" %s\n\r",true_params->encode_token); | |
178 | else printf(" NULL\n\r"); | |
179 | printf(" Decode token:"); | |
180 | if(true_params->decode_token) printf(" %s\n\r",true_params->decode_token); | |
181 | else printf(" NULL\n\r"); | |
182 | if(true_params->case_sensitive) printf(" case_sensitive\n\r"); | |
183 | else printf(" case_insensitive\n\r"); | |
184 | } | |
185 | else printf("NULL\n\r"); | |
186 | printf("False:"); | |
187 | if(true_params){ | |
188 | printf("\n\r Encode token:"); | |
189 | if(false_params->encode_token) printf(" %s\n\r",false_params->encode_token); | |
190 | else printf(" NULL\n\r"); | |
191 | printf(" Decode token:"); | |
192 | if(false_params->decode_token) printf(" %s\n\r",false_params->decode_token); | |
193 | else printf(" NULL\n\r"); | |
194 | if(false_params->case_sensitive) printf(" case_sensitive\n\r"); | |
195 | else printf(" case_insensitive\n\r"); | |
196 | } | |
197 | else printf("NULL\n\r"); | |
198 | printf("Number of fields:%i\n\r",nof_field_params); | |
199 | for(int a=0;a<nof_field_params;a++){ | |
200 | printf("Field %i:\n\r",a); | |
201 | if(field_params[a]){ | |
202 | printf(" Name: %s\n\r",field_params[a]->name->get_name().c_str()); | |
203 | printf(" Encode token:"); | |
204 | if(field_params[a]->value.encode_token) printf(" %s\n\r",field_params[a]->value.encode_token); | |
205 | else printf(" NULL\n\r"); | |
206 | printf(" Decode token:"); | |
207 | if(field_params[a]->value.decode_token) printf(" %s\n\r",field_params[a]->value.decode_token); | |
208 | else printf(" NULL\n\r"); | |
209 | if(field_params[a]->value.case_sensitive) printf(" case_sensitive\n\r"); | |
210 | else printf(" case_insensitive\n\r"); | |
211 | } else printf(" NULL\n\r"); | |
212 | } | |
213 | printf("Coding params:\n\r Leading 0:"); | |
214 | if(coding_params.leading_zero) printf(" true\n\r"); | |
215 | else printf(" false\n\r"); | |
216 | printf(" Length: %i - %i\n\r",coding_params.min_length,coding_params.max_length); | |
217 | printf(" Convert: %i\n\r Just:%i\n\r",coding_params.convert,coding_params.just); | |
218 | ||
219 | printf("Decoding params:\n\r Leading 0:"); | |
220 | if(decoding_params.leading_zero) printf(" true\n\r"); | |
221 | else printf(" false\n\r"); | |
222 | printf(" Length: %i - %i\n\r",decoding_params.min_length,decoding_params.max_length); | |
223 | printf(" Convert: %i\n\r Just:%i\n\r",decoding_params.convert,decoding_params.just); | |
224 | } | |
225 | ||
226 | int TextAST::get_field_param_index(const Common::Identifier *name) | |
227 | { | |
228 | for(int a=0;a<nof_field_params;a++){ | |
229 | if(*field_params[a]->name==*name) return a; | |
230 | } | |
231 | field_params=(textAST_enum_def **)Realloc(field_params,(nof_field_params+1)*sizeof(textAST_enum_def *)); | |
232 | field_params[nof_field_params]=(textAST_enum_def*)Malloc(sizeof(textAST_enum_def)); | |
233 | field_params[nof_field_params]->name= new Common::Identifier(*name); | |
234 | field_params[nof_field_params]->value.encode_token=NULL; | |
235 | field_params[nof_field_params]->value.decode_token=NULL; | |
236 | field_params[nof_field_params]->value.case_sensitive=true; | |
237 | nof_field_params++; | |
238 | return nof_field_params-1; | |
239 | } | |
240 | ||
241 | void copy_textAST_matching_values(textAST_matching_values **to, | |
242 | const textAST_matching_values *from) | |
243 | { | |
244 | if(from==NULL) return; | |
245 | if(*to==NULL) | |
246 | *to=(textAST_matching_values*)Malloc(sizeof(textAST_matching_values)); | |
247 | if(from->encode_token) (*to)->encode_token=mcopystr(from->encode_token); | |
248 | else (*to)->encode_token=NULL; | |
249 | if(from->decode_token && !from->generated_decode_token) | |
250 | (*to)->decode_token=mcopystr(from->decode_token); | |
251 | else (*to)->decode_token=NULL; | |
252 | (*to)->case_sensitive=from->case_sensitive; | |
253 | (*to)->generated_decode_token=false; | |
254 | } | |
255 | ||
256 | char *process_decode_token(const char *decode_token, | |
257 | const Common::Location& loc) | |
258 | { | |
259 | enum { INITIAL, BS, BS_N, BS_Q } state = INITIAL; | |
260 | // points to the last backslash found | |
261 | size_t bs_idx = 0; | |
262 | char *ret_val = memptystr(); | |
263 | for (size_t i = 0; decode_token[i] != '\0'; i++) { | |
264 | switch (state) { | |
265 | case INITIAL: | |
266 | switch (decode_token[i]) { | |
267 | case '\\': | |
268 | state = BS; | |
269 | bs_idx = i; | |
270 | break; | |
271 | case '{': | |
272 | // {reference}: find the matching bracket and skip the entire reference | |
273 | for (size_t j = i + 1; ; j++) { | |
274 | if (decode_token[j] == '\0') { | |
275 | loc.error("Invalid reference `%s' in matching pattern", | |
276 | decode_token + i); | |
277 | i = j - 1; | |
278 | break; | |
279 | } else if (decode_token[j] == '}') { | |
280 | loc.error("Reference `%s' is not allowed in matching pattern", | |
281 | string(j + 1 - i, decode_token + i).c_str()); | |
282 | i = j; | |
283 | break; | |
284 | } | |
285 | } | |
286 | break; | |
287 | case '\'': | |
288 | case '"': | |
289 | // replace '' -> ' and "" -> " | |
290 | ret_val = mputc(ret_val, decode_token[i]); | |
291 | if (decode_token[i + 1] == decode_token[i]) i++; | |
292 | break; | |
293 | default: | |
294 | ret_val = mputc(ret_val, decode_token[i]); | |
295 | } | |
296 | break; | |
297 | case BS: | |
298 | switch (decode_token[i]) { | |
299 | case 'N': | |
300 | state = BS_N; | |
301 | break; | |
302 | case 'q': | |
303 | state = BS_Q; | |
304 | break; | |
305 | case '\'': | |
306 | case '"': | |
307 | // replace \' -> ' and \" -> " | |
308 | ret_val = mputc(ret_val, decode_token[i]); | |
309 | state = INITIAL; | |
310 | break; | |
311 | default: | |
312 | // keep other escape sequences | |
313 | ret_val = mputc(ret_val, '\\'); | |
314 | ret_val = mputc(ret_val, decode_token[i]); | |
315 | state = INITIAL; | |
316 | } | |
317 | break; | |
318 | case BS_N: | |
319 | switch (decode_token[i]) { | |
320 | case ' ': | |
321 | case '\t': | |
322 | case '\r': | |
323 | case '\n': | |
324 | case '\v': | |
325 | case '\f': | |
326 | // ignore whitespace following \N | |
327 | break; | |
328 | case '{': | |
329 | // \N{reference}: find the matching bracket and skip the entire | |
330 | // reference | |
331 | for (size_t j = i + 1; ; j++) { | |
332 | if (decode_token[j] == '\0') { | |
333 | loc.error("Invalid character set reference `%s' in matching " | |
334 | "pattern", decode_token + bs_idx); | |
335 | i = j - 1; | |
336 | break; | |
337 | } else if (decode_token[j] == '}') { | |
338 | loc.error("Character set reference `%s' is not allowed in matching " | |
339 | "pattern", string(j + 1 - bs_idx, decode_token + bs_idx).c_str()); | |
340 | i = j; | |
341 | break; | |
342 | } | |
343 | } | |
344 | state = INITIAL; | |
345 | break; | |
346 | default: | |
347 | loc.error("Invalid character set reference `%s' in matching pattern", | |
348 | string(i + 1 - bs_idx, decode_token + bs_idx).c_str()); | |
349 | state = INITIAL; | |
350 | } | |
351 | break; | |
352 | case BS_Q: | |
353 | switch (decode_token[i]) { | |
354 | case ' ': | |
355 | case '\t': | |
356 | case '\r': | |
357 | case '\n': | |
358 | case '\v': | |
359 | case '\f': | |
360 | // ignore whitespace following \q | |
361 | break; | |
362 | case '{': | |
363 | // copy the rest of the quadruple literally | |
364 | ret_val = mputstr(ret_val, "\\q{"); | |
365 | state = INITIAL; | |
366 | break; | |
367 | default: | |
368 | loc.error("Invalid quadruple notation `%s' in matching pattern", | |
369 | string(i + 1 - bs_idx, decode_token + bs_idx).c_str()); | |
370 | state = INITIAL; | |
371 | } | |
372 | } | |
373 | } | |
374 | // final check of state | |
375 | switch (state) { | |
376 | case INITIAL: | |
377 | break; | |
378 | case BS: | |
379 | loc.error("Invalid trailing backslash in matching pattern"); | |
380 | break; | |
381 | case BS_N: | |
382 | loc.error("Invalid character set reference `%s' in matching pattern", | |
383 | decode_token + bs_idx); | |
384 | break; | |
385 | case BS_Q: | |
386 | loc.error("Invalid quadruple notation `%s' in matching pattern", | |
387 | decode_token + bs_idx); | |
388 | } | |
389 | return ret_val; | |
390 | } | |
391 | ||
// Declarations of the pattern lexer interface used by make_posix_str_code().
// NOTE(review): apart from has_meta() the origin of these functions is not
// visible here; presumably they are also generated from pattern_la.l - confirm.

// Queried after a lexer run to find out whether the pattern contained
// meta-characters.
extern bool has_meta(); // in pattern_la.l

// Scans the next token; returns 0 at the end of the string.
extern int pattern_yylex();
// Must be called with the semantic value storage before scanning starts.
extern void init_pattern_yylex(YYSTYPE *p);
struct yy_buffer_state;
// Sets up the lexer to read from the given string.
extern yy_buffer_state* pattern_yy_scan_string(const char*);
// Called once the scanning is finished.
extern int pattern_yylex_destroy();
399 | ||
400 | char *make_posix_str_code(const char *pat, bool cs) | |
401 | { | |
402 | if (pat == NULL || pat[0] == '\0') return mcopystr("I"); | |
403 | char *ret_val; | |
404 | ||
405 | ret_val = mcopystr(cs ? "I" : "N"); // Case sensitive? _I_gen / _N_em | |
406 | ||
407 | // Run the pattern lexer over the pattern to check for meta-characters. | |
408 | // TTCN_pattern_to_regexp() would be overkill. | |
409 | yy_buffer_state *flex_buffer = pattern_yy_scan_string(pat); | |
410 | if (flex_buffer == NULL) {} //a fatal error was already generated | |
411 | else { | |
412 | YYSTYPE yylval; | |
413 | init_pattern_yylex(&yylval); | |
414 | while (pattern_yylex()) {} // 0 means end-of-string | |
415 | pattern_yylex_destroy(); | |
416 | } | |
417 | ||
418 | // If the matching is case sensitive and there are no meta-characters, | |
419 | // matching for fixed strings can be done without regular expressions. | |
420 | const bool maybe_fixed = cs && !has_meta(); | |
421 | ||
422 | char *mod_token = mprintf("(%s)*", pat); | |
423 | // disable all warnings and "not supported" messages while translating the | |
424 | // pattern to POSIX regexp again (these messages were already reported | |
425 | // during semantic analysis) | |
426 | unsigned orig_verb_level = verb_level; | |
427 | verb_level &= ~(1|2); | |
428 | char *posix_str = TTCN_pattern_to_regexp(mod_token); | |
429 | verb_level = orig_verb_level; | |
430 | Free(mod_token); | |
431 | if (posix_str == NULL) FATAL_ERROR("make_posix_str_code()"); | |
432 | ||
433 | size_t len = mstrlen(posix_str), skip = 0; | |
434 | if (maybe_fixed) { // maybe we can do fixed strings | |
435 | *ret_val = 'F'; // "fixed" | |
436 | ||
437 | // posix_str contains: ^(pat).*$ but all we need is pat | |
438 | ret_val = mputstrn(ret_val, posix_str + 2, len - 6); | |
439 | } | |
440 | else { // no fixed-string optimization possible | |
441 | // TEXT decoder optimization | |
442 | if (len > 3) { | |
443 | if (!memcmp(posix_str + (len - 3), ".*$", 3)) { | |
444 | // The POSIX RE is set to match a bunch of characters before the end. | |
445 | // This happens if the TTCN pattern wanted to match anywhere, not just | |
446 | // at the end (TTCN patterns are anchored at the beginning and end). | |
447 | // Instead of looking for (0 or more) characters and ignoring them, | |
448 | // just don't look for them at all. RE are not anchored by default. | |
449 | posix_str = mtruncstr(posix_str, len -= 3); | |
450 | } | |
451 | ||
452 | if (len > 3) { // len might have changed while truncating, above | |
453 | // Un-anchor the cheap way, at the beginning. | |
454 | skip = memcmp(posix_str, "^.*", 3) ? 0 : 3; | |
455 | } | |
456 | } | |
457 | ||
458 | ret_val = mputstr(ret_val, posix_str + skip); | |
459 | } | |
460 | ||
461 | Free(posix_str); | |
462 | ||
463 | return ret_val; | |
464 | } | |
465 | ||
466 | char *convert_charstring_to_pattern(const char *str) | |
467 | { | |
468 | char *ret_val = memptystr(); | |
469 | for (size_t i = 0; str[i]; i++) { | |
470 | switch (str[i]) { | |
471 | case '?': | |
472 | case '*': | |
473 | case '\\': | |
474 | case '[': | |
475 | case ']': | |
476 | case '{': | |
477 | case '}': | |
478 | case '|': | |
479 | case '(': | |
480 | case ')': | |
481 | case '#': | |
482 | case '+': | |
483 | // special characters of TTCN-3 patterns; escape with a backslash | |
484 | ret_val = mputc(ret_val, '\\'); | |
485 | // no break | |
486 | default: | |
487 | ret_val = mputc(ret_val, str[i]); | |
488 | } | |
489 | } | |
490 | return ret_val; | |
491 | } | |
492 | ||
493 | void init_textAST_matching_values(textAST_matching_values *val){ | |
494 | val->encode_token=NULL; | |
495 | val->decode_token=NULL; | |
496 | val->case_sensitive=true; | |
497 | val->generated_decode_token=false; | |
498 | } |