Sync with 5.1.0
[deliverable/titan.core.git] / common / pattern_uni.y
1 /******************************************************************************
2 * Copyright (c) 2000-2014 Ericsson Telecom AB
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Eclipse Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/epl-v10.html
7 ******************************************************************************/
8
9 /**
10 * Based on pattern_p.y
11 */
12
13 %{
14
15 /*********************************************************************
16 * C(++) declarations
17 *********************************************************************/
18
19 #include <stdio.h>
20 #include <string.h>
21 #include <ctype.h>
22
23 #if defined(__CYGWIN__) && defined(__clang__)
24 /* Cygwin's clang 3.0 has its own limits.h, which does not bring in
25 the system's limits.h unless we define this macro: */
26 #define __STDC_HOSTED__ 1
27 #define _GCC_NEXT_LIMITS_H
28 #endif
29 #include <limits.h>
30
31 #include <regex.h>
32 #if !defined(RE_DUP_MAX)
33 /* RE_DUP_MAX is defined in limits.h or regex.h, except on Cygwin 1.5 */
34 # include <sys/syslimits.h>
35 #endif
36
37 #include "memory.h"
38 #include "pattern.hh"
39
40 #include "Quadruple.hh"
41
/* Forward declaration: the full union is generated from the %union section. */
42 union YYSTYPE;
43 /* defined in lexer c-file: */
44
45 extern int pattern_yylex();
/* This grammar is generated with the name prefix "pattern_uni", so the parser
 * asks for its tokens through pattern_unilex(); forward to the lexer entry
 * point declared above. */
46 inline int pattern_unilex() { return pattern_yylex(); }
47 extern void init_pattern_yylex(YYSTYPE*);
48 struct yy_buffer_state;
49 extern yy_buffer_state* pattern_yy_scan_string(const char*);
50 extern void pattern_yy_delete_buffer(yy_buffer_state*);
/* Consulted by the RE_Elems rule when it emits an alternation. */
51 extern unsigned int get_nof_parentheses();
52
53 /* defined in this file: */
54
55 /** The converted regexp. */
/* Assigned by the Pattern rule and returned by TTCN_pattern_to_regexp_uni(). */
56 static char *ret_val;
57 /** The parser error reporting function. */
58 static void pattern_unierror(const char *error_str);
59
/** Incremented each time an RE_Group alternative is reduced; reset to zero at
 * the start of TTCN_pattern_to_regexp_uni(). */
60 static int user_groups;
61
62 #define YYERROR_VERBOSE
63
/* Token value printer, hooked into the parser through YYPRINT. */
64 static void yyprint(FILE *file, int type, const YYSTYPE& value);
65 #define YYPRINT(f,t,v) yyprint(f,t,v)
66
67 %}
68
69 /*********************************************************************
70 * Bison declarations
71 *********************************************************************/
72
73 %name-prefix="pattern_uni"
74 %output="pattern_uni.cc"
75 %defines
76 %verbose
77 %expect 0
78 %start Pattern
79 %debug
80
81 /*********************************************************************
82 * The union-type
83 * Must be kept in sync with the one in pattern_p.y !
84 *********************************************************************/
85
86 %union {
87 int b; /* boolean */
88 char c; /* single character */
89 char *s; /* character string */
90 unsigned long int u; /* unsigned integer */
91 struct character_set *set; // used by nonterminals in pattern_p.y
92
// Overlays `value` with four one-byte components (group, plane, row, cell).
// NOTE(review): the member order is reversed under the SPARC guard, presumably
// so that `group` lands in the most significant byte of `value` on both byte
// orders -- confirm before touching the layout.
93 union {
94 unsigned int value;
95 #if defined(__sparc__) || defined(__sparc)
96 struct {
97 unsigned char group;
98 unsigned char plane;
99 unsigned char row;
100 unsigned char cell;
101 } comp;
102 #else
103 struct {
104 unsigned char cell;
105 unsigned char row;
106 unsigned char plane;
107 unsigned char group;
108 } comp;
109 #endif
110 } q; // single universal char, used by nonterminals in pattern_uni.y
111 class QuadSet* qset; // used by nonterminals in pattern_uni.y
112 }
113
114 /*********************************************************************
115 * Tokens
116 *********************************************************************/
117
/* TOK_Char carries one pattern character. TOK_Digit is the operand of the
 * short repetition form (# followed by a digit); TOK_Number is used by the
 * parenthesised repetition forms and by the four fields of a \q quadruple. */
118 %token <c> TOK_Char "<ordinary character>"
119 %token <u> TOK_Number "<number>"
120 %token <u> TOK_Digit "<digit>"
121
122 /*********************************************************************
123 * Keywords
124 *********************************************************************/
125
/* Backslash escapes; none of them carries a semantic value. */
126 %token KW_BS_q "\\q"
127 %token KW_BS_d "\\d"
128 %token KW_BS_w "\\w"
129 %token KW_BS_t "\\t"
130 %token KW_BS_n "\\n"
131 %token KW_BS_r "\\r"
132 %token KW_BS_s "\\s"
133 %token KW_BS_b "\\b"
134
/* Group and set delimiters. The set delimiters are mapped to 0/1 flags by the
 * RE_Set_Begin, RE_Set_Begin_Rsbrkt and RE_Set_End rules. */
135 %token KW_Group_Begin "("
136 %token KW_Group_End ")"
137 %token KW_Set_Begin "["
138 %token KW_Set_Begin_Neg "[^"
139 %token KW_Set_Begin_Rsbrkt "[]"
140 %token KW_Set_Begin_Neg_Rsbrkt "[^]"
141 %token KW_Set_End "]"
142 %token KW_Set_Dash_End "-]"
143
144 /*********************************************************************
145 * semantic types of nonterminals
146 *********************************************************************/
147
/* <b>: 0/1 flag, <q>: one universal character, <s>: string owned by the
 * symbol, <qset>: QuadSet object owned by the symbol. */
148 %type <b> RE_Set_Begin RE_Set_Begin_Rsbrkt RE_Set_End
149 %type <q> RE_Set_Range_Char RE_Quadruple
150 %type <s> RE_Body RE_Elems RE_Alter_Elem RE_Concat_Elem
151 RE_Multiply_Elem RE_Multiply_Statement RE_Group
152 RE_OneCharPos
153 %type <qset> RE_Set RE_Set_Body RE_Set_Elem RE_Set_NoRange_Char
154
155 /*********************************************************************
156 * Destructors
157 *********************************************************************/
158
/* Bison runs a destructor when it discards a symbol without reducing it
 * (for instance when parsing is abandoned after an error). */
/* String-valued nonterminals are released with Free(). */
159 %destructor { Free($$); }
160 RE_Alter_Elem
161 RE_Body
162 RE_Concat_Elem
163 RE_Elems
164 RE_Group
165 RE_Multiply_Elem
166 RE_Multiply_Statement
167 RE_OneCharPos
168
/* QuadSet-valued nonterminals are released with delete. */
169 %destructor { delete $$; }
170 RE_Set
171 RE_Set_Body
172 RE_Set_Elem
173 RE_Set_NoRange_Char
174
175 %%
176
177 /*********************************************************************
178 * Grammar
179 *********************************************************************/
180
/* Start symbol: publishes the finished regexp in ret_val, which
 * TTCN_pattern_to_regexp_uni() returns to its caller. */
Pattern:
  RE_Body
  {
    ret_val = $1;
  }
;
184
/* Whole pattern: anchors the translated elements between ^ and $.
 * An empty pattern, or one whose elements collapsed to NULL, becomes "^$". */
RE_Body:
  /* empty */
  {
    $$ = mcopystr("^$");
  }
| RE_Elems
  {
    if ($1 == NULL) {
      $$ = mcopystr("^$");
    } else {
      $$ = mprintf("^%s$", $1);
      Free($1);
    }
  }
;
198
/* Alternation. Each side may be NULL (an element that produced nothing); a
 * missing side is written as an empty group "()". When the parenthesis count
 * is zero the branches are separated with "$|^" instead of "|", so that each
 * branch keeps the anchors added by RE_Body. */
RE_Elems:
  RE_Alter_Elem
  {
    $$ = $1;
  }
| RE_Elems '|' RE_Alter_Elem
  {
    /* Count reported by the lexer, plus one when the lookahead token is the
     * closing parenthesis (presumably because the lexer has then already
     * accounted for the group being closed -- confirm in the lexer). */
    unsigned int open_groups = get_nof_parentheses();
    if (yychar == KW_Group_End) open_groups++;
    const bool nested = (open_groups != 0);

    if ($3 == NULL) {
      if ($1 == NULL) $$ = NULL;
      else $$ = mputstr($1, nested ? "|()" : "$|^()");
    } else {
      if ($1 == NULL) $$ = mprintf(nested ? "()|%s" : "()$|^%s", $3);
      else $$ = mputprintf($1, nested ? "|%s" : "$|^%s", $3);
      Free($3);
    }
  }
;
214
/* Concatenation: appends each further element to the string built so far. */
RE_Alter_Elem:
  RE_Concat_Elem
  {
    $$ = $1;
  }
| RE_Alter_Elem RE_Concat_Elem
  {
    char *joined = mputstr($1, $2);
    Free($2);
    $$ = joined;
  }
;
223
/* One element, optionally followed by a repetition statement. If either part
 * is NULL (for example a zero repetition count) the whole element is dropped
 * and NULL is passed up. A lone '*' matches any number of characters. */
RE_Concat_Elem:
  RE_Multiply_Elem
  {
    $$ = $1;
  }
| RE_Multiply_Elem RE_Multiply_Statement
  {
    if ($1 == NULL || $2 == NULL) {
      Free($1);
      Free($2);
      $$ = NULL;
    } else {
      $$ = mputstr($1, $2);
      Free($2);
    }
  }
| '*'
  {
    $$ = mcopystr("(........)*");
  }
;
239
/* Anything a repetition statement can be applied to: a group or a single
 * character position. The string is passed through unchanged. */
RE_Multiply_Elem:
  RE_Group
  {
    $$ = $1;
  }
| RE_OneCharPos
  {
    $$ = $1;
  }
;
244
/* User-written group. The opening parenthesis is emitted as the placeholder
 * character '<' so that TTCN_pattern_to_regexp_uni() can tell user groups
 * apart from the parentheses generated by this grammar; that function turns
 * the placeholder back into '('. Every reduction bumps user_groups. */
RE_Group:
  KW_Group_Begin KW_Group_End
  {
    ++user_groups;
    $$ = mcopystr("<)");
  }
| KW_Group_Begin RE_Elems KW_Group_End
  {
    ++user_groups;
    if ($2 == NULL) {
      $$ = mcopystr("<)");
    } else {
      $$ = mprintf("<%s)", $2);
      Free($2);
    }
  }
;
262
/* Repetition statements, translated to the POSIX operators +, *, ? and the
 * brace forms. Result conventions used by every alternative:
 *   NULL          - zero repetitions; RE_Concat_Elem then drops the element
 *   empty string  - exactly one repetition; nothing needs to be appended
 * Counts above RE_DUP_MAX (when that macro is available) only produce a
 * warning; the count is still written into the regexp. */
RE_Multiply_Statement:
  '+'
  {
    $$ = mcopystr("+");
  }
| '#' '(' ',' ')'
  {
    $$ = mcopystr("*");
  }
| '#' TOK_Digit
  {
    switch ($2) {
    case 0:
      TTCN_pattern_warning("The number of repetitions is zero: `#0'.");
      $$ = NULL;
      break;
    case 1:
      $$ = memptystr();
      break;
    default:
      /* A digit token should never exceed 9. */
      if ($2 > 9) {
        TTCN_pattern_warning("Internal error: Invalid number of "
          "repetitions: `#%lu'.", $2);
      }
      $$ = mprintf("{%lu}", $2);
      break;
    }
  }
| '#' '(' TOK_Number ')'
  {
    switch ($3) {
    case 0:
      TTCN_pattern_warning("The number of repetitions is zero: `#(0)'.");
      $$ = NULL;
      break;
    case 1:
      $$ = memptystr();
      break;
    default:
#ifdef RE_DUP_MAX
      if ($3 > RE_DUP_MAX) {
        TTCN_pattern_warning("The number of repetitions in "
          "`#(%lu)' exceeds the limit allowed by this system (%d).", $3,
          RE_DUP_MAX);
      }
#endif
      $$ = mprintf("{%lu}", $3);
      break;
    }
  }
| '#' '(' TOK_Number ',' TOK_Number ')'
  {
#ifdef RE_DUP_MAX
    if ($3 > RE_DUP_MAX) {
      TTCN_pattern_warning("The minimum number of "
        "repetitions in `#(%lu,%lu)' exceeds the limit allowed by this system "
        "(%d).", $3, $5, RE_DUP_MAX);
    }
    if ($5 > RE_DUP_MAX) {
      TTCN_pattern_warning("The maximum number of "
        "repetitions in `#(%lu,%lu)' exceeds the limit allowed by this system "
        "(%d).", $3, $5, RE_DUP_MAX);
    }
#endif
    /* Reported, but translation continues with the bounds as given. */
    if ($3 > $5) {
      TTCN_pattern_error("The lower bound is higher than the upper "
        "bound in the number of repetitions: `#(%lu,%lu)'.", $3, $5);
    }
    if ($3 != $5) {
      if ($3 == 0 && $5 == 1) $$ = mcopystr("?");
      else $$ = mprintf("{%lu,%lu}", $3, $5);
    } else if ($3 == 0) {
      TTCN_pattern_warning("The number of repetitions is zero: `#(0,0)'.");
      $$ = NULL;
    } else if ($3 == 1) {
      $$ = memptystr();
    } else {
      $$ = mprintf("{%lu}", $3);
    }
  }
| '#' '(' ',' TOK_Number ')'
  {
    if ($4 != 0) {
#ifdef RE_DUP_MAX
      if ($4 > RE_DUP_MAX) {
        TTCN_pattern_warning("The maximum number of "
          "repetitions in `#(,%lu)' exceeds the limit allowed by this system "
          "(%d).", $4, RE_DUP_MAX);
      }
#endif
      if ($4 != 1) $$ = mprintf("{0,%lu}", $4);
      else $$ = mcopystr("?");
    } else {
      TTCN_pattern_warning("The number of repetitions is zero: `#(,0)'.");
      $$ = NULL;
    }
  }
| '#' '(' TOK_Number ',' ')'
  {
    if ($3 != 0) {
#ifdef RE_DUP_MAX
      if ($3 > RE_DUP_MAX) {
        TTCN_pattern_warning("The minimum number of "
          "repetitions in `#(%lu,)' exceeds the limit allowed by this system "
          "(%d).", $3, RE_DUP_MAX);
      }
#endif
      if ($3 != 1) $$ = mprintf("{%lu,}", $3);
      else $$ = mcopystr("+");
    } else {
      $$ = mcopystr("*");
    }
  }
;
359
/* One character position of the pattern, translated to the encoded form used
 * by the universal-charstring regexps. The literals below appear to spell the
 * character code as eight letters, A..P standing for hex digits 0..15 (for
 * example AAAAAAAJ is code 9, the TAB character); "(........)" therefore
 * matches any single encoded character.
 * The result is NULL when nothing can be emitted (\b, or an empty set). */
RE_OneCharPos:
  '?' {$$=mcopystr("(........)");}
| KW_BS_d {$$=mcopystr("(AAAAAAD[A-J])");}
| KW_BS_w {$$=mcopystr("(AAAAAAD[A-J]|AAAAAA(E[B-P]|F[A-K])|AAAAAA(G[B-P]|H[A-K]))");}
| KW_BS_t {$$=mcopystr("AAAAAAAJ");}
| KW_BS_n {$$=mcopystr("(AAAAAAA[K-N])");}
| KW_BS_r {$$=mcopystr("AAAAAAAN");}
| KW_BS_s {$$=mcopystr("(AAAAAAA[J-N]|AAAAAACA)");}
| KW_BS_b
  {
    TTCN_pattern_warning("Metacharacter `\\b' is not supported yet.");
    $$ = NULL;
  }
| TOK_Char
  {
    /* Work on the byte value. Testing the plain char against zero depends on
     * whether char is signed on the platform, and passing a negative char to
     * the unsigned conversions below prints a sign-extended number. */
    const unsigned char c = (unsigned char)$1;
    if (c == 0 || c > 127) TTCN_pattern_error("Character with code %u "
      "(0x%02x) cannot be used in a pattern for type charstring.",
      (unsigned int)c, (unsigned int)c);
    $$ = Quad::get_hexrepr(c);
  }
| RE_Quadruple
  {
    $$ = Quad::get_hexrepr($1.value);
  }
| RE_Set
  {
    if ($1->is_empty()) {
      TTCN_pattern_error("Empty character set.");
      $$ = NULL;
    } else {
      $$ = $1->generate_posix();
    }
    /* The set object is consumed here in either case. */
    delete $1;
  }
;
394
/* Character set in square brackets. Flags delivered by the helper rules:
 *   RE_Set_Begin        - 1 for "[^", 0 for "["
 *   RE_Set_Begin_Rsbrkt - 1 for "[^]", 0 for "[]"
 *   RE_Set_End          - 1 for "-]", 0 for "]"
 * An empty body arrives as NULL and is replaced by a fresh QuadSet. */
RE_Set:
  RE_Set_Begin RE_Set_Body RE_Set_End
  {
    $$ = ($2 != NULL) ? $2 : new QuadSet();
    /* A dash right before the closing bracket is a literal dash. */
    if ($3) {
      if (!$$->add(new Quad('-')))
        TTCN_pattern_warning("Duplicate character `-' in the character set.");
    }
    if ($1) $$->set_negate(true);
  }
| RE_Set_Begin '-' RE_Set_Body RE_Set_End
  {
    $$ = ($3 != NULL) ? $3 : new QuadSet();
    /* A leading dash is a literal dash. The flag of RE_Set_End is not
     * consulted in this alternative. */
    if (!$$->add(new Quad('-')))
      TTCN_pattern_warning("Duplicate character `-' in the character set.");
    if ($1) $$->set_negate(true);
  }
| RE_Set_Begin_Rsbrkt RE_Set_Body RE_Set_End
  {
    $$ = ($2 != NULL) ? $2 : new QuadSet();
    /* The bracket swallowed by the opening token is a member of the set. */
    if (!$$->add(new Quad(']')))
      TTCN_pattern_warning("Duplicate character `]' in the character set.");
    if ($3) {
      if (!$$->add(new Quad('-')))
        TTCN_pattern_warning("Duplicate character `-' in the character set.");
    }
    if ($1) $$->set_negate(true);
  }
| RE_Set_Begin_Rsbrkt '-' RE_Set_Range_Char RE_Set_Body RE_Set_End
  {
    $$ = ($4 != NULL) ? $4 : new QuadSet();
    /* Here the swallowed bracket is the lower bound of a range. The range is
     * added even after the error has been reported. */
    if ($3.value < (unsigned int)']') {
      TTCN_pattern_error("Invalid range in the character set: the "
        "character code of the lower bound (%u) is higher than that of the "
        "upper bound (%u).", ']', (unsigned int)$3.value);
    }
    $$->add(new QuadInterval(Quad(']'), Quad($3.value)));
    if ($5 && !$$->add(new Quad('-')))
      TTCN_pattern_warning("Duplicate character `-' in the character set.");
    if ($1) $$->set_negate(true);
  }
;
455
/* Opening of a set: the value is the negation flag (RE_Set calls
 * set_negate(true) when it is non-zero). */
RE_Set_Begin:
  KW_Set_Begin
  {
    $$ = 0;
  }
| KW_Set_Begin_Neg
  {
    $$ = 1;
  }
;
460
/* Opening of a set whose first member is a right square bracket: the value
 * is the negation flag (1 for the "[^]" token, 0 for "[]"). */
RE_Set_Begin_Rsbrkt:
  KW_Set_Begin_Rsbrkt
  {
    $$ = 0;
  }
| KW_Set_Begin_Neg_Rsbrkt
  {
    $$ = 1;
  }
;
465
/* Closing of a set: the value is 1 when the closing token carried a dash
 * ("-]"), which RE_Set then adds to the set as a literal dash. */
RE_Set_End:
  KW_Set_End
  {
    $$ = 0;
  }
| KW_Set_Dash_End
  {
    $$ = 1;
  }
;
470
/* Members of a set, accumulated left to right. An empty body is NULL; the
 * first element becomes the accumulator and later ones are merged into it. */
RE_Set_Body:
  /* empty */
  {
    $$ = NULL;
  }
| RE_Set_Body RE_Set_Elem
  {
    if ($1 == NULL) {
      $$ = $2;
    } else {
      $1->join($2);
      delete $2;
      $$ = $1;
    }
  }
;
483
/* One member of a set: a single character, a predefined class, or a range.
 * Every alternative yields a QuadSet of its own. */
RE_Set_Elem:
  RE_Set_Range_Char
  {
    QuadSet *single = new QuadSet();
    single->add(new Quad($1.value));
    $$ = single;
  }
| RE_Set_NoRange_Char
  {
    $$ = $1;
  }
| RE_Set_Range_Char '-' RE_Set_Range_Char
  {
    /* A reversed range is reported; the interval is still added as given. */
    if ($3.value < $1.value) {
      TTCN_pattern_error("Invalid range in the character set: the "
        "character code of the lower bound (%u) is higher than that of the "
        "upper bound (%u).", (unsigned int)$1.value, (unsigned int)$3.value);
    }
    QuadSet *range = new QuadSet();
    range->add(new QuadInterval(Quad($1.value), Quad($3.value)));
    $$ = range;
  }
;
502
/* A character that may stand alone in a set or serve as a range bound.
 * The result is the character code in the `value` member. */
RE_Set_Range_Char:
  KW_BS_t { $$.value = '\t'; }
| KW_BS_r { $$.value = '\r'; }
| TOK_Char
  {
    /* Narrow to the byte value first, as RE_OneCharPos does. Assigning a
     * negative char straight to the unsigned code would sign-extend it, the
     * test against zero would depend on the signedness of plain char, and the
     * unsigned conversions in the message would print a sign-extended
     * number. */
    const unsigned char c = (unsigned char)$1;
    if (c == 0 || c > 127) TTCN_pattern_error("Character with code %u "
      "(0x%02x) cannot be used in a pattern for type charstring.",
      (unsigned int)c, (unsigned int)c);
    $$.value = c;
  }
| RE_Quadruple { $$.value = $1.value; }
;
514
/* Predefined character classes inside a set. They cannot be range bounds,
 * so each one directly yields the QuadSet it stands for. */
RE_Set_NoRange_Char:
  KW_BS_d
  {
    QuadSet *digits = new QuadSet();
    digits->add(new QuadInterval(Quad('0'), Quad('9')));
    $$ = digits;
  }
| KW_BS_w
  {
    QuadSet *word = new QuadSet();
    word->add(new QuadInterval(Quad('0'), Quad('9')));
    word->add(new QuadInterval(Quad('A'), Quad('Z')));
    word->add(new QuadInterval(Quad('a'), Quad('z')));
    $$ = word;
  }
| KW_BS_n
  {
    QuadSet *newlines = new QuadSet();
    newlines->add(new QuadInterval(Quad('\n'), Quad('\r')));
    $$ = newlines;
  }
| KW_BS_s
  {
    QuadSet *blanks = new QuadSet();
    blanks->add(new QuadInterval(Quad('\t'), Quad('\r')));
    blanks->add(new Quad(' '));
    $$ = blanks;
  }
| KW_BS_b
  {
    /* Still hand back an (empty) set so that the enclosing rules have an
     * object to work with after the error. */
    QuadSet *nothing = new QuadSet();
    $$ = nothing;
    TTCN_pattern_error("Metacharacter `\\b' does not make any sense in a "
      "character set.");
  }
;
546
/* Universal character written as a \q quadruple: group, plane, row, cell.
 * Out-of-range fields and the all-zero character are reported as errors; the
 * fields are stored afterwards in any case, narrowed to one byte each. */
RE_Quadruple:
  KW_BS_q '{' TOK_Number ',' TOK_Number ',' TOK_Number ',' TOK_Number '}'
  {
    if ($3 > 127) {
      TTCN_pattern_error("The first number (group) of quadruple "
        "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..127 "
        "instead of %lu.", $3, $5, $7, $9, $3);
    }
    if ($5 > 255) {
      TTCN_pattern_error("The second number (plane) of quadruple "
        "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
        "instead of %lu.", $3, $5, $7, $9, $5);
    }
    if ($7 > 255) {
      TTCN_pattern_error("The third number (row) of quadruple "
        "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
        "instead of %lu.", $3, $5, $7, $9, $7);
    }
    if ($9 > 255) {
      TTCN_pattern_error("The fourth number (cell) of quadruple "
        "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
        "instead of %lu.", $3, $5, $7, $9, $9);
    }
    /* All four fields are zero exactly when their bitwise OR is zero. */
    const bool is_zero_char = (($3 | $5 | $7 | $9) == 0);
    if (is_zero_char) {
      TTCN_pattern_error("Zero "
        "character (i.e. quadruple `\\q{0,0,0,0}') is not supported in a "
        "pattern for type universal charstring.");
    }
    $$.comp.cell = (unsigned char)$9;
    $$.comp.row = (unsigned char)$7;
    $$.comp.plane = (unsigned char)$5;
    $$.comp.group = (unsigned char)$3;
  }
;
571
572 %%
573
574 /*********************************************************************
575 * Interface
576 *********************************************************************/
577
578 char* TTCN_pattern_to_regexp_uni(const char* p_pattern, int** groups)
579 {
580 /* if you want to debug */
581 //pattern_unidebug=1;
582
583 ret_val=NULL;
584 user_groups = 0;
585
586 yy_buffer_state *flex_buffer = pattern_yy_scan_string(p_pattern);
587 if(flex_buffer == NULL) {
588 TTCN_pattern_error("Flex buffer creation failed.");
589 return NULL;
590 }
591 init_pattern_yylex(&yylval);
592 if(pattern_uniparse()) {
593 Free(ret_val);
594 ret_val=NULL;
595 }
596 pattern_yy_delete_buffer(flex_buffer);
597
598 // needed by regexp to find user specified groups
599 if (user_groups && groups) {
600 *groups = (int*)Malloc(sizeof(int) * (user_groups + 1));
601 (*groups)[0] = user_groups;
602
603 int par = -1, index = 1;
604 for (size_t i = 0; i < strlen(ret_val); i++) {
605 if (ret_val[i] == '(') {
606 par++;
607 }
608 if (ret_val[i] == '<') {
609 ret_val[i] = '(';
610 par++;
611 (*groups)[index++] = par;
612 }
613 }
614 } else if (groups)
615 *groups = (int*)0;
616
617 return ret_val;
618 }
619
620 // Backwards compatibility shim
621 char* TTCN_pattern_to_regexp_uni(const char* p_pattern, int ere, int** /*groups*/)
622 {
623 TTCN_pattern_warning("TTCN_pattern_to_regexp_uni"
624 "(const char* p_pattern, int ere, int** groups) is deprecated");
625 if (ere != 1) TTCN_pattern_error(
626 "BRE is not supported for TTCN_pattern_to_regexp_uni");
627 return TTCN_pattern_to_regexp(p_pattern);
628 }
629
630
631 /*********************************************************************
632 * Static functions
633 *********************************************************************/
634
/** Error callback of the generated parser: forwards the parser's message to
 * TTCN_pattern_error(). */
635 void pattern_unierror(const char *error_str)
636 {
/* The text is passed as an argument of "%s", never as the format string
 * itself, so any percent sign in it is printed literally. */
637 TTCN_pattern_error("%s", error_str);
638 }
639
640 void yyprint(FILE *file, int type, const YYSTYPE& value)
641 {
642 switch (type) {
643 case TOK_Char:
644 fprintf(file, "'%c'", value.c);
645 break;
646 case TOK_Digit: case TOK_Number:
647 fprintf(file, "'%lu'", value.u);
648 break;
649 default:
650 break;
651 }
652 }
653
This page took 0.053958 seconds and 5 git commands to generate.