Merge pull request #10 from egerpil/master
[deliverable/titan.core.git] / common / pattern_p.y
CommitLineData
970ed795 1/******************************************************************************
3abe9331 2 * Copyright (c) 2000-2015 Ericsson Telecom AB
970ed795
EL
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Eclipse Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/epl-v10.html
7 ******************************************************************************/
8
9/**
10 * Parser for TTCN-3 character patterns.
11 *
12 * \author Matyas Forstner (Matyas.Forstner@eth.ericsson.se)
13 *
14 * 20031121
15 */
16
17%{
18
19/*********************************************************************
20 * C(++) declarations
21 *********************************************************************/
22
23#include <stdio.h>
24#include <string.h>
25#include <ctype.h>
26#if defined(__CYGWIN__) && defined(__clang__)
27/* Cygwin's clang 3.0 has its own limits.h, which does not bring in
28 the system's limits.h unless we define this macro: */
29#define __STDC_HOSTED__ 1
30#define _GCC_NEXT_LIMITS_H
31#endif
32#include <limits.h>
33
34#include <regex.h>
35#if !defined(RE_DUP_MAX)
36/* RE_DUP_MAX is defined in limits.h or regex.h, except on Cygwin 1.5 */
37# include <sys/syslimits.h>
38#endif
39
40#include "memory.h"
41#include "pattern.hh"
42
43/* defined in lexer c-file: */
44
45 union YYSTYPE;
46 extern int pattern_yylex();
47 extern void init_pattern_yylex(YYSTYPE *p);
48 struct yy_buffer_state;
49 extern yy_buffer_state* pattern_yy_scan_string(const char*);
50 extern int pattern_yylex_destroy();
51 extern unsigned int get_nof_parentheses();
52
53/* defined in this file: */
54
55 /** The converted regexp. */
56 static char *ret_val;
3abe9331 57 /** Turns error messages for extended ASCII characters on or off */
58 static bool allow_ext_ascii = false;
970ed795
EL
59 /** The parser error reporting function. */
60 static void pattern_yyerror(const char *error_str);
61 /** Creates the POSIX equivalent of literal character \a c using the
62 * appropriate escape sequence when needed. */
63 static char *translate_character(char c);
64 /** Returns the printable equivalent of character \a c */
65 static char *print_character(char c);
66 /** Returns the printable equivalent of range \a lower .. \a upper */
67 static char *print_range(char lower, char upper);
68 /** structure for manipulating character sets */
69 struct character_set;
70 /** allocates, initializes and returns a new empty set */
71 static character_set *set_init();
72 /** allocates and returns a copy of \a set */
73 static character_set *set_copy(const character_set *set);
74 /** deallocates set \a set */
75 static void set_free(character_set *set);
76 /** returns whether set \a set is empty */
77 static int set_is_empty(const character_set *set);
78 /** returns whether set \a set contains all characters in range 1..127 */
79 static int set_is_full(const character_set *set);
80 /** returns whether set \a set contains the character \a c */
81 static int set_has_char(const character_set *set, char c);
82 /** adds character \a c to set \a set */
83 static void set_add_char(character_set *set, char c);
84 /** removes character \a c from set \a set */
85 static void set_remove_char(character_set *set, char c);
86 /** returns whether set \a set contains at least one character in the range
87 * \a lower .. \a upper */
88 static int set_has_range(const character_set *set, char lower, char upper);
89 /** adds range \a lower .. \a upper to set \a set */
90 static void set_add_range(character_set *set, char lower, char upper);
91 /** returns whether sets \a set1 and \a set2 have a non-empty intersection */
92 static int set_has_intersect(const character_set *set1,
93 const character_set *set2);
94 /** joins sets \a dst and \a src into \a dst */
95 static void set_join(character_set *dst, const character_set *src);
96 /** negates the set \a set */
97 static void set_negate(character_set *set);
98 /** reports the duplicate occurrences of characters and ranges in \a set1
99 * and \a set2 */
100 static void set_report_duplicates(const character_set *set1,
101 const character_set *set2);
102 /** generates the POSIX equivalent of \a set */
103 static char *set_generate_posix(const character_set *set);
104
105#define YYERROR_VERBOSE
106
107static void yyprint(FILE *file, int type, const YYSTYPE& value);
108#define YYPRINT(f,t,v) yyprint(f,t,v)
109
110%}
111
112/*********************************************************************
113 * Bison declarations
114 *********************************************************************/
115
116%name-prefix="pattern_yy"
117%output="pattern_p.cc"
118%defines
119%verbose
120%expect 0
121%start Pattern
122%debug
123
124/*********************************************************************
125 * The union-type
126 * Must be kept in sync with the one in pattern_uni.y !
127 *********************************************************************/
128
129%union {
130 int b; /* boolean */
131 char c; /* single character */
132 char *s; /* character string */
133 unsigned long int u; /* unsigned integer */
134 struct character_set *set; // used by nonterminals in pattern_p.y
135
136 union {
137 unsigned int value;
138#if defined(__sparc__) || defined(__sparc)
139 struct {
140 unsigned char group;
141 unsigned char plane;
142 unsigned char row;
143 unsigned char cell;
144 } comp;
145#else
146 struct {
147 unsigned char cell;
148 unsigned char row;
149 unsigned char plane;
150 unsigned char group;
151 } comp;
152#endif
153 } q; // single universal char, used by nonterminals in pattern_uni.y
154 class QuadSet* qset; // used by nonterminals in pattern_uni.y
155}
156
157/*********************************************************************
158 * Tokens
159 *********************************************************************/
160
161%token <c> TOK_Char "<ordinary character>"
162%token <u> TOK_Number "<number>"
163%token <u> TOK_Digit "<digit>"
164
165/*********************************************************************
166 * Keywords
167 *********************************************************************/
168
169%token KW_BS_q "\\q"
170%token KW_BS_d "\\d"
171%token KW_BS_w "\\w"
172%token KW_BS_t "\\t"
173%token KW_BS_n "\\n"
174%token KW_BS_r "\\r"
175%token KW_BS_s "\\s"
176%token KW_BS_b "\\b"
177
178%token KW_Group_Begin "("
179%token KW_Group_End ")"
180%token KW_Set_Begin "["
181%token KW_Set_Begin_Neg "[^"
182%token KW_Set_Begin_Rsbrkt "[]"
183%token KW_Set_Begin_Neg_Rsbrkt "[^]"
184%token KW_Set_End "]"
185%token KW_Set_Dash_End "-]"
186
187/*********************************************************************
188 * semantic types of nonterminals
189 *********************************************************************/
190
191%type <b> RE_Set_Begin RE_Set_Begin_Rsbrkt RE_Set_End
192%type <c> RE_Set_Range_Char RE_Quadruple
193%type <s> RE_Body RE_Elems RE_Alter_Elem RE_Concat_Elem
194 RE_Multiply_Elem RE_Multiply_Statement RE_Group
195 RE_OneCharPos
196%type <set> RE_Set RE_Set_Body RE_Set_Elem RE_Set_NoRange_Char
197
198/*********************************************************************
199 * Destructors
200 *********************************************************************/
201
202%destructor { Free($$); }
203RE_Alter_Elem
204RE_Body
205RE_Concat_Elem
206RE_Elems
207RE_Group
208RE_Multiply_Elem
209RE_Multiply_Statement
210RE_OneCharPos
211
212%destructor { set_free($$); }
213RE_Set
214RE_Set_Body
215RE_Set_Elem
216RE_Set_NoRange_Char
217
218%%
219
220/*********************************************************************
221 * Grammar
222 *********************************************************************/
223
/* Start symbol: stores the translated regexp in the file-level result
 * variable that TTCN_pattern_to_regexp() hands back to its caller. */
Pattern:
  RE_Body
  {
    ret_val = $1;
  }
;
227
/* Whole pattern body: the translated expression is wrapped between the
 * begin and end anchors. An empty pattern (or one whose elements were all
 * dropped) becomes the regexp that matches only the empty string. */
RE_Body:
  /* empty */
  {
    $$ = mcopystr("^$");
  }
| RE_Elems
  {
    if ($1 == NULL) {
      $$ = mcopystr("^$");
    } else {
      $$ = mprintf("^%s$", $1);
      Free($1);
    }
  }
;
241
/* Alternatives separated by the bar operator. At the top level every
 * alternative gets its own pair of anchors; inside a group only the bar is
 * emitted. A missing (NULL) alternative is rendered as an empty group. */
RE_Elems:
  RE_Alter_Elem { $$ = $1; }
| RE_Elems '|' RE_Alter_Elem
  {
    /* a closing parenthesis held as lookahead is counted as one more group
     * (NOTE(review): presumably the lexer has already decremented its
     * counter for it; confirm in the lexer) */
    unsigned int open_groups = get_nof_parentheses();
    if (yychar == KW_Group_End) open_groups++;
    const bool in_group = open_groups != 0;

    if ($1 != NULL && $3 != NULL) {
      $$ = mputprintf($1, in_group ? "|%s" : "$|^%s", $3);
      Free($3);
    } else if ($3 != NULL) {
      $$ = mprintf(in_group ? "()|%s" : "()$|^%s", $3);
      Free($3);
    } else if ($1 != NULL) {
      $$ = mputstr($1, in_group ? "|()" : "$|^()");
    } else {
      $$ = NULL;
    }
  }
;
257
/* One alternative: the concatenation of its elements. The right-hand piece
 * is appended to the accumulated string and then released. */
RE_Alter_Elem:
  RE_Concat_Elem
  {
    $$ = $1;
  }
| RE_Alter_Elem RE_Concat_Elem
  {
    char *joined = mputstr($1, $2);
    Free($2);
    $$ = joined;
  }
;
266
/* One element of a concatenation, optionally followed by a repetition
 * statement. If either part is NULL the whole element is dropped (NULL). */
RE_Concat_Elem:
  RE_Multiply_Elem { $$ = $1; }
| RE_Multiply_Elem RE_Multiply_Statement
  {
    if ($1 == NULL || $2 == NULL) {
      Free($1);
      Free($2);
      $$ = NULL;
    } else {
      $$ = mputstr($1, $2);
      Free($2);
    }
  }
| '*'
  {
    $$ = mcopystr(".*");
  }
;
282
/* Something a repetition statement can be applied to: a group or a single
 * character position. The translated string is passed through unchanged. */
RE_Multiply_Elem:
  RE_Group
  {
    $$ = $1;
  }
| RE_OneCharPos
  {
    $$ = $1;
  }
;
287
/* Parenthesized group. An empty group, or one whose content was dropped
 * (NULL), is translated to an empty pair of parentheses. */
RE_Group:
  KW_Group_Begin KW_Group_End { $$ = mcopystr("()"); }
| KW_Group_Begin RE_Elems KW_Group_End
  {
    if ($2 == NULL) {
      $$ = mcopystr("()");
    } else {
      $$ = mprintf("(%s)", $2);
      Free($2);
    }
  }
;
303
/* Repetition statements. The result is the POSIX repetition operator as a
 * newly allocated string: an empty string for exactly one repetition, or
 * NULL (after a warning) when the number of repetitions is zero. */
RE_Multiply_Statement:
  '+' { $$ = mcopystr("+"); }
| '#' '(' ',' ')' { $$ = mcopystr("*"); }
| '#' TOK_Digit
  {
    switch ($2) {
    case 0:
      TTCN_pattern_warning("The number of repetitions is zero: `#0'.");
      $$ = NULL;
      break;
    case 1:
      $$ = memptystr();
      break;
    default:
      if ($2 > 9) TTCN_pattern_warning("Internal error: Invalid number of "
        "repetitions: `#%lu'.", $2);
      $$ = mprintf("{%lu}", $2);
      break;
    }
  }
| '#' '(' TOK_Number ')'
  {
    switch ($3) {
    case 0:
      TTCN_pattern_warning("The number of repetitions is zero: `#(0)'.");
      $$ = NULL;
      break;
    case 1:
      $$ = memptystr();
      break;
    default:
#ifdef RE_DUP_MAX
      if ($3 > RE_DUP_MAX) TTCN_pattern_warning("The number of repetitions in "
        "`#(%lu)' exceeds the limit allowed by this system (%d).", $3,
        RE_DUP_MAX);
#endif
      $$ = mprintf("{%lu}", $3);
      break;
    }
  }
| '#' '(' TOK_Number ',' TOK_Number ')'
  {
#ifdef RE_DUP_MAX
    if ($3 > RE_DUP_MAX) TTCN_pattern_warning("The minimum number of "
      "repetitions in `#(%lu,%lu)' exceeds the limit allowed by this system "
      "(%d).", $3, $5, RE_DUP_MAX);
    if ($5 > RE_DUP_MAX) TTCN_pattern_warning("The maximum number of "
      "repetitions in `#(%lu,%lu)' exceeds the limit allowed by this system "
      "(%d).", $3, $5, RE_DUP_MAX);
#endif
    if ($3 > $5) TTCN_pattern_error("The lower bound is higher than the upper "
      "bound in the number of repetitions: `#(%lu,%lu)'.", $3, $5);
    if ($3 != $5) {
      /* a real interval; zero-or-one has its own operator */
      if ($3 == 0 && $5 == 1) $$ = mcopystr("?");
      else $$ = mprintf("{%lu,%lu}", $3, $5);
    } else if ($3 == 0) {
      TTCN_pattern_warning("The number of repetitions is zero: `#(0,0)'.");
      $$ = NULL;
    } else if ($3 == 1) {
      $$ = memptystr();
    } else {
      $$ = mprintf("{%lu}", $3);
    }
  }
| '#' '(' ',' TOK_Number ')'
  {
    if ($4 != 0) {
#ifdef RE_DUP_MAX
      if ($4 > RE_DUP_MAX) TTCN_pattern_warning("The maximum number of "
        "repetitions in `#(,%lu)' exceeds the limit allowed by this system "
        "(%d).", $4, RE_DUP_MAX);
#endif
      if ($4 == 1) $$ = mcopystr("?");
      else $$ = mprintf("{0,%lu}", $4);
    } else {
      TTCN_pattern_warning("The number of repetitions is zero: `#(,0)'.");
      $$ = NULL;
    }
  }
| '#' '(' TOK_Number ',' ')'
  {
    if ($3 != 0) {
#ifdef RE_DUP_MAX
      if ($3 > RE_DUP_MAX) TTCN_pattern_warning("The minimum number of "
        "repetitions in `#(%lu,)' exceeds the limit allowed by this system "
        "(%d).", $3, RE_DUP_MAX);
#endif
      if ($3 == 1) $$ = mcopystr("+");
      else $$ = mprintf("{%lu,}", $3);
    } else {
      $$ = mcopystr("*");
    }
  }
;
394
/* Anything that matches exactly one character position: the any-character
 * wildcard, the backslash metacharacters, a literal character, a quadruple
 * or a character set. The result is NULL for the unsupported word boundary
 * metacharacter and for an empty set. */
RE_OneCharPos:
  '?'
  {
    $$ = mcopystr(".");
  }
| KW_BS_d
  {
    $$ = mcopystr("[0-9]");
  }
| KW_BS_w
  {
    $$ = mcopystr("[0-9A-Za-z]");
  }
| KW_BS_t
  {
    $$ = mcopystr("\t");
  }
| KW_BS_n
  {
    $$ = mcopystr("[\n-\r]");
  }
| KW_BS_r
  {
    $$ = mcopystr("\r");
  }
| KW_BS_s
  {
    $$ = mcopystr("[\t-\r ]");
  }
| KW_BS_b
  {
    TTCN_pattern_warning("Metacharacter `\\b' is not supported yet.");
    $$ = NULL;
  }
| TOK_Char
  {
    /* NUL is never allowed; codes above 127 only in UTF-8 mode */
    const unsigned int code = (unsigned char)$1;
    const bool forbidden = code == 0 || (code > 127 && !allow_ext_ascii);
    if (forbidden) TTCN_pattern_error("Character "
      "with code %u (0x%02x) cannot be used in a pattern for type charstring.",
      code, code);
    $$ = translate_character($1);
  }
| RE_Quadruple
  {
    $$ = translate_character($1);
  }
| RE_Set
  {
    char *posix = NULL;
    if (set_is_empty($1)) TTCN_pattern_error("Empty character set.");
    else posix = set_generate_posix($1);
    set_free($1);
    $$ = posix;
  }
;
428
/* Character set in square brackets.
 *
 * RE_Set_Begin is 1 for "[^", 0 for "["
 * RE_Set_Begin_Rsbrkt is 1 for "[^]", 0 for "[]"
 * RE_Set_End is 1 for "-]", 0 for "]"
 *
 * The four alternatives differ in how the set starts: plainly, with a
 * literal dash, with a literal right bracket, or with a range whose lower
 * bound is the right bracket. A dash directly before the closing bracket is
 * a literal dash. A negated opening complements the collected set last.
 */
RE_Set:
  RE_Set_Begin RE_Set_Body RE_Set_End
  {
    $$ = ($2 != NULL) ? $2 : set_init();
    if ($3) {
      if (set_has_char($$, '-'))
        TTCN_pattern_warning("Duplicate character `-' in the character set.");
      else set_add_char($$, '-');
    }
    if ($1) set_negate($$);
  }
| RE_Set_Begin '-' RE_Set_Body RE_Set_End
  {
    $$ = ($3 != NULL) ? $3 : set_init();
    if (set_has_char($$, '-'))
      TTCN_pattern_warning("Duplicate character `-' in the character set.");
    else set_add_char($$, '-');
    /* the leading dash is in the set by now, so a trailing one is always a
     * duplicate */
    if ($4)
      TTCN_pattern_warning("Duplicate character `-' in the character set.");
    if ($1) set_negate($$);
  }
| RE_Set_Begin_Rsbrkt RE_Set_Body RE_Set_End
  {
    $$ = ($2 != NULL) ? $2 : set_init();
    if (set_has_char($$, ']'))
      TTCN_pattern_warning("Duplicate character `]' in the character set.");
    else set_add_char($$, ']');
    if ($3) {
      if (set_has_char($$, '-'))
        TTCN_pattern_warning("Duplicate character `-' in the character set.");
      else set_add_char($$, '-');
    }
    if ($1) set_negate($$);
  }
| RE_Set_Begin_Rsbrkt '-' RE_Set_Range_Char RE_Set_Body RE_Set_End
  {
    $$ = ($4 != NULL) ? $4 : set_init();
    /* Compare character codes as unsigned values: plain char may be signed,
     * in which case a code above 127 would look smaller than the bracket
     * and the range would be rejected with a self-contradicting message. */
    const unsigned char range_end = (unsigned char)$3;
    if (']' > range_end) {
      char *range_str = print_range(']', $3);
      TTCN_pattern_error("Invalid range `%s' in the character set: the "
        "character code of the lower bound (%u) is higher than that of the "
        "upper bound (%u).", range_str, (unsigned int)']',
        (unsigned int)range_end);
      Free(range_str);
    } else {
      if (set_has_range($$, ']', $3)) {
        character_set *tmpset = set_init();
        set_add_range(tmpset, ']', $3);
        set_report_duplicates($$, tmpset);
        set_free(tmpset);
      }
      set_add_range($$, ']', $3);
    }
    if ($5) {
      if (set_has_char($$, '-'))
        TTCN_pattern_warning("Duplicate character `-' in the character set.");
      else set_add_char($$, '-');
    }
    if ($1) set_negate($$);
  }
;
500
/* Opening of a set: the value is 1 if the set is negated, 0 otherwise. */
RE_Set_Begin:
  KW_Set_Begin
  {
    $$ = 0;
  }
| KW_Set_Begin_Neg
  {
    $$ = 1;
  }
;
505
/* Opening of a set whose first member is a literal right bracket: the value
 * is 1 if the set is negated, 0 otherwise. */
RE_Set_Begin_Rsbrkt:
  KW_Set_Begin_Rsbrkt
  {
    $$ = 0;
  }
| KW_Set_Begin_Neg_Rsbrkt
  {
    $$ = 1;
  }
;
510
/* Closing of a set: the value is 1 if a literal dash stands directly before
 * the closing bracket, 0 otherwise. */
RE_Set_End:
  KW_Set_End
  {
    $$ = 0;
  }
| KW_Set_Dash_End
  {
    $$ = 1;
  }
;
515
/* Members of a set between the brackets. The value is NULL for an empty
 * body; otherwise it is the union of all elements. Characters that occur
 * more than once are reported before the sets are merged. */
RE_Set_Body:
  /* empty */
  {
    $$ = NULL;
  }
| RE_Set_Body RE_Set_Elem
  {
    if ($1 == NULL) {
      /* first element: take it over as the accumulator */
      $$ = $2;
    } else {
      if (set_has_intersect($1, $2)) set_report_duplicates($1, $2);
      set_join($1, $2);
      set_free($2);
      $$ = $1;
    }
  }
;
528
/* One element of a set body: a single character, a predefined class or a
 * range of characters. The value is a newly allocated set. */
RE_Set_Elem:
  RE_Set_Range_Char
  {
    $$ = set_init();
    set_add_char($$, $1);
  }
| RE_Set_NoRange_Char { $$ = $1; }
| RE_Set_Range_Char '-' RE_Set_Range_Char
  {
    /* Compare the bounds by their unsigned character codes, i.e. the same
     * values that the error message prints. Plain char may be signed, and
     * then a code above 127 would compare as negative: valid ranges would
     * be rejected and reversed ones would be accepted silently. */
    const unsigned char range_lo = (unsigned char)$1;
    const unsigned char range_hi = (unsigned char)$3;
    if (range_lo > range_hi) {
      char *range_str = print_range($1, $3);
      TTCN_pattern_error("Invalid range `%s' in the character set: the "
        "character code of the lower bound (%u) is higher than that of the "
        "upper bound (%u).", range_str, (unsigned int)range_lo,
        (unsigned int)range_hi);
      Free(range_str);
    }
    $$ = set_init();
    set_add_range($$, $1, $3);
  }
;
549
/* A character that may be used alone or as a bound of a range within a set.
 * The value is the character itself. */
RE_Set_Range_Char:
  KW_BS_t
  {
    $$ = '\t';
  }
| KW_BS_r
  {
    $$ = '\r';
  }
| TOK_Char
  {
    /* NUL is never allowed; codes above 127 only in UTF-8 mode */
    const unsigned int code = (unsigned char)$1;
    const bool forbidden = code == 0 || (code > 127 && !allow_ext_ascii);
    if (forbidden) TTCN_pattern_error("Character "
      "with code %u (0x%02x) cannot be used in a pattern for type charstring.",
      code, code);
    $$ = $1;
  }
| RE_Quadruple
  {
    $$ = $1;
  }
;
562
/* Predefined character classes inside a set. They stand for several
 * characters, so they cannot be the bound of a range. The value is a newly
 * allocated set holding the members of the class. */
RE_Set_NoRange_Char:
  KW_BS_d
  {
    character_set *digits = set_init();
    set_add_range(digits, '0', '9');
    $$ = digits;
  }
| KW_BS_w
  {
    character_set *word_chars = set_init();
    set_add_range(word_chars, '0', '9');
    set_add_range(word_chars, 'A', 'Z');
    set_add_range(word_chars, 'a', 'z');
    $$ = word_chars;
  }
| KW_BS_n
  {
    character_set *newlines = set_init();
    set_add_range(newlines, '\n', '\r');
    $$ = newlines;
  }
| KW_BS_s
  {
    character_set *blanks = set_init();
    set_add_range(blanks, '\t', '\r');
    set_add_char(blanks, ' ');
    $$ = blanks;
  }
| KW_BS_b
  {
    TTCN_pattern_error("Metacharacter `\\b' does not make any sense in a "
      "character set.");
    /* continue with an empty set after the error */
    $$ = set_init();
  }
;
594
/* Universal character given as a quadruple (group, plane, row, cell). In a
 * charstring pattern only the cell may be non-zero and it must be in the
 * range 1..127; every violation is reported. The value is the cell
 * converted to char. */
RE_Quadruple:
  KW_BS_q '{' TOK_Number ',' TOK_Number ',' TOK_Number ',' TOK_Number '}'
  {
    const unsigned long group = $3;
    const unsigned long plane = $5;
    const unsigned long row = $7;
    const unsigned long cell = $9;

    /* range checks of the individual components */
    if (group > 127) TTCN_pattern_error("The first number (group) of quadruple "
      "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..127 "
      "instead of %lu.", group, plane, row, cell, group);
    if (plane > 255) TTCN_pattern_error("The second number (plane) of quadruple "
      "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
      "instead of %lu.", group, plane, row, cell, plane);
    if (row > 255) TTCN_pattern_error("The third number (row) of quadruple "
      "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
      "instead of %lu.", group, plane, row, cell, row);
    if (cell > 255) TTCN_pattern_error("The fourth number (cell) of quadruple "
      "`\\q{%lu,%lu,%lu,%lu}' is too large. It should be in the range 0..255 "
      "instead of %lu.", group, plane, row, cell, cell);

    /* restrictions specific to type charstring */
    const bool outside_charstring =
      group > 0 || plane > 0 || row > 0 || cell > 127;
    if (outside_charstring) TTCN_pattern_error("Quadruple "
      "`\\q{%lu,%lu,%lu,%lu}' is not valid in a pattern for type charstring.",
      group, plane, row, cell);
    const bool is_zero_char =
      group == 0 && plane == 0 && row == 0 && cell == 0;
    if (is_zero_char) TTCN_pattern_error("Zero "
      "character (i.e. quadruple `\\q{0,0,0,0}') is not supported in a "
      "pattern for type charstring.");

    $$ = (char)cell;
  }
;
619
620%%
621
622/*********************************************************************
623 * Interface
624 *********************************************************************/
625
3abe9331 626char* TTCN_pattern_to_regexp(const char* p_pattern, bool utf8)
970ed795
EL
627{
628 /* if you want to debug */
629 //pattern_yydebug=1;
630
631 ret_val=NULL;
632
3abe9331 633 /* allow extended ASCII characters if the pattern is in UTF-8 format */
634 allow_ext_ascii = utf8;
635
970ed795
EL
636 yy_buffer_state *flex_buffer = pattern_yy_scan_string(p_pattern);
637 if(flex_buffer == NULL) {
638 TTCN_pattern_error("Flex buffer creation failed.");
639 return NULL;
640 }
641 init_pattern_yylex(&yylval);
642 if(pattern_yyparse()) {
643 Free(ret_val);
644 ret_val=NULL;
645 }
646 pattern_yylex_destroy();
647 return ret_val;
648}
649
650// Backwards compatibility shim
651char* TTCN_pattern_to_regexp(const char* p_pattern, int ere)
652{
653 TTCN_pattern_warning(
654 "TTCN_pattern_to_regexp(const char* p_pattern, int ere) is deprecated");
655 if (ere != 1) TTCN_pattern_error(
656 "BRE is not supported for TTCN_pattern_to_regexp");
657 return TTCN_pattern_to_regexp(p_pattern);
658}
659
660/*********************************************************************
661 * Static functions
662 *********************************************************************/
663
664/// Error reporting function
665void pattern_yyerror(const char *error_str)
666{
667 TTCN_pattern_error("%s", error_str);
668}
669
670/** Escape plain characters which would be metacharacters in a regex.
671 *
672 * @param c plain character
673 * @return a newly allocated string which must be Free() 'd
674 */
675char *translate_character(char c)
676{
677 int escape_needed = 0;
678 switch (c) {
679 case '|':
680 case '+':
681 case '?':
682 case '{':
683 case '}':
684 case '(':
685 case ')':
686 case '.':
687 case '^':
688 case '$':
689 case '[':
690 case '*':
691 case '\\':
692 escape_needed = 1;
693 break;
694 }
695 if (escape_needed) return mprintf("\\%c", c);
696 else return mputc(NULL, c);
697}
698
699char *print_character(char c)
700{
701 switch (c) {
702 case '\t':
703 return mcopystr("\\t");
704 case '\r':
705 return mcopystr("\\r");
706 default:
707 if (isprint((unsigned char)c)) return mprintf("%c", c);
708 else return mprintf("\\q{0,0,0,%u}", (unsigned char)c);
709 }
710}
711
712char *print_range(char lower, char upper)
713{
714 char *range_str = print_character(lower);
715 range_str = mputc(range_str, '-');
716 char *upper_str = print_character(upper);
717 range_str = mputstr(range_str, upper_str);
718 Free(upper_str);
719 return range_str;
720}
721
/* number of membership bits stored in one element of the bitmap */
#define CS_BITS_PER_ELEM (sizeof(unsigned long) * 8)
/* number of elements needed for 128 membership bits, rounded up */
#define CS_NOF_ELEMS ((CS_BITS_PER_ELEM + 128 - 1) / CS_BITS_PER_ELEM)

/** Set of characters stored as a bitmap with one bit per character code:
 * bit (code % CS_BITS_PER_ELEM) of element (code / CS_BITS_PER_ELEM). */
struct character_set {
  unsigned long set_members[CS_NOF_ELEMS];
};
728
729character_set *set_init()
730{
731 character_set *set = (character_set*)Malloc(sizeof(*set));
732 memset(set->set_members, 0, sizeof(set->set_members));
733 return set;
734}
735
736character_set *set_copy(const character_set *set)
737{
738 character_set *set2 = (character_set*)Malloc(sizeof(*set2));
739 memcpy(set2, set, sizeof(*set2));
740 return set2;
741}
742
743void set_free(character_set *set)
744{
745 Free(set);
746}
747
748int set_is_empty(const character_set *set)
749{
750 if ((set->set_members[0] & ~1UL) != 0) return 0;
751 for (size_t i = 1; i < CS_NOF_ELEMS; i++)
752 if (set->set_members[i] != 0) return 0;
753 return 1;
754}
755
756int set_is_full(const character_set *set)
757{
758 if (~(set->set_members[0] | 1UL) != 0) return 0;
759 for (size_t i = 1; i < CS_NOF_ELEMS; i++)
760 if (~set->set_members[i] != 0) return 0;
761 return 1;
762}
763
764int set_has_char(const character_set *set, char c)
765{
766 if (set->set_members[c / CS_BITS_PER_ELEM] & 1UL << c % CS_BITS_PER_ELEM)
767 return 1;
768 else return 0;
769}
770
771void set_add_char(character_set *set, char c)
772{
773 set->set_members[c / CS_BITS_PER_ELEM] |= 1UL << c % CS_BITS_PER_ELEM;
774}
775
776void set_remove_char(character_set *set, char c)
777{
778 set->set_members[c / CS_BITS_PER_ELEM] &= ~(1UL << c % CS_BITS_PER_ELEM);
779}
780
781int set_has_range(const character_set *set, char lower, char upper)
782{
783 for (size_t i = lower; i <= (unsigned char)upper; i++)
784 if (set->set_members[i / CS_BITS_PER_ELEM] & 1UL << i % CS_BITS_PER_ELEM)
785 return 1;
786 return 0;
787}
788
789void set_add_range(character_set *set, char lower, char upper)
790{
791 for (size_t i = lower; i <= (unsigned char)upper; i++)
792 set->set_members[i / CS_BITS_PER_ELEM] |= 1UL << i % CS_BITS_PER_ELEM;
793}
794
795int set_has_intersect(const character_set *set1, const character_set *set2)
796{
797 for (size_t i = 0; i < CS_NOF_ELEMS; i++)
798 if (set1->set_members[i] & set2->set_members[i]) return 1;
799 return 0;
800}
801
802void set_join(character_set *dst, const character_set *src)
803{
804 for (size_t i = 0; i < CS_NOF_ELEMS; i++)
805 dst->set_members[i] |= src->set_members[i];
806}
807
808void set_negate(character_set *set)
809{
810 for (size_t i = 0; i < CS_NOF_ELEMS; i++)
811 set->set_members[i] = ~set->set_members[i];
812}
813
814void set_report_duplicates(const character_set *set1,
815 const character_set *set2)
816{
817 for (unsigned char i = 0; i <= 127; ) {
818 for (i++; i <= 127; i++)
819 if (set_has_char(set2, i) && set_has_char(set1, i)) break;
820 if (i > 127) break;
821 char lower = i;
822 for (i++; i <= 127; i++)
823 if (!set_has_char(set2, i) || !set_has_char(set1, i)) break;
824 char upper = i - 1;
825 if (lower < upper) {
826 char *range_str = print_range(lower, upper);
827 TTCN_pattern_warning("Duplicate range `%s' in the character set.",
828 range_str);
829 Free(range_str);
830 } else {
831 char *char_str = print_character(lower);
832 if(lower == '\r' ){
833 TTCN_pattern_warning("Duplicate character `%s' in the character "
834 "set. Please note the \\n includes the \\r implicitly. "
835 "Use \\q{0,0,0,10} if you would like to match the LF only.", char_str);
836 } else {
837 TTCN_pattern_warning("Duplicate character `%s' in the character "
838 "set.", char_str);
839 }
840 Free(char_str);
841 }
842 }
843}
844
845static char *append_posix_body(char *set_body, const character_set *set)
846{
847 for (unsigned char i = 0; i <= 127; ) {
848 for (i++; i <= 127; i++) if (set_has_char(set, i)) break;
849 if (i > 127) break;
850 char lower = i;
851 set_body = mputc(set_body, lower);
852 for (i++; i <= 127; i++) if (!set_has_char(set, i)) break;
853 char upper = i - 1;
854 if (lower < upper) {
855 if (lower + 1 < upper) set_body = mputc(set_body, '-');
856 set_body = mputc(set_body, upper);
857 }
858 }
859 return set_body;
860}
861
862static char *generate_posix_body(character_set *set)
863{
864 int has_caret;
865 if (set_has_char(set, '^') && !(set_has_char(set, '^' - 1) &&
866 set_has_char(set, '^' + 1))) {
867 set_remove_char(set, '^');
868 has_caret = 1;
869 } else has_caret = 0;
870 int has_dash;
871 if (set_has_char(set, '-') && !(set_has_char(set, '-' - 1) &&
872 set_has_char(set, '-' + 1))) {
873 set_remove_char(set, '-');
874 has_dash = 1;
875 } else has_dash = 0;
876 int has_rsbrkt;
877 if (set_has_char(set, ']') && !(set_has_char(set, ']' - 1) &&
878 set_has_char(set, ']' + 1))) {
879 set_remove_char(set, ']');
880 has_rsbrkt = 1;
881 } else has_rsbrkt = 0;
882 char *set_body = memptystr();
883 if (set_is_empty(set) && !has_rsbrkt) {
884 /* the `-' must precede the `^' */
885 if (has_dash) set_body = mputc(set_body, '-');
886 if (has_caret) set_body = mputc(set_body, '^');
887 } else {
888 /* order: ']', others, '^', '-' */
889 if (has_rsbrkt) set_body = mputc(set_body, ']');
890 set_body = append_posix_body(set_body, set);
891 if (has_caret) set_body = mputc(set_body, '^');
892 if (has_dash) set_body = mputc(set_body, '-');
893 }
894 return set_body;
895}
896
897static char *generate_posix_body_compl(character_set *set)
898{
899 set_negate(set);
900 int has_dash;
901 if (set_has_char(set, '-') && !(set_has_char(set, '-' - 1) &&
902 set_has_char(set, '-' + 1))) {
903 set_remove_char(set, '-');
904 has_dash = 1;
905 } else has_dash = 0;
906 int has_rsbrkt;
907 if (set_has_char(set, ']') && !(set_has_char(set, ']' - 1) &&
908 set_has_char(set, ']' + 1))) {
909 set_remove_char(set, ']');
910 has_rsbrkt = 1;
911 } else has_rsbrkt = 0;
912 char *set_body = mcopystr("^");
913 /* order: ']', others, '-' */
914 if (has_rsbrkt) set_body = mputc(set_body, ']');
915 set_body = append_posix_body(set_body, set);
916 if (has_dash) set_body = mputc(set_body, '-');
917 return set_body;
918}
919
920char *set_generate_posix(const character_set *set)
921{
922 /* a full set can only be represented in this way: */
923 if (set_is_full(set)) return mcopystr(".");
924 character_set *tempset = set_copy(set);
925 char *set_body = generate_posix_body(tempset);
926 set_free(tempset);
927 char *posix_str;
928 if (set_body[0] == '\0') {
929 Free(set_body);
930 TTCN_pattern_error("Internal error: empty POSIX set.");
931 return NULL;
932 }
933 /* do not use the set notation in POSIX if the set contains only one
934 * character */
935 if (set_body[1] == '\0') posix_str = translate_character(set_body[0]);
936 else {
937 /* create the complemented version of the same set */
938 tempset = set_copy(set);
939 char *compl_body = generate_posix_body_compl(tempset);
940 set_free(tempset);
941 if (compl_body[0] == '\0') {
942 Free(set_body);
943 Free(compl_body);
944 TTCN_pattern_error("Internal error: empty complemented POSIX set.");
945 return NULL;
946 }
947 /* use the complemented form in the POSIX equivalent if it is the shorter
948 * one */
949 if (mstrlen(compl_body) < mstrlen(set_body))
950 posix_str = mprintf("[%s]", compl_body);
951 else posix_str = mprintf("[%s]", set_body);
952 Free(compl_body);
953 }
954 Free(set_body);
955 return posix_str;
956}
957
958void yyprint(FILE *file, int type, const YYSTYPE& value)
959{
960 switch (type) {
961 case TOK_Char:
962 fprintf(file, "'%c'", value.c);
963 break;
964 case TOK_Digit: case TOK_Number:
965 fprintf(file, "'%lu'", value.u);
966 break;
967 default:
968 break;
969 }
970}
971
This page took 0.060967 seconds and 5 git commands to generate.