compiler2/asn1/asn1la.l

   1 /******************************************************************************
   2  * Copyright (c) 2000-2016 Ericsson Telecom AB
   3  * All rights reserved. This program and the accompanying materials
   4  * are made available under the terms of the Eclipse Public License v1.0
   5  * which accompanies this distribution, and is available at
   6  * http://www.eclipse.org/legal/epl-v10.html
   7  *
   8  * Contributors:
   9  *   Balasko, Jeno
  10  *   Baranyi, Botond
  11  *   Delic, Adam
  12  *   Forstner, Matyas
  13  *   Kovacs, Ferenc
  14  *   Lovassy, Arpad
  15  *   Raduly, Csaba
  16  *   Szabo, Janos Zoltan – initial implementation
  17  *
  18  ******************************************************************************/
  19 /*
  20  * ASN1:1997 lexical analyzer
  21  * Written by Matyas Forstner
  22  * 20021115
  23  */
  24
  25 %option stack
  26 %option yylineno
  27 %option noyywrap
  28 %option nounput
  29 %option never-interactive
  30 %option prefix="asn1_yy"
  31
  32 %{ /* ****************** C declarations ***************** */
  33
  34 #include "../error.h"
  35 #include "../string.hh"
  36 #include "../Identifier.hh"
  37 #include "../Setting.hh"
  38 #include "../Value.hh"
  39 #include "TokenBuf.hh"
  40
  41 using namespace Common;
  42 using namespace Asn;
  43
  44 /**
  45  * The name of the file which is under parsing.
  46  */
  47 const char *asn1_infile;
  48 /**
  49  * When this flag is set, then the special internal TITAN-tokens are
  50  * handled.
  51  */
  52 bool asn1_yy_parse_internal=false;
  53
  54 void asn1_init();
  55 void asn1_free();
  56 void asn1la_newfile(const char *filename);
  57 void asn1la_newtokenbuf(TokenBuf *tb);
  58 int asn1_yylex_my();
  59
  60 static int commentlevel=0;
  61 static int commentbeginlineno=0;
  62 static TokenBuf *asn1_tokenbuf=0;
  63 static TokenBuf *asn1_tokenbuf_curr=0;
  64
  65 static void id_replace_underscores(char *s);
  66 static void bhstring_remove_whitespaces(char *s);
  67 static void cstring_remove_newlines(char *s);
  68
  69 %} /* ***************** definitions ***************** */
  70
  71 /* start conditions */
  72 %x SC_LINECOMMENT SC_COMMENT SC_COMMENT_END
  73 %s SC_TITAN
  74
  75 /* 11.1.6 */
  76 WHITESPACE [ \t\x0A\x0B\x0C\x0D]+
  77 NEWLINE [\x0A\x0B\x0C\x0D]
  78
  79 /* 11.2 */
  80 UPPERIDENTIFIER [A-Z]([\-_]?[A-Za-z0-9]+)*
  81 /* 11.3 */
  82 LOWERIDENTIFIER [a-z]([\-_]?[A-Za-z0-9]+)*
  83
  84 /* X.681, 7.1 */
  85 ampUPPERIDENTIFIER \&{UPPERIDENTIFIER}
  86 ampLOWERIDENTIFIER \&{LOWERIDENTIFIER}
  87
  88 /* 11.8 */
  89 NUMBER ([1-9][0-9]*)|0
  90 /* 11.9 */
  91 REALNUMBER {NUMBER}(\.[0-9]+)?([eE][+\-]?{NUMBER})?
  92 /* 11.10 */
  93 BSTRING '([01]|{WHITESPACE})*'B
  94 BSTRING_BAD '[^']*'B
  95 /* 11.12 */
  96 HSTRING '([0-9A-F]|{WHITESPACE})*'H
  97 HSTRING_BAD '[^']*'H
  98 HSTRING_BAD_OSTRING '([0-9A-F]|{WHITESPACE})*'O
  99
 100 /* 11.14 */
 101 CSTRING \"([^"]|(\"\"))*\"
 102 CSTRING_BAD '[^']*'
 103
 104 SEPARATOR ({WHITESPACE}|{COMMENT})
 105
 106 PERPERCOMMENT "//"[^\x0A\x0B\x0C\x0D]*{NEWLINE}
 107
 108 TITAN "$#&&&(#TITAN$#&&^#% "
 109 TITAN_ID [A-Za-z0-9 \-_]+
 110
 111 %% /* ***************** rules ************************* */
 112
 113 {TITAN} {
 114   if(asn1_yy_parse_internal)
 115     yy_push_state(SC_TITAN);
 116   else {
 117     Location loc(asn1_infile, yylineno);
 118     loc.error("Unexpected `%s'.", yytext);
 119   }
 120 }
 121
 122 <SC_TITAN> /* -------- SC_TITAN scope -------------- */
 123 {
 124
 125 "Assignments" {
 126   yy_pop_state();
 127   return KW_TITAN_Assignments;
 128 }
 129
 130 "UpperIdentifier\""{TITAN_ID}"\"" {
 131   yy_pop_state();
 132   yytext[strlen(yytext)-1]='\0';
 133   yylval.id=new Identifier(Identifier::ID_ASN, string(yytext+16));
 134   return TOK_UpperIdentifier;
 135 }
 136
 137 } /* ------------------- End of SC_TITAN scope -------- */
 138
 139 {WHITESPACE}
 140
 141 "--" {
 142   yy_push_state(SC_LINECOMMENT);
 143 }
 144
 145 "/*" {
 146   commentlevel=1;
 147   commentbeginlineno=yylineno;
 148   yy_push_state(SC_COMMENT);
 149 }
 150
 151 {PERPERCOMMENT} {
 152   Location loc(asn1_infile, yylineno-1);
 153   loc.error("This style of comment is not permitted in ASN.1");
 154 }
 155
 156
 157 "::=" {return TOK_Assignment;}
 158 ":=" {
 159   Location loc(asn1_infile, yylineno);
 160   loc.error("`:=' is not valid in ASN.1. Did you mean `::='?");
 161   return TOK_Assignment;
 162 }
 163 "..." {return TOK_Ellipsis;}
 164 ".." {return TOK_RangeSeparator;}
 165 "[[" {return TOK_LeftVersionBrackets;}
 166 "]]" {return TOK_RightVersionBrackets;}
 167 "{" {return '{';}
 168 "}" {return '}';}
 169 "(" {return '(';}
 170 ")" {return ')';}
 171 "[" {return '[';}
 172 "]" {return ']';}
 173 "," {return ',';}
 174 "." {return '.';}
 175 "-" {return '-';}
 176 ":" {return ':';}
 177 ";" {return ';';}
 178 "@" {return '@';}
 179 "|" {return '|';}
 180 "!" {return '!';}
 181 "^" {return '^';}
 182 "<" {return '<';}
 183 ">" {return '>';}
 184
 185 "ABSENT" {return KW_ABSENT;}
 186   /*
 187     "ABSTRACT-SYNTAX" {return KW_ABSTRACT_SYNTAX;}
 188   */
 189 "ALL" {return KW_ALL;}
 190 "ANY" {return KW_ANY;}
 191 "APPLICATION" {return KW_APPLICATION;}
 192 "AUTOMATIC" {return KW_AUTOMATIC;}
 193 "BEGIN" {return KW_BEGIN;}
 194 "BIT" {return KW_BIT;}
 195 "BMPString" {return KW_BMPString;}
 196 "BOOLEAN" {return KW_BOOLEAN;}
 197 "BY" {return KW_BY;}
 198 "CHARACTER" {return KW_CHARACTER;}
 199 "CHOICE" {return KW_CHOICE;}
 200 "CLASS" {return KW_CLASS;}
 201 "COMPONENT" {return KW_COMPONENT;}
 202 "COMPONENTS" {return KW_COMPONENTS;}
 203 "CONSTRAINED" {return KW_CONSTRAINED;}
 204 "CONTAINING" {return KW_CONTAINING;}
 205 "DEFAULT" {return KW_DEFAULT;}
 206 "DEFINED" {return KW_DEFINED;}
 207 "DEFINITIONS" {return KW_DEFINITIONS;}
 208 "EMBEDDED" {return KW_EMBEDDED;}
 209 "ENCODED" {return KW_ENCODED;}
 210 "END" {return KW_END;}
 211 "ENUMERATED" {return KW_ENUMERATED;}
 212 "EXCEPT" {return KW_EXCEPT;}
 213 "EXPLICIT" {return KW_EXPLICIT;}
 214 "EXPORTS" {return KW_EXPORTS;}
 215 "EXTENSIBILITY" {return KW_EXTENSIBILITY;}
 216 "EXTERNAL" {return KW_EXTERNAL;}
 217 "FALSE" {return KW_FALSE;}
 218 "FROM" {return KW_FROM;}
 219 "GeneralizedTime" {return KW_GeneralizedTime;}
 220 "GeneralString" {return KW_GeneralString;}
 221 "GraphicString" {return KW_GraphicString;}
 222 "IA5String" {return KW_IA5String;}
 223 "IDENTIFIER" {return KW_IDENTIFIER;}
 224 "IMPLICIT" {return KW_IMPLICIT;}
 225 "IMPLIED" {return KW_IMPLIED;}
 226 "IMPORTS" {return KW_IMPORTS;}
 227 "INCLUDES" {return KW_INCLUDES;}
 228 "INSTANCE" {return KW_INSTANCE;}
 229 "INTEGER" {return KW_INTEGER;}
 230 "INTERSECTION" {return KW_INTERSECTION;}
 231 "ISO646String" {return KW_ISO646String;}
 232 "MAX" {return KW_MAX;}
 233 "MIN" {return KW_MIN;}
 234 "MINUS-INFINITY" {return KW_MINUS_INFINITY;}
 235 "NOT-A-NUMBER" {return KW_NOT_A_NUMBER;}
 236 "NULL" {return KW_NULL;}
 237 "NumericString" {return KW_NumericString;}
 238 "OBJECT" {return KW_OBJECT;}
 239 "ObjectDescriptor" {return KW_ObjectDescriptor;}
 240 "OCTET" {return KW_OCTET;}
 241 "OF" {return KW_OF;}
 242 "OPTIONAL" {return KW_OPTIONAL;}
 243 "PATTERN" {return KW_PATTERN;}
 244 "PDV" {return KW_PDV;}
 245 "PLUS-INFINITY" {return KW_PLUS_INFINITY;}
 246 "PRESENT" {return KW_PRESENT;}
 247 "PrintableString" {return KW_PrintableString;}
 248 "PRIVATE" {return KW_PRIVATE;}
 249 "REAL" {return KW_REAL;}
 250 "RELATIVE-OID" {return KW_RELATIVE_OID;}
 251 "SEQUENCE" {return KW_SEQUENCE;}
 252 "SET" {return KW_SET;}
 253 "SIZE" {return KW_SIZE;}
 254 "STRING" {return KW_STRING;}
 255 "SYNTAX" {return KW_SYNTAX;}
 256 "T61String" {return KW_T61String;}
 257 "TAGS" {return KW_TAGS;}
 258 "TeletexString" {return KW_TeletexString;}
 259 "TRUE" {return KW_TRUE;}
 260   /*
 261     "TYPE-IDENTIFIER" {return KW_TYPE_IDENTIFIER;}
 262   */
 263 "UNION" {return KW_UNION;}
 264 "UNIQUE" {return KW_UNIQUE;}
 265 "UNIVERSAL" {return KW_UNIVERSAL;}
 266 "UniversalString" {return KW_UniversalString;}
 267 "UTCTime" {return KW_UTCTime;}
 268 "UTF8String" {return KW_UTF8String;}
 269 "VideotexString" {return KW_VideotexString;}
 270 "VisibleString" {return KW_VisibleString;}
 271 "WITH" {return KW_WITH;}
 272
 273 {LOWERIDENTIFIER} {
 274   id_replace_underscores(yytext);
 275   yylval.id=new Identifier(Identifier::ID_ASN, string(yytext));
 276   return TOK_LowerIdentifier;
 277 }
 278
 279 {UPPERIDENTIFIER} {
 280   id_replace_underscores(yytext);
 281   yylval.id=new Identifier(Identifier::ID_ASN, string(yytext));
 282   return TOK_UpperIdentifier;
 283 }
 284
 285 {ampUPPERIDENTIFIER} {
 286   id_replace_underscores(yytext);
 287   yylval.id=new Identifier(Identifier::ID_ASN, string(yytext));
 288   return TOK_ampUpperIdentifier;
 289 }
 290
 291 {ampLOWERIDENTIFIER} {
 292   id_replace_underscores(yytext);
 293   yylval.id=new Identifier(Identifier::ID_ASN, string(yytext));
 294   return TOK_ampLowerIdentifier;
 295 }
 296
 297 {NUMBER} {
 298   Location loc(asn1_infile, yylineno);
 299   yylval.i = new int_val_t(yytext, loc);
 300   return TOK_Number;
 301 }
 302
 303 {REALNUMBER} {
 304   Location loc(asn1_infile, yylineno);
 305   yylval.value = new Value(Value::V_REAL, string2Real(yytext, loc));
 306   return TOK_RealNumber;
 307 }
 308
 309 {BSTRING} {
 310   bhstring_remove_whitespaces(yytext);
 311   yytext[strlen(yytext)-2]='\0';
 312   yylval.value=new Value(Value::V_BSTR, new string(yytext+1));
 313   return TOK_BString;
 314 }
 315
 316 {BSTRING_BAD} {
 317   Location loc(asn1_infile, yylineno);
 318   loc.error("Invalid bstring: %s.", yytext);
 319   yylval.value=new Value(Value::V_BSTR, new string());
 320   return TOK_HString;
 321 }
 322
 323 {HSTRING} {
 324   bhstring_remove_whitespaces(yytext);
 325   yytext[strlen(yytext)-2]='\0';
 326   yylval.value=new Value(Value::V_HSTR, new string(yytext+1));
 327   return TOK_HString;
 328 }
 329
 330 {HSTRING_BAD} {
 331   Location loc(asn1_infile, yylineno);
 332   loc.error("Invalid hstring: %s.", yytext);
 333   yylval.value=new Value(Value::V_HSTR, new string());
 334   return TOK_HString;
 335 }
 336
 337 {HSTRING_BAD_OSTRING} {
 338   string ostr(yytext);
 339   ostr[ostr.size()-1]='H';
 340   Location loc(asn1_infile, yylineno);
 341   loc.error("Invalid suffix `O' in %s. Did you mean the hstring %s?",
 342             yytext, ostr.c_str());
 343   bhstring_remove_whitespaces(yytext);
 344   yytext[strlen(yytext)-2]='\0';
 345   yylval.value=new Value(Value::V_HSTR, new string(yytext+1));
 346   return TOK_HString;
 347 }
 348
 349 {CSTRING} {
 350   yytext[strlen(yytext)-1]='\0';
 351   cstring_remove_newlines(yytext+1);
 352   yylval.str=new string(yytext+1);
 353   return TOK_CString;
 354 }
 355
 356 {CSTRING_BAD} {
 357   Location loc(asn1_infile, yylineno);
 358   loc.error("Invalid cstring: %s. Perhaps you wanted to use quotation"
 359             " marks (\") instead of apostrophes (')?", yytext);
 360   yytext[strlen(yytext)-1]='\0';
 361   yylval.str=new string(yytext+1);
 362   return TOK_CString;
 363 }
 364
 365 "'" {return '\'';}
 366 "\"" {return '"';}
 367
 368 "*/" {
 369   Location loc(asn1_infile, yylineno);
 370   loc.error("Unmatched `*/'");
 371 }
 372
 373 . {
 374   Location loc(asn1_infile, yylineno);
 375   loc.error("`%c' (0x%02X) character is not used in ASN.1",
 376             (unsigned char)yytext[0]>31?(unsigned char)yytext[0]:'?',
 377             (unsigned char)yytext[0]);
 378 }
 379
 380 <INITIAL><<EOF>> {
 381   return 0;
 382 }
 383
 384 <SC_LINECOMMENT> /* -------- SC_LINECOMMENT scope -------------- */
 385 {
 386
 387 "--"|{NEWLINE} {
 388   yy_pop_state();
 389 }
 390
 391 <<EOF>> {
 392   yy_pop_state();
 393   return 0;
 394 }
 395
 396 .
 397
 398 } /* ------------------- End of SC_LINECOMMENT scope --------*/
 399
 400 <SC_COMMENT,SC_COMMENT_END><<EOF>> {
 401   Location loc(asn1_infile, commentbeginlineno);
 402   loc.error("Unmatched `/*'");
 403   while(yy_top_state()!=INITIAL) yy_pop_state();
 404   yy_pop_state();
 405   return 0;
 406 }
 407
 408 <SC_COMMENT> /* -------- SC_COMMENT scope -------------- */
 409 {
 410
 411 "/*" {
 412   commentlevel++;
 413 }
 414
 415 "*" {yy_push_state(SC_COMMENT_END);}
 416
 417 [^*]
 418
 419 } /* ------------------- End of SC_COMMENT scope --------*/
 420
 421 <SC_COMMENT_END> /* ----- SC_COMMENT_END scope -------------- */
 422 {
 423
 424 "*"
 425
 426 "/" {
 427   yy_pop_state();
 428   commentlevel--;
 429   if(!commentlevel) yy_pop_state();
 430 }
 431
 432 [^*/] {yy_pop_state();}
 433
 434 } /* ------------------- End of SC_COMMENT_END scope --------*/
 435
 436 %%
 437
 438 void asn1_init()
 439 {
 440   asn1_infile=0;
 441   asn1_tokenbuf=new TokenBuf();
 442   Asn::Assignments::create_spec_asss();
 443 }
 444
 445 void asn1_free()
 446 {
 447   Asn::Assignments::destroy_spec_asss();
 448   delete asn1_tokenbuf;
 449 }
 450
 451 void asn1la_newfile(const char *filename)
 452 {
 453   asn1_tokenbuf->reset(filename);
 454   asn1la_newtokenbuf(asn1_tokenbuf);
 455 }
 456
 457 void asn1la_newtokenbuf(TokenBuf *tb)
 458 {
 459   if(!tb)
 460     FATAL_ERROR("asn1la_newtokenbuf(): tb is NULL");
 461   asn1_tokenbuf_curr=tb;
 462   asn1_infile=asn1_tokenbuf_curr->get_filename();
 463   yylineno=1;
 464 }
 465
 466 void id_replace_underscores(char *s)
 467 {
 468   char *origid=0;
 469   char *p=s;
 470   while(*p!='\0') {
 471     if(*p=='_') {
 472       if(!origid) origid=mputstr(origid, s);
 473       *p='-';
 474     }
 475     p++;
 476   }
 477   if(origid) {
 478     Location loc(asn1_infile, yylineno);
 479     loc.error("`%s' is not a valid ASN.1 identifier. Did you mean `%s'?",
 480               origid, s);
 481     Free(origid);
 482   }
 483 }
 484
 485 void bhstring_remove_whitespaces(char *s) {
 486   char *p1, *p2;
 487   for(p1=p2=s; *p2; p2++) {
 488     switch(*p2) {
 489     case ' ':  // whitespace, no newline chars
 490     case '\t':
 491     case 0x0A: // newline chars
 492     case 0x0B:
 493     case 0x0C:
 494     case 0x0D:
 495       break;
 496     default:
 497       *p1++=*p2;
 498     } // switch *p2
 499   } // for
 500   *p1='\0';
 501 }
 502
 503 void cstring_remove_newlines(char *s) {
 504   /* also "" -> " */
 505   char  *p_r, *p_w, *p_b;
 506   enum {NO_WS, /**< no whitespace */
 507         WS,    /**< whitespace, but not newline */
 508         NL     /**< newline */
 509   } state=NO_WS;
 510   /*
 511     p_r reads the string s;
 512     p_w writes into the string s;
 513     p_b is a bookmark for writing; shows the last non-whitespace character+1,
 514         if the state is WS;
 515         if state is NL, then the next whitespaces must be eaten up;
 516   */
 517   for(p_w=p_b=p_r=s; *p_r; p_w++, p_r++) {
 518     switch(*p_r) {
 519     case ' ': // whitespace, no newline chars
 520     case '\t':
 521       if(state==NO_WS) {p_b=p_w; state=WS;}
 522       break;
 523     case 0x0A: // newline chars (LF,VT,FF,CR)
 524     case 0x0B:
 525     case 0x0C:
 526     case 0x0D:
 527       switch(state) {
 528       case NO_WS:
 529         p_b=p_w;
 530         /* no break */
 531       case WS:
 532       case NL:
 533         state=NL;
 534       } // switch state
 535       break;
 536     default: // not whitespace chars
 537       if(state==NL) p_w=p_b;
 538       state=NO_WS;
 539     } // switch *p_r
 540     *p_w = *p_r;
 541     if (*p_r=='"' && *(p_r+1)=='"') { // On the first of two QUOTATION MARKs.
 542       p_w--; // Back up writing pos. The next " will overwrite this one.
 543     }
 544   } // for
 545   *p_w='\0';
 546 }
 547
 548 int asn1_yylex_my()
 549 {
 550   return asn1_tokenbuf_curr->lex();
 551 }