gdb/ada-lex.l

   1 /* FLEX lexer for Ada expressions, for GDB.
   2    Copyright (C) 1994, 1997, 1998, 2000, 2001, 2002, 2003, 2007, 2008, 2009
   3    Free Software Foundation, Inc.
   4
   5    This file is part of GDB.
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 /*----------------------------------------------------------------------*/
  21
  22 /* The converted version of this file is to be included in ada-exp.y, */
  23 /* the Ada parser for gdb.  The function yylex obtains characters from */
  24 /* the global pointer lexptr.  It returns a syntactic category for */
  25 /* each successive token and places a semantic value into yylval */
  26 /* (ada-lval), defined by the parser.   */
  27
/* Named sub-patterns used by the rules below.  They are written with
   lower-case letters only; the scanner is built with the
   case-insensitive option, so upper-case input matches as well.  */

/* A decimal digit.  */
DIG     [0-9]
/* A decimal numeral: digits with optional embedded underscores.  */
NUM10   ({DIG}({DIG}|_)*)
/* A hexadecimal digit.  */
HEXDIG  [0-9a-f]
/* A numeral made of hexadecimal digits with optional embedded
   underscores.  */
NUM16   ({HEXDIG}({HEXDIG}|_)*)
/* An octal digit.  */
OCTDIG  [0-7]
/* A letter or an underscore.  */
LETTER  [a-z_]
/* An identifier, either plain or enclosed as a whole in <...>.  */
ID      ({LETTER}({LETTER}|{DIG})*|"<"{LETTER}({LETTER}|{DIG})*">")
/* A single white-space character.  */
WHITE   [ \t\n]
/* An apostrophe followed by optional white space.  */
TICK    ("'"{WHITE}*)
/* The characters accepted inside character and string literals (the
   double quote is handled separately by those rules).  */
GRAPHIC [a-z0-9 #&'()*+,-./:;<>=_|!$%?@\[\]\\^`{}~]
/* Operator symbols and operator words; used quoted as a component of a
   dotted name.  */
OPER    ([-+*/=<>&]|"<="|">="|"**"|"/="|"and"|"or"|"xor"|"not"|"mod"|"rem"|"abs")

/* An exponent with a mandatory sign.  */
EXP     (e[+-]{NUM10})
/* An exponent that is non-negative: the only sign allowed is an
   optional '+'.  */
POSEXP  (e"+"?{NUM10})
  42
  43 %{
  44
  45 #define NUMERAL_WIDTH 256
  46 #define LONGEST_SIGN ((ULONGEST) 1 << (sizeof(LONGEST) * HOST_CHAR_BIT - 1))
  47
  48 /* Temporary staging for numeric literals.  */
  49 static char numbuf[NUMERAL_WIDTH];
  50  static void canonicalizeNumeral (char *s1, const char *);
  51 static struct stoken processString (const char*, int);
  52 static int processInt (const char *, const char *, const char *);
  53 static int processReal (const char *);
  54 static struct stoken processId (const char *, int);
  55 static int processAttribute (const char *);
  56 static int find_dot_all (const char *);
  57
  58 #undef YY_DECL
  59 #define YY_DECL static int yylex ( void )
  60
  61 #undef YY_INPUT
  62 #define YY_INPUT(BUF, RESULT, MAX_SIZE) \
  63     if ( *lexptr == '\000' ) \
  64       (RESULT) = YY_NULL; \
  65     else \
  66       { \
  67         *(BUF) = *lexptr; \
  68         (RESULT) = 1; \
  69         lexptr += 1; \
  70       }
  71
  72 static int find_dot_all (const char *);
  73
  74 %}
  75
/* Match patterns regardless of letter case, generate an interactive
   scanner, and generate no default rule, so every input character has to
   be matched by an explicit rule.  */
%option case-insensitive interactive nodefault

/* Start condition entered after a name that is directly followed by a
   quote, so that the quote is then returned as a token of its own.  */
%s BEFORE_QUAL_QUOTE
  79
  80 %%
  81
        /* White space is skipped.  */
{WHITE}          { }

        /* Text starting with "--" up to the end of the line stops the
           scanner.  */
"--".*           { yyterminate(); }
  85
  86 {NUM10}{POSEXP}  {
  87                    canonicalizeNumeral (numbuf, yytext);
  88                    return processInt (NULL, numbuf, strrchr(numbuf, 'e')+1);
  89                  }
  90
  91 {NUM10}          {
  92                    canonicalizeNumeral (numbuf, yytext);
  93                    return processInt (NULL, numbuf, NULL);
  94                  }
  95
  96 {NUM10}"#"{HEXDIG}({HEXDIG}|_)*"#"{POSEXP} {
  97                    canonicalizeNumeral (numbuf, yytext);
  98                    return processInt (numbuf,
  99                                       strchr (numbuf, '#') + 1,
 100                                       strrchr(numbuf, '#') + 1);
 101                  }
 102
 103 {NUM10}"#"{HEXDIG}({HEXDIG}|_)*"#" {
 104                    canonicalizeNumeral (numbuf, yytext);
 105                    return processInt (numbuf, strchr (numbuf, '#') + 1, NULL);
 106                  }
 107
 108 "0x"{HEXDIG}+   {
 109                   canonicalizeNumeral (numbuf, yytext+2);
 110                   return processInt ("16#", numbuf, NULL);
 111                 }
 112
 113
        /* Decimal real with an explicitly signed exponent, e.g. 1.5e+3.  */
{NUM10}"."{NUM10}{EXP} {
                   canonicalizeNumeral (numbuf, yytext);
                   return processReal (numbuf);
                }

        /* Decimal real without exponent.  */
{NUM10}"."{NUM10} {
                   canonicalizeNumeral (numbuf, yytext);
                   return processReal (numbuf);
                }

        /* Based reals, with or without exponent, are recognized only to
           be rejected with an explicit message.  */
{NUM10}"#"{NUM16}"."{NUM16}"#"{EXP} {
                   error (_("Based real literals not implemented yet."));
                }

{NUM10}"#"{NUM16}"."{NUM16}"#" {
                   error (_("Based real literals not implemented yet."));
                }
 131
 132 <INITIAL>"'"({GRAPHIC}|\")"'" {
 133                    yylval.typed_val.type = type_char ();
 134                    yylval.typed_val.val = yytext[1];
 135                    return CHARLIT;
 136                 }
 137
 138 <INITIAL>"'[\""{HEXDIG}{2}"\"]'"   {
 139                    int v;
 140                    yylval.typed_val.type = type_char ();
 141                    sscanf (yytext+3, "%2x", &v);
 142                    yylval.typed_val.val = v;
 143                    return CHARLIT;
 144                 }
 145
 146 \"({GRAPHIC}|"[\""({HEXDIG}{2}|\")"\"]")*\"   {
 147                    yylval.sval = processString (yytext+1, yyleng-2);
 148                    return STRING;
 149                 }
 150
 151 \"              {
 152                    error (_("ill-formed or non-terminated string literal"));
 153                 }
 154
 155
        /* "if" ends the expression: lexptr is moved back to the 'i' so
           that the keyword is left unconsumed, the scanner is reset, and
           0 is returned.  */
if              {
                  while (*lexptr != 'i' && *lexptr != 'I')
                    lexptr -= 1;
                  yyrestart(NULL);
                  return 0;
                }

(task|thread)   {
                  /* This keyword signals the end of the expression and
                     will be processed separately.  lexptr is moved back
                     to the 't' so that the keyword is left unconsumed.  */
                  while (*lexptr != 't' && *lexptr != 'T')
                    lexptr--;
                  yyrestart(NULL);
                  return 0;
                }
 171
        /* ADA KEYWORDS */

        /* Each reserved word below is returned as its own token code.  */

abs             { return ABS; }
and             { return _AND_; }
else            { return ELSE; }
in              { return IN; }
mod             { return MOD; }
new             { return NEW; }
not             { return NOT; }
null            { return NULL_PTR; }
or              { return OR; }
others          { return OTHERS; }
rem             { return REM; }
then            { return THEN; }
xor             { return XOR; }

        /* BOOLEAN "KEYWORDS" */

 /* True and False are not keywords in Ada, but rather enumeration constants.
    However, the boolean type is no longer represented as an enum, so True
    and False are no longer defined in symbol tables.  We compromise by
    making them keywords (when bare). */

true            { return TRUEKEYWORD; }
false           { return FALSEKEYWORD; }
 197
        /* ATTRIBUTES */

        /* A tick, optional white space, and at least two letters.  The
           text following the quote character is handed to
           processAttribute, which returns the token code.  */
{TICK}[a-zA-Z][a-zA-Z]+ { return processAttribute (yytext+1); }

        /* PUNCTUATION */

        /* Two-character operators.  */
"=>"            { return ARROW; }
".."            { return DOTDOT; }
"**"            { return STARSTAR; }
":="            { return ASSIGN; }
"/="            { return NOTEQUAL; }
"<="            { return LEQ; }
">="            { return GEQ; }

        /* The quote left behind by the name rule: return it as a token
           and go back to the INITIAL start condition.  */
<BEFORE_QUAL_QUOTE>"'" { BEGIN INITIAL; return '\''; }

        /* Single-character tokens are returned as their character code.  */
[-&*+./:<>=|;\[\]] { return yytext[0]; }

        /* A comma outside all parentheses ends the expression when
           comma_terminates is set: it is left unconsumed in lexptr and 0
           is returned.  Otherwise it is an ordinary token.  */
","             { if (paren_depth == 0 && comma_terminates)
                    {
                      lexptr -= 1;
                      yyrestart(NULL);
                      return 0;
                    }
                  else
                    return ',';
                }

        /* Parentheses keep paren_depth up to date.  A closing parenthesis
           with no matching opening one ends the expression and is left
           unconsumed in lexptr.  */
"("             { paren_depth += 1; return '('; }
")"             { if (paren_depth == 0)
                    {
                      lexptr -= 1;
                      yyrestart(NULL);
                      return 0;
                    }
                  else
                    {
                      paren_depth -= 1;
                      return ')';
                    }
                }
 239
        /* A dot, optional white space, and the word "all".  */
"."{WHITE}*all  { return DOT_ALL; }

        /* A dot, optional white space, and an identifier; the semantic
           value is the canonicalized text following the dot.  */
"."{WHITE}*{ID} {
                  yylval.sval = processId (yytext+1, yyleng-1);
                  return DOT_ID;
                }

        /* A possibly dotted name whose components are identifiers or
           quoted operators, optionally followed by blanks and a quote.
           If the name contains a ".all" component, everything from that
           dot on is pushed back.  Otherwise, if the match ends with a
           quote, the quote is pushed back and the BEFORE_QUAL_QUOTE start
           condition is entered so that it is scanned as a separate
           token.  */
{ID}({WHITE}*"."{WHITE}*({ID}|\"{OPER}\"))*(" "*"'")?  {
                  int all_posn = find_dot_all (yytext);

                  if (all_posn == -1 && yytext[yyleng-1] == '\'')
                    {
                      BEGIN BEFORE_QUAL_QUOTE;
                      yyless (yyleng-1);
                    }
                  else if (all_posn >= 0)
                    yyless (all_posn);
                  yylval.sval = processId (yytext, yyleng);
                  return NAME;
               }
 260
 261
        /* GDB EXPRESSION CONSTRUCTS  */

        /* Quoted text followed by optional white space and "::".  The two
           colons are pushed back and the remaining text is returned as a
           NAME.  */
"'"[^']+"'"{WHITE}*:: {
                  yyless (yyleng - 2);
                  yylval.sval = processId (yytext, yyleng);
                  return NAME;
                }

"::"            { return COLONCOLON; }

        /* Braces and '@' are returned as their character code.  */
[{}@]           { return yytext[0]; }

        /* REGISTERS AND GDB CONVENIENCE VARIABLES */

        /* A '$' followed by any number of letters, digits, underscores
           and further '$' characters.  The semantic value points directly
           at yytext and is not copied.  */
"$"({LETTER}|{DIG}|"$")*  {
                  yylval.sval.ptr = yytext;
                  yylval.sval.length = yyleng;
                  return SPECIAL_VARIABLE;
                }

        /* CATCH-ALL ERROR CASE */

        /* Any character that no earlier rule accepts is reported.  */
.               { error (_("Invalid character '%s' in expression."), yytext); }
 285 %%
 286
 287 #include <ctype.h>
 288 #include "gdb_string.h"
 289
/* Initialize the lexer for processing a new expression: select the
   INITIAL start condition and restart the scanner with INP as its input
   file argument.  */

void
lexer_init (FILE *inp)
{
  BEGIN INITIAL;
  yyrestart (inp);
}
 298
 299
 300 /* Copy S2 to S1, removing all underscores, and downcasing all letters.  */
 301
 302 static void
 303 canonicalizeNumeral (char *s1, const char *s2)
 304 {
 305   for (; *s2 != '\000'; s2 += 1)
 306     {
 307       if (*s2 != '_')
 308         {
 309           *s1 = tolower(*s2);
 310           s1 += 1;
 311         }
 312     }
 313   s1[0] = '\000';
 314 }
 315
 316 /* Interprets the prefix of NUM that consists of digits of the given BASE
 317    as an integer of that BASE, with the string EXP as an exponent.
 318    Puts value in yylval, and returns INT, if the string is valid.  Causes
 319    an error if the number is improperly formated.   BASE, if NULL, defaults
 320    to "10", and EXP to "1".  The EXP does not contain a leading 'e' or 'E'.
 321  */
 322
 323 static int
 324 processInt (const char *base0, const char *num0, const char *exp0)
 325 {
 326   ULONGEST result;
 327   long exp;
 328   int base;
 329
 330   char *trailer;
 331
 332   if (base0 == NULL)
 333     base = 10;
 334   else
 335     {
 336       base = strtol (base0, (char **) NULL, 10);
 337       if (base < 2 || base > 16)
 338         error (_("Invalid base: %d."), base);
 339     }
 340
 341   if (exp0 == NULL)
 342     exp = 0;
 343   else
 344     exp = strtol(exp0, (char **) NULL, 10);
 345
 346   errno = 0;
 347   result = strtoulst (num0, (const char **) &trailer, base);
 348   if (errno == ERANGE)
 349     error (_("Integer literal out of range"));
 350   if (isxdigit(*trailer))
 351     error (_("Invalid digit `%c' in based literal"), *trailer);
 352
 353   while (exp > 0)
 354     {
 355       if (result > (ULONG_MAX / base))
 356         error (_("Integer literal out of range"));
 357       result *= base;
 358       exp -= 1;
 359     }
 360
 361   if ((result >> (gdbarch_int_bit (parse_gdbarch)-1)) == 0)
 362     yylval.typed_val.type = type_int ();
 363   else if ((result >> (gdbarch_long_bit (parse_gdbarch)-1)) == 0)
 364     yylval.typed_val.type = type_long ();
 365   else if (((result >> (gdbarch_long_bit (parse_gdbarch)-1)) >> 1) == 0)
 366     {
 367       /* We have a number representable as an unsigned integer quantity.
 368          For consistency with the C treatment, we will treat it as an
 369          anonymous modular (unsigned) quantity.  Alas, the types are such
 370          that we need to store .val as a signed quantity.  Sorry
 371          for the mess, but C doesn't officially guarantee that a simple
 372          assignment does the trick (no, it doesn't; read the reference manual).
 373        */
 374       yylval.typed_val.type
 375         = builtin_type (parse_gdbarch)->builtin_unsigned_long;
 376       if (result & LONGEST_SIGN)
 377         yylval.typed_val.val =
 378           (LONGEST) (result & ~LONGEST_SIGN)
 379           - (LONGEST_SIGN>>1) - (LONGEST_SIGN>>1);
 380       else
 381         yylval.typed_val.val = (LONGEST) result;
 382       return INT;
 383     }
 384   else
 385     yylval.typed_val.type = type_long_long ();
 386
 387   yylval.typed_val.val = (LONGEST) result;
 388   return INT;
 389 }
 390
/* Scan the real literal text NUM0 into yylval.typed_val_float.dval using
   DOUBLEST_SCAN_FORMAT, choose a type for it, and return FLOAT.  The
   result of sscanf is not checked.  */

static int
processReal (const char *num0)
{
  sscanf (num0, "%" DOUBLEST_SCAN_FORMAT, &yylval.typed_val_float.dval);

  /* Start with float, then switch to double and to long double whenever
     sizeof (DOUBLEST) is at least the size of that type as computed from
     the gdbarch bit width; the last test that succeeds decides.  */
  yylval.typed_val_float.type = type_float ();
  if (sizeof(DOUBLEST) >= gdbarch_double_bit (parse_gdbarch)
                            / TARGET_CHAR_BIT)
    yylval.typed_val_float.type = type_double ();
  if (sizeof(DOUBLEST) >= gdbarch_long_double_bit (parse_gdbarch)
                            / TARGET_CHAR_BIT)
    yylval.typed_val_float.type = type_long_double ();

  return FLOAT;
}
 406
 407
 408 /* Store a canonicalized version of NAME0[0..LEN-1] in yylval.ssym.  The
 409    resulting string is valid until the next call to ada_parse.  It differs
 410    from NAME0 in that:
 411     + Characters between '...' or <...> are transfered verbatim to
 412       yylval.ssym.
 413     + <, >, and trailing "'" characters in quoted sequences are removed
 414       (a leading quote is preserved to indicate that the name is not to be
 415       GNAT-encoded).
 416     + Unquoted whitespace is removed.
 417     + Unquoted alphabetic characters are mapped to lower case.
 418    Result is returned as a struct stoken, but for convenience, the string
 419    is also null-terminated.  Result string valid until the next call of
 420    ada_parse.
 421  */
 422 static struct stoken
 423 processId (const char *name0, int len)
 424 {
 425   char *name = obstack_alloc (&temp_parse_space, len + 11);
 426   int i0, i;
 427   struct stoken result;
 428
 429   while (len > 0 && isspace (name0[len-1]))
 430     len -= 1;
 431   i = i0 = 0;
 432   while (i0 < len)
 433     {
 434       if (isalnum (name0[i0]))
 435         {
 436           name[i] = tolower (name0[i0]);
 437           i += 1; i0 += 1;
 438         }
 439       else switch (name0[i0])
 440         {
 441         default:
 442           name[i] = name0[i0];
 443           i += 1; i0 += 1;
 444           break;
 445         case ' ': case '\t':
 446           i0 += 1;
 447           break;
 448         case '\'':
 449           do
 450             {
 451               name[i] = name0[i0];
 452               i += 1; i0 += 1;
 453             }
 454           while (i0 < len && name0[i0] != '\'');
 455           i0 += 1;
 456           break;
 457         case '<':
 458           i0 += 1;
 459           while (i0 < len && name0[i0] != '>')
 460             {
 461               name[i] = name0[i0];
 462               i += 1; i0 += 1;
 463             }
 464           i0 += 1;
 465           break;
 466         }
 467     }
 468   name[i] = '\000';
 469
 470   result.ptr = name;
 471   result.length = i;
 472   return result;
 473 }
 474
 475 /* Return TEXT[0..LEN-1], a string literal without surrounding quotes,
 476    with special hex character notations replaced with characters.
 477    Result valid until the next call to ada_parse.  */
 478
 479 static struct stoken
 480 processString (const char *text, int len)
 481 {
 482   const char *p;
 483   char *q;
 484   const char *lim = text + len;
 485   struct stoken result;
 486
 487   q = result.ptr = obstack_alloc (&temp_parse_space, len);
 488   p = text;
 489   while (p < lim)
 490     {
 491       if (p[0] == '[' && p[1] == '"' && p+2 < lim)
 492          {
 493            if (p[2] == '"')  /* "...["""]... */
 494              {
 495                *q = '"';
 496                p += 4;
 497              }
 498            else
 499              {
 500                int chr;
 501                sscanf (p+2, "%2x", &chr);
 502                *q = (char) chr;
 503                p += 5;
 504              }
 505          }
 506        else
 507          *q = *p;
 508        q += 1;
 509        p += 1;
 510      }
 511   result.length = q - result.ptr;
 512   return result;
 513 }
 514
 515 /* Returns the position within STR of the '.' in a
 516    '.{WHITE}*all' component of a dotted name, or -1 if there is none.
 517    Note: we actually don't need this routine, since 'all' can never be an
 518    Ada identifier.  Thus, looking up foo.all or foo.all.x as a name
 519    must fail, and will eventually be interpreted as (foo).all or
 520    (foo).all.x.  However, this does avoid an extraneous lookup. */
 521
 522 static int
 523 find_dot_all (const char *str)
 524 {
 525   int i;
 526   for (i = 0; str[i] != '\000'; i += 1)
 527     {
 528       if (str[i] == '.')
 529         {
 530           int i0 = i;
 531           do
 532             i += 1;
 533           while (isspace (str[i]));
 534           if (strncmp (str+i, "all", 3) == 0
 535               && ! isalnum (str[i+3]) && str[i+3] != '_')
 536             return i0;
 537         }
 538     }
 539   return -1;
 540 }
 541
 542 /* Returns non-zero iff string SUBSEQ matches a subsequence of STR, ignoring
 543    case.  */
 544
 545 static int
 546 subseqMatch (const char *subseq, const char *str)
 547 {
 548   if (subseq[0] == '\0')
 549     return 1;
 550   else if (str[0] == '\0')
 551     return 0;
 552   else if (tolower (subseq[0]) == tolower (str[0]))
 553     return subseqMatch (subseq+1, str+1) || subseqMatch (subseq, str+1);
 554   else
 555     return subseqMatch (subseq, str+1);
 556 }
 557
 558
/* Table pairing attribute names with the token code returned for them.
   Three different spellings yield TICK_ACCESS.  The final entry, with a
   NULL name and code -1, marks the end of the table.  */
static struct { const char *name; int code; }
attributes[] = {
  { "address", TICK_ADDRESS },
  { "unchecked_access", TICK_ACCESS },
  { "unrestricted_access", TICK_ACCESS },
  { "access", TICK_ACCESS },
  { "first", TICK_FIRST },
  { "last", TICK_LAST },
  { "length", TICK_LENGTH },
  { "max", TICK_MAX },
  { "min", TICK_MIN },
  { "modulus", TICK_MODULUS },
  { "pos", TICK_POS },
  { "range", TICK_RANGE },
  { "size", TICK_SIZE },
  { "tag", TICK_TAG },
  { "val", TICK_VAL },
  { NULL, -1 }
};
 578
 579 /* Return the syntactic code corresponding to the attribute name or
 580    abbreviation STR.  */
 581
 582 static int
 583 processAttribute (const char *str)
 584 {
 585   int i, k;
 586
 587   for (i = 0; attributes[i].code != -1; i += 1)
 588     if (strcasecmp (str, attributes[i].name) == 0)
 589       return attributes[i].code;
 590
 591   for (i = 0, k = -1; attributes[i].code != -1; i += 1)
 592     if (subseqMatch (str, attributes[i].name))
 593       {
 594         if (k == -1)
 595           k = i;
 596         else
 597           error (_("ambiguous attribute name: `%s'"), str);
 598       }
 599   if (k == -1)
 600     error (_("unrecognized attribute: `%s'"), str);
 601
 602   return attributes[k].code;
 603 }
 604
/* End-of-input hook of the generated scanner.  Always returns 1, which
   tells the scanner that there is no further input to switch to.  */
int
yywrap(void)
{
  return 1;
}
 610
/* Dummy definition to suppress warnings about unused static definitions.
   Storing the address of yyunput in this table counts as a use of it.  */
typedef void (*dummy_function) ();
dummy_function ada_flex_use[] =
{
  (dummy_function) yyunput
};
 615   (dummy_function) yyunput
 616 };