gdb/ada-lex.l

   1 /* FLEX lexer for Ada expressions, for GDB.
   2    Copyright (C) 1994-2019 Free Software Foundation, Inc.
   3
   4    This file is part of GDB.
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  18
  19 /*----------------------------------------------------------------------*/
  20
  21 /* The converted version of this file is to be included in ada-exp.y, */
  22 /* the Ada parser for gdb.  The function yylex obtains characters from */
  23 /* the global pointer lexptr.  It returns a syntactic category for */
  24 /* each successive token and places a semantic value into yylval */
  25 /* (ada-lval), defined by the parser.   */
  26
  27 DIG     [0-9]
  28 NUM10   ({DIG}({DIG}|_)*)
  29 HEXDIG  [0-9a-f]
  30 NUM16   ({HEXDIG}({HEXDIG}|_)*)
  31 OCTDIG  [0-7]
  32 LETTER  [a-z_]
  33 ID      ({LETTER}({LETTER}|{DIG})*|"<"{LETTER}({LETTER}|{DIG})*">")
  34 WHITE   [ \t\n]
  35 TICK    ("'"{WHITE}*)
  36 GRAPHIC [a-z0-9 #&'()*+,-./:;<>=_|!$%?@\[\]\\^`{}~]
  37 OPER    ([-+*/=<>&]|"<="|">="|"**"|"/="|"and"|"or"|"xor"|"not"|"mod"|"rem"|"abs")
  38
  39 EXP     (e[+-]{NUM10})
  40 POSEXP  (e"+"?{NUM10})
  41
  42 %{
  43
  44 #include "diagnostics.h"
  45
  46 /* Some old versions of flex generate code that uses the "register" keyword,
  47    which clang warns about.  This was observed for example with flex 2.5.35,
  48    as shipped with macOS 10.12.  */
  49 DIAGNOSTIC_PUSH
  50 DIAGNOSTIC_IGNORE_DEPRECATED_REGISTER
  51
  52 #define NUMERAL_WIDTH 256
  53 #define LONGEST_SIGN ((ULONGEST) 1 << (sizeof(LONGEST) * HOST_CHAR_BIT - 1))
  54
  55 /* Temporary staging for numeric literals.  */
  56 static char numbuf[NUMERAL_WIDTH];
  57  static void canonicalizeNumeral (char *s1, const char *);
  58 static struct stoken processString (const char*, int);
  59 static int processInt (struct parser_state *, const char *, const char *,
  60                        const char *);
  61 static int processReal (struct parser_state *, const char *);
  62 static struct stoken processId (const char *, int);
  63 static int processAttribute (const char *);
  64 static int find_dot_all (const char *);
  65 static void rewind_to_char (int);
  66
  67 #undef YY_DECL
  68 #define YY_DECL static int yylex ( void )
  69
  70 /* Flex generates a static function "input" which is not used.
  71    Defining YY_NO_INPUT comments it out.  */
  72 #define YY_NO_INPUT
  73
  74 #undef YY_INPUT
  75 #define YY_INPUT(BUF, RESULT, MAX_SIZE) \
  76     if ( *pstate->lexptr == '\000' ) \
  77       (RESULT) = YY_NULL; \
  78     else \
  79       { \
  80         *(BUF) = *pstate->lexptr; \
  81         (RESULT) = 1; \
  82         pstate->lexptr += 1; \
  83       }
  84
  85 static int find_dot_all (const char *);
  86
  87 /* Depth of parentheses.  */
  88 static int paren_depth;
  89
  90 %}
  91
  92 %option case-insensitive interactive nodefault noyywrap
  93
  94 %s BEFORE_QUAL_QUOTE
  95
  96 %%
  97
  98 {WHITE}          { }
  99
 100 "--".*           { yyterminate(); }
 101
 102 {NUM10}{POSEXP}  {
 103                    canonicalizeNumeral (numbuf, yytext);
 104                    return processInt (pstate, NULL, numbuf,
 105                                       strrchr (numbuf, 'e') + 1);
 106                  }
 107
 108 {NUM10}          {
 109                    canonicalizeNumeral (numbuf, yytext);
 110                    return processInt (pstate, NULL, numbuf, NULL);
 111                  }
 112
 113 {NUM10}"#"{HEXDIG}({HEXDIG}|_)*"#"{POSEXP} {
 114                    canonicalizeNumeral (numbuf, yytext);
 115                    return processInt (pstate, numbuf,
 116                                       strchr (numbuf, '#') + 1,
 117                                       strrchr(numbuf, '#') + 1);
 118                  }
 119
 120 {NUM10}"#"{HEXDIG}({HEXDIG}|_)*"#" {
 121                    canonicalizeNumeral (numbuf, yytext);
 122                    return processInt (pstate, numbuf, strchr (numbuf, '#') + 1,
 123                                       NULL);
 124                  }
 125
 126 "0x"{HEXDIG}+   {
 127                   canonicalizeNumeral (numbuf, yytext+2);
 128                   return processInt (pstate, "16#", numbuf, NULL);
 129                 }
 130
 131
 132 {NUM10}"."{NUM10}{EXP} {
 133                    canonicalizeNumeral (numbuf, yytext);
 134                    return processReal (pstate, numbuf);
 135                 }
 136
 137 {NUM10}"."{NUM10} {
 138                    canonicalizeNumeral (numbuf, yytext);
 139                    return processReal (pstate, numbuf);
 140                 }
 141
 142 {NUM10}"#"{NUM16}"."{NUM16}"#"{EXP} {
 143                    error (_("Based real literals not implemented yet."));
 144                 }
 145
 146 {NUM10}"#"{NUM16}"."{NUM16}"#" {
 147                    error (_("Based real literals not implemented yet."));
 148                 }
 149
 150 <INITIAL>"'"({GRAPHIC}|\")"'" {
 151                    yylval.typed_val.type = type_char (pstate);
 152                    yylval.typed_val.val = yytext[1];
 153                    return CHARLIT;
 154                 }
 155
 156 <INITIAL>"'[\""{HEXDIG}{2}"\"]'"   {
 157                    int v;
 158                    yylval.typed_val.type = type_char (pstate);
 159                    sscanf (yytext+3, "%2x", &v);
 160                    yylval.typed_val.val = v;
 161                    return CHARLIT;
 162                 }
 163
 164 \"({GRAPHIC}|"[\""({HEXDIG}{2}|\")"\"]")*\"   {
 165                    yylval.sval = processString (yytext+1, yyleng-2);
 166                    return STRING;
 167                 }
 168
 169 \"              {
 170                    error (_("ill-formed or non-terminated string literal"));
 171                 }
 172
 173
 174 if              {
 175                   rewind_to_char ('i');
 176                   return 0;
 177                 }
 178
 179 task            {
 180                   rewind_to_char ('t');
 181                   return 0;
 182                 }
 183
 184 thread{WHITE}+{DIG} {
 185                   /* This keyword signals the end of the expression and
 186                      will be processed separately.  */
 187                   rewind_to_char ('t');
 188                   return 0;
 189                 }
 190
 191         /* ADA KEYWORDS */
 192
 193 abs             { return ABS; }
 194 and             { return _AND_; }
 195 else            { return ELSE; }
 196 in              { return IN; }
 197 mod             { return MOD; }
 198 new             { return NEW; }
 199 not             { return NOT; }
 200 null            { return NULL_PTR; }
 201 or              { return OR; }
 202 others          { return OTHERS; }
 203 rem             { return REM; }
 204 then            { return THEN; }
 205 xor             { return XOR; }
 206
 207         /* BOOLEAN "KEYWORDS" */
 208
 209  /* True and False are not keywords in Ada, but rather enumeration constants.
 210     However, the boolean type is no longer represented as an enum, so True
 211     and False are no longer defined in symbol tables.  We compromise by
 212     making them keywords (when bare). */
 213
 214 true            { return TRUEKEYWORD; }
 215 false           { return FALSEKEYWORD; }
 216
 217         /* ATTRIBUTES */
 218
 219 {TICK}[a-zA-Z][a-zA-Z_]+ { BEGIN INITIAL; return processAttribute (yytext+1); }
 220
 221         /* PUNCTUATION */
 222
 223 "=>"            { return ARROW; }
 224 ".."            { return DOTDOT; }
 225 "**"            { return STARSTAR; }
 226 ":="            { return ASSIGN; }
 227 "/="            { return NOTEQUAL; }
 228 "<="            { return LEQ; }
 229 ">="            { return GEQ; }
 230
 231 <BEFORE_QUAL_QUOTE>"'" { BEGIN INITIAL; return '\''; }
 232
 233 [-&*+./:<>=|;\[\]] { return yytext[0]; }
 234
 235 ","             { if (paren_depth == 0 && pstate->comma_terminates)
 236                     {
 237                       rewind_to_char (',');
 238                       return 0;
 239                     }
 240                   else
 241                     return ',';
 242                 }
 243
 244 "("             { paren_depth += 1; return '('; }
 245 ")"             { if (paren_depth == 0)
 246                     {
 247                       rewind_to_char (')');
 248                       return 0;
 249                     }
 250                   else
 251                     {
 252                       paren_depth -= 1;
 253                       return ')';
 254                     }
 255                 }
 256
 257 "."{WHITE}*all  { return DOT_ALL; }
 258
 259 "."{WHITE}*{ID} {
 260                   yylval.sval = processId (yytext+1, yyleng-1);
 261                   return DOT_ID;
 262                 }
 263
 264 {ID}({WHITE}*"."{WHITE}*({ID}|\"{OPER}\"))*(" "*"'")?  {
 265                   int all_posn = find_dot_all (yytext);
 266
 267                   if (all_posn == -1 && yytext[yyleng-1] == '\'')
 268                     {
 269                       BEGIN BEFORE_QUAL_QUOTE;
 270                       yyless (yyleng-1);
 271                     }
 272                   else if (all_posn >= 0)
 273                     yyless (all_posn);
 274                   yylval.sval = processId (yytext, yyleng);
 275                   return NAME;
 276                }
 277
 278
 279         /* GDB EXPRESSION CONSTRUCTS  */
 280
 281 "'"[^']+"'"{WHITE}*:: {
 282                   yyless (yyleng - 2);
 283                   yylval.sval = processId (yytext, yyleng);
 284                   return NAME;
 285                 }
 286
 287 "::"            { return COLONCOLON; }
 288
 289 [{}@]           { return yytext[0]; }
 290
 291         /* REGISTERS AND GDB CONVENIENCE VARIABLES */
 292
 293 "$"({LETTER}|{DIG}|"$")*  {
 294                   yylval.sval.ptr = yytext;
 295                   yylval.sval.length = yyleng;
 296                   return DOLLAR_VARIABLE;
 297                 }
 298
 299         /* CATCH-ALL ERROR CASE */
 300
 301 .               { error (_("Invalid character '%s' in expression."), yytext); }
 302 %%
 303
 304 #include <ctype.h>
 305 /* Initialize the lexer for processing new expression. */
 306
 307 static void
 308 lexer_init (FILE *inp)
 309 {
 310   BEGIN INITIAL;
 311   paren_depth = 0;
 312   yyrestart (inp);
 313 }
 314
 315
 316 /* Copy S2 to S1, removing all underscores, and downcasing all letters.  */
 317
 318 static void
 319 canonicalizeNumeral (char *s1, const char *s2)
 320 {
 321   for (; *s2 != '\000'; s2 += 1)
 322     {
 323       if (*s2 != '_')
 324         {
 325           *s1 = tolower(*s2);
 326           s1 += 1;
 327         }
 328     }
 329   s1[0] = '\000';
 330 }
 331
 332 /* Interprets the prefix of NUM that consists of digits of the given BASE
 333    as an integer of that BASE, with the string EXP as an exponent.
 334    Puts value in yylval, and returns INT, if the string is valid.  Causes
 335    an error if the number is improperly formated.   BASE, if NULL, defaults
 336    to "10", and EXP to "1".  The EXP does not contain a leading 'e' or 'E'.
 337  */
 338
 339 static int
 340 processInt (struct parser_state *par_state, const char *base0,
 341             const char *num0, const char *exp0)
 342 {
 343   ULONGEST result;
 344   long exp;
 345   int base;
 346   const char *trailer;
 347
 348   if (base0 == NULL)
 349     base = 10;
 350   else
 351     {
 352       base = strtol (base0, (char **) NULL, 10);
 353       if (base < 2 || base > 16)
 354         error (_("Invalid base: %d."), base);
 355     }
 356
 357   if (exp0 == NULL)
 358     exp = 0;
 359   else
 360     exp = strtol(exp0, (char **) NULL, 10);
 361
 362   errno = 0;
 363   result = strtoulst (num0, &trailer, base);
 364   if (errno == ERANGE)
 365     error (_("Integer literal out of range"));
 366   if (isxdigit(*trailer))
 367     error (_("Invalid digit `%c' in based literal"), *trailer);
 368
 369   while (exp > 0)
 370     {
 371       if (result > (ULONG_MAX / base))
 372         error (_("Integer literal out of range"));
 373       result *= base;
 374       exp -= 1;
 375     }
 376
 377   if ((result >> (gdbarch_int_bit (par_state->gdbarch ())-1)) == 0)
 378     yylval.typed_val.type = type_int (par_state);
 379   else if ((result >> (gdbarch_long_bit (par_state->gdbarch ())-1)) == 0)
 380     yylval.typed_val.type = type_long (par_state);
 381   else if (((result >> (gdbarch_long_bit (par_state->gdbarch ())-1)) >> 1) == 0)
 382     {
 383       /* We have a number representable as an unsigned integer quantity.
 384          For consistency with the C treatment, we will treat it as an
 385          anonymous modular (unsigned) quantity.  Alas, the types are such
 386          that we need to store .val as a signed quantity.  Sorry
 387          for the mess, but C doesn't officially guarantee that a simple
 388          assignment does the trick (no, it doesn't; read the reference manual).
 389        */
 390       yylval.typed_val.type
 391         = builtin_type (par_state->gdbarch ())->builtin_unsigned_long;
 392       if (result & LONGEST_SIGN)
 393         yylval.typed_val.val =
 394           (LONGEST) (result & ~LONGEST_SIGN)
 395           - (LONGEST_SIGN>>1) - (LONGEST_SIGN>>1);
 396       else
 397         yylval.typed_val.val = (LONGEST) result;
 398       return INT;
 399     }
 400   else
 401     yylval.typed_val.type = type_long_long (par_state);
 402
 403   yylval.typed_val.val = (LONGEST) result;
 404   return INT;
 405 }
 406
 407 static int
 408 processReal (struct parser_state *par_state, const char *num0)
 409 {
 410   yylval.typed_val_float.type = type_long_double (par_state);
 411
 412   bool parsed = parse_float (num0, strlen (num0),
 413                              yylval.typed_val_float.type,
 414                              yylval.typed_val_float.val);
 415   gdb_assert (parsed);
 416   return FLOAT;
 417 }
 418
 419
 420 /* Store a canonicalized version of NAME0[0..LEN-1] in yylval.ssym.  The
 421    resulting string is valid until the next call to ada_parse.  If
 422    NAME0 contains the substring "___", it is assumed to be already
 423    encoded and the resulting name is equal to it.  Similarly, if the name
 424    starts with '<', it is copied verbatim.  Otherwise, it differs
 425    from NAME0 in that:
 426     + Characters between '...' are transfered verbatim to yylval.ssym.
 427     + Trailing "'" characters in quoted sequences are removed (a leading quote is
 428       preserved to indicate that the name is not to be GNAT-encoded).
 429     + Unquoted whitespace is removed.
 430     + Unquoted alphabetic characters are mapped to lower case.
 431    Result is returned as a struct stoken, but for convenience, the string
 432    is also null-terminated.  Result string valid until the next call of
 433    ada_parse.
 434  */
 435 static struct stoken
 436 processId (const char *name0, int len)
 437 {
 438   char *name = (char *) obstack_alloc (&temp_parse_space, len + 11);
 439   int i0, i;
 440   struct stoken result;
 441
 442   result.ptr = name;
 443   while (len > 0 && isspace (name0[len-1]))
 444     len -= 1;
 445
 446   if (name0[0] == '<' || strstr (name0, "___") != NULL)
 447     {
 448       strncpy (name, name0, len);
 449       name[len] = '\000';
 450       result.length = len;
 451       return result;
 452     }
 453
 454   i = i0 = 0;
 455   while (i0 < len)
 456     {
 457       if (isalnum (name0[i0]))
 458         {
 459           name[i] = tolower (name0[i0]);
 460           i += 1; i0 += 1;
 461         }
 462       else switch (name0[i0])
 463         {
 464         default:
 465           name[i] = name0[i0];
 466           i += 1; i0 += 1;
 467           break;
 468         case ' ': case '\t':
 469           i0 += 1;
 470           break;
 471         case '\'':
 472           do
 473             {
 474               name[i] = name0[i0];
 475               i += 1; i0 += 1;
 476             }
 477           while (i0 < len && name0[i0] != '\'');
 478           i0 += 1;
 479           break;
 480         }
 481     }
 482   name[i] = '\000';
 483
 484   result.length = i;
 485   return result;
 486 }
 487
 488 /* Return TEXT[0..LEN-1], a string literal without surrounding quotes,
 489    with special hex character notations replaced with characters.
 490    Result valid until the next call to ada_parse.  */
 491
 492 static struct stoken
 493 processString (const char *text, int len)
 494 {
 495   const char *p;
 496   char *q;
 497   const char *lim = text + len;
 498   struct stoken result;
 499
 500   q = (char *) obstack_alloc (&temp_parse_space, len);
 501   result.ptr = q;
 502   p = text;
 503   while (p < lim)
 504     {
 505       if (p[0] == '[' && p[1] == '"' && p+2 < lim)
 506          {
 507            if (p[2] == '"')  /* "...["""]... */
 508              {
 509                *q = '"';
 510                p += 4;
 511              }
 512            else
 513              {
 514                int chr;
 515                sscanf (p+2, "%2x", &chr);
 516                *q = (char) chr;
 517                p += 5;
 518              }
 519          }
 520        else
 521          *q = *p;
 522        q += 1;
 523        p += 1;
 524      }
 525   result.length = q - result.ptr;
 526   return result;
 527 }
 528
 529 /* Returns the position within STR of the '.' in a
 530    '.{WHITE}*all' component of a dotted name, or -1 if there is none.
 531    Note: we actually don't need this routine, since 'all' can never be an
 532    Ada identifier.  Thus, looking up foo.all or foo.all.x as a name
 533    must fail, and will eventually be interpreted as (foo).all or
 534    (foo).all.x.  However, this does avoid an extraneous lookup. */
 535
 536 static int
 537 find_dot_all (const char *str)
 538 {
 539   int i;
 540
 541   for (i = 0; str[i] != '\000'; i++)
 542     if (str[i] == '.')
 543       {
 544         int i0 = i;
 545
 546         do
 547           i += 1;
 548         while (isspace (str[i]));
 549
 550         if (strncasecmp (str + i, "all", 3) == 0
 551             && !isalnum (str[i + 3]) && str[i + 3] != '_')
 552           return i0;
 553       }
 554   return -1;
 555 }
 556
 557 /* Returns non-zero iff string SUBSEQ matches a subsequence of STR, ignoring
 558    case.  */
 559
 560 static int
 561 subseqMatch (const char *subseq, const char *str)
 562 {
 563   if (subseq[0] == '\0')
 564     return 1;
 565   else if (str[0] == '\0')
 566     return 0;
 567   else if (tolower (subseq[0]) == tolower (str[0]))
 568     return subseqMatch (subseq+1, str+1) || subseqMatch (subseq, str+1);
 569   else
 570     return subseqMatch (subseq, str+1);
 571 }
 572
 573
 574 static struct { const char *name; int code; }
 575 attributes[] = {
 576   { "address", TICK_ADDRESS },
 577   { "unchecked_access", TICK_ACCESS },
 578   { "unrestricted_access", TICK_ACCESS },
 579   { "access", TICK_ACCESS },
 580   { "first", TICK_FIRST },
 581   { "last", TICK_LAST },
 582   { "length", TICK_LENGTH },
 583   { "max", TICK_MAX },
 584   { "min", TICK_MIN },
 585   { "modulus", TICK_MODULUS },
 586   { "pos", TICK_POS },
 587   { "range", TICK_RANGE },
 588   { "size", TICK_SIZE },
 589   { "tag", TICK_TAG },
 590   { "val", TICK_VAL },
 591   { NULL, -1 }
 592 };
 593
 594 /* Return the syntactic code corresponding to the attribute name or
 595    abbreviation STR.  */
 596
 597 static int
 598 processAttribute (const char *str)
 599 {
 600   int i, k;
 601
 602   for (i = 0; attributes[i].code != -1; i += 1)
 603     if (strcasecmp (str, attributes[i].name) == 0)
 604       return attributes[i].code;
 605
 606   for (i = 0, k = -1; attributes[i].code != -1; i += 1)
 607     if (subseqMatch (str, attributes[i].name))
 608       {
 609         if (k == -1)
 610           k = i;
 611         else
 612           error (_("ambiguous attribute name: `%s'"), str);
 613       }
 614   if (k == -1)
 615     error (_("unrecognized attribute: `%s'"), str);
 616
 617   return attributes[k].code;
 618 }
 619
 620 /* Back up lexptr by yyleng and then to the rightmost occurrence of
 621    character CH, case-folded (there must be one).  WARNING: since
 622    lexptr points to the next input character that Flex has not yet
 623    transferred to its internal buffer, the use of this function
 624    depends on the assumption that Flex calls YY_INPUT only when it is
 625    logically necessary to do so (thus, there is no reading ahead
 626    farther than needed to identify the next token.)  */
 627
 628 static void
 629 rewind_to_char (int ch)
 630 {
 631   pstate->lexptr -= yyleng;
 632   while (toupper (*pstate->lexptr) != toupper (ch))
 633     pstate->lexptr -= 1;
 634   yyrestart (NULL);
 635 }
 636
 637 /* Dummy definition to suppress warnings about unused static definitions. */
 638 typedef void (*dummy_function) ();
 639 dummy_function ada_flex_use[] =
 640 {
 641   (dummy_function) yyunput
 642 };
 643
 644 DIAGNOSTIC_POP