/* YACC parser for C++ names, for GDB.
- Copyright (C) 2003-2017 Free Software Foundation, Inc.
+ Copyright (C) 2003-2018 Free Software Foundation, Inc.
Parts of the lexer are based on c-exp.y from GDB.
fill_comp (DEMANGLE_COMPONENT_BINARY_ARGS, lhs, rhs));
}
+/* Identifier-start predicate.  Returns true when CH is alphabetic
+   according to ISALPHA, and also whenever CH has its high bit set
+   (CH >= 0x80).  The latter covers every byte of a UTF-8 multi-byte
+   sequence as well as the non-ASCII characters of extended-ASCII
+   charsets such as Latin1.  Not every UTF-8 range is valid in a C++
+   identifier, but being lenient keeps this simple and means we never
+   need to know which encoding is actually in use.  */
+
+static inline bool
+cp_ident_is_alpha (unsigned char ch)
+{
+  return ch >= 0x80 || ISALPHA (ch);
+}
+
+/* Like ISALNUM, but also true for any byte with the high bit set.  */
+
+static inline bool
+cp_ident_is_alnum (unsigned char ch)
+{
+  return ch >= 0x80 || ISALNUM (ch);
+}
+
/* Find the end of a symbol name starting at LEXPTR. */
static const char *
{
const char *p = lexptr;
- while (*p && (ISALNUM (*p) || *p == '_' || *p == '$' || *p == '.'))
+ while (*p && (cp_ident_is_alnum (*p) || *p == '_' || *p == '$' || *p == '.'))
p++;
return p;
return ERROR;
}
- if (!(c == '_' || c == '$' || ISALPHA (c)))
+ if (!(c == '_' || c == '$' || cp_ident_is_alpha (c)))
{
/* We must have come across a bad character (e.g. ';'). */
yyerror (_("invalid character"));
namelen = 0;
do
c = tokstart[++namelen];
- while (ISALNUM (c) || c == '_' || c == '$');
+ while (cp_ident_is_alnum (c) || c == '_' || c == '$');
lexptr += namelen;