X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=gdb%2Fc-lang.c;h=4ba81ba86aa637bfa643d0da31766e4dce44da65;hb=cf648174b0ee4bbc9f53deb4d4d67a3e010f534b;hp=309a0b0dfe1e196a4214e0c7b7e71875061a2536;hpb=0fb0cc7590113e9b459dfcc48dc71c9d419d9580;p=deliverable%2Fbinutils-gdb.git diff --git a/gdb/c-lang.c b/gdb/c-lang.c index 309a0b0dfe..4ba81ba86a 100644 --- a/gdb/c-lang.c +++ b/gdb/c-lang.c @@ -33,48 +33,331 @@ #include "demangle.h" #include "cp-abi.h" #include "cp-support.h" +#include "gdb_obstack.h" +#include extern void _initialize_c_language (void); -static void c_emit_char (int c, struct ui_file * stream, int quoter); -/* Print the character C on STREAM as part of the contents of a literal - string whose delimiter is QUOTER. Note that that format for printing - characters and strings is language specific. */ +/* Given a C string type, STR_TYPE, return the corresponding target + character set name. */ -static void -c_emit_char (int c, struct ui_file *stream, int quoter) +static const char * +charset_for_string_type (enum c_string_type str_type, + enum bfd_endian byte_order) { - const char *escape; - int host_char; + switch (str_type & ~C_CHAR) + { + case C_STRING: + return target_charset (); + case C_WIDE_STRING: + return target_wide_charset (byte_order); + case C_STRING_16: + /* FIXME: UCS-2 is not always correct. */ + if (byte_order == BFD_ENDIAN_BIG) + return "UCS-2BE"; + else + return "UCS-2LE"; + case C_STRING_32: + /* FIXME: UCS-4 is not always correct. */ + if (byte_order == BFD_ENDIAN_BIG) + return "UCS-4BE"; + else + return "UCS-4LE"; + } + internal_error (__FILE__, __LINE__, "unhandled c_string_type"); +} + +/* Classify ELTTYPE according to what kind of character it is. Return + the enum constant representing the character type. Also set + *ENCODING to the name of the character set to use when converting + characters of this type in target BYTE_ORDER to the host character set. */ - c &= 0xFF; /* Avoid sign bit follies */ +static enum c_string_type +classify_type (struct type *elttype, enum bfd_endian byte_order, + const char **encoding) +{ + struct type *saved_type; + enum c_string_type result; - escape = c_target_char_has_backslash_escape (c); - if (escape) + /* We loop because ELTTYPE may be a typedef, and we want to + successively peel each typedef until we reach a type we + understand. We don't use CHECK_TYPEDEF because that will strip + all typedefs at once -- but in C, wchar_t is itself a typedef, so + that would do the wrong thing. */ + while (elttype) { - if (quoter == '"' && strcmp (escape, "0") == 0) - /* Print nulls embedded in double quoted strings as \000 to - prevent ambiguity. */ - fprintf_filtered (stream, "\\000"); + char *name = TYPE_NAME (elttype); + + if (TYPE_CODE (elttype) == TYPE_CODE_CHAR || !name) + { + result = C_CHAR; + goto done; + } + + if (!strcmp (name, "wchar_t")) + { + result = C_WIDE_CHAR; + goto done; + } + + if (!strcmp (name, "char16_t")) + { + result = C_CHAR_16; + goto done; + } + + if (!strcmp (name, "char32_t")) + { + result = C_CHAR_32; + goto done; + } + + if (TYPE_CODE (elttype) != TYPE_CODE_TYPEDEF) + break; + + /* Call for side effects. */ + check_typedef (elttype); + + if (TYPE_TARGET_TYPE (elttype)) + elttype = TYPE_TARGET_TYPE (elttype); else - fprintf_filtered (stream, "\\%s", escape); + { + /* Perhaps check_typedef did not update the target type. In + this case, force the lookup again and hope it works out. + It never will for C, but it might for C++. */ + CHECK_TYPEDEF (elttype); + } } - else if (target_char_to_host (c, &host_char) - && host_char_print_literally (host_char)) + + /* Punt. */ + result = C_CHAR; + + done: + if (encoding) + *encoding = charset_for_string_type (result, byte_order); + + return result; +} + +/* Return true if print_wchar can display W without resorting to a + numeric escape, false otherwise. */ + +static int +wchar_printable (gdb_wchar_t w) +{ + return (gdb_iswprint (w) + || w == LCST ('\a') || w == LCST ('\b') + || w == LCST ('\f') || w == LCST ('\n') + || w == LCST ('\r') || w == LCST ('\t') + || w == LCST ('\v')); +} + +/* A helper function that converts the contents of STRING to wide + characters and then appends them to OUTPUT. */ + +static void +append_string_as_wide (const char *string, struct obstack *output) +{ + for (; *string; ++string) { - if (host_char == '\\' || host_char == quoter) - fputs_filtered ("\\", stream); - fprintf_filtered (stream, "%c", host_char); + gdb_wchar_t w = gdb_btowc (*string); + obstack_grow (output, &w, sizeof (gdb_wchar_t)); + } +} + +/* Print a wide character W to OUTPUT. ORIG is a pointer to the + original (target) bytes representing the character, ORIG_LEN is the + number of valid bytes. WIDTH is the number of bytes in a base + characters of the type. OUTPUT is an obstack to which wide + characters are emitted. QUOTER is a (narrow) character indicating + the style of quotes surrounding the character to be printed. + NEED_ESCAPE is an in/out flag which is used to track numeric + escapes across calls. */ + +static void +print_wchar (gdb_wint_t w, const gdb_byte *orig, int orig_len, + int width, enum bfd_endian byte_order, struct obstack *output, + int quoter, int *need_escapep) +{ + int need_escape = *need_escapep; + *need_escapep = 0; + if (gdb_iswprint (w) && (!need_escape || (!gdb_iswdigit (w) + && w != LCST ('8') + && w != LCST ('9')))) + { + gdb_wchar_t wchar = w; + + if (w == gdb_btowc (quoter) || w == LCST ('\\')) + obstack_grow_wstr (output, LCST ("\\")); + obstack_grow (output, &wchar, sizeof (gdb_wchar_t)); } else - fprintf_filtered (stream, "\\%.3o", (unsigned int) c); + { + switch (w) + { + case LCST ('\a'): + obstack_grow_wstr (output, LCST ("\\a")); + break; + case LCST ('\b'): + obstack_grow_wstr (output, LCST ("\\b")); + break; + case LCST ('\f'): + obstack_grow_wstr (output, LCST ("\\f")); + break; + case LCST ('\n'): + obstack_grow_wstr (output, LCST ("\\n")); + break; + case LCST ('\r'): + obstack_grow_wstr (output, LCST ("\\r")); + break; + case LCST ('\t'): + obstack_grow_wstr (output, LCST ("\\t")); + break; + case LCST ('\v'): + obstack_grow_wstr (output, LCST ("\\v")); + break; + default: + { + int i; + + for (i = 0; i + width <= orig_len; i += width) + { + char octal[30]; + ULONGEST value; + value = extract_unsigned_integer (&orig[i], width, byte_order); + /* If the value fits in 3 octal digits, print it that + way. Otherwise, print it as a hex escape. */ + if (value <= 0777) + sprintf (octal, "\\%.3o", (int) (value & 0777)); + else + sprintf (octal, "\\x%lx", (long) value); + append_string_as_wide (octal, output); + } + /* If we somehow have extra bytes, print them now. */ + while (i < orig_len) + { + char octal[5]; + sprintf (octal, "\\%.3o", orig[i] & 0xff); + append_string_as_wide (octal, output); + ++i; + } + + *need_escapep = 1; + } + break; + } + } +} + +/* Print the character C on STREAM as part of the contents of a literal + string whose delimiter is QUOTER. Note that that format for printing + characters and strings is language specific. */ + +static void +c_emit_char (int c, struct type *type, struct ui_file *stream, int quoter) +{ + enum bfd_endian byte_order = gdbarch_byte_order (get_type_arch (type)); + struct obstack wchar_buf, output; + struct cleanup *cleanups; + const char *encoding; + gdb_byte *buf; + struct wchar_iterator *iter; + int need_escape = 0; + + classify_type (type, byte_order, &encoding); + + buf = alloca (TYPE_LENGTH (type)); + pack_long (buf, type, c); + + iter = make_wchar_iterator (buf, TYPE_LENGTH (type), encoding, + TYPE_LENGTH (type)); + cleanups = make_cleanup_wchar_iterator (iter); + + /* This holds the printable form of the wchar_t data. */ + obstack_init (&wchar_buf); + make_cleanup_obstack_free (&wchar_buf); + + while (1) + { + int num_chars; + gdb_wchar_t *chars; + const gdb_byte *buf; + size_t buflen; + int print_escape = 1; + enum wchar_iterate_result result; + + num_chars = wchar_iterate (iter, &result, &chars, &buf, &buflen); + if (num_chars < 0) + break; + if (num_chars > 0) + { + /* If all characters are printable, print them. Otherwise, + we're going to have to print an escape sequence. We + check all characters because we want to print the target + bytes in the escape sequence, and we don't know character + boundaries there. */ + int i; + + print_escape = 0; + for (i = 0; i < num_chars; ++i) + if (!wchar_printable (chars[i])) + { + print_escape = 1; + break; + } + + if (!print_escape) + { + for (i = 0; i < num_chars; ++i) + print_wchar (chars[i], buf, buflen, TYPE_LENGTH (type), + byte_order, &wchar_buf, quoter, &need_escape); + } + } + + /* This handles the NUM_CHARS == 0 case as well. */ + if (print_escape) + print_wchar (gdb_WEOF, buf, buflen, TYPE_LENGTH (type), byte_order, + &wchar_buf, quoter, &need_escape); + } + + /* The output in the host encoding. */ + obstack_init (&output); + make_cleanup_obstack_free (&output); + + convert_between_encodings (INTERMEDIATE_ENCODING, host_charset (), + obstack_base (&wchar_buf), + obstack_object_size (&wchar_buf), + 1, &output, translit_char); + obstack_1grow (&output, '\0'); + + fputs_filtered (obstack_base (&output), stream); + + do_cleanups (cleanups); } void -c_printchar (int c, struct ui_file *stream) +c_printchar (int c, struct type *type, struct ui_file *stream) { + enum c_string_type str_type; + + str_type = classify_type (type, BFD_ENDIAN_UNKNOWN, NULL); + switch (str_type) + { + case C_CHAR: + break; + case C_WIDE_CHAR: + fputc_filtered ('L', stream); + break; + case C_CHAR_16: + fputc_filtered ('u', stream); + break; + case C_CHAR_32: + fputc_filtered ('U', stream); + break; + } + fputc_filtered ('\'', stream); - LA_EMIT_CHAR (c, stream, '\''); + LA_EMIT_CHAR (c, type, stream, '\''); fputc_filtered ('\'', stream); } @@ -85,87 +368,211 @@ c_printchar (int c, struct ui_file *stream) printing LENGTH characters, or if FORCE_ELLIPSES. */ void -c_printstr (struct ui_file *stream, const gdb_byte *string, - unsigned int length, int width, int force_ellipses, +c_printstr (struct ui_file *stream, struct type *type, const gdb_byte *string, + unsigned int length, int force_ellipses, const struct value_print_options *options) { + enum bfd_endian byte_order = gdbarch_byte_order (get_type_arch (type)); unsigned int i; unsigned int things_printed = 0; int in_quotes = 0; int need_comma = 0; + int width = TYPE_LENGTH (type); + struct obstack wchar_buf, output; + struct cleanup *cleanup; + enum c_string_type str_type; + const char *encoding; + struct wchar_iterator *iter; + int finished = 0; + int need_escape = 0; /* If the string was not truncated due to `set print elements', and the last byte of it is a null, we don't print that, in traditional C style. */ if (!force_ellipses && length > 0 - && (extract_unsigned_integer (string + (length - 1) * width, width) - == '\0')) + && (extract_unsigned_integer (string + (length - 1) * width, + width, byte_order) == 0)) length--; + str_type = classify_type (type, byte_order, &encoding) & ~C_CHAR; + switch (str_type) + { + case C_STRING: + break; + case C_WIDE_STRING: + fputs_filtered ("L", stream); + break; + case C_STRING_16: + fputs_filtered ("u", stream); + break; + case C_STRING_32: + fputs_filtered ("U", stream); + break; + } + if (length == 0) { fputs_filtered ("\"\"", stream); return; } - for (i = 0; i < length && things_printed < options->print_max; ++i) + if (length == -1) { - /* Position of the character we are examining - to see whether it is repeated. */ - unsigned int rep1; - /* Number of repetitions we have detected so far. */ - unsigned int reps; - unsigned long current_char; + unsigned long current_char = 1; + for (i = 0; current_char; ++i) + { + QUIT; + current_char = extract_unsigned_integer (string + i * width, + width, byte_order); + } + length = i; + } + + /* Arrange to iterate over the characters, in wchar_t form. */ + iter = make_wchar_iterator (string, length * width, encoding, width); + cleanup = make_cleanup_wchar_iterator (iter); + + /* WCHAR_BUF is the obstack we use to represent the string in + wchar_t form. */ + obstack_init (&wchar_buf); + make_cleanup_obstack_free (&wchar_buf); + + while (!finished && things_printed < options->print_max) + { + int num_chars; + enum wchar_iterate_result result; + gdb_wchar_t *chars; + const gdb_byte *buf; + size_t buflen; QUIT; if (need_comma) { - fputs_filtered (", ", stream); + obstack_grow_wstr (&wchar_buf, LCST (", ")); need_comma = 0; } - current_char = extract_unsigned_integer (string + i * width, width); + num_chars = wchar_iterate (iter, &result, &chars, &buf, &buflen); + /* We only look at repetitions when we were able to convert a + single character in isolation. This makes the code simpler + and probably does the sensible thing in the majority of + cases. */ + while (num_chars == 1) + { + /* Count the number of repetitions. */ + unsigned int reps = 0; + gdb_wchar_t current_char = chars[0]; + const gdb_byte *orig_buf = buf; + int orig_len = buflen; - rep1 = i + 1; - reps = 1; - while (rep1 < length - && extract_unsigned_integer (string + rep1 * width, width) - == current_char) + if (need_comma) + { + obstack_grow_wstr (&wchar_buf, LCST (", ")); + need_comma = 0; + } + + while (num_chars == 1 && current_char == chars[0]) + { + num_chars = wchar_iterate (iter, &result, &chars, &buf, &buflen); + ++reps; + } + + /* Emit CURRENT_CHAR according to the repetition count and + options. */ + if (reps > options->repeat_count_threshold) + { + if (in_quotes) + { + if (options->inspect_it) + obstack_grow_wstr (&wchar_buf, LCST ("\\\", ")); + else + obstack_grow_wstr (&wchar_buf, LCST ("\", ")); + in_quotes = 0; + } + obstack_grow_wstr (&wchar_buf, LCST ("'")); + need_escape = 0; + print_wchar (current_char, orig_buf, orig_len, width, + byte_order, &wchar_buf, '\'', &need_escape); + obstack_grow_wstr (&wchar_buf, LCST ("'")); + { + /* Painful gyrations. */ + int j; + char *s = xstrprintf (_(" "), reps); + for (j = 0; s[j]; ++j) + { + gdb_wchar_t w = gdb_btowc (s[j]); + obstack_grow (&wchar_buf, &w, sizeof (gdb_wchar_t)); + } + xfree (s); + } + things_printed += options->repeat_count_threshold; + need_comma = 1; + } + else + { + /* Saw the character one or more times, but fewer than + the repetition threshold. */ + if (!in_quotes) + { + if (options->inspect_it) + obstack_grow_wstr (&wchar_buf, LCST ("\\\"")); + else + obstack_grow_wstr (&wchar_buf, LCST ("\"")); + in_quotes = 1; + need_escape = 0; + } + + while (reps-- > 0) + { + print_wchar (current_char, orig_buf, orig_len, width, + byte_order, &wchar_buf, '"', &need_escape); + ++things_printed; + } + } + } + + /* NUM_CHARS and the other outputs from wchar_iterate are valid + here regardless of which branch was taken above. */ + if (num_chars < 0) { - ++rep1; - ++reps; + /* Hit EOF. */ + finished = 1; + break; } - if (reps > options->repeat_count_threshold) + switch (result) { - if (in_quotes) + case wchar_iterate_invalid: + if (!in_quotes) { if (options->inspect_it) - fputs_filtered ("\\\", ", stream); + obstack_grow_wstr (&wchar_buf, LCST ("\\\"")); else - fputs_filtered ("\", ", stream); - in_quotes = 0; + obstack_grow_wstr (&wchar_buf, LCST ("\"")); + in_quotes = 1; } - LA_PRINT_CHAR (current_char, stream); - fprintf_filtered (stream, _(" "), reps); - i = rep1 - 1; - things_printed += options->repeat_count_threshold; - need_comma = 1; - } - else - { - if (!in_quotes) + need_escape = 0; + print_wchar (gdb_WEOF, buf, buflen, width, byte_order, &wchar_buf, + '"', &need_escape); + break; + + case wchar_iterate_incomplete: + if (in_quotes) { if (options->inspect_it) - fputs_filtered ("\\\"", stream); + obstack_grow_wstr (&wchar_buf, LCST ("\\\",")); else - fputs_filtered ("\"", stream); - in_quotes = 1; + obstack_grow_wstr (&wchar_buf, LCST ("\",")); + in_quotes = 0; } - LA_EMIT_CHAR (current_char, stream, '"'); - ++things_printed; + obstack_grow_wstr (&wchar_buf, LCST (" ")); + finished = 1; + break; } } @@ -173,16 +580,463 @@ c_printstr (struct ui_file *stream, const gdb_byte *string, if (in_quotes) { if (options->inspect_it) - fputs_filtered ("\\\"", stream); + obstack_grow_wstr (&wchar_buf, LCST ("\\\"")); else - fputs_filtered ("\"", stream); + obstack_grow_wstr (&wchar_buf, LCST ("\"")); } - if (force_ellipses || i < length) - fputs_filtered ("...", stream); + if (force_ellipses || !finished) + obstack_grow_wstr (&wchar_buf, LCST ("...")); + + /* OUTPUT is where we collect `char's for printing. */ + obstack_init (&output); + make_cleanup_obstack_free (&output); + + convert_between_encodings (INTERMEDIATE_ENCODING, host_charset (), + obstack_base (&wchar_buf), + obstack_object_size (&wchar_buf), + 1, &output, translit_char); + obstack_1grow (&output, '\0'); + + fputs_filtered (obstack_base (&output), stream); + + do_cleanups (cleanup); } + +/* Obtain a C string from the inferior storing it in a newly allocated + buffer in BUFFER, which should be freed by the caller. If the + in- and out-parameter *LENGTH is specified at -1, the string is read + until a null character of the appropriate width is found, otherwise + the string is read to the length of characters specified. + The size of a character is determined by the length of the target + type of the pointer or array. If VALUE is an array with a known + length, the function will not read past the end of the array. + On completion, *LENGTH will be set to the size of the string read in + characters. (If a length of -1 is specified, the length returned + will not include the null character). CHARSET is always set to the + target charset. */ + +void +c_get_string (struct value *value, gdb_byte **buffer, int *length, + const char **charset) +{ + int err, width; + unsigned int fetchlimit; + struct type *type = check_typedef (value_type (value)); + struct type *element_type = TYPE_TARGET_TYPE (type); + int req_length = *length; + enum bfd_endian byte_order = gdbarch_byte_order (get_type_arch (type)); + + if (element_type == NULL) + goto error; + + if (TYPE_CODE (type) == TYPE_CODE_ARRAY) + { + /* If we know the size of the array, we can use it as a limit on the + number of characters to be fetched. */ + if (TYPE_NFIELDS (type) == 1 + && TYPE_CODE (TYPE_FIELD_TYPE (type, 0)) == TYPE_CODE_RANGE) + { + LONGEST low_bound, high_bound; + + get_discrete_bounds (TYPE_FIELD_TYPE (type, 0), + &low_bound, &high_bound); + fetchlimit = high_bound - low_bound + 1; + } + else + fetchlimit = UINT_MAX; + } + else if (TYPE_CODE (type) == TYPE_CODE_PTR) + fetchlimit = UINT_MAX; + else + /* We work only with arrays and pointers. */ + goto error; + + element_type = check_typedef (element_type); + if (TYPE_CODE (element_type) != TYPE_CODE_INT + && TYPE_CODE (element_type) != TYPE_CODE_CHAR) + /* If the elements are not integers or characters, we don't consider it + a string. */ + goto error; + + width = TYPE_LENGTH (element_type); + + /* If the string lives in GDB's memory instead of the inferior's, then we + just need to copy it to BUFFER. Also, since such strings are arrays + with known size, FETCHLIMIT will hold the size of the array. */ + if ((VALUE_LVAL (value) == not_lval + || VALUE_LVAL (value) == lval_internalvar) + && fetchlimit != UINT_MAX) + { + int i; + const gdb_byte *contents = value_contents (value); + + /* If a length is specified, use that. */ + if (*length >= 0) + i = *length; + else + /* Otherwise, look for a null character. */ + for (i = 0; i < fetchlimit; i++) + if (extract_unsigned_integer (contents + i * width, width, + byte_order) == 0) + break; + + /* I is now either a user-defined length, the number of non-null + characters, or FETCHLIMIT. */ + *length = i * width; + *buffer = xmalloc (*length); + memcpy (*buffer, contents, *length); + err = 0; + } + else + { + err = read_string (value_as_address (value), *length, width, fetchlimit, + byte_order, buffer, length); + if (err) + { + xfree (*buffer); + error (_("Error reading string from inferior: %s"), + safe_strerror (err)); + } + } + + /* If the LENGTH is specified at -1, we want to return the string + length up to the terminating null character. If an actual length + was specified, we want to return the length of exactly what was + read. */ + if (req_length == -1) + /* If the last character is null, subtract it from LENGTH. */ + if (*length > 0 + && extract_unsigned_integer (*buffer + *length - width, width, + byte_order) == 0) + *length -= width; + + /* The read_string function will return the number of bytes read. + If length returned from read_string was > 0, return the number of + characters read by dividing the number of bytes by width. */ + if (*length != 0) + *length = *length / width; + + *charset = target_charset (); + + return; + + error: + { + char *type_str; + + type_str = type_to_string (type); + if (type_str) + { + make_cleanup (xfree, type_str); + error (_("Trying to read string with inappropriate type `%s'."), + type_str); + } + else + error (_("Trying to read string with inappropriate type.")); + } +} + -/* Preprocessing and parsing C and C++ expressions. */ +/* Evaluating C and C++ expressions. */ + +/* Convert a UCN. The digits of the UCN start at P and extend no + farther than LIMIT. DEST_CHARSET is the name of the character set + into which the UCN should be converted. The results are written to + OUTPUT. LENGTH is the maximum length of the UCN, either 4 or 8. + Returns a pointer to just after the final digit of the UCN. */ + +static char * +convert_ucn (char *p, char *limit, const char *dest_charset, + struct obstack *output, int length) +{ + unsigned long result = 0; + gdb_byte data[4]; + int i; + + for (i = 0; i < length && p < limit && isxdigit (*p); ++i, ++p) + result = (result << 4) + host_hex_value (*p); + + for (i = 3; i >= 0; --i) + { + data[i] = result & 0xff; + result >>= 8; + } + + convert_between_encodings ("UCS-4BE", dest_charset, data, 4, 4, output, + translit_none); + + return p; +} + +/* Emit a character, VALUE, which was specified numerically, to + OUTPUT. TYPE is the target character type. */ + +static void +emit_numeric_character (struct type *type, unsigned long value, + struct obstack *output) +{ + gdb_byte *buffer; + + buffer = alloca (TYPE_LENGTH (type)); + pack_long (buffer, type, value); + obstack_grow (output, buffer, TYPE_LENGTH (type)); +} + +/* Convert an octal escape sequence. TYPE is the target character + type. The digits of the escape sequence begin at P and extend no + farther than LIMIT. The result is written to OUTPUT. Returns a + pointer to just after the final digit of the escape sequence. */ + +static char * +convert_octal (struct type *type, char *p, char *limit, struct obstack *output) +{ + int i; + unsigned long value = 0; + + for (i = 0; + i < 3 && p < limit && isdigit (*p) && *p != '8' && *p != '9'; + ++i) + { + value = 8 * value + host_hex_value (*p); + ++p; + } + + emit_numeric_character (type, value, output); + + return p; +} + +/* Convert a hex escape sequence. TYPE is the target character type. + The digits of the escape sequence begin at P and extend no farther + than LIMIT. The result is written to OUTPUT. Returns a pointer to + just after the final digit of the escape sequence. */ + +static char * +convert_hex (struct type *type, char *p, char *limit, struct obstack *output) +{ + unsigned long value = 0; + + while (p < limit && isxdigit (*p)) + { + value = 16 * value + host_hex_value (*p); + ++p; + } + + emit_numeric_character (type, value, output); + + return p; +} + +#define ADVANCE \ + do { \ + ++p; \ + if (p == limit) \ + error (_("Malformed escape sequence")); \ + } while (0) + +/* Convert an escape sequence to a target format. TYPE is the target + character type to use, and DEST_CHARSET is the name of the target + character set. The backslash of the escape sequence is at *P, and + the escape sequence will not extend past LIMIT. The results are + written to OUTPUT. Returns a pointer to just past the final + character of the escape sequence. */ + +static char * +convert_escape (struct type *type, const char *dest_charset, + char *p, char *limit, struct obstack *output) +{ + /* Skip the backslash. */ + ADVANCE; + + switch (*p) + { + case '\\': + obstack_1grow (output, '\\'); + ++p; + break; + + case 'x': + ADVANCE; + if (!isxdigit (*p)) + error (_("\\x used with no following hex digits.")); + p = convert_hex (type, p, limit, output); + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + p = convert_octal (type, p, limit, output); + break; + + case 'u': + case 'U': + { + int length = *p == 'u' ? 4 : 8; + ADVANCE; + if (!isxdigit (*p)) + error (_("\\u used with no following hex digits")); + p = convert_ucn (p, limit, dest_charset, output, length); + } + } + + return p; +} + +/* Given a single string from a (C-specific) OP_STRING list, convert + it to a target string, handling escape sequences specially. The + output is written to OUTPUT. DATA is the input string, which has + length LEN. DEST_CHARSET is the name of the target character set, + and TYPE is the type of target character to use. */ + +static void +parse_one_string (struct obstack *output, char *data, int len, + const char *dest_charset, struct type *type) +{ + char *limit; + + limit = data + len; + + while (data < limit) + { + char *p = data; + /* Look for next escape, or the end of the input. */ + while (p < limit && *p != '\\') + ++p; + /* If we saw a run of characters, convert them all. */ + if (p > data) + convert_between_encodings (host_charset (), dest_charset, + data, p - data, 1, output, translit_none); + /* If we saw an escape, convert it. */ + if (p < limit) + p = convert_escape (type, dest_charset, p, limit, output); + data = p; + } +} + +/* Expression evaluator for the C language family. Most operations + are delegated to evaluate_subexp_standard; see that function for a + description of the arguments. */ + +static struct value * +evaluate_subexp_c (struct type *expect_type, struct expression *exp, + int *pos, enum noside noside) +{ + enum exp_opcode op = exp->elts[*pos].opcode; + + switch (op) + { + case OP_STRING: + { + int oplen, limit; + struct type *type; + struct obstack output; + struct cleanup *cleanup; + struct value *result; + enum c_string_type dest_type; + const char *dest_charset; + enum bfd_endian byte_order; + + obstack_init (&output); + cleanup = make_cleanup_obstack_free (&output); + + ++*pos; + oplen = longest_to_int (exp->elts[*pos].longconst); + + ++*pos; + limit = *pos + BYTES_TO_EXP_ELEM (oplen + 1); + dest_type + = (enum c_string_type) longest_to_int (exp->elts[*pos].longconst); + switch (dest_type & ~C_CHAR) + { + case C_STRING: + type = language_string_char_type (exp->language_defn, + exp->gdbarch); + break; + case C_WIDE_STRING: + type = lookup_typename (exp->language_defn, exp->gdbarch, + "wchar_t", NULL, 0); + break; + case C_STRING_16: + type = lookup_typename (exp->language_defn, exp->gdbarch, + "char16_t", NULL, 0); + break; + case C_STRING_32: + type = lookup_typename (exp->language_defn, exp->gdbarch, + "char32_t", NULL, 0); + break; + default: + internal_error (__FILE__, __LINE__, "unhandled c_string_type"); + } + + /* Ensure TYPE_LENGTH is valid for TYPE. */ + check_typedef (type); + + byte_order = gdbarch_byte_order (exp->gdbarch); + dest_charset = charset_for_string_type (dest_type, byte_order); + + ++*pos; + while (*pos < limit) + { + int len; + + len = longest_to_int (exp->elts[*pos].longconst); + + ++*pos; + if (noside != EVAL_SKIP) + parse_one_string (&output, &exp->elts[*pos].string, len, + dest_charset, type); + *pos += BYTES_TO_EXP_ELEM (len); + } + + /* Skip the trailing length and opcode. */ + *pos += 2; + + if (noside == EVAL_SKIP) + { + /* Return a dummy value of the appropriate type. */ + if ((dest_type & C_CHAR) != 0) + result = allocate_value (type); + else + result = value_cstring ("", 0, type); + do_cleanups (cleanup); + return result; + } + + if ((dest_type & C_CHAR) != 0) + { + LONGEST value; + + if (obstack_object_size (&output) != TYPE_LENGTH (type)) + error (_("Could not convert character constant to target character set")); + value = unpack_long (type, obstack_base (&output)); + result = value_from_longest (type, value); + } + else + { + int i; + /* Write the terminating character. */ + for (i = 0; i < TYPE_LENGTH (type); ++i) + obstack_1grow (&output, 0); + result = value_cstring (obstack_base (&output), + obstack_object_size (&output), + type); + } + do_cleanups (cleanup); + return result; + } + break; + + default: + break; + } + return evaluate_subexp_standard (expect_type, exp, pos, noside); +} @@ -280,6 +1134,15 @@ c_language_arch_info (struct gdbarch *gdbarch, lai->bool_type_default = builtin->builtin_int; } +static const struct exp_descriptor exp_descriptor_c = +{ + print_subexp_standard, + operator_length_standard, + op_name_standard, + dump_subexp_body_standard, + evaluate_subexp_c +}; + const struct language_defn c_language_defn = { "c", /* Language name */ @@ -289,7 +1152,7 @@ const struct language_defn c_language_defn = case_sensitive_on, array_row_major, macro_expansion_c, - &exp_descriptor_standard, + &exp_descriptor_c, c_parse, c_error, null_post_parser, @@ -314,6 +1177,7 @@ const struct language_defn c_language_defn = c_language_arch_info, default_print_array_index, default_pass_by_reference, + c_get_string, LANG_MAGIC }; @@ -407,7 +1271,7 @@ const struct language_defn cplus_language_defn = case_sensitive_on, array_row_major, macro_expansion_c, - &exp_descriptor_standard, + &exp_descriptor_c, c_parse, c_error, null_post_parser, @@ -432,6 +1296,7 @@ const struct language_defn cplus_language_defn = cplus_language_arch_info, default_print_array_index, cp_pass_by_reference, + c_get_string, LANG_MAGIC }; @@ -444,7 +1309,7 @@ const struct language_defn asm_language_defn = case_sensitive_on, array_row_major, macro_expansion_c, - &exp_descriptor_standard, + &exp_descriptor_c, c_parse, c_error, null_post_parser, @@ -469,6 +1334,7 @@ const struct language_defn asm_language_defn = c_language_arch_info, /* FIXME: la_language_arch_info. */ default_print_array_index, default_pass_by_reference, + c_get_string, LANG_MAGIC }; @@ -486,7 +1352,7 @@ const struct language_defn minimal_language_defn = case_sensitive_on, array_row_major, macro_expansion_c, - &exp_descriptor_standard, + &exp_descriptor_c, c_parse, c_error, null_post_parser, @@ -511,6 +1377,7 @@ const struct language_defn minimal_language_defn = c_language_arch_info, default_print_array_index, default_pass_by_reference, + c_get_string, LANG_MAGIC };