X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=gdb%2Fcharset.c;h=abad9015752ded357e9b48b1e7a0e81e6f3828e5;hb=cc2c4da8813b980a4e68272bb43583f4af6fe89c;hp=d54749c10f3e70c9cca35a158dce8241cf9e3d93;hpb=3e43a32aaa2d78fca10dea6746b820176f39bcc8;p=deliverable%2Fbinutils-gdb.git diff --git a/gdb/charset.c b/gdb/charset.c index d54749c10f..abad901575 100644 --- a/gdb/charset.c +++ b/gdb/charset.c @@ -1,7 +1,6 @@ /* Character set conversion support for GDB. - Copyright (C) 2001, 2003, 2007, 2008, 2009, 2010, 2011 - Free Software Foundation, Inc. + Copyright (C) 2001-2016 Free Software Foundation, Inc. This file is part of GDB. @@ -21,16 +20,13 @@ #include "defs.h" #include "charset.h" #include "gdbcmd.h" -#include "gdb_assert.h" #include "gdb_obstack.h" #include "gdb_wait.h" #include "charset-list.h" #include "vec.h" #include "environ.h" #include "arch-utils.h" - -#include -#include "gdb_string.h" +#include "gdb_vecs.h" #include #ifdef USE_WIN32API @@ -81,9 +77,13 @@ arrange for there to be a single available character set. */ #undef GDB_DEFAULT_HOST_CHARSET -#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1" -#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1" -#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1" +#ifdef USE_WIN32API +# define GDB_DEFAULT_HOST_CHARSET "CP1252" +#else +# define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1" +#endif +#define GDB_DEFAULT_TARGET_CHARSET GDB_DEFAULT_HOST_CHARSET +#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32" #undef DEFAULT_CHARSET_NAMES #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET , @@ -99,68 +99,68 @@ #undef ICONV_CONST #define ICONV_CONST const -/* Some systems don't have EILSEQ, so we define it here, but not as - EINVAL, because callers of `iconv' want to distinguish EINVAL and - EILSEQ. This is what iconv.h from libiconv does as well. Note - that wchar.h may also define EILSEQ, so this needs to be after we - include wchar.h, which happens in defs.h through gdb_wchar.h. 
*/ -#ifndef EILSEQ -#define EILSEQ ENOENT -#endif +/* We allow conversions from UTF-32, wchar_t, and the host charset. + We allow conversions to wchar_t and the host charset. + Return 1 if we are converting from UTF-32BE, 2 if from UTF-32LE, + 0 otherwise. This is used as a flag in calls to iconv. */ -iconv_t +static iconv_t phony_iconv_open (const char *to, const char *from) { - /* We allow conversions from UTF-32BE, wchar_t, and the host charset. - We allow conversions to wchar_t and the host charset. */ - if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t") - && strcmp (from, GDB_DEFAULT_HOST_CHARSET)) - return -1; if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET)) return -1; - /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is - used as a flag in calls to iconv. */ - return !strcmp (from, "UTF-32BE"); + if (!strcmp (from, "UTF-32BE") || !strcmp (from, "UTF-32")) + return 1; + + if (!strcmp (from, "UTF-32LE")) + return 2; + + if (strcmp (from, "wchar_t") && strcmp (from, GDB_DEFAULT_HOST_CHARSET)) + return -1; + + return 0; } -int +static int phony_iconv_close (iconv_t arg) { return 0; } -size_t +static size_t phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) { if (utf_flag) { + enum bfd_endian endian + = utf_flag == 1 ? BFD_ENDIAN_BIG : BFD_ENDIAN_LITTLE; while (*inbytesleft >= 4) { - size_t j; - unsigned long c = 0; - - for (j = 0; j < 4; ++j) - { - c <<= 8; - c += (*inbuf)[j] & 0xff; - } + unsigned long c + = extract_unsigned_integer ((const gdb_byte *)*inbuf, 4, endian); if (c >= 256) { errno = EILSEQ; return -1; } + if (*outbytesleft < 1) + { + errno = E2BIG; + return -1; + } **outbuf = c & 0xff; ++*outbuf; --*outbytesleft; - ++*inbuf; + *inbuf += 4; *inbytesleft -= 4; } - if (*inbytesleft < 4) + if (*inbytesleft) { + /* Partial sequence on input. 
*/ errno = EINVAL; return -1; } @@ -178,12 +178,11 @@ phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft, *outbuf += amt; *inbytesleft -= amt; *outbytesleft -= amt; - } - - if (*inbytesleft) - { - errno = E2BIG; - return -1; + if (*inbytesleft) + { + errno = E2BIG; + return -1; + } } /* The number of non-reversible conversions -- but they were all @@ -191,8 +190,28 @@ phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft, return 0; } -#endif +#else /* PHONY_ICONV */ +/* On systems that don't have EILSEQ, GNU iconv's iconv.h defines it + to ENOENT, while gnulib defines it to a different value. Always + map ENOENT to gnulib's EILSEQ, leaving callers agnostic. */ + +static size_t +gdb_iconv (iconv_t utf_flag, ICONV_CONST char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + size_t ret; + + ret = iconv (utf_flag, inbuf, inbytesleft, outbuf, outbytesleft); + if (errno == ENOENT) + errno = EILSEQ; + return ret; +} + +#undef iconv +#define iconv gdb_iconv + +#endif /* PHONY_ICONV */ /* The global lists of character sets and translations. */ @@ -283,6 +302,11 @@ set_be_le_names (struct gdbarch *gdbarch) return; be_le_arch = gdbarch; +#ifdef PHONY_ICONV + /* Match the wide charset names recognized by phony_iconv_open. 
*/ + target_wide_charset_le_name = "UTF-32LE"; + target_wide_charset_be_name = "UTF-32BE"; +#else target_wide_charset_le_name = NULL; target_wide_charset_be_name = NULL; @@ -306,6 +330,7 @@ set_be_le_names (struct gdbarch *gdbarch) target_wide_charset_le_name = charset_enum[i]; } } +# endif /* PHONY_ICONV */ } /* 'Set charset', 'set host-charset', 'set target-charset', 'set @@ -324,13 +349,13 @@ validate (struct gdbarch *gdbarch) desc = iconv_open (target_wide_cset, host_cset); if (desc == (iconv_t) -1) - error ("Cannot convert between character sets `%s' and `%s'", + error (_("Cannot convert between character sets `%s' and `%s'"), target_wide_cset, host_cset); iconv_close (desc); desc = iconv_open (target_cset, host_cset); if (desc == (iconv_t) -1) - error ("Cannot convert between character sets `%s' and `%s'", + error (_("Cannot convert between character sets `%s' and `%s'"), target_cset, host_cset); iconv_close (desc); @@ -461,7 +486,7 @@ host_hex_value (char c) static void cleanup_iconv (void *p) { - iconv_t *descp = p; + iconv_t *descp = (iconv_t *) p; iconv_close (*descp); } @@ -474,7 +499,7 @@ convert_between_encodings (const char *from, const char *to, iconv_t desc; struct cleanup *cleanups; size_t inleft; - char *inp; + ICONV_CONST char *inp; unsigned int space_request; /* Often, the host and target charsets will be the same. 
*/ @@ -486,11 +511,11 @@ convert_between_encodings (const char *from, const char *to, desc = iconv_open (to, from); if (desc == (iconv_t) -1) - perror_with_name ("Converting character sets"); + perror_with_name (_("Converting character sets")); cleanups = make_cleanup (cleanup_iconv, &desc); inleft = num_bytes; - inp = (char *) bytes; + inp = (ICONV_CONST char *) bytes; space_request = num_bytes; @@ -503,14 +528,14 @@ convert_between_encodings (const char *from, const char *to, old_size = obstack_object_size (output); obstack_blank (output, space_request); - outp = obstack_base (output) + old_size; + outp = (char *) obstack_base (output) + old_size; outleft = space_request; - r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft); + r = iconv (desc, &inp, &inleft, &outp, &outleft); /* Now make sure that the object on the obstack only includes bytes we have converted. */ - obstack_blank (output, - (int) outleft); + obstack_blank_fast (output, -outleft); if (r == (size_t) -1) { @@ -531,7 +556,7 @@ convert_between_encodings (const char *from, const char *to, { char octal[5]; - sprintf (octal, "\\%.3o", *inp & 0xff); + xsnprintf (octal, sizeof (octal), "\\%.3o", *inp & 0xff); obstack_grow_str (output, octal); ++inp; @@ -553,8 +578,8 @@ convert_between_encodings (const char *from, const char *to, break; default: - perror_with_name ("Internal error while " - "converting character sets"); + perror_with_name (_("Internal error while " + "converting character sets")); } } } @@ -571,7 +596,7 @@ struct wchar_iterator iconv_t desc; /* The input string. This is updated as convert characters. */ - char *input; + const gdb_byte *input; /* The number of bytes remaining in the input. 
*/ size_t bytes; @@ -593,11 +618,11 @@ make_wchar_iterator (const gdb_byte *input, size_t bytes, desc = iconv_open (INTERMEDIATE_ENCODING, charset); if (desc == (iconv_t) -1) - perror_with_name ("Converting character sets"); + perror_with_name (_("Converting character sets")); result = XNEW (struct wchar_iterator); result->desc = desc; - result->input = (char *) input; + result->input = input; result->bytes = bytes; result->width = width; @@ -610,7 +635,7 @@ make_wchar_iterator (const gdb_byte *input, size_t bytes, static void do_cleanup_iterator (void *p) { - struct wchar_iterator *iter = p; + struct wchar_iterator *iter = (struct wchar_iterator *) p; iconv_close (iter->desc); xfree (iter->out); @@ -640,14 +665,15 @@ wchar_iterate (struct wchar_iterator *iter, out_request = 1; while (iter->bytes > 0) { + ICONV_CONST char *inptr = (ICONV_CONST char *) iter->input; char *outptr = (char *) &iter->out[0]; - char *orig_inptr = iter->input; + const gdb_byte *orig_inptr = iter->input; size_t orig_in = iter->bytes; size_t out_avail = out_request * sizeof (gdb_wchar_t); size_t num; - size_t r = iconv (iter->desc, - (ICONV_CONST char **) &iter->input, - &iter->bytes, &outptr, &out_avail); + size_t r = iconv (iter->desc, &inptr, &iter->bytes, &outptr, &out_avail); + + iter->input = (gdb_byte *) inptr; if (r == (size_t) -1) { @@ -679,8 +705,7 @@ wchar_iterate (struct wchar_iterator *iter, if (out_request > iter->out_size) { iter->out_size = out_request; - iter->out = xrealloc (iter->out, - out_request * sizeof (gdb_wchar_t)); + iter->out = XRESIZEVEC (gdb_wchar_t, iter->out, out_request); } continue; @@ -694,8 +719,8 @@ wchar_iterate (struct wchar_iterator *iter, return 0; default: - perror_with_name ("Internal error while " - "converting character sets"); + perror_with_name (_("Internal error while " + "converting character sets")); } } @@ -718,8 +743,6 @@ wchar_iterate (struct wchar_iterator *iter, extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */ 
-DEF_VEC_P (char_ptr); - static VEC (char_ptr) *charsets; #ifdef PHONY_ICONV @@ -799,7 +822,9 @@ find_charset_names (void) char *args[3]; int err, status; int fail = 1; + int flags; struct gdb_environ *iconv_env; + char *iconv_program; /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is not a tty. We need to recognize it and ignore it. This text is @@ -811,12 +836,26 @@ find_charset_names (void) child = pex_init (PEX_USE_PIPES, "iconv", NULL); - args[0] = "iconv"; +#ifdef ICONV_BIN + { + char *iconv_dir = relocate_gdb_directory (ICONV_BIN, + ICONV_BIN_RELOCATABLE); + iconv_program = concat (iconv_dir, SLASH_STRING, "iconv", NULL); + xfree (iconv_dir); + } +#else + iconv_program = xstrdup ("iconv"); +#endif + args[0] = iconv_program; args[1] = "-l"; args[2] = NULL; + flags = PEX_STDERR_TO_STDOUT; +#ifndef ICONV_BIN + flags |= PEX_SEARCH; +#endif /* Note that we simply ignore errors here. */ - if (!pex_run_in_environment (child, PEX_SEARCH | PEX_STDERR_TO_STDOUT, - "iconv", args, environ_vector (iconv_env), + if (!pex_run_in_environment (child, flags, + args[0], args, environ_vector (iconv_env), NULL, NULL, &err)) { FILE *in = pex_read_output (child, 0); @@ -825,7 +864,7 @@ find_charset_names (void) parse the glibc and libiconv formats; feel free to add others as needed. */ - while (!feof (in)) + while (in != NULL && !feof (in)) { /* The size of buf is chosen arbitrarily. */ char buf[1024]; @@ -888,17 +927,15 @@ find_charset_names (void) } + xfree (iconv_program); pex_free (child); free_environ (iconv_env); if (fail) { /* Some error occurred, so drop the vector. 
*/ - int ix; - char *elt; - for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix) - xfree (elt); - VEC_truncate (char_ptr, charsets, 0); + free_char_ptr_vec (charsets); + charsets = NULL; } else VEC_safe_push (char_ptr, charsets, NULL); @@ -922,6 +959,71 @@ default_auto_wide_charset (void) return GDB_DEFAULT_TARGET_WIDE_CHARSET; } + +#ifdef USE_INTERMEDIATE_ENCODING_FUNCTION +/* Macro used for UTF or UCS endianness suffix. */ +#if WORDS_BIGENDIAN +#define ENDIAN_SUFFIX "BE" +#else +#define ENDIAN_SUFFIX "LE" +#endif + +/* The code below serves to generate a compile time error if + gdb_wchar_t type is not of size 2 nor 4, despite the fact that + macro __STDC_ISO_10646__ is defined. + This is better than a gdb_assert call, because GDB cannot handle + strings correctly if this size is different. */ + +extern char your_gdb_wchar_t_is_bogus[(sizeof (gdb_wchar_t) == 2 + || sizeof (gdb_wchar_t) == 4) + ? 1 : -1]; + +/* intermediate_encoding returns the charset used internally by + GDB to convert between target and host encodings. As the test above + compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes. + UTF-16/32 is tested first, UCS-2/4 is tested as a second option, + otherwise an error is generated. */ + +const char * +intermediate_encoding (void) +{ + iconv_t desc; + static const char *stored_result = NULL; + char *result; + + if (stored_result) + return stored_result; + result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8), + ENDIAN_SUFFIX); + /* Check that the name is supported by iconv_open. */ + desc = iconv_open (result, host_charset ()); + if (desc != (iconv_t) -1) + { + iconv_close (desc); + stored_result = result; + return result; + } + /* Not valid, free the allocated memory. */ + xfree (result); + /* Second try, with UCS-2 type. */ + result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t), + ENDIAN_SUFFIX); + /* Check that the name is supported by iconv_open. 
*/ + desc = iconv_open (result, host_charset ()); + if (desc != (iconv_t) -1) + { + iconv_close (desc); + stored_result = result; + return result; + } + /* Not valid, free the allocated memory. */ + xfree (result); + /* No valid charset found, generate error here. */ + error (_("Unable to find a valid charset for string conversions")); +} + +#endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */ + void _initialize_charset (void) { @@ -981,8 +1083,8 @@ Set the host character set."), _("\ Show the host character set."), _("\ The `host character set' is the one used by the system GDB is running on.\n\ You may only use supersets of ASCII for your host character set; GDB does\n\ -not support any others.\nTo see a list of the character sets GDB supports, \ -type `set host-charset <TAB>'."), +not support any others.\n\ +To see a list of the character sets GDB supports, type `set host-charset <TAB>'."), set_host_charset_sfunc, show_host_charset_name, &setlist, &showlist); @@ -993,8 +1095,8 @@ Set the target character set."), _("\ Show the target character set."), _("\ The `target character set' is the one used by the program being debugged.\n\ GDB translates characters and strings between the host and target\n\ -character sets as needed.\nTo see a list of the character sets GDB supports, \ -type `set target-charset'"), +character sets as needed.\n\ +To see a list of the character sets GDB supports, type `set target-charset'"), set_target_charset_sfunc, show_target_charset_name, &setlist, &showlist);