PR gdb/12623: non-stop crashes inferior, PC adjustment and 1-byte insns
[deliverable/binutils-gdb.git] / gdb / charset.c
CommitLineData
234b45d4 1/* Character set conversion support for GDB.
1bac305b 2
ecd75fc8 3 Copyright (C) 2001-2014 Free Software Foundation, Inc.
234b45d4
KB
4
5 This file is part of GDB.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
a9762ec7 9 the Free Software Foundation; either version 3 of the License, or
234b45d4
KB
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
a9762ec7 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
234b45d4
KB
19
20#include "defs.h"
21#include "charset.h"
22#include "gdbcmd.h"
6c7a06a3 23#include "gdb_obstack.h"
732f6a93 24#include "gdb_wait.h"
6c7a06a3
TT
25#include "charset-list.h"
26#include "vec.h"
40b5c9fb 27#include "environ.h"
f870a310 28#include "arch-utils.h"
fa864999 29#include "gdb_vecs.h"
234b45d4
KB
30#include <ctype.h>
31
43484f03
DJ
32#ifdef USE_WIN32API
33#include <windows.h>
34#endif
234b45d4
KB
35\f
36/* How GDB's character set support works
37
6c7a06a3 38 GDB has three global settings:
234b45d4
KB
39
40 - The `current host character set' is the character set GDB should
41 use in talking to the user, and which (hopefully) the user's
6c7a06a3
TT
42 terminal knows how to display properly. Most users should not
43 change this.
234b45d4
KB
44
45 - The `current target character set' is the character set the
46 program being debugged uses.
47
6c7a06a3
TT
48 - The `current target wide character set' is the wide character set
49 the program being debugged uses, that is, the encoding used for
50 wchar_t.
51
234b45d4
KB
52 There are commands to set each of these, and mechanisms for
53 choosing reasonable default values. GDB has a global list of
54 character sets that it can use as its host or target character
55 sets.
56
57 The header file `charset.h' declares various functions that
58 different pieces of GDB need to perform tasks like:
59
60 - printing target strings and characters to the user's terminal
61 (mostly target->host conversions),
62
63 - building target-appropriate representations of strings and
64 characters the user enters in expressions (mostly host->target
65 conversions),
66
6c7a06a3
TT
67 and so on.
68
69 To avoid excessive code duplication and maintenance efforts,
70 GDB simply requires a capable iconv function. Users on platforms
71 without a suitable iconv can use the GNU iconv library. */
234b45d4
KB
72
73\f
6c7a06a3 74#ifdef PHONY_ICONV
234b45d4 75
6c7a06a3
TT
76/* Provide a phony iconv that does as little as possible. Also,
77 arrange for there to be a single available character set. */
234b45d4 78
6c7a06a3
TT
79#undef GDB_DEFAULT_HOST_CHARSET
80#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
81#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
82#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
83#undef DEFAULT_CHARSET_NAMES
84#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,
85
86#undef iconv_t
87#define iconv_t int
88#undef iconv_open
62234ccc 89#define iconv_open phony_iconv_open
6c7a06a3 90#undef iconv
62234ccc 91#define iconv phony_iconv
6c7a06a3 92#undef iconv_close
62234ccc 93#define iconv_close phony_iconv_close
6c7a06a3 94
0dd7fb99
TT
95#undef ICONV_CONST
96#define ICONV_CONST const
97
76208fec 98/* Some systems don't have EILSEQ, so we define it here, but not as
e726d784
EZ
99 EINVAL, because callers of `iconv' want to distinguish EINVAL and
100 EILSEQ. This is what iconv.h from libiconv does as well. Note
101 that wchar.h may also define EILSEQ, so this needs to be after we
102 include wchar.h, which happens in defs.h through gdb_wchar.h. */
103#ifndef EILSEQ
104#define EILSEQ ENOENT
105#endif
106
a95babbf 107static iconv_t
62234ccc 108phony_iconv_open (const char *to, const char *from)
6c7a06a3 109{
b8899f2b 110 /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
6c7a06a3 111 We allow conversions to wchar_t and the host charset. */
b8899f2b 112 if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
6c7a06a3
TT
113 && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
114 return -1;
115 if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
116 return -1;
234b45d4 117
b8899f2b 118 /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is
6c7a06a3 119 used as a flag in calls to iconv. */
b8899f2b 120 return !strcmp (from, "UTF-32BE");
6c7a06a3 121}
234b45d4 122
/* Close a phony descriptor.  The descriptor owns nothing, so ARG is
   ignored and success (zero) is always reported.  */

static int
phony_iconv_close (iconv_t arg)
{
  (void) arg;
  return 0;
}
234b45d4 128
/* A minimal stand-in for iconv.

   UTF_FLAG is the value returned by phony_iconv_open: non-zero means
   the input is UTF-32BE, zero means the input is copied verbatim.
   INBUF/INBYTESLEFT and OUTBUF/OUTBYTESLEFT are advanced/decremented
   to reflect what was consumed and produced, as with real iconv.

   Returns 0 on success.  On failure returns (size_t) -1 and sets
   errno to:
     EILSEQ - a UTF-32BE code point does not fit in one byte;
     E2BIG  - the output buffer is full but input remains;
     EINVAL - the UTF-32BE input ends with a partial (1-3 byte)
	      sequence.  */

static size_t
phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
	     char **outbuf, size_t *outbytesleft)
{
  if (utf_flag)
    {
      while (*inbytesleft >= 4)
	{
	  unsigned long c = 0;
	  size_t j;

	  /* Assemble one big-endian 32-bit code point.  */
	  for (j = 0; j < 4; ++j)
	    c = (c << 8) | (unsigned char) (*inbuf)[j];

	  /* Only code points representable in a single byte can be
	     converted.  */
	  if (c >= 256)
	    {
	      errno = EILSEQ;
	      return -1;
	    }

	  /* Never write past the caller's output buffer.  */
	  if (*outbytesleft < 1)
	    {
	      errno = E2BIG;
	      return -1;
	    }

	  **outbuf = c & 0xff;
	  ++*outbuf;
	  --*outbytesleft;

	  /* Consume the whole 4-byte input unit.  */
	  *inbuf += 4;
	  *inbytesleft -= 4;
	}

      /* Anything left over here is shorter than one code unit.  */
      if (*inbytesleft)
	{
	  errno = EINVAL;
	  return -1;
	}
    }
  else
    {
      /* In all other cases we simply copy as many input bytes as fit
	 into the output.  */
      size_t amt = *inbytesleft;

      if (amt > *outbytesleft)
	amt = *outbytesleft;
      memcpy (*outbuf, *inbuf, amt);
      *inbuf += amt;
      *outbuf += amt;
      *inbytesleft -= amt;
      *outbytesleft -= amt;
    }

  /* Input that did not fit in the output buffer.  */
  if (*inbytesleft)
    {
      errno = E2BIG;
      return -1;
    }

  /* The number of non-reversible conversions -- but they were all
     reversible.  */
  return 0;
}
234b45d4 189
6c7a06a3 190#endif
234b45d4
KB
191
192
193\f
194/* The global lists of character sets and translations. */
195
196
e33d66ec
EZ
197#ifndef GDB_DEFAULT_TARGET_CHARSET
198#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
199#endif
200
6c7a06a3 201#ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
b8899f2b 202#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
6c7a06a3
TT
203#endif
204
205static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
206static const char *host_charset_name = "auto";
920d2a44
AC
207static void
208show_host_charset_name (struct ui_file *file, int from_tty,
209 struct cmd_list_element *c,
210 const char *value)
211{
6c7a06a3
TT
212 if (!strcmp (value, "auto"))
213 fprintf_filtered (file,
214 _("The host character set is \"auto; currently %s\".\n"),
215 auto_host_charset_name);
216 else
217 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
920d2a44
AC
218}
219
/* The value of the target charset setting.  */
static const char *target_charset_name = "auto";

/* Print the target character set setting VALUE to FILE.  When VALUE
   is "auto", also print the charset the current architecture selects
   for it.  */
static void
show_target_charset_name (struct ui_file *file, int from_tty,
			  struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target character set is \"%s\".\n"),
			value);
      return;
    }

  fprintf_filtered (file,
		    _("The target character set is \"auto; "
		      "currently %s\".\n"),
		    gdbarch_auto_charset (get_current_arch ()));
}
234
/* The value of the target wide charset setting.  */
static const char *target_wide_charset_name = "auto";

/* Print the target wide character set setting VALUE to FILE.  When
   VALUE is "auto", also print the wide charset the current
   architecture selects for it.  */
static void
show_target_wide_charset_name (struct ui_file *file,
			       int from_tty,
			       struct cmd_list_element *c,
			       const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
			value);
      return;
    }

  fprintf_filtered (file,
		    _("The target wide character set is \"auto; "
		      "currently %s\".\n"),
		    gdbarch_auto_wide_charset (get_current_arch ()));
}
e33d66ec 251
6c7a06a3 252static const char *default_charset_names[] =
e33d66ec 253{
6c7a06a3 254 DEFAULT_CHARSET_NAMES
e33d66ec
EZ
255 0
256};
234b45d4 257
6c7a06a3 258static const char **charset_enum;
234b45d4 259
6c7a06a3
TT
260\f
261/* If the target wide character set has big- or little-endian
262 variants, these are the corresponding names. */
263static const char *target_wide_charset_be_name;
264static const char *target_wide_charset_le_name;
234b45d4 265
f870a310
TT
266/* The architecture for which the BE- and LE-names are valid. */
267static struct gdbarch *be_le_arch;
268
269/* A helper function which sets the target wide big- and little-endian
270 character set names, if possible. */
234b45d4 271
6c7a06a3 272static void
f870a310 273set_be_le_names (struct gdbarch *gdbarch)
234b45d4 274{
6c7a06a3 275 int i, len;
f870a310
TT
276 const char *target_wide;
277
278 if (be_le_arch == gdbarch)
279 return;
280 be_le_arch = gdbarch;
234b45d4 281
6c7a06a3
TT
282 target_wide_charset_le_name = NULL;
283 target_wide_charset_be_name = NULL;
234b45d4 284
f870a310
TT
285 target_wide = target_wide_charset_name;
286 if (!strcmp (target_wide, "auto"))
287 target_wide = gdbarch_auto_wide_charset (gdbarch);
288
289 len = strlen (target_wide);
6c7a06a3
TT
290 for (i = 0; charset_enum[i]; ++i)
291 {
f870a310 292 if (strncmp (target_wide, charset_enum[i], len))
6c7a06a3
TT
293 continue;
294 if ((charset_enum[i][len] == 'B'
295 || charset_enum[i][len] == 'L')
296 && charset_enum[i][len + 1] == 'E'
297 && charset_enum[i][len + 2] == '\0')
298 {
299 if (charset_enum[i][len] == 'B')
300 target_wide_charset_be_name = charset_enum[i];
301 else
302 target_wide_charset_le_name = charset_enum[i];
303 }
304 }
234b45d4
KB
305}
306
6c7a06a3
TT
307/* 'Set charset', 'set host-charset', 'set target-charset', 'set
308 target-wide-charset', 'set charset' sfunc's. */
234b45d4
KB
309
310static void
f870a310 311validate (struct gdbarch *gdbarch)
234b45d4 312{
6c7a06a3
TT
313 iconv_t desc;
314 const char *host_cset = host_charset ();
f870a310
TT
315 const char *target_cset = target_charset (gdbarch);
316 const char *target_wide_cset = target_wide_charset_name;
c5504eaf 317
f870a310
TT
318 if (!strcmp (target_wide_cset, "auto"))
319 target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
234b45d4 320
f870a310 321 desc = iconv_open (target_wide_cset, host_cset);
6c7a06a3 322 if (desc == (iconv_t) -1)
a73c6dcd 323 error (_("Cannot convert between character sets `%s' and `%s'"),
f870a310 324 target_wide_cset, host_cset);
6c7a06a3 325 iconv_close (desc);
234b45d4 326
f870a310 327 desc = iconv_open (target_cset, host_cset);
6c7a06a3 328 if (desc == (iconv_t) -1)
a73c6dcd 329 error (_("Cannot convert between character sets `%s' and `%s'"),
f870a310 330 target_cset, host_cset);
6c7a06a3 331 iconv_close (desc);
234b45d4 332
f870a310
TT
333 /* Clear the cache. */
334 be_le_arch = NULL;
234b45d4
KB
335}
336
6c7a06a3
TT
337/* This is the sfunc for the 'set charset' command. */
338static void
aff410f1
MS
339set_charset_sfunc (char *charset, int from_tty,
340 struct cmd_list_element *c)
234b45d4 341{
aff410f1 342 /* CAREFUL: set the target charset here as well. */
6c7a06a3 343 target_charset_name = host_charset_name;
f870a310 344 validate (get_current_arch ());
234b45d4
KB
345}
346
6c7a06a3
TT
/* The sfunc for the 'set host-charset' command.  It only exists to
   give validate the signature the command machinery expects; the
   arguments are unused.  */
static void
set_host_charset_sfunc (char *charset, int from_tty,
			struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
355
6c7a06a3
TT
/* The sfunc for the 'set target-charset' command: validate the
   charset settings against the current architecture.  The arguments
   are unused.  */
static void
set_target_charset_sfunc (char *charset, int from_tty,
			  struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
363
6c7a06a3
TT
/* The sfunc for the 'set target-wide-charset' command: validate the
   charset settings against the current architecture.  The arguments
   are unused.  */
static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
			       struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
371
6c7a06a3
TT
372/* sfunc for the 'show charset' command. */
373static void
aff410f1
MS
374show_charset (struct ui_file *file, int from_tty,
375 struct cmd_list_element *c,
6c7a06a3 376 const char *name)
234b45d4 377{
6c7a06a3
TT
378 show_host_charset_name (file, from_tty, c, host_charset_name);
379 show_target_charset_name (file, from_tty, c, target_charset_name);
aff410f1
MS
380 show_target_wide_charset_name (file, from_tty, c,
381 target_wide_charset_name);
234b45d4
KB
382}
383
234b45d4 384\f
6c7a06a3 385/* Accessor functions. */
234b45d4 386
6c7a06a3
TT
387const char *
388host_charset (void)
234b45d4 389{
6c7a06a3
TT
390 if (!strcmp (host_charset_name, "auto"))
391 return auto_host_charset_name;
392 return host_charset_name;
234b45d4
KB
393}
394
6c7a06a3 395const char *
f870a310 396target_charset (struct gdbarch *gdbarch)
234b45d4 397{
f870a310
TT
398 if (!strcmp (target_charset_name, "auto"))
399 return gdbarch_auto_charset (gdbarch);
6c7a06a3 400 return target_charset_name;
234b45d4 401}
234b45d4 402
6c7a06a3 403const char *
f870a310 404target_wide_charset (struct gdbarch *gdbarch)
234b45d4 405{
f870a310
TT
406 enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
407
408 set_be_le_names (gdbarch);
e17a4113 409 if (byte_order == BFD_ENDIAN_BIG)
234b45d4 410 {
6c7a06a3
TT
411 if (target_wide_charset_be_name)
412 return target_wide_charset_be_name;
234b45d4 413 }
6c7a06a3 414 else
234b45d4 415 {
6c7a06a3
TT
416 if (target_wide_charset_le_name)
417 return target_wide_charset_le_name;
234b45d4
KB
418 }
419
f870a310
TT
420 if (!strcmp (target_wide_charset_name, "auto"))
421 return gdbarch_auto_wide_charset (gdbarch);
422
6c7a06a3 423 return target_wide_charset_name;
234b45d4
KB
424}
425
234b45d4 426\f
6c7a06a3
TT
427/* Host character set management. For the time being, we assume that
428 the host character set is some superset of ASCII. */
234b45d4 429
6c7a06a3
TT
/* Return the control character corresponding to host character C:
   '?' maps to DEL (0177); any other character has bits 0140 cleared
   (it is masked with 0237).  */
char
host_letter_to_control_character (char c)
{
  return c == '?' ? 0177 : (c & 0237);
}
437
6c7a06a3
TT
/* Convert a host character, C, to its hex value (0-15).  C must
   already have been validated using isxdigit; any other character
   trips the assertion below.  */

int
host_hex_value (char c)
{
  /* Compare against the digit range directly rather than calling
     isdigit: passing a plain (possibly negative) char to a <ctype.h>
     function is undefined behavior.  */
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
451
234b45d4 452\f
6c7a06a3 453/* Public character management functions. */
234b45d4 454
/* Cleanup callback that closes an iconv descriptor.  P points at the
   iconv_t to close.  */

static void
cleanup_iconv (void *p)
{
  iconv_close (*(iconv_t *) p);
}
463
6c7a06a3
TT
464void
465convert_between_encodings (const char *from, const char *to,
466 const gdb_byte *bytes, unsigned int num_bytes,
467 int width, struct obstack *output,
468 enum transliterations translit)
469{
470 iconv_t desc;
471 struct cleanup *cleanups;
472 size_t inleft;
39086a0e 473 ICONV_CONST char *inp;
6c7a06a3
TT
474 unsigned int space_request;
475
476 /* Often, the host and target charsets will be the same. */
477 if (!strcmp (from, to))
478 {
479 obstack_grow (output, bytes, num_bytes);
480 return;
481 }
234b45d4 482
6c7a06a3
TT
483 desc = iconv_open (to, from);
484 if (desc == (iconv_t) -1)
9b20d036 485 perror_with_name (_("Converting character sets"));
6c7a06a3 486 cleanups = make_cleanup (cleanup_iconv, &desc);
234b45d4 487
6c7a06a3 488 inleft = num_bytes;
39086a0e 489 inp = (ICONV_CONST char *) bytes;
234b45d4 490
6c7a06a3 491 space_request = num_bytes;
234b45d4 492
6c7a06a3 493 while (inleft > 0)
234b45d4 494 {
6c7a06a3
TT
495 char *outp;
496 size_t outleft, r;
497 int old_size;
498
499 old_size = obstack_object_size (output);
500 obstack_blank (output, space_request);
501
241fd515 502 outp = (char *) obstack_base (output) + old_size;
6c7a06a3
TT
503 outleft = space_request;
504
39086a0e 505 r = iconv (desc, &inp, &inleft, &outp, &outleft);
6c7a06a3
TT
506
507 /* Now make sure that the object on the obstack only includes
508 bytes we have converted. */
509 obstack_blank (output, - (int) outleft);
510
511 if (r == (size_t) -1)
512 {
513 switch (errno)
514 {
515 case EILSEQ:
516 {
517 int i;
518
519 /* Invalid input sequence. */
520 if (translit == translit_none)
3e43a32a
MS
521 error (_("Could not convert character "
522 "to `%s' character set"), to);
6c7a06a3
TT
523
524 /* We emit escape sequence for the bytes, skip them,
525 and try again. */
526 for (i = 0; i < width; ++i)
527 {
528 char octal[5];
529
08850b56 530 xsnprintf (octal, sizeof (octal), "\\%.3o", *inp & 0xff);
6c7a06a3
TT
531 obstack_grow_str (output, octal);
532
533 ++inp;
534 --inleft;
535 }
536 }
537 break;
538
539 case E2BIG:
540 /* We ran out of space in the output buffer. Make it
541 bigger next time around. */
542 space_request *= 2;
543 break;
544
545 case EINVAL:
546 /* Incomplete input sequence. FIXME: ought to report this
547 to the caller somehow. */
548 inleft = 0;
549 break;
550
551 default:
9b20d036
MS
552 perror_with_name (_("Internal error while "
553 "converting character sets"));
6c7a06a3
TT
554 }
555 }
234b45d4 556 }
234b45d4 557
6c7a06a3 558 do_cleanups (cleanups);
234b45d4
KB
559}
560
e33d66ec 561\f
e33d66ec 562
6c7a06a3
TT
563/* An iterator that returns host wchar_t's from a target string. */
564struct wchar_iterator
e33d66ec 565{
6c7a06a3
TT
566 /* The underlying iconv descriptor. */
567 iconv_t desc;
e33d66ec 568
6c7a06a3 569 /* The input string. This is updated as convert characters. */
2898e560 570 const gdb_byte *input;
6c7a06a3
TT
571 /* The number of bytes remaining in the input. */
572 size_t bytes;
e33d66ec 573
6c7a06a3
TT
574 /* The width of an input character. */
575 size_t width;
e33d66ec 576
6c7a06a3
TT
577 /* The output buffer and its size. */
578 gdb_wchar_t *out;
579 size_t out_size;
580};
234b45d4 581
6c7a06a3
TT
582/* Create a new iterator. */
583struct wchar_iterator *
aff410f1
MS
584make_wchar_iterator (const gdb_byte *input, size_t bytes,
585 const char *charset, size_t width)
234b45d4 586{
6c7a06a3
TT
587 struct wchar_iterator *result;
588 iconv_t desc;
234b45d4 589
732f6a93 590 desc = iconv_open (INTERMEDIATE_ENCODING, charset);
6c7a06a3 591 if (desc == (iconv_t) -1)
9b20d036 592 perror_with_name (_("Converting character sets"));
234b45d4 593
6c7a06a3
TT
594 result = XNEW (struct wchar_iterator);
595 result->desc = desc;
2898e560 596 result->input = input;
6c7a06a3
TT
597 result->bytes = bytes;
598 result->width = width;
234b45d4 599
6c7a06a3
TT
600 result->out = XNEW (gdb_wchar_t);
601 result->out_size = 1;
234b45d4 602
6c7a06a3 603 return result;
e33d66ec 604}
234b45d4 605
e33d66ec 606static void
6c7a06a3 607do_cleanup_iterator (void *p)
e33d66ec 608{
6c7a06a3 609 struct wchar_iterator *iter = p;
234b45d4 610
6c7a06a3
TT
611 iconv_close (iter->desc);
612 xfree (iter->out);
613 xfree (iter);
234b45d4
KB
614}
615
6c7a06a3
TT
616struct cleanup *
617make_cleanup_wchar_iterator (struct wchar_iterator *iter)
e33d66ec 618{
6c7a06a3 619 return make_cleanup (do_cleanup_iterator, iter);
e33d66ec 620}
234b45d4 621
6c7a06a3
TT
622int
623wchar_iterate (struct wchar_iterator *iter,
624 enum wchar_iterate_result *out_result,
625 gdb_wchar_t **out_chars,
626 const gdb_byte **ptr,
627 size_t *len)
628{
629 size_t out_request;
630
631 /* Try to convert some characters. At first we try to convert just
632 a single character. The reason for this is that iconv does not
633 necessarily update its outgoing arguments when it encounters an
634 invalid input sequence -- but we want to reliably report this to
635 our caller so it can emit an escape sequence. */
636 out_request = 1;
637 while (iter->bytes > 0)
e33d66ec 638 {
39086a0e 639 ICONV_CONST char *inptr = (ICONV_CONST char *) iter->input;
6c7a06a3 640 char *outptr = (char *) &iter->out[0];
2898e560 641 const gdb_byte *orig_inptr = iter->input;
6c7a06a3
TT
642 size_t orig_in = iter->bytes;
643 size_t out_avail = out_request * sizeof (gdb_wchar_t);
644 size_t num;
39086a0e
PA
645 size_t r = iconv (iter->desc, &inptr, &iter->bytes, &outptr, &out_avail);
646
647 iter->input = (gdb_byte *) inptr;
c5504eaf 648
6c7a06a3
TT
649 if (r == (size_t) -1)
650 {
651 switch (errno)
652 {
653 case EILSEQ:
aff410f1
MS
654 /* Invalid input sequence. We still might have
655 converted a character; if so, return it. */
1558ab4c
JK
656 if (out_avail < out_request * sizeof (gdb_wchar_t))
657 break;
658
aff410f1
MS
659 /* Otherwise skip the first invalid character, and let
660 the caller know about it. */
6c7a06a3
TT
661 *out_result = wchar_iterate_invalid;
662 *ptr = iter->input;
663 *len = iter->width;
664 iter->input += iter->width;
665 iter->bytes -= iter->width;
666 return 0;
667
668 case E2BIG:
669 /* We ran out of space. We still might have converted a
670 character; if so, return it. Otherwise, grow the
671 buffer and try again. */
672 if (out_avail < out_request * sizeof (gdb_wchar_t))
673 break;
674
675 ++out_request;
676 if (out_request > iter->out_size)
677 {
678 iter->out_size = out_request;
679 iter->out = xrealloc (iter->out,
680 out_request * sizeof (gdb_wchar_t));
681 }
682 continue;
683
684 case EINVAL:
685 /* Incomplete input sequence. Let the caller know, and
686 arrange for future calls to see EOF. */
687 *out_result = wchar_iterate_incomplete;
688 *ptr = iter->input;
689 *len = iter->bytes;
690 iter->bytes = 0;
691 return 0;
692
693 default:
9b20d036
MS
694 perror_with_name (_("Internal error while "
695 "converting character sets"));
6c7a06a3
TT
696 }
697 }
698
699 /* We converted something. */
700 num = out_request - out_avail / sizeof (gdb_wchar_t);
701 *out_result = wchar_iterate_ok;
702 *out_chars = iter->out;
703 *ptr = orig_inptr;
704 *len = orig_in - iter->bytes;
705 return num;
e33d66ec 706 }
6c7a06a3
TT
707
708 /* Really done. */
709 *out_result = wchar_iterate_eof;
710 return -1;
234b45d4
KB
711}
712
e33d66ec 713\f
6c7a06a3 714/* The charset.c module initialization function. */
234b45d4 715
6c7a06a3 716extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
234b45d4 717
6c7a06a3 718static VEC (char_ptr) *charsets;
234b45d4 719
6c7a06a3 720#ifdef PHONY_ICONV
234b45d4 721
6c7a06a3
TT
722static void
723find_charset_names (void)
234b45d4 724{
6c7a06a3
TT
725 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
726 VEC_safe_push (char_ptr, charsets, NULL);
234b45d4
KB
727}
728
6c7a06a3 729#else /* PHONY_ICONV */
fc3b640d
TT
730
731/* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
732 provides different symbols in the static and dynamic libraries.
733 So, configure may see libiconvlist but not iconvlist. But, calling
734 iconvlist is the right thing to do and will work. Hence we do a
735 check here but unconditionally call iconvlist below. */
736#if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
234b45d4 737
6c7a06a3
TT
738/* A helper function that adds some character sets to the vector of
739 all character sets. This is a callback function for iconvlist. */
740
741static int
742add_one (unsigned int count, const char *const *names, void *data)
234b45d4 743{
6c7a06a3 744 unsigned int i;
234b45d4 745
6c7a06a3
TT
746 for (i = 0; i < count; ++i)
747 VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
234b45d4 748
6c7a06a3 749 return 0;
234b45d4
KB
750}
751
6c7a06a3
TT
752static void
753find_charset_names (void)
234b45d4 754{
6c7a06a3
TT
755 iconvlist (add_one, NULL);
756 VEC_safe_push (char_ptr, charsets, NULL);
234b45d4
KB
757}
758
6c7a06a3 759#else
234b45d4 760
40b5c9fb
DE
/* Return non-zero if LINE (output from iconv) should be ignored.
   Older iconv programs (e.g. 2.2.2) include the human readable
   introduction even when stdout is not a tty.  Newer versions omit
   the intro if stdout is not a tty.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction text.  A line containing any of
     them, at any position, is ignored.  */
  static const char * const ignore_lines[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several",
      NULL
    };
  const char * const *fragment = ignore_lines;

  while (*fragment != NULL)
    {
      if (strstr (line, *fragment) != NULL)
	return 1;
      ++fragment;
    }

  return 0;
}
789
6c7a06a3
TT
790static void
791find_charset_names (void)
234b45d4 792{
732f6a93
TT
793 struct pex_obj *child;
794 char *args[3];
795 int err, status;
796 int fail = 1;
478aac75 797 int flags;
40b5c9fb 798 struct gdb_environ *iconv_env;
478aac75 799 char *iconv_program;
40b5c9fb 800
aff410f1
MS
801 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is
802 not a tty. We need to recognize it and ignore it. This text is
803 subject to translation, so force LANGUAGE=C. */
40b5c9fb
DE
804 iconv_env = make_environ ();
805 init_environ (iconv_env);
806 set_in_environ (iconv_env, "LANGUAGE", "C");
807 set_in_environ (iconv_env, "LC_ALL", "C");
732f6a93 808
40618926 809 child = pex_init (PEX_USE_PIPES, "iconv", NULL);
732f6a93 810
478aac75
DE
811#ifdef ICONV_BIN
812 {
813 char *iconv_dir = relocate_gdb_directory (ICONV_BIN,
814 ICONV_BIN_RELOCATABLE);
815 iconv_program = concat (iconv_dir, SLASH_STRING, "iconv", NULL);
816 xfree (iconv_dir);
817 }
818#else
819 iconv_program = xstrdup ("iconv");
820#endif
821 args[0] = iconv_program;
732f6a93
TT
822 args[1] = "-l";
823 args[2] = NULL;
478aac75
DE
824 flags = PEX_STDERR_TO_STDOUT;
825#ifndef ICONV_BIN
826 flags |= PEX_SEARCH;
827#endif
732f6a93 828 /* Note that we simply ignore errors here. */
478aac75
DE
829 if (!pex_run_in_environment (child, flags,
830 args[0], args, environ_vector (iconv_env),
40b5c9fb 831 NULL, NULL, &err))
732f6a93
TT
832 {
833 FILE *in = pex_read_output (child, 0);
834
835 /* POSIX says that iconv -l uses an unspecified format. We
836 parse the glibc and libiconv formats; feel free to add others
837 as needed. */
40b5c9fb 838
1d6b2d2b 839 while (in != NULL && !feof (in))
732f6a93
TT
840 {
841 /* The size of buf is chosen arbitrarily. */
842 char buf[1024];
843 char *start, *r;
8ea13695 844 int len;
732f6a93
TT
845
846 r = fgets (buf, sizeof (buf), in);
847 if (!r)
848 break;
849 len = strlen (r);
850 if (len <= 3)
851 continue;
40b5c9fb
DE
852 if (ignore_line_p (r))
853 continue;
854
732f6a93
TT
855 /* Strip off the newline. */
856 --len;
857 /* Strip off one or two '/'s. glibc will print lines like
858 "8859_7//", but also "10646-1:1993/UCS4/". */
859 if (buf[len - 1] == '/')
860 --len;
861 if (buf[len - 1] == '/')
862 --len;
863 buf[len] = '\0';
864
865 /* libiconv will print multiple entries per line, separated
aff410f1
MS
866 by spaces. Older iconvs will print multiple entries per
867 line, indented by two spaces, and separated by ", "
40b5c9fb 868 (i.e. the human readable form). */
732f6a93
TT
869 start = buf;
870 while (1)
871 {
872 int keep_going;
873 char *p;
874
40b5c9fb
DE
875 /* Skip leading blanks. */
876 for (p = start; *p && *p == ' '; ++p)
877 ;
878 start = p;
879 /* Find the next space, comma, or end-of-line. */
880 for ( ; *p && *p != ' ' && *p != ','; ++p)
732f6a93
TT
881 ;
882 /* Ignore an empty result. */
883 if (p == start)
884 break;
885 keep_going = *p;
886 *p = '\0';
887 VEC_safe_push (char_ptr, charsets, xstrdup (start));
888 if (!keep_going)
889 break;
890 /* Skip any extra spaces. */
891 for (start = p + 1; *start && *start == ' '; ++start)
892 ;
893 }
894 }
234b45d4 895
732f6a93
TT
896 if (pex_get_status (child, 1, &status)
897 && WIFEXITED (status) && !WEXITSTATUS (status))
898 fail = 0;
234b45d4 899
6c7a06a3 900 }
234b45d4 901
478aac75 902 xfree (iconv_program);
732f6a93 903 pex_free (child);
40b5c9fb 904 free_environ (iconv_env);
234b45d4 905
732f6a93
TT
906 if (fail)
907 {
908 /* Some error occurred, so drop the vector. */
e4ab2fad
JK
909 free_char_ptr_vec (charsets);
910 charsets = NULL;
732f6a93
TT
911 }
912 else
913 VEC_safe_push (char_ptr, charsets, NULL);
6c7a06a3 914}
234b45d4 915
fc3b640d 916#endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
6c7a06a3 917#endif /* PHONY_ICONV */
234b45d4 918
f870a310
TT
919/* The "auto" target charset used by default_auto_charset. */
920static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
921
922const char *
923default_auto_charset (void)
924{
925 return auto_target_charset_name;
926}
927
928const char *
929default_auto_wide_charset (void)
930{
931 return GDB_DEFAULT_TARGET_WIDE_CHARSET;
932}
933
bcb28afc
PM
934
935#ifdef USE_INTERMEDIATE_ENCODING_FUNCTION
936/* Macro used for UTF or UCS endianness suffix. */
937#if WORDS_BIGENDIAN
938#define ENDIAN_SUFFIX "BE"
939#else
940#define ENDIAN_SUFFIX "LE"
941#endif
942
943/* The code below serves to generate a compile time error if
944 gdb_wchar_t type is not of size 2 nor 4, despite the fact that
945 macro __STDC_ISO_10646__ is defined.
946 This is better than a gdb_assert call, because GDB cannot handle
947 strings correctly if this size is different. */
948
949extern char your_gdb_wchar_t_is_bogus[(sizeof (gdb_wchar_t) == 2
950 || sizeof (gdb_wchar_t) == 4)
951 ? 1 : -1];
952
ee34b3f9 953/* intermediate_encoding returns the charset used internally by
bcb28afc
PM
954 GDB to convert between target and host encodings. As the test above
955 compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes.
956 UTF-16/32 is tested first, UCS-2/4 is tested as a second option,
957 otherwise an error is generated. */
958
959const char *
960intermediate_encoding (void)
961{
962 iconv_t desc;
963 static const char *stored_result = NULL;
964 char *result;
bcb28afc
PM
965
966 if (stored_result)
967 return stored_result;
968 result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8),
969 ENDIAN_SUFFIX);
970 /* Check that the name is supported by iconv_open. */
971 desc = iconv_open (result, host_charset ());
972 if (desc != (iconv_t) -1)
973 {
974 iconv_close (desc);
975 stored_result = result;
976 return result;
977 }
978 /* Not valid, free the allocated memory. */
979 xfree (result);
980 /* Second try, with UCS-2 type. */
981 result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t),
982 ENDIAN_SUFFIX);
983 /* Check that the name is supported by iconv_open. */
984 desc = iconv_open (result, host_charset ());
985 if (desc != (iconv_t) -1)
986 {
987 iconv_close (desc);
988 stored_result = result;
989 return result;
990 }
991 /* Not valid, free the allocated memory. */
992 xfree (result);
993 /* No valid charset found, generate error here. */
994 error (_("Unable to find a vaild charset for string conversions"));
995}
996
997#endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */
998
234b45d4
KB
999void
1000_initialize_charset (void)
1001{
f870a310 1002 /* The first element is always "auto". */
732f6a93 1003 VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
6c7a06a3
TT
1004 find_charset_names ();
1005
1006 if (VEC_length (char_ptr, charsets) > 1)
1007 charset_enum = (const char **) VEC_address (char_ptr, charsets);
1008 else
1009 charset_enum = default_charset_names;
1010
1011#ifndef PHONY_ICONV
1012#ifdef HAVE_LANGINFO_CODESET
f870a310
TT
1013 /* The result of nl_langinfo may be overwritten later. This may
1014 leak a little memory, if the user later changes the host charset,
1015 but that doesn't matter much. */
1016 auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
aff410f1
MS
1017 /* Solaris will return `646' here -- but the Solaris iconv then does
1018 not accept this. Darwin (and maybe FreeBSD) may return "" here,
06be6983
TG
1019 which GNU libiconv doesn't like (infinite loop). */
1020 if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
58720494 1021 auto_host_charset_name = "ASCII";
f870a310
TT
1022 auto_target_charset_name = auto_host_charset_name;
1023#elif defined (USE_WIN32API)
1024 {
3e43a32a
MS
1025 /* "CP" + x<=5 digits + paranoia. */
1026 static char w32_host_default_charset[16];
f870a310
TT
1027
1028 snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
1029 "CP%d", GetACP());
1030 auto_host_charset_name = w32_host_default_charset;
1031 auto_target_charset_name = auto_host_charset_name;
1032 }
6c7a06a3
TT
1033#endif
1034#endif
e33d66ec 1035
7ab04401 1036 add_setshow_enum_cmd ("charset", class_support,
f870a310 1037 charset_enum, &host_charset_name, _("\
7ab04401
AC
1038Set the host and target character sets."), _("\
1039Show the host and target character sets."), _("\
3d263c1d
BI
1040The `host character set' is the one used by the system GDB is running on.\n\
1041The `target character set' is the one used by the program being debugged.\n\
1042You may only use supersets of ASCII for your host character set; GDB does\n\
1043not support any others.\n\
1044To see a list of the character sets GDB supports, type `set charset <TAB>'."),
7ab04401
AC
1045 /* Note that the sfunc below needs to set
1046 target_charset_name, because the 'set
1047 charset' command sets two variables. */
1048 set_charset_sfunc,
1049 show_charset,
1050 &setlist, &showlist);
1051
1052 add_setshow_enum_cmd ("host-charset", class_support,
6c7a06a3 1053 charset_enum, &host_charset_name, _("\
7ab04401
AC
1054Set the host character set."), _("\
1055Show the host character set."), _("\
3d263c1d
BI
1056The `host character set' is the one used by the system GDB is running on.\n\
1057You may only use supersets of ASCII for your host character set; GDB does\n\
ac74f770
MS
1058not support any others.\n\
1059To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
7ab04401 1060 set_host_charset_sfunc,
920d2a44 1061 show_host_charset_name,
7ab04401
AC
1062 &setlist, &showlist);
1063
1064 add_setshow_enum_cmd ("target-charset", class_support,
f870a310 1065 charset_enum, &target_charset_name, _("\
7ab04401
AC
1066Set the target character set."), _("\
1067Show the target character set."), _("\
3d263c1d
BI
1068The `target character set' is the one used by the program being debugged.\n\
1069GDB translates characters and strings between the host and target\n\
b670013c 1070character sets as needed.\n\
ac74f770 1071To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
7ab04401 1072 set_target_charset_sfunc,
920d2a44 1073 show_target_charset_name,
7ab04401 1074 &setlist, &showlist);
6c7a06a3
TT
1075
1076 add_setshow_enum_cmd ("target-wide-charset", class_support,
f870a310 1077 charset_enum, &target_wide_charset_name,
6c7a06a3
TT
1078 _("\
1079Set the target wide character set."), _("\
1080Show the target wide character set."), _("\
3e43a32a
MS
1081The `target wide character set' is the one used by the program being debugged.\
1082\nIn particular it is the encoding used by `wchar_t'.\n\
6c7a06a3
TT
1083GDB translates characters and strings between the host and target\n\
1084character sets as needed.\n\
1085To see a list of the character sets GDB supports, type\n\
1086`set target-wide-charset'<TAB>"),
1087 set_target_wide_charset_sfunc,
1088 show_target_wide_charset_name,
1089 &setlist, &showlist);
234b45d4 1090}
This page took 0.861731 seconds and 4 git commands to generate.