2010-03-30 Ozkan Sezer <sezeroz@gmail.com>
[deliverable/binutils-gdb.git] / gdb / charset.c
CommitLineData
234b45d4 1/* Character set conversion support for GDB.
1bac305b 2
4c38e0a4
JB
3 Copyright (C) 2001, 2003, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
234b45d4
KB
5
6 This file is part of GDB.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
a9762ec7 10 the Free Software Foundation; either version 3 of the License, or
234b45d4
KB
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
a9762ec7 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
234b45d4
KB
20
21#include "defs.h"
22#include "charset.h"
23#include "gdbcmd.h"
24#include "gdb_assert.h"
6c7a06a3 25#include "gdb_obstack.h"
732f6a93 26#include "gdb_wait.h"
6c7a06a3
TT
27#include "charset-list.h"
28#include "vec.h"
40b5c9fb 29#include "environ.h"
f870a310 30#include "arch-utils.h"
234b45d4
KB
31
32#include <stddef.h>
4ef3f3be 33#include "gdb_string.h"
234b45d4
KB
34#include <ctype.h>
35
43484f03
DJ
36#ifdef USE_WIN32API
37#include <windows.h>
38#endif
234b45d4
KB
39\f
/* How GDB's character set support works

   GDB has three global settings:

   - The `current host character set' is the character set GDB should
     use in talking to the user, and which (hopefully) the user's
     terminal knows how to display properly.  Most users should not
     change this.

   - The `current target character set' is the character set the
     program being debugged uses.

   - The `current target wide character set' is the wide character set
     the program being debugged uses, that is, the encoding used for
     wchar_t.

   There are commands to set each of these, and mechanisms for
   choosing reasonable default values.  GDB has a global list of
   character sets that it can use as its host or target character
   sets.

   The header file `charset.h' declares various functions that
   different pieces of GDB need to perform tasks like:

   - printing target strings and characters to the user's terminal
     (mostly target->host conversions),

   - building target-appropriate representations of strings and
     characters the user enters in expressions (mostly host->target
     conversions),

   and so on.

   To avoid excessive code duplication and maintenance efforts,
   GDB simply requires a capable iconv function.  Users on platforms
   without a suitable iconv can use the GNU iconv library.  */
234b45d4
KB
76
77\f
6c7a06a3 78#ifdef PHONY_ICONV
234b45d4 79
/* Provide a phony iconv that does as little as possible.  Also,
   arrange for there to be a single available character set.  */

#undef GDB_DEFAULT_HOST_CHARSET
#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
#undef DEFAULT_CHARSET_NAMES
#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,

#undef iconv_t
#define iconv_t int
#undef iconv_open
#undef iconv
#undef iconv_close

#undef ICONV_CONST
#define ICONV_CONST const

/* Some systems don't have EILSEQ, so we define it here, but not as
   EINVAL, because callers of `iconv' want to distinguish EINVAL and
   EILSEQ.  This is what iconv.h from libiconv does as well.  Note
   that wchar.h may also define EILSEQ, so this needs to be after we
   include wchar.h, which happens in defs.h through gdb_wchar.h.  */
#ifndef EILSEQ
#define EILSEQ ENOENT
#endif

/* Open a phony conversion descriptor from encoding FROM to encoding TO.

   Conversions are accepted from UTF-32BE, wchar_t and the host
   charset, and to wchar_t and the host charset.  Anything else yields
   -1.  On success the descriptor is 1 when FROM is UTF-32BE and 0
   otherwise; the phony iconv below uses it as a flag.  */

iconv_t
iconv_open (const char *to, const char *from)
{
  int from_utf32 = strcmp (from, "UTF-32BE") == 0;

  if (!from_utf32
      && strcmp (from, "wchar_t") != 0
      && strcmp (from, GDB_DEFAULT_HOST_CHARSET) != 0)
    return -1;
  if (strcmp (to, "wchar_t") != 0
      && strcmp (to, GDB_DEFAULT_HOST_CHARSET) != 0)
    return -1;

  return from_utf32;
}

/* Close a phony descriptor.  There is nothing to release; always
   returns 0.  */

int
iconv_close (iconv_t arg)
{
  return 0;
}

/* Phony conversion routine.

   When UTF_FLAG is non-zero the input is treated as a sequence of
   4-byte big-endian code points, each of which is narrowed to a single
   output byte.  Otherwise input bytes are copied to the output
   unchanged.  The in/out pointers and counters are advanced past
   whatever was consumed and produced.

   Returns 0 on success.  On failure returns (size_t) -1 with errno set
   to:
     EILSEQ - a code point of 256 or above was seen;
     E2BIG  - the output buffer is full while input remains;
     EINVAL - the UTF-32BE input ends with a partial (1-3 byte) unit.  */

size_t
iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
       char **outbuf, size_t *outbytesleft)
{
  if (utf_flag)
    {
      while (*inbytesleft >= 4)
        {
          size_t j;
          unsigned long c = 0;

          for (j = 0; j < 4; ++j)
            {
              c <<= 8;
              c += (*inbuf)[j] & 0xff;
            }

          if (c >= 256)
            {
              errno = EILSEQ;
              return (size_t) -1;
            }
          /* Never write past the end of the caller's buffer.  */
          if (*outbytesleft < 1)
            {
              errno = E2BIG;
              return (size_t) -1;
            }
          **outbuf = c & 0xff;
          ++*outbuf;
          --*outbytesleft;

          /* Consume the whole 4-byte unit, not just its first byte.  */
          *inbuf += 4;
          *inbytesleft -= 4;
        }
      /* Only a genuinely partial unit is an incomplete sequence; an
         exactly consumed input is success.  */
      if (*inbytesleft)
        {
          errno = EINVAL;
          return (size_t) -1;
        }
    }
  else
    {
      /* In all other cases we simply copy input bytes to the
         output.  */
      size_t amt = *inbytesleft;

      if (amt > *outbytesleft)
        amt = *outbytesleft;
      memcpy (*outbuf, *inbuf, amt);
      *inbuf += amt;
      *outbuf += amt;
      *inbytesleft -= amt;
      *outbytesleft -= amt;

      if (*inbytesleft)
        {
          errno = E2BIG;
          return (size_t) -1;
        }
    }

  /* The number of non-reversible conversions -- but they were all
     reversible.  */
  return 0;
}
234b45d4 189
6c7a06a3 190#endif
234b45d4
KB
191
192
193\f
/* Fallback names for the default target character sets, used only
   when nothing earlier has already defined them.  */

#if !defined (GDB_DEFAULT_TARGET_CHARSET)
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#endif

#if !defined (GDB_DEFAULT_TARGET_WIDE_CHARSET)
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
#endif
204
205static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
206static const char *host_charset_name = "auto";
920d2a44
AC
207static void
208show_host_charset_name (struct ui_file *file, int from_tty,
209 struct cmd_list_element *c,
210 const char *value)
211{
6c7a06a3
TT
212 if (!strcmp (value, "auto"))
213 fprintf_filtered (file,
214 _("The host character set is \"auto; currently %s\".\n"),
215 auto_host_charset_name);
216 else
217 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
920d2a44
AC
218}
219
/* The user-visible target charset setting; "auto" by default.  */
static const char *target_charset_name = "auto";

/* Print the target charset setting VALUE to FILE.  For "auto" the
   charset reported by the current architecture is printed as well.
   FROM_TTY and C are unused.  */

static void
show_target_charset_name (struct ui_file *file, int from_tty,
                          struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_charset (get_current_arch ()));
}
234
/* The user-visible target wide charset setting; "auto" by default.  */
static const char *target_wide_charset_name = "auto";

/* Print the target wide charset setting VALUE to FILE.  For "auto"
   the wide charset reported by the current architecture is printed as
   well.  FROM_TTY and C are unused.  */

static void
show_target_wide_charset_name (struct ui_file *file, int from_tty,
                               struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target wide character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_wide_charset (get_current_arch ()));
}
e33d66ec 249
6c7a06a3 250static const char *default_charset_names[] =
e33d66ec 251{
6c7a06a3 252 DEFAULT_CHARSET_NAMES
e33d66ec
EZ
253 0
254};
234b45d4 255
6c7a06a3 256static const char **charset_enum;
234b45d4 257
6c7a06a3
TT
258\f
259/* If the target wide character set has big- or little-endian
260 variants, these are the corresponding names. */
261static const char *target_wide_charset_be_name;
262static const char *target_wide_charset_le_name;
234b45d4 263
f870a310
TT
264/* The architecture for which the BE- and LE-names are valid. */
265static struct gdbarch *be_le_arch;
266
267/* A helper function which sets the target wide big- and little-endian
268 character set names, if possible. */
234b45d4 269
6c7a06a3 270static void
f870a310 271set_be_le_names (struct gdbarch *gdbarch)
234b45d4 272{
6c7a06a3 273 int i, len;
f870a310
TT
274 const char *target_wide;
275
276 if (be_le_arch == gdbarch)
277 return;
278 be_le_arch = gdbarch;
234b45d4 279
6c7a06a3
TT
280 target_wide_charset_le_name = NULL;
281 target_wide_charset_be_name = NULL;
234b45d4 282
f870a310
TT
283 target_wide = target_wide_charset_name;
284 if (!strcmp (target_wide, "auto"))
285 target_wide = gdbarch_auto_wide_charset (gdbarch);
286
287 len = strlen (target_wide);
6c7a06a3
TT
288 for (i = 0; charset_enum[i]; ++i)
289 {
f870a310 290 if (strncmp (target_wide, charset_enum[i], len))
6c7a06a3
TT
291 continue;
292 if ((charset_enum[i][len] == 'B'
293 || charset_enum[i][len] == 'L')
294 && charset_enum[i][len + 1] == 'E'
295 && charset_enum[i][len + 2] == '\0')
296 {
297 if (charset_enum[i][len] == 'B')
298 target_wide_charset_be_name = charset_enum[i];
299 else
300 target_wide_charset_le_name = charset_enum[i];
301 }
302 }
234b45d4
KB
303}
304
6c7a06a3
TT
305/* 'Set charset', 'set host-charset', 'set target-charset', 'set
306 target-wide-charset', 'set charset' sfunc's. */
234b45d4
KB
307
308static void
f870a310 309validate (struct gdbarch *gdbarch)
234b45d4 310{
6c7a06a3
TT
311 iconv_t desc;
312 const char *host_cset = host_charset ();
f870a310
TT
313 const char *target_cset = target_charset (gdbarch);
314 const char *target_wide_cset = target_wide_charset_name;
315 if (!strcmp (target_wide_cset, "auto"))
316 target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
234b45d4 317
f870a310 318 desc = iconv_open (target_wide_cset, host_cset);
6c7a06a3
TT
319 if (desc == (iconv_t) -1)
320 error ("Cannot convert between character sets `%s' and `%s'",
f870a310 321 target_wide_cset, host_cset);
6c7a06a3 322 iconv_close (desc);
234b45d4 323
f870a310 324 desc = iconv_open (target_cset, host_cset);
6c7a06a3
TT
325 if (desc == (iconv_t) -1)
326 error ("Cannot convert between character sets `%s' and `%s'",
f870a310 327 target_cset, host_cset);
6c7a06a3 328 iconv_close (desc);
234b45d4 329
f870a310
TT
330 /* Clear the cache. */
331 be_le_arch = NULL;
234b45d4
KB
332}
333
6c7a06a3
TT
334/* This is the sfunc for the 'set charset' command. */
335static void
336set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c)
234b45d4 337{
6c7a06a3
TT
338 /* CAREFUL: set the target charset here as well. */
339 target_charset_name = host_charset_name;
f870a310 340 validate (get_current_arch ());
234b45d4
KB
341}
342
/* The sfunc for 'set host-charset'.  This wrapper exists only because
   an sfunc must have this signature; it validates the settings against
   the current architecture.  CHARSET, FROM_TTY and C are unused.  */

static void
set_host_charset_sfunc (char *charset, int from_tty,
                        struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
351
/* The sfunc for 'set target-charset': validate the settings against
   the current architecture.  CHARSET, FROM_TTY and C are unused.  */

static void
set_target_charset_sfunc (char *charset, int from_tty,
                          struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
359
/* The sfunc for 'set target-wide-charset': validate the settings
   against the current architecture.  CHARSET, FROM_TTY and C are
   unused.  */

static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
                               struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
367
6c7a06a3
TT
368/* sfunc for the 'show charset' command. */
369static void
370show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c,
371 const char *name)
234b45d4 372{
6c7a06a3
TT
373 show_host_charset_name (file, from_tty, c, host_charset_name);
374 show_target_charset_name (file, from_tty, c, target_charset_name);
375 show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name);
234b45d4
KB
376}
377
234b45d4 378\f
6c7a06a3 379/* Accessor functions. */
234b45d4 380
6c7a06a3
TT
381const char *
382host_charset (void)
234b45d4 383{
6c7a06a3
TT
384 if (!strcmp (host_charset_name, "auto"))
385 return auto_host_charset_name;
386 return host_charset_name;
234b45d4
KB
387}
388
6c7a06a3 389const char *
f870a310 390target_charset (struct gdbarch *gdbarch)
234b45d4 391{
f870a310
TT
392 if (!strcmp (target_charset_name, "auto"))
393 return gdbarch_auto_charset (gdbarch);
6c7a06a3 394 return target_charset_name;
234b45d4 395}
234b45d4 396
6c7a06a3 397const char *
f870a310 398target_wide_charset (struct gdbarch *gdbarch)
234b45d4 399{
f870a310
TT
400 enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
401
402 set_be_le_names (gdbarch);
e17a4113 403 if (byte_order == BFD_ENDIAN_BIG)
234b45d4 404 {
6c7a06a3
TT
405 if (target_wide_charset_be_name)
406 return target_wide_charset_be_name;
234b45d4 407 }
6c7a06a3 408 else
234b45d4 409 {
6c7a06a3
TT
410 if (target_wide_charset_le_name)
411 return target_wide_charset_le_name;
234b45d4
KB
412 }
413
f870a310
TT
414 if (!strcmp (target_wide_charset_name, "auto"))
415 return gdbarch_auto_wide_charset (gdbarch);
416
6c7a06a3 417 return target_wide_charset_name;
234b45d4
KB
418}
419
234b45d4 420\f
/* Host character set management.  For the time being, we assume that
   the host character set is some superset of ASCII.  */

/* Return the control character corresponding to the host character
   C: 0177 for '?', otherwise C masked with 0237.  */

char
host_letter_to_control_character (char c)
{
  return c == '?' ? 0177 : (c & 0237);
}
431
/* Convert a host character, C, to its hex value.  C must already have
   been validated using isxdigit.

   Returns 0-9 for a decimal digit and 10-15 for 'a'-'f' or 'A'-'F'.
   Any other character trips the assertion.  */

int
host_hex_value (char c)
{
  /* The <ctype.h> functions take an int that must hold an unsigned
     char value (or EOF), so a plain char must not be passed as-is.  */
  if (isdigit ((unsigned char) c))
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
445
234b45d4 446\f
6c7a06a3 447/* Public character management functions. */
234b45d4 448
6c7a06a3 449/* A cleanup function which is run to close an iconv descriptor. */
234b45d4 450
6c7a06a3
TT
451static void
452cleanup_iconv (void *p)
234b45d4 453{
6c7a06a3
TT
454 iconv_t *descp = p;
455 iconv_close (*descp);
234b45d4
KB
456}
457
6c7a06a3
TT
458void
459convert_between_encodings (const char *from, const char *to,
460 const gdb_byte *bytes, unsigned int num_bytes,
461 int width, struct obstack *output,
462 enum transliterations translit)
463{
464 iconv_t desc;
465 struct cleanup *cleanups;
466 size_t inleft;
467 char *inp;
468 unsigned int space_request;
469
470 /* Often, the host and target charsets will be the same. */
471 if (!strcmp (from, to))
472 {
473 obstack_grow (output, bytes, num_bytes);
474 return;
475 }
234b45d4 476
6c7a06a3
TT
477 desc = iconv_open (to, from);
478 if (desc == (iconv_t) -1)
479 perror_with_name ("Converting character sets");
480 cleanups = make_cleanup (cleanup_iconv, &desc);
234b45d4 481
6c7a06a3
TT
482 inleft = num_bytes;
483 inp = (char *) bytes;
234b45d4 484
6c7a06a3 485 space_request = num_bytes;
234b45d4 486
6c7a06a3 487 while (inleft > 0)
234b45d4 488 {
6c7a06a3
TT
489 char *outp;
490 size_t outleft, r;
491 int old_size;
492
493 old_size = obstack_object_size (output);
494 obstack_blank (output, space_request);
495
496 outp = obstack_base (output) + old_size;
497 outleft = space_request;
498
0dd7fb99 499 r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft);
6c7a06a3
TT
500
501 /* Now make sure that the object on the obstack only includes
502 bytes we have converted. */
503 obstack_blank (output, - (int) outleft);
504
505 if (r == (size_t) -1)
506 {
507 switch (errno)
508 {
509 case EILSEQ:
510 {
511 int i;
512
513 /* Invalid input sequence. */
514 if (translit == translit_none)
515 error (_("Could not convert character to `%s' character set"),
516 to);
517
518 /* We emit escape sequence for the bytes, skip them,
519 and try again. */
520 for (i = 0; i < width; ++i)
521 {
522 char octal[5];
523
524 sprintf (octal, "\\%.3o", *inp & 0xff);
525 obstack_grow_str (output, octal);
526
527 ++inp;
528 --inleft;
529 }
530 }
531 break;
532
533 case E2BIG:
534 /* We ran out of space in the output buffer. Make it
535 bigger next time around. */
536 space_request *= 2;
537 break;
538
539 case EINVAL:
540 /* Incomplete input sequence. FIXME: ought to report this
541 to the caller somehow. */
542 inleft = 0;
543 break;
544
545 default:
546 perror_with_name ("Internal error while converting character sets");
547 }
548 }
234b45d4 549 }
234b45d4 550
6c7a06a3 551 do_cleanups (cleanups);
234b45d4
KB
552}
553
e33d66ec 554\f
e33d66ec 555
6c7a06a3
TT
556/* An iterator that returns host wchar_t's from a target string. */
557struct wchar_iterator
e33d66ec 558{
6c7a06a3
TT
559 /* The underlying iconv descriptor. */
560 iconv_t desc;
e33d66ec 561
6c7a06a3
TT
562 /* The input string. This is updated as convert characters. */
563 char *input;
564 /* The number of bytes remaining in the input. */
565 size_t bytes;
e33d66ec 566
6c7a06a3
TT
567 /* The width of an input character. */
568 size_t width;
e33d66ec 569
6c7a06a3
TT
570 /* The output buffer and its size. */
571 gdb_wchar_t *out;
572 size_t out_size;
573};
234b45d4 574
6c7a06a3
TT
575/* Create a new iterator. */
576struct wchar_iterator *
577make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
578 size_t width)
234b45d4 579{
6c7a06a3
TT
580 struct wchar_iterator *result;
581 iconv_t desc;
234b45d4 582
732f6a93 583 desc = iconv_open (INTERMEDIATE_ENCODING, charset);
6c7a06a3
TT
584 if (desc == (iconv_t) -1)
585 perror_with_name ("Converting character sets");
234b45d4 586
6c7a06a3
TT
587 result = XNEW (struct wchar_iterator);
588 result->desc = desc;
589 result->input = (char *) input;
590 result->bytes = bytes;
591 result->width = width;
234b45d4 592
6c7a06a3
TT
593 result->out = XNEW (gdb_wchar_t);
594 result->out_size = 1;
234b45d4 595
6c7a06a3 596 return result;
e33d66ec 597}
234b45d4 598
e33d66ec 599static void
6c7a06a3 600do_cleanup_iterator (void *p)
e33d66ec 601{
6c7a06a3 602 struct wchar_iterator *iter = p;
234b45d4 603
6c7a06a3
TT
604 iconv_close (iter->desc);
605 xfree (iter->out);
606 xfree (iter);
234b45d4
KB
607}
608
6c7a06a3
TT
609struct cleanup *
610make_cleanup_wchar_iterator (struct wchar_iterator *iter)
e33d66ec 611{
6c7a06a3 612 return make_cleanup (do_cleanup_iterator, iter);
e33d66ec 613}
234b45d4 614
6c7a06a3
TT
615int
616wchar_iterate (struct wchar_iterator *iter,
617 enum wchar_iterate_result *out_result,
618 gdb_wchar_t **out_chars,
619 const gdb_byte **ptr,
620 size_t *len)
621{
622 size_t out_request;
623
624 /* Try to convert some characters. At first we try to convert just
625 a single character. The reason for this is that iconv does not
626 necessarily update its outgoing arguments when it encounters an
627 invalid input sequence -- but we want to reliably report this to
628 our caller so it can emit an escape sequence. */
629 out_request = 1;
630 while (iter->bytes > 0)
e33d66ec 631 {
6c7a06a3
TT
632 char *outptr = (char *) &iter->out[0];
633 char *orig_inptr = iter->input;
634 size_t orig_in = iter->bytes;
635 size_t out_avail = out_request * sizeof (gdb_wchar_t);
636 size_t num;
637 gdb_wchar_t result;
638
0dd7fb99
TT
639 size_t r = iconv (iter->desc,
640 (ICONV_CONST char **) &iter->input, &iter->bytes,
6c7a06a3
TT
641 &outptr, &out_avail);
642 if (r == (size_t) -1)
643 {
644 switch (errno)
645 {
646 case EILSEQ:
647 /* Invalid input sequence. Skip it, and let the caller
648 know about it. */
649 *out_result = wchar_iterate_invalid;
650 *ptr = iter->input;
651 *len = iter->width;
652 iter->input += iter->width;
653 iter->bytes -= iter->width;
654 return 0;
655
656 case E2BIG:
657 /* We ran out of space. We still might have converted a
658 character; if so, return it. Otherwise, grow the
659 buffer and try again. */
660 if (out_avail < out_request * sizeof (gdb_wchar_t))
661 break;
662
663 ++out_request;
664 if (out_request > iter->out_size)
665 {
666 iter->out_size = out_request;
667 iter->out = xrealloc (iter->out,
668 out_request * sizeof (gdb_wchar_t));
669 }
670 continue;
671
672 case EINVAL:
673 /* Incomplete input sequence. Let the caller know, and
674 arrange for future calls to see EOF. */
675 *out_result = wchar_iterate_incomplete;
676 *ptr = iter->input;
677 *len = iter->bytes;
678 iter->bytes = 0;
679 return 0;
680
681 default:
682 perror_with_name ("Internal error while converting character sets");
683 }
684 }
685
686 /* We converted something. */
687 num = out_request - out_avail / sizeof (gdb_wchar_t);
688 *out_result = wchar_iterate_ok;
689 *out_chars = iter->out;
690 *ptr = orig_inptr;
691 *len = orig_in - iter->bytes;
692 return num;
e33d66ec 693 }
6c7a06a3
TT
694
695 /* Really done. */
696 *out_result = wchar_iterate_eof;
697 return -1;
234b45d4
KB
698}
699
e33d66ec 700\f
6c7a06a3 701/* The charset.c module initialization function. */
234b45d4 702
6c7a06a3 703extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
234b45d4 704
6c7a06a3
TT
705typedef char *char_ptr;
706DEF_VEC_P (char_ptr);
234b45d4 707
6c7a06a3 708static VEC (char_ptr) *charsets;
234b45d4 709
6c7a06a3 710#ifdef PHONY_ICONV
234b45d4 711
6c7a06a3
TT
712static void
713find_charset_names (void)
234b45d4 714{
6c7a06a3
TT
715 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
716 VEC_safe_push (char_ptr, charsets, NULL);
234b45d4
KB
717}
718
6c7a06a3 719#else /* PHONY_ICONV */
fc3b640d
TT
720
721/* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
722 provides different symbols in the static and dynamic libraries.
723 So, configure may see libiconvlist but not iconvlist. But, calling
724 iconvlist is the right thing to do and will work. Hence we do a
725 check here but unconditionally call iconvlist below. */
726#if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
234b45d4 727
6c7a06a3
TT
728/* A helper function that adds some character sets to the vector of
729 all character sets. This is a callback function for iconvlist. */
730
731static int
732add_one (unsigned int count, const char *const *names, void *data)
234b45d4 733{
6c7a06a3 734 unsigned int i;
234b45d4 735
6c7a06a3
TT
736 for (i = 0; i < count; ++i)
737 VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
234b45d4 738
6c7a06a3 739 return 0;
234b45d4
KB
740}
741
6c7a06a3
TT
742static void
743find_charset_names (void)
234b45d4 744{
6c7a06a3
TT
745 iconvlist (add_one, NULL);
746 VEC_safe_push (char_ptr, charsets, NULL);
234b45d4
KB
747}
748
6c7a06a3 749#else
234b45d4 750
/* Return non-zero if LINE (output from iconv) should be ignored.
   Older iconv programs (e.g. 2.2.2) include the human readable
   introduction even when stdout is not a tty.  Newer versions omit
   the intro if stdout is not a tty.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction.  A line containing any of them
     anywhere (the match uses strstr) is ignored.  */
  static const char * const intro_fragments[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several",
      NULL
    };
  const char * const *fragment = intro_fragments;

  while (*fragment != NULL)
    {
      if (strstr (line, *fragment) != NULL)
        return 1;
      ++fragment;
    }

  return 0;
}
779
6c7a06a3
TT
780static void
781find_charset_names (void)
234b45d4 782{
732f6a93
TT
783 struct pex_obj *child;
784 char *args[3];
785 int err, status;
786 int fail = 1;
40b5c9fb
DE
787 struct gdb_environ *iconv_env;
788
789 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is not
790 a tty. We need to recognize it and ignore it. This text is subject
791 to translation, so force LANGUAGE=C. */
792 iconv_env = make_environ ();
793 init_environ (iconv_env);
794 set_in_environ (iconv_env, "LANGUAGE", "C");
795 set_in_environ (iconv_env, "LC_ALL", "C");
732f6a93
TT
796
797 child = pex_init (0, "iconv", NULL);
798
799 args[0] = "iconv";
800 args[1] = "-l";
801 args[2] = NULL;
802 /* Note that we simply ignore errors here. */
40b5c9fb
DE
803 if (!pex_run_in_environment (child, PEX_SEARCH | PEX_STDERR_TO_STDOUT,
804 "iconv", args, environ_vector (iconv_env),
805 NULL, NULL, &err))
732f6a93
TT
806 {
807 FILE *in = pex_read_output (child, 0);
808
809 /* POSIX says that iconv -l uses an unspecified format. We
810 parse the glibc and libiconv formats; feel free to add others
811 as needed. */
40b5c9fb 812
732f6a93
TT
813 while (!feof (in))
814 {
815 /* The size of buf is chosen arbitrarily. */
816 char buf[1024];
817 char *start, *r;
818 int len, keep_going;
819
820 r = fgets (buf, sizeof (buf), in);
821 if (!r)
822 break;
823 len = strlen (r);
824 if (len <= 3)
825 continue;
40b5c9fb
DE
826 if (ignore_line_p (r))
827 continue;
828
732f6a93
TT
829 /* Strip off the newline. */
830 --len;
831 /* Strip off one or two '/'s. glibc will print lines like
832 "8859_7//", but also "10646-1:1993/UCS4/". */
833 if (buf[len - 1] == '/')
834 --len;
835 if (buf[len - 1] == '/')
836 --len;
837 buf[len] = '\0';
838
839 /* libiconv will print multiple entries per line, separated
40b5c9fb
DE
840 by spaces. Older iconvs will print multiple entries per line,
841 indented by two spaces, and separated by ", "
842 (i.e. the human readable form). */
732f6a93
TT
843 start = buf;
844 while (1)
845 {
846 int keep_going;
847 char *p;
848
40b5c9fb
DE
849 /* Skip leading blanks. */
850 for (p = start; *p && *p == ' '; ++p)
851 ;
852 start = p;
853 /* Find the next space, comma, or end-of-line. */
854 for ( ; *p && *p != ' ' && *p != ','; ++p)
732f6a93
TT
855 ;
856 /* Ignore an empty result. */
857 if (p == start)
858 break;
859 keep_going = *p;
860 *p = '\0';
861 VEC_safe_push (char_ptr, charsets, xstrdup (start));
862 if (!keep_going)
863 break;
864 /* Skip any extra spaces. */
865 for (start = p + 1; *start && *start == ' '; ++start)
866 ;
867 }
868 }
234b45d4 869
732f6a93
TT
870 if (pex_get_status (child, 1, &status)
871 && WIFEXITED (status) && !WEXITSTATUS (status))
872 fail = 0;
234b45d4 873
6c7a06a3 874 }
234b45d4 875
732f6a93 876 pex_free (child);
40b5c9fb 877 free_environ (iconv_env);
234b45d4 878
732f6a93
TT
879 if (fail)
880 {
881 /* Some error occurred, so drop the vector. */
882 int ix;
883 char *elt;
884 for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix)
885 xfree (elt);
886 VEC_truncate (char_ptr, charsets, 0);
887 }
888 else
889 VEC_safe_push (char_ptr, charsets, NULL);
6c7a06a3 890}
234b45d4 891
fc3b640d 892#endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
6c7a06a3 893#endif /* PHONY_ICONV */
234b45d4 894
f870a310
TT
895/* The "auto" target charset used by default_auto_charset. */
896static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
897
898const char *
899default_auto_charset (void)
900{
901 return auto_target_charset_name;
902}
903
904const char *
905default_auto_wide_charset (void)
906{
907 return GDB_DEFAULT_TARGET_WIDE_CHARSET;
908}
909
234b45d4
KB
910void
911_initialize_charset (void)
912{
e33d66ec
EZ
913 struct cmd_list_element *new_cmd;
914
f870a310 915 /* The first element is always "auto". */
732f6a93 916 VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
6c7a06a3
TT
917 find_charset_names ();
918
919 if (VEC_length (char_ptr, charsets) > 1)
920 charset_enum = (const char **) VEC_address (char_ptr, charsets);
921 else
922 charset_enum = default_charset_names;
923
924#ifndef PHONY_ICONV
925#ifdef HAVE_LANGINFO_CODESET
f870a310
TT
926 /* The result of nl_langinfo may be overwritten later. This may
927 leak a little memory, if the user later changes the host charset,
928 but that doesn't matter much. */
929 auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
58720494 930 /* Solaris will return `646' here -- but the Solaris iconv then
06be6983
TG
931 does not accept this. Darwin (and maybe FreeBSD) may return "" here,
932 which GNU libiconv doesn't like (infinite loop). */
933 if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
58720494 934 auto_host_charset_name = "ASCII";
f870a310
TT
935 auto_target_charset_name = auto_host_charset_name;
936#elif defined (USE_WIN32API)
937 {
43484f03 938 static char w32_host_default_charset[16]; /* "CP" + x<=5 digits + paranoia. */
f870a310
TT
939
940 snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
941 "CP%d", GetACP());
942 auto_host_charset_name = w32_host_default_charset;
943 auto_target_charset_name = auto_host_charset_name;
944 }
6c7a06a3
TT
945#endif
946#endif
e33d66ec 947
7ab04401 948 add_setshow_enum_cmd ("charset", class_support,
f870a310 949 charset_enum, &host_charset_name, _("\
7ab04401
AC
950Set the host and target character sets."), _("\
951Show the host and target character sets."), _("\
3d263c1d
BI
952The `host character set' is the one used by the system GDB is running on.\n\
953The `target character set' is the one used by the program being debugged.\n\
954You may only use supersets of ASCII for your host character set; GDB does\n\
955not support any others.\n\
956To see a list of the character sets GDB supports, type `set charset <TAB>'."),
7ab04401
AC
957 /* Note that the sfunc below needs to set
958 target_charset_name, because the 'set
959 charset' command sets two variables. */
960 set_charset_sfunc,
961 show_charset,
962 &setlist, &showlist);
963
964 add_setshow_enum_cmd ("host-charset", class_support,
6c7a06a3 965 charset_enum, &host_charset_name, _("\
7ab04401
AC
966Set the host character set."), _("\
967Show the host character set."), _("\
3d263c1d
BI
968The `host character set' is the one used by the system GDB is running on.\n\
969You may only use supersets of ASCII for your host character set; GDB does\n\
970not support any others.\n\
971To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
7ab04401 972 set_host_charset_sfunc,
920d2a44 973 show_host_charset_name,
7ab04401
AC
974 &setlist, &showlist);
975
976 add_setshow_enum_cmd ("target-charset", class_support,
f870a310 977 charset_enum, &target_charset_name, _("\
7ab04401
AC
978Set the target character set."), _("\
979Show the target character set."), _("\
3d263c1d
BI
980The `target character set' is the one used by the program being debugged.\n\
981GDB translates characters and strings between the host and target\n\
982character sets as needed.\n\
983To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
7ab04401 984 set_target_charset_sfunc,
920d2a44 985 show_target_charset_name,
7ab04401 986 &setlist, &showlist);
6c7a06a3
TT
987
988 add_setshow_enum_cmd ("target-wide-charset", class_support,
f870a310 989 charset_enum, &target_wide_charset_name,
6c7a06a3
TT
990 _("\
991Set the target wide character set."), _("\
992Show the target wide character set."), _("\
993The `target wide character set' is the one used by the program being debugged.\n\
994In particular it is the encoding used by `wchar_t'.\n\
995GDB translates characters and strings between the host and target\n\
996character sets as needed.\n\
997To see a list of the character sets GDB supports, type\n\
998`set target-wide-charset'<TAB>"),
999 set_target_wide_charset_sfunc,
1000 show_target_wide_charset_name,
1001 &setlist, &showlist);
234b45d4 1002}
This page took 0.523433 seconds and 4 git commands to generate.