* MAINTAINERS (GLOBAL MAINTAINERS): Add Jan Kratochvil.
[deliverable/binutils-gdb.git] / gdb / charset.c
CommitLineData
234b45d4 1/* Character set conversion support for GDB.
1bac305b 2
4c38e0a4
JB
3 Copyright (C) 2001, 2003, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
234b45d4
KB
5
6 This file is part of GDB.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
a9762ec7 10 the Free Software Foundation; either version 3 of the License, or
234b45d4
KB
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
a9762ec7 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
234b45d4
KB
20
21#include "defs.h"
22#include "charset.h"
23#include "gdbcmd.h"
24#include "gdb_assert.h"
6c7a06a3 25#include "gdb_obstack.h"
732f6a93 26#include "gdb_wait.h"
6c7a06a3
TT
27#include "charset-list.h"
28#include "vec.h"
40b5c9fb 29#include "environ.h"
f870a310 30#include "arch-utils.h"
234b45d4
KB
31
32#include <stddef.h>
4ef3f3be 33#include "gdb_string.h"
234b45d4
KB
34#include <ctype.h>
35
43484f03
DJ
36#ifdef USE_WIN32API
37#include <windows.h>
38#endif
234b45d4
KB
39\f
40/* How GDB's character set support works
41
6c7a06a3 42 GDB has three global settings:
234b45d4
KB
43
44 - The `current host character set' is the character set GDB should
45 use in talking to the user, and which (hopefully) the user's
6c7a06a3
TT
46 terminal knows how to display properly. Most users should not
47 change this.
234b45d4
KB
48
49 - The `current target character set' is the character set the
50 program being debugged uses.
51
6c7a06a3
TT
52 - The `current target wide character set' is the wide character set
53 the program being debugged uses, that is, the encoding used for
54 wchar_t.
55
234b45d4
KB
56 There are commands to set each of these, and mechanisms for
57 choosing reasonable default values. GDB has a global list of
58 character sets that it can use as its host or target character
59 sets.
60
61 The header file `charset.h' declares various functions that
62 different pieces of GDB need to perform tasks like:
63
64 - printing target strings and characters to the user's terminal
65 (mostly target->host conversions),
66
67 - building target-appropriate representations of strings and
68 characters the user enters in expressions (mostly host->target
69 conversions),
70
6c7a06a3
TT
71 and so on.
72
73 To avoid excessive code duplication and maintenance efforts,
74 GDB simply requires a capable iconv function. Users on platforms
75 without a suitable iconv can use the GNU iconv library. */
234b45d4
KB
76
77\f
6c7a06a3 78#ifdef PHONY_ICONV
234b45d4 79
6c7a06a3
TT
80/* Provide a phony iconv that does as little as possible. Also,
81 arrange for there to be a single available character set. */
234b45d4 82
6c7a06a3
TT
/* With the phony iconv only one character set is available, so every
   default is pinned to it and the list of names holds just that
   entry.  */
#undef GDB_DEFAULT_HOST_CHARSET
#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
#undef DEFAULT_CHARSET_NAMES
#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,

/* Redirect the iconv API to the phony_* functions defined in this
   file.  The conversion descriptor becomes a plain int.  */
#undef iconv_t
#define iconv_t int
#undef iconv_open
#define iconv_open phony_iconv_open
#undef iconv
#define iconv phony_iconv
#undef iconv_close
#define iconv_close phony_iconv_close

/* phony_iconv takes a `const char **' input argument, so casts
   written with ICONV_CONST must produce that type.  */
#undef ICONV_CONST
#define ICONV_CONST const

/* Some systems don't have EILSEQ, so we define it here, but not as
   EINVAL, because callers of `iconv' want to distinguish EINVAL and
   EILSEQ.  This is what iconv.h from libiconv does as well.  Note
   that wchar.h may also define EILSEQ, so this needs to be after we
   include wchar.h, which happens in defs.h through gdb_wchar.h.  */
#ifndef EILSEQ
#define EILSEQ ENOENT
#endif
110
6c7a06a3 111iconv_t
62234ccc 112phony_iconv_open (const char *to, const char *from)
6c7a06a3 113{
b8899f2b 114 /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
6c7a06a3 115 We allow conversions to wchar_t and the host charset. */
b8899f2b 116 if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
6c7a06a3
TT
117 && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
118 return -1;
119 if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
120 return -1;
234b45d4 121
b8899f2b 122 /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is
6c7a06a3 123 used as a flag in calls to iconv. */
b8899f2b 124 return !strcmp (from, "UTF-32BE");
6c7a06a3 125}
234b45d4 126
/* Phony replacement for iconv_close.  The phony descriptor owns no
   resources, so there is nothing to release; always returns 0.  */
int
phony_iconv_close (iconv_t arg)
{
  (void) arg;
  return 0;
}
234b45d4 132
/* Phony replacement for iconv.

   UTF_FLAG is the descriptor returned by phony_iconv_open: non-zero
   means the input is UTF-32BE, zero means input bytes are copied to
   the output unchanged.

   *INBUF / *INBYTESLEFT describe the remaining input and
   *OUTBUF / *OUTBYTESLEFT the remaining output space; all four are
   advanced past whatever was successfully converted.

   Returns 0 on success.  On failure returns (size_t) -1 with errno
   set to:
     EILSEQ - a UTF-32BE character is >= 256 and cannot be stored in
              a single output byte;
     E2BIG  - the output buffer is full while input remains;
     EINVAL - the UTF-32BE input ends with a partial (< 4 byte)
              character.  */
size_t
phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
             char **outbuf, size_t *outbytesleft)
{
  if (utf_flag)
    {
      while (*inbytesleft >= 4)
        {
          size_t j;
          unsigned long c = 0;

          /* Assemble one big-endian 32-bit code point.  */
          for (j = 0; j < 4; ++j)
            {
              c <<= 8;
              c += (*inbuf)[j] & 0xff;
            }

          if (c >= 256)
            {
              errno = EILSEQ;
              return (size_t) -1;
            }
          /* Never write past the end of the output buffer.  */
          if (*outbytesleft < 1)
            {
              errno = E2BIG;
              return (size_t) -1;
            }
          **outbuf = c & 0xff;
          ++*outbuf;
          --*outbytesleft;

          /* A whole four-byte character has been consumed.  */
          *inbuf += 4;
          *inbytesleft -= 4;
        }
      if (*inbytesleft)
        {
          /* Partial character at the end of the input.  */
          errno = EINVAL;
          return (size_t) -1;
        }
    }
  else
    {
      /* In all other cases we simply copy input bytes to the
         output.  */
      size_t amt = *inbytesleft;

      if (amt > *outbytesleft)
        amt = *outbytesleft;
      memcpy (*outbuf, *inbuf, amt);
      *inbuf += amt;
      *outbuf += amt;
      *inbytesleft -= amt;
      *outbytesleft -= amt;

      if (*inbytesleft)
        {
          errno = E2BIG;
          return (size_t) -1;
        }
    }

  /* The number of non-reversible conversions -- but they were all
     reversible.  */
  return 0;
}
234b45d4 193
6c7a06a3 194#endif
234b45d4
KB
195
196
197\f
198/* The global lists of character sets and translations. */
199
200
e33d66ec
EZ
/* Fallback defaults for the target character sets, used only when
   nothing earlier has already defined these macros.  */
#ifndef GDB_DEFAULT_TARGET_CHARSET
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#endif

#ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
#endif
208
209static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
210static const char *host_charset_name = "auto";
920d2a44
AC
211static void
212show_host_charset_name (struct ui_file *file, int from_tty,
213 struct cmd_list_element *c,
214 const char *value)
215{
6c7a06a3
TT
216 if (!strcmp (value, "auto"))
217 fprintf_filtered (file,
218 _("The host character set is \"auto; currently %s\".\n"),
219 auto_host_charset_name);
220 else
221 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
920d2a44
AC
222}
223
/* The user-visible target charset setting; "auto" by default.  */
static const char *target_charset_name = "auto";

/* Print the target charset setting VALUE to FILE.  When VALUE is
   "auto", also report the charset the current architecture picks.  */
static void
show_target_charset_name (struct ui_file *file, int from_tty,
                          struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_charset (get_current_arch ()));
}
238
/* The user-visible target wide charset setting; "auto" by default.  */
static const char *target_wide_charset_name = "auto";

/* Print the target wide charset setting VALUE to FILE.  When VALUE
   is "auto", also report the wide charset the current architecture
   picks.  */
static void
show_target_wide_charset_name (struct ui_file *file, int from_tty,
                               struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file,
                        _("The target wide character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target wide character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_wide_charset (get_current_arch ()));
}
e33d66ec 253
/* Built-in, zero-terminated list of charset names.
   _initialize_charset falls back to it when find_charset_names did
   not add any names to the vector.  */
static const char *default_charset_names[] =
{
  DEFAULT_CHARSET_NAMES
  0
};

/* The NULL-terminated list of names offered by the charset "set"
   commands; assigned once in _initialize_charset.  */
static const char **charset_enum;
234b45d4 261
6c7a06a3
TT
262\f
/* If the target wide character set has big- or little-endian
   variants, these are the corresponding names.  Each is NULL when no
   such variant was found in charset_enum.  */
static const char *target_wide_charset_be_name;
static const char *target_wide_charset_le_name;

/* The architecture for which the BE- and LE-names are valid.  Reset
   to NULL to force set_be_le_names to recompute them.  */
static struct gdbarch *be_le_arch;
270
271/* A helper function which sets the target wide big- and little-endian
272 character set names, if possible. */
234b45d4 273
6c7a06a3 274static void
f870a310 275set_be_le_names (struct gdbarch *gdbarch)
234b45d4 276{
6c7a06a3 277 int i, len;
f870a310
TT
278 const char *target_wide;
279
280 if (be_le_arch == gdbarch)
281 return;
282 be_le_arch = gdbarch;
234b45d4 283
6c7a06a3
TT
284 target_wide_charset_le_name = NULL;
285 target_wide_charset_be_name = NULL;
234b45d4 286
f870a310
TT
287 target_wide = target_wide_charset_name;
288 if (!strcmp (target_wide, "auto"))
289 target_wide = gdbarch_auto_wide_charset (gdbarch);
290
291 len = strlen (target_wide);
6c7a06a3
TT
292 for (i = 0; charset_enum[i]; ++i)
293 {
f870a310 294 if (strncmp (target_wide, charset_enum[i], len))
6c7a06a3
TT
295 continue;
296 if ((charset_enum[i][len] == 'B'
297 || charset_enum[i][len] == 'L')
298 && charset_enum[i][len + 1] == 'E'
299 && charset_enum[i][len + 2] == '\0')
300 {
301 if (charset_enum[i][len] == 'B')
302 target_wide_charset_be_name = charset_enum[i];
303 else
304 target_wide_charset_le_name = charset_enum[i];
305 }
306 }
234b45d4
KB
307}
308
6c7a06a3
TT
/* Validation helper shared by the 'set charset', 'set host-charset',
   'set target-charset' and 'set target-wide-charset' sfuncs.  */
234b45d4
KB
311
312static void
f870a310 313validate (struct gdbarch *gdbarch)
234b45d4 314{
6c7a06a3
TT
315 iconv_t desc;
316 const char *host_cset = host_charset ();
f870a310
TT
317 const char *target_cset = target_charset (gdbarch);
318 const char *target_wide_cset = target_wide_charset_name;
c5504eaf 319
f870a310
TT
320 if (!strcmp (target_wide_cset, "auto"))
321 target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
234b45d4 322
f870a310 323 desc = iconv_open (target_wide_cset, host_cset);
6c7a06a3
TT
324 if (desc == (iconv_t) -1)
325 error ("Cannot convert between character sets `%s' and `%s'",
f870a310 326 target_wide_cset, host_cset);
6c7a06a3 327 iconv_close (desc);
234b45d4 328
f870a310 329 desc = iconv_open (target_cset, host_cset);
6c7a06a3
TT
330 if (desc == (iconv_t) -1)
331 error ("Cannot convert between character sets `%s' and `%s'",
f870a310 332 target_cset, host_cset);
6c7a06a3 333 iconv_close (desc);
234b45d4 334
f870a310
TT
335 /* Clear the cache. */
336 be_le_arch = NULL;
234b45d4
KB
337}
338
6c7a06a3
TT
339/* This is the sfunc for the 'set charset' command. */
340static void
341set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c)
234b45d4 342{
6c7a06a3
TT
343 /* CAREFUL: set the target charset here as well. */
344 target_charset_name = host_charset_name;
f870a310 345 validate (get_current_arch ());
234b45d4
KB
346}
347
6c7a06a3
TT
/* sfunc for the 'set host-charset' command.  A wrapper with the
   signature the command machinery requires; it only validates the
   settings against the current architecture.  */
static void
set_host_charset_sfunc (char *charset, int from_tty,
                        struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
356
6c7a06a3
TT
/* sfunc for the 'set target-charset' command; validates the settings
   against the current architecture.  */
static void
set_target_charset_sfunc (char *charset, int from_tty,
                          struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
364
6c7a06a3
TT
/* sfunc for the 'set target-wide-charset' command; validates the
   settings against the current architecture.  */
static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
                               struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
372
6c7a06a3
TT
/* sfunc for the 'show charset' command.  Reports the host, target
   and target wide character set settings, in that order, by
   delegating to the individual show functions.  NAME is unused; the
   current values of the setting variables are printed instead.  */
static void
show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c,
              const char *name)
{
  show_host_charset_name (file, from_tty, c, host_charset_name);
  show_target_charset_name (file, from_tty, c, target_charset_name);
  show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name);
}
382
234b45d4 383\f
6c7a06a3 384/* Accessor functions. */
234b45d4 385
6c7a06a3
TT
386const char *
387host_charset (void)
234b45d4 388{
6c7a06a3
TT
389 if (!strcmp (host_charset_name, "auto"))
390 return auto_host_charset_name;
391 return host_charset_name;
234b45d4
KB
392}
393
6c7a06a3 394const char *
f870a310 395target_charset (struct gdbarch *gdbarch)
234b45d4 396{
f870a310
TT
397 if (!strcmp (target_charset_name, "auto"))
398 return gdbarch_auto_charset (gdbarch);
6c7a06a3 399 return target_charset_name;
234b45d4 400}
234b45d4 401
6c7a06a3 402const char *
f870a310 403target_wide_charset (struct gdbarch *gdbarch)
234b45d4 404{
f870a310
TT
405 enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
406
407 set_be_le_names (gdbarch);
e17a4113 408 if (byte_order == BFD_ENDIAN_BIG)
234b45d4 409 {
6c7a06a3
TT
410 if (target_wide_charset_be_name)
411 return target_wide_charset_be_name;
234b45d4 412 }
6c7a06a3 413 else
234b45d4 414 {
6c7a06a3
TT
415 if (target_wide_charset_le_name)
416 return target_wide_charset_le_name;
234b45d4
KB
417 }
418
f870a310
TT
419 if (!strcmp (target_wide_charset_name, "auto"))
420 return gdbarch_auto_wide_charset (gdbarch);
421
6c7a06a3 422 return target_wide_charset_name;
234b45d4
KB
423}
424
234b45d4 425\f
6c7a06a3
TT
426/* Host character set management. For the time being, we assume that
427 the host character set is some superset of ASCII. */
234b45d4 428
6c7a06a3
TT
/* Map the host character C to its control-character counterpart.
   '?' maps to 0177; any other character is masked with 0237.  */
char
host_letter_to_control_character (char c)
{
  return (c == '?') ? 0177 : (c & 0237);
}
436
6c7a06a3
TT
/* Convert a host character, C, to its hex value.  C must already have
   been validated using isxdigit; any other character trips the
   assertion below.  */

int
host_hex_value (char c)
{
  /* Plain char may be signed; the <ctype.h> functions require a
     value representable as unsigned char.  */
  if (isdigit ((unsigned char) c))
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
450
234b45d4 451\f
6c7a06a3 452/* Public character management functions. */
234b45d4 453
/* Cleanup callback: P points to an iconv descriptor, which is
   closed.  */

static void
cleanup_iconv (void *p)
{
  iconv_close (*(iconv_t *) p);
}
462
6c7a06a3
TT
463void
464convert_between_encodings (const char *from, const char *to,
465 const gdb_byte *bytes, unsigned int num_bytes,
466 int width, struct obstack *output,
467 enum transliterations translit)
468{
469 iconv_t desc;
470 struct cleanup *cleanups;
471 size_t inleft;
472 char *inp;
473 unsigned int space_request;
474
475 /* Often, the host and target charsets will be the same. */
476 if (!strcmp (from, to))
477 {
478 obstack_grow (output, bytes, num_bytes);
479 return;
480 }
234b45d4 481
6c7a06a3
TT
482 desc = iconv_open (to, from);
483 if (desc == (iconv_t) -1)
484 perror_with_name ("Converting character sets");
485 cleanups = make_cleanup (cleanup_iconv, &desc);
234b45d4 486
6c7a06a3
TT
487 inleft = num_bytes;
488 inp = (char *) bytes;
234b45d4 489
6c7a06a3 490 space_request = num_bytes;
234b45d4 491
6c7a06a3 492 while (inleft > 0)
234b45d4 493 {
6c7a06a3
TT
494 char *outp;
495 size_t outleft, r;
496 int old_size;
497
498 old_size = obstack_object_size (output);
499 obstack_blank (output, space_request);
500
501 outp = obstack_base (output) + old_size;
502 outleft = space_request;
503
0dd7fb99 504 r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft);
6c7a06a3
TT
505
506 /* Now make sure that the object on the obstack only includes
507 bytes we have converted. */
508 obstack_blank (output, - (int) outleft);
509
510 if (r == (size_t) -1)
511 {
512 switch (errno)
513 {
514 case EILSEQ:
515 {
516 int i;
517
518 /* Invalid input sequence. */
519 if (translit == translit_none)
520 error (_("Could not convert character to `%s' character set"),
521 to);
522
523 /* We emit escape sequence for the bytes, skip them,
524 and try again. */
525 for (i = 0; i < width; ++i)
526 {
527 char octal[5];
528
529 sprintf (octal, "\\%.3o", *inp & 0xff);
530 obstack_grow_str (output, octal);
531
532 ++inp;
533 --inleft;
534 }
535 }
536 break;
537
538 case E2BIG:
539 /* We ran out of space in the output buffer. Make it
540 bigger next time around. */
541 space_request *= 2;
542 break;
543
544 case EINVAL:
545 /* Incomplete input sequence. FIXME: ought to report this
546 to the caller somehow. */
547 inleft = 0;
548 break;
549
550 default:
551 perror_with_name ("Internal error while converting character sets");
552 }
553 }
234b45d4 554 }
234b45d4 555
6c7a06a3 556 do_cleanups (cleanups);
234b45d4
KB
557}
558
e33d66ec 559\f
e33d66ec 560
6c7a06a3
TT
/* An iterator that returns host wchar_t's from a target string.  */
struct wchar_iterator
{
  /* The underlying iconv descriptor.  */
  iconv_t desc;

  /* The input string.  This is updated as we convert characters.  */
  char *input;
  /* The number of bytes remaining in the input.  */
  size_t bytes;

  /* The width of an input character.  */
  size_t width;

  /* The output buffer and its size, counted in gdb_wchar_t
     elements.  */
  gdb_wchar_t *out;
  size_t out_size;
};
234b45d4 579
6c7a06a3
TT
580/* Create a new iterator. */
581struct wchar_iterator *
582make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
583 size_t width)
234b45d4 584{
6c7a06a3
TT
585 struct wchar_iterator *result;
586 iconv_t desc;
234b45d4 587
732f6a93 588 desc = iconv_open (INTERMEDIATE_ENCODING, charset);
6c7a06a3
TT
589 if (desc == (iconv_t) -1)
590 perror_with_name ("Converting character sets");
234b45d4 591
6c7a06a3
TT
592 result = XNEW (struct wchar_iterator);
593 result->desc = desc;
594 result->input = (char *) input;
595 result->bytes = bytes;
596 result->width = width;
234b45d4 597
6c7a06a3
TT
598 result->out = XNEW (gdb_wchar_t);
599 result->out_size = 1;
234b45d4 600
6c7a06a3 601 return result;
e33d66ec 602}
234b45d4 603
e33d66ec 604static void
6c7a06a3 605do_cleanup_iterator (void *p)
e33d66ec 606{
6c7a06a3 607 struct wchar_iterator *iter = p;
234b45d4 608
6c7a06a3
TT
609 iconv_close (iter->desc);
610 xfree (iter->out);
611 xfree (iter);
234b45d4
KB
612}
613
6c7a06a3
TT
/* Register a cleanup that destroys ITER via do_cleanup_iterator and
   return the cleanup handle produced by make_cleanup.  */
struct cleanup *
make_cleanup_wchar_iterator (struct wchar_iterator *iter)
{
  return make_cleanup (do_cleanup_iterator, iter);
}
234b45d4 619
6c7a06a3
TT
620int
621wchar_iterate (struct wchar_iterator *iter,
622 enum wchar_iterate_result *out_result,
623 gdb_wchar_t **out_chars,
624 const gdb_byte **ptr,
625 size_t *len)
626{
627 size_t out_request;
628
629 /* Try to convert some characters. At first we try to convert just
630 a single character. The reason for this is that iconv does not
631 necessarily update its outgoing arguments when it encounters an
632 invalid input sequence -- but we want to reliably report this to
633 our caller so it can emit an escape sequence. */
634 out_request = 1;
635 while (iter->bytes > 0)
e33d66ec 636 {
6c7a06a3
TT
637 char *outptr = (char *) &iter->out[0];
638 char *orig_inptr = iter->input;
639 size_t orig_in = iter->bytes;
640 size_t out_avail = out_request * sizeof (gdb_wchar_t);
641 size_t num;
0dd7fb99
TT
642 size_t r = iconv (iter->desc,
643 (ICONV_CONST char **) &iter->input, &iter->bytes,
6c7a06a3 644 &outptr, &out_avail);
c5504eaf 645
6c7a06a3
TT
646 if (r == (size_t) -1)
647 {
648 switch (errno)
649 {
650 case EILSEQ:
651 /* Invalid input sequence. Skip it, and let the caller
652 know about it. */
653 *out_result = wchar_iterate_invalid;
654 *ptr = iter->input;
655 *len = iter->width;
656 iter->input += iter->width;
657 iter->bytes -= iter->width;
658 return 0;
659
660 case E2BIG:
661 /* We ran out of space. We still might have converted a
662 character; if so, return it. Otherwise, grow the
663 buffer and try again. */
664 if (out_avail < out_request * sizeof (gdb_wchar_t))
665 break;
666
667 ++out_request;
668 if (out_request > iter->out_size)
669 {
670 iter->out_size = out_request;
671 iter->out = xrealloc (iter->out,
672 out_request * sizeof (gdb_wchar_t));
673 }
674 continue;
675
676 case EINVAL:
677 /* Incomplete input sequence. Let the caller know, and
678 arrange for future calls to see EOF. */
679 *out_result = wchar_iterate_incomplete;
680 *ptr = iter->input;
681 *len = iter->bytes;
682 iter->bytes = 0;
683 return 0;
684
685 default:
686 perror_with_name ("Internal error while converting character sets");
687 }
688 }
689
690 /* We converted something. */
691 num = out_request - out_avail / sizeof (gdb_wchar_t);
692 *out_result = wchar_iterate_ok;
693 *out_chars = iter->out;
694 *ptr = orig_inptr;
695 *len = orig_in - iter->bytes;
696 return num;
e33d66ec 697 }
6c7a06a3
TT
698
699 /* Really done. */
700 *out_result = wchar_iterate_eof;
701 return -1;
234b45d4
KB
702}
703
e33d66ec 704\f
6c7a06a3 705/* The charset.c module initialization function. */
234b45d4 706
extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */

/* Instantiate the vector type used for the list of charset names.  */
DEF_VEC_P (char_ptr);

/* The charset names collected at start-up.  _initialize_charset
   pushes "auto" first; find_charset_names appends the rest and, on
   success, a terminating NULL.  */
static VEC (char_ptr) *charsets;
234b45d4 712
6c7a06a3 713#ifdef PHONY_ICONV
234b45d4 714
6c7a06a3
TT
/* Phony-iconv variant: the only available charset is the default
   host charset.  Append it and the terminating NULL to the vector of
   charset names.  */
static void
find_charset_names (void)
{
  VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
  VEC_safe_push (char_ptr, charsets, NULL);
}
721
6c7a06a3 722#else /* PHONY_ICONV */
fc3b640d
TT
723
724/* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
725 provides different symbols in the static and dynamic libraries.
726 So, configure may see libiconvlist but not iconvlist. But, calling
727 iconvlist is the right thing to do and will work. Hence we do a
728 check here but unconditionally call iconvlist below. */
729#if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
234b45d4 730
6c7a06a3
TT
731/* A helper function that adds some character sets to the vector of
732 all character sets. This is a callback function for iconvlist. */
733
734static int
735add_one (unsigned int count, const char *const *names, void *data)
234b45d4 736{
6c7a06a3 737 unsigned int i;
234b45d4 738
6c7a06a3
TT
739 for (i = 0; i < count; ++i)
740 VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
234b45d4 741
6c7a06a3 742 return 0;
234b45d4
KB
743}
744
6c7a06a3
TT
/* iconvlist variant: have the library enumerate its charsets through
   add_one, then append the terminating NULL.  */
static void
find_charset_names (void)
{
  iconvlist (add_one, NULL);
  VEC_safe_push (char_ptr, charsets, NULL);
}
751
6c7a06a3 752#else
234b45d4 753
40b5c9fb
DE
/* Return non-zero if LINE (output from iconv) should be ignored.
   Older iconv programs (e.g. 2.2.2) include the human readable
   introduction even when stdout is not a tty.  Newer versions omit
   the intro if stdout is not a tty.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction text.  A line containing any of
     them, at any position, is ignored.  */
  static const char * const intro_fragments[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several",
      NULL
    };
  const char * const *frag = intro_fragments;

  while (*frag != NULL)
    {
      if (strstr (line, *frag) != NULL)
        return 1;
      ++frag;
    }

  return 0;
}
782
6c7a06a3
TT
783static void
784find_charset_names (void)
234b45d4 785{
732f6a93
TT
786 struct pex_obj *child;
787 char *args[3];
788 int err, status;
789 int fail = 1;
40b5c9fb
DE
790 struct gdb_environ *iconv_env;
791
792 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is not
793 a tty. We need to recognize it and ignore it. This text is subject
794 to translation, so force LANGUAGE=C. */
795 iconv_env = make_environ ();
796 init_environ (iconv_env);
797 set_in_environ (iconv_env, "LANGUAGE", "C");
798 set_in_environ (iconv_env, "LC_ALL", "C");
732f6a93 799
40618926 800 child = pex_init (PEX_USE_PIPES, "iconv", NULL);
732f6a93
TT
801
802 args[0] = "iconv";
803 args[1] = "-l";
804 args[2] = NULL;
805 /* Note that we simply ignore errors here. */
40b5c9fb
DE
806 if (!pex_run_in_environment (child, PEX_SEARCH | PEX_STDERR_TO_STDOUT,
807 "iconv", args, environ_vector (iconv_env),
808 NULL, NULL, &err))
732f6a93
TT
809 {
810 FILE *in = pex_read_output (child, 0);
811
812 /* POSIX says that iconv -l uses an unspecified format. We
813 parse the glibc and libiconv formats; feel free to add others
814 as needed. */
40b5c9fb 815
732f6a93
TT
816 while (!feof (in))
817 {
818 /* The size of buf is chosen arbitrarily. */
819 char buf[1024];
820 char *start, *r;
8ea13695 821 int len;
732f6a93
TT
822
823 r = fgets (buf, sizeof (buf), in);
824 if (!r)
825 break;
826 len = strlen (r);
827 if (len <= 3)
828 continue;
40b5c9fb
DE
829 if (ignore_line_p (r))
830 continue;
831
732f6a93
TT
832 /* Strip off the newline. */
833 --len;
834 /* Strip off one or two '/'s. glibc will print lines like
835 "8859_7//", but also "10646-1:1993/UCS4/". */
836 if (buf[len - 1] == '/')
837 --len;
838 if (buf[len - 1] == '/')
839 --len;
840 buf[len] = '\0';
841
842 /* libiconv will print multiple entries per line, separated
40b5c9fb
DE
843 by spaces. Older iconvs will print multiple entries per line,
844 indented by two spaces, and separated by ", "
845 (i.e. the human readable form). */
732f6a93
TT
846 start = buf;
847 while (1)
848 {
849 int keep_going;
850 char *p;
851
40b5c9fb
DE
852 /* Skip leading blanks. */
853 for (p = start; *p && *p == ' '; ++p)
854 ;
855 start = p;
856 /* Find the next space, comma, or end-of-line. */
857 for ( ; *p && *p != ' ' && *p != ','; ++p)
732f6a93
TT
858 ;
859 /* Ignore an empty result. */
860 if (p == start)
861 break;
862 keep_going = *p;
863 *p = '\0';
864 VEC_safe_push (char_ptr, charsets, xstrdup (start));
865 if (!keep_going)
866 break;
867 /* Skip any extra spaces. */
868 for (start = p + 1; *start && *start == ' '; ++start)
869 ;
870 }
871 }
234b45d4 872
732f6a93
TT
873 if (pex_get_status (child, 1, &status)
874 && WIFEXITED (status) && !WEXITSTATUS (status))
875 fail = 0;
234b45d4 876
6c7a06a3 877 }
234b45d4 878
732f6a93 879 pex_free (child);
40b5c9fb 880 free_environ (iconv_env);
234b45d4 881
732f6a93
TT
882 if (fail)
883 {
884 /* Some error occurred, so drop the vector. */
885 int ix;
886 char *elt;
887 for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix)
888 xfree (elt);
889 VEC_truncate (char_ptr, charsets, 0);
890 }
891 else
892 VEC_safe_push (char_ptr, charsets, NULL);
6c7a06a3 893}
234b45d4 894
fc3b640d 895#endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
6c7a06a3 896#endif /* PHONY_ICONV */
234b45d4 897
f870a310
TT
/* The "auto" target charset used by default_auto_charset.  Starts
   out as the compile-time default; _initialize_charset may replace
   it with the detected host charset.  */
static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;

/* Return the target charset to use while the target charset setting
   is "auto".  */
const char *
default_auto_charset (void)
{
  return auto_target_charset_name;
}
906
/* Return the target wide charset to use while the target wide
   charset setting is "auto": always the compile-time default.  */
const char *
default_auto_wide_charset (void)
{
  return GDB_DEFAULT_TARGET_WIDE_CHARSET;
}
912
234b45d4
KB
/* Module initialization.  Builds the list of charset names, works
   out the automatic host and target charsets, and registers the
   'set/show charset', 'host-charset', 'target-charset' and
   'target-wide-charset' commands.  */
void
_initialize_charset (void)
{
  /* The first element is always "auto".  */
  VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
  find_charset_names ();

  /* More than one element means find_charset_names added names;
     otherwise fall back to the built-in list.  */
  if (VEC_length (char_ptr, charsets) > 1)
    charset_enum = (const char **) VEC_address (char_ptr, charsets);
  else
    charset_enum = default_charset_names;

#ifndef PHONY_ICONV
#ifdef HAVE_LANGINFO_CODESET
  /* The result of nl_langinfo may be overwritten later.  This may
     leak a little memory, if the user later changes the host charset,
     but that doesn't matter much.  */
  auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
  /* Solaris will return `646' here -- but the Solaris iconv then
     does not accept this.  Darwin (and maybe FreeBSD) may return "" here,
     which GNU libiconv doesn't like (infinite loop).  */
  if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
    auto_host_charset_name = "ASCII";
  auto_target_charset_name = auto_host_charset_name;
#elif defined (USE_WIN32API)
  {
    static char w32_host_default_charset[16]; /* "CP" + x<=5 digits + paranoia. */

    /* Name the charset after the code page number GetACP returns.  */
    snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
              "CP%d", GetACP());
    auto_host_charset_name = w32_host_default_charset;
    auto_target_charset_name = auto_host_charset_name;
  }
#endif
#endif

  add_setshow_enum_cmd ("charset", class_support,
                        charset_enum, &host_charset_name, _("\
Set the host and target character sets."), _("\
Show the host and target character sets."), _("\
The `host character set' is the one used by the system GDB is running on.\n\
The `target character set' is the one used by the program being debugged.\n\
You may only use supersets of ASCII for your host character set; GDB does\n\
not support any others.\n\
To see a list of the character sets GDB supports, type `set charset <TAB>'."),
                        /* Note that the sfunc below needs to set
                           target_charset_name, because the 'set
                           charset' command sets two variables.  */
                        set_charset_sfunc,
                        show_charset,
                        &setlist, &showlist);

  add_setshow_enum_cmd ("host-charset", class_support,
                        charset_enum, &host_charset_name, _("\
Set the host character set."), _("\
Show the host character set."), _("\
The `host character set' is the one used by the system GDB is running on.\n\
You may only use supersets of ASCII for your host character set; GDB does\n\
not support any others.\n\
To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
                        set_host_charset_sfunc,
                        show_host_charset_name,
                        &setlist, &showlist);

  add_setshow_enum_cmd ("target-charset", class_support,
                        charset_enum, &target_charset_name, _("\
Set the target character set."), _("\
Show the target character set."), _("\
The `target character set' is the one used by the program being debugged.\n\
GDB translates characters and strings between the host and target\n\
character sets as needed.\n\
To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
                        set_target_charset_sfunc,
                        show_target_charset_name,
                        &setlist, &showlist);

  add_setshow_enum_cmd ("target-wide-charset", class_support,
                        charset_enum, &target_wide_charset_name,
                        _("\
Set the target wide character set."), _("\
Show the target wide character set."), _("\
The `target wide character set' is the one used by the program being debugged.\n\
In particular it is the encoding used by `wchar_t'.\n\
GDB translates characters and strings between the host and target\n\
character sets as needed.\n\
To see a list of the character sets GDB supports, type\n\
`set target-wide-charset'<TAB>"),
                        set_target_wide_charset_sfunc,
                        show_target_wide_charset_name,
                        &setlist, &showlist);
}
This page took 0.527469 seconds and 4 git commands to generate.