fadd232764622e868ebc63cdeaa58820af9bd927
[deliverable/binutils-gdb.git] / gdb / charset.c
1 /* Character set conversion support for GDB.
2
3 Copyright (C) 2001, 2003, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5
6 This file is part of GDB.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20
21 #include "defs.h"
22 #include "charset.h"
23 #include "gdbcmd.h"
24 #include "gdb_assert.h"
25 #include "gdb_obstack.h"
26 #include "gdb_wait.h"
27 #include "charset-list.h"
28 #include "vec.h"
29 #include "environ.h"
30 #include "arch-utils.h"
31
32 #include <stddef.h>
33 #include "gdb_string.h"
34 #include <ctype.h>
35
36 #ifdef USE_WIN32API
37 #include <windows.h>
38 #endif
39 \f
40 /* How GDB's character set support works
41
42 GDB has three global settings:
43
44 - The `current host character set' is the character set GDB should
45 use in talking to the user, and which (hopefully) the user's
46 terminal knows how to display properly. Most users should not
47 change this.
48
49 - The `current target character set' is the character set the
50 program being debugged uses.
51
52 - The `current target wide character set' is the wide character set
53 the program being debugged uses, that is, the encoding used for
54 wchar_t.
55
56 There are commands to set each of these, and mechanisms for
57 choosing reasonable default values. GDB has a global list of
58 character sets that it can use as its host or target character
59 sets.
60
61 The header file `charset.h' declares various functions that
62 different pieces of GDB need to perform tasks like:
63
64 - printing target strings and characters to the user's terminal
65 (mostly target->host conversions),
66
67 - building target-appropriate representations of strings and
68 characters the user enters in expressions (mostly host->target
69 conversions),
70
71 and so on.
72
73 To avoid excessive code duplication and maintenance efforts,
74 GDB simply requires a capable iconv function. Users on platforms
75 without a suitable iconv can use the GNU iconv library. */
76
77 \f
78 #ifdef PHONY_ICONV
79
80 /* Provide a phony iconv that does as little as possible. Also,
81 arrange for there to be a single available character set. */
82
/* Only one character set is available with the phony iconv, so every
   default names it and the list of known charsets holds just that
   one entry.  */
#undef GDB_DEFAULT_HOST_CHARSET
#undef DEFAULT_CHARSET_NAMES
#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,

/* Discard any macro definitions of the iconv names so that the
   replacements defined below can use them.  The phony descriptor is
   a plain int.  */
#undef iconv_t
#undef iconv_open
#undef iconv
#undef iconv_close
#undef ICONV_CONST
#define iconv_t int
#define ICONV_CONST const

/* EILSEQ is missing on some systems.  Supply a value for it that is
   distinct from EINVAL, since callers of `iconv' need to tell the two
   apart; libiconv's iconv.h makes the same choice.  wchar.h may
   define EILSEQ as well, so this must come after wchar.h has been
   included (defs.h does that through gdb_wchar.h).  */
#if !defined (EILSEQ)
#define EILSEQ ENOENT
#endif
107
108 iconv_t
109 iconv_open (const char *to, const char *from)
110 {
111 /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
112 We allow conversions to wchar_t and the host charset. */
113 if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
114 && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
115 return -1;
116 if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
117 return -1;
118
119 /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is
120 used as a flag in calls to iconv. */
121 return !strcmp (from, "UTF-32BE");
122 }
123
124 int
125 iconv_close (iconv_t arg)
126 {
127 return 0;
128 }
129
130 size_t
131 iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
132 char **outbuf, size_t *outbytesleft)
133 {
134 if (utf_flag)
135 {
136 while (*inbytesleft >= 4)
137 {
138 size_t j;
139 unsigned long c = 0;
140
141 for (j = 0; j < 4; ++j)
142 {
143 c <<= 8;
144 c += (*inbuf)[j] & 0xff;
145 }
146
147 if (c >= 256)
148 {
149 errno = EILSEQ;
150 return -1;
151 }
152 **outbuf = c & 0xff;
153 ++*outbuf;
154 --*outbytesleft;
155
156 ++*inbuf;
157 *inbytesleft -= 4;
158 }
159 if (*inbytesleft < 4)
160 {
161 errno = EINVAL;
162 return -1;
163 }
164 }
165 else
166 {
167 /* In all other cases we simply copy input bytes to the
168 output. */
169 size_t amt = *inbytesleft;
170 if (amt > *outbytesleft)
171 amt = *outbytesleft;
172 memcpy (*outbuf, *inbuf, amt);
173 *inbuf += amt;
174 *outbuf += amt;
175 *inbytesleft -= amt;
176 *outbytesleft -= amt;
177 }
178
179 if (*inbytesleft)
180 {
181 errno = E2BIG;
182 return -1;
183 }
184
185 /* The number of non-reversible conversions -- but they were all
186 reversible. */
187 return 0;
188 }
189
190 #endif
191
192
193 \f
194 /* The global lists of character sets and translations. */
195
196
/* Fallback names for the target character sets, used when nothing
   earlier has defined them.  */
#if !defined (GDB_DEFAULT_TARGET_CHARSET)
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#endif

#if !defined (GDB_DEFAULT_TARGET_WIDE_CHARSET)
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
#endif
204
205 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
206 static const char *host_charset_name = "auto";
207 static void
208 show_host_charset_name (struct ui_file *file, int from_tty,
209 struct cmd_list_element *c,
210 const char *value)
211 {
212 if (!strcmp (value, "auto"))
213 fprintf_filtered (file,
214 _("The host character set is \"auto; currently %s\".\n"),
215 auto_host_charset_name);
216 else
217 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
218 }
219
/* The current value of the target charset setting.  */
static const char *target_charset_name = "auto";

/* Print the target charset setting VALUE to FILE.  For "auto" the
   charset chosen by the current architecture is printed as well.  */
static void
show_target_charset_name (struct ui_file *file, int from_tty,
                          struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_charset (get_current_arch ()));
}
234
/* The current value of the target wide charset setting.  */
static const char *target_wide_charset_name = "auto";

/* Print the target wide charset setting VALUE to FILE.  For "auto"
   the wide charset chosen by the current architecture is printed as
   well.  */
static void
show_target_wide_charset_name (struct ui_file *file, int from_tty,
                               struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file,
                        _("The target wide character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target wide character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_wide_charset (get_current_arch ()));
}
249
250 static const char *default_charset_names[] =
251 {
252 DEFAULT_CHARSET_NAMES
253 0
254 };
255
256 static const char **charset_enum;
257
258 \f
259 /* If the target wide character set has big- or little-endian
260 variants, these are the corresponding names. */
261 static const char *target_wide_charset_be_name;
262 static const char *target_wide_charset_le_name;
263
264 /* The architecture for which the BE- and LE-names are valid. */
265 static struct gdbarch *be_le_arch;
266
267 /* A helper function which sets the target wide big- and little-endian
268 character set names, if possible. */
269
270 static void
271 set_be_le_names (struct gdbarch *gdbarch)
272 {
273 int i, len;
274 const char *target_wide;
275
276 if (be_le_arch == gdbarch)
277 return;
278 be_le_arch = gdbarch;
279
280 target_wide_charset_le_name = NULL;
281 target_wide_charset_be_name = NULL;
282
283 target_wide = target_wide_charset_name;
284 if (!strcmp (target_wide, "auto"))
285 target_wide = gdbarch_auto_wide_charset (gdbarch);
286
287 len = strlen (target_wide);
288 for (i = 0; charset_enum[i]; ++i)
289 {
290 if (strncmp (target_wide, charset_enum[i], len))
291 continue;
292 if ((charset_enum[i][len] == 'B'
293 || charset_enum[i][len] == 'L')
294 && charset_enum[i][len + 1] == 'E'
295 && charset_enum[i][len + 2] == '\0')
296 {
297 if (charset_enum[i][len] == 'B')
298 target_wide_charset_be_name = charset_enum[i];
299 else
300 target_wide_charset_le_name = charset_enum[i];
301 }
302 }
303 }
304
305 /* 'Set charset', 'set host-charset', 'set target-charset', 'set
306 target-wide-charset', 'set charset' sfunc's. */
307
308 static void
309 validate (struct gdbarch *gdbarch)
310 {
311 iconv_t desc;
312 const char *host_cset = host_charset ();
313 const char *target_cset = target_charset (gdbarch);
314 const char *target_wide_cset = target_wide_charset_name;
315 if (!strcmp (target_wide_cset, "auto"))
316 target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
317
318 desc = iconv_open (target_wide_cset, host_cset);
319 if (desc == (iconv_t) -1)
320 error ("Cannot convert between character sets `%s' and `%s'",
321 target_wide_cset, host_cset);
322 iconv_close (desc);
323
324 desc = iconv_open (target_cset, host_cset);
325 if (desc == (iconv_t) -1)
326 error ("Cannot convert between character sets `%s' and `%s'",
327 target_cset, host_cset);
328 iconv_close (desc);
329
330 /* Clear the cache. */
331 be_le_arch = NULL;
332 }
333
334 /* This is the sfunc for the 'set charset' command. */
335 static void
336 set_charset_sfunc (char *charset, int from_tty, struct cmd_list_element *c)
337 {
338 /* CAREFUL: set the target charset here as well. */
339 target_charset_name = host_charset_name;
340 validate (get_current_arch ());
341 }
342
/* The sfunc for 'set host-charset'.  It only exists to give validate
   the signature that a set command callback must have.  */
static void
set_host_charset_sfunc (char *charset, int from_tty,
                        struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
351
/* The sfunc for 'set target-charset'; validates the new setting
   against the current architecture.  */
static void
set_target_charset_sfunc (char *charset, int from_tty,
                          struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
359
/* The sfunc for 'set target-wide-charset'; validates the new setting
   against the current architecture.  */
static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
                               struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
367
368 /* sfunc for the 'show charset' command. */
369 static void
370 show_charset (struct ui_file *file, int from_tty, struct cmd_list_element *c,
371 const char *name)
372 {
373 show_host_charset_name (file, from_tty, c, host_charset_name);
374 show_target_charset_name (file, from_tty, c, target_charset_name);
375 show_target_wide_charset_name (file, from_tty, c, target_wide_charset_name);
376 }
377
378 \f
379 /* Accessor functions. */
380
381 const char *
382 host_charset (void)
383 {
384 if (!strcmp (host_charset_name, "auto"))
385 return auto_host_charset_name;
386 return host_charset_name;
387 }
388
389 const char *
390 target_charset (struct gdbarch *gdbarch)
391 {
392 if (!strcmp (target_charset_name, "auto"))
393 return gdbarch_auto_charset (gdbarch);
394 return target_charset_name;
395 }
396
397 const char *
398 target_wide_charset (struct gdbarch *gdbarch)
399 {
400 enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
401
402 set_be_le_names (gdbarch);
403 if (byte_order == BFD_ENDIAN_BIG)
404 {
405 if (target_wide_charset_be_name)
406 return target_wide_charset_be_name;
407 }
408 else
409 {
410 if (target_wide_charset_le_name)
411 return target_wide_charset_le_name;
412 }
413
414 if (!strcmp (target_wide_charset_name, "auto"))
415 return gdbarch_auto_wide_charset (gdbarch);
416
417 return target_wide_charset_name;
418 }
419
420 \f
/* Host character set management.  For the time being, we assume that
   the host character set is some superset of ASCII.  */

/* Return the control character named by the host character C: '?'
   gives 0177, and any other character is masked with 0237.  */

char
host_letter_to_control_character (char c)
{
  return c == '?' ? 0177 : (c & 0237);
}
431
/* Return the numeric value of the host hexadecimal digit C.  The
   caller must already have checked C with isxdigit; a character that
   is neither a decimal digit nor in a-f must be in A-F, which is
   asserted.  */

int
host_hex_value (char c)
{
  if (isdigit (c))
    return c - '0';
  else if ('a' <= c && c <= 'f')
    return 10 + c - 'a';
  else
    {
      gdb_assert (c >= 'A' && c <= 'F');
      return 10 + c - 'A';
    }
}
445
446 \f
447 /* Public character management functions. */
448
449 /* A cleanup function which is run to close an iconv descriptor. */
450
451 static void
452 cleanup_iconv (void *p)
453 {
454 iconv_t *descp = p;
455 iconv_close (*descp);
456 }
457
458 void
459 convert_between_encodings (const char *from, const char *to,
460 const gdb_byte *bytes, unsigned int num_bytes,
461 int width, struct obstack *output,
462 enum transliterations translit)
463 {
464 iconv_t desc;
465 struct cleanup *cleanups;
466 size_t inleft;
467 char *inp;
468 unsigned int space_request;
469
470 /* Often, the host and target charsets will be the same. */
471 if (!strcmp (from, to))
472 {
473 obstack_grow (output, bytes, num_bytes);
474 return;
475 }
476
477 desc = iconv_open (to, from);
478 if (desc == (iconv_t) -1)
479 perror_with_name ("Converting character sets");
480 cleanups = make_cleanup (cleanup_iconv, &desc);
481
482 inleft = num_bytes;
483 inp = (char *) bytes;
484
485 space_request = num_bytes;
486
487 while (inleft > 0)
488 {
489 char *outp;
490 size_t outleft, r;
491 int old_size;
492
493 old_size = obstack_object_size (output);
494 obstack_blank (output, space_request);
495
496 outp = obstack_base (output) + old_size;
497 outleft = space_request;
498
499 r = iconv (desc, (ICONV_CONST char **) &inp, &inleft, &outp, &outleft);
500
501 /* Now make sure that the object on the obstack only includes
502 bytes we have converted. */
503 obstack_blank (output, - (int) outleft);
504
505 if (r == (size_t) -1)
506 {
507 switch (errno)
508 {
509 case EILSEQ:
510 {
511 int i;
512
513 /* Invalid input sequence. */
514 if (translit == translit_none)
515 error (_("Could not convert character to `%s' character set"),
516 to);
517
518 /* We emit escape sequence for the bytes, skip them,
519 and try again. */
520 for (i = 0; i < width; ++i)
521 {
522 char octal[5];
523
524 sprintf (octal, "\\%.3o", *inp & 0xff);
525 obstack_grow_str (output, octal);
526
527 ++inp;
528 --inleft;
529 }
530 }
531 break;
532
533 case E2BIG:
534 /* We ran out of space in the output buffer. Make it
535 bigger next time around. */
536 space_request *= 2;
537 break;
538
539 case EINVAL:
540 /* Incomplete input sequence. FIXME: ought to report this
541 to the caller somehow. */
542 inleft = 0;
543 break;
544
545 default:
546 perror_with_name ("Internal error while converting character sets");
547 }
548 }
549 }
550
551 do_cleanups (cleanups);
552 }
553
554 \f
555
556 /* An iterator that returns host wchar_t's from a target string. */
557 struct wchar_iterator
558 {
559 /* The underlying iconv descriptor. */
560 iconv_t desc;
561
562 /* The input string. This is updated as convert characters. */
563 char *input;
564 /* The number of bytes remaining in the input. */
565 size_t bytes;
566
567 /* The width of an input character. */
568 size_t width;
569
570 /* The output buffer and its size. */
571 gdb_wchar_t *out;
572 size_t out_size;
573 };
574
575 /* Create a new iterator. */
576 struct wchar_iterator *
577 make_wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
578 size_t width)
579 {
580 struct wchar_iterator *result;
581 iconv_t desc;
582
583 desc = iconv_open (INTERMEDIATE_ENCODING, charset);
584 if (desc == (iconv_t) -1)
585 perror_with_name ("Converting character sets");
586
587 result = XNEW (struct wchar_iterator);
588 result->desc = desc;
589 result->input = (char *) input;
590 result->bytes = bytes;
591 result->width = width;
592
593 result->out = XNEW (gdb_wchar_t);
594 result->out_size = 1;
595
596 return result;
597 }
598
599 static void
600 do_cleanup_iterator (void *p)
601 {
602 struct wchar_iterator *iter = p;
603
604 iconv_close (iter->desc);
605 xfree (iter->out);
606 xfree (iter);
607 }
608
609 struct cleanup *
610 make_cleanup_wchar_iterator (struct wchar_iterator *iter)
611 {
612 return make_cleanup (do_cleanup_iterator, iter);
613 }
614
615 int
616 wchar_iterate (struct wchar_iterator *iter,
617 enum wchar_iterate_result *out_result,
618 gdb_wchar_t **out_chars,
619 const gdb_byte **ptr,
620 size_t *len)
621 {
622 size_t out_request;
623
624 /* Try to convert some characters. At first we try to convert just
625 a single character. The reason for this is that iconv does not
626 necessarily update its outgoing arguments when it encounters an
627 invalid input sequence -- but we want to reliably report this to
628 our caller so it can emit an escape sequence. */
629 out_request = 1;
630 while (iter->bytes > 0)
631 {
632 char *outptr = (char *) &iter->out[0];
633 char *orig_inptr = iter->input;
634 size_t orig_in = iter->bytes;
635 size_t out_avail = out_request * sizeof (gdb_wchar_t);
636 size_t num;
637
638 size_t r = iconv (iter->desc,
639 (ICONV_CONST char **) &iter->input, &iter->bytes,
640 &outptr, &out_avail);
641 if (r == (size_t) -1)
642 {
643 switch (errno)
644 {
645 case EILSEQ:
646 /* Invalid input sequence. Skip it, and let the caller
647 know about it. */
648 *out_result = wchar_iterate_invalid;
649 *ptr = iter->input;
650 *len = iter->width;
651 iter->input += iter->width;
652 iter->bytes -= iter->width;
653 return 0;
654
655 case E2BIG:
656 /* We ran out of space. We still might have converted a
657 character; if so, return it. Otherwise, grow the
658 buffer and try again. */
659 if (out_avail < out_request * sizeof (gdb_wchar_t))
660 break;
661
662 ++out_request;
663 if (out_request > iter->out_size)
664 {
665 iter->out_size = out_request;
666 iter->out = xrealloc (iter->out,
667 out_request * sizeof (gdb_wchar_t));
668 }
669 continue;
670
671 case EINVAL:
672 /* Incomplete input sequence. Let the caller know, and
673 arrange for future calls to see EOF. */
674 *out_result = wchar_iterate_incomplete;
675 *ptr = iter->input;
676 *len = iter->bytes;
677 iter->bytes = 0;
678 return 0;
679
680 default:
681 perror_with_name ("Internal error while converting character sets");
682 }
683 }
684
685 /* We converted something. */
686 num = out_request - out_avail / sizeof (gdb_wchar_t);
687 *out_result = wchar_iterate_ok;
688 *out_chars = iter->out;
689 *ptr = orig_inptr;
690 *len = orig_in - iter->bytes;
691 return num;
692 }
693
694 /* Really done. */
695 *out_result = wchar_iterate_eof;
696 return -1;
697 }
698
699 \f
700 /* The charset.c module initialization function. */
701
702 extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
703
704 DEF_VEC_P (char_ptr);
705
706 static VEC (char_ptr) *charsets;
707
708 #ifdef PHONY_ICONV
709
710 static void
711 find_charset_names (void)
712 {
713 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
714 VEC_safe_push (char_ptr, charsets, NULL);
715 }
716
717 #else /* PHONY_ICONV */
718
719 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
720 provides different symbols in the static and dynamic libraries.
721 So, configure may see libiconvlist but not iconvlist. But, calling
722 iconvlist is the right thing to do and will work. Hence we do a
723 check here but unconditionally call iconvlist below. */
724 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
725
726 /* A helper function that adds some character sets to the vector of
727 all character sets. This is a callback function for iconvlist. */
728
729 static int
730 add_one (unsigned int count, const char *const *names, void *data)
731 {
732 unsigned int i;
733
734 for (i = 0; i < count; ++i)
735 VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
736
737 return 0;
738 }
739
740 static void
741 find_charset_names (void)
742 {
743 iconvlist (add_one, NULL);
744 VEC_safe_push (char_ptr, charsets, NULL);
745 }
746
747 #else
748
/* Return 1 if LINE, a line of output from the iconv program, is part
   of the human readable introduction and must be skipped, and 0
   otherwise.  Older iconv programs (e.g. 2.2.2) print the
   introduction even when stdout is not a tty; newer ones leave it
   out in that case.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction.  A line that contains any one of
     them, at any position, is ignored.  */
  static const char * const intro_fragments[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several",
      NULL
    };
  const char * const *fragment;

  for (fragment = intro_fragments; *fragment != NULL; ++fragment)
    if (strstr (line, *fragment) != NULL)
      return 1;

  return 0;
}
777
778 static void
779 find_charset_names (void)
780 {
781 struct pex_obj *child;
782 char *args[3];
783 int err, status;
784 int fail = 1;
785 struct gdb_environ *iconv_env;
786
787 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is not
788 a tty. We need to recognize it and ignore it. This text is subject
789 to translation, so force LANGUAGE=C. */
790 iconv_env = make_environ ();
791 init_environ (iconv_env);
792 set_in_environ (iconv_env, "LANGUAGE", "C");
793 set_in_environ (iconv_env, "LC_ALL", "C");
794
795 child = pex_init (0, "iconv", NULL);
796
797 args[0] = "iconv";
798 args[1] = "-l";
799 args[2] = NULL;
800 /* Note that we simply ignore errors here. */
801 if (!pex_run_in_environment (child, PEX_SEARCH | PEX_STDERR_TO_STDOUT,
802 "iconv", args, environ_vector (iconv_env),
803 NULL, NULL, &err))
804 {
805 FILE *in = pex_read_output (child, 0);
806
807 /* POSIX says that iconv -l uses an unspecified format. We
808 parse the glibc and libiconv formats; feel free to add others
809 as needed. */
810
811 while (!feof (in))
812 {
813 /* The size of buf is chosen arbitrarily. */
814 char buf[1024];
815 char *start, *r;
816 int len;
817
818 r = fgets (buf, sizeof (buf), in);
819 if (!r)
820 break;
821 len = strlen (r);
822 if (len <= 3)
823 continue;
824 if (ignore_line_p (r))
825 continue;
826
827 /* Strip off the newline. */
828 --len;
829 /* Strip off one or two '/'s. glibc will print lines like
830 "8859_7//", but also "10646-1:1993/UCS4/". */
831 if (buf[len - 1] == '/')
832 --len;
833 if (buf[len - 1] == '/')
834 --len;
835 buf[len] = '\0';
836
837 /* libiconv will print multiple entries per line, separated
838 by spaces. Older iconvs will print multiple entries per line,
839 indented by two spaces, and separated by ", "
840 (i.e. the human readable form). */
841 start = buf;
842 while (1)
843 {
844 int keep_going;
845 char *p;
846
847 /* Skip leading blanks. */
848 for (p = start; *p && *p == ' '; ++p)
849 ;
850 start = p;
851 /* Find the next space, comma, or end-of-line. */
852 for ( ; *p && *p != ' ' && *p != ','; ++p)
853 ;
854 /* Ignore an empty result. */
855 if (p == start)
856 break;
857 keep_going = *p;
858 *p = '\0';
859 VEC_safe_push (char_ptr, charsets, xstrdup (start));
860 if (!keep_going)
861 break;
862 /* Skip any extra spaces. */
863 for (start = p + 1; *start && *start == ' '; ++start)
864 ;
865 }
866 }
867
868 if (pex_get_status (child, 1, &status)
869 && WIFEXITED (status) && !WEXITSTATUS (status))
870 fail = 0;
871
872 }
873
874 pex_free (child);
875 free_environ (iconv_env);
876
877 if (fail)
878 {
879 /* Some error occurred, so drop the vector. */
880 int ix;
881 char *elt;
882 for (ix = 0; VEC_iterate (char_ptr, charsets, ix, elt); ++ix)
883 xfree (elt);
884 VEC_truncate (char_ptr, charsets, 0);
885 }
886 else
887 VEC_safe_push (char_ptr, charsets, NULL);
888 }
889
890 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
891 #endif /* PHONY_ICONV */
892
893 /* The "auto" target charset used by default_auto_charset. */
894 static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
895
896 const char *
897 default_auto_charset (void)
898 {
899 return auto_target_charset_name;
900 }
901
902 const char *
903 default_auto_wide_charset (void)
904 {
905 return GDB_DEFAULT_TARGET_WIDE_CHARSET;
906 }
907
908 void
909 _initialize_charset (void)
910 {
911 /* The first element is always "auto". */
912 VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
913 find_charset_names ();
914
915 if (VEC_length (char_ptr, charsets) > 1)
916 charset_enum = (const char **) VEC_address (char_ptr, charsets);
917 else
918 charset_enum = default_charset_names;
919
920 #ifndef PHONY_ICONV
921 #ifdef HAVE_LANGINFO_CODESET
922 /* The result of nl_langinfo may be overwritten later. This may
923 leak a little memory, if the user later changes the host charset,
924 but that doesn't matter much. */
925 auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
926 /* Solaris will return `646' here -- but the Solaris iconv then
927 does not accept this. Darwin (and maybe FreeBSD) may return "" here,
928 which GNU libiconv doesn't like (infinite loop). */
929 if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
930 auto_host_charset_name = "ASCII";
931 auto_target_charset_name = auto_host_charset_name;
932 #elif defined (USE_WIN32API)
933 {
934 static char w32_host_default_charset[16]; /* "CP" + x<=5 digits + paranoia. */
935
936 snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
937 "CP%d", GetACP());
938 auto_host_charset_name = w32_host_default_charset;
939 auto_target_charset_name = auto_host_charset_name;
940 }
941 #endif
942 #endif
943
944 add_setshow_enum_cmd ("charset", class_support,
945 charset_enum, &host_charset_name, _("\
946 Set the host and target character sets."), _("\
947 Show the host and target character sets."), _("\
948 The `host character set' is the one used by the system GDB is running on.\n\
949 The `target character set' is the one used by the program being debugged.\n\
950 You may only use supersets of ASCII for your host character set; GDB does\n\
951 not support any others.\n\
952 To see a list of the character sets GDB supports, type `set charset <TAB>'."),
953 /* Note that the sfunc below needs to set
954 target_charset_name, because the 'set
955 charset' command sets two variables. */
956 set_charset_sfunc,
957 show_charset,
958 &setlist, &showlist);
959
960 add_setshow_enum_cmd ("host-charset", class_support,
961 charset_enum, &host_charset_name, _("\
962 Set the host character set."), _("\
963 Show the host character set."), _("\
964 The `host character set' is the one used by the system GDB is running on.\n\
965 You may only use supersets of ASCII for your host character set; GDB does\n\
966 not support any others.\n\
967 To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
968 set_host_charset_sfunc,
969 show_host_charset_name,
970 &setlist, &showlist);
971
972 add_setshow_enum_cmd ("target-charset", class_support,
973 charset_enum, &target_charset_name, _("\
974 Set the target character set."), _("\
975 Show the target character set."), _("\
976 The `target character set' is the one used by the program being debugged.\n\
977 GDB translates characters and strings between the host and target\n\
978 character sets as needed.\n\
979 To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
980 set_target_charset_sfunc,
981 show_target_charset_name,
982 &setlist, &showlist);
983
984 add_setshow_enum_cmd ("target-wide-charset", class_support,
985 charset_enum, &target_wide_charset_name,
986 _("\
987 Set the target wide character set."), _("\
988 Show the target wide character set."), _("\
989 The `target wide character set' is the one used by the program being debugged.\n\
990 In particular it is the encoding used by `wchar_t'.\n\
991 GDB translates characters and strings between the host and target\n\
992 character sets as needed.\n\
993 To see a list of the character sets GDB supports, type\n\
994 `set target-wide-charset'<TAB>"),
995 set_target_wide_charset_sfunc,
996 show_target_wide_charset_name,
997 &setlist, &showlist);
998 }
This page took 0.066203 seconds and 4 git commands to generate.