Include gdb_assert.h in common-defs.h
[deliverable/binutils-gdb.git] / gdb / charset.c
1 /* Character set conversion support for GDB.
2
3 Copyright (C) 2001-2014 Free Software Foundation, Inc.
4
5 This file is part of GDB.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19
20 #include "defs.h"
21 #include "charset.h"
22 #include "gdbcmd.h"
23 #include "gdb_obstack.h"
24 #include "gdb_wait.h"
25 #include "charset-list.h"
26 #include "vec.h"
27 #include "environ.h"
28 #include "arch-utils.h"
29 #include "gdb_vecs.h"
30
31 #include <string.h>
32 #include <ctype.h>
33
34 #ifdef USE_WIN32API
35 #include <windows.h>
36 #endif
37 \f
38 /* How GDB's character set support works
39
40 GDB has three global settings:
41
42 - The `current host character set' is the character set GDB should
43 use in talking to the user, and which (hopefully) the user's
44 terminal knows how to display properly. Most users should not
45 change this.
46
47 - The `current target character set' is the character set the
48 program being debugged uses.
49
50 - The `current target wide character set' is the wide character set
51 the program being debugged uses, that is, the encoding used for
52 wchar_t.
53
54 There are commands to set each of these, and mechanisms for
55 choosing reasonable default values. GDB has a global list of
56 character sets that it can use as its host or target character
57 sets.
58
59 The header file `charset.h' declares various functions that
60 different pieces of GDB need to perform tasks like:
61
62 - printing target strings and characters to the user's terminal
63 (mostly target->host conversions),
64
65 - building target-appropriate representations of strings and
66 characters the user enters in expressions (mostly host->target
67 conversions),
68
69 and so on.
70
71 To avoid excessive code duplication and maintenance efforts,
72 GDB simply requires a capable iconv function. Users on platforms
73 without a suitable iconv can use the GNU iconv library. */
74
75 \f
#ifdef PHONY_ICONV

/* Provide a phony iconv that does as little as possible.  Also,
   arrange for there to be a single available character set.  */

/* All three defaults collapse to ISO-8859-1, and the list of known
   character set names contains just that one entry.  */
#undef GDB_DEFAULT_HOST_CHARSET
#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
#undef DEFAULT_CHARSET_NAMES
#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,

/* Redirect the iconv API names to the phony implementations defined
   below.  The conversion descriptor becomes a plain int.  */
#undef iconv_t
#define iconv_t int
#undef iconv_open
#define iconv_open phony_iconv_open
#undef iconv
#define iconv phony_iconv
#undef iconv_close
#define iconv_close phony_iconv_close

/* phony_iconv takes a `const char **' input argument, so casts
   written with ICONV_CONST must produce a const pointer.  */
#undef ICONV_CONST
#define ICONV_CONST const

/* Some systems don't have EILSEQ, so we define it here, but not as
   EINVAL, because callers of `iconv' want to distinguish EINVAL and
   EILSEQ.  This is what iconv.h from libiconv does as well.  Note
   that wchar.h may also define EILSEQ, so this needs to be after we
   include wchar.h, which happens in defs.h through gdb_wchar.h.  */
#ifndef EILSEQ
#define EILSEQ ENOENT
#endif
108
109 static iconv_t
110 phony_iconv_open (const char *to, const char *from)
111 {
112 /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
113 We allow conversions to wchar_t and the host charset. */
114 if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
115 && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
116 return -1;
117 if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
118 return -1;
119
120 /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is
121 used as a flag in calls to iconv. */
122 return !strcmp (from, "UTF-32BE");
123 }
124
/* Phony replacement for iconv_close.  The phony descriptor owns no
   resources, so there is nothing to release; always reports success
   by returning 0.  */

static int
phony_iconv_close (iconv_t arg)
{
  return 0;
}
130
/* Phony replacement for iconv.

   UTF_FLAG is the value returned by phony_iconv_open: non-zero means
   the input is UTF-32BE and each 4-byte unit is narrowed to a single
   output byte; zero means input bytes are copied to the output
   unchanged.

   *INBUF / *INBYTESLEFT and *OUTBUF / *OUTBYTESLEFT are advanced past
   whatever was consumed and produced, as with the real iconv.

   Returns 0 when all input was converted.  Otherwise returns
   (size_t) -1 with errno set to:
     EILSEQ - a UTF-32BE code point does not fit in one byte (>= 256);
     E2BIG  - the output buffer is full while input remains;
     EINVAL - the UTF-32BE input ends with a partial 4-byte unit.  */

static size_t
phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
             char **outbuf, size_t *outbytesleft)
{
  if (utf_flag)
    {
      while (*inbytesleft >= 4)
        {
          size_t j;
          unsigned long c = 0;

          /* Assemble one big-endian 32-bit code point.  */
          for (j = 0; j < 4; ++j)
            {
              c <<= 8;
              c += (*inbuf)[j] & 0xff;
            }

          if (c >= 256)
            {
              errno = EILSEQ;
              return -1;
            }

          /* Never store past the end of the caller's buffer.  */
          if (*outbytesleft < 1)
            {
              errno = E2BIG;
              return -1;
            }

          **outbuf = c & 0xff;
          ++*outbuf;
          --*outbytesleft;

          /* One output byte consumes a whole 4-byte input unit.  */
          *inbuf += 4;
          *inbytesleft -= 4;
        }

      /* Anything left over is shorter than a full unit, i.e. an
         incomplete sequence.  */
      if (*inbytesleft)
        {
          errno = EINVAL;
          return -1;
        }
    }
  else
    {
      /* In all other cases we simply copy input bytes to the
         output.  */
      size_t amt = *inbytesleft;

      if (amt > *outbytesleft)
        amt = *outbytesleft;
      memcpy (*outbuf, *inbuf, amt);
      *inbuf += amt;
      *outbuf += amt;
      *inbytesleft -= amt;
      *outbytesleft -= amt;
    }

  if (*inbytesleft)
    {
      errno = E2BIG;
      return -1;
    }

  /* The number of non-reversible conversions -- but they were all
     reversible.  */
  return 0;
}
191
192 #endif
193
194
195 \f
196 /* The global lists of character sets and translations. */
197
198
/* Fallback name for the target character set, used only when nothing
   earlier (for example the PHONY_ICONV section) has defined it.  */
#ifndef GDB_DEFAULT_TARGET_CHARSET
#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#endif

/* Likewise, the fallback name for the target wide character set.  */
#ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
#endif
206
207 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
208 static const char *host_charset_name = "auto";
209 static void
210 show_host_charset_name (struct ui_file *file, int from_tty,
211 struct cmd_list_element *c,
212 const char *value)
213 {
214 if (!strcmp (value, "auto"))
215 fprintf_filtered (file,
216 _("The host character set is \"auto; currently %s\".\n"),
217 auto_host_charset_name);
218 else
219 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
220 }
221
/* The user-visible target charset setting; "auto" by default.  */
static const char *target_charset_name = "auto";

/* Print the target character set setting VALUE to FILE.  For "auto"
   the current architecture's automatic charset is printed as well.  */

static void
show_target_charset_name (struct ui_file *file, int from_tty,
                          struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_charset (get_current_arch ()));
}
236
/* The user-visible target wide charset setting; "auto" by default.  */
static const char *target_wide_charset_name = "auto";

/* Print the target wide character set setting VALUE to FILE.  For
   "auto" the current architecture's automatic wide charset is printed
   as well.  */

static void
show_target_wide_charset_name (struct ui_file *file,
                               int from_tty,
                               struct cmd_list_element *c,
                               const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target wide character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target wide character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_wide_charset (get_current_arch ()));
}
253
/* Built-in, NULL-terminated list of character set names.
   _initialize_charset falls back to it when no other names could be
   found.  */
static const char *default_charset_names[] =
{
  DEFAULT_CHARSET_NAMES
  0
};

/* The NULL-terminated list of names offered by the charset `set'
   commands; assigned in _initialize_charset.  */
static const char **charset_enum;
261
262 \f
263 /* If the target wide character set has big- or little-endian
264 variants, these are the corresponding names. */
265 static const char *target_wide_charset_be_name;
266 static const char *target_wide_charset_le_name;
267
268 /* The architecture for which the BE- and LE-names are valid. */
269 static struct gdbarch *be_le_arch;
270
271 /* A helper function which sets the target wide big- and little-endian
272 character set names, if possible. */
273
274 static void
275 set_be_le_names (struct gdbarch *gdbarch)
276 {
277 int i, len;
278 const char *target_wide;
279
280 if (be_le_arch == gdbarch)
281 return;
282 be_le_arch = gdbarch;
283
284 target_wide_charset_le_name = NULL;
285 target_wide_charset_be_name = NULL;
286
287 target_wide = target_wide_charset_name;
288 if (!strcmp (target_wide, "auto"))
289 target_wide = gdbarch_auto_wide_charset (gdbarch);
290
291 len = strlen (target_wide);
292 for (i = 0; charset_enum[i]; ++i)
293 {
294 if (strncmp (target_wide, charset_enum[i], len))
295 continue;
296 if ((charset_enum[i][len] == 'B'
297 || charset_enum[i][len] == 'L')
298 && charset_enum[i][len + 1] == 'E'
299 && charset_enum[i][len + 2] == '\0')
300 {
301 if (charset_enum[i][len] == 'B')
302 target_wide_charset_be_name = charset_enum[i];
303 else
304 target_wide_charset_le_name = charset_enum[i];
305 }
306 }
307 }
308
309 /* 'Set charset', 'set host-charset', 'set target-charset', 'set
310 target-wide-charset', 'set charset' sfunc's. */
311
312 static void
313 validate (struct gdbarch *gdbarch)
314 {
315 iconv_t desc;
316 const char *host_cset = host_charset ();
317 const char *target_cset = target_charset (gdbarch);
318 const char *target_wide_cset = target_wide_charset_name;
319
320 if (!strcmp (target_wide_cset, "auto"))
321 target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
322
323 desc = iconv_open (target_wide_cset, host_cset);
324 if (desc == (iconv_t) -1)
325 error (_("Cannot convert between character sets `%s' and `%s'"),
326 target_wide_cset, host_cset);
327 iconv_close (desc);
328
329 desc = iconv_open (target_cset, host_cset);
330 if (desc == (iconv_t) -1)
331 error (_("Cannot convert between character sets `%s' and `%s'"),
332 target_cset, host_cset);
333 iconv_close (desc);
334
335 /* Clear the cache. */
336 be_le_arch = NULL;
337 }
338
339 /* This is the sfunc for the 'set charset' command. */
340 static void
341 set_charset_sfunc (char *charset, int from_tty,
342 struct cmd_list_element *c)
343 {
344 /* CAREFUL: set the target charset here as well. */
345 target_charset_name = host_charset_name;
346 validate (get_current_arch ());
347 }
348
/* sfunc for 'set host-charset'.  A thin wrapper with the signature
   the command machinery requires; it just validates the settings
   against the current architecture.  */

static void
set_host_charset_sfunc (char *charset, int from_tty,
                        struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
357
/* sfunc for 'set target-charset': validate the settings against the
   current architecture.  */

static void
set_target_charset_sfunc (char *charset, int from_tty,
                          struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
365
/* sfunc for 'set target-wide-charset': validate the settings against
   the current architecture.  */

static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
                               struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
373
374 /* sfunc for the 'show charset' command. */
375 static void
376 show_charset (struct ui_file *file, int from_tty,
377 struct cmd_list_element *c,
378 const char *name)
379 {
380 show_host_charset_name (file, from_tty, c, host_charset_name);
381 show_target_charset_name (file, from_tty, c, target_charset_name);
382 show_target_wide_charset_name (file, from_tty, c,
383 target_wide_charset_name);
384 }
385
386 \f
387 /* Accessor functions. */
388
389 const char *
390 host_charset (void)
391 {
392 if (!strcmp (host_charset_name, "auto"))
393 return auto_host_charset_name;
394 return host_charset_name;
395 }
396
397 const char *
398 target_charset (struct gdbarch *gdbarch)
399 {
400 if (!strcmp (target_charset_name, "auto"))
401 return gdbarch_auto_charset (gdbarch);
402 return target_charset_name;
403 }
404
405 const char *
406 target_wide_charset (struct gdbarch *gdbarch)
407 {
408 enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
409
410 set_be_le_names (gdbarch);
411 if (byte_order == BFD_ENDIAN_BIG)
412 {
413 if (target_wide_charset_be_name)
414 return target_wide_charset_be_name;
415 }
416 else
417 {
418 if (target_wide_charset_le_name)
419 return target_wide_charset_le_name;
420 }
421
422 if (!strcmp (target_wide_charset_name, "auto"))
423 return gdbarch_auto_wide_charset (gdbarch);
424
425 return target_wide_charset_name;
426 }
427
428 \f
429 /* Host character set management. For the time being, we assume that
430 the host character set is some superset of ASCII. */
431
/* Map the host character C to the corresponding control character:
   '?' becomes DEL (0177); anything else is masked with 0237.  */

char
host_letter_to_control_character (char c)
{
  return (c == '?') ? 0177 : (c & 0237);
}
439
/* Convert a host character, C, to its hex value.  C must already have
   been validated using isxdigit; any other character trips the
   assertion.

   The digit test is an explicit range comparison rather than a call
   to isdigit: passing a plain `char' with a negative value to a
   <ctype.h> function is undefined behavior.  */

int
host_hex_value (char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
453
454 \f
455 /* Public character management functions. */
456
457 /* A cleanup function which is run to close an iconv descriptor. */
458
459 static void
460 cleanup_iconv (void *p)
461 {
462 iconv_t *descp = p;
463 iconv_close (*descp);
464 }
465
466 void
467 convert_between_encodings (const char *from, const char *to,
468 const gdb_byte *bytes, unsigned int num_bytes,
469 int width, struct obstack *output,
470 enum transliterations translit)
471 {
472 iconv_t desc;
473 struct cleanup *cleanups;
474 size_t inleft;
475 ICONV_CONST char *inp;
476 unsigned int space_request;
477
478 /* Often, the host and target charsets will be the same. */
479 if (!strcmp (from, to))
480 {
481 obstack_grow (output, bytes, num_bytes);
482 return;
483 }
484
485 desc = iconv_open (to, from);
486 if (desc == (iconv_t) -1)
487 perror_with_name (_("Converting character sets"));
488 cleanups = make_cleanup (cleanup_iconv, &desc);
489
490 inleft = num_bytes;
491 inp = (ICONV_CONST char *) bytes;
492
493 space_request = num_bytes;
494
495 while (inleft > 0)
496 {
497 char *outp;
498 size_t outleft, r;
499 int old_size;
500
501 old_size = obstack_object_size (output);
502 obstack_blank (output, space_request);
503
504 outp = (char *) obstack_base (output) + old_size;
505 outleft = space_request;
506
507 r = iconv (desc, &inp, &inleft, &outp, &outleft);
508
509 /* Now make sure that the object on the obstack only includes
510 bytes we have converted. */
511 obstack_blank (output, - (int) outleft);
512
513 if (r == (size_t) -1)
514 {
515 switch (errno)
516 {
517 case EILSEQ:
518 {
519 int i;
520
521 /* Invalid input sequence. */
522 if (translit == translit_none)
523 error (_("Could not convert character "
524 "to `%s' character set"), to);
525
526 /* We emit escape sequence for the bytes, skip them,
527 and try again. */
528 for (i = 0; i < width; ++i)
529 {
530 char octal[5];
531
532 xsnprintf (octal, sizeof (octal), "\\%.3o", *inp & 0xff);
533 obstack_grow_str (output, octal);
534
535 ++inp;
536 --inleft;
537 }
538 }
539 break;
540
541 case E2BIG:
542 /* We ran out of space in the output buffer. Make it
543 bigger next time around. */
544 space_request *= 2;
545 break;
546
547 case EINVAL:
548 /* Incomplete input sequence. FIXME: ought to report this
549 to the caller somehow. */
550 inleft = 0;
551 break;
552
553 default:
554 perror_with_name (_("Internal error while "
555 "converting character sets"));
556 }
557 }
558 }
559
560 do_cleanups (cleanups);
561 }
562
563 \f
564
/* An iterator that returns host wchar_t's from a target string.  */
struct wchar_iterator
{
  /* The underlying iconv descriptor.  */
  iconv_t desc;

  /* The input string.  This is advanced as characters are
     converted.  */
  const gdb_byte *input;
  /* The number of bytes remaining in the input.  */
  size_t bytes;

  /* The width of an input character, in bytes.  It is the number of
     bytes skipped when an invalid sequence is found.  */
  size_t width;

  /* The output buffer and its size, counted in gdb_wchar_t
     elements.  */
  gdb_wchar_t *out;
  size_t out_size;
};
583
584 /* Create a new iterator. */
585 struct wchar_iterator *
586 make_wchar_iterator (const gdb_byte *input, size_t bytes,
587 const char *charset, size_t width)
588 {
589 struct wchar_iterator *result;
590 iconv_t desc;
591
592 desc = iconv_open (INTERMEDIATE_ENCODING, charset);
593 if (desc == (iconv_t) -1)
594 perror_with_name (_("Converting character sets"));
595
596 result = XNEW (struct wchar_iterator);
597 result->desc = desc;
598 result->input = input;
599 result->bytes = bytes;
600 result->width = width;
601
602 result->out = XNEW (gdb_wchar_t);
603 result->out_size = 1;
604
605 return result;
606 }
607
608 static void
609 do_cleanup_iterator (void *p)
610 {
611 struct wchar_iterator *iter = p;
612
613 iconv_close (iter->desc);
614 xfree (iter->out);
615 xfree (iter);
616 }
617
618 struct cleanup *
619 make_cleanup_wchar_iterator (struct wchar_iterator *iter)
620 {
621 return make_cleanup (do_cleanup_iterator, iter);
622 }
623
624 int
625 wchar_iterate (struct wchar_iterator *iter,
626 enum wchar_iterate_result *out_result,
627 gdb_wchar_t **out_chars,
628 const gdb_byte **ptr,
629 size_t *len)
630 {
631 size_t out_request;
632
633 /* Try to convert some characters. At first we try to convert just
634 a single character. The reason for this is that iconv does not
635 necessarily update its outgoing arguments when it encounters an
636 invalid input sequence -- but we want to reliably report this to
637 our caller so it can emit an escape sequence. */
638 out_request = 1;
639 while (iter->bytes > 0)
640 {
641 ICONV_CONST char *inptr = (ICONV_CONST char *) iter->input;
642 char *outptr = (char *) &iter->out[0];
643 const gdb_byte *orig_inptr = iter->input;
644 size_t orig_in = iter->bytes;
645 size_t out_avail = out_request * sizeof (gdb_wchar_t);
646 size_t num;
647 size_t r = iconv (iter->desc, &inptr, &iter->bytes, &outptr, &out_avail);
648
649 iter->input = (gdb_byte *) inptr;
650
651 if (r == (size_t) -1)
652 {
653 switch (errno)
654 {
655 case EILSEQ:
656 /* Invalid input sequence. We still might have
657 converted a character; if so, return it. */
658 if (out_avail < out_request * sizeof (gdb_wchar_t))
659 break;
660
661 /* Otherwise skip the first invalid character, and let
662 the caller know about it. */
663 *out_result = wchar_iterate_invalid;
664 *ptr = iter->input;
665 *len = iter->width;
666 iter->input += iter->width;
667 iter->bytes -= iter->width;
668 return 0;
669
670 case E2BIG:
671 /* We ran out of space. We still might have converted a
672 character; if so, return it. Otherwise, grow the
673 buffer and try again. */
674 if (out_avail < out_request * sizeof (gdb_wchar_t))
675 break;
676
677 ++out_request;
678 if (out_request > iter->out_size)
679 {
680 iter->out_size = out_request;
681 iter->out = xrealloc (iter->out,
682 out_request * sizeof (gdb_wchar_t));
683 }
684 continue;
685
686 case EINVAL:
687 /* Incomplete input sequence. Let the caller know, and
688 arrange for future calls to see EOF. */
689 *out_result = wchar_iterate_incomplete;
690 *ptr = iter->input;
691 *len = iter->bytes;
692 iter->bytes = 0;
693 return 0;
694
695 default:
696 perror_with_name (_("Internal error while "
697 "converting character sets"));
698 }
699 }
700
701 /* We converted something. */
702 num = out_request - out_avail / sizeof (gdb_wchar_t);
703 *out_result = wchar_iterate_ok;
704 *out_chars = iter->out;
705 *ptr = orig_inptr;
706 *len = orig_in - iter->bytes;
707 return num;
708 }
709
710 /* Really done. */
711 *out_result = wchar_iterate_eof;
712 return -1;
713 }
714
715 \f
716 /* The charset.c module initialization function. */
717
/* Prototype for this module's initialization function, given here to
   keep -Wmissing-prototypes quiet.  */
extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */

/* The character set names collected at startup.  The
   find_charset_names variants below add to it, and
   _initialize_charset may use its storage as charset_enum.  */
static VEC (char_ptr) *charsets;
721
722 #ifdef PHONY_ICONV
723
724 static void
725 find_charset_names (void)
726 {
727 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
728 VEC_safe_push (char_ptr, charsets, NULL);
729 }
730
731 #else /* PHONY_ICONV */
732
733 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
734 provides different symbols in the static and dynamic libraries.
735 So, configure may see libiconvlist but not iconvlist. But, calling
736 iconvlist is the right thing to do and will work. Hence we do a
737 check here but unconditionally call iconvlist below. */
738 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
739
740 /* A helper function that adds some character sets to the vector of
741 all character sets. This is a callback function for iconvlist. */
742
743 static int
744 add_one (unsigned int count, const char *const *names, void *data)
745 {
746 unsigned int i;
747
748 for (i = 0; i < count; ++i)
749 VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
750
751 return 0;
752 }
753
754 static void
755 find_charset_names (void)
756 {
757 iconvlist (add_one, NULL);
758 VEC_safe_push (char_ptr, charsets, NULL);
759 }
760
761 #else
762
/* Return non-zero if LINE (output from iconv) should be ignored.
   Older iconv programs (e.g. 2.2.2) print a human readable
   introduction even when stdout is not a tty; newer versions omit it
   in that case.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction.  A line containing any of them,
     at any position, is ignored.  */
  static const char * const ignore_lines[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several",
      NULL
    };
  const char * const *fragment = ignore_lines;

  while (*fragment != NULL)
    {
      if (strstr (line, *fragment) != NULL)
        return 1;
      ++fragment;
    }

  return 0;
}
791
792 static void
793 find_charset_names (void)
794 {
795 struct pex_obj *child;
796 char *args[3];
797 int err, status;
798 int fail = 1;
799 int flags;
800 struct gdb_environ *iconv_env;
801 char *iconv_program;
802
803 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is
804 not a tty. We need to recognize it and ignore it. This text is
805 subject to translation, so force LANGUAGE=C. */
806 iconv_env = make_environ ();
807 init_environ (iconv_env);
808 set_in_environ (iconv_env, "LANGUAGE", "C");
809 set_in_environ (iconv_env, "LC_ALL", "C");
810
811 child = pex_init (PEX_USE_PIPES, "iconv", NULL);
812
813 #ifdef ICONV_BIN
814 {
815 char *iconv_dir = relocate_gdb_directory (ICONV_BIN,
816 ICONV_BIN_RELOCATABLE);
817 iconv_program = concat (iconv_dir, SLASH_STRING, "iconv", NULL);
818 xfree (iconv_dir);
819 }
820 #else
821 iconv_program = xstrdup ("iconv");
822 #endif
823 args[0] = iconv_program;
824 args[1] = "-l";
825 args[2] = NULL;
826 flags = PEX_STDERR_TO_STDOUT;
827 #ifndef ICONV_BIN
828 flags |= PEX_SEARCH;
829 #endif
830 /* Note that we simply ignore errors here. */
831 if (!pex_run_in_environment (child, flags,
832 args[0], args, environ_vector (iconv_env),
833 NULL, NULL, &err))
834 {
835 FILE *in = pex_read_output (child, 0);
836
837 /* POSIX says that iconv -l uses an unspecified format. We
838 parse the glibc and libiconv formats; feel free to add others
839 as needed. */
840
841 while (in != NULL && !feof (in))
842 {
843 /* The size of buf is chosen arbitrarily. */
844 char buf[1024];
845 char *start, *r;
846 int len;
847
848 r = fgets (buf, sizeof (buf), in);
849 if (!r)
850 break;
851 len = strlen (r);
852 if (len <= 3)
853 continue;
854 if (ignore_line_p (r))
855 continue;
856
857 /* Strip off the newline. */
858 --len;
859 /* Strip off one or two '/'s. glibc will print lines like
860 "8859_7//", but also "10646-1:1993/UCS4/". */
861 if (buf[len - 1] == '/')
862 --len;
863 if (buf[len - 1] == '/')
864 --len;
865 buf[len] = '\0';
866
867 /* libiconv will print multiple entries per line, separated
868 by spaces. Older iconvs will print multiple entries per
869 line, indented by two spaces, and separated by ", "
870 (i.e. the human readable form). */
871 start = buf;
872 while (1)
873 {
874 int keep_going;
875 char *p;
876
877 /* Skip leading blanks. */
878 for (p = start; *p && *p == ' '; ++p)
879 ;
880 start = p;
881 /* Find the next space, comma, or end-of-line. */
882 for ( ; *p && *p != ' ' && *p != ','; ++p)
883 ;
884 /* Ignore an empty result. */
885 if (p == start)
886 break;
887 keep_going = *p;
888 *p = '\0';
889 VEC_safe_push (char_ptr, charsets, xstrdup (start));
890 if (!keep_going)
891 break;
892 /* Skip any extra spaces. */
893 for (start = p + 1; *start && *start == ' '; ++start)
894 ;
895 }
896 }
897
898 if (pex_get_status (child, 1, &status)
899 && WIFEXITED (status) && !WEXITSTATUS (status))
900 fail = 0;
901
902 }
903
904 xfree (iconv_program);
905 pex_free (child);
906 free_environ (iconv_env);
907
908 if (fail)
909 {
910 /* Some error occurred, so drop the vector. */
911 free_char_ptr_vec (charsets);
912 charsets = NULL;
913 }
914 else
915 VEC_safe_push (char_ptr, charsets, NULL);
916 }
917
918 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
919 #endif /* PHONY_ICONV */
920
921 /* The "auto" target charset used by default_auto_charset. */
922 static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
923
924 const char *
925 default_auto_charset (void)
926 {
927 return auto_target_charset_name;
928 }
929
930 const char *
931 default_auto_wide_charset (void)
932 {
933 return GDB_DEFAULT_TARGET_WIDE_CHARSET;
934 }
935
936
937 #ifdef USE_INTERMEDIATE_ENCODING_FUNCTION
/* Macro used for UTF or UCS endianness suffix: "BE" when
   WORDS_BIGENDIAN is true, "LE" otherwise.  */
#if WORDS_BIGENDIAN
#define ENDIAN_SUFFIX "BE"
#else
#define ENDIAN_SUFFIX "LE"
#endif

/* The code below serves to generate a compile time error if
   gdb_wchar_t type is not of size 2 nor 4, despite the fact that
   macro __STDC_ISO_10646__ is defined.
   This is better than a gdb_assert call, because GDB cannot handle
   strings correctly if this size is different.

   The check works by declaring an array whose size is -1, which is
   invalid, when the condition does not hold.  */

extern char your_gdb_wchar_t_is_bogus[(sizeof (gdb_wchar_t) == 2
                                       || sizeof (gdb_wchar_t) == 4)
                                      ? 1 : -1];
954
955 /* intermediate_encoding returns the charset used internally by
956 GDB to convert between target and host encodings. As the test above
957 compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes.
958 UTF-16/32 is tested first, UCS-2/4 is tested as a second option,
959 otherwise an error is generated. */
960
961 const char *
962 intermediate_encoding (void)
963 {
964 iconv_t desc;
965 static const char *stored_result = NULL;
966 char *result;
967
968 if (stored_result)
969 return stored_result;
970 result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8),
971 ENDIAN_SUFFIX);
972 /* Check that the name is supported by iconv_open. */
973 desc = iconv_open (result, host_charset ());
974 if (desc != (iconv_t) -1)
975 {
976 iconv_close (desc);
977 stored_result = result;
978 return result;
979 }
980 /* Not valid, free the allocated memory. */
981 xfree (result);
982 /* Second try, with UCS-2 type. */
983 result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t),
984 ENDIAN_SUFFIX);
985 /* Check that the name is supported by iconv_open. */
986 desc = iconv_open (result, host_charset ());
987 if (desc != (iconv_t) -1)
988 {
989 iconv_close (desc);
990 stored_result = result;
991 return result;
992 }
993 /* Not valid, free the allocated memory. */
994 xfree (result);
995 /* No valid charset found, generate error here. */
996 error (_("Unable to find a vaild charset for string conversions"));
997 }
998
999 #endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */
1000
1001 void
1002 _initialize_charset (void)
1003 {
1004 /* The first element is always "auto". */
1005 VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
1006 find_charset_names ();
1007
1008 if (VEC_length (char_ptr, charsets) > 1)
1009 charset_enum = (const char **) VEC_address (char_ptr, charsets);
1010 else
1011 charset_enum = default_charset_names;
1012
1013 #ifndef PHONY_ICONV
1014 #ifdef HAVE_LANGINFO_CODESET
1015 /* The result of nl_langinfo may be overwritten later. This may
1016 leak a little memory, if the user later changes the host charset,
1017 but that doesn't matter much. */
1018 auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
1019 /* Solaris will return `646' here -- but the Solaris iconv then does
1020 not accept this. Darwin (and maybe FreeBSD) may return "" here,
1021 which GNU libiconv doesn't like (infinite loop). */
1022 if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
1023 auto_host_charset_name = "ASCII";
1024 auto_target_charset_name = auto_host_charset_name;
1025 #elif defined (USE_WIN32API)
1026 {
1027 /* "CP" + x<=5 digits + paranoia. */
1028 static char w32_host_default_charset[16];
1029
1030 snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
1031 "CP%d", GetACP());
1032 auto_host_charset_name = w32_host_default_charset;
1033 auto_target_charset_name = auto_host_charset_name;
1034 }
1035 #endif
1036 #endif
1037
1038 add_setshow_enum_cmd ("charset", class_support,
1039 charset_enum, &host_charset_name, _("\
1040 Set the host and target character sets."), _("\
1041 Show the host and target character sets."), _("\
1042 The `host character set' is the one used by the system GDB is running on.\n\
1043 The `target character set' is the one used by the program being debugged.\n\
1044 You may only use supersets of ASCII for your host character set; GDB does\n\
1045 not support any others.\n\
1046 To see a list of the character sets GDB supports, type `set charset <TAB>'."),
1047 /* Note that the sfunc below needs to set
1048 target_charset_name, because the 'set
1049 charset' command sets two variables. */
1050 set_charset_sfunc,
1051 show_charset,
1052 &setlist, &showlist);
1053
1054 add_setshow_enum_cmd ("host-charset", class_support,
1055 charset_enum, &host_charset_name, _("\
1056 Set the host character set."), _("\
1057 Show the host character set."), _("\
1058 The `host character set' is the one used by the system GDB is running on.\n\
1059 You may only use supersets of ASCII for your host character set; GDB does\n\
1060 not support any others.\n\
1061 To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
1062 set_host_charset_sfunc,
1063 show_host_charset_name,
1064 &setlist, &showlist);
1065
1066 add_setshow_enum_cmd ("target-charset", class_support,
1067 charset_enum, &target_charset_name, _("\
1068 Set the target character set."), _("\
1069 Show the target character set."), _("\
1070 The `target character set' is the one used by the program being debugged.\n\
1071 GDB translates characters and strings between the host and target\n\
1072 character sets as needed.\n\
1073 To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
1074 set_target_charset_sfunc,
1075 show_target_charset_name,
1076 &setlist, &showlist);
1077
1078 add_setshow_enum_cmd ("target-wide-charset", class_support,
1079 charset_enum, &target_wide_charset_name,
1080 _("\
1081 Set the target wide character set."), _("\
1082 Show the target wide character set."), _("\
1083 The `target wide character set' is the one used by the program being debugged.\
1084 \nIn particular it is the encoding used by `wchar_t'.\n\
1085 GDB translates characters and strings between the host and target\n\
1086 character sets as needed.\n\
1087 To see a list of the character sets GDB supports, type\n\
1088 `set target-wide-charset'<TAB>"),
1089 set_target_wide_charset_sfunc,
1090 show_target_wide_charset_name,
1091 &setlist, &showlist);
1092 }
This page took 0.054854 seconds and 5 git commands to generate.