Commit | Line | Data |
---|---|---|
234b45d4 | 1 | /* Character set conversion support for GDB. |
61baf725 | 2 | Copyright (C) 2001-2017 Free Software Foundation, Inc. |
234b45d4 KB |
3 | |
4 | This file is part of GDB. | |
5 | ||
6 | This program is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
a9762ec7 | 8 | the Free Software Foundation; either version 3 of the License, or |
234b45d4 KB |
9 | (at your option) any later version. |
10 | ||
11 | This program is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
a9762ec7 | 17 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
234b45d4 KB |
18 | |
19 | #ifndef CHARSET_H | |
20 | #define CHARSET_H | |
21 | ||
cda6c55b TT |
22 | #include <vector> |
23 | ||
234b45d4 KB |
24 | /* If the target program uses a different character set than the host, |
25 | GDB has some support for translating between the two; GDB converts | |
26 | characters and strings to the host character set before displaying | |
27 | them, and converts characters and strings appearing in expressions | |
28 | entered by the user to the target character set. | |
29 | ||
6c7a06a3 TT |
30 | GDB's code pretty much assumes that the host character set is some |
31 | superset of ASCII; there are plenty of ('0' + n) expressions and | |
32 | the like. */ | |
234b45d4 | 33 | |
234b45d4 KB |
34 | /* Return the name of the current host/target character set. The |
35 | result is owned by the charset module; the caller should not free | |
36 | it. */ | |
37 | const char *host_charset (void); | |
f870a310 TT |
38 | const char *target_charset (struct gdbarch *gdbarch); |
39 | const char *target_wide_charset (struct gdbarch *gdbarch); | |
6c7a06a3 TT |
40 | |
41 | /* These values are used to specify the type of transliteration done | |
42 | by convert_between_encodings. */ | |
43 | enum transliterations | |
44 | { | |
45 | /* Error on failure to convert. */ | |
46 | translit_none, | |
47 | /* Transliterate to host char. */ | |
48 | translit_char | |
49 | }; | |
50 | ||
51 | /* Convert between two encodings. | |
52 | ||
53 | FROM is the name of the source encoding. | |
54 | TO is the name of the destination encoding. | |
55 | BYTES holds the bytes to convert; this is assumed to be characters | |
56 | in the FROM encoding. | |
57 | NUM_BYTES is the number of bytes. | |
58 | WIDTH is the width of a character from the FROM charset, in bytes. | |
59 | For a variable width encoding, WIDTH should be the size of a "base | |
60 | character". | |
61 | OUTPUT is an obstack where the converted data is written. The | |
62 | caller is responsible for initializing the obstack, and for | |
63 | destroying the obstack should an error occur. | |
64 | TRANSLIT specifies how invalid conversions should be handled. */ | |
aff410f1 | 65 | |
6c7a06a3 | 66 | void convert_between_encodings (const char *from, const char *to, |
aff410f1 MS |
67 | const gdb_byte *bytes, |
68 | unsigned int num_bytes, | |
6c7a06a3 TT |
69 | int width, struct obstack *output, |
70 | enum transliterations translit); | |
71 | ||
72 | ||
73 | /* These values are used by wchar_iterate to report errors. */ | |
74 | enum wchar_iterate_result | |
75 | { | |
76 | /* Ordinary return. */ | |
77 | wchar_iterate_ok, | |
78 | /* Invalid input sequence. */ | |
79 | wchar_iterate_invalid, | |
80 | /* Incomplete input sequence at the end of the input. */ | |
81 | wchar_iterate_incomplete, | |
82 | /* EOF. */ | |
83 | wchar_iterate_eof | |
84 | }; | |
85 | ||
cda6c55b TT |
86 | /* An iterator that returns host wchar_t's from a target string. */ |
87 | class wchar_iterator | |
88 | { | |
89 | public: | |
6c7a06a3 | 90 | |
cda6c55b TT |
91 | /* Create a new character iterator which returns wchar_t's. INPUT is |
92 | the input buffer. BYTES is the number of bytes in the input | |
93 | buffer. CHARSET is the name of the character set in which INPUT is | |
94 | encoded. WIDTH is the number of bytes in a base character of | |
95 | CHARSET. | |
6c7a06a3 | 96 | |
cda6c55b TT |
97 | The constructor either initializes the iterator, or calls |
98 | error. Resources held by the iterator are released by the | |
99 | destructor. */ | |
100 | wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset, | |
101 | size_t width); | |
102 | ||
103 | ~wchar_iterator (); | |
104 | ||
105 | /* Perform a single iteration of a wchar_t iterator. | |
6c7a06a3 | 106 | |
cda6c55b TT |
107 | Returns the number of characters converted. A negative result |
108 | means that EOF has been reached. A positive result indicates the | |
109 | number of valid wchar_ts in the result; *OUT_CHARS is updated to | |
110 | point to the first valid character. | |
6c7a06a3 | 111 | |
cda6c55b TT |
112 | In all cases aside from EOF, *PTR is set to point to the first |
113 | converted target byte. *LEN is set to the number of bytes | |
114 | converted. | |
6c7a06a3 | 115 | |
cda6c55b TT |
116 | A zero result means one of several unusual results. *OUT_RESULT is |
117 | set to indicate the type of un-ordinary return. | |
6c7a06a3 | 118 | |
cda6c55b TT |
119 | wchar_iterate_invalid means that an invalid input character was |
120 | seen. The iterator is advanced by WIDTH (the argument to | |
121 | the wchar_iterator constructor) bytes. | |
6c7a06a3 | 122 | |
cda6c55b TT |
123 | wchar_iterate_incomplete means that an incomplete character was |
124 | seen at the end of the input sequence. | |
6c7a06a3 | 125 | |
cda6c55b TT |
126 | wchar_iterate_eof means that all bytes were successfully |
127 | converted. The other output arguments are not set. */ | |
128 | int iterate (enum wchar_iterate_result *out_result, gdb_wchar_t **out_chars, | |
129 | const gdb_byte **ptr, size_t *len); | |
130 | ||
131 | private: | |
132 | ||
133 | /* The underlying iconv descriptor. */ | |
5562a44e PA |
134 | #ifdef PHONY_ICONV |
135 | int m_desc; | |
136 | #else | |
cda6c55b | 137 | iconv_t m_desc; |
5562a44e | 138 | #endif |
cda6c55b TT |
139 | |
140 | /* The input string. This is updated as we convert characters. */ | |
141 | const gdb_byte *m_input; | |
142 | /* The number of bytes remaining in the input. */ | |
143 | size_t m_bytes; | |
144 | ||
145 | /* The width of an input character. */ | |
146 | size_t m_width; | |
147 | ||
148 | /* The output buffer. */ | |
149 | std::vector<gdb_wchar_t> m_out; | |
150 | }; | |
6c7a06a3 TT |
151 | |
152 | \f | |
153 | ||
154 | /* GDB needs to know a few details of its execution character set. | |
155 | This knowledge is isolated here and in charset.c. */ | |
156 | ||
157 | /* The escape character. */ | |
158 | #define HOST_ESCAPE_CHAR 27 | |
159 | ||
160 | /* Convert a letter, like 'c', to its corresponding control | |
161 | character. */ | |
162 | char host_letter_to_control_character (char c); | |
163 | ||
164 | /* Convert a hex digit character to its numeric value. E.g., 'f' is | |
165 | converted to 15. This function assumes that C is a valid hex | |
166 | digit. Both upper- and lower-case letters are recognized. */ | |
167 | int host_hex_value (char c); | |
234b45d4 KB |
168 | |
169 | #endif /* CHARSET_H */ |