Commit | Line | Data |
---|---|---|
234b45d4 | 1 | /* Character set conversion support for GDB. |
b811d2c2 | 2 | Copyright (C) 2001-2020 Free Software Foundation, Inc. |
234b45d4 KB |
3 | |
4 | This file is part of GDB. | |
5 | ||
6 | This program is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
a9762ec7 | 8 | the Free Software Foundation; either version 3 of the License, or |
234b45d4 KB |
9 | (at your option) any later version. |
10 | ||
11 | This program is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
a9762ec7 | 17 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
234b45d4 KB |
18 | |
19 | #ifndef CHARSET_H | |
20 | #define CHARSET_H | |
21 | ||
268a13a5 | 22 | #include "gdbsupport/def-vector.h" |
cda6c55b | 23 | |
234b45d4 KB |
24 | /* If the target program uses a different character set than the host, |
25 | GDB has some support for translating between the two; GDB converts | |
26 | characters and strings to the host character set before displaying | |
27 | them, and converts characters and strings appearing in expressions | |
28 | entered by the user to the target character set. | |
29 | ||
6c7a06a3 TT |
30 | GDB's code pretty much assumes that the host character set is some |
31 | superset of ASCII; there are plenty of ('0' + n) expressions and | |
32 | the like. */ | |
234b45d4 | 33 | |
234b45d4 KB |
34 | /* Return the name of the current host, target, or target wide |
35 | character set. The returned string is owned by the charset module; | |
36 | the caller must not free it. */ | |
37 | const char *host_charset (void); | |
f870a310 TT |
38 | const char *target_charset (struct gdbarch *gdbarch); |
39 | const char *target_wide_charset (struct gdbarch *gdbarch); | |
6c7a06a3 TT |
40 | |
41 | /* Values for the TRANSLIT argument of convert_between_encodings; they | |
42 | specify the type of transliteration it performs. */ | |
43 | enum transliterations | |
44 | { | |
45 | /* No transliteration: error on failure to convert. */ | |
46 | translit_none, | |
47 | /* Transliterate to host char. */ | |
48 | translit_char | |
49 | }; | |
50 | ||
51 | /* Convert between two encodings. | |
52 | ||
53 | FROM is the name of the source encoding. | |
54 | TO is the name of the target encoding. | |
55 | BYTES holds the bytes to convert; this is assumed to be characters | |
56 | in the FROM encoding. | |
57 | NUM_BYTES is the number of bytes in BYTES. | |
58 | WIDTH is the width of a character from the FROM charset, in bytes. | |
59 | For a variable width encoding, WIDTH should be the size of a "base | |
60 | character". | |
61 | OUTPUT is an obstack where the converted data is written. The | |
62 | caller is responsible for initializing the obstack, and for | |
63 | destroying the obstack should an error occur. | |
64 | TRANSLIT specifies how invalid conversions should be handled. */ | |
aff410f1 | 65 | |
6c7a06a3 | 66 | void convert_between_encodings (const char *from, const char *to, |
aff410f1 MS |
67 | const gdb_byte *bytes, |
68 | unsigned int num_bytes, | |
6c7a06a3 TT |
69 | int width, struct obstack *output, |
70 | enum transliterations translit); | |
71 | ||
72 | ||
73 | /* These values are used by wchar_iterator::iterate to report its outcome. */ | |
74 | enum wchar_iterate_result | |
75 | { | |
76 | /* Ordinary return. */ | |
77 | wchar_iterate_ok, | |
78 | /* Invalid input sequence. */ | |
79 | wchar_iterate_invalid, | |
80 | /* Incomplete input sequence at the end of the input. */ | |
81 | wchar_iterate_incomplete, | |
82 | /* EOF: all input bytes were successfully converted. */ | |
83 | wchar_iterate_eof | |
84 | }; | |
85 | ||
cda6c55b TT |
86 | /* An iterator that returns host wchar_t's from a target string. */ |
87 | class wchar_iterator | |
88 | { | |
89 | public: | |
6c7a06a3 | 90 | |
cda6c55b TT |
91 | /* Create a new character iterator which returns wchar_t's. INPUT is |
92 | the input buffer. BYTES is the number of bytes in the input | |
93 | buffer. CHARSET is the name of the character set in which INPUT is | |
94 | encoded. WIDTH is the number of bytes in a base character of | |
95 | CHARSET. | |
b24b0d6c TT |
96 | |
97 | This constructor can throw on error. */ | |
cda6c55b TT |
98 | wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset, |
99 | size_t width); | |
100 | ||
101 | ~wchar_iterator (); | |
102 | ||
103 | /* Perform a single iteration of a wchar_t iterator. | |
6c7a06a3 | 104 | |
cda6c55b TT |
105 | Returns the number of characters converted. A negative result |
106 | means that EOF has been reached. A positive result indicates the | |
107 | number of valid wchar_ts in the result; *OUT_CHARS is updated to | |
108 | point to the first valid character. | |
6c7a06a3 | 109 | |
cda6c55b TT |
110 | In all cases aside from EOF, *PTR is set to point to the first |
111 | converted target byte. *LEN is set to the number of bytes | |
112 | converted. | |
6c7a06a3 | 113 | |
cda6c55b TT |
114 | A zero result means one of several unusual results. *OUT_RESULT is |
115 | set to indicate the type of un-ordinary return. | |
6c7a06a3 | 116 | |
cda6c55b TT |
117 | wchar_iterate_invalid means that an invalid input character was |
118 | seen. The iterator is advanced by WIDTH (the argument to | |
119 | the wchar_iterator constructor) bytes. | |
6c7a06a3 | 120 | |
cda6c55b TT |
121 | wchar_iterate_incomplete means that an incomplete character was |
122 | seen at the end of the input sequence. | |
6c7a06a3 | 123 | |
cda6c55b TT |
124 | wchar_iterate_eof means that all bytes were successfully |
125 | converted. The other output arguments are not set. */ | |
126 | int iterate (enum wchar_iterate_result *out_result, gdb_wchar_t **out_chars, | |
127 | const gdb_byte **ptr, size_t *len); | |
128 | ||
129 | private: | |
130 | ||
131 | /* The underlying iconv descriptor; a plain int when PHONY_ICONV is defined. */ | |
5562a44e PA |
132 | #ifdef PHONY_ICONV |
133 | int m_desc; | |
134 | #else | |
cda6c55b | 135 | iconv_t m_desc; |
5562a44e | 136 | #endif |
cda6c55b TT |
137 | |
138 | /* The input string. This is updated as we convert characters. */ | |
139 | const gdb_byte *m_input; | |
140 | /* The number of bytes remaining in the input. */ | |
141 | size_t m_bytes; | |
142 | ||
143 | /* The width of an input character. */ | |
144 | size_t m_width; | |
145 | ||
146 | /* The output buffer, holding gdb_wchar_t elements. */ | |
d5722aa2 | 147 | gdb::def_vector<gdb_wchar_t> m_out; |
cda6c55b | 148 | }; |
6c7a06a3 TT |
149 | |
150 | \f | |
151 | ||
152 | /* GDB needs to know a few details of its execution character set. | |
153 | This knowledge is isolated here and in charset.c. */ | |
154 | ||
155 | /* The escape character (ASCII ESC, decimal 27). */ | |
156 | #define HOST_ESCAPE_CHAR 27 | |
157 | ||
158 | /* Convert a letter, like 'c', to its corresponding control | |
159 | character (e.g. control-C for 'c'). */ | |
160 | char host_letter_to_control_character (char c); | |
161 | ||
162 | /* Convert a hex digit character to its numeric value. E.g., 'f' is | |
163 | converted to 15. This function assumes that C is a valid hex | |
164 | digit. Both upper- and lower-case letters are recognized. */ | |
165 | int host_hex_value (char c); | |
234b45d4 KB |
166 | |
167 | #endif /* CHARSET_H */ |