Commit | Line | Data |
---|---|---|
234b45d4 | 1 | /* Character set conversion support for GDB. |
6aba47ca | 2 | Copyright (C) 2001, 2007 Free Software Foundation, Inc. |
234b45d4 KB |
3 | |
4 | This file is part of GDB. | |
5 | ||
6 | This program is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
a9762ec7 | 8 | the Free Software Foundation; either version 3 of the License, or |
234b45d4 KB |
9 | (at your option) any later version. |
10 | ||
11 | This program is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
a9762ec7 | 17 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
234b45d4 KB |
18 | |
19 | #ifndef CHARSET_H | |
20 | #define CHARSET_H | |
21 | ||
22 | ||
23 | /* If the target program uses a different character set than the host, | |
24 | GDB has some support for translating between the two; GDB converts | |
25 | characters and strings to the host character set before displaying | |
26 | them, and converts characters and strings appearing in expressions | |
27 | entered by the user to the target character set. | |
28 | ||
29 | At the moment, GDB only supports single-byte, stateless character | |
30 | sets. This includes the ISO-8859 family (ASCII extended with | |
31 | accented characters, and (I think) Cyrillic, for European | |
32 | languages), and the EBCDIC family (used on IBM's mainframes). | |
33 | Unfortunately, it excludes many Asian scripts, the fixed- and | |
34 | variable-width Unicode encodings, and other desirable things. | |
35 | Patches are welcome! (For example, it would be nice if the Java | |
36 | string support could simply get absorbed into some more general | |
37 | multi-byte encoding support.) | |
38 | ||
39 | Furthermore, GDB's code pretty much assumes that the host character | |
40 | set is some superset of ASCII; there are plenty of ('0' + n) | |
41 | expressions and the like. | |
42 | ||
43 | When the `iconv' library routine supports a character set meeting | |
44 | the requirements above, it's easy to plug an entry into GDB's table | |
45 | that uses iconv to handle the details. */ | |
46 | ||
234b45d4 KB |
47 | /* Return the name of the current host/target character set. The |
48 | result is owned by the charset module; the caller should not free | |
49 | it. */ | |
50 | const char *host_charset (void); | |
51 | const char *target_charset (void); | |
52 | ||
234b45d4 KB |
53 | /* In general, the set of C backslash escapes (\n, \f) is specific to |
54 | the character set. Not all character sets will have form feed | |
55 | characters, for example. | |
56 | ||
57 | The following functions allow GDB to parse and print control | |
58 | characters in a character-set-independent way. They are both | |
59 | language-specific (to C and C++) and character-set-specific. | |
60 | Putting them here is a compromise. */ | |
61 | ||
62 | ||
63 | /* If the target character TARGET_CHAR has a backslash escape in the | |
64 | C language (i.e., a character like 'n' or 't'), return the host | |
65 | character string that should follow the backslash. Otherwise, | |
66 | return zero. | |
67 | ||
68 | When this function returns non-zero, the string it returns is | |
69 | statically allocated; the caller is not responsible for freeing it. */ | |
70 | const char *c_target_char_has_backslash_escape (int target_char); | |
71 | ||
72 | ||
73 | /* If the host character HOST_CHAR is a valid backslash escape in the | |
74 | C language for the target character set, return non-zero, and set | |
75 | *TARGET_CHAR to the target character the backslash escape represents. | |
76 | Otherwise, return zero. */ | |
77 | int c_parse_backslash (int host_char, int *target_char); | |
78 | ||
79 | ||
80 | /* Return non-zero if the host character HOST_CHAR can be printed | |
81 | literally --- that is, if it can be readably printed as itself in a | |
82 | character or string constant. Return zero if it should be printed | |
83 | using some kind of numeric escape, like '\031' in C, '^(25)' in | |
84 | Chill, or #25 in Pascal. */ | |
85 | int host_char_print_literally (int host_char); | |
86 | ||
87 | ||
88 | /* If the host character HOST_CHAR has an equivalent in the target | |
89 | character set, set *TARGET_CHAR to that equivalent, and return | |
90 | non-zero. Otherwise, return zero. */ | |
91 | int host_char_to_target (int host_char, int *target_char); | |
92 | ||
93 | ||
94 | /* If the target character TARGET_CHAR has an equivalent in the host | |
95 | character set, set *HOST_CHAR to that equivalent, and return | |
96 | non-zero. Otherwise, return zero. */ | |
97 | int target_char_to_host (int target_char, int *host_char); | |
98 | ||
99 | ||
100 | /* If the target character TARGET_CHAR has a corresponding control | |
101 | character (also in the target character set), set *TARGET_CTRL_CHAR | |
102 | to the control character, and return non-zero. Otherwise, return | |
103 | zero. */ | |
104 | int target_char_to_control_char (int target_char, int *target_ctrl_char); | |
105 | ||
106 | ||
107 | #endif /* CHARSET_H */ |