| 1 | /* Character set conversion support for GDB. |
| 2 | Copyright 2001 Free Software Foundation, Inc. |
| 3 | |
| 4 | This file is part of GDB. |
| 5 | |
| 6 | This program is free software; you can redistribute it and/or modify |
| 7 | it under the terms of the GNU General Public License as published by |
| 8 | the Free Software Foundation; either version 2 of the License, or |
| 9 | (at your option) any later version. |
| 10 | |
| 11 | This program is distributed in the hope that it will be useful, |
| 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | GNU General Public License for more details. |
| 15 | |
| 16 | You should have received a copy of the GNU General Public License |
| 17 | along with this program; if not, write to the Free Software |
| 18 | Foundation, Inc., 59 Temple Place - Suite 330, |
| 19 | Boston, MA 02111-1307, USA. */ |
| 20 | |
| 21 | #ifndef CHARSET_H |
| 22 | #define CHARSET_H |
| 23 | |
| 24 | |
| 25 | /* If the target program uses a different character set than the host, |
| 26 | GDB has some support for translating between the two; GDB converts |
| 27 | characters and strings to the host character set before displaying |
| 28 | them, and converts characters and strings appearing in expressions |
| 29 | entered by the user to the target character set. |
| 30 | |
| 31 | At the moment, GDB only supports single-byte, stateless character |
| 32 | sets. This includes the ISO-8859 family (ASCII extended with |
| 33 | accented characters, and (I think) Cyrillic, for European |
| 34 | languages), and the EBCDIC family (used on IBM's mainframes). |
| 35 | Unfortunately, it excludes many Asian scripts, the fixed- and |
| 36 | variable-width Unicode encodings, and other desirable things. |
| 37 | Patches are welcome! (For example, it would be nice if the Java |
| 38 | string support could simply get absorbed into some more general |
| 39 | multi-byte encoding support.) |
| 40 | |
| 41 | Furthermore, GDB's code pretty much assumes that the host character |
| 42 | set is some superset of ASCII; there are plenty of ('0' + n) |
| 43 | expressions and the like. |
| 44 | |
| 45 | When the `iconv' library routine supports a character set meeting |
| 46 | the requirements above, it's easy to plug an entry into GDB's table |
| 47 | that uses iconv to handle the details. */ |
| 48 | |
| 49 | |
| 50 | /* Set the host character set to CHARSET. CHARSET must be a superset |
| 51 | of ASCII, since GDB's code assumes this. */ |
| 52 | void set_host_charset (const char *charset); |
| 53 | |
| 54 | |
| 55 | /* Set the target character set to CHARSET. */ |
| 56 | void set_target_charset (const char *charset); |
| 57 | |
| 58 | |
| 59 | /* Return the name of the current host/target character set. The |
| 60 | result is owned by the charset module; the caller should not free |
| 61 | it. */ |
| 62 | const char *host_charset (void); |
| 63 | const char *target_charset (void); |
| 64 | |
| 65 | |
| 66 | /* In general, the set of C backslash escapes (\n, \f) is specific to |
| 67 | the character set. Not all character sets will have form feed |
| 68 | characters, for example. |
| 69 | |
| 70 | The following functions allow GDB to parse and print control |
| 71 | characters in a character-set-independent way. They are both |
| 72 | language-specific (to C and C++) and character-set-specific. |
| 73 | Putting them here is a compromise. */ |
| 74 | |
| 75 | |
| 76 | /* If the target character TARGET_CHAR has a backslash escape in the |
| 77 | C language (i.e., a character like 'n' or 't'), return the host |
| 78 | character string that should follow the backslash. Otherwise, |
| 79 | return zero. |
| 80 | |
| 81 | When this function returns non-zero, the string it returns is |
| 82 | statically allocated; the caller is not responsible for freeing it. */ |
| 83 | const char *c_target_char_has_backslash_escape (int target_char); |
| 84 | |
| 85 | |
| 86 | /* If the host character HOST_CHAR is a valid backslash escape in the |
| 87 | C language for the target character set, return non-zero, and set |
| 88 | *TARGET_CHAR to the target character the backslash escape represents. |
| 89 | Otherwise, return zero. */ |
| 90 | int c_parse_backslash (int host_char, int *target_char); |
| 91 | |
| 92 | |
| 93 | /* Return non-zero if the host character HOST_CHAR can be printed |
| 94 | literally --- that is, if it can be readably printed as itself in a |
| 95 | character or string constant. Return zero if it should be printed |
| 96 | using some kind of numeric escape, like '\031' in C, '^(25)' in |
| 97 | Chill, or #25 in Pascal. */ |
| 98 | int host_char_print_literally (int host_char); |
| 99 | |
| 100 | |
| 101 | /* If the host character HOST_CHAR has an equivalent in the target |
| 102 | character set, set *TARGET_CHAR to that equivalent, and return |
| 103 | non-zero. Otherwise, return zero. */ |
| 104 | int host_char_to_target (int host_char, int *target_char); |
| 105 | |
| 106 | |
| 107 | /* If the target character TARGET_CHAR has an equivalent in the host |
| 108 | character set, set *HOST_CHAR to that equivalent, and return |
| 109 | non-zero. Otherwise, return zero. */ |
| 110 | int target_char_to_host (int target_char, int *host_char); |
| 111 | |
| 112 | |
| 113 | /* If the target character TARGET_CHAR has a corresponding control |
| 114 | character (also in the target character set), set *TARGET_CTRL_CHAR |
| 115 | to the control character, and return non-zero. Otherwise, return |
| 116 | zero. */ |
| 117 | int target_char_to_control_char (int target_char, int *target_ctrl_char); |
| 118 | |
| 119 | |
| 120 | #endif /* CHARSET_H */ |