| 1 | /* Character set conversion support for GDB. |
| 2 | Copyright (C) 2001, 2007, 2008, 2009, 2010, 2011 |
| 3 | Free Software Foundation, Inc. |
| 4 | |
| 5 | This file is part of GDB. |
| 6 | |
| 7 | This program is free software; you can redistribute it and/or modify |
| 8 | it under the terms of the GNU General Public License as published by |
| 9 | the Free Software Foundation; either version 3 of the License, or |
| 10 | (at your option) any later version. |
| 11 | |
| 12 | This program is distributed in the hope that it will be useful, |
| 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | GNU General Public License for more details. |
| 16 | |
| 17 | You should have received a copy of the GNU General Public License |
| 18 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
| 19 | |
| 20 | #ifndef CHARSET_H |
| 21 | #define CHARSET_H |
| 22 | |
| 23 | /* If the target program uses a different character set than the host, |
| 24 | GDB has some support for translating between the two; GDB converts |
| 25 | characters and strings to the host character set before displaying |
| 26 | them, and converts characters and strings appearing in expressions |
| 27 | entered by the user to the target character set. |
| 28 | |
| 29 | GDB's code pretty much assumes that the host character set is some |
| 30 | superset of ASCII; there are plenty of ('0' + n) expressions and |
| 31 | the like. */ |
| 32 | |
| 33 | /* Return the name of the current host character set (host_charset) |
| 34 | or of a target character set (target_charset, target_wide_charset). |
| 35 | The result is owned by the charset module; the caller must not free it. */ |
| 36 | const char *host_charset (void); |
| 37 | const char *target_charset (struct gdbarch *gdbarch); |
| 38 | const char *target_wide_charset (struct gdbarch *gdbarch); |
| 39 | |
| 40 | /* Values for the TRANSLIT argument of convert_between_encodings; they |
| 41 | select how a failed conversion is handled. */ |
| 42 | enum transliterations |
| 43 | { |
| 44 | /* Report an error when a conversion fails. */ |
| 45 | translit_none, |
| 46 | /* Transliterate to host char; presumably done in place of an error. */ |
| 47 | translit_char |
| 48 | }; |
| 49 | |
| 50 | /* Convert between two encodings. |
| 51 | |
| 52 | FROM is the name of the source encoding. |
| 53 | TO is the name of the destination encoding. |
| 54 | BYTES holds the bytes to convert; they are interpreted as characters |
| 55 | in the FROM encoding. |
| 56 | NUM_BYTES is the number of bytes in BYTES. |
| 57 | WIDTH is the width of a character from the FROM charset, in bytes. |
| 58 | For a variable width encoding, WIDTH should be the size of a "base |
| 59 | character". |
| 60 | OUTPUT is an obstack where the converted data is written. The caller |
| 61 | must initialize the obstack, and destroy it should an error occur. |
| 62 | TRANSLIT specifies how invalid conversions should be handled; see |
| 63 | enum transliterations. */ |
| 64 | |
| 65 | void convert_between_encodings (const char *from, const char *to, |
| 66 | const gdb_byte *bytes, |
| 67 | unsigned int num_bytes, |
| 68 | int width, struct obstack *output, |
| 69 | enum transliterations translit); |
| 70 | |
| 71 | |
| 72 | /* Values that wchar_iterate stores in *OUT_RESULT to describe its return. */ |
| 73 | enum wchar_iterate_result |
| 74 | { |
| 75 | /* Ordinary return. */ |
| 76 | wchar_iterate_ok, |
| 77 | /* Invalid input sequence. */ |
| 78 | wchar_iterate_invalid, |
| 79 | /* Incomplete input sequence at the end of the input. */ |
| 80 | wchar_iterate_incomplete, |
| 81 | /* EOF: all input bytes have been converted. */ |
| 82 | wchar_iterate_eof |
| 83 | }; |
| 84 | |
| 85 | /* Opaque wchar iterator type; see make_wchar_iterator and wchar_iterate. */ |
| 86 | struct wchar_iterator; |
| 87 | |
| 88 | /* Create a new character iterator which returns gdb_wchar_t's. INPUT |
| 89 | is the input buffer. BYTES is the number of bytes in the input |
| 90 | buffer. CHARSET is the name of the character set in which INPUT is |
| 91 | encoded. WIDTH is the number of bytes in a base character of |
| 92 | CHARSET. |
| 93 | |
| 94 | This function either returns a new iterator, or calls error. The |
| 95 | result can be freed using a cleanup; see |
| 96 | make_cleanup_wchar_iterator. */ |
| 97 | struct wchar_iterator *make_wchar_iterator (const gdb_byte *input, |
| 98 | size_t bytes, |
| 99 | const char *charset, |
| 100 | size_t width); |
| 101 | |
| 102 | /* Return a new cleanup suitable for destroying the wchar iterator |
| 103 | ITER, as returned by make_wchar_iterator. */ |
| 104 | struct cleanup *make_cleanup_wchar_iterator (struct wchar_iterator *iter); |
| 105 | |
| 106 | /* Perform a single iteration of a wchar_t iterator. |
| 107 | |
| 108 | Returns the number of characters converted. A negative result means |
| 109 | that EOF has been reached. A positive result indicates the number of |
| 110 | valid gdb_wchar_ts in the result; *OUT_CHARS is updated to point to |
| 111 | the first valid character. |
| 112 | |
| 113 | In all cases aside from EOF, *PTR is set to point to the first converted |
| 114 | target byte, and *LEN is set to the number of bytes converted. |
| 115 | |
| 116 | A zero result means one of several unusual results. *OUT_RESULT is |
| 117 | set to indicate the type of un-ordinary return. |
| 118 | |
| 119 | wchar_iterate_invalid means that an invalid input character was seen. |
| 120 | The iterator is advanced by WIDTH (the argument to |
| 121 | make_wchar_iterator) bytes. |
| 122 | |
| 123 | wchar_iterate_incomplete means that an incomplete character was |
| 124 | seen at the end of the input sequence. |
| 125 | |
| 126 | wchar_iterate_eof means that all bytes were successfully converted; |
| 127 | the other output arguments are not set. NOTE(review): EOF is described |
| 128 | above as a negative result, not a zero one -- confirm in charset.c. */ |
| 129 | int wchar_iterate (struct wchar_iterator *iter, |
| 130 | enum wchar_iterate_result *out_result, |
| 131 | gdb_wchar_t **out_chars, |
| 132 | const gdb_byte **ptr, size_t *len); |
| 133 | |
| 134 | \f |
| 135 | |
| 136 | /* GDB needs to know a few details of its execution character set. |
| 137 | This knowledge is isolated here and in charset.c. */ |
| 138 | |
| 139 | /* The escape character (ESC, decimal 27 in ASCII). */ |
| 140 | #define HOST_ESCAPE_CHAR 27 |
| 141 | |
| 142 | /* Return the control character corresponding to the letter C |
| 143 | (a letter like 'c'). */ |
| 144 | char host_letter_to_control_character (char c); |
| 145 | |
| 146 | /* Return the numeric value of the hex digit character C; e.g., 'f' |
| 147 | yields 15. C must be a valid hex digit; this is assumed, not |
| 148 | checked. Both upper- and lower-case letters are recognized. */ |
| 149 | int host_hex_value (char c); |
| 150 | |
| 151 | #endif /* CHARSET_H */ |