[deliverable/binutils-gdb.git] / gdb / charset.h

/* Character set conversion support for GDB.
   Copyright (C) 2001-2019 Free Software Foundation, Inc.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#ifndef CHARSET_H
#define CHARSET_H

#include "gdbsupport/def-vector.h"

/* If the target program uses a different character set than the host,
   GDB has some support for translating between the two; GDB converts
   characters and strings to the host character set before displaying
   them, and converts characters and strings appearing in expressions
   entered by the user to the target character set.

   GDB's code pretty much assumes that the host character set is some
   superset of ASCII; there are plenty of ('0' + n) expressions and
   the like.  */

/* Return the name of the current host/target character set.  The
   result is owned by the charset module; the caller should not free
   it.

   host_charset takes no arguments and reports the host character
   set.  target_charset and target_wide_charset each take a GDBARCH
   argument and report a target character set; the latter is
   presumably the set used for the target's wide characters (confirm
   against charset.c).  */
const char *host_charset (void);
const char *target_charset (struct gdbarch *gdbarch);
const char *target_wide_charset (struct gdbarch *gdbarch);

/* Selects the kind of transliteration that
   convert_between_encodings performs.  */
enum transliterations
  {
    /* No transliteration: a failure to convert is an error.  */
    translit_none = 0,
    /* Transliterate to a host char instead.  */
    translit_char = 1
  };

/* Convert between two encodings.

   FROM is the name of the source encoding.
   TO is the name of the target encoding.
   BYTES holds the bytes to convert; these are assumed to be
   characters in the FROM encoding.
   NUM_BYTES is the number of bytes in BYTES.
   WIDTH is the width of a character from the FROM charset, in bytes.
   For a variable width encoding, WIDTH should be the size of a "base
   character".
   OUTPUT is an obstack where the converted data is written.  The
   caller is responsible for initializing the obstack, and for
   destroying the obstack should an error occur.
   TRANSLIT specifies how invalid conversions should be handled; see
   enum transliterations.

   Nothing is returned; the converted data is delivered only through
   OUTPUT.  */

void convert_between_encodings (const char *from, const char *to,
				const gdb_byte *bytes,
				unsigned int num_bytes,
				int width, struct obstack *output,
				enum transliterations translit);


/* Status codes that wchar_iterator::iterate stores through its
   OUT_RESULT argument.  */
enum wchar_iterate_result
  {
    /* The ordinary, successful return.  */
    wchar_iterate_ok = 0,
    /* The input contained an invalid sequence.  */
    wchar_iterate_invalid = 1,
    /* The input ended with an incomplete sequence.  */
    wchar_iterate_incomplete = 2,
    /* End of input (EOF).  */
    wchar_iterate_eof = 3
  };

/* An iterator that returns host wchar_t's from a target string.

   The iterator is constructed over a buffer of input bytes in a named
   character set, and each call to iterate hands back converted
   gdb_wchar_t values together with the span of input bytes they came
   from.  */
class wchar_iterator
{
 public:

  /* Create a new character iterator which returns wchar_t's.  INPUT is
     the input buffer.  BYTES is the number of bytes in the input
     buffer.  CHARSET is the name of the character set in which INPUT is
     encoded.  WIDTH is the number of bytes in a base character of
     CHARSET.

     This constructor can throw on error.  */
  wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
		  size_t width);

  /* Destroy the iterator.  NOTE(review): presumably this releases the
     conversion descriptor held in m_desc; confirm in charset.c.  */
  ~wchar_iterator ();

  /* Perform a single iteration of a wchar_t iterator.

     Returns the number of characters converted:

     - A positive result indicates the number of valid wchar_ts in the
       result; *OUT_CHARS is updated to point to the first valid
       character.

     - A zero result means one of several unusual results.
       *OUT_RESULT is set to indicate the type of un-ordinary return.

     - A negative result means that EOF has been reached.

     In all cases aside from EOF, *PTR is set to point to the first
     converted target byte.  *LEN is set to the number of bytes
     converted.

     The values reported through *OUT_RESULT are:

     wchar_iterate_invalid means that an invalid input character was
     seen.  The iterator is advanced by WIDTH (the argument to
     the wchar_iterator constructor) bytes.

     wchar_iterate_incomplete means that an incomplete character was
     seen at the end of the input sequence.

     wchar_iterate_eof means that all bytes were successfully
     converted.  The other output arguments are not set.

     NOTE(review): EOF is described both as a negative return and as
     one of the *OUT_RESULT codes listed with the zero-result case;
     confirm in charset.c which return value accompanies
     wchar_iterate_eof, and whether *OUT_RESULT is also written on a
     positive return.  */
  int iterate (enum wchar_iterate_result *out_result, gdb_wchar_t **out_chars,
	       const gdb_byte **ptr, size_t *len);

 private:

  /* The underlying iconv descriptor.  When PHONY_ICONV is defined the
     descriptor is a plain int rather than an iconv_t.  */
#ifdef PHONY_ICONV
  int m_desc;
#else
  iconv_t m_desc;
#endif

  /* The input string.  This is updated as we convert characters.  */
  const gdb_byte *m_input;
  /* The number of bytes remaining in the input.  */
  size_t m_bytes;

  /* The width of an input character, in bytes (the WIDTH constructor
     argument).  */
  size_t m_width;

  /* The output buffer, holding converted gdb_wchar_t values.  */
  gdb::def_vector<gdb_wchar_t> m_out;
};

\f

/* GDB needs to know a few details of its execution character set.
   This knowledge is isolated here and in charset.c.  */

/* The escape character, written in hex (equal to decimal 27).  */
#define HOST_ESCAPE_CHAR 0x1b

/* Convert a letter, like 'c', to its corresponding control
   character.  C is the letter; the control character is returned as
   a char.  NOTE(review): the exact mapping, and the behavior when C
   is not a letter, are defined in charset.c and are not specified
   here.  */
char host_letter_to_control_character (char c);

/* Convert a hex digit character to its numeric value.  E.g., 'f' is
   converted to 15.  Both upper- and lower-case letters are
   recognized.

   This function assumes that C is a valid hex digit; the result for
   any other character is not specified here, so callers should
   validate C first.  */
int host_hex_value (char c);

#endif /* CHARSET_H */
Commit	Line	Data
234b45d4	1	/* Character set conversion support for GDB.
42a4f53d	2	Copyright (C) 2001-2019 Free Software Foundation, Inc.
234b45d4 KB	3
	4	This file is part of GDB.
	5
	6	This program is free software; you can redistribute it and/or modify
	7	it under the terms of the GNU General Public License as published by
a9762ec7	8	the Free Software Foundation; either version 3 of the License, or
234b45d4 KB	9	(at your option) any later version.
	10
	11	This program is distributed in the hope that it will be useful,
	12	but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	14	GNU General Public License for more details.
	15
	16	You should have received a copy of the GNU General Public License
a9762ec7	17	along with this program. If not, see <http://www.gnu.org/licenses/>. */
234b45d4 KB	18
	19	#ifndef CHARSET_H
	20	#define CHARSET_H
	21
268a13a5	22	#include "gdbsupport/def-vector.h"
cda6c55b	23
234b45d4 KB	24	/* If the target program uses a different character set than the host,
	25	GDB has some support for translating between the two; GDB converts
	26	characters and strings to the host character set before displaying
	27	them, and converts characters and strings appearing in expressions
	28	entered by the user to the target character set.
	29
6c7a06a3 TT	30	GDB's code pretty much assumes that the host character set is some
	31	superset of ASCII; there are plenty of ('0' + n) expressions and
	32	the like. */
234b45d4	33
234b45d4 KB	34	/* Return the name of the current host/target character set. The
	35	result is owned by the charset module; the caller should not free
	36	it. */
	37	const char *host_charset (void);
f870a310 TT	38	const char *target_charset (struct gdbarch *gdbarch);
f870a310 TT	39	const char *target_wide_charset (struct gdbarch *gdbarch);
6c7a06a3 TT	40
	41	/* These values are used to specify the type of transliteration done
	42	by convert_between_encodings. */
	43	enum transliterations
	44	{
	45	/* Error on failure to convert. */
	46	translit_none,
	47	/* Transliterate to host char. */
	48	translit_char
	49	};
	50
	51	/* Convert between two encodings.
	52
	53	FROM is the name of the source encoding.
	54	TO is the name of the target encoding.
	55	BYTES holds the bytes to convert; this is assumed to be characters
	56	in the target encoding.
	57	NUM_BYTES is the number of bytes.
	58	WIDTH is the width of a character from the FROM charset, in bytes.
	59	For a variable width encoding, WIDTH should be the size of a "base
	60	character".
	61	OUTPUT is an obstack where the converted data is written. The
	62	caller is responsible for initializing the obstack, and for
	63	destroying the obstack should an error occur.
	64	TRANSLIT specifies how invalid conversions should be handled. */
aff410f1	65
6c7a06a3	66	void convert_between_encodings (const char *from, const char *to,
aff410f1 MS	67	const gdb_byte *bytes,
aff410f1 MS	68	unsigned int num_bytes,
6c7a06a3 TT	69	int width, struct obstack *output,
	70	enum transliterations translit);
	71
	72
	73	/* These values are used by wchar_iterate to report errors. */
	74	enum wchar_iterate_result
	75	{
	76	/* Ordinary return. */
	77	wchar_iterate_ok,
	78	/* Invalid input sequence. */
	79	wchar_iterate_invalid,
	80	/* Incomplete input sequence at the end of the input. */
	81	wchar_iterate_incomplete,
	82	/* EOF. */
	83	wchar_iterate_eof
	84	};
	85
cda6c55b TT	86	/* An iterator that returns host wchar_t's from a target string. */
	87	class wchar_iterator
	88	{
	89	public:
6c7a06a3	90
cda6c55b TT	91	/* Create a new character iterator which returns wchar_t's. INPUT is
	92	the input buffer. BYTES is the number of bytes in the input
	93	buffer. CHARSET is the name of the character set in which INPUT is
	94	encoded. WIDTH is the number of bytes in a base character of
	95	CHARSET.
b24b0d6c TT	96
b24b0d6c TT	97	This constructor can throw on error. */
cda6c55b TT	98	wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
	99	size_t width);
	100
	101	~wchar_iterator ();
	102
	103	/* Perform a single iteration of a wchar_t iterator.
6c7a06a3	104
cda6c55b TT	105	Returns the number of characters converted. A negative result
	106	means that EOF has been reached. A positive result indicates the
	107	number of valid wchar_ts in the result; *OUT_CHARS is updated to
	108	point to the first valid character.
6c7a06a3	109
cda6c55b TT	110	In all cases aside from EOF, *PTR is set to point to the first
	111	converted target byte. *LEN is set to the number of bytes
	112	converted.
6c7a06a3	113
cda6c55b TT	114	A zero result means one of several unusual results. *OUT_RESULT is
cda6c55b TT	115	set to indicate the type of un-ordinary return.
6c7a06a3	116
cda6c55b TT	117	wchar_iterate_invalid means that an invalid input character was
	118	seen. The iterator is advanced by WIDTH (the argument to
	119	the wchar_iterator constructor) bytes.
6c7a06a3	120
cda6c55b TT	121	wchar_iterate_incomplete means that an incomplete character was
cda6c55b TT	122	seen at the end of the input sequence.
6c7a06a3	123
cda6c55b TT	124	wchar_iterate_eof means that all bytes were successfully
	125	converted. The other output arguments are not set. */
	126	int iterate (enum wchar_iterate_result *out_result, gdb_wchar_t **out_chars,
	127	const gdb_byte **ptr, size_t *len);
	128
	129	private:
	130
	131	/* The underlying iconv descriptor. */
5562a44e PA	132	#ifdef PHONY_ICONV
	133	int m_desc;
	134	#else
cda6c55b	135	iconv_t m_desc;
5562a44e	136	#endif
cda6c55b TT	137
	138	/* The input string. This is updated as we convert characters. */
	139	const gdb_byte *m_input;
	140	/* The number of bytes remaining in the input. */
	141	size_t m_bytes;
	142
	143	/* The width of an input character. */
	144	size_t m_width;
	145
	146	/* The output buffer. */
d5722aa2	147	gdb::def_vector<gdb_wchar_t> m_out;
cda6c55b	148	};
6c7a06a3 TT	149
	150	\f
	151
	152	/* GDB needs to know a few details of its execution character set.
	153	This knowledge is isolated here and in charset.c. */
	154
	155	/* The escape character. */
	156	#define HOST_ESCAPE_CHAR 27
	157
	158	/* Convert a letter, like 'c', to its corresponding control
	159	character. */
	160	char host_letter_to_control_character (char c);
	161
	162	/* Convert a hex digit character to its numeric value. E.g., 'f' is
	163	converted to 15. This function assumes that C is a valid hex
	164	digit. Both upper- and lower-case letters are recognized. */
	165	int host_hex_value (char c);
234b45d4 KB	166
234b45d4 KB	167	#endif /* CHARSET_H */