1 /* winduni.c -- unicode support for the windres program.
2 Copyright 1997, 1998, 2000, 2001, 2003, 2007
3 Free Software Foundation, Inc.
4 Written by Ian Lance Taylor, Cygnus Support.
5 Rewritten by Kai Tietz, Onevision.
7 This file is part of GNU Binutils.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
24 /* This file contains unicode support routines for the windres
25 program. Ideally, we would have generic unicode support which
26 would work on all systems. However, we don't. Instead, on a
27 Windows host, we are prepared to call some Windows routines. This
28 means that we will generate different output on Windows and Unix
29 hosts, but that seems better than not really supporting unicode at
34 #include "libiberty.h" /* for xstrdup */
36 /* Must be included before windows.h and winnls.h. */
37 #if defined (_WIN32) || defined (__CYGWIN__)
42 #include "safe-ctype.h"
48 static rc_uint_type
wind_WideCharToMultiByte (rc_uint_type
, const unichar
*, char *, rc_uint_type
);
49 static rc_uint_type
wind_MultiByteToWideChar (rc_uint_type
, const char *, unichar
*, rc_uint_type
);
50 static int unichar_isascii (const unichar
*, rc_uint_type
);
52 /* Convert an ASCII string to a unicode string. We just copy it,
53 expanding chars to shorts, rather than doing something intelligent. */
55 #if !defined (_WIN32) && !defined (__CYGWIN__)
57 /* Codepages mapped. */
58 static local_iconv_map codepages
[] =
61 { 1, "WINDOWS-1252" },
64 { 775, "WINBALTRIM" },
71 { 874, "WINDOWS-874" },
76 { 1250, "WINDOWS-1250" },
77 { 1251, "WINDOWS-1251" },
78 { 1252, "WINDOWS-1252" },
79 { 1253, "WINDOWS-1253" },
80 { 1254, "WINDOWS-1254" },
81 { 1255, "WINDOWS-1255" },
82 { 1256, "WINDOWS-1256" },
83 { 1257, "WINDOWS-1257" },
84 { 1258, "WINDOWS-1258" },
87 { CP_UTF16
, "UTF-16" },
88 { (rc_uint_type
) -1, NULL
}
91 /* Languages supported. */
92 static const wind_language_t languages
[] =
94 { 0x0000, 437, 1252, "Neutral", "Neutral" },
95 { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" }, { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
96 { 0x0403, 850, 1252, "Catalan", "Spain" }, { 0x0404, 950, 950, "Chinese", "Taiwan" },
97 { 0x0405, 852, 1250, "Czech", "Czech Republic" }, { 0x0406, 850, 1252, "Danish", "Denmark" },
98 { 0x0407, 850, 1252, "German", "Germany" }, { 0x0408, 737, 1253, "Greek", "Greece" },
99 { 0x0409, 437, 1252, "English", "United States" }, { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
100 { 0x040B, 850, 1252, "Finnish", "Finland" }, { 0x040C, 850, 1252, "French", "France" },
101 { 0x040D, 862, 1255, "Hebrew", "Israel" }, { 0x040E, 852, 1250, "Hungarian", "Hungary" },
102 { 0x040F, 850, 1252, "Icelandic", "Iceland" }, { 0x0410, 850, 1252, "Italian", "Italy" },
103 { 0x0411, 932, 932, "Japanese", "Japan" }, { 0x0412, 949, 949, "Korean", "Korea (south)" },
104 { 0x0413, 850, 1252, "Dutch", "Netherlands" }, { 0x0414, 850, 1252, "Norwegian (Bokmål)", "Norway" },
105 { 0x0415, 852, 1250, "Polish", "Poland" }, { 0x0416, 850, 1252, "Portuguese", "Brazil" },
106 { 0x0418, 852, 1250, "Romanian", "Romania" }, { 0x0419, 866, 1251, "Russian", "Russia" },
107 { 0x041A, 852, 1250, "Croatian", "Croatia" }, { 0x041B, 852, 1250, "Slovak", "Slovakia" },
108 { 0x041C, 852, 1250, "Albanian", "Albania" }, { 0x041D, 850, 1252, "Swedish", "Sweden" },
109 { 0x041E, 874, 874, "Thai", "Thailand" }, { 0x041F, 857, 1254, "Turkish", "Turkey" },
110 { 0x0421, 850, 1252, "Indonesian", "Indonesia" }, { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
111 { 0x0423, 866, 1251, "Belarusian", "Belarus" }, { 0x0424, 852, 1250, "Slovene", "Slovenia" },
112 { 0x0425, 775, 1257, "Estonian", "Estonia" }, { 0x0426, 775, 1257, "Latvian", "Latvia" },
113 { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
114 { 0x0429, 864, 1256, "Arabic", "Farsi" }, { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
115 { 0x042D, 850, 1252, "Basque", "Spain" },
116 { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
117 { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
118 { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
119 { 0x043C, 437, 1252, "Irish", "Ireland" },
120 { 0x043E, 850, 1252, "Malay", "Malaysia" },
121 { 0x0801, 864, 1256, "Arabic", "Iraq" },
122 { 0x0804, 936, 936, "Chinese (People's republic of China)", "People's republic of China" },
123 { 0x0807, 850, 1252, "German", "Switzerland" },
124 { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
125 { 0x080C, 850, 1252, "French", "Belgium" },
126 { 0x0810, 850, 1252, "Italian", "Switzerland" },
127 { 0x0813, 850, 1252, "Dutch", "Belgium" }, { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
128 { 0x0816, 850, 1252, "Portuguese", "Portugal" },
129 { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
130 { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
131 { 0x0C01, 864, 1256, "Arabic", "Egypt" },
132 { 0x0C04, 950, 950, "Chinese", "Hong Kong" },
133 { 0x0C07, 850, 1252, "German", "Austria" },
134 { 0x0C09, 850, 1252, "English", "Australia" }, { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
135 { 0x0C0C, 850, 1252, "French", "Canada"},
136 { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
137 { 0x1001, 864, 1256, "Arabic", "Libya" },
138 { 0x1004, 936, 936, "Chinese", "Singapore" },
139 { 0x1007, 850, 1252, "German", "Luxembourg" },
140 { 0x1009, 850, 1252, "English", "Canada" },
141 { 0x100A, 850, 1252, "Spanish", "Guatemala" },
142 { 0x100C, 850, 1252, "French", "Switzerland" },
143 { 0x1401, 864, 1256, "Arabic", "Algeria" },
144 { 0x1407, 850, 1252, "German", "Liechtenstein" },
145 { 0x1409, 850, 1252, "English", "New Zealand" }, { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
146 { 0x140C, 850, 1252, "French", "Luxembourg" },
147 { 0x1801, 864, 1256, "Arabic", "Morocco" },
148 { 0x1809, 850, 1252, "English", "Ireland" }, { 0x180A, 850, 1252, "Spanish", "Panama" },
149 { 0x180C, 850, 1252, "French", "Monaco" },
150 { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
151 { 0x1C09, 437, 1252, "English", "South Africa" }, { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
152 { 0x2001, 864, 1256, "Arabic", "Oman" },
153 { 0x2009, 850, 1252, "English", "Jamaica" }, { 0x200A, 850, 1252, "Spanish", "Venezuela" },
154 { 0x2401, 864, 1256, "Arabic", "Yemen" },
155 { 0x2409, 850, 1252, "English", "Caribbean" }, { 0x240A, 850, 1252, "Spanish", "Colombia" },
156 { 0x2801, 864, 1256, "Arabic", "Syria" },
157 { 0x2809, 850, 1252, "English", "Belize" }, { 0x280A, 850, 1252, "Spanish", "Peru" },
158 { 0x2C01, 864, 1256, "Arabic", "Jordan" },
159 { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
160 { 0x3001, 864, 1256, "Arabic", "Lebanon" },
161 { 0x3009, 437, 1252, "English", "Zimbabwe" }, { 0x300A, 850, 1252, "Spanish", "Ecuador" },
162 { 0x3401, 864, 1256, "Arabic", "Kuwait" },
163 { 0x3409, 437, 1252, "English", "Philippines" }, { 0x340A, 850, 1252, "Spanish", "Chile" },
164 { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
165 { 0x380A, 850, 1252, "Spanish", "Uruguay" },
166 { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
167 { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
168 { 0x4001, 864, 1256, "Arabic", "Qatar" },
169 { 0x400A, 850, 1252, "Spanish", "Bolivia" },
170 { 0x440A, 850, 1252, "Spanish", "El Salvador" },
171 { 0x480A, 850, 1252, "Spanish", "Honduras" },
172 { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
173 { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
174 { (unsigned) -1, 0, 0, NULL
, NULL
}
179 /* Specifies the default codepage to be used for unicode
180 transformations. By default this is CP_ACP. */
181 rc_uint_type wind_default_codepage
= CP_ACP
;
183 /* Specifies the currently used codepage for unicode
184 transformations. By default this is CP_ACP. */
185 rc_uint_type wind_current_codepage
= CP_ACP
;
187 /* Convert an ASCII string to a unicode string. We just copy it,
188 expanding chars to shorts, rather than doing something intelligent. */
191 unicode_from_ascii (rc_uint_type
*length
, unichar
**unicode
, const char *ascii
)
193 unicode_from_codepage (length
, unicode
, ascii
, wind_current_codepage
);
196 /* Convert a unicode string to an ASCII string. We just copy it,
197 shrinking shorts to chars, rather than doing something intelligent.
198 Shorts not within the char range are replaced by '_'. */
201 ascii_from_unicode (rc_uint_type
*length
, const unichar
*unicode
, char **ascii
)
203 codepage_from_unicode (length
, unicode
, ascii
, wind_current_codepage
);
206 /* Print the unicode string UNICODE to the file E. LENGTH is the
207 number of characters to print, or -1 if we should print until the
208 end of the string. FIXME: On a Windows host, we should be calling
209 some Windows function, probably WideCharToMultiByte. */
212 unicode_print (FILE *e
, const unichar
*unicode
, rc_uint_type length
)
220 if ((bfd_signed_vma
) length
> 0)
225 if (ch
== 0 && (bfd_signed_vma
) length
< 0)
230 if ((ch
& 0x7f) == ch
)
236 else if (ISPRINT (ch
))
271 fprintf (e
, "\\%03o", (unsigned int) ch
);
276 else if ((ch
& 0xff) == ch
)
277 fprintf (e
, "\\%03o", (unsigned int) ch
);
279 fprintf (e
, "\\x%04x", (unsigned int) ch
);
283 /* Print an ASCII string to a file. */
286 ascii_print (FILE *e
, const char *s
, rc_uint_type length
)
294 if ((bfd_signed_vma
) length
> 0)
299 if (ch
== 0 && (bfd_signed_vma
) length
< 0)
304 if ((ch
& 0x7f) == ch
)
310 else if (ISPRINT (ch
))
345 fprintf (e
, "\\%03o", (unsigned int) ch
);
351 fprintf (e
, "\\%03o", (unsigned int) ch
& 0xff);
356 unichar_len (const unichar
*unicode
)
361 while (unicode
[r
] != 0)
369 unichar_dup (const unichar
*unicode
)
376 for (len
= 0; unicode
[len
] != 0; ++len
)
379 r
= ((unichar
*) res_alloc (len
* sizeof (unichar
)));
380 memcpy (r
, unicode
, len
* sizeof (unichar
));
385 unichar_dup_uppercase (const unichar
*u
)
387 unichar
*r
= unichar_dup (u
);
393 for (i
= 0; r
[i
] != 0; ++i
)
395 if (r
[i
] >= 'a' && r
[i
] <= 'z')
402 unichar_isascii (const unichar
*u
, rc_uint_type len
)
406 if ((bfd_signed_vma
) len
< 0)
409 len
= (rc_uint_type
) unichar_len (u
);
414 for (i
= 0; i
< len
; i
++)
415 if ((u
[i
] & 0xff80) != 0)
421 unicode_print_quoted (FILE *e
, const unichar
*u
, rc_uint_type len
)
423 if (! unichar_isascii (u
, len
))
426 unicode_print (e
, u
, len
);
431 unicode_is_valid_codepage (rc_uint_type cp
)
433 if ((cp
& 0xffff) != cp
)
435 if (cp
== CP_UTF16
|| cp
== CP_ACP
)
438 #if !defined (_WIN32) && !defined (__CYGWIN__)
439 if (! wind_find_codepage_info (cp
))
443 return !! IsValidCodePage ((UINT
) cp
);
447 #if defined (_WIN32) || defined (__CYGWIN__)
449 #define max_cp_string_len 6
452 codepage_from_langid (unsigned short langid
)
454 char cp_string
[max_cp_string_len
];
457 memset (cp_string
, 0, max_cp_string_len
);
458 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
459 but is unavailable on Win95. */
460 c
= GetLocaleInfoA (MAKELCID (langid
, SORT_DEFAULT
),
461 LOCALE_IDEFAULTANSICODEPAGE
,
462 cp_string
, max_cp_string_len
);
463 /* If codepage data for an LCID is not installed on the user's system,
464 GetLocaleInfo returns an empty string. Fall back to system ANSI
468 return strtoul (cp_string
, 0, 10);
472 wincodepage_from_langid (unsigned short langid
)
474 char cp_string
[max_cp_string_len
];
477 memset (cp_string
, 0, max_cp_string_len
);
478 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
479 but is unavailable on Win95. */
480 c
= GetLocaleInfoA (MAKELCID (langid
, SORT_DEFAULT
),
481 LOCALE_IDEFAULTCODEPAGE
,
482 cp_string
, max_cp_string_len
);
483 /* If codepage data for an LCID is not installed on the user's system,
484 GetLocaleInfo returns an empty string. Fall back to system ANSI
488 return strtoul (cp_string
, 0, 10);
492 lang_from_langid (unsigned short langid
)
497 memset (cp_string
, 0, 261);
498 c
= GetLocaleInfoA (MAKELCID (langid
, SORT_DEFAULT
),
501 /* If codepage data for an LCID is not installed on the user's system,
502 GetLocaleInfo returns an empty string. Fall back to system ANSI
505 strcpy (cp_string
, "Neutral");
506 return xstrdup (cp_string
);
510 country_from_langid (unsigned short langid
)
515 memset (cp_string
, 0, 261);
516 c
= GetLocaleInfoA (MAKELCID (langid
, SORT_DEFAULT
),
519 /* If codepage data for an LCID is not installed on the user's system,
520 GetLocaleInfo returns an empty string. Fall back to system ANSI
523 strcpy (cp_string
, "Neutral");
524 return xstrdup (cp_string
);
529 const wind_language_t
*
530 wind_find_language_by_id (unsigned id
)
532 #if !defined (_WIN32) && !defined (__CYGWIN__)
537 for (i
= 0; languages
[i
].id
!= (unsigned) -1 && languages
[i
].id
!= id
; i
++)
539 if (languages
[i
].id
== id
)
540 return &languages
[i
];
543 static wind_language_t wl
;
546 wl
.doscp
= codepage_from_langid ((unsigned short) id
);
547 wl
.wincp
= wincodepage_from_langid ((unsigned short) id
);
548 wl
.name
= lang_from_langid ((unsigned short) id
);
549 wl
.country
= country_from_langid ((unsigned short) id
);
555 const local_iconv_map
*
556 wind_find_codepage_info (unsigned cp
)
558 #if !defined (_WIN32) && !defined (__CYGWIN__)
561 for (i
= 0; codepages
[i
].codepage
!= (rc_uint_type
) -1 && codepages
[i
].codepage
!= cp
; i
++)
563 if (codepages
[i
].codepage
== (rc_uint_type
) -1)
565 return &codepages
[i
];
567 static local_iconv_map lim
;
568 if (!unicode_is_valid_codepage (cp
))
576 /* Convert a codepage string to a unicode string. */
579 unicode_from_codepage (rc_uint_type
*length
, unichar
**u
, const char *src
, rc_uint_type cp
)
583 len
= wind_MultiByteToWideChar (cp
, src
, NULL
, 0);
586 *u
= ((unichar
*) res_alloc (len
));
587 wind_MultiByteToWideChar (cp
, src
, *u
, len
);
589 /* Discount the trailing '\0'. If MultiByteToWideChar failed,
590 this will set *length to -1. */
591 len
-= sizeof (unichar
);
594 *length
= len
/ sizeof (unichar
);
597 /* Convert a unicode string to a codepage string. */
600 codepage_from_unicode (rc_uint_type
*length
, const unichar
*unicode
, char **ascii
, rc_uint_type cp
)
604 len
= wind_WideCharToMultiByte (cp
, unicode
, NULL
, 0);
607 *ascii
= (char *) res_alloc (len
* sizeof (char));
608 wind_WideCharToMultiByte (cp
, unicode
, *ascii
, len
);
610 /* Discount the trailing '\0'. If WideCharToMultiByte failed,
611 this will set *length to -1. */
620 iconv_onechar (iconv_t cd
, const char *s
, char *d
, int d_len
, const char **n_s
, char **n_d
)
624 for (i
= 1; i
<= 32; i
++)
627 const char *tmp_s
= s
;
629 size_t s_left
= (size_t) i
;
630 size_t d_left
= (size_t) d_len
;
632 ret
= iconv (cd
, & tmp_s
, & s_left
, & tmp_d
, & d_left
);
634 if (ret
!= (size_t) -1)
646 wind_iconv_cp (rc_uint_type cp
)
648 const local_iconv_map
*lim
= wind_find_codepage_info (cp
);
652 return lim
->iconv_name
;
654 #endif /* HAVE_ICONV_H */
657 wind_MultiByteToWideChar (rc_uint_type cp
, const char *mb
,
658 unichar
*u
, rc_uint_type u_len
)
660 rc_uint_type ret
= 0;
662 #if defined (_WIN32) || defined (__CYGWIN__)
663 ret
= (rc_uint_type
) MultiByteToWideChar (cp
, MB_PRECOMPOSED
,
665 /* Convert to bytes. */
666 ret
*= sizeof (unichar
);
668 #elif defined (HAVE_ICONV_H)
672 const char *iconv_name
= wind_iconv_cp (cp
);
674 if (!mb
|| !iconv_name
)
676 iconv_t cd
= iconv_open ("UTF-16", iconv_name
);
685 iret
= iconv_onechar (cd
, (const char *) mb
, p_tmp
, 32, & n_mb
, & n_tmp
);
693 size_t l_tmp
= (size_t) (n_tmp
- p_tmp
);
697 if ((size_t) u_len
< l_tmp
)
699 memcpy (u
, tmp
, l_tmp
);
707 if (tmp
[0] == 0 && tmp
[1] == 0)
715 ret
= strlen (mb
) + 1;
716 ret
*= sizeof (unichar
);
717 if (u
!= NULL
&& u_len
!= 0)
721 *u
++ = ((unichar
) *mb
) & 0xff;
724 while (u_len
!= 0 && mb
[-1] != 0);
726 if (u
!= NULL
&& u_len
!= 0)
733 wind_WideCharToMultiByte (rc_uint_type cp
, const unichar
*u
, char *mb
, rc_uint_type mb_len
)
735 rc_uint_type ret
= 0;
736 #if defined (_WIN32) || defined (__CYGWIN__)
737 WINBOOL used_def
= FALSE
;
739 ret
= (rc_uint_type
) WideCharToMultiByte (cp
, 0, u
, -1, mb
, mb_len
,
741 #elif defined (HAVE_ICONV_H)
745 const char *iconv_name
= wind_iconv_cp (cp
);
747 if (!u
|| !iconv_name
)
749 iconv_t cd
= iconv_open (iconv_name
, "UTF-16");
758 iret
= iconv_onechar (cd
, (const char *) u
, p_tmp
, 32, &n_u
, & n_tmp
);
766 size_t l_tmp
= (size_t) (n_tmp
- p_tmp
);
770 if ((size_t) mb_len
< l_tmp
)
772 memcpy (mb
, tmp
, l_tmp
);
782 u
= (const unichar
*) n_u
;
796 while (*u
!= 0 && mb_len
!= 0)
798 if (u
[0] == (u
[0] & 0x7f))