Commit | Line | Data |
---|---|---|
8690e634 JK |
1 | /* Determine a canonical name for the current locale's character encoding. |
2 | ||
5df4cba6 | 3 | Copyright (C) 2000-2006, 2008-2020 Free Software Foundation, Inc. |
8690e634 JK |
4 | |
5 | This program is free software; you can redistribute it and/or modify | |
6 | it under the terms of the GNU General Public License as published by | |
7 | the Free Software Foundation; either version 3, or (at your option) | |
8 | any later version. | |
9 | ||
10 | This program is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | GNU General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU General Public License along | |
c0c3707f | 16 | with this program; if not, see <https://www.gnu.org/licenses/>. */ |
8690e634 JK |
17 | |
18 | /* Written by Bruno Haible <bruno@clisp.org>. */ | |
19 | ||
20 | #include <config.h> | |
21 | ||
22 | /* Specification. */ | |
23 | #include "localcharset.h" | |
24 | ||
8690e634 JK |
25 | #include <stddef.h> |
26 | #include <stdio.h> | |
27 | #include <string.h> | |
28 | #include <stdlib.h> | |
29 | ||
30 | #if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET | |
31 | # define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */ | |
32 | #endif | |
33 | ||
c0c3707f | 34 | #if defined _WIN32 && !defined __CYGWIN__ |
8690e634 | 35 | # define WINDOWS_NATIVE |
4a626d0a | 36 | # include <locale.h> |
8690e634 JK |
37 | #endif |
38 | ||
39 | #if defined __EMX__ | |
40 | /* Assume EMX program runs on OS/2, even if compiled under DOS. */ | |
41 | # ifndef OS2 | |
42 | # define OS2 | |
43 | # endif | |
44 | #endif | |
45 | ||
46 | #if !defined WINDOWS_NATIVE | |
8690e634 JK |
47 | # if HAVE_LANGINFO_CODESET |
48 | # include <langinfo.h> | |
49 | # else | |
c0c3707f | 50 | # if 0 /* see comment regarding use of setlocale(), below */ |
8690e634 JK |
51 | # include <locale.h> |
52 | # endif | |
53 | # endif | |
54 | # ifdef __CYGWIN__ | |
55 | # define WIN32_LEAN_AND_MEAN | |
56 | # include <windows.h> | |
57 | # endif | |
58 | #elif defined WINDOWS_NATIVE | |
59 | # define WIN32_LEAN_AND_MEAN | |
60 | # include <windows.h> | |
5df4cba6 SM |
61 | /* For the use of setlocale() below, the Gnulib override in setlocale.c is |
62 | not needed; see the platform lists in setlocale_null.m4. */ | |
63 | # undef setlocale | |
8690e634 JK |
64 | #endif |
65 | #if defined OS2 | |
66 | # define INCL_DOS | |
67 | # include <os2.h> | |
68 | #endif | |
69 | ||
4a626d0a PA |
70 | /* For MB_CUR_MAX_L */ |
71 | #if defined DARWIN7 | |
72 | # include <xlocale.h> | |
73 | #endif | |
74 | ||
8690e634 | 75 | |
c0c3707f | 76 | #if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2 |
8690e634 | 77 | |
c0c3707f CB |
78 | /* On these platforms, we use a mapping from non-canonical encoding name |
79 | to GNU canonical encoding name. */ | |
8690e634 | 80 | |
c0c3707f CB |
81 | /* With glibc-2.1 or newer, we don't need any canonicalization, |
82 | because glibc has iconv and both glibc and libiconv support all | |
83 | GNU canonical names directly. */ | |
84 | # if !((defined __GNU_LIBRARY__ && __GLIBC__ >= 2) || defined __UCLIBC__) | |
8690e634 | 85 | |
/* One row of alias_table: maps a non-canonical encoding name to the GNU
   canonical encoding name.  Both strings are stored inline in fixed-size
   char arrays, so an entry contains no pointers.  */
c0c3707f | 86 | struct table_entry |
8690e634 | 87 | { |
c0c3707f CB |
88 | const char alias[11+1]; /* non-canonical encoding name: at most 11 characters plus the terminating NUL */ |
89 | const char canonical[11+1]; /* GNU canonical encoding name: at most 11 characters plus the terminating NUL */ | |
90 | }; | |
91 | ||
92 | /* Table of platform-dependent mappings from a non-canonical encoding name
(alias) to the GNU canonical encoding name.
Only the section whose platform macro is defined is compiled in.  The last
entry of every section has no trailing comma, so the sections are written
to be mutually exclusive; if none of them is compiled in, a single dummy
entry keeps the initializer list non-empty.
Within each section the entries are sorted in ascending byte order (as by
strcmp) of the alias.  Entries that appear inside comments are not part of
the table.  */ | |
93 | static const struct table_entry alias_table[] = | |
94 | { | |
95 | # if defined __FreeBSD__ /* FreeBSD */ | |
96 | /*{ "ARMSCII-8", "ARMSCII-8" },*/ | |
97 | { "Big5", "BIG5" }, | |
98 | { "C", "ASCII" }, | |
99 | /*{ "CP1131", "CP1131" },*/ | |
100 | /*{ "CP1251", "CP1251" },*/ | |
101 | /*{ "CP866", "CP866" },*/ | |
102 | /*{ "GB18030", "GB18030" },*/ | |
103 | /*{ "GB2312", "GB2312" },*/ | |
104 | /*{ "GBK", "GBK" },*/ | |
105 | /*{ "ISCII-DEV", "?" },*/ | |
106 | { "ISO8859-1", "ISO-8859-1" }, | |
107 | { "ISO8859-13", "ISO-8859-13" }, | |
108 | { "ISO8859-15", "ISO-8859-15" }, | |
109 | { "ISO8859-2", "ISO-8859-2" }, | |
110 | { "ISO8859-5", "ISO-8859-5" }, | |
111 | { "ISO8859-7", "ISO-8859-7" }, | |
112 | { "ISO8859-9", "ISO-8859-9" }, | |
113 | /*{ "KOI8-R", "KOI8-R" },*/ | |
114 | /*{ "KOI8-U", "KOI8-U" },*/ | |
115 | { "SJIS", "SHIFT_JIS" }, | |
116 | { "US-ASCII", "ASCII" }, | |
117 | { "eucCN", "GB2312" }, | |
118 | { "eucJP", "EUC-JP" }, | |
119 | { "eucKR", "EUC-KR" } | |
120 | # define alias_table_defined | |
121 | # endif | |
122 | # if defined __NetBSD__ /* NetBSD */ | |
123 | { "646", "ASCII" }, | |
124 | /*{ "ARMSCII-8", "ARMSCII-8" },*/ | |
125 | /*{ "BIG5", "BIG5" },*/ | |
126 | { "Big5-HKSCS", "BIG5-HKSCS" }, | |
127 | /*{ "CP1251", "CP1251" },*/ | |
128 | /*{ "CP866", "CP866" },*/ | |
129 | /*{ "GB18030", "GB18030" },*/ | |
130 | /*{ "GB2312", "GB2312" },*/ | |
131 | { "ISO8859-1", "ISO-8859-1" }, | |
132 | { "ISO8859-13", "ISO-8859-13" }, | |
133 | { "ISO8859-15", "ISO-8859-15" }, | |
134 | { "ISO8859-2", "ISO-8859-2" }, | |
135 | { "ISO8859-4", "ISO-8859-4" }, | |
136 | { "ISO8859-5", "ISO-8859-5" }, | |
137 | { "ISO8859-7", "ISO-8859-7" }, | |
138 | /*{ "KOI8-R", "KOI8-R" },*/ | |
139 | /*{ "KOI8-U", "KOI8-U" },*/ | |
140 | /*{ "PT154", "PT154" },*/ | |
141 | { "SJIS", "SHIFT_JIS" }, | |
142 | { "eucCN", "GB2312" }, | |
143 | { "eucJP", "EUC-JP" }, | |
144 | { "eucKR", "EUC-KR" }, | |
145 | { "eucTW", "EUC-TW" } | |
146 | # define alias_table_defined | |
147 | # endif | |
148 | # if defined __OpenBSD__ /* OpenBSD */ | |
149 | { "646", "ASCII" }, | |
150 | { "ISO8859-1", "ISO-8859-1" }, | |
151 | { "ISO8859-13", "ISO-8859-13" }, | |
152 | { "ISO8859-15", "ISO-8859-15" }, | |
153 | { "ISO8859-2", "ISO-8859-2" }, | |
154 | { "ISO8859-4", "ISO-8859-4" }, | |
155 | { "ISO8859-5", "ISO-8859-5" }, | |
5df4cba6 SM |
156 | { "ISO8859-7", "ISO-8859-7" }, |
157 | { "US-ASCII", "ASCII" } | |
c0c3707f CB |
158 | # define alias_table_defined |
159 | # endif | |
160 | # if defined __APPLE__ && defined __MACH__ /* Mac OS X */ | |
161 | /* Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is | |
162 | useless: | |
163 | - It returns the empty string when LANG is set to a locale of the | |
164 | form ll_CC, although ll_CC/LC_CTYPE is a symlink to a UTF-8 | |
165 | LC_CTYPE file. | |
166 | - The environment variables LANG, LC_CTYPE, LC_ALL are not set by | |
167 | the system; nl_langinfo(CODESET) returns "US-ASCII" in this case. | |
168 | - The documentation says: | |
169 | "... all code that calls BSD system routines should ensure | |
170 | that the const *char parameters of these routines are in UTF-8 | |
171 | encoding. All BSD system functions expect their string | |
172 | parameters to be in UTF-8 encoding and nothing else." | |
173 | It also says | |
174 | "An additional caveat is that string parameters for files, | |
175 | paths, and other file-system entities must be in canonical | |
176 | UTF-8. In a canonical UTF-8 Unicode string, all decomposable | |
177 | characters are decomposed ..." | |
178 | but this is not true: You can pass non-decomposed UTF-8 strings | |
179 | to file system functions, and it is the OS which will convert | |
180 | them to decomposed UTF-8 before accessing the file system. | |
181 | - The Apple Terminal application displays UTF-8 by default. | |
182 | - However, other applications are free to use different encodings: | |
183 | - xterm uses ISO-8859-1 by default. | |
184 | - TextEdit uses MacRoman by default. | |
185 | We prefer UTF-8 over decomposed UTF-8-MAC because one should | |
186 | minimize the use of decomposed Unicode. Unfortunately, through the | |
187 | Darwin file system, decomposed UTF-8 strings are leaked into user | |
188 | space nevertheless. | |
189 | Then there are also the locales with encodings other than US-ASCII | |
190 | and UTF-8. These locales can be occasionally useful to users (e.g. | |
191 | when grepping through ISO-8859-1 encoded text files), when all their | |
192 | file names are in US-ASCII. | |
193 | */ | |
194 | { "ARMSCII-8", "ARMSCII-8" }, | |
195 | { "Big5", "BIG5" }, | |
196 | { "Big5HKSCS", "BIG5-HKSCS" }, | |
197 | { "CP1131", "CP1131" }, | |
198 | { "CP1251", "CP1251" }, | |
199 | { "CP866", "CP866" }, | |
200 | { "CP949", "CP949" }, | |
201 | { "GB18030", "GB18030" }, | |
202 | { "GB2312", "GB2312" }, | |
203 | { "GBK", "GBK" }, | |
204 | /*{ "ISCII-DEV", "?" },*/ | |
205 | { "ISO8859-1", "ISO-8859-1" }, | |
206 | { "ISO8859-13", "ISO-8859-13" }, | |
207 | { "ISO8859-15", "ISO-8859-15" }, | |
208 | { "ISO8859-2", "ISO-8859-2" }, | |
209 | { "ISO8859-4", "ISO-8859-4" }, | |
210 | { "ISO8859-5", "ISO-8859-5" }, | |
211 | { "ISO8859-7", "ISO-8859-7" }, | |
212 | { "ISO8859-9", "ISO-8859-9" }, | |
213 | { "KOI8-R", "KOI8-R" }, | |
214 | { "KOI8-U", "KOI8-U" }, | |
215 | { "PT154", "PT154" }, | |
216 | { "SJIS", "SHIFT_JIS" }, | |
217 | { "eucCN", "GB2312" }, | |
218 | { "eucJP", "EUC-JP" }, | |
219 | { "eucKR", "EUC-KR" } | |
220 | # define alias_table_defined | |
221 | # endif | |
222 | # if defined _AIX /* AIX */ | |
223 | /*{ "GBK", "GBK" },*/ | |
224 | { "IBM-1046", "CP1046" }, | |
225 | { "IBM-1124", "CP1124" }, | |
226 | { "IBM-1129", "CP1129" }, | |
227 | { "IBM-1252", "CP1252" }, | |
228 | { "IBM-850", "CP850" }, | |
229 | { "IBM-856", "CP856" }, | |
230 | { "IBM-921", "ISO-8859-13" }, | |
231 | { "IBM-922", "CP922" }, | |
232 | { "IBM-932", "CP932" }, | |
233 | { "IBM-943", "CP943" }, | |
234 | { "IBM-eucCN", "GB2312" }, | |
235 | { "IBM-eucJP", "EUC-JP" }, | |
236 | { "IBM-eucKR", "EUC-KR" }, | |
237 | { "IBM-eucTW", "EUC-TW" }, | |
238 | { "ISO8859-1", "ISO-8859-1" }, | |
239 | { "ISO8859-15", "ISO-8859-15" }, | |
240 | { "ISO8859-2", "ISO-8859-2" }, | |
241 | { "ISO8859-5", "ISO-8859-5" }, | |
242 | { "ISO8859-6", "ISO-8859-6" }, | |
243 | { "ISO8859-7", "ISO-8859-7" }, | |
244 | { "ISO8859-8", "ISO-8859-8" }, | |
245 | { "ISO8859-9", "ISO-8859-9" }, | |
246 | { "TIS-620", "TIS-620" }, | |
247 | /*{ "UTF-8", "UTF-8" },*/ | |
248 | { "big5", "BIG5" } | |
249 | # define alias_table_defined | |
250 | # endif | |
251 | # if defined __hpux /* HP-UX */ | |
252 | { "SJIS", "SHIFT_JIS" }, | |
253 | { "arabic8", "HP-ARABIC8" }, | |
254 | { "big5", "BIG5" }, | |
255 | { "cp1251", "CP1251" }, | |
256 | { "eucJP", "EUC-JP" }, | |
257 | { "eucKR", "EUC-KR" }, | |
258 | { "eucTW", "EUC-TW" }, | |
259 | { "gb18030", "GB18030" }, | |
260 | { "greek8", "HP-GREEK8" }, | |
261 | { "hebrew8", "HP-HEBREW8" }, | |
262 | { "hkbig5", "BIG5-HKSCS" }, | |
263 | { "hp15CN", "GB2312" }, | |
264 | { "iso88591", "ISO-8859-1" }, | |
265 | { "iso885913", "ISO-8859-13" }, | |
266 | { "iso885915", "ISO-8859-15" }, | |
267 | { "iso88592", "ISO-8859-2" }, | |
268 | { "iso88594", "ISO-8859-4" }, | |
269 | { "iso88595", "ISO-8859-5" }, | |
270 | { "iso88596", "ISO-8859-6" }, | |
271 | { "iso88597", "ISO-8859-7" }, | |
272 | { "iso88598", "ISO-8859-8" }, | |
273 | { "iso88599", "ISO-8859-9" }, | |
274 | { "kana8", "HP-KANA8" }, | |
275 | { "koi8r", "KOI8-R" }, | |
276 | { "roman8", "HP-ROMAN8" }, | |
277 | { "tis620", "TIS-620" }, | |
278 | { "turkish8", "HP-TURKISH8" }, | |
279 | { "utf8", "UTF-8" } | |
280 | # define alias_table_defined | |
281 | # endif | |
282 | # if defined __sgi /* IRIX */ | |
283 | { "ISO8859-1", "ISO-8859-1" }, | |
284 | { "ISO8859-15", "ISO-8859-15" }, | |
285 | { "ISO8859-2", "ISO-8859-2" }, | |
286 | { "ISO8859-5", "ISO-8859-5" }, | |
287 | { "ISO8859-7", "ISO-8859-7" }, | |
288 | { "ISO8859-9", "ISO-8859-9" }, | |
289 | { "eucCN", "GB2312" }, | |
290 | { "eucJP", "EUC-JP" }, | |
291 | { "eucKR", "EUC-KR" }, | |
292 | { "eucTW", "EUC-TW" } | |
293 | # define alias_table_defined | |
294 | # endif | |
295 | # if defined __osf__ /* OSF/1 */ | |
296 | /*{ "GBK", "GBK" },*/ | |
297 | { "ISO8859-1", "ISO-8859-1" }, | |
298 | { "ISO8859-15", "ISO-8859-15" }, | |
299 | { "ISO8859-2", "ISO-8859-2" }, | |
300 | { "ISO8859-4", "ISO-8859-4" }, | |
301 | { "ISO8859-5", "ISO-8859-5" }, | |
302 | { "ISO8859-7", "ISO-8859-7" }, | |
303 | { "ISO8859-8", "ISO-8859-8" }, | |
304 | { "ISO8859-9", "ISO-8859-9" }, | |
305 | { "KSC5601", "CP949" }, | |
306 | { "SJIS", "SHIFT_JIS" }, | |
307 | { "TACTIS", "TIS-620" }, | |
308 | /*{ "UTF-8", "UTF-8" },*/ | |
309 | { "big5", "BIG5" }, | |
310 | { "cp850", "CP850" }, | |
311 | { "dechanyu", "DEC-HANYU" }, | |
312 | { "dechanzi", "GB2312" }, | |
313 | { "deckanji", "DEC-KANJI" }, | |
314 | { "deckorean", "EUC-KR" }, | |
315 | { "eucJP", "EUC-JP" }, | |
316 | { "eucKR", "EUC-KR" }, | |
317 | { "eucTW", "EUC-TW" }, | |
318 | { "sdeckanji", "EUC-JP" } | |
319 | # define alias_table_defined | |
320 | # endif | |
321 | # if defined __sun /* Solaris */ | |
322 | { "5601", "EUC-KR" }, | |
323 | { "646", "ASCII" }, | |
324 | /*{ "BIG5", "BIG5" },*/ | |
325 | { "Big5-HKSCS", "BIG5-HKSCS" }, | |
326 | { "GB18030", "GB18030" }, | |
327 | /*{ "GBK", "GBK" },*/ | |
328 | { "ISO8859-1", "ISO-8859-1" }, | |
329 | { "ISO8859-11", "TIS-620" }, | |
330 | { "ISO8859-13", "ISO-8859-13" }, | |
331 | { "ISO8859-15", "ISO-8859-15" }, | |
332 | { "ISO8859-2", "ISO-8859-2" }, | |
333 | { "ISO8859-3", "ISO-8859-3" }, | |
334 | { "ISO8859-4", "ISO-8859-4" }, | |
335 | { "ISO8859-5", "ISO-8859-5" }, | |
336 | { "ISO8859-6", "ISO-8859-6" }, | |
337 | { "ISO8859-7", "ISO-8859-7" }, | |
338 | { "ISO8859-8", "ISO-8859-8" }, | |
339 | { "ISO8859-9", "ISO-8859-9" }, | |
340 | { "PCK", "SHIFT_JIS" }, | |
341 | { "TIS620.2533", "TIS-620" }, | |
342 | /*{ "UTF-8", "UTF-8" },*/ | |
343 | { "ansi-1251", "CP1251" }, | |
344 | { "cns11643", "EUC-TW" }, | |
345 | { "eucJP", "EUC-JP" }, | |
346 | { "gb2312", "GB2312" }, | |
347 | { "koi8-r", "KOI8-R" } | |
348 | # define alias_table_defined | |
349 | # endif | |
350 | # if defined __minix /* Minix */ | |
351 | { "646", "ASCII" } | |
352 | # define alias_table_defined | |
353 | # endif | |
354 | # if defined WINDOWS_NATIVE || defined __CYGWIN__ /* Windows */ | |
355 | { "CP1361", "JOHAB" }, | |
356 | { "CP20127", "ASCII" }, | |
357 | { "CP20866", "KOI8-R" }, | |
358 | { "CP20936", "GB2312" }, | |
359 | { "CP21866", "KOI8-RU" }, | |
360 | { "CP28591", "ISO-8859-1" }, | |
361 | { "CP28592", "ISO-8859-2" }, | |
362 | { "CP28593", "ISO-8859-3" }, | |
363 | { "CP28594", "ISO-8859-4" }, | |
364 | { "CP28595", "ISO-8859-5" }, | |
365 | { "CP28596", "ISO-8859-6" }, | |
366 | { "CP28597", "ISO-8859-7" }, | |
367 | { "CP28598", "ISO-8859-8" }, | |
368 | { "CP28599", "ISO-8859-9" }, | |
369 | { "CP28605", "ISO-8859-15" }, | |
370 | { "CP38598", "ISO-8859-8" }, | |
371 | { "CP51932", "EUC-JP" }, | |
372 | { "CP51936", "GB2312" }, | |
373 | { "CP51949", "EUC-KR" }, | |
374 | { "CP51950", "EUC-TW" }, | |
375 | { "CP54936", "GB18030" }, | |
376 | { "CP65001", "UTF-8" }, | |
377 | { "CP936", "GBK" } | |
378 | # define alias_table_defined | |
379 | # endif | |
380 | # if defined OS2 /* OS/2 */ | |
381 | /* The list of encodings is taken from "List of OS/2 Codepages" | |
382 | by Alex Taylor: | |
383 | <http://altsan.org/os2/toolkits/uls/index.html#codepages>. | |
384 | See also "__convcp() of kLIBC": | |
385 | <https://github.com/bitwiseworks/libc/blob/master/src/emx/src/lib/locale/__convcp.c>. */ | |
386 | { "CP1004", "CP1252" }, | |
387 | /*{ "CP1041", "CP943" },*/ | |
388 | /*{ "CP1088", "CP949" },*/ | |
389 | { "CP1089", "ISO-8859-6" }, | |
390 | /*{ "CP1114", "CP950" },*/ | |
391 | /*{ "CP1115", "GB2312" },*/ | |
392 | { "CP1208", "UTF-8" }, | |
393 | /*{ "CP1380", "GB2312" },*/ | |
394 | { "CP1381", "GB2312" }, | |
395 | { "CP1383", "GB2312" }, | |
396 | { "CP1386", "GBK" }, | |
397 | /*{ "CP301", "CP943" },*/ | |
398 | { "CP3372", "EUC-JP" }, | |
399 | { "CP4946", "CP850" }, | |
400 | /*{ "CP5048", "JIS_X0208-1990" },*/ | |
401 | /*{ "CP5049", "JIS_X0212-1990" },*/ | |
402 | /*{ "CP5067", "KS_C_5601-1987" },*/ | |
403 | { "CP813", "ISO-8859-7" }, | |
404 | { "CP819", "ISO-8859-1" }, | |
405 | { "CP878", "KOI8-R" }, | |
406 | /*{ "CP897", "CP943" },*/ | |
407 | { "CP912", "ISO-8859-2" }, | |
408 | { "CP913", "ISO-8859-3" }, | |
409 | { "CP914", "ISO-8859-4" }, | |
410 | { "CP915", "ISO-8859-5" }, | |
411 | { "CP916", "ISO-8859-8" }, | |
412 | { "CP920", "ISO-8859-9" }, | |
413 | { "CP921", "ISO-8859-13" }, | |
414 | { "CP923", "ISO-8859-15" }, | |
415 | /*{ "CP941", "CP943" },*/ | |
416 | /*{ "CP947", "CP950" },*/ | |
417 | /*{ "CP951", "CP949" },*/ | |
418 | /*{ "CP952", "JIS_X0208-1990" },*/ | |
419 | /*{ "CP953", "JIS_X0212-1990" },*/ | |
420 | { "CP954", "EUC-JP" }, | |
421 | { "CP964", "EUC-TW" }, | |
422 | { "CP970", "EUC-KR" }, | |
423 | /*{ "CP971", "KS_C_5601-1987" },*/ | |
424 | { "IBM-1004", "CP1252" }, | |
425 | /*{ "IBM-1006", "?" },*/ | |
426 | /*{ "IBM-1008", "?" },*/ | |
427 | /*{ "IBM-1041", "CP943" },*/ | |
428 | /*{ "IBM-1051", "?" },*/ | |
429 | /*{ "IBM-1088", "CP949" },*/ | |
430 | { "IBM-1089", "ISO-8859-6" }, | |
431 | /*{ "IBM-1098", "?" },*/ | |
432 | /*{ "IBM-1114", "CP950" },*/ | |
433 | /*{ "IBM-1115", "GB2312" },*/ | |
434 | /*{ "IBM-1116", "?" },*/ | |
435 | /*{ "IBM-1117", "?" },*/ | |
436 | /*{ "IBM-1118", "?" },*/ | |
437 | /*{ "IBM-1119", "?" },*/ | |
438 | { "IBM-1124", "CP1124" }, | |
439 | { "IBM-1125", "CP1125" }, | |
440 | { "IBM-1131", "CP1131" }, | |
441 | { "IBM-1208", "UTF-8" }, | |
442 | { "IBM-1250", "CP1250" }, | |
443 | { "IBM-1251", "CP1251" }, | |
444 | { "IBM-1252", "CP1252" }, | |
445 | { "IBM-1253", "CP1253" }, | |
446 | { "IBM-1254", "CP1254" }, | |
447 | { "IBM-1255", "CP1255" }, | |
448 | { "IBM-1256", "CP1256" }, | |
449 | { "IBM-1257", "CP1257" }, | |
450 | /*{ "IBM-1275", "?" },*/ | |
451 | /*{ "IBM-1276", "?" },*/ | |
452 | /*{ "IBM-1277", "?" },*/ | |
453 | /*{ "IBM-1280", "?" },*/ | |
454 | /*{ "IBM-1281", "?" },*/ | |
455 | /*{ "IBM-1282", "?" },*/ | |
456 | /*{ "IBM-1283", "?" },*/ | |
457 | /*{ "IBM-1380", "GB2312" },*/ | |
458 | { "IBM-1381", "GB2312" }, | |
459 | { "IBM-1383", "GB2312" }, | |
460 | { "IBM-1386", "GBK" }, | |
461 | /*{ "IBM-301", "CP943" },*/ | |
462 | { "IBM-3372", "EUC-JP" }, | |
463 | { "IBM-367", "ASCII" }, | |
464 | { "IBM-437", "CP437" }, | |
465 | { "IBM-4946", "CP850" }, | |
466 | /*{ "IBM-5048", "JIS_X0208-1990" },*/ | |
467 | /*{ "IBM-5049", "JIS_X0212-1990" },*/ | |
468 | /*{ "IBM-5067", "KS_C_5601-1987" },*/ | |
469 | { "IBM-813", "ISO-8859-7" }, | |
470 | { "IBM-819", "ISO-8859-1" }, | |
471 | { "IBM-850", "CP850" }, | |
472 | /*{ "IBM-851", "?" },*/ | |
473 | { "IBM-852", "CP852" }, | |
474 | { "IBM-855", "CP855" }, | |
475 | { "IBM-856", "CP856" }, | |
476 | { "IBM-857", "CP857" }, | |
477 | /*{ "IBM-859", "?" },*/ | |
478 | { "IBM-860", "CP860" }, | |
479 | { "IBM-861", "CP861" }, | |
480 | { "IBM-862", "CP862" }, | |
481 | { "IBM-863", "CP863" }, | |
482 | { "IBM-864", "CP864" }, | |
483 | { "IBM-865", "CP865" }, | |
484 | { "IBM-866", "CP866" }, | |
485 | /*{ "IBM-868", "?" },*/ | |
486 | { "IBM-869", "CP869" }, | |
487 | { "IBM-874", "CP874" }, | |
488 | { "IBM-878", "KOI8-R" }, | |
489 | /*{ "IBM-895", "?" },*/ | |
490 | /*{ "IBM-897", "CP943" },*/ | |
491 | /*{ "IBM-907", "?" },*/ | |
492 | /*{ "IBM-909", "?" },*/ | |
493 | { "IBM-912", "ISO-8859-2" }, | |
494 | { "IBM-913", "ISO-8859-3" }, | |
495 | { "IBM-914", "ISO-8859-4" }, | |
496 | { "IBM-915", "ISO-8859-5" }, | |
497 | { "IBM-916", "ISO-8859-8" }, | |
498 | { "IBM-920", "ISO-8859-9" }, | |
499 | { "IBM-921", "ISO-8859-13" }, | |
500 | { "IBM-922", "CP922" }, | |
501 | { "IBM-923", "ISO-8859-15" }, | |
502 | { "IBM-932", "CP932" }, | |
503 | /*{ "IBM-941", "CP943" },*/ | |
504 | /*{ "IBM-942", "?" },*/ | |
505 | { "IBM-943", "CP943" }, | |
506 | /*{ "IBM-947", "CP950" },*/ | |
507 | { "IBM-949", "CP949" }, | |
508 | { "IBM-950", "CP950" }, | |
509 | /*{ "IBM-951", "CP949" },*/ | |
510 | /*{ "IBM-952", "JIS_X0208-1990" },*/ | |
511 | /*{ "IBM-953", "JIS_X0212-1990" },*/ | |
512 | { "IBM-954", "EUC-JP" }, | |
513 | /*{ "IBM-955", "?" },*/ | |
514 | { "IBM-964", "EUC-TW" }, | |
515 | { "IBM-970", "EUC-KR" }, | |
516 | /*{ "IBM-971", "KS_C_5601-1987" },*/ | |
517 | { "IBM-eucCN", "GB2312" }, | |
518 | { "IBM-eucJP", "EUC-JP" }, | |
519 | { "IBM-eucKR", "EUC-KR" }, | |
520 | { "IBM-eucTW", "EUC-TW" }, | |
521 | { "IBM33722", "EUC-JP" }, | |
522 | { "ISO8859-1", "ISO-8859-1" }, | |
523 | { "ISO8859-2", "ISO-8859-2" }, | |
524 | { "ISO8859-3", "ISO-8859-3" }, | |
525 | { "ISO8859-4", "ISO-8859-4" }, | |
526 | { "ISO8859-5", "ISO-8859-5" }, | |
527 | { "ISO8859-6", "ISO-8859-6" }, | |
528 | { "ISO8859-7", "ISO-8859-7" }, | |
529 | { "ISO8859-8", "ISO-8859-8" }, | |
530 | { "ISO8859-9", "ISO-8859-9" }, | |
531 | /*{ "JISX0201-1976", "JISX0201-1976" },*/ | |
532 | /*{ "JISX0208-1978", "?" },*/ | |
533 | /*{ "JISX0208-1983", "JIS_X0208-1983" },*/ | |
534 | /*{ "JISX0208-1990", "JIS_X0208-1990" },*/ | |
535 | /*{ "JISX0212-1990", "JIS_X0212-1990" },*/ | |
536 | /*{ "KSC5601-1987", "KS_C_5601-1987" },*/ | |
537 | { "SJIS-1", "CP943" }, | |
538 | { "SJIS-2", "CP943" }, | |
539 | { "eucJP", "EUC-JP" }, | |
540 | { "eucKR", "EUC-KR" }, | |
541 | { "eucTW-1993", "EUC-TW" } | |
542 | # define alias_table_defined | |
543 | # endif | |
544 | # if defined VMS /* OpenVMS */ | |
545 | /* The list of encodings is taken from the OpenVMS 7.3-1 documentation | |
546 | "Compaq C Run-Time Library Reference Manual for OpenVMS systems" | |
547 | section 10.7 "Handling Different Character Sets". */ | |
548 | { "DECHANYU", "DEC-HANYU" }, | |
549 | { "DECHANZI", "GB2312" }, | |
550 | { "DECKANJI", "DEC-KANJI" }, | |
551 | { "DECKOREAN", "EUC-KR" }, | |
552 | { "ISO8859-1", "ISO-8859-1" }, | |
553 | { "ISO8859-2", "ISO-8859-2" }, | |
554 | { "ISO8859-5", "ISO-8859-5" }, | |
555 | { "ISO8859-7", "ISO-8859-7" }, | |
556 | { "ISO8859-8", "ISO-8859-8" }, | |
557 | { "ISO8859-9", "ISO-8859-9" }, | |
558 | { "SDECKANJI", "EUC-JP" }, | |
559 | { "SJIS", "SHIFT_JIS" }, | |
560 | { "eucJP", "EUC-JP" }, | |
561 | { "eucTW", "EUC-TW" } | |
562 | # define alias_table_defined | |
563 | # endif | |
564 | # ifndef alias_table_defined | |
565 | /* None of the platform sections above was compiled in.  Provide a single
dummy entry, because an empty initializer list would be a C syntax
error. */ | |
566 | { "", "" } | |
567 | # endif | |
568 | }; | |
5e8754f9 | 569 | |
c0c3707f | 570 | # endif |
8690e634 JK |
571 | |
572 | #else | |
573 | ||
c0c3707f CB |
574 | /* On these platforms, we use a mapping from locale name to GNU canonical |
575 | encoding name. */ | |
8690e634 | 576 | |
c0c3707f CB |
/* One row of locale_table: maps a locale name to the GNU canonical encoding
   name.  Both strings are stored inline in fixed-size char arrays, so an
   entry contains no pointers.  */
577 | struct table_entry |
578 | { | |
579 | const char locale[17+1]; /* locale name: at most 17 characters plus the terminating NUL */ | |
580 | const char canonical[11+1]; /* GNU canonical encoding name: at most 11 characters plus the terminating NUL */ | |
581 | }; | |
582 | ||
583 | /* Table of platform-dependent mappings from a locale name to the GNU
canonical encoding name.
Only the section whose platform macro is defined is compiled in.  The last
entry of every section has no trailing comma, so the sections are written
to be mutually exclusive; if none of them is compiled in, a single dummy
entry keeps the initializer list non-empty.
Within each section the entries are sorted in ascending byte order (as by
strcmp) of the locale name.  */ | |
584 | static const struct table_entry locale_table[] = | |
585 | { | |
586 | # if defined __FreeBSD__ /* FreeBSD 4.2 */ | |
587 | { "cs_CZ.ISO_8859-2", "ISO-8859-2" }, | |
588 | { "da_DK.DIS_8859-15", "ISO-8859-15" }, | |
589 | { "da_DK.ISO_8859-1", "ISO-8859-1" }, | |
590 | { "de_AT.DIS_8859-15", "ISO-8859-15" }, | |
591 | { "de_AT.ISO_8859-1", "ISO-8859-1" }, | |
592 | { "de_CH.DIS_8859-15", "ISO-8859-15" }, | |
593 | { "de_CH.ISO_8859-1", "ISO-8859-1" }, | |
594 | { "de_DE.DIS_8859-15", "ISO-8859-15" }, | |
595 | { "de_DE.ISO_8859-1", "ISO-8859-1" }, | |
596 | { "en_AU.DIS_8859-15", "ISO-8859-15" }, | |
597 | { "en_AU.ISO_8859-1", "ISO-8859-1" }, | |
598 | { "en_CA.DIS_8859-15", "ISO-8859-15" }, | |
599 | { "en_CA.ISO_8859-1", "ISO-8859-1" }, | |
600 | { "en_GB.DIS_8859-15", "ISO-8859-15" }, | |
601 | { "en_GB.ISO_8859-1", "ISO-8859-1" }, | |
602 | { "en_US.DIS_8859-15", "ISO-8859-15" }, | |
603 | { "en_US.ISO_8859-1", "ISO-8859-1" }, | |
604 | { "es_ES.DIS_8859-15", "ISO-8859-15" }, | |
605 | { "es_ES.ISO_8859-1", "ISO-8859-1" }, | |
606 | { "fi_FI.DIS_8859-15", "ISO-8859-15" }, | |
607 | { "fi_FI.ISO_8859-1", "ISO-8859-1" }, | |
608 | { "fr_BE.DIS_8859-15", "ISO-8859-15" }, | |
609 | { "fr_BE.ISO_8859-1", "ISO-8859-1" }, | |
610 | { "fr_CA.DIS_8859-15", "ISO-8859-15" }, | |
611 | { "fr_CA.ISO_8859-1", "ISO-8859-1" }, | |
612 | { "fr_CH.DIS_8859-15", "ISO-8859-15" }, | |
613 | { "fr_CH.ISO_8859-1", "ISO-8859-1" }, | |
614 | { "fr_FR.DIS_8859-15", "ISO-8859-15" }, | |
615 | { "fr_FR.ISO_8859-1", "ISO-8859-1" }, | |
616 | { "hr_HR.ISO_8859-2", "ISO-8859-2" }, | |
617 | { "hu_HU.ISO_8859-2", "ISO-8859-2" }, | |
618 | { "is_IS.DIS_8859-15", "ISO-8859-15" }, | |
619 | { "is_IS.ISO_8859-1", "ISO-8859-1" }, | |
620 | { "it_CH.DIS_8859-15", "ISO-8859-15" }, | |
621 | { "it_CH.ISO_8859-1", "ISO-8859-1" }, | |
622 | { "it_IT.DIS_8859-15", "ISO-8859-15" }, | |
623 | { "it_IT.ISO_8859-1", "ISO-8859-1" }, | |
624 | { "ja_JP.EUC", "EUC-JP" }, | |
625 | { "ja_JP.SJIS", "SHIFT_JIS" }, | |
626 | { "ja_JP.Shift_JIS", "SHIFT_JIS" }, | |
627 | { "ko_KR.EUC", "EUC-KR" }, | |
628 | { "la_LN.ASCII", "ASCII" }, | |
629 | { "la_LN.DIS_8859-15", "ISO-8859-15" }, | |
630 | { "la_LN.ISO_8859-1", "ISO-8859-1" }, | |
631 | { "la_LN.ISO_8859-2", "ISO-8859-2" }, | |
632 | { "la_LN.ISO_8859-4", "ISO-8859-4" }, | |
633 | { "lt_LN.ASCII", "ASCII" }, | |
634 | { "lt_LN.DIS_8859-15", "ISO-8859-15" }, | |
635 | { "lt_LN.ISO_8859-1", "ISO-8859-1" }, | |
636 | { "lt_LN.ISO_8859-2", "ISO-8859-2" }, | |
637 | { "lt_LT.ISO_8859-4", "ISO-8859-4" }, | |
638 | { "nl_BE.DIS_8859-15", "ISO-8859-15" }, | |
639 | { "nl_BE.ISO_8859-1", "ISO-8859-1" }, | |
640 | { "nl_NL.DIS_8859-15", "ISO-8859-15" }, | |
641 | { "nl_NL.ISO_8859-1", "ISO-8859-1" }, | |
642 | { "no_NO.DIS_8859-15", "ISO-8859-15" }, | |
643 | { "no_NO.ISO_8859-1", "ISO-8859-1" }, | |
644 | { "pl_PL.ISO_8859-2", "ISO-8859-2" }, | |
645 | { "pt_PT.DIS_8859-15", "ISO-8859-15" }, | |
646 | { "pt_PT.ISO_8859-1", "ISO-8859-1" }, | |
647 | { "ru_RU.CP866", "CP866" }, | |
648 | { "ru_RU.ISO_8859-5", "ISO-8859-5" }, | |
649 | { "ru_RU.KOI8-R", "KOI8-R" }, | |
650 | { "ru_SU.CP866", "CP866" }, | |
651 | { "ru_SU.ISO_8859-5", "ISO-8859-5" }, | |
652 | { "ru_SU.KOI8-R", "KOI8-R" }, | |
653 | { "sl_SI.ISO_8859-2", "ISO-8859-2" }, | |
654 | { "sv_SE.DIS_8859-15", "ISO-8859-15" }, | |
655 | { "sv_SE.ISO_8859-1", "ISO-8859-1" }, | |
656 | { "uk_UA.KOI8-U", "KOI8-U" }, | |
657 | { "zh_CN.EUC", "GB2312" }, | |
658 | { "zh_TW.BIG5", "BIG5" }, | |
659 | { "zh_TW.Big5", "BIG5" } | |
660 | # define locale_table_defined | |
8690e634 | 661 | # endif |
c0c3707f CB |
662 | # if defined __DJGPP__ /* DOS / DJGPP 2.03 */ |
663 | /* The encodings given here may not all be correct. | |
664 | If you find that the encoding given for your language and | |
665 | country is not the one your DOS machine actually uses, just | |
666 | correct it in this file, and send a mail to | |
667 | Juan Manuel Guerrero <juan.guerrero@gmx.de> | |
668 | and <bug-gnulib@gnu.org>. */ | |
669 | { "C", "ASCII" }, | |
670 | { "ar", "CP864" }, | |
671 | { "ar_AE", "CP864" }, | |
672 | { "ar_DZ", "CP864" }, | |
673 | { "ar_EG", "CP864" }, | |
674 | { "ar_IQ", "CP864" }, | |
675 | { "ar_IR", "CP864" }, | |
676 | { "ar_JO", "CP864" }, | |
677 | { "ar_KW", "CP864" }, | |
678 | { "ar_MA", "CP864" }, | |
679 | { "ar_OM", "CP864" }, | |
680 | { "ar_QA", "CP864" }, | |
681 | { "ar_SA", "CP864" }, | |
682 | { "ar_SY", "CP864" }, | |
683 | { "be", "CP866" }, | |
684 | { "be_BE", "CP866" }, | |
685 | { "bg", "CP866" }, /* not CP855 ?? */ | |
686 | { "bg_BG", "CP866" }, /* not CP855 ?? */ | |
687 | { "ca", "CP850" }, | |
688 | { "ca_ES", "CP850" }, | |
689 | { "cs", "CP852" }, | |
690 | { "cs_CZ", "CP852" }, | |
691 | { "da", "CP865" }, /* not CP850 ?? */ | |
692 | { "da_DK", "CP865" }, /* not CP850 ?? */ | |
693 | { "de", "CP850" }, | |
694 | { "de_AT", "CP850" }, | |
695 | { "de_CH", "CP850" }, | |
696 | { "de_DE", "CP850" }, | |
697 | { "el", "CP869" }, | |
698 | { "el_GR", "CP869" }, | |
699 | { "en", "CP850" }, | |
700 | { "en_AU", "CP850" }, /* not CP437 ?? */ | |
701 | { "en_CA", "CP850" }, | |
702 | { "en_GB", "CP850" }, | |
703 | { "en_NZ", "CP437" }, | |
704 | { "en_US", "CP437" }, | |
705 | { "en_ZA", "CP850" }, /* not CP437 ?? */ | |
706 | { "eo", "CP850" }, | |
707 | { "eo_EO", "CP850" }, | |
708 | { "es", "CP850" }, | |
709 | { "es_AR", "CP850" }, | |
710 | { "es_BO", "CP850" }, | |
711 | { "es_CL", "CP850" }, | |
712 | { "es_CO", "CP850" }, | |
713 | { "es_CR", "CP850" }, | |
714 | { "es_CU", "CP850" }, | |
715 | { "es_DO", "CP850" }, | |
716 | { "es_EC", "CP850" }, | |
717 | { "es_ES", "CP850" }, | |
718 | { "es_GT", "CP850" }, | |
719 | { "es_HN", "CP850" }, | |
720 | { "es_MX", "CP850" }, | |
721 | { "es_NI", "CP850" }, | |
722 | { "es_PA", "CP850" }, | |
723 | { "es_PE", "CP850" }, | |
724 | { "es_PY", "CP850" }, | |
725 | { "es_SV", "CP850" }, | |
726 | { "es_UY", "CP850" }, | |
727 | { "es_VE", "CP850" }, | |
728 | { "et", "CP850" }, | |
729 | { "et_EE", "CP850" }, | |
730 | { "eu", "CP850" }, | |
731 | { "eu_ES", "CP850" }, | |
732 | { "fi", "CP850" }, | |
733 | { "fi_FI", "CP850" }, | |
734 | { "fr", "CP850" }, | |
735 | { "fr_BE", "CP850" }, | |
736 | { "fr_CA", "CP850" }, | |
737 | { "fr_CH", "CP850" }, | |
738 | { "fr_FR", "CP850" }, | |
739 | { "ga", "CP850" }, | |
740 | { "ga_IE", "CP850" }, | |
741 | { "gd", "CP850" }, | |
742 | { "gd_GB", "CP850" }, | |
743 | { "gl", "CP850" }, | |
744 | { "gl_ES", "CP850" }, | |
745 | { "he", "CP862" }, | |
746 | { "he_IL", "CP862" }, | |
747 | { "hr", "CP852" }, | |
748 | { "hr_HR", "CP852" }, | |
749 | { "hu", "CP852" }, | |
750 | { "hu_HU", "CP852" }, | |
751 | { "id", "CP850" }, /* not CP437 ?? */ | |
752 | { "id_ID", "CP850" }, /* not CP437 ?? */ | |
753 | { "is", "CP861" }, /* not CP850 ?? */ | |
754 | { "is_IS", "CP861" }, /* not CP850 ?? */ | |
755 | { "it", "CP850" }, | |
756 | { "it_CH", "CP850" }, | |
757 | { "it_IT", "CP850" }, | |
758 | { "ja", "CP932" }, | |
759 | { "ja_JP", "CP932" }, | |
760 | { "kr", "CP949" }, /* not CP934 ?? */ | |
761 | { "kr_KR", "CP949" }, /* not CP934 ?? */ | |
762 | { "lt", "CP775" }, | |
763 | { "lt_LT", "CP775" }, | |
764 | { "lv", "CP775" }, | |
765 | { "lv_LV", "CP775" }, | |
766 | { "mk", "CP866" }, /* not CP855 ?? */ | |
767 | { "mk_MK", "CP866" }, /* not CP855 ?? */ | |
768 | { "mt", "CP850" }, | |
769 | { "mt_MT", "CP850" }, | |
770 | { "nb", "CP865" }, /* not CP850 ?? */ | |
771 | { "nb_NO", "CP865" }, /* not CP850 ?? */ | |
772 | { "nl", "CP850" }, | |
773 | { "nl_BE", "CP850" }, | |
774 | { "nl_NL", "CP850" }, | |
775 | { "nn", "CP865" }, /* not CP850 ?? */ | |
776 | { "nn_NO", "CP865" }, /* not CP850 ?? */ | |
777 | { "no", "CP865" }, /* not CP850 ?? */ | |
778 | { "no_NO", "CP865" }, /* not CP850 ?? */ | |
779 | { "pl", "CP852" }, | |
780 | { "pl_PL", "CP852" }, | |
781 | { "pt", "CP850" }, | |
782 | { "pt_BR", "CP850" }, | |
783 | { "pt_PT", "CP850" }, | |
784 | { "ro", "CP852" }, | |
785 | { "ro_RO", "CP852" }, | |
786 | { "ru", "CP866" }, | |
787 | { "ru_RU", "CP866" }, | |
788 | { "sk", "CP852" }, | |
789 | { "sk_SK", "CP852" }, | |
790 | { "sl", "CP852" }, | |
791 | { "sl_SI", "CP852" }, | |
792 | { "sq", "CP852" }, | |
793 | { "sq_AL", "CP852" }, | |
794 | { "sr", "CP852" }, /* CP852 or CP866 or CP855 ?? */ | |
795 | { "sr_CS", "CP852" }, /* CP852 or CP866 or CP855 ?? */ | |
796 | { "sr_YU", "CP852" }, /* CP852 or CP866 or CP855 ?? */ | |
797 | { "sv", "CP850" }, | |
798 | { "sv_SE", "CP850" }, | |
799 | { "th", "CP874" }, | |
800 | { "th_TH", "CP874" }, | |
801 | { "tr", "CP857" }, | |
802 | { "tr_TR", "CP857" }, | |
803 | { "uk", "CP1125" }, | |
804 | { "uk_UA", "CP1125" }, | |
805 | { "zh_CN", "GBK" }, | |
806 | { "zh_TW", "CP950" } /* not CP938 ?? */ | |
807 | # define locale_table_defined | |
8690e634 | 808 | # endif |
c0c3707f CB |
809 | # ifndef locale_table_defined |
810 | /* None of the platform sections above was compiled in.  Provide a single
dummy entry, because an empty initializer list would be a C syntax
error. */ | |
811 | { "", "" } | |
4a626d0a | 812 | # endif |
c0c3707f | 813 | }; |
8690e634 | 814 | |
c0c3707f | 815 | #endif |
5e8754f9 | 816 | |
8690e634 JK |
817 | |
/* Determine the current locale's character encoding, and canonicalize it
   into one of the canonical names (see the specification in localcharset.h).
   The result must not be freed; it is statically allocated.  The result
   becomes invalid when setlocale() is used to change the global locale, or
   when the value of one of the environment variables LC_ALL, LC_CTYPE, LANG
   is changed; threads in multithreaded programs should not do this.
   If the canonical name cannot be determined, the result is a non-canonical
   name.

   Strategy, selected at compile time:
     - HAVE_LANGINFO_CODESET: ask nl_langinfo (CODESET).  On Cygwin, a
       "US-ASCII" answer is replaced by the encoding suffix of LC_ALL /
       LC_CTYPE / LANG, or else by "CP<GetACP()>".
     - WINDOWS_NATIVE: use the part after the last '.' of
       setlocale (LC_CTYPE, NULL), or else "CP<GetACP()>".
     - OS2: use the encoding suffix of LC_ALL / LC_CTYPE / LANG, or else
       "CP<DosQueryCp()>".
     - otherwise: look up the value of LC_ALL / LC_CTYPE / LANG in
       locale_table (when that table is defined).
   In the first three cases the name is then looked up in alias_table (when
   that table is defined).  Note that on Cygwin and OS/2 an encoding suffix
   taken from the environment is returned directly, without this lookup.  */

#ifdef STATIC
STATIC
#endif
const char *
locale_charset (void)
{
  const char *codeset;

  /* This function must be multithread-safe.  To achieve this without using
     thread-local storage, we use a simple strcpy or memcpy to fill this static
     buffer.  Filling it through, for example, strcpy + strcat would not be
     guaranteed to leave the buffer's contents intact if another thread is
     currently accessing it.  If necessary, the contents is first assembled in
     a stack-allocated buffer.  */

#if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2

# if HAVE_LANGINFO_CODESET

  /* Most systems support nl_langinfo (CODESET) nowadays.  */
  codeset = nl_langinfo (CODESET);

#  ifdef __CYGWIN__
  /* Cygwin < 1.7 does not have locales.  nl_langinfo (CODESET) always
     returns "US-ASCII".  Return the suffix of the locale name from the
     environment variables (if present) or the codepage as a number.  */
  if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
    {
      const char *locale;
      static char resultbuf[2 + 10 + 1];

      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
      if (locale != NULL && locale[0] != '\0')
        {
          /* If the locale name contains an encoding after the dot, return
             it.  */
          const char *dot = strchr (locale, '.');

          if (dot != NULL)
            {
              const char *modifier;

              dot++;
              /* Look for the possible @... trailer and remove it, if any.  */
              modifier = strchr (dot, '@');
              if (modifier == NULL)
                /* No trailer: the rest of the environment string is the
                   encoding name.  */
                return dot;
              /* modifier > dot here, so the difference is non-negative.  */
              if ((size_t) (modifier - dot) < sizeof (resultbuf))
                {
                  /* This way of filling resultbuf is multithread-safe.  */
                  memcpy (resultbuf, dot, modifier - dot);
                  resultbuf [modifier - dot] = '\0';
                  return resultbuf;
                }
              /* Otherwise the encoding name is too long for resultbuf;
                 fall through to the codepage number.  */
            }
        }

      /* The Windows API has a function returning the locale's codepage as a
         number: GetACP().  This encoding is used by Cygwin, unless the user
         has set the environment variable CYGWIN=codepage:oem (which very few
         people do).
         Output directed to console windows needs to be converted (to
         GetOEMCP() if the console is using a raster font, or to
         GetConsoleOutputCP() if it is using a TrueType font).  Cygwin does
         this conversion transparently (see winsup/cygwin/fhandler_console.cc),
         converting to GetConsoleOutputCP().  This leads to correct results,
         except when SetConsoleOutputCP has been called and a raster font is
         in use.  */
      {
        char buf[2 + 10 + 1];

        sprintf (buf, "CP%u", GetACP ());
        strcpy (resultbuf, buf);
        codeset = resultbuf;
      }
    }
#  endif

  if (codeset == NULL)
    /* The canonical name cannot be determined.  */
    codeset = "";

# elif defined WINDOWS_NATIVE

  char buf[2 + 10 + 1];
  static char resultbuf[2 + 10 + 1];

  /* The Windows API has a function returning the locale's codepage as
     a number, but the value doesn't change according to what the
     'setlocale' call specified.  So we use it as a last resort, in
     case the string returned by 'setlocale' doesn't specify the
     codepage.  */
  char *current_locale = setlocale (LC_CTYPE, NULL);
  /* Be robust against a NULL result from setlocale: treat it like a locale
     name without a codepage suffix.  */
  char *pdot =
    (current_locale != NULL ? strrchr (current_locale, '.') : NULL);

  /* Only use the suffix if "CP" + suffix + NUL fits into buf.  */
  if (pdot && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
    sprintf (buf, "CP%s", pdot + 1);
  else
    {
      /* The Windows API has a function returning the locale's codepage as a
         number: GetACP().
         When the output goes to a console window, it needs to be provided in
         GetOEMCP() encoding if the console is using a raster font, or in
         GetConsoleOutputCP() encoding if it is using a TrueType font.
         But in GUI programs and for output sent to files and pipes, GetACP()
         encoding is the best bet.  */
      sprintf (buf, "CP%u", GetACP ());
    }
  /* For a locale name such as "French_France.65001", in Windows 10,
     setlocale now returns "French_France.utf8" instead.  Also accept the
     spelling "UTF-8" in the suffix, so that such a locale name is not
     reported as the bogus codepage name "CPUTF-8".  */
  if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0
      || strcmp (buf + 2, "UTF-8") == 0)
    codeset = "UTF-8";
  else
    {
      strcpy (resultbuf, buf);
      codeset = resultbuf;
    }

# elif defined OS2

  const char *locale;
  static char resultbuf[2 + 10 + 1];
  ULONG cp[3];
  ULONG cplen;

  codeset = NULL;

  /* Allow user to override the codeset, as set in the operating system,
     with standard language environment variables.  */
  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
        locale = getenv ("LANG");
    }
  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it.  */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
        {
          const char *modifier;

          dot++;
          /* Look for the possible @... trailer and remove it, if any.  */
          modifier = strchr (dot, '@');
          if (modifier == NULL)
            return dot;
          /* modifier > dot here, so the difference is non-negative.  */
          if ((size_t) (modifier - dot) < sizeof (resultbuf))
            {
              /* This way of filling resultbuf is multithread-safe.  */
              memcpy (resultbuf, dot, modifier - dot);
              resultbuf [modifier - dot] = '\0';
              return resultbuf;
            }
        }

      /* For the POSIX locale, don't use the system's codepage.  */
      if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
        codeset = "";
    }

  if (codeset == NULL)
    {
      /* OS/2 has a function returning the locale's codepage as a number.
         A nonzero return value is treated as failure.  */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
        codeset = "";
      else
        {
          char buf[2 + 10 + 1];

          /* Convert explicitly, so that the argument type matches the
             conversion specifier whatever ULONG is defined as.  */
          sprintf (buf, "CP%lu", (unsigned long) cp[0]);
          strcpy (resultbuf, buf);
          codeset = resultbuf;
        }
    }

# else

#  error "Add code for other platforms here."

# endif

  /* Resolve alias.  */
  {
# ifdef alias_table_defined
    /* On some platforms, UTF-8 locales are the most frequently used ones.
       Speed up the common case and slow down the less common cases by
       testing for this case first.  */
#  if defined __OpenBSD__ || (defined __APPLE__ && defined __MACH__) || defined __sun || defined __CYGWIN__
    if (strcmp (codeset, "UTF-8") == 0)
      goto done_table_lookup;
    else
#  endif
      {
        const struct table_entry * const table = alias_table;
        size_t const table_size =
          sizeof (alias_table) / sizeof (struct table_entry);
        /* The table is sorted.  Perform a binary search.  */
        size_t hi = table_size;
        size_t lo = 0;
        while (lo < hi)
          {
            /* Invariant:
               for i < lo, strcmp (table[i].alias, codeset) < 0,
               for i >= hi, strcmp (table[i].alias, codeset) > 0.  */
            size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
            int cmp = strcmp (table[mid].alias, codeset);
            if (cmp < 0)
              lo = mid + 1;
            else if (cmp > 0)
              hi = mid;
            else
              {
                /* Found an i with
                     strcmp (table[i].alias, codeset) == 0.  */
                codeset = table[mid].canonical;
                goto done_table_lookup;
              }
          }
      }
    /* The label sits in the never-taken branch of 'if (0)', so that a jump
       to it skips the 'else' block below, while falling out of the search
       loop executes that block.  */
    if (0)
      done_table_lookup: ;
    else
# endif
      {
        /* Did not find it in the table.  */
        /* On Mac OS X, all modern locales use the UTF-8 encoding.
           BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
        codeset = "UTF-8";
# else
        /* Don't return an empty string.  GNU libc and GNU libiconv interpret
           the empty string as denoting "the locale's character encoding",
           thus GNU libiconv would call this function a second time.  */
        if (codeset[0] == '\0')
          codeset = "ASCII";
# endif
      }
  }

#else

  /* On old systems which lack it, use setlocale or getenv.  */
  const char *locale = NULL;

  /* But most old systems don't have a complete set of locales.  Some
     (like DJGPP) have only the C locale.  Therefore we don't use setlocale
     here; it would return "C" when it doesn't support the locale name the
     user has set.  */
# if 0
  locale = setlocale (LC_CTYPE, NULL);
# endif
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
          if (locale == NULL)
            locale = "";
        }
    }

  /* Map locale name to canonical encoding name.  */
  {
# ifdef locale_table_defined
    const struct table_entry * const table = locale_table;
    size_t const table_size =
      sizeof (locale_table) / sizeof (struct table_entry);
    /* The table is sorted.  Perform a binary search.  */
    size_t hi = table_size;
    size_t lo = 0;
    while (lo < hi)
      {
        /* Invariant:
           for i < lo, strcmp (table[i].locale, locale) < 0,
           for i >= hi, strcmp (table[i].locale, locale) > 0.  */
        size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
        int cmp = strcmp (table[mid].locale, locale);
        if (cmp < 0)
          lo = mid + 1;
        else if (cmp > 0)
          hi = mid;
        else
          {
            /* Found an i with
                 strcmp (table[i].locale, locale) == 0.  */
            codeset = table[mid].canonical;
            goto done_table_lookup;
          }
      }
    /* The label sits in the never-taken branch of 'if (0)', so that a jump
       to it skips the 'else' block below, while falling out of the search
       loop executes that block.  */
    if (0)
      done_table_lookup: ;
    else
# endif
      {
        /* Did not find it in the table.  */
        /* On Mac OS X, all modern locales use the UTF-8 encoding.
           BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
        codeset = "UTF-8";
# else
        /* The canonical name cannot be determined.  */
        /* Don't return an empty string.  GNU libc and GNU libiconv interpret
           the empty string as denoting "the locale's character encoding",
           thus GNU libiconv would call this function a second time.  */
        codeset = "ASCII";
# endif
      }
  }

#endif

#ifdef DARWIN7
  /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
     (the default codeset) does not work when MB_CUR_MAX is 1.  */
  if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
    codeset = "ASCII";
#endif

  return codeset;
}