Commit | Line | Data |
---|---|---|
5df4cba6 SM |
1 | /* Convert multibyte character to wide character. |
2 | Copyright (C) 1999-2002, 2005-2020 Free Software Foundation, Inc. | |
3 | ||
4 | This program is free software: you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
6 | the Free Software Foundation; either version 3 of the License, or | |
7 | (at your option) any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program. If not, see <https://www.gnu.org/licenses/>. */ | |
16 | ||
17 | /* Written by Bruno Haible <bruno@clisp.org>, 2008. */ | |
18 | ||
19 | /* This file contains the part of the body of the mbrtowc and mbrtoc32 functions | |
20 | that handles the special case of the UTF-8 encoding. */ | |
21 | ||
22 | /* Decode one UTF-8 character from the m bytes at p (cf. unistr/u8-mbtouc.c). On a complete, accepted sequence: store the character through pwc when pwc is non-NULL, set res to the sequence length (0 for the NUL character) and go to success. When the m bytes end before the sequence does and nothing read so far was rejected: go to incomplete. In every other case control reaches the final goto invalid. */ | |
23 | unsigned char c = (unsigned char) p[0]; /* lead byte; read unconditionally, so m >= 1 is assumed (not checked in this fragment -- confirm in the including function) */ |
24 | ||
25 | if (c < 0x80) /* single byte: the character value is the byte itself */ | |
26 | { | |
27 | if (pwc != NULL) | |
28 | *pwc = c; | |
29 | res = (c == 0 ? 0 : 1); /* the NUL character reports length 0 */ | |
30 | goto success; | |
31 | } | |
32 | if (c >= 0xc2) /* lead bytes 0x80..0xc1 match no branch and end up at invalid */ | |
33 | { | |
34 | if (c < 0xe0) /* 0xc2..0xdf: two-byte sequence */ | |
35 | { | |
36 | if (m == 1) | |
37 | goto incomplete; | |
38 | else /* m >= 2 */ | |
39 | { | |
40 | unsigned char c2 = (unsigned char) p[1]; | |
41 | ||
42 | if ((c2 ^ 0x80) < 0x40) /* true exactly when c2 is in 0x80..0xbf */ | |
43 | { | |
44 | if (pwc != NULL) | |
45 | *pwc = ((unsigned int) (c & 0x1f) << 6) | |
46 | | (unsigned int) (c2 ^ 0x80); | |
47 | res = 2; | |
48 | goto success; | |
49 | } | |
50 | } | |
51 | } | |
52 | else if (c < 0xf0) /* 0xe0..0xef: three-byte sequence */ | |
53 | { | |
54 | if (m == 1) | |
55 | goto incomplete; | |
56 | else /* m >= 2 */ | |
57 | { | |
58 | unsigned char c2 = (unsigned char) p[1]; | |
59 | ||
60 | if ((c2 ^ 0x80) < 0x40 /* c2 is in 0x80..0xbf */ | |
61 | && (c >= 0xe1 || c2 >= 0xa0) /* with c == 0xe0, rejects values below 0x800 (overlong form) */ | |
62 | && (c != 0xed || c2 < 0xa0)) /* with c == 0xed, rejects 0xd800..0xdfff (surrogates) */ | |
63 | { | |
64 | if (m == 2) | |
65 | goto incomplete; | |
66 | else /* m >= 3 */ | |
67 | { | |
68 | unsigned char c3 = (unsigned char) p[2]; | |
69 | ||
70 | if ((c3 ^ 0x80) < 0x40) /* c3 is in 0x80..0xbf */ | |
71 | { | |
72 | unsigned int wc = | |
73 | (((unsigned int) (c & 0x0f) << 12) | |
74 | | ((unsigned int) (c2 ^ 0x80) << 6) | |
75 | | (unsigned int) (c3 ^ 0x80)); | |
76 | ||
77 | if (FITS_IN_CHAR_TYPE (wc)) /* macro supplied by the including file; presumably tests that wc is representable in the result character type -- confirm there. When it is false, control reaches invalid. */ | |
78 | { | |
79 | if (pwc != NULL) | |
80 | *pwc = wc; | |
81 | res = 3; | |
82 | goto success; | |
83 | } | |
84 | } | |
85 | } | |
86 | } | |
87 | } | |
88 | } | |
89 | else if (c <= 0xf4) /* 0xf0..0xf4: four-byte sequence; larger lead bytes end up at invalid */ | |
90 | { | |
91 | if (m == 1) | |
92 | goto incomplete; | |
93 | else /* m >= 2 */ | |
94 | { | |
95 | unsigned char c2 = (unsigned char) p[1]; | |
96 | ||
97 | if ((c2 ^ 0x80) < 0x40 /* c2 is in 0x80..0xbf */ | |
98 | && (c >= 0xf1 || c2 >= 0x90) /* with c == 0xf0, rejects values below 0x10000 (overlong form) */ | |
99 | && (c < 0xf4 || (c == 0xf4 && c2 < 0x90))) /* with c == 0xf4, rejects values above 0x10ffff */ | |
100 | { | |
101 | if (m == 2) | |
102 | goto incomplete; | |
103 | else /* m >= 3 */ | |
104 | { | |
105 | unsigned char c3 = (unsigned char) p[2]; | |
106 | ||
107 | if ((c3 ^ 0x80) < 0x40) /* c3 is in 0x80..0xbf */ | |
108 | { | |
109 | if (m == 3) | |
110 | goto incomplete; | |
111 | else /* m >= 4 */ | |
112 | { | |
113 | unsigned char c4 = (unsigned char) p[3]; | |
114 | ||
115 | if ((c4 ^ 0x80) < 0x40) /* c4 is in 0x80..0xbf */ | |
116 | { | |
117 | unsigned int wc = | |
118 | (((unsigned int) (c & 0x07) << 18) | |
119 | | ((unsigned int) (c2 ^ 0x80) << 12) | |
120 | | ((unsigned int) (c3 ^ 0x80) << 6) | |
121 | | (unsigned int) (c4 ^ 0x80)); | |
122 | ||
123 | if (FITS_IN_CHAR_TYPE (wc)) /* same externally defined check as in the three-byte case; when it is false, control reaches invalid */ | |
124 | { | |
125 | if (pwc != NULL) | |
126 | *pwc = wc; | |
127 | res = 4; | |
128 | goto success; | |
129 | } | |
130 | } | |
131 | } | |
132 | } | |
133 | } | |
134 | } | |
135 | } | |
136 | } | |
137 | } | |
138 | goto invalid; /* reached by every path that did not jump to success or incomplete */ |