Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * fs/cifs/cifs_unicode.c | |
3 | * | |
e89dc920 | 4 | * Copyright (c) International Business Machines Corp., 2000,2005 |
1da177e4 LT |
5 | * Modified by Steve French (sfrench@us.ibm.com) |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
221601c3 | 9 | * the Free Software Foundation; either version 2 of the License, or |
1da177e4 | 10 | * (at your option) any later version. |
221601c3 | 11 | * |
1da177e4 LT |
12 | * This program is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | |
15 | * the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
221601c3 | 18 | * along with this program; if not, write to the Free Software |
1da177e4 LT |
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
20 | */ | |
21 | #include <linux/fs.h> | |
22 | #include "cifs_unicode.h" | |
23 | #include "cifs_uniupr.h" | |
24 | #include "cifspdu.h" | |
3979877e | 25 | #include "cifsglob.h" |
1da177e4 LT |
26 | #include "cifs_debug.h" |
27 | ||
69f801fc JL |
28 | /* |
29 | * cifs_ucs2_bytes - how long will a string be after conversion? | |
30 | * @ucs - pointer to input string | |
31 | * @maxbytes - don't go past this many bytes of input string | |
32 | * @codepage - destination codepage | |
33 | * | |
34 | * Walk a ucs2le string and return the number of bytes that the string will | |
35 | * be after being converted to the given charset, not including any null | |
36 | * termination required. Don't walk past maxbytes in the source buffer. | |
37 | */ | |
38 | int | |
39 | cifs_ucs2_bytes(const __le16 *from, int maxbytes, | |
40 | const struct nls_table *codepage) | |
41 | { | |
42 | int i; | |
43 | int charlen, outlen = 0; | |
44 | int maxwords = maxbytes / 2; | |
45 | char tmp[NLS_MAX_CHARSET_SIZE]; | |
46 | ||
47 | for (i = 0; from[i] && i < maxwords; i++) { | |
48 | charlen = codepage->uni2char(le16_to_cpu(from[i]), tmp, | |
49 | NLS_MAX_CHARSET_SIZE); | |
50 | if (charlen > 0) | |
51 | outlen += charlen; | |
52 | else | |
53 | outlen++; | |
54 | } | |
55 | ||
56 | return outlen; | |
57 | } | |
58 | ||
7fabf0c9 JL |
59 | /* |
60 | * cifs_mapchar - convert a little-endian char to proper char in codepage | |
61 | * @target - where converted character should be copied | |
62 | * @src_char - 2 byte little-endian source character | |
63 | * @cp - codepage to which character should be converted | |
64 | * @mapchar - should character be mapped according to mapchars mount option? | |
65 | * | |
66 | * This function handles the conversion of a single character. It is the | |
67 | * responsibility of the caller to ensure that the target buffer is large | |
68 | * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). | |
69 | */ | |
70 | static int | |
71 | cifs_mapchar(char *target, const __le16 src_char, const struct nls_table *cp, | |
72 | bool mapchar) | |
73 | { | |
74 | int len = 1; | |
75 | ||
76 | if (!mapchar) | |
77 | goto cp_convert; | |
78 | ||
79 | /* | |
80 | * BB: Cannot handle remapping UNI_SLASH until all the calls to | |
81 | * build_path_from_dentry are modified, as they use slash as | |
82 | * separator. | |
83 | */ | |
84 | switch (le16_to_cpu(src_char)) { | |
85 | case UNI_COLON: | |
86 | *target = ':'; | |
87 | break; | |
88 | case UNI_ASTERIK: | |
89 | *target = '*'; | |
90 | break; | |
91 | case UNI_QUESTION: | |
92 | *target = '?'; | |
93 | break; | |
94 | case UNI_PIPE: | |
95 | *target = '|'; | |
96 | break; | |
97 | case UNI_GRTRTHAN: | |
98 | *target = '>'; | |
99 | break; | |
100 | case UNI_LESSTHAN: | |
101 | *target = '<'; | |
102 | break; | |
103 | default: | |
104 | goto cp_convert; | |
105 | } | |
106 | ||
107 | out: | |
108 | return len; | |
109 | ||
110 | cp_convert: | |
111 | len = cp->uni2char(le16_to_cpu(src_char), target, | |
112 | NLS_MAX_CHARSET_SIZE); | |
113 | if (len <= 0) { | |
114 | *target = '?'; | |
115 | len = 1; | |
116 | } | |
117 | goto out; | |
118 | } | |
119 | ||
120 | /* | |
121 | * cifs_from_ucs2 - convert utf16le string to local charset | |
122 | * @to - destination buffer | |
123 | * @from - source buffer | |
124 | * @tolen - destination buffer size (in bytes) | |
125 | * @fromlen - source buffer size (in bytes) | |
126 | * @codepage - codepage to which characters should be converted | |
127 | * @mapchar - should characters be remapped according to the mapchars option? | |
128 | * | |
129 | * Convert a little-endian ucs2le string (as sent by the server) to a string | |
130 | * in the provided codepage. The tolen and fromlen parameters are to ensure | |
131 | * that the code doesn't walk off of the end of the buffer (which is always | |
132 | * a danger if the alignment of the source buffer is off). The destination | |
133 | * string is always properly null terminated and fits in the destination | |
134 | * buffer. Returns the length of the destination string in bytes (including | |
135 | * null terminator). | |
136 | * | |
137 | * Note that some windows versions actually send multiword UTF-16 characters | |
138 | * instead of straight UCS-2. The linux nls routines however aren't able to | |
139 | * deal with those characters properly. In the event that we get some of | |
140 | * those characters, they won't be translated properly. | |
141 | */ | |
142 | int | |
143 | cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen, | |
144 | const struct nls_table *codepage, bool mapchar) | |
145 | { | |
146 | int i, charlen, safelen; | |
147 | int outlen = 0; | |
148 | int nullsize = nls_nullsize(codepage); | |
149 | int fromwords = fromlen / 2; | |
150 | char tmp[NLS_MAX_CHARSET_SIZE]; | |
151 | ||
152 | /* | |
153 | * because the chars can be of varying widths, we need to take care | |
154 | * not to overflow the destination buffer when we get close to the | |
155 | * end of it. Until we get to this offset, we don't need to check | |
156 | * for overflow however. | |
157 | */ | |
158 | safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); | |
159 | ||
160 | for (i = 0; i < fromwords && from[i]; i++) { | |
161 | /* | |
162 | * check to see if converting this character might make the | |
163 | * conversion bleed into the null terminator | |
164 | */ | |
165 | if (outlen >= safelen) { | |
166 | charlen = cifs_mapchar(tmp, from[i], codepage, mapchar); | |
167 | if ((outlen + charlen) > (tolen - nullsize)) | |
168 | break; | |
169 | } | |
170 | ||
171 | /* put converted char into 'to' buffer */ | |
172 | charlen = cifs_mapchar(&to[outlen], from[i], codepage, mapchar); | |
173 | outlen += charlen; | |
174 | } | |
175 | ||
176 | /* properly null-terminate string */ | |
177 | for (i = 0; i < nullsize; i++) | |
178 | to[outlen++] = 0; | |
179 | ||
180 | return outlen; | |
181 | } | |
182 | ||
1da177e4 LT |
183 | /* |
184 | * NAME: cifs_strfromUCS() | |
185 | * | |
186 | * FUNCTION: Convert little-endian unicode string to character string | |
187 | * | |
188 | */ | |
189 | int | |
ad7a2926 | 190 | cifs_strfromUCS_le(char *to, const __le16 *from, |
1da177e4 LT |
191 | int len, const struct nls_table *codepage) |
192 | { | |
193 | int i; | |
194 | int outlen = 0; | |
195 | ||
196 | for (i = 0; (i < len) && from[i]; i++) { | |
197 | int charlen; | |
198 | /* 2.4.0 kernel or greater */ | |
199 | charlen = | |
200 | codepage->uni2char(le16_to_cpu(from[i]), &to[outlen], | |
201 | NLS_MAX_CHARSET_SIZE); | |
202 | if (charlen > 0) { | |
203 | outlen += charlen; | |
204 | } else { | |
205 | to[outlen++] = '?'; | |
206 | } | |
207 | } | |
208 | to[outlen] = 0; | |
209 | return outlen; | |
210 | } | |
211 | ||
212 | /* | |
213 | * NAME: cifs_strtoUCS() | |
214 | * | |
215 | * FUNCTION: Convert character string to unicode string | |
216 | * | |
217 | */ | |
218 | int | |
ad7a2926 | 219 | cifs_strtoUCS(__le16 *to, const char *from, int len, |
1da177e4 LT |
220 | const struct nls_table *codepage) |
221 | { | |
222 | int charlen; | |
223 | int i; | |
50c2f753 | 224 | wchar_t *wchar_to = (wchar_t *)to; /* needed to quiet sparse */ |
1da177e4 LT |
225 | |
226 | for (i = 0; len && *from; i++, from += charlen, len -= charlen) { | |
227 | ||
228 | /* works for 2.4.0 kernel or later */ | |
e89dc920 | 229 | charlen = codepage->char2uni(from, len, &wchar_to[i]); |
1da177e4 LT |
230 | if (charlen < 1) { |
231 | cERROR(1, | |
3a9f462f SF |
232 | ("strtoUCS: char2uni of %d returned %d", |
233 | (int)*from, charlen)); | |
69114089 | 234 | /* A question mark */ |
e89dc920 | 235 | to[i] = cpu_to_le16(0x003f); |
1da177e4 | 236 | charlen = 1; |
221601c3 | 237 | } else |
e89dc920 | 238 | to[i] = cpu_to_le16(wchar_to[i]); |
1da177e4 LT |
239 | |
240 | } | |
241 | ||
242 | to[i] = 0; | |
243 | return i; | |
244 | } | |
245 |