diff options
author | Nakajima Akira <nakajima.akira@nttcom.co.jp> | 2015-04-09 04:27:39 -0400 |
---|---|---|
committer | Steve French <smfrench@gmail.com> | 2015-05-20 14:12:51 -0400 |
commit | b29103076bec8316e155e71309dc0fba499022c6 (patch) | |
tree | cee456c475ba1d7efea3eca41ec836e8fb8707c9 /fs | |
parent | 00b8c95b680791a72b4bb14dc371ff1f1daae39c (diff) |
Fix to convert SURROGATE PAIR
Filenames that contain surrogate pairs come out garbled.
(each such character is replaced with ??)
[Steps to reproduce the bug]
client# touch $(echo -e '\xf0\x9d\x9f\xa3')
client# touch $(echo -e '\xf0\x9d\x9f\xa4')
client# ls -li
You see the same inode number and the same filename (?? and ??) for both files.
Fix the bug that the following functions do not take surrogate pairs (and IVS) into account:
cifs_utf16_bytes()
cifs_mapchar()
cifs_from_utf16()
cifsConvertToUTF16()
Reported-by: Nakajima Akira <nakajima.akira@nttcom.co.jp>
Signed-off-by: Nakajima Akira <nakajima.akira@nttcom.co.jp>
Signed-off-by: Steve French <smfrench@gmail.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/cifs/cifs_unicode.c | 182 |
1 files changed, 136 insertions, 46 deletions
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 0303c6793d90..5a53ac6b1e02 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c | |||
@@ -27,41 +27,6 @@ | |||
27 | #include "cifsglob.h" | 27 | #include "cifsglob.h" |
28 | #include "cifs_debug.h" | 28 | #include "cifs_debug.h" |
29 | 29 | ||
30 | /* | ||
31 | * cifs_utf16_bytes - how long will a string be after conversion? | ||
32 | * @utf16 - pointer to input string | ||
33 | * @maxbytes - don't go past this many bytes of input string | ||
34 | * @codepage - destination codepage | ||
35 | * | ||
36 | * Walk a utf16le string and return the number of bytes that the string will | ||
37 | * be after being converted to the given charset, not including any null | ||
38 | * termination required. Don't walk past maxbytes in the source buffer. | ||
39 | */ | ||
40 | int | ||
41 | cifs_utf16_bytes(const __le16 *from, int maxbytes, | ||
42 | const struct nls_table *codepage) | ||
43 | { | ||
44 | int i; | ||
45 | int charlen, outlen = 0; | ||
46 | int maxwords = maxbytes / 2; | ||
47 | char tmp[NLS_MAX_CHARSET_SIZE]; | ||
48 | __u16 ftmp; | ||
49 | |||
50 | for (i = 0; i < maxwords; i++) { | ||
51 | ftmp = get_unaligned_le16(&from[i]); | ||
52 | if (ftmp == 0) | ||
53 | break; | ||
54 | |||
55 | charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE); | ||
56 | if (charlen > 0) | ||
57 | outlen += charlen; | ||
58 | else | ||
59 | outlen++; | ||
60 | } | ||
61 | |||
62 | return outlen; | ||
63 | } | ||
64 | |||
65 | int cifs_remap(struct cifs_sb_info *cifs_sb) | 30 | int cifs_remap(struct cifs_sb_info *cifs_sb) |
66 | { | 31 | { |
67 | int map_type; | 32 | int map_type; |
@@ -155,10 +120,13 @@ convert_sfm_char(const __u16 src_char, char *target) | |||
155 | * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). | 120 | * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). |
156 | */ | 121 | */ |
157 | static int | 122 | static int |
158 | cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, | 123 | cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp, |
159 | int maptype) | 124 | int maptype) |
160 | { | 125 | { |
161 | int len = 1; | 126 | int len = 1; |
127 | __u16 src_char; | ||
128 | |||
129 | src_char = *from; | ||
162 | 130 | ||
163 | if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target)) | 131 | if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target)) |
164 | return len; | 132 | return len; |
@@ -168,10 +136,23 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, | |||
168 | 136 | ||
169 | /* if character not one of seven in special remap set */ | 137 | /* if character not one of seven in special remap set */ |
170 | len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE); | 138 | len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE); |
171 | if (len <= 0) { | 139 | if (len <= 0) |
172 | *target = '?'; | 140 | goto surrogate_pair; |
173 | len = 1; | 141 | |
174 | } | 142 | return len; |
143 | |||
144 | surrogate_pair: | ||
145 | /* convert SURROGATE_PAIR and IVS */ | ||
146 | if (strcmp(cp->charset, "utf8")) | ||
147 | goto unknown; | ||
148 | len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6); | ||
149 | if (len <= 0) | ||
150 | goto unknown; | ||
151 | return len; | ||
152 | |||
153 | unknown: | ||
154 | *target = '?'; | ||
155 | len = 1; | ||
175 | return len; | 156 | return len; |
176 | } | 157 | } |
177 | 158 | ||
@@ -206,7 +187,7 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, | |||
206 | int nullsize = nls_nullsize(codepage); | 187 | int nullsize = nls_nullsize(codepage); |
207 | int fromwords = fromlen / 2; | 188 | int fromwords = fromlen / 2; |
208 | char tmp[NLS_MAX_CHARSET_SIZE]; | 189 | char tmp[NLS_MAX_CHARSET_SIZE]; |
209 | __u16 ftmp; | 190 | __u16 ftmp[3]; /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */ |
210 | 191 | ||
211 | /* | 192 | /* |
212 | * because the chars can be of varying widths, we need to take care | 193 | * because the chars can be of varying widths, we need to take care |
@@ -217,9 +198,17 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, | |||
217 | safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); | 198 | safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); |
218 | 199 | ||
219 | for (i = 0; i < fromwords; i++) { | 200 | for (i = 0; i < fromwords; i++) { |
220 | ftmp = get_unaligned_le16(&from[i]); | 201 | ftmp[0] = get_unaligned_le16(&from[i]); |
221 | if (ftmp == 0) | 202 | if (ftmp[0] == 0) |
222 | break; | 203 | break; |
204 | if (i + 1 < fromwords) | ||
205 | ftmp[1] = get_unaligned_le16(&from[i + 1]); | ||
206 | else | ||
207 | ftmp[1] = 0; | ||
208 | if (i + 2 < fromwords) | ||
209 | ftmp[2] = get_unaligned_le16(&from[i + 2]); | ||
210 | else | ||
211 | ftmp[2] = 0; | ||
223 | 212 | ||
224 | /* | 213 | /* |
225 | * check to see if converting this character might make the | 214 | * check to see if converting this character might make the |
@@ -234,6 +223,17 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, | |||
234 | /* put converted char into 'to' buffer */ | 223 | /* put converted char into 'to' buffer */ |
235 | charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type); | 224 | charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type); |
236 | outlen += charlen; | 225 | outlen += charlen; |
226 | |||
227 | /* charlen (=bytes of UTF-8 for 1 character) | ||
228 | * 4bytes UTF-8(surrogate pair) is charlen=4 | ||
229 | * (4bytes UTF-16 code) | ||
230 | * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4 | ||
231 | * (2 UTF-8 pairs divided to 2 UTF-16 pairs) */ | ||
232 | if (charlen == 4) | ||
233 | i++; | ||
234 | else if (charlen >= 5) | ||
235 | /* 5-6bytes UTF-8 */ | ||
236 | i += 2; | ||
237 | } | 237 | } |
238 | 238 | ||
239 | /* properly null-terminate string */ | 239 | /* properly null-terminate string */ |
@@ -296,6 +296,46 @@ success: | |||
296 | } | 296 | } |
297 | 297 | ||
298 | /* | 298 | /* |
299 | * cifs_utf16_bytes - how long will a string be after conversion? | ||
300 | * @utf16 - pointer to input string | ||
301 | * @maxbytes - don't go past this many bytes of input string | ||
302 | * @codepage - destination codepage | ||
303 | * | ||
304 | * Walk a utf16le string and return the number of bytes that the string will | ||
305 | * be after being converted to the given charset, not including any null | ||
306 | * termination required. Don't walk past maxbytes in the source buffer. | ||
307 | */ | ||
308 | int | ||
309 | cifs_utf16_bytes(const __le16 *from, int maxbytes, | ||
310 | const struct nls_table *codepage) | ||
311 | { | ||
312 | int i; | ||
313 | int charlen, outlen = 0; | ||
314 | int maxwords = maxbytes / 2; | ||
315 | char tmp[NLS_MAX_CHARSET_SIZE]; | ||
316 | __u16 ftmp[3]; | ||
317 | |||
318 | for (i = 0; i < maxwords; i++) { | ||
319 | ftmp[0] = get_unaligned_le16(&from[i]); | ||
320 | if (ftmp[0] == 0) | ||
321 | break; | ||
322 | if (i + 1 < maxwords) | ||
323 | ftmp[1] = get_unaligned_le16(&from[i + 1]); | ||
324 | else | ||
325 | ftmp[1] = 0; | ||
326 | if (i + 2 < maxwords) | ||
327 | ftmp[2] = get_unaligned_le16(&from[i + 2]); | ||
328 | else | ||
329 | ftmp[2] = 0; | ||
330 | |||
331 | charlen = cifs_mapchar(tmp, ftmp, codepage, NO_MAP_UNI_RSVD); | ||
332 | outlen += charlen; | ||
333 | } | ||
334 | |||
335 | return outlen; | ||
336 | } | ||
337 | |||
338 | /* | ||
299 | * cifs_strndup_from_utf16 - copy a string from wire format to the local | 339 | * cifs_strndup_from_utf16 - copy a string from wire format to the local |
300 | * codepage | 340 | * codepage |
301 | * @src - source string | 341 | * @src - source string |
@@ -409,10 +449,15 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, | |||
409 | char src_char; | 449 | char src_char; |
410 | __le16 dst_char; | 450 | __le16 dst_char; |
411 | wchar_t tmp; | 451 | wchar_t tmp; |
452 | wchar_t *wchar_to; /* UTF-16 */ | ||
453 | int ret; | ||
454 | unicode_t u; | ||
412 | 455 | ||
413 | if (map_chars == NO_MAP_UNI_RSVD) | 456 | if (map_chars == NO_MAP_UNI_RSVD) |
414 | return cifs_strtoUTF16(target, source, PATH_MAX, cp); | 457 | return cifs_strtoUTF16(target, source, PATH_MAX, cp); |
415 | 458 | ||
459 | wchar_to = kzalloc(6, GFP_KERNEL); | ||
460 | |||
416 | for (i = 0; i < srclen; j++) { | 461 | for (i = 0; i < srclen; j++) { |
417 | src_char = source[i]; | 462 | src_char = source[i]; |
418 | charlen = 1; | 463 | charlen = 1; |
@@ -441,11 +486,55 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, | |||
441 | * if no match, use question mark, which at least in | 486 | * if no match, use question mark, which at least in |
442 | * some cases serves as wild card | 487 | * some cases serves as wild card |
443 | */ | 488 | */ |
444 | if (charlen < 1) { | 489 | if (charlen > 0) |
445 | dst_char = cpu_to_le16(0x003f); | 490 | goto ctoUTF16; |
446 | charlen = 1; | 491 | |
492 | /* convert SURROGATE_PAIR */ | ||
493 | if (strcmp(cp->charset, "utf8") || !wchar_to) | ||
494 | goto unknown; | ||
495 | if (*(source + i) & 0x80) { | ||
496 | charlen = utf8_to_utf32(source + i, 6, &u); | ||
497 | if (charlen < 0) | ||
498 | goto unknown; | ||
499 | } else | ||
500 | goto unknown; | ||
501 | ret = utf8s_to_utf16s(source + i, charlen, | ||
502 | UTF16_LITTLE_ENDIAN, | ||
503 | wchar_to, 6); | ||
504 | if (ret < 0) | ||
505 | goto unknown; | ||
506 | |||
507 | i += charlen; | ||
508 | dst_char = cpu_to_le16(*wchar_to); | ||
509 | if (charlen <= 3) | ||
510 | /* 1-3bytes UTF-8 to 2bytes UTF-16 */ | ||
511 | put_unaligned(dst_char, &target[j]); | ||
512 | else if (charlen == 4) { | ||
513 | /* 4bytes UTF-8(surrogate pair) to 4bytes UTF-16 | ||
514 | * 7-8bytes UTF-8(IVS) divided to 2 UTF-16 | ||
515 | * (charlen=3+4 or 4+4) */ | ||
516 | put_unaligned(dst_char, &target[j]); | ||
517 | dst_char = cpu_to_le16(*(wchar_to + 1)); | ||
518 | j++; | ||
519 | put_unaligned(dst_char, &target[j]); | ||
520 | } else if (charlen >= 5) { | ||
521 | /* 5-6bytes UTF-8 to 6bytes UTF-16 */ | ||
522 | put_unaligned(dst_char, &target[j]); | ||
523 | dst_char = cpu_to_le16(*(wchar_to + 1)); | ||
524 | j++; | ||
525 | put_unaligned(dst_char, &target[j]); | ||
526 | dst_char = cpu_to_le16(*(wchar_to + 2)); | ||
527 | j++; | ||
528 | put_unaligned(dst_char, &target[j]); | ||
447 | } | 529 | } |
530 | continue; | ||
531 | |||
532 | unknown: | ||
533 | dst_char = cpu_to_le16(0x003f); | ||
534 | charlen = 1; | ||
448 | } | 535 | } |
536 | |||
537 | ctoUTF16: | ||
449 | /* | 538 | /* |
450 | * character may take more than one byte in the source string, | 539 | * character may take more than one byte in the source string, |
451 | * but will take exactly two bytes in the target string | 540 | * but will take exactly two bytes in the target string |
@@ -456,6 +545,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, | |||
456 | 545 | ||
457 | ctoUTF16_out: | 546 | ctoUTF16_out: |
458 | put_unaligned(0, &target[j]); /* Null terminate target unicode string */ | 547 | put_unaligned(0, &target[j]); /* Null terminate target unicode string */ |
548 | kfree(wchar_to); | ||
459 | return j; | 549 | return j; |
460 | } | 550 | } |
461 | 551 | ||