aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorNakajima Akira <nakajima.akira@nttcom.co.jp>2015-04-09 04:27:39 -0400
committerSteve French <smfrench@gmail.com>2015-05-20 14:12:51 -0400
commitb29103076bec8316e155e71309dc0fba499022c6 (patch)
treecee456c475ba1d7efea3eca41ec836e8fb8707c9 /fs
parent00b8c95b680791a72b4bb14dc371ff1f1daae39c (diff)
Fix conversion of surrogate pairs
Filenames that contain surrogate pairs come out garbled (each such character is replaced with ??). [Steps to reproduce the bug] client# touch $(echo -e '\xf0\x9d\x9f\xa3') client# touch $(echo -e '\xf0\x9d\x9f\xa4') client# ls -li Both files are listed with the same inode number and the same filename (?? and ??). The following functions did not take surrogate pairs (or IVS, Ideographic Variation Sequences) into account; fix them so that they do: cifs_utf16_bytes() cifs_mapchar() cifs_from_utf16() cifsConvertToUTF16() Reported-by: Nakajima Akira <nakajima.akira@nttcom.co.jp> Signed-off-by: Nakajima Akira <nakajima.akira@nttcom.co.jp> Signed-off-by: Steve French <smfrench@gmail.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/cifs/cifs_unicode.c182
1 files changed, 136 insertions, 46 deletions
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 0303c6793d90..5a53ac6b1e02 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -27,41 +27,6 @@
27#include "cifsglob.h" 27#include "cifsglob.h"
28#include "cifs_debug.h" 28#include "cifs_debug.h"
29 29
30/*
31 * cifs_utf16_bytes - how long will a string be after conversion?
32 * @from - pointer to input string (little-endian 16-bit units)
33 * @maxbytes - don't go past this many bytes of input string
34 * @codepage - destination codepage
35 *
36 * Walk a utf16le string and return the number of bytes that the string will
37 * be after being converted to the given charset, not including any null
38 * termination required. Don't walk past maxbytes in the source buffer.
39 */
40int
41cifs_utf16_bytes(const __le16 *from, int maxbytes,
42 const struct nls_table *codepage)
43{
44 int i;
45 int charlen, outlen = 0;
46 int maxwords = maxbytes / 2; /* input limit expressed in 16-bit units */
47 char tmp[NLS_MAX_CHARSET_SIZE]; /* scratch output; only the returned length is used */
48 __u16 ftmp;
49
50 for (i = 0; i < maxwords; i++) {
51 ftmp = get_unaligned_le16(&from[i]);
52 if (ftmp == 0) /* stop at the NUL terminator */
53 break;
54
55 charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE);
56 if (charlen > 0)
57 outlen += charlen;
58 else
59 outlen++; /* uni2char() returned <= 0: count the unit as one byte */
60 }
61
62 return outlen;
63}
64
65int cifs_remap(struct cifs_sb_info *cifs_sb) 30int cifs_remap(struct cifs_sb_info *cifs_sb)
66{ 31{
67 int map_type; 32 int map_type;
@@ -155,10 +120,13 @@ convert_sfm_char(const __u16 src_char, char *target)
155 * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). 120 * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
156 */ 121 */
157static int 122static int
158cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, 123cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp,
159 int maptype) 124 int maptype)
160{ 125{
161 int len = 1; 126 int len = 1;
127 __u16 src_char;
128
129 src_char = *from;
162 130
163 if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target)) 131 if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target))
164 return len; 132 return len;
@@ -168,10 +136,23 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
168 136
169 /* if character not one of seven in special remap set */ 137 /* if character not one of seven in special remap set */
170 len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE); 138 len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
171 if (len <= 0) { 139 if (len <= 0)
172 *target = '?'; 140 goto surrogate_pair;
173 len = 1; 141
174 } 142 return len;
143
144surrogate_pair:
145 /* convert SURROGATE_PAIR and IVS */
146 if (strcmp(cp->charset, "utf8"))
147 goto unknown;
148 len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6);
149 if (len <= 0)
150 goto unknown;
151 return len;
152
153unknown:
154 *target = '?';
155 len = 1;
175 return len; 156 return len;
176} 157}
177 158
@@ -206,7 +187,7 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
206 int nullsize = nls_nullsize(codepage); 187 int nullsize = nls_nullsize(codepage);
207 int fromwords = fromlen / 2; 188 int fromwords = fromlen / 2;
208 char tmp[NLS_MAX_CHARSET_SIZE]; 189 char tmp[NLS_MAX_CHARSET_SIZE];
209 __u16 ftmp; 190 __u16 ftmp[3]; /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */
210 191
211 /* 192 /*
212 * because the chars can be of varying widths, we need to take care 193 * because the chars can be of varying widths, we need to take care
@@ -217,9 +198,17 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
217 safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); 198 safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
218 199
219 for (i = 0; i < fromwords; i++) { 200 for (i = 0; i < fromwords; i++) {
220 ftmp = get_unaligned_le16(&from[i]); 201 ftmp[0] = get_unaligned_le16(&from[i]);
221 if (ftmp == 0) 202 if (ftmp[0] == 0)
222 break; 203 break;
204 if (i + 1 < fromwords)
205 ftmp[1] = get_unaligned_le16(&from[i + 1]);
206 else
207 ftmp[1] = 0;
208 if (i + 2 < fromwords)
209 ftmp[2] = get_unaligned_le16(&from[i + 2]);
210 else
211 ftmp[2] = 0;
223 212
224 /* 213 /*
225 * check to see if converting this character might make the 214 * check to see if converting this character might make the
@@ -234,6 +223,17 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
234 /* put converted char into 'to' buffer */ 223 /* put converted char into 'to' buffer */
235 charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type); 224 charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type);
236 outlen += charlen; 225 outlen += charlen;
226
227 /* charlen (=bytes of UTF-8 for 1 character)
228 * 4bytes UTF-8(surrogate pair) is charlen=4
229 * (4bytes UTF-16 code)
230 * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4
231 * (2 UTF-8 pairs divided to 2 UTF-16 pairs) */
232 if (charlen == 4)
233 i++;
234 else if (charlen >= 5)
235 /* 5-6bytes UTF-8 */
236 i += 2;
237 } 237 }
238 238
239 /* properly null-terminate string */ 239 /* properly null-terminate string */
@@ -296,6 +296,46 @@ success:
296} 296}
297 297
298/* 298/*
299 * cifs_utf16_bytes - how long will a string be after conversion?
300 * @from - pointer to input string (little-endian 16-bit units)
301 * @maxbytes - don't go past this many bytes of input string
302 * @codepage - destination codepage
303 *
304 * Walk a utf16le string and return the number of bytes that the string will
305 * be after being converted to the given charset, not including any null
306 * termination required. Don't walk past maxbytes in the source buffer.
307 */
308int
309cifs_utf16_bytes(const __le16 *from, int maxbytes,
310 const struct nls_table *codepage)
311{
312 int i;
313 int charlen, outlen = 0;
314 int maxwords = maxbytes / 2; /* input limit expressed in 16-bit units */
315 char tmp[NLS_MAX_CHARSET_SIZE]; /* scratch output; only the returned length is used */
316 __u16 ftmp[3]; /* current 16-bit unit plus up to two units of lookahead */
317
318 for (i = 0; i < maxwords; i++) {
319 ftmp[0] = get_unaligned_le16(&from[i]);
320 if (ftmp[0] == 0) /* stop at the NUL terminator */
321 break;
322 if (i + 1 < maxwords) /* lookahead stays inside the input; 0 is used past the end */
323 ftmp[1] = get_unaligned_le16(&from[i + 1]);
324 else
325 ftmp[1] = 0;
326 if (i + 2 < maxwords)
327 ftmp[2] = get_unaligned_le16(&from[i + 2]);
328 else
329 ftmp[2] = 0;
330
331 charlen = cifs_mapchar(tmp, ftmp, codepage, NO_MAP_UNI_RSVD); /* always called with NO_MAP_UNI_RSVD here */
332 outlen += charlen; /* NOTE(review): i still advances by one unit per pass, so lookahead units handed to cifs_mapchar() are visited again as ftmp[0]; the total may overestimate - confirm this is intended */
333 }
334
335 return outlen;
336}
337
338/*
299 * cifs_strndup_from_utf16 - copy a string from wire format to the local 339 * cifs_strndup_from_utf16 - copy a string from wire format to the local
300 * codepage 340 * codepage
301 * @src - source string 341 * @src - source string
@@ -409,10 +449,15 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
409 char src_char; 449 char src_char;
410 __le16 dst_char; 450 __le16 dst_char;
411 wchar_t tmp; 451 wchar_t tmp;
452 wchar_t *wchar_to; /* UTF-16 */
453 int ret;
454 unicode_t u;
412 455
413 if (map_chars == NO_MAP_UNI_RSVD) 456 if (map_chars == NO_MAP_UNI_RSVD)
414 return cifs_strtoUTF16(target, source, PATH_MAX, cp); 457 return cifs_strtoUTF16(target, source, PATH_MAX, cp);
415 458
459 wchar_to = kzalloc(6, GFP_KERNEL);
460
416 for (i = 0; i < srclen; j++) { 461 for (i = 0; i < srclen; j++) {
417 src_char = source[i]; 462 src_char = source[i];
418 charlen = 1; 463 charlen = 1;
@@ -441,11 +486,55 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
441 * if no match, use question mark, which at least in 486 * if no match, use question mark, which at least in
442 * some cases serves as wild card 487 * some cases serves as wild card
443 */ 488 */
444 if (charlen < 1) { 489 if (charlen > 0)
445 dst_char = cpu_to_le16(0x003f); 490 goto ctoUTF16;
446 charlen = 1; 491
492 /* convert SURROGATE_PAIR */
493 if (strcmp(cp->charset, "utf8") || !wchar_to)
494 goto unknown;
495 if (*(source + i) & 0x80) {
496 charlen = utf8_to_utf32(source + i, 6, &u);
497 if (charlen < 0)
498 goto unknown;
499 } else
500 goto unknown;
501 ret = utf8s_to_utf16s(source + i, charlen,
502 UTF16_LITTLE_ENDIAN,
503 wchar_to, 6);
504 if (ret < 0)
505 goto unknown;
506
507 i += charlen;
508 dst_char = cpu_to_le16(*wchar_to);
509 if (charlen <= 3)
510 /* 1-3bytes UTF-8 to 2bytes UTF-16 */
511 put_unaligned(dst_char, &target[j]);
512 else if (charlen == 4) {
513 /* 4bytes UTF-8(surrogate pair) to 4bytes UTF-16
514 * 7-8bytes UTF-8(IVS) divided to 2 UTF-16
515 * (charlen=3+4 or 4+4) */
516 put_unaligned(dst_char, &target[j]);
517 dst_char = cpu_to_le16(*(wchar_to + 1));
518 j++;
519 put_unaligned(dst_char, &target[j]);
520 } else if (charlen >= 5) {
521 /* 5-6bytes UTF-8 to 6bytes UTF-16 */
522 put_unaligned(dst_char, &target[j]);
523 dst_char = cpu_to_le16(*(wchar_to + 1));
524 j++;
525 put_unaligned(dst_char, &target[j]);
526 dst_char = cpu_to_le16(*(wchar_to + 2));
527 j++;
528 put_unaligned(dst_char, &target[j]);
447 } 529 }
530 continue;
531
532unknown:
533 dst_char = cpu_to_le16(0x003f);
534 charlen = 1;
448 } 535 }
536
537ctoUTF16:
449 /* 538 /*
450 * character may take more than one byte in the source string, 539 * character may take more than one byte in the source string,
451 * but will take exactly two bytes in the target string 540 * but will take exactly two bytes in the target string
@@ -456,6 +545,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
456 545
457ctoUTF16_out: 546ctoUTF16_out:
458 put_unaligned(0, &target[j]); /* Null terminate target unicode string */ 547 put_unaligned(0, &target[j]); /* Null terminate target unicode string */
548 kfree(wchar_to);
459 return j; 549 return j;
460} 550}
461 551