diff options
| author | Nakajima Akira <nakajima.akira@nttcom.co.jp> | 2015-04-09 04:27:39 -0400 |
|---|---|---|
| committer | Steve French <smfrench@gmail.com> | 2015-05-20 14:12:51 -0400 |
| commit | b29103076bec8316e155e71309dc0fba499022c6 (patch) | |
| tree | cee456c475ba1d7efea3eca41ec836e8fb8707c9 /fs/cifs | |
| parent | 00b8c95b680791a72b4bb14dc371ff1f1daae39c (diff) | |
Fix to convert SURROGATE PAIR
Filenames that contain a surrogate pair are garbled:
each such character is replaced with ??.
[Steps to Reproduce for bug]
client# touch $(echo -e '\xf0\x9d\x9f\xa3')
client# touch $(echo -e '\xf0\x9d\x9f\xa4')
client# ls -li
Both files show the same inode number and the same filename (?? and ??).
Fix the following functions, which do not take surrogate pairs (and IVS) into account:
cifs_utf16_bytes()
cifs_mapchar()
cifs_from_utf16()
cifsConvertToUTF16()
Reported-by: Nakajima Akira <nakajima.akira@nttcom.co.jp>
Signed-off-by: Nakajima Akira <nakajima.akira@nttcom.co.jp>
Signed-off-by: Steve French <smfrench@gmail.com>
Diffstat (limited to 'fs/cifs')
| -rw-r--r-- | fs/cifs/cifs_unicode.c | 182 |
1 files changed, 136 insertions, 46 deletions
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 0303c6793d90..5a53ac6b1e02 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c | |||
| @@ -27,41 +27,6 @@ | |||
| 27 | #include "cifsglob.h" | 27 | #include "cifsglob.h" |
| 28 | #include "cifs_debug.h" | 28 | #include "cifs_debug.h" |
| 29 | 29 | ||
| 30 | /* | ||
| 31 | * cifs_utf16_bytes - how long will a string be after conversion? | ||
| 32 | * @utf16 - pointer to input string | ||
| 33 | * @maxbytes - don't go past this many bytes of input string | ||
| 34 | * @codepage - destination codepage | ||
| 35 | * | ||
| 36 | * Walk a utf16le string and return the number of bytes that the string will | ||
| 37 | * be after being converted to the given charset, not including any null | ||
| 38 | * termination required. Don't walk past maxbytes in the source buffer. | ||
| 39 | */ | ||
| 40 | int | ||
| 41 | cifs_utf16_bytes(const __le16 *from, int maxbytes, | ||
| 42 | const struct nls_table *codepage) | ||
| 43 | { | ||
| 44 | int i; | ||
| 45 | int charlen, outlen = 0; | ||
| 46 | int maxwords = maxbytes / 2; | ||
| 47 | char tmp[NLS_MAX_CHARSET_SIZE]; | ||
| 48 | __u16 ftmp; | ||
| 49 | |||
| 50 | for (i = 0; i < maxwords; i++) { | ||
| 51 | ftmp = get_unaligned_le16(&from[i]); | ||
| 52 | if (ftmp == 0) | ||
| 53 | break; | ||
| 54 | |||
| 55 | charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE); | ||
| 56 | if (charlen > 0) | ||
| 57 | outlen += charlen; | ||
| 58 | else | ||
| 59 | outlen++; | ||
| 60 | } | ||
| 61 | |||
| 62 | return outlen; | ||
| 63 | } | ||
| 64 | |||
| 65 | int cifs_remap(struct cifs_sb_info *cifs_sb) | 30 | int cifs_remap(struct cifs_sb_info *cifs_sb) |
| 66 | { | 31 | { |
| 67 | int map_type; | 32 | int map_type; |
| @@ -155,10 +120,13 @@ convert_sfm_char(const __u16 src_char, char *target) | |||
| 155 | * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). | 120 | * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). |
| 156 | */ | 121 | */ |
| 157 | static int | 122 | static int |
| 158 | cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, | 123 | cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp, |
| 159 | int maptype) | 124 | int maptype) |
| 160 | { | 125 | { |
| 161 | int len = 1; | 126 | int len = 1; |
| 127 | __u16 src_char; | ||
| 128 | |||
| 129 | src_char = *from; | ||
| 162 | 130 | ||
| 163 | if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target)) | 131 | if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target)) |
| 164 | return len; | 132 | return len; |
| @@ -168,10 +136,23 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, | |||
| 168 | 136 | ||
| 169 | /* if character not one of seven in special remap set */ | 137 | /* if character not one of seven in special remap set */ |
| 170 | len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE); | 138 | len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE); |
| 171 | if (len <= 0) { | 139 | if (len <= 0) |
| 172 | *target = '?'; | 140 | goto surrogate_pair; |
| 173 | len = 1; | 141 | |
| 174 | } | 142 | return len; |
| 143 | |||
| 144 | surrogate_pair: | ||
| 145 | /* convert SURROGATE_PAIR and IVS */ | ||
| 146 | if (strcmp(cp->charset, "utf8")) | ||
| 147 | goto unknown; | ||
| 148 | len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6); | ||
| 149 | if (len <= 0) | ||
| 150 | goto unknown; | ||
| 151 | return len; | ||
| 152 | |||
| 153 | unknown: | ||
| 154 | *target = '?'; | ||
| 155 | len = 1; | ||
| 175 | return len; | 156 | return len; |
| 176 | } | 157 | } |
| 177 | 158 | ||
| @@ -206,7 +187,7 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, | |||
| 206 | int nullsize = nls_nullsize(codepage); | 187 | int nullsize = nls_nullsize(codepage); |
| 207 | int fromwords = fromlen / 2; | 188 | int fromwords = fromlen / 2; |
| 208 | char tmp[NLS_MAX_CHARSET_SIZE]; | 189 | char tmp[NLS_MAX_CHARSET_SIZE]; |
| 209 | __u16 ftmp; | 190 | __u16 ftmp[3]; /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */ |
| 210 | 191 | ||
| 211 | /* | 192 | /* |
| 212 | * because the chars can be of varying widths, we need to take care | 193 | * because the chars can be of varying widths, we need to take care |
| @@ -217,9 +198,17 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, | |||
| 217 | safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); | 198 | safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); |
| 218 | 199 | ||
| 219 | for (i = 0; i < fromwords; i++) { | 200 | for (i = 0; i < fromwords; i++) { |
| 220 | ftmp = get_unaligned_le16(&from[i]); | 201 | ftmp[0] = get_unaligned_le16(&from[i]); |
| 221 | if (ftmp == 0) | 202 | if (ftmp[0] == 0) |
| 222 | break; | 203 | break; |
| 204 | if (i + 1 < fromwords) | ||
| 205 | ftmp[1] = get_unaligned_le16(&from[i + 1]); | ||
| 206 | else | ||
| 207 | ftmp[1] = 0; | ||
| 208 | if (i + 2 < fromwords) | ||
| 209 | ftmp[2] = get_unaligned_le16(&from[i + 2]); | ||
| 210 | else | ||
| 211 | ftmp[2] = 0; | ||
| 223 | 212 | ||
| 224 | /* | 213 | /* |
| 225 | * check to see if converting this character might make the | 214 | * check to see if converting this character might make the |
| @@ -234,6 +223,17 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, | |||
| 234 | /* put converted char into 'to' buffer */ | 223 | /* put converted char into 'to' buffer */ |
| 235 | charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type); | 224 | charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type); |
| 236 | outlen += charlen; | 225 | outlen += charlen; |
| 226 | |||
| 227 | /* charlen (=bytes of UTF-8 for 1 character) | ||
| 228 | * 4bytes UTF-8(surrogate pair) is charlen=4 | ||
| 229 | * (4bytes UTF-16 code) | ||
| 230 | * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4 | ||
| 231 | * (2 UTF-8 pairs divided to 2 UTF-16 pairs) */ | ||
| 232 | if (charlen == 4) | ||
| 233 | i++; | ||
| 234 | else if (charlen >= 5) | ||
| 235 | /* 5-6bytes UTF-8 */ | ||
| 236 | i += 2; | ||
| 237 | } | 237 | } |
| 238 | 238 | ||
| 239 | /* properly null-terminate string */ | 239 | /* properly null-terminate string */ |
| @@ -296,6 +296,46 @@ success: | |||
| 296 | } | 296 | } |
| 297 | 297 | ||
| 298 | /* | 298 | /* |
| 299 | * cifs_utf16_bytes - how long will a string be after conversion? | ||
| 300 | * @utf16 - pointer to input string | ||
| 301 | * @maxbytes - don't go past this many bytes of input string | ||
| 302 | * @codepage - destination codepage | ||
| 303 | * | ||
| 304 | * Walk a utf16le string and return the number of bytes that the string will | ||
| 305 | * be after being converted to the given charset, not including any null | ||
| 306 | * termination required. Don't walk past maxbytes in the source buffer. | ||
| 307 | */ | ||
| 308 | int | ||
| 309 | cifs_utf16_bytes(const __le16 *from, int maxbytes, | ||
| 310 | const struct nls_table *codepage) | ||
| 311 | { | ||
| 312 | int i; | ||
| 313 | int charlen, outlen = 0; | ||
| 314 | int maxwords = maxbytes / 2; | ||
| 315 | char tmp[NLS_MAX_CHARSET_SIZE]; | ||
| 316 | __u16 ftmp[3]; | ||
| 317 | |||
| 318 | for (i = 0; i < maxwords; i++) { | ||
| 319 | ftmp[0] = get_unaligned_le16(&from[i]); | ||
| 320 | if (ftmp[0] == 0) | ||
| 321 | break; | ||
| 322 | if (i + 1 < maxwords) | ||
| 323 | ftmp[1] = get_unaligned_le16(&from[i + 1]); | ||
| 324 | else | ||
| 325 | ftmp[1] = 0; | ||
| 326 | if (i + 2 < maxwords) | ||
| 327 | ftmp[2] = get_unaligned_le16(&from[i + 2]); | ||
| 328 | else | ||
| 329 | ftmp[2] = 0; | ||
| 330 | |||
| 331 | charlen = cifs_mapchar(tmp, ftmp, codepage, NO_MAP_UNI_RSVD); | ||
| 332 | outlen += charlen; | ||
| 333 | } | ||
| 334 | |||
| 335 | return outlen; | ||
| 336 | } | ||
| 337 | |||
| 338 | /* | ||
| 299 | * cifs_strndup_from_utf16 - copy a string from wire format to the local | 339 | * cifs_strndup_from_utf16 - copy a string from wire format to the local |
| 300 | * codepage | 340 | * codepage |
| 301 | * @src - source string | 341 | * @src - source string |
| @@ -409,10 +449,15 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, | |||
| 409 | char src_char; | 449 | char src_char; |
| 410 | __le16 dst_char; | 450 | __le16 dst_char; |
| 411 | wchar_t tmp; | 451 | wchar_t tmp; |
| 452 | wchar_t *wchar_to; /* UTF-16 */ | ||
| 453 | int ret; | ||
| 454 | unicode_t u; | ||
| 412 | 455 | ||
| 413 | if (map_chars == NO_MAP_UNI_RSVD) | 456 | if (map_chars == NO_MAP_UNI_RSVD) |
| 414 | return cifs_strtoUTF16(target, source, PATH_MAX, cp); | 457 | return cifs_strtoUTF16(target, source, PATH_MAX, cp); |
| 415 | 458 | ||
| 459 | wchar_to = kzalloc(6, GFP_KERNEL); | ||
| 460 | |||
| 416 | for (i = 0; i < srclen; j++) { | 461 | for (i = 0; i < srclen; j++) { |
| 417 | src_char = source[i]; | 462 | src_char = source[i]; |
| 418 | charlen = 1; | 463 | charlen = 1; |
| @@ -441,11 +486,55 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, | |||
| 441 | * if no match, use question mark, which at least in | 486 | * if no match, use question mark, which at least in |
| 442 | * some cases serves as wild card | 487 | * some cases serves as wild card |
| 443 | */ | 488 | */ |
| 444 | if (charlen < 1) { | 489 | if (charlen > 0) |
| 445 | dst_char = cpu_to_le16(0x003f); | 490 | goto ctoUTF16; |
| 446 | charlen = 1; | 491 | |
| 492 | /* convert SURROGATE_PAIR */ | ||
| 493 | if (strcmp(cp->charset, "utf8") || !wchar_to) | ||
| 494 | goto unknown; | ||
| 495 | if (*(source + i) & 0x80) { | ||
| 496 | charlen = utf8_to_utf32(source + i, 6, &u); | ||
| 497 | if (charlen < 0) | ||
| 498 | goto unknown; | ||
| 499 | } else | ||
| 500 | goto unknown; | ||
| 501 | ret = utf8s_to_utf16s(source + i, charlen, | ||
| 502 | UTF16_LITTLE_ENDIAN, | ||
| 503 | wchar_to, 6); | ||
| 504 | if (ret < 0) | ||
| 505 | goto unknown; | ||
| 506 | |||
| 507 | i += charlen; | ||
| 508 | dst_char = cpu_to_le16(*wchar_to); | ||
| 509 | if (charlen <= 3) | ||
| 510 | /* 1-3bytes UTF-8 to 2bytes UTF-16 */ | ||
| 511 | put_unaligned(dst_char, &target[j]); | ||
| 512 | else if (charlen == 4) { | ||
| 513 | /* 4bytes UTF-8(surrogate pair) to 4bytes UTF-16 | ||
| 514 | * 7-8bytes UTF-8(IVS) divided to 2 UTF-16 | ||
| 515 | * (charlen=3+4 or 4+4) */ | ||
| 516 | put_unaligned(dst_char, &target[j]); | ||
| 517 | dst_char = cpu_to_le16(*(wchar_to + 1)); | ||
| 518 | j++; | ||
| 519 | put_unaligned(dst_char, &target[j]); | ||
| 520 | } else if (charlen >= 5) { | ||
| 521 | /* 5-6bytes UTF-8 to 6bytes UTF-16 */ | ||
| 522 | put_unaligned(dst_char, &target[j]); | ||
| 523 | dst_char = cpu_to_le16(*(wchar_to + 1)); | ||
| 524 | j++; | ||
| 525 | put_unaligned(dst_char, &target[j]); | ||
| 526 | dst_char = cpu_to_le16(*(wchar_to + 2)); | ||
| 527 | j++; | ||
| 528 | put_unaligned(dst_char, &target[j]); | ||
| 447 | } | 529 | } |
| 530 | continue; | ||
| 531 | |||
| 532 | unknown: | ||
| 533 | dst_char = cpu_to_le16(0x003f); | ||
| 534 | charlen = 1; | ||
| 448 | } | 535 | } |
| 536 | |||
| 537 | ctoUTF16: | ||
| 449 | /* | 538 | /* |
| 450 | * character may take more than one byte in the source string, | 539 | * character may take more than one byte in the source string, |
| 451 | * but will take exactly two bytes in the target string | 540 | * but will take exactly two bytes in the target string |
| @@ -456,6 +545,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, | |||
| 456 | 545 | ||
| 457 | ctoUTF16_out: | 546 | ctoUTF16_out: |
| 458 | put_unaligned(0, &target[j]); /* Null terminate target unicode string */ | 547 | put_unaligned(0, &target[j]); /* Null terminate target unicode string */ |
| 548 | kfree(wchar_to); | ||
| 459 | return j; | 549 | return j; |
| 460 | } | 550 | } |
| 461 | 551 | ||
