aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ernesto A. Fernández <ernesto.mnd.fernandez@gmail.com> 2018-08-22 00:59:19 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2018-08-22 13:52:50 -0400
commit afd6c9e1f5287ad236adcf56db8c42fef65561fa (patch)
tree 873279d1400a511e6c6cb739a39f1b9eeee294a0
parent 31651c607151f1034cfb57e5a78678bea54c362b (diff)
hfsplus: fix decomposition of Hangul characters
Files created under macOS cannot be opened under Linux if their names contain Korean characters, and vice versa.

The Korean alphabet is special because its normalization is done without a table. The module deals with it correctly when composing, but forgets about it for the decomposition.

Fix this using the Hangul decomposition function provided in the Unicode Standard. The code fits a bit awkwardly because it requires a buffer, while all the other normalizations are returned as pointers to the decomposition table. This is actually also a bug because reordering may still be needed, but for now leave it as it is.

The patch will cause trouble for Hangul filenames already created by the module in the past. This shouldn't really be a concern because its main purpose was always sharing with macOS. If a user actually needs to access such a file, the nodecompose mount option should be enough.

Link: http://lkml.kernel.org/r/20180717220951.p6qqrgautc4pxvzu@eaf
Signed-off-by: Ernesto A. Fernández <ernesto.mnd.fernandez@gmail.com>
Reported-by: Ting-Chang Hou <tchou@synology.com>
Tested-by: Ting-Chang Hou <tchou@synology.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- fs/hfsplus/unicode.c 62
1 files changed, 56 insertions, 6 deletions
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index dfa90c21948f..c8d1b2be7854 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -272,8 +272,8 @@ static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
272 return size; 272 return size;
273} 273}
274 274
275/* Decomposes a single unicode character. */ 275/* Decomposes a non-Hangul unicode character. */
276static inline u16 *decompose_unichar(wchar_t uc, int *size) 276static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size)
277{ 277{
278 int off; 278 int off;
279 279
@@ -296,6 +296,51 @@ static inline u16 *decompose_unichar(wchar_t uc, int *size)
296 return hfsplus_decompose_table + (off / 4); 296 return hfsplus_decompose_table + (off / 4);
297} 297}
298 298
299/*
300 * Try to decompose a unicode character as Hangul. Return 0 if @uc is not
301 * precomposed Hangul, otherwise return the length of the decomposition.
302 *
303 * This function was adapted from sample code from the Unicode Standard
304 * Annex #15: Unicode Normalization Forms, version 3.2.0.
305 *
306 * Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed
307 * under the Terms of Use in http://www.unicode.org/copyright.html.
308 */
309static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result)
310{
311 int index;
312 int l, v, t;
313
314 index = uc - Hangul_SBase;
315 if (index < 0 || index >= Hangul_SCount)
316 return 0;
317
318 l = Hangul_LBase + index / Hangul_NCount;
319 v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount;
320 t = Hangul_TBase + index % Hangul_TCount;
321
322 result[0] = l;
323 result[1] = v;
324 if (t != Hangul_TBase) {
325 result[2] = t;
326 return 3;
327 }
328 return 2;
329}
330
331/* Decomposes a single unicode character. */
332static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer)
333{
334 u16 *result;
335
336 /* Hangul is handled separately */
337 result = hangul_buffer;
338 *size = hfsplus_try_decompose_hangul(uc, result);
339 if (*size == 0)
340 result = hfsplus_decompose_nonhangul(uc, size);
341 return result;
342}
343
299int hfsplus_asc2uni(struct super_block *sb, 344int hfsplus_asc2uni(struct super_block *sb,
300 struct hfsplus_unistr *ustr, int max_unistr_len, 345 struct hfsplus_unistr *ustr, int max_unistr_len,
301 const char *astr, int len) 346 const char *astr, int len)
@@ -303,13 +348,14 @@ int hfsplus_asc2uni(struct super_block *sb,
303 int size, dsize, decompose; 348 int size, dsize, decompose;
304 u16 *dstr, outlen = 0; 349 u16 *dstr, outlen = 0;
305 wchar_t c; 350 wchar_t c;
351 u16 dhangul[3];
306 352
307 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 353 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
308 while (outlen < max_unistr_len && len > 0) { 354 while (outlen < max_unistr_len && len > 0) {
309 size = asc2unichar(sb, astr, len, &c); 355 size = asc2unichar(sb, astr, len, &c);
310 356
311 if (decompose) 357 if (decompose)
312 dstr = decompose_unichar(c, &dsize); 358 dstr = decompose_unichar(c, &dsize, dhangul);
313 else 359 else
314 dstr = NULL; 360 dstr = NULL;
315 if (dstr) { 361 if (dstr) {
@@ -344,6 +390,7 @@ int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
344 unsigned long hash; 390 unsigned long hash;
345 wchar_t c; 391 wchar_t c;
346 u16 c2; 392 u16 c2;
393 u16 dhangul[3];
347 394
348 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); 395 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
349 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 396 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
@@ -357,7 +404,7 @@ int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
357 len -= size; 404 len -= size;
358 405
359 if (decompose) 406 if (decompose)
360 dstr = decompose_unichar(c, &dsize); 407 dstr = decompose_unichar(c, &dsize, dhangul);
361 else 408 else
362 dstr = NULL; 409 dstr = NULL;
363 if (dstr) { 410 if (dstr) {
@@ -396,6 +443,7 @@ int hfsplus_compare_dentry(const struct dentry *dentry,
396 const char *astr1, *astr2; 443 const char *astr1, *astr2;
397 u16 c1, c2; 444 u16 c1, c2;
398 wchar_t c; 445 wchar_t c;
446 u16 dhangul_1[3], dhangul_2[3];
399 447
400 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); 448 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
401 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 449 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
@@ -413,7 +461,8 @@ int hfsplus_compare_dentry(const struct dentry *dentry,
413 len1 -= size; 461 len1 -= size;
414 462
415 if (decompose) 463 if (decompose)
416 dstr1 = decompose_unichar(c, &dsize1); 464 dstr1 = decompose_unichar(c, &dsize1,
465 dhangul_1);
417 if (!decompose || !dstr1) { 466 if (!decompose || !dstr1) {
418 c1 = c; 467 c1 = c;
419 dstr1 = &c1; 468 dstr1 = &c1;
@@ -427,7 +476,8 @@ int hfsplus_compare_dentry(const struct dentry *dentry,
427 len2 -= size; 476 len2 -= size;
428 477
429 if (decompose) 478 if (decompose)
430 dstr2 = decompose_unichar(c, &dsize2); 479 dstr2 = decompose_unichar(c, &dsize2,
480 dhangul_2);
431 if (!decompose || !dstr2) { 481 if (!decompose || !dstr2) {
432 c2 = c; 482 c2 = c;
433 dstr2 = &c2; 483 dstr2 = &c2;