diff options
author | Jan Kara <jack@suse.cz> | 2018-04-12 11:22:23 -0400 |
---|---|---|
committer | Jan Kara <jack@suse.cz> | 2018-04-18 10:34:55 -0400 |
commit | 44f06ba8297c7e9dfd0e49b40cbe119113cca094 (patch) | |
tree | 376c1b3c48c949da90bd7cfebd254d2f72d76e98 | |
parent | 06856938112b84ff3c6b0594d017f59cfda2a43d (diff) |
udf: Fix leak of UTF-16 surrogates into encoded strings
The OSTA UDF specification does not mention whether the CS0 charset, in the
case of two-bytes-per-character encoding, should be treated as UTF-16 or
UCS-2. The sample code in the standard does not treat UTF-16 surrogates
in any special way, but on systems such as Windows, which work in UTF-16
internally, filenames would effectively be treated as being in UTF-16.
In Linux it is more difficult to handle characters outside of the Basic
Multilingual Plane (beyond 0xffff) as the NLS framework works with 2-byte
characters only. Just make sure we don't leak UTF-16 surrogates into the
resulting string when loading names from the filesystem for now.
CC: stable@vger.kernel.org # >= v4.6
Reported-by: Mingye Wang <arthur200126@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
-rw-r--r-- | fs/udf/unicode.c | 6 |
1 files changed, 6 insertions, 0 deletions
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index f897e55f2cd0..16a8ad21b77e 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c | |||
@@ -28,6 +28,9 @@ | |||
28 | 28 | ||
29 | #include "udf_sb.h" | 29 | #include "udf_sb.h" |
30 | 30 | ||
31 | #define SURROGATE_MASK 0xfffff800 | ||
32 | #define SURROGATE_PAIR 0x0000d800 | ||
33 | |||
31 | static int udf_uni2char_utf8(wchar_t uni, | 34 | static int udf_uni2char_utf8(wchar_t uni, |
32 | unsigned char *out, | 35 | unsigned char *out, |
33 | int boundlen) | 36 | int boundlen) |
@@ -37,6 +40,9 @@ static int udf_uni2char_utf8(wchar_t uni, | |||
37 | if (boundlen <= 0) | 40 | if (boundlen <= 0) |
38 | return -ENAMETOOLONG; | 41 | return -ENAMETOOLONG; |
39 | 42 | ||
43 | if ((uni & SURROGATE_MASK) == SURROGATE_PAIR) | ||
44 | return -EINVAL; | ||
45 | |||
40 | if (uni < 0x80) { | 46 | if (uni < 0x80) { |
41 | out[u_len++] = (unsigned char)uni; | 47 | out[u_len++] = (unsigned char)uni; |
42 | } else if (uni < 0x800) { | 48 | } else if (uni < 0x800) { |