diff options
author | Jan Kara <jack@suse.cz> | 2018-04-12 11:06:18 -0400 |
---|---|---|
committer | Jan Kara <jack@suse.cz> | 2018-04-19 10:00:48 -0400 |
commit | b8a41c44a4ed8bad89b91584a7c7e4610c4b8c88 (patch) | |
tree | b8c5544efa9d2df87c5555b3716a5df197e45fee | |
parent | b8333ea1adf08c5ccc3790403f0d1053e1b49f62 (diff) |
udf: Use UTF-32 <-> UTF-8 conversion functions from NLS
Instead of implementing our own functions converting to and from UTF-8,
use the ones provided by NLS.
Signed-off-by: Jan Kara <jack@suse.cz>
-rw-r--r-- | fs/udf/unicode.c | 80 |
1 file changed, 17 insertions, 63 deletions
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 16a8ad21b77e..18df831afd3d 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -28,6 +28,7 @@ | |||
28 | 28 | ||
29 | #include "udf_sb.h" | 29 | #include "udf_sb.h" |
30 | 30 | ||
31 | #define UNICODE_MAX 0x10ffff | ||
31 | #define SURROGATE_MASK 0xfffff800 | 32 | #define SURROGATE_MASK 0xfffff800 |
32 | #define SURROGATE_PAIR 0x0000d800 | 33 | #define SURROGATE_PAIR 0x0000d800 |
33 | 34 | ||
@@ -40,22 +41,12 @@ static int udf_uni2char_utf8(wchar_t uni, | |||
40 | if (boundlen <= 0) | 41 | if (boundlen <= 0) |
41 | return -ENAMETOOLONG; | 42 | return -ENAMETOOLONG; |
42 | 43 | ||
43 | if ((uni & SURROGATE_MASK) == SURROGATE_PAIR) | 44 | u_len = utf32_to_utf8(uni, out, boundlen); |
44 | return -EINVAL; | 45 | if (u_len < 0) { |
45 | 46 | if (uni > UNICODE_MAX || | |
46 | if (uni < 0x80) { | 47 | (uni & SURROGATE_MASK) == SURROGATE_PAIR) |
47 | out[u_len++] = (unsigned char)uni; | 48 | return -EINVAL; |
48 | } else if (uni < 0x800) { | 49 | return -ENAMETOOLONG; |
49 | if (boundlen < 2) | ||
50 | return -ENAMETOOLONG; | ||
51 | out[u_len++] = (unsigned char)(0xc0 | (uni >> 6)); | ||
52 | out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f)); | ||
53 | } else { | ||
54 | if (boundlen < 3) | ||
55 | return -ENAMETOOLONG; | ||
56 | out[u_len++] = (unsigned char)(0xe0 | (uni >> 12)); | ||
57 | out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f)); | ||
58 | out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f)); | ||
59 | } | 50 | } |
60 | return u_len; | 51 | return u_len; |
61 | } | 52 | } |
@@ -64,56 +55,19 @@ static int udf_char2uni_utf8(const unsigned char *in, | |||
64 | int boundlen, | 55 | int boundlen, |
65 | wchar_t *uni) | 56 | wchar_t *uni) |
66 | { | 57 | { |
67 | unsigned int utf_char; | 58 | int u_len; |
68 | unsigned char c; | 59 | unicode_t c; |
69 | int utf_cnt, u_len; | 60 | |
70 | 61 | u_len = utf8_to_utf32(in, boundlen, &c); | |
71 | utf_char = 0; | 62 | if (u_len < 0) { |
72 | utf_cnt = 0; | ||
73 | for (u_len = 0; u_len < boundlen;) { | ||
74 | c = in[u_len++]; | ||
75 | |||
76 | /* Complete a multi-byte UTF-8 character */ | ||
77 | if (utf_cnt) { | ||
78 | utf_char = (utf_char << 6) | (c & 0x3f); | ||
79 | if (--utf_cnt) | ||
80 | continue; | ||
81 | } else { | ||
82 | /* Check for a multi-byte UTF-8 character */ | ||
83 | if (c & 0x80) { | ||
84 | /* Start a multi-byte UTF-8 character */ | ||
85 | if ((c & 0xe0) == 0xc0) { | ||
86 | utf_char = c & 0x1f; | ||
87 | utf_cnt = 1; | ||
88 | } else if ((c & 0xf0) == 0xe0) { | ||
89 | utf_char = c & 0x0f; | ||
90 | utf_cnt = 2; | ||
91 | } else if ((c & 0xf8) == 0xf0) { | ||
92 | utf_char = c & 0x07; | ||
93 | utf_cnt = 3; | ||
94 | } else if ((c & 0xfc) == 0xf8) { | ||
95 | utf_char = c & 0x03; | ||
96 | utf_cnt = 4; | ||
97 | } else if ((c & 0xfe) == 0xfc) { | ||
98 | utf_char = c & 0x01; | ||
99 | utf_cnt = 5; | ||
100 | } else { | ||
101 | utf_cnt = -1; | ||
102 | break; | ||
103 | } | ||
104 | continue; | ||
105 | } else { | ||
106 | /* Single byte UTF-8 character (most common) */ | ||
107 | utf_char = c; | ||
108 | } | ||
109 | } | ||
110 | *uni = utf_char; | ||
111 | break; | ||
112 | } | ||
113 | if (utf_cnt) { | ||
114 | *uni = '?'; | 63 | *uni = '?'; |
115 | return -EINVAL; | 64 | return -EINVAL; |
116 | } | 65 | } |
66 | |||
67 | if (c > MAX_WCHAR_T) | ||
68 | *uni = '?'; | ||
69 | else | ||
70 | *uni = c; | ||
117 | return u_len; | 71 | return u_len; |
118 | } | 72 | } |
119 | 73 | ||