summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jan Kara <jack@suse.cz> 2018-04-12 11:06:18 -0400
committer Jan Kara <jack@suse.cz> 2018-04-19 10:00:48 -0400
commit b8a41c44a4ed8bad89b91584a7c7e4610c4b8c88 (patch)
tree b8c5544efa9d2df87c5555b3716a5df197e45fee
parent b8333ea1adf08c5ccc3790403f0d1053e1b49f62 (diff)
udf: Use UTF-32 <-> UTF-8 conversion functions from NLS
Instead of implementing our own functions converting to and from UTF-8, use the ones provided by NLS.

Signed-off-by: Jan Kara <jack@suse.cz>
-rw-r--r-- fs/udf/unicode.c 80
1 files changed, 17 insertions, 63 deletions
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 16a8ad21b77e..18df831afd3d 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -28,6 +28,7 @@
28 28
29#include "udf_sb.h" 29#include "udf_sb.h"
30 30
31#define UNICODE_MAX 0x10ffff
31#define SURROGATE_MASK 0xfffff800 32#define SURROGATE_MASK 0xfffff800
32#define SURROGATE_PAIR 0x0000d800 33#define SURROGATE_PAIR 0x0000d800
33 34
@@ -40,22 +41,12 @@ static int udf_uni2char_utf8(wchar_t uni,
40 if (boundlen <= 0) 41 if (boundlen <= 0)
41 return -ENAMETOOLONG; 42 return -ENAMETOOLONG;
42 43
43 if ((uni & SURROGATE_MASK) == SURROGATE_PAIR) 44 u_len = utf32_to_utf8(uni, out, boundlen);
44 return -EINVAL; 45 if (u_len < 0) {
45 46 if (uni > UNICODE_MAX ||
46 if (uni < 0x80) { 47 (uni & SURROGATE_MASK) == SURROGATE_PAIR)
47 out[u_len++] = (unsigned char)uni; 48 return -EINVAL;
48 } else if (uni < 0x800) { 49 return -ENAMETOOLONG;
49 if (boundlen < 2)
50 return -ENAMETOOLONG;
51 out[u_len++] = (unsigned char)(0xc0 | (uni >> 6));
52 out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
53 } else {
54 if (boundlen < 3)
55 return -ENAMETOOLONG;
56 out[u_len++] = (unsigned char)(0xe0 | (uni >> 12));
57 out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f));
58 out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
59 } 50 }
60 return u_len; 51 return u_len;
61} 52}
@@ -64,56 +55,19 @@ static int udf_char2uni_utf8(const unsigned char *in,
64 int boundlen, 55 int boundlen,
65 wchar_t *uni) 56 wchar_t *uni)
66{ 57{
67 unsigned int utf_char; 58 int u_len;
68 unsigned char c; 59 unicode_t c;
69 int utf_cnt, u_len; 60
70 61 u_len = utf8_to_utf32(in, boundlen, &c);
71 utf_char = 0; 62 if (u_len < 0) {
72 utf_cnt = 0;
73 for (u_len = 0; u_len < boundlen;) {
74 c = in[u_len++];
75
76 /* Complete a multi-byte UTF-8 character */
77 if (utf_cnt) {
78 utf_char = (utf_char << 6) | (c & 0x3f);
79 if (--utf_cnt)
80 continue;
81 } else {
82 /* Check for a multi-byte UTF-8 character */
83 if (c & 0x80) {
84 /* Start a multi-byte UTF-8 character */
85 if ((c & 0xe0) == 0xc0) {
86 utf_char = c & 0x1f;
87 utf_cnt = 1;
88 } else if ((c & 0xf0) == 0xe0) {
89 utf_char = c & 0x0f;
90 utf_cnt = 2;
91 } else if ((c & 0xf8) == 0xf0) {
92 utf_char = c & 0x07;
93 utf_cnt = 3;
94 } else if ((c & 0xfc) == 0xf8) {
95 utf_char = c & 0x03;
96 utf_cnt = 4;
97 } else if ((c & 0xfe) == 0xfc) {
98 utf_char = c & 0x01;
99 utf_cnt = 5;
100 } else {
101 utf_cnt = -1;
102 break;
103 }
104 continue;
105 } else {
106 /* Single byte UTF-8 character (most common) */
107 utf_char = c;
108 }
109 }
110 *uni = utf_char;
111 break;
112 }
113 if (utf_cnt) {
114 *uni = '?'; 63 *uni = '?';
115 return -EINVAL; 64 return -EINVAL;
116 } 65 }
66
67 if (c > MAX_WCHAR_T)
68 *uni = '?';
69 else
70 *uni = c;
117 return u_len; 71 return u_len;
118} 72}
119 73