udf: Use UTF-32 <-> UTF-8 conversion functions from NLS

Instead of implementing our own functions converting to and from UTF-8,
use the ones provided by NLS.

Signed-off-by: Jan Kara <jack@suse.cz>
author: Jan Kara <jack@suse.cz> 2018-04-12 11:06:18 -0400
committer: Jan Kara <jack@suse.cz> 2018-04-19 10:00:48 -0400
commit: b8a41c44a4ed8bad89b91584a7c7e4610c4b8c88 (patch)
tree: b8c5544efa9d2df87c5555b3716a5df197e45fee
parent: b8333ea1adf08c5ccc3790403f0d1053e1b49f62 (diff)
1 files changed, 17 insertions, 63 deletions
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 16a8ad21b77e..18df831afd3d 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -28,6 +28,7 @@
 #include "udf_sb.h"
+#define UNICODE_MAX 0x10ffff
 #define SURROGATE_MASK 0xfffff800
 #define SURROGATE_PAIR 0x0000d800
@@ -40,22 +41,12 @@ static int udf_uni2char_utf8(wchar_t uni,
        if (boundlen <= 0)
                return -ENAMETOOLONG;
-        if ((uni & SURROGATE_MASK) == SURROGATE_PAIR)
+        u_len = utf32_to_utf8(uni, out, boundlen);
-                return -EINVAL;
+        if (u_len < 0) {
+                if (uni > UNICODE_MAX ||
-        if (uni < 0x80) {
+                    (uni & SURROGATE_MASK) == SURROGATE_PAIR)
-                out[u_len++] = (unsigned char)uni;
+                        return -EINVAL;
-        } else if (uni < 0x800) {
+                return -ENAMETOOLONG;
-                if (boundlen < 2)
-                        return -ENAMETOOLONG;
-                out[u_len++] = (unsigned char)(0xc0 | (uni >> 6));
-                out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
-        } else {
-                if (boundlen < 3)
-                        return -ENAMETOOLONG;
-                out[u_len++] = (unsigned char)(0xe0 | (uni >> 12));
-                out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f));
-                out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
        }
        return u_len;
 }
@@ -64,56 +55,19 @@ static int udf_char2uni_utf8(const unsigned char *in,
                             int boundlen,
                             wchar_t *uni)
 {
-        unsigned int utf_char;
+        int u_len;
-        unsigned char c;
+        unicode_t c;
-        int utf_cnt, u_len;
+        u_len = utf8_to_utf32(in, boundlen, &c);
-        utf_char = 0;
+        if (u_len < 0) {
-        utf_cnt = 0;
-        for (u_len = 0; u_len < boundlen;) {
-                c = in[u_len++];
-                /* Complete a multi-byte UTF-8 character */
-                if (utf_cnt) {
-                        utf_char = (utf_char << 6) | (c & 0x3f);
-                        if (--utf_cnt)
-                                continue;
-                } else {
-                        /* Check for a multi-byte UTF-8 character */
-                        if (c & 0x80) {
-                                /* Start a multi-byte UTF-8 character */
-                                if ((c & 0xe0) == 0xc0) {
-                                        utf_char = c & 0x1f;
-                                        utf_cnt = 1;
-                                } else if ((c & 0xf0) == 0xe0) {
-                                        utf_char = c & 0x0f;
-                                        utf_cnt = 2;
-                                } else if ((c & 0xf8) == 0xf0) {
-                                        utf_char = c & 0x07;
-                                        utf_cnt = 3;
-                                } else if ((c & 0xfc) == 0xf8) {
-                                        utf_char = c & 0x03;
-                                        utf_cnt = 4;
-                                } else if ((c & 0xfe) == 0xfc) {
-                                        utf_char = c & 0x01;
-                                        utf_cnt = 5;
-                                } else {
-                                        utf_cnt = -1;
-                                        break;
-                                }
-                                continue;
-                        } else {
-                                /* Single byte UTF-8 character (most common) */
-                                utf_char = c;
-                        }
-                }
-                *uni = utf_char;
-                break;
-        }
-        if (utf_cnt) {
                *uni = '?';
                return -EINVAL;
        }
+        if (c > MAX_WCHAR_T)
+                *uni = '?';
+        else
+                *uni = c;
        return u_len;
 }
author	Jan Kara <jack@suse.cz>	2018-04-12 11:06:18 -0400
committer	Jan Kara <jack@suse.cz>	2018-04-19 10:00:48 -0400
commit	b8a41c44a4ed8bad89b91584a7c7e4610c4b8c88 (patch)
tree	b8c5544efa9d2df87c5555b3716a5df197e45fee
parent	b8333ea1adf08c5ccc3790403f0d1053e1b49f62 (diff)

diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 16a8ad21b77e..18df831afd3d 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -28,6 +28,7 @@
28		28
29	#include "udf_sb.h"	29	#include "udf_sb.h"
30		30
		31	#define UNICODE_MAX 0x10ffff
31	#define SURROGATE_MASK 0xfffff800	32	#define SURROGATE_MASK 0xfffff800
32	#define SURROGATE_PAIR 0x0000d800	33	#define SURROGATE_PAIR 0x0000d800
33		34
@@ -40,22 +41,12 @@ static int udf_uni2char_utf8(wchar_t uni,
40	if (boundlen <= 0)	41	if (boundlen <= 0)
41	return -ENAMETOOLONG;	42	return -ENAMETOOLONG;
42		43
43	if ((uni & SURROGATE_MASK) == SURROGATE_PAIR)	44	u_len = utf32_to_utf8(uni, out, boundlen);
44	return -EINVAL;	45	if (u_len < 0) {
45		46	if (uni > UNICODE_MAX ||
46	if (uni < 0x80) {	47	(uni & SURROGATE_MASK) == SURROGATE_PAIR)
47	out[u_len++] = (unsigned char)uni;	48	return -EINVAL;
48	} else if (uni < 0x800) {	49	return -ENAMETOOLONG;
49	if (boundlen < 2)
50	return -ENAMETOOLONG;
51	out[u_len++] = (unsigned char)(0xc0 | (uni >> 6));
52	out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
53	} else {
54	if (boundlen < 3)
55	return -ENAMETOOLONG;
56	out[u_len++] = (unsigned char)(0xe0 | (uni >> 12));
57	out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f));
58	out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
59	}	50	}
60	return u_len;	51	return u_len;
61	}	52	}
@@ -64,56 +55,19 @@ static int udf_char2uni_utf8(const unsigned char *in,
64	int boundlen,	55	int boundlen,
65	wchar_t *uni)	56	wchar_t *uni)
66	{	57	{
67	unsigned int utf_char;	58	int u_len;
68	unsigned char c;	59	unicode_t c;
69	int utf_cnt, u_len;	60
70		61	u_len = utf8_to_utf32(in, boundlen, &c);
71	utf_char = 0;	62	if (u_len < 0) {
72	utf_cnt = 0;
73	for (u_len = 0; u_len < boundlen;) {
74	c = in[u_len++];
75
76	/* Complete a multi-byte UTF-8 character */
77	if (utf_cnt) {
78	utf_char = (utf_char << 6) | (c & 0x3f);
79	if (--utf_cnt)
80	continue;
81	} else {
82	/* Check for a multi-byte UTF-8 character */
83	if (c & 0x80) {
84	/* Start a multi-byte UTF-8 character */
85	if ((c & 0xe0) == 0xc0) {
86	utf_char = c & 0x1f;
87	utf_cnt = 1;
88	} else if ((c & 0xf0) == 0xe0) {
89	utf_char = c & 0x0f;
90	utf_cnt = 2;
91	} else if ((c & 0xf8) == 0xf0) {
92	utf_char = c & 0x07;
93	utf_cnt = 3;
94	} else if ((c & 0xfc) == 0xf8) {
95	utf_char = c & 0x03;
96	utf_cnt = 4;
97	} else if ((c & 0xfe) == 0xfc) {
98	utf_char = c & 0x01;
99	utf_cnt = 5;
100	} else {
101	utf_cnt = -1;
102	break;
103	}
104	continue;
105	} else {
106	/* Single byte UTF-8 character (most common) */
107	utf_char = c;
108	}
109	}
110	*uni = utf_char;
111	break;
112	}
113	if (utf_cnt) {
114	*uni = '?';	63	*uni = '?';
115	return -EINVAL;	64	return -EINVAL;
116	}	65	}
		66
		67	if (c > MAX_WCHAR_T)
		68	*uni = '?';
		69	else
		70	*uni = c;
117	return u_len;	71	return u_len;
118	}	72	}
119		73