NLS: update handling of Unicode

This patch (as1239) updates the kernel's treatment of Unicode. The character-set conversion routines are well behind the current state of the Unicode specification: They don't recognize the existence of code points beyond plane 0 or of surrogate pairs in the UTF-16 encoding.

The old wchar_t 16-bit type is retained because it's still used in lots of places. This shouldn't cause any new problems; if a conversion now results in an invalid 16-bit code then before it must have yielded an undefined code.

Difficult-to-read names like "utf_mbstowcs" are replaced with more transparent names like "utf8s_to_utf16s" and the ordering of the parameters is rationalized (buffer lengths come immediately after the pointers they refer to, and the inputs precede the outputs).

Fortunately the low-level conversion routines are used in only a few places; the interfaces to the higher-level uni2char and char2uni methods have been left unchanged.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
author: Alan Stern <stern@rowland.harvard.edu> 2009-04-30 10:08:18 -0400
committer: Greg Kroah-Hartman <gregkh@suse.de> 2009-06-16 00:44:43 -0400
commit: 74675a58507e769beee7d949dbed788af3c4139d (patch)
tree: d4ae3cc06dbfadecf1eaf6ed0aef249fc87b07e6 /fs/isofs/joliet.c
parent: a853a3d4eb2edb066248a39f0634f6f5858816a0 (diff)
1 files changed, 3 insertions, 33 deletions
diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c
index 92c14b850e9c..a048de81c093 100644
--- a/fs/isofs/joliet.c
+++ b/fs/isofs/joliet.c
@@ -37,37 +37,6 @@ uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls)
        return (op - ascii);
 }
-/* Convert big endian wide character string to utf8 */
-static int
-wcsntombs_be(__u8 *s, const __u8 *pwcs, int inlen, int maxlen)
-{
-        const __u8 *ip;
-        __u8 *op;
-        int size;
-        __u16 c;
-        op = s;
-        ip = pwcs;
-        while ((*ip || ip[1]) && (maxlen > 0) && (inlen > 0)) {
-                c = (*ip << 8) | ip[1];
-                if (c > 0x7f) {
-                        size = utf8_wctomb(op, c, maxlen);
-                        if (size == -1) {
-                                /* Ignore character and move on */
-                                maxlen--;
-                        } else {
-                                op += size;
-                                maxlen -= size;
-                        }
-                } else {
-                        *op++ = (__u8) c;
-                }
-                ip += 2;
-                inlen--;
-        }
-        return (op - s);
-}
 int
 get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode)
 {
@@ -79,8 +48,9 @@ get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, st
        nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset;
        if (utf8) {
-                len = wcsntombs_be(outname, de->name,
+                len = utf16s_to_utf8s((const wchar_t *) de->name,
-                                de->name_len[0] >> 1, PAGE_SIZE);
+                                de->name_len[0] >> 1, UTF16_BIG_ENDIAN,
+                                outname, PAGE_SIZE);
        } else {
                len = uni16_to_x8(outname, (__be16 *) de->name,
                                de->name_len[0] >> 1, nls);
author	Alan Stern <stern@rowland.harvard.edu>	2009-04-30 10:08:18 -0400
committer	Greg Kroah-Hartman <gregkh@suse.de>	2009-06-16 00:44:43 -0400
commit	74675a58507e769beee7d949dbed788af3c4139d (patch)
tree	d4ae3cc06dbfadecf1eaf6ed0aef249fc87b07e6 /fs/isofs/joliet.c
parent	a853a3d4eb2edb066248a39f0634f6f5858816a0 (diff)

diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c
index 92c14b850e9c..a048de81c093 100644
--- a/fs/isofs/joliet.c
+++ b/fs/isofs/joliet.c
@@ -37,37 +37,6 @@ uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls)
37	return (op - ascii);	37	return (op - ascii);
38	}	38	}
39		39
40	/* Convert big endian wide character string to utf8 */
41	static int
42	wcsntombs_be(__u8 *s, const __u8 *pwcs, int inlen, int maxlen)
43	{
44	const __u8 *ip;
45	__u8 *op;
46	int size;
47	__u16 c;
48
49	op = s;
50	ip = pwcs;
51	while ((*ip || ip[1]) && (maxlen > 0) && (inlen > 0)) {
52	c = (*ip << 8) | ip[1];
53	if (c > 0x7f) {
54	size = utf8_wctomb(op, c, maxlen);
55	if (size == -1) {
56	/* Ignore character and move on */
57	maxlen--;
58	} else {
59	op += size;
60	maxlen -= size;
61	}
62	} else {
63	*op++ = (__u8) c;
64	}
65	ip += 2;
66	inlen--;
67	}
68	return (op - s);
69	}
70
71	int	40	int
72	get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode)	41	get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode)
73	{	42	{
@@ -79,8 +48,9 @@ get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, st
79	nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset;	48	nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset;
80		49
81	if (utf8) {	50	if (utf8) {
82	len = wcsntombs_be(outname, de->name,	51	len = utf16s_to_utf8s((const wchar_t *) de->name,
83	de->name_len[0] >> 1, PAGE_SIZE);	52	de->name_len[0] >> 1, UTF16_BIG_ENDIAN,
		53	outname, PAGE_SIZE);
84	} else {	54	} else {
85	len = uni16_to_x8(outname, (__be16 *) de->name,	55	len = uni16_to_x8(outname, (__be16 *) de->name,
86	de->name_len[0] >> 1, nls);	56	de->name_len[0] >> 1, nls);