NLS: update handling of Unicode

This patch (as1239) updates the kernel's treatment of Unicode. The character-set conversion routines are well behind the current state of the Unicode specification: they don't recognize the existence of code points beyond plane 0 or of surrogate pairs in the UTF-16 encoding. The old wchar_t 16-bit type is retained because it's still used in lots of places. This shouldn't cause any new problems; if a conversion now results in an invalid 16-bit code then before it must have yielded an undefined code. Difficult-to-read names like "utf_mbstowcs" are replaced with more transparent names like "utf8s_to_utf16s" and the ordering of the parameters is rationalized (buffer lengths come immediately after the pointers they refer to, and the inputs precede the outputs). Fortunately the low-level conversion routines are used in only a few places; the interfaces to the higher-level uni2char and char2uni methods have been left unchanged. Signed-off-by: Alan Stern <stern@rowland.harvard.edu> Acked-by: Clemens Ladisch <clemens@ladisch.de> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
author: Alan Stern <stern@rowland.harvard.edu> 2009-04-30 10:08:18 -0400
committer: Greg Kroah-Hartman <gregkh@suse.de> 2009-06-16 00:44:43 -0400
commit: 74675a58507e769beee7d949dbed788af3c4139d (patch)
tree: d4ae3cc06dbfadecf1eaf6ed0aef249fc87b07e6 /fs/befs/linuxvfs.c
parent: a853a3d4eb2edb066248a39f0634f6f5858816a0 (diff)
1 files changed, 9 insertions, 11 deletions
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 9367b6297d84..89cd2deeb4af 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -513,7 +513,7 @@ befs_utf2nls(struct super_block *sb, const char *in,
 {
        struct nls_table *nls = BEFS_SB(sb)->nls;
        int i, o;
-        wchar_t uni;
+        unicode_t uni;
        int unilen, utflen;
        char *result;
        /* The utf8->nls conversion won't make the final nls string bigger
@@ -539,16 +539,16 @@ befs_utf2nls(struct super_block *sb, const char *in,
        for (i = o = 0; i < in_len; i += utflen, o += unilen) {
                /* convert from UTF-8 to Unicode */
-                utflen = utf8_mbtowc(&uni, &in[i], in_len - i);
+                utflen = utf8_to_utf32(&in[i], in_len - i, &uni);
-                if (utflen < 0) {
+                if (utflen < 0)
                        goto conv_err;
-                }
                /* convert from Unicode to nls */
+                if (uni > MAX_WCHAR_T)
+                        goto conv_err;
                unilen = nls->uni2char(uni, &result[o], in_len - o);
-                if (unilen < 0) {
+                if (unilen < 0)
                        goto conv_err;
-                }
        }
        result[o] = '\0';
        *out_len = o;
@@ -619,15 +619,13 @@ befs_nls2utf(struct super_block *sb, const char *in,
                /* convert from nls to unicode */
                unilen = nls->char2uni(&in[i], in_len - i, &uni);
-                if (unilen < 0) {
+                if (unilen < 0)
                        goto conv_err;
-                }
                /* convert from unicode to UTF-8 */
-                utflen = utf8_wctomb(&result[o], uni, 3);
+                utflen = utf32_to_utf8(uni, &result[o], 3);
-                if (utflen <= 0) {
+                if (utflen <= 0)
                        goto conv_err;
-                }
        }
        result[o] = '\0';
author	Alan Stern <stern@rowland.harvard.edu>	2009-04-30 10:08:18 -0400
committer	Greg Kroah-Hartman <gregkh@suse.de>	2009-06-16 00:44:43 -0400
commit	74675a58507e769beee7d949dbed788af3c4139d (patch)
tree	d4ae3cc06dbfadecf1eaf6ed0aef249fc87b07e6 /fs/befs/linuxvfs.c
parent	a853a3d4eb2edb066248a39f0634f6f5858816a0 (diff)