diff options
author | Alan Stern <stern@rowland.harvard.edu> | 2009-04-30 10:08:18 -0400 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@suse.de> | 2009-06-16 00:44:43 -0400 |
commit | 74675a58507e769beee7d949dbed788af3c4139d (patch) | |
tree | d4ae3cc06dbfadecf1eaf6ed0aef249fc87b07e6 /fs/nls/nls_utf8.c | |
parent | a853a3d4eb2edb066248a39f0634f6f5858816a0 (diff) |
NLS: update handling of Unicode
This patch (as1239) updates the kernel's treatment of Unicode. The
character-set conversion routines are well behind the current state of
the Unicode specification: They don't recognize the existence of code
points beyond plane 0 or of surrogate pairs in the UTF-16 encoding.
The old wchar_t 16-bit type is retained because it's still used in
lots of places. This shouldn't cause any new problems; if a
conversion now results in an invalid 16-bit code, then previously it
must have yielded an undefined code.
Difficult-to-read names like "utf_mbstowcs" are replaced with more
transparent names like "utf8s_to_utf16s" and the ordering of the
parameters is rationalized (buffer lengths come immediately after the
pointers they refer to, and the inputs precede the outputs).
Fortunately the low-level conversion routines are used in only a few
places; the interfaces to the higher-level uni2char and char2uni
methods have been left unchanged.
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Diffstat (limited to 'fs/nls/nls_utf8.c')
-rw-r--r-- | fs/nls/nls_utf8.c | 13 |
1 files changed, 10 insertions, 3 deletions
diff --git a/fs/nls/nls_utf8.c b/fs/nls/nls_utf8.c index aa2c42fdd977..0d60a44acacd 100644 --- a/fs/nls/nls_utf8.c +++ b/fs/nls/nls_utf8.c | |||
@@ -15,7 +15,11 @@ static int uni2char(wchar_t uni, unsigned char *out, int boundlen) | |||
15 | { | 15 | { |
16 | int n; | 16 | int n; |
17 | 17 | ||
18 | if ( (n = utf8_wctomb(out, uni, boundlen)) == -1) { | 18 | if (boundlen <= 0) |
19 | return -ENAMETOOLONG; | ||
20 | |||
21 | n = utf32_to_utf8(uni, out, boundlen); | ||
22 | if (n < 0) { | ||
19 | *out = '?'; | 23 | *out = '?'; |
20 | return -EINVAL; | 24 | return -EINVAL; |
21 | } | 25 | } |
@@ -25,11 +29,14 @@ static int uni2char(wchar_t uni, unsigned char *out, int boundlen) | |||
25 | static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) | 29 | static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) |
26 | { | 30 | { |
27 | int n; | 31 | int n; |
32 | unicode_t u; | ||
28 | 33 | ||
29 | if ( (n = utf8_mbtowc(uni, rawstring, boundlen)) == -1) { | 34 | n = utf8_to_utf32(rawstring, boundlen, &u); |
35 | if (n < 0 || u > MAX_WCHAR_T) { | ||
30 | *uni = 0x003f; /* ? */ | 36 | *uni = 0x003f; /* ? */ |
31 | n = -EINVAL; | 37 | return -EINVAL; |
32 | } | 38 | } |
39 | *uni = (wchar_t) u; | ||
33 | return n; | 40 | return n; |
34 | } | 41 | } |
35 | 42 | ||