aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nls/nls_utf8.c
diff options
context:
space:
mode:
authorAlan Stern <stern@rowland.harvard.edu>2009-04-30 10:08:18 -0400
committerGreg Kroah-Hartman <gregkh@suse.de>2009-06-16 00:44:43 -0400
commit74675a58507e769beee7d949dbed788af3c4139d (patch)
treed4ae3cc06dbfadecf1eaf6ed0aef249fc87b07e6 /fs/nls/nls_utf8.c
parenta853a3d4eb2edb066248a39f0634f6f5858816a0 (diff)
NLS: update handling of Unicode
This patch (as1239) updates the kernel's treatment of Unicode. The character-set conversion routines are well behind the current state of the Unicode specification: They don't recognize the existence of code points beyond plane 0 or of surrogate pairs in the UTF-16 encoding. The old wchar_t 16-bit type is retained because it's still used in lots of places. This shouldn't cause any new problems; if a conversion now results in an invalid 16-bit code then before it must have yielded an undefined code. Difficult-to-read names like "utf_mbstowcs" are replaced with more transparent names like "utf8s_to_utf16s" and the ordering of the parameters is rationalized (buffer lengths come immediate after the pointers they refer to, and the inputs precede the outputs). Fortunately the low-level conversion routines are used in only a few places; the interfaces to the higher-level uni2char and char2uni methods have been left unchanged. Signed-off-by: Alan Stern <stern@rowland.harvard.edu> Acked-by: Clemens Ladisch <clemens@ladisch.de> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Diffstat (limited to 'fs/nls/nls_utf8.c')
-rw-r--r--fs/nls/nls_utf8.c13
1 files changed, 10 insertions, 3 deletions
diff --git a/fs/nls/nls_utf8.c b/fs/nls/nls_utf8.c
index aa2c42fdd977..0d60a44acacd 100644
--- a/fs/nls/nls_utf8.c
+++ b/fs/nls/nls_utf8.c
@@ -15,7 +15,11 @@ static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
15{ 15{
16 int n; 16 int n;
17 17
18 if ( (n = utf8_wctomb(out, uni, boundlen)) == -1) { 18 if (boundlen <= 0)
19 return -ENAMETOOLONG;
20
21 n = utf32_to_utf8(uni, out, boundlen);
22 if (n < 0) {
19 *out = '?'; 23 *out = '?';
20 return -EINVAL; 24 return -EINVAL;
21 } 25 }
@@ -25,11 +29,14 @@ static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
25static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) 29static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni)
26{ 30{
27 int n; 31 int n;
32 unicode_t u;
28 33
29 if ( (n = utf8_mbtowc(uni, rawstring, boundlen)) == -1) { 34 n = utf8_to_utf32(rawstring, boundlen, &u);
35 if (n < 0 || u > MAX_WCHAR_T) {
30 *uni = 0x003f; /* ? */ 36 *uni = 0x003f; /* ? */
31 n = -EINVAL; 37 return -EINVAL;
32 } 38 }
39 *uni = (wchar_t) u;
33 return n; 40 return n;
34} 41}
35 42