diff options
author | Alan Stern <stern@rowland.harvard.edu> | 2009-04-30 10:08:18 -0400 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@suse.de> | 2009-06-16 00:44:43 -0400 |
commit | 74675a58507e769beee7d949dbed788af3c4139d (patch) | |
tree | d4ae3cc06dbfadecf1eaf6ed0aef249fc87b07e6 /fs/isofs/joliet.c | |
parent | a853a3d4eb2edb066248a39f0634f6f5858816a0 (diff) |
NLS: update handling of Unicode
This patch (as1239) updates the kernel's treatment of Unicode. The
character-set conversion routines are well behind the current state of
the Unicode specification: They don't recognize the existence of code
points beyond plane 0 or of surrogate pairs in the UTF-16 encoding.
The old wchar_t 16-bit type is retained because it's still used in
lots of places. This shouldn't cause any new problems; if a
conversion now results in an invalid 16-bit code, then previously it
must have yielded an undefined code.
Difficult-to-read names like "utf_mbstowcs" are replaced with more
transparent names like "utf8s_to_utf16s" and the ordering of the
parameters is rationalized (buffer lengths come immediately after the
pointers they refer to, and the inputs precede the outputs).
Fortunately the low-level conversion routines are used in only a few
places; the interfaces to the higher-level uni2char and char2uni
methods have been left unchanged.
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Diffstat (limited to 'fs/isofs/joliet.c')
-rw-r--r-- | fs/isofs/joliet.c | 36 |
1 files changed, 3 insertions, 33 deletions
diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c index 92c14b850e9c..a048de81c093 100644 --- a/fs/isofs/joliet.c +++ b/fs/isofs/joliet.c | |||
@@ -37,37 +37,6 @@ uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls) | |||
37 | return (op - ascii); | 37 | return (op - ascii); |
38 | } | 38 | } |
39 | 39 | ||
40 | /* Convert big endian wide character string to utf8 */ | ||
41 | static int | ||
42 | wcsntombs_be(__u8 *s, const __u8 *pwcs, int inlen, int maxlen) | ||
43 | { | ||
44 | const __u8 *ip; | ||
45 | __u8 *op; | ||
46 | int size; | ||
47 | __u16 c; | ||
48 | |||
49 | op = s; | ||
50 | ip = pwcs; | ||
51 | while ((*ip || ip[1]) && (maxlen > 0) && (inlen > 0)) { | ||
52 | c = (*ip << 8) | ip[1]; | ||
53 | if (c > 0x7f) { | ||
54 | size = utf8_wctomb(op, c, maxlen); | ||
55 | if (size == -1) { | ||
56 | /* Ignore character and move on */ | ||
57 | maxlen--; | ||
58 | } else { | ||
59 | op += size; | ||
60 | maxlen -= size; | ||
61 | } | ||
62 | } else { | ||
63 | *op++ = (__u8) c; | ||
64 | } | ||
65 | ip += 2; | ||
66 | inlen--; | ||
67 | } | ||
68 | return (op - s); | ||
69 | } | ||
70 | |||
71 | int | 40 | int |
72 | get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode) | 41 | get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode) |
73 | { | 42 | { |
@@ -79,8 +48,9 @@ get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, st | |||
79 | nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset; | 48 | nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset; |
80 | 49 | ||
81 | if (utf8) { | 50 | if (utf8) { |
82 | len = wcsntombs_be(outname, de->name, | 51 | len = utf16s_to_utf8s((const wchar_t *) de->name, |
83 | de->name_len[0] >> 1, PAGE_SIZE); | 52 | de->name_len[0] >> 1, UTF16_BIG_ENDIAN, |
53 | outname, PAGE_SIZE); | ||
84 | } else { | 54 | } else { |
85 | len = uni16_to_x8(outname, (__be16 *) de->name, | 55 | len = uni16_to_x8(outname, (__be16 *) de->name, |
86 | de->name_len[0] >> 1, nls); | 56 | de->name_len[0] >> 1, nls); |