diff options
author | Alan Stern <stern@rowland.harvard.edu> | 2011-11-17 16:42:19 -0500 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@suse.de> | 2011-11-18 13:51:01 -0500 |
commit | 0720a06a7518c9d0c0125bd5d1f3b6264c55c3dd (patch) | |
tree | 6895ca20afb24b38f0246b370cba7dc6e72764cb /fs/nls | |
parent | b7463c71fbbff7111d0c879d2f64fe2b08f51848 (diff) |
NLS: improve UTF8 -> UTF16 string conversion routine
The utf8s_to_utf16s conversion routine needs to be improved. Unlike
its utf16s_to_utf8s sibling, it doesn't accept arguments specifying
the maximum length of the output buffer or the endianness of its
16-bit output.
This patch (as1501) adds the two missing arguments, and adjusts the
only two places in the kernel where the function is called. A
follow-on patch will add a third caller that does utilize the new
capabilities.
The two conversion routines are still annoyingly inconsistent in the
way they handle invalid byte combinations. But that's a subject for a
different patch.
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Diffstat (limited to 'fs/nls')
-rw-r--r-- | fs/nls/nls_base.c | 43 |
1 files changed, 33 insertions, 10 deletions
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 44a88a9fa2c8..0eb059ec6f28 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c | |||
@@ -114,34 +114,57 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxlen) | |||
114 | } | 114 | } |
115 | EXPORT_SYMBOL(utf32_to_utf8); | 115 | EXPORT_SYMBOL(utf32_to_utf8); |
116 | 116 | ||
117 | int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs) | 117 | static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian) |
118 | { | ||
119 | switch (endian) { | ||
120 | default: | ||
121 | *s = (wchar_t) c; | ||
122 | break; | ||
123 | case UTF16_LITTLE_ENDIAN: | ||
124 | *s = __cpu_to_le16(c); | ||
125 | break; | ||
126 | case UTF16_BIG_ENDIAN: | ||
127 | *s = __cpu_to_be16(c); | ||
128 | break; | ||
129 | } | ||
130 | } | ||
131 | |||
132 | int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian, | ||
133 | wchar_t *pwcs, int maxlen) | ||
118 | { | 134 | { |
119 | u16 *op; | 135 | u16 *op; |
120 | int size; | 136 | int size; |
121 | unicode_t u; | 137 | unicode_t u; |
122 | 138 | ||
123 | op = pwcs; | 139 | op = pwcs; |
124 | while (*s && len > 0) { | 140 | while (len > 0 && maxlen > 0 && *s) { |
125 | if (*s & 0x80) { | 141 | if (*s & 0x80) { |
126 | size = utf8_to_utf32(s, len, &u); | 142 | size = utf8_to_utf32(s, len, &u); |
127 | if (size < 0) | 143 | if (size < 0) |
128 | return -EINVAL; | 144 | return -EINVAL; |
145 | s += size; | ||
146 | len -= size; | ||
129 | 147 | ||
130 | if (u >= PLANE_SIZE) { | 148 | if (u >= PLANE_SIZE) { |
149 | if (maxlen < 2) | ||
150 | break; | ||
131 | u -= PLANE_SIZE; | 151 | u -= PLANE_SIZE; |
132 | *op++ = (wchar_t) (SURROGATE_PAIR | | 152 | put_utf16(op++, SURROGATE_PAIR | |
133 | ((u >> 10) & SURROGATE_BITS)); | 153 | ((u >> 10) & SURROGATE_BITS), |
134 | *op++ = (wchar_t) (SURROGATE_PAIR | | 154 | endian); |
155 | put_utf16(op++, SURROGATE_PAIR | | ||
135 | SURROGATE_LOW | | 156 | SURROGATE_LOW | |
136 | (u & SURROGATE_BITS)); | 157 | (u & SURROGATE_BITS), |
158 | endian); | ||
159 | maxlen -= 2; | ||
137 | } else { | 160 | } else { |
138 | *op++ = (wchar_t) u; | 161 | put_utf16(op++, u, endian); |
162 | maxlen--; | ||
139 | } | 163 | } |
140 | s += size; | ||
141 | len -= size; | ||
142 | } else { | 164 | } else { |
143 | *op++ = *s++; | 165 | put_utf16(op++, *s++, endian); |
144 | len--; | 166 | len--; |
167 | maxlen--; | ||
145 | } | 168 | } |
146 | } | 169 | } |
147 | return op - pwcs; | 170 | return op - pwcs; |