diff options
author | Alan Stern <stern@rowland.harvard.edu> | 2011-11-17 16:42:19 -0500 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@suse.de> | 2011-11-18 13:51:01 -0500 |
commit | 0720a06a7518c9d0c0125bd5d1f3b6264c55c3dd (patch) | |
tree | 6895ca20afb24b38f0246b370cba7dc6e72764cb | |
parent | b7463c71fbbff7111d0c879d2f64fe2b08f51848 (diff) |
NLS: improve UTF8 -> UTF16 string conversion routine
The utf8s_to_utf16s conversion routine needs to be improved. Unlike
its utf16s_to_utf8s sibling, it doesn't accept arguments specifying
the maximum length of the output buffer or the endianness of its
16-bit output.
This patch (as1501) adds the two missing arguments and adjusts the
only two places in the kernel where the function is called; both
continue to request host-endian output. A follow-on patch will add a
third caller that makes use of the new capabilities.
The two conversion routines are still annoyingly inconsistent in the
way they handle invalid byte combinations. But that's a subject for a
different patch.
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
-rw-r--r-- | drivers/hv/hv_kvp.c | 10 | ||||
-rw-r--r-- | fs/fat/namei_vfat.c | 3 | ||||
-rw-r--r-- | fs/nls/nls_base.c | 43 | ||||
-rw-r--r-- | include/linux/nls.h | 5 |
4 files changed, 44 insertions, 17 deletions
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index 89f52440fcf4..0e8343f585bb 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c | |||
@@ -212,11 +212,13 @@ kvp_respond_to_host(char *key, char *value, int error) | |||
212 | * The windows host expects the key/value pair to be encoded | 212 | * The windows host expects the key/value pair to be encoded |
213 | * in utf16. | 213 | * in utf16. |
214 | */ | 214 | */ |
215 | keylen = utf8s_to_utf16s(key_name, strlen(key_name), | 215 | keylen = utf8s_to_utf16s(key_name, strlen(key_name), UTF16_HOST_ENDIAN, |
216 | (wchar_t *)kvp_data->data.key); | 216 | (wchar_t *) kvp_data->data.key, |
217 | HV_KVP_EXCHANGE_MAX_KEY_SIZE / 2); | ||
217 | kvp_data->data.key_size = 2*(keylen + 1); /* utf16 encoding */ | 218 | kvp_data->data.key_size = 2*(keylen + 1); /* utf16 encoding */ |
218 | valuelen = utf8s_to_utf16s(value, strlen(value), | 219 | valuelen = utf8s_to_utf16s(value, strlen(value), UTF16_HOST_ENDIAN, |
219 | (wchar_t *)kvp_data->data.value); | 220 | (wchar_t *) kvp_data->data.value, |
221 | HV_KVP_EXCHANGE_MAX_VALUE_SIZE / 2); | ||
220 | kvp_data->data.value_size = 2*(valuelen + 1); /* utf16 encoding */ | 222 | kvp_data->data.value_size = 2*(valuelen + 1); /* utf16 encoding */ |
221 | 223 | ||
222 | kvp_data->data.value_type = REG_SZ; /* all our values are strings */ | 224 | kvp_data->data.value_type = REG_SZ; /* all our values are strings */ |
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index a87a65663c25..c25cf151b84b 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c | |||
@@ -512,7 +512,8 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, | |||
512 | int charlen; | 512 | int charlen; |
513 | 513 | ||
514 | if (utf8) { | 514 | if (utf8) { |
515 | *outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname); | 515 | *outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN, |
516 | (wchar_t *) outname, FAT_LFN_LEN + 2); | ||
516 | if (*outlen < 0) | 517 | if (*outlen < 0) |
517 | return *outlen; | 518 | return *outlen; |
518 | else if (*outlen > FAT_LFN_LEN) | 519 | else if (*outlen > FAT_LFN_LEN) |
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 44a88a9fa2c8..0eb059ec6f28 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c | |||
@@ -114,34 +114,57 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxlen) | |||
114 | } | 114 | } |
115 | EXPORT_SYMBOL(utf32_to_utf8); | 115 | EXPORT_SYMBOL(utf32_to_utf8); |
116 | 116 | ||
117 | int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs) | 117 | static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian) |
118 | { | ||
119 | switch (endian) { | ||
120 | default: | ||
121 | *s = (wchar_t) c; | ||
122 | break; | ||
123 | case UTF16_LITTLE_ENDIAN: | ||
124 | *s = __cpu_to_le16(c); | ||
125 | break; | ||
126 | case UTF16_BIG_ENDIAN: | ||
127 | *s = __cpu_to_be16(c); | ||
128 | break; | ||
129 | } | ||
130 | } | ||
131 | |||
132 | int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian, | ||
133 | wchar_t *pwcs, int maxlen) | ||
118 | { | 134 | { |
119 | u16 *op; | 135 | u16 *op; |
120 | int size; | 136 | int size; |
121 | unicode_t u; | 137 | unicode_t u; |
122 | 138 | ||
123 | op = pwcs; | 139 | op = pwcs; |
124 | while (*s && len > 0) { | 140 | while (len > 0 && maxlen > 0 && *s) { |
125 | if (*s & 0x80) { | 141 | if (*s & 0x80) { |
126 | size = utf8_to_utf32(s, len, &u); | 142 | size = utf8_to_utf32(s, len, &u); |
127 | if (size < 0) | 143 | if (size < 0) |
128 | return -EINVAL; | 144 | return -EINVAL; |
145 | s += size; | ||
146 | len -= size; | ||
129 | 147 | ||
130 | if (u >= PLANE_SIZE) { | 148 | if (u >= PLANE_SIZE) { |
149 | if (maxlen < 2) | ||
150 | break; | ||
131 | u -= PLANE_SIZE; | 151 | u -= PLANE_SIZE; |
132 | *op++ = (wchar_t) (SURROGATE_PAIR | | 152 | put_utf16(op++, SURROGATE_PAIR | |
133 | ((u >> 10) & SURROGATE_BITS)); | 153 | ((u >> 10) & SURROGATE_BITS), |
134 | *op++ = (wchar_t) (SURROGATE_PAIR | | 154 | endian); |
155 | put_utf16(op++, SURROGATE_PAIR | | ||
135 | SURROGATE_LOW | | 156 | SURROGATE_LOW | |
136 | (u & SURROGATE_BITS)); | 157 | (u & SURROGATE_BITS), |
158 | endian); | ||
159 | maxlen -= 2; | ||
137 | } else { | 160 | } else { |
138 | *op++ = (wchar_t) u; | 161 | put_utf16(op++, u, endian); |
162 | maxlen--; | ||
139 | } | 163 | } |
140 | s += size; | ||
141 | len -= size; | ||
142 | } else { | 164 | } else { |
143 | *op++ = *s++; | 165 | put_utf16(op++, *s++, endian); |
144 | len--; | 166 | len--; |
167 | maxlen--; | ||
145 | } | 168 | } |
146 | } | 169 | } |
147 | return op - pwcs; | 170 | return op - pwcs; |
diff --git a/include/linux/nls.h b/include/linux/nls.h index d47beef08dfd..5dc635f8d79e 100644 --- a/include/linux/nls.h +++ b/include/linux/nls.h | |||
@@ -43,7 +43,7 @@ enum utf16_endian { | |||
43 | UTF16_BIG_ENDIAN | 43 | UTF16_BIG_ENDIAN |
44 | }; | 44 | }; |
45 | 45 | ||
46 | /* nls.c */ | 46 | /* nls_base.c */ |
47 | extern int register_nls(struct nls_table *); | 47 | extern int register_nls(struct nls_table *); |
48 | extern int unregister_nls(struct nls_table *); | 48 | extern int unregister_nls(struct nls_table *); |
49 | extern struct nls_table *load_nls(char *); | 49 | extern struct nls_table *load_nls(char *); |
@@ -52,7 +52,8 @@ extern struct nls_table *load_nls_default(void); | |||
52 | 52 | ||
53 | extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu); | 53 | extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu); |
54 | extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen); | 54 | extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen); |
55 | extern int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs); | 55 | extern int utf8s_to_utf16s(const u8 *s, int len, |
56 | enum utf16_endian endian, wchar_t *pwcs, int maxlen); | ||
56 | extern int utf16s_to_utf8s(const wchar_t *pwcs, int len, | 57 | extern int utf16s_to_utf8s(const wchar_t *pwcs, int len, |
57 | enum utf16_endian endian, u8 *s, int maxlen); | 58 | enum utf16_endian endian, u8 *s, int maxlen); |
58 | 59 | ||