diff options
-rw-r--r-- | drivers/usb/core/message.c | 10 | ||||
-rw-r--r-- | fs/befs/linuxvfs.c | 20 | ||||
-rw-r--r-- | fs/fat/dir.c | 29 | ||||
-rw-r--r-- | fs/fat/namei_vfat.c | 4 | ||||
-rw-r--r-- | fs/isofs/joliet.c | 36 | ||||
-rw-r--r-- | fs/ncpfs/ncplib_kernel.c | 8 | ||||
-rw-r--r-- | fs/nls/nls_base.c | 164 | ||||
-rw-r--r-- | fs/nls/nls_utf8.c | 13 | ||||
-rw-r--r-- | include/linux/nls.h | 35 |
9 files changed, 182 insertions, 137 deletions
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index e98f928c08ea..9bd26dec7599 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c | |||
@@ -780,14 +780,13 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size) | |||
780 | { | 780 | { |
781 | unsigned char *tbuf; | 781 | unsigned char *tbuf; |
782 | int err; | 782 | int err; |
783 | unsigned int u; | ||
784 | 783 | ||
785 | if (dev->state == USB_STATE_SUSPENDED) | 784 | if (dev->state == USB_STATE_SUSPENDED) |
786 | return -EHOSTUNREACH; | 785 | return -EHOSTUNREACH; |
787 | if (size <= 0 || !buf || !index) | 786 | if (size <= 0 || !buf || !index) |
788 | return -EINVAL; | 787 | return -EINVAL; |
789 | buf[0] = 0; | 788 | buf[0] = 0; |
790 | tbuf = kmalloc(256 + 2, GFP_NOIO); | 789 | tbuf = kmalloc(256, GFP_NOIO); |
791 | if (!tbuf) | 790 | if (!tbuf) |
792 | return -ENOMEM; | 791 | return -ENOMEM; |
793 | 792 | ||
@@ -814,12 +813,9 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size) | |||
814 | if (err < 0) | 813 | if (err < 0) |
815 | goto errout; | 814 | goto errout; |
816 | 815 | ||
817 | for (u = 2; u < err; u += 2) | ||
818 | le16_to_cpus((u16 *)&tbuf[u]); | ||
819 | tbuf[u] = 0; | ||
820 | tbuf[u + 1] = 0; | ||
821 | size--; /* leave room for trailing NULL char in output buffer */ | 816 | size--; /* leave room for trailing NULL char in output buffer */ |
822 | err = utf8_wcstombs(buf, (u16 *)&tbuf[2], size); | 817 | err = utf16s_to_utf8s((wchar_t *) &tbuf[2], (err - 2) / 2, |
818 | UTF16_LITTLE_ENDIAN, buf, size); | ||
823 | buf[err] = 0; | 819 | buf[err] = 0; |
824 | 820 | ||
825 | if (tbuf[1] != USB_DT_STRING) | 821 | if (tbuf[1] != USB_DT_STRING) |
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 9367b6297d84..89cd2deeb4af 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c | |||
@@ -513,7 +513,7 @@ befs_utf2nls(struct super_block *sb, const char *in, | |||
513 | { | 513 | { |
514 | struct nls_table *nls = BEFS_SB(sb)->nls; | 514 | struct nls_table *nls = BEFS_SB(sb)->nls; |
515 | int i, o; | 515 | int i, o; |
516 | wchar_t uni; | 516 | unicode_t uni; |
517 | int unilen, utflen; | 517 | int unilen, utflen; |
518 | char *result; | 518 | char *result; |
519 | /* The utf8->nls conversion won't make the final nls string bigger | 519 | /* The utf8->nls conversion won't make the final nls string bigger |
@@ -539,16 +539,16 @@ befs_utf2nls(struct super_block *sb, const char *in, | |||
539 | for (i = o = 0; i < in_len; i += utflen, o += unilen) { | 539 | for (i = o = 0; i < in_len; i += utflen, o += unilen) { |
540 | 540 | ||
541 | /* convert from UTF-8 to Unicode */ | 541 | /* convert from UTF-8 to Unicode */ |
542 | utflen = utf8_mbtowc(&uni, &in[i], in_len - i); | 542 | utflen = utf8_to_utf32(&in[i], in_len - i, &uni); |
543 | if (utflen < 0) { | 543 | if (utflen < 0) |
544 | goto conv_err; | 544 | goto conv_err; |
545 | } | ||
546 | 545 | ||
547 | /* convert from Unicode to nls */ | 546 | /* convert from Unicode to nls */ |
547 | if (uni > MAX_WCHAR_T) | ||
548 | goto conv_err; | ||
548 | unilen = nls->uni2char(uni, &result[o], in_len - o); | 549 | unilen = nls->uni2char(uni, &result[o], in_len - o); |
549 | if (unilen < 0) { | 550 | if (unilen < 0) |
550 | goto conv_err; | 551 | goto conv_err; |
551 | } | ||
552 | } | 552 | } |
553 | result[o] = '\0'; | 553 | result[o] = '\0'; |
554 | *out_len = o; | 554 | *out_len = o; |
@@ -619,15 +619,13 @@ befs_nls2utf(struct super_block *sb, const char *in, | |||
619 | 619 | ||
620 | /* convert from nls to unicode */ | 620 | /* convert from nls to unicode */ |
621 | unilen = nls->char2uni(&in[i], in_len - i, &uni); | 621 | unilen = nls->char2uni(&in[i], in_len - i, &uni); |
622 | if (unilen < 0) { | 622 | if (unilen < 0) |
623 | goto conv_err; | 623 | goto conv_err; |
624 | } | ||
625 | 624 | ||
626 | /* convert from unicode to UTF-8 */ | 625 | /* convert from unicode to UTF-8 */ |
627 | utflen = utf8_wctomb(&result[o], uni, 3); | 626 | utflen = utf32_to_utf8(uni, &result[o], 3); |
628 | if (utflen <= 0) { | 627 | if (utflen <= 0) |
629 | goto conv_err; | 628 | goto conv_err; |
630 | } | ||
631 | } | 629 | } |
632 | 630 | ||
633 | result[o] = '\0'; | 631 | result[o] = '\0'; |
diff --git a/fs/fat/dir.c b/fs/fat/dir.c index f3500294eec5..7c14c8cbbaba 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c | |||
@@ -22,6 +22,19 @@ | |||
22 | #include <asm/uaccess.h> | 22 | #include <asm/uaccess.h> |
23 | #include "fat.h" | 23 | #include "fat.h" |
24 | 24 | ||
25 | /* | ||
26 | * Maximum buffer size of short name. | ||
27 | * [(MSDOS_NAME + '.') * max one char + nul] | ||
28 | * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul] | ||
29 | */ | ||
30 | #define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1) | ||
31 | /* | ||
32 | * Maximum buffer size of unicode chars from slots. | ||
33 | * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)] | ||
34 | */ | ||
35 | #define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1) | ||
36 | #define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t)) | ||
37 | |||
25 | static inline loff_t fat_make_i_pos(struct super_block *sb, | 38 | static inline loff_t fat_make_i_pos(struct super_block *sb, |
26 | struct buffer_head *bh, | 39 | struct buffer_head *bh, |
27 | struct msdos_dir_entry *de) | 40 | struct msdos_dir_entry *de) |
@@ -171,7 +184,8 @@ static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni, | |||
171 | unsigned char *buf, int size) | 184 | unsigned char *buf, int size) |
172 | { | 185 | { |
173 | if (sbi->options.utf8) | 186 | if (sbi->options.utf8) |
174 | return utf8_wcstombs(buf, uni, size); | 187 | return utf16s_to_utf8s(uni, FAT_MAX_UNI_CHARS, |
188 | UTF16_HOST_ENDIAN, buf, size); | ||
175 | else | 189 | else |
176 | return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate, | 190 | return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate, |
177 | sbi->nls_io); | 191 | sbi->nls_io); |
@@ -325,19 +339,6 @@ parse_long: | |||
325 | } | 339 | } |
326 | 340 | ||
327 | /* | 341 | /* |
328 | * Maximum buffer size of short name. | ||
329 | * [(MSDOS_NAME + '.') * max one char + nul] | ||
330 | * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul] | ||
331 | */ | ||
332 | #define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1) | ||
333 | /* | ||
334 | * Maximum buffer size of unicode chars from slots. | ||
335 | * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)] | ||
336 | */ | ||
337 | #define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1) | ||
338 | #define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t)) | ||
339 | |||
340 | /* | ||
341 | * Return values: negative -> error, 0 -> not found, positive -> found, | 342 | * Return values: negative -> error, 0 -> not found, positive -> found, |
342 | * value is the total amount of slots, including the shortname entry. | 343 | * value is the total amount of slots, including the shortname entry. |
343 | */ | 344 | */ |
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index b50ecbe97f83..f92ad9995356 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c | |||
@@ -502,11 +502,11 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, | |||
502 | if (utf8) { | 502 | if (utf8) { |
503 | int name_len = strlen(name); | 503 | int name_len = strlen(name); |
504 | 504 | ||
505 | *outlen = utf8_mbstowcs((wchar_t *)outname, name, PATH_MAX); | 505 | *outlen = utf8s_to_utf16s(name, PATH_MAX, (wchar_t *) outname); |
506 | 506 | ||
507 | /* | 507 | /* |
508 | * We stripped '.'s before and set len appropriately, | 508 | * We stripped '.'s before and set len appropriately, |
509 | * but utf8_mbstowcs doesn't care about len | 509 | * but utf8s_to_utf16s doesn't care about len |
510 | */ | 510 | */ |
511 | *outlen -= (name_len - len); | 511 | *outlen -= (name_len - len); |
512 | 512 | ||
diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c index 92c14b850e9c..a048de81c093 100644 --- a/fs/isofs/joliet.c +++ b/fs/isofs/joliet.c | |||
@@ -37,37 +37,6 @@ uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls) | |||
37 | return (op - ascii); | 37 | return (op - ascii); |
38 | } | 38 | } |
39 | 39 | ||
40 | /* Convert big endian wide character string to utf8 */ | ||
41 | static int | ||
42 | wcsntombs_be(__u8 *s, const __u8 *pwcs, int inlen, int maxlen) | ||
43 | { | ||
44 | const __u8 *ip; | ||
45 | __u8 *op; | ||
46 | int size; | ||
47 | __u16 c; | ||
48 | |||
49 | op = s; | ||
50 | ip = pwcs; | ||
51 | while ((*ip || ip[1]) && (maxlen > 0) && (inlen > 0)) { | ||
52 | c = (*ip << 8) | ip[1]; | ||
53 | if (c > 0x7f) { | ||
54 | size = utf8_wctomb(op, c, maxlen); | ||
55 | if (size == -1) { | ||
56 | /* Ignore character and move on */ | ||
57 | maxlen--; | ||
58 | } else { | ||
59 | op += size; | ||
60 | maxlen -= size; | ||
61 | } | ||
62 | } else { | ||
63 | *op++ = (__u8) c; | ||
64 | } | ||
65 | ip += 2; | ||
66 | inlen--; | ||
67 | } | ||
68 | return (op - s); | ||
69 | } | ||
70 | |||
71 | int | 40 | int |
72 | get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode) | 41 | get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode) |
73 | { | 42 | { |
@@ -79,8 +48,9 @@ get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, st | |||
79 | nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset; | 48 | nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset; |
80 | 49 | ||
81 | if (utf8) { | 50 | if (utf8) { |
82 | len = wcsntombs_be(outname, de->name, | 51 | len = utf16s_to_utf8s((const wchar_t *) de->name, |
83 | de->name_len[0] >> 1, PAGE_SIZE); | 52 | de->name_len[0] >> 1, UTF16_BIG_ENDIAN, |
53 | outname, PAGE_SIZE); | ||
84 | } else { | 54 | } else { |
85 | len = uni16_to_x8(outname, (__be16 *) de->name, | 55 | len = uni16_to_x8(outname, (__be16 *) de->name, |
86 | de->name_len[0] >> 1, nls); | 56 | de->name_len[0] >> 1, nls); |
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c index 97645f112114..0ec6237a5970 100644 --- a/fs/ncpfs/ncplib_kernel.c +++ b/fs/ncpfs/ncplib_kernel.c | |||
@@ -1113,11 +1113,13 @@ ncp__io2vol(struct ncp_server *server, unsigned char *vname, unsigned int *vlen, | |||
1113 | 1113 | ||
1114 | if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) { | 1114 | if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) { |
1115 | int k; | 1115 | int k; |
1116 | unicode_t u; | ||
1116 | 1117 | ||
1117 | k = utf8_mbtowc(&ec, iname, iname_end - iname); | 1118 | k = utf8_to_utf32(iname, iname_end - iname, &u); |
1118 | if (k < 0) | 1119 | if (k < 0 || u > MAX_WCHAR_T) |
1119 | return -EINVAL; | 1120 | return -EINVAL; |
1120 | iname += k; | 1121 | iname += k; |
1122 | ec = u; | ||
1121 | } else { | 1123 | } else { |
1122 | if (*iname == NCP_ESC) { | 1124 | if (*iname == NCP_ESC) { |
1123 | int k; | 1125 | int k; |
@@ -1214,7 +1216,7 @@ ncp__vol2io(struct ncp_server *server, unsigned char *iname, unsigned int *ilen, | |||
1214 | if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) { | 1216 | if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) { |
1215 | int k; | 1217 | int k; |
1216 | 1218 | ||
1217 | k = utf8_wctomb(iname, ec, iname_end - iname); | 1219 | k = utf32_to_utf8(ec, iname, iname_end - iname); |
1218 | if (k < 0) { | 1220 | if (k < 0) { |
1219 | err = -ENAMETOOLONG; | 1221 | err = -ENAMETOOLONG; |
1220 | goto quit; | 1222 | goto quit; |
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 750abf211e26..477d37d83b31 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/errno.h> | 15 | #include <linux/errno.h> |
16 | #include <linux/kmod.h> | 16 | #include <linux/kmod.h> |
17 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
18 | #include <asm/byteorder.h> | ||
18 | 19 | ||
19 | static struct nls_table default_table; | 20 | static struct nls_table default_table; |
20 | static struct nls_table *tables = &default_table; | 21 | static struct nls_table *tables = &default_table; |
@@ -43,10 +44,17 @@ static const struct utf8_table utf8_table[] = | |||
43 | {0, /* end of table */} | 44 | {0, /* end of table */} |
44 | }; | 45 | }; |
45 | 46 | ||
46 | int | 47 | #define UNICODE_MAX 0x0010ffff |
47 | utf8_mbtowc(wchar_t *p, const __u8 *s, int n) | 48 | #define PLANE_SIZE 0x00010000 |
49 | |||
50 | #define SURROGATE_MASK 0xfffff800 | ||
51 | #define SURROGATE_PAIR 0x0000d800 | ||
52 | #define SURROGATE_LOW 0x00000400 | ||
53 | #define SURROGATE_BITS 0x000003ff | ||
54 | |||
55 | int utf8_to_utf32(const u8 *s, int len, unicode_t *pu) | ||
48 | { | 56 | { |
49 | long l; | 57 | unsigned long l; |
50 | int c0, c, nc; | 58 | int c0, c, nc; |
51 | const struct utf8_table *t; | 59 | const struct utf8_table *t; |
52 | 60 | ||
@@ -57,12 +65,13 @@ utf8_mbtowc(wchar_t *p, const __u8 *s, int n) | |||
57 | nc++; | 65 | nc++; |
58 | if ((c0 & t->cmask) == t->cval) { | 66 | if ((c0 & t->cmask) == t->cval) { |
59 | l &= t->lmask; | 67 | l &= t->lmask; |
60 | if (l < t->lval) | 68 | if (l < t->lval || l > UNICODE_MAX || |
69 | (l & SURROGATE_MASK) == SURROGATE_PAIR) | ||
61 | return -1; | 70 | return -1; |
62 | *p = l; | 71 | *pu = (unicode_t) l; |
63 | return nc; | 72 | return nc; |
64 | } | 73 | } |
65 | if (n <= nc) | 74 | if (len <= nc) |
66 | return -1; | 75 | return -1; |
67 | s++; | 76 | s++; |
68 | c = (*s ^ 0x80) & 0xFF; | 77 | c = (*s ^ 0x80) & 0xFF; |
@@ -72,76 +81,119 @@ utf8_mbtowc(wchar_t *p, const __u8 *s, int n) | |||
72 | } | 81 | } |
73 | return -1; | 82 | return -1; |
74 | } | 83 | } |
84 | EXPORT_SYMBOL(utf8_to_utf32); | ||
75 | 85 | ||
76 | int | 86 | int utf32_to_utf8(unicode_t u, u8 *s, int maxlen) |
77 | utf8_mbstowcs(wchar_t *pwcs, const __u8 *s, int n) | ||
78 | { | 87 | { |
79 | __u16 *op; | 88 | unsigned long l; |
80 | const __u8 *ip; | ||
81 | int size; | ||
82 | |||
83 | op = pwcs; | ||
84 | ip = s; | ||
85 | while (*ip && n > 0) { | ||
86 | if (*ip & 0x80) { | ||
87 | size = utf8_mbtowc(op, ip, n); | ||
88 | if (size == -1) { | ||
89 | /* Ignore character and move on */ | ||
90 | ip++; | ||
91 | n--; | ||
92 | } else { | ||
93 | op++; | ||
94 | ip += size; | ||
95 | n -= size; | ||
96 | } | ||
97 | } else { | ||
98 | *op++ = *ip++; | ||
99 | n--; | ||
100 | } | ||
101 | } | ||
102 | return (op - pwcs); | ||
103 | } | ||
104 | |||
105 | int | ||
106 | utf8_wctomb(__u8 *s, wchar_t wc, int maxlen) | ||
107 | { | ||
108 | long l; | ||
109 | int c, nc; | 89 | int c, nc; |
110 | const struct utf8_table *t; | 90 | const struct utf8_table *t; |
111 | 91 | ||
112 | if (!s) | 92 | if (!s) |
113 | return 0; | 93 | return 0; |
114 | 94 | ||
115 | l = wc; | 95 | l = u; |
96 | if (l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR) | ||
97 | return -1; | ||
98 | |||
116 | nc = 0; | 99 | nc = 0; |
117 | for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) { | 100 | for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) { |
118 | nc++; | 101 | nc++; |
119 | if (l <= t->lmask) { | 102 | if (l <= t->lmask) { |
120 | c = t->shift; | 103 | c = t->shift; |
121 | *s = t->cval | (l >> c); | 104 | *s = (u8) (t->cval | (l >> c)); |
122 | while (c > 0) { | 105 | while (c > 0) { |
123 | c -= 6; | 106 | c -= 6; |
124 | s++; | 107 | s++; |
125 | *s = 0x80 | ((l >> c) & 0x3F); | 108 | *s = (u8) (0x80 | ((l >> c) & 0x3F)); |
126 | } | 109 | } |
127 | return nc; | 110 | return nc; |
128 | } | 111 | } |
129 | } | 112 | } |
130 | return -1; | 113 | return -1; |
131 | } | 114 | } |
115 | EXPORT_SYMBOL(utf32_to_utf8); | ||
132 | 116 | ||
133 | int | 117 | int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs) |
134 | utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen) | ||
135 | { | 118 | { |
136 | const __u16 *ip; | 119 | u16 *op; |
137 | __u8 *op; | ||
138 | int size; | 120 | int size; |
121 | unicode_t u; | ||
122 | |||
123 | op = pwcs; | ||
124 | while (*s && len > 0) { | ||
125 | if (*s & 0x80) { | ||
126 | size = utf8_to_utf32(s, len, &u); | ||
127 | if (size < 0) { | ||
128 | /* Ignore character and move on */ | ||
129 | size = 1; | ||
130 | } else if (u >= PLANE_SIZE) { | ||
131 | u -= PLANE_SIZE; | ||
132 | *op++ = (wchar_t) (SURROGATE_PAIR | | ||
133 | ((u >> 10) & SURROGATE_BITS)); | ||
134 | *op++ = (wchar_t) (SURROGATE_PAIR | | ||
135 | SURROGATE_LOW | | ||
136 | (u & SURROGATE_BITS)); | ||
137 | } else { | ||
138 | *op++ = (wchar_t) u; | ||
139 | } | ||
140 | s += size; | ||
141 | len -= size; | ||
142 | } else { | ||
143 | *op++ = *s++; | ||
144 | len--; | ||
145 | } | ||
146 | } | ||
147 | return op - pwcs; | ||
148 | } | ||
149 | EXPORT_SYMBOL(utf8s_to_utf16s); | ||
150 | |||
151 | static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian) | ||
152 | { | ||
153 | switch (endian) { | ||
154 | default: | ||
155 | return c; | ||
156 | case UTF16_LITTLE_ENDIAN: | ||
157 | return __le16_to_cpu(c); | ||
158 | case UTF16_BIG_ENDIAN: | ||
159 | return __be16_to_cpu(c); | ||
160 | } | ||
161 | } | ||
162 | |||
163 | int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian, | ||
164 | u8 *s, int maxlen) | ||
165 | { | ||
166 | u8 *op; | ||
167 | int size; | ||
168 | unsigned long u, v; | ||
139 | 169 | ||
140 | op = s; | 170 | op = s; |
141 | ip = pwcs; | 171 | while (len > 0 && maxlen > 0) { |
142 | while (*ip && maxlen > 0) { | 172 | u = get_utf16(*pwcs, endian); |
143 | if (*ip > 0x7f) { | 173 | if (!u) |
144 | size = utf8_wctomb(op, *ip, maxlen); | 174 | break; |
175 | pwcs++; | ||
176 | len--; | ||
177 | if (u > 0x7f) { | ||
178 | if ((u & SURROGATE_MASK) == SURROGATE_PAIR) { | ||
179 | if (u & SURROGATE_LOW) { | ||
180 | /* Ignore character and move on */ | ||
181 | continue; | ||
182 | } | ||
183 | if (len <= 0) | ||
184 | break; | ||
185 | v = get_utf16(*pwcs, endian); | ||
186 | if ((v & SURROGATE_MASK) != SURROGATE_PAIR || | ||
187 | !(v & SURROGATE_LOW)) { | ||
188 | /* Ignore character and move on */ | ||
189 | continue; | ||
190 | } | ||
191 | u = PLANE_SIZE + ((u & SURROGATE_BITS) << 10) | ||
192 | + (v & SURROGATE_BITS); | ||
193 | pwcs++; | ||
194 | len--; | ||
195 | } | ||
196 | size = utf32_to_utf8(u, op, maxlen); | ||
145 | if (size == -1) { | 197 | if (size == -1) { |
146 | /* Ignore character and move on */ | 198 | /* Ignore character and move on */ |
147 | } else { | 199 | } else { |
@@ -149,13 +201,13 @@ utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen) | |||
149 | maxlen -= size; | 201 | maxlen -= size; |
150 | } | 202 | } |
151 | } else { | 203 | } else { |
152 | *op++ = (__u8) *ip; | 204 | *op++ = (u8) u; |
153 | maxlen--; | 205 | maxlen--; |
154 | } | 206 | } |
155 | ip++; | ||
156 | } | 207 | } |
157 | return (op - s); | 208 | return op - s; |
158 | } | 209 | } |
210 | EXPORT_SYMBOL(utf16s_to_utf8s); | ||
159 | 211 | ||
160 | int register_nls(struct nls_table * nls) | 212 | int register_nls(struct nls_table * nls) |
161 | { | 213 | { |
@@ -467,9 +519,5 @@ EXPORT_SYMBOL(unregister_nls); | |||
467 | EXPORT_SYMBOL(unload_nls); | 519 | EXPORT_SYMBOL(unload_nls); |
468 | EXPORT_SYMBOL(load_nls); | 520 | EXPORT_SYMBOL(load_nls); |
469 | EXPORT_SYMBOL(load_nls_default); | 521 | EXPORT_SYMBOL(load_nls_default); |
470 | EXPORT_SYMBOL(utf8_mbtowc); | ||
471 | EXPORT_SYMBOL(utf8_mbstowcs); | ||
472 | EXPORT_SYMBOL(utf8_wctomb); | ||
473 | EXPORT_SYMBOL(utf8_wcstombs); | ||
474 | 522 | ||
475 | MODULE_LICENSE("Dual BSD/GPL"); | 523 | MODULE_LICENSE("Dual BSD/GPL"); |
diff --git a/fs/nls/nls_utf8.c b/fs/nls/nls_utf8.c index aa2c42fdd977..0d60a44acacd 100644 --- a/fs/nls/nls_utf8.c +++ b/fs/nls/nls_utf8.c | |||
@@ -15,7 +15,11 @@ static int uni2char(wchar_t uni, unsigned char *out, int boundlen) | |||
15 | { | 15 | { |
16 | int n; | 16 | int n; |
17 | 17 | ||
18 | if ( (n = utf8_wctomb(out, uni, boundlen)) == -1) { | 18 | if (boundlen <= 0) |
19 | return -ENAMETOOLONG; | ||
20 | |||
21 | n = utf32_to_utf8(uni, out, boundlen); | ||
22 | if (n < 0) { | ||
19 | *out = '?'; | 23 | *out = '?'; |
20 | return -EINVAL; | 24 | return -EINVAL; |
21 | } | 25 | } |
@@ -25,11 +29,14 @@ static int uni2char(wchar_t uni, unsigned char *out, int boundlen) | |||
25 | static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) | 29 | static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) |
26 | { | 30 | { |
27 | int n; | 31 | int n; |
32 | unicode_t u; | ||
28 | 33 | ||
29 | if ( (n = utf8_mbtowc(uni, rawstring, boundlen)) == -1) { | 34 | n = utf8_to_utf32(rawstring, boundlen, &u); |
35 | if (n < 0 || u > MAX_WCHAR_T) { | ||
30 | *uni = 0x003f; /* ? */ | 36 | *uni = 0x003f; /* ? */ |
31 | n = -EINVAL; | 37 | return -EINVAL; |
32 | } | 38 | } |
39 | *uni = (wchar_t) u; | ||
33 | return n; | 40 | return n; |
34 | } | 41 | } |
35 | 42 | ||
diff --git a/include/linux/nls.h b/include/linux/nls.h index 52b1a76c1b43..d47beef08dfd 100644 --- a/include/linux/nls.h +++ b/include/linux/nls.h | |||
@@ -3,8 +3,23 @@ | |||
3 | 3 | ||
4 | #include <linux/init.h> | 4 | #include <linux/init.h> |
5 | 5 | ||
6 | /* unicode character */ | 6 | /* Unicode has changed over the years. Unicode code points no longer |
7 | typedef __u16 wchar_t; | 7 | * fit into 16 bits; as of Unicode 5 valid code points range from 0 |
8 | * to 0x10ffff (17 planes, where each plane holds 65536 code points). | ||
9 | * | ||
10 | * The original decision to represent Unicode characters as 16-bit | ||
11 | * wchar_t values is now outdated. But plane 0 still includes the | ||
12 | * most commonly used characters, so we will retain it. The newer | ||
13 | * 32-bit unicode_t type can be used when it is necessary to | ||
14 | * represent the full Unicode character set. | ||
15 | */ | ||
16 | |||
17 | /* Plane-0 Unicode character */ | ||
18 | typedef u16 wchar_t; | ||
19 | #define MAX_WCHAR_T 0xffff | ||
20 | |||
21 | /* Arbitrary Unicode character */ | ||
22 | typedef u32 unicode_t; | ||
8 | 23 | ||
9 | struct nls_table { | 24 | struct nls_table { |
10 | const char *charset; | 25 | const char *charset; |
@@ -21,6 +36,13 @@ struct nls_table { | |||
21 | /* this value hold the maximum octet of charset */ | 36 | /* this value hold the maximum octet of charset */ |
22 | #define NLS_MAX_CHARSET_SIZE 6 /* for UTF-8 */ | 37 | #define NLS_MAX_CHARSET_SIZE 6 /* for UTF-8 */ |
23 | 38 | ||
39 | /* Byte order for UTF-16 strings */ | ||
40 | enum utf16_endian { | ||
41 | UTF16_HOST_ENDIAN, | ||
42 | UTF16_LITTLE_ENDIAN, | ||
43 | UTF16_BIG_ENDIAN | ||
44 | }; | ||
45 | |||
24 | /* nls.c */ | 46 | /* nls.c */ |
25 | extern int register_nls(struct nls_table *); | 47 | extern int register_nls(struct nls_table *); |
26 | extern int unregister_nls(struct nls_table *); | 48 | extern int unregister_nls(struct nls_table *); |
@@ -28,10 +50,11 @@ extern struct nls_table *load_nls(char *); | |||
28 | extern void unload_nls(struct nls_table *); | 50 | extern void unload_nls(struct nls_table *); |
29 | extern struct nls_table *load_nls_default(void); | 51 | extern struct nls_table *load_nls_default(void); |
30 | 52 | ||
31 | extern int utf8_mbtowc(wchar_t *, const __u8 *, int); | 53 | extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu); |
32 | extern int utf8_mbstowcs(wchar_t *, const __u8 *, int); | 54 | extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen); |
33 | extern int utf8_wctomb(__u8 *, wchar_t, int); | 55 | extern int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs); |
34 | extern int utf8_wcstombs(__u8 *, const wchar_t *, int); | 56 | extern int utf16s_to_utf8s(const wchar_t *pwcs, int len, |
57 | enum utf16_endian endian, u8 *s, int maxlen); | ||
35 | 58 | ||
36 | static inline unsigned char nls_tolower(struct nls_table *t, unsigned char c) | 59 | static inline unsigned char nls_tolower(struct nls_table *t, unsigned char c) |
37 | { | 60 | { |