aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nls/nls_cp936.c
diff options
context:
space:
mode:
authorJun Chen <jimcgnu@yahoo.com>2006-12-06 23:40:37 -0500
committerLinus Torvalds <torvalds@woody.osdl.org>2006-12-07 11:39:46 -0500
commitf46ba2235feab5e686b1234c328a0577cde86e21 (patch)
tree91b800c09f4097462838a930a61adfa93286b2d9 /fs/nls/nls_cp936.c
parent15ad7cdcfd76450d4beebc789ec646664238184d (diff)
[PATCH] fs: make nls_cp936.c handle some U00XY characters and U20AC correctly
Twenty characters in cp936 are not handled correctly. They are all in the U00 plane. nls_cp936 converts every U00XY to the single byte XY, but this is not correct for some characters (e.g. U00B7 -> A1A4, U00A8 -> A1A7). This problem is fixed by generating u2c_00 based on all c2u_xx tables and changing uni2char() to give the U00 plane special handling. The "€" (U20AC, 0x80 in cp936) is now also handled properly. Acked-by: Gang Chen <cgdlut@gmail.com> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs/nls/nls_cp936.c')
-rw-r--r--fs/nls/nls_cp936.c113
1 files changed, 103 insertions, 10 deletions
diff --git a/fs/nls/nls_cp936.c b/fs/nls/nls_cp936.c
index 046fde8170ea..65e640c61c8b 100644
--- a/fs/nls/nls_cp936.c
+++ b/fs/nls/nls_cp936.c
@@ -4421,6 +4421,73 @@ static wchar_t *page_charset2uni[256] = {
4421 c2u_F8, c2u_F9, c2u_FA, c2u_FB, c2u_FC, c2u_FD, c2u_FE, NULL, 4421 c2u_F8, c2u_F9, c2u_FA, c2u_FB, c2u_FC, c2u_FD, c2u_FE, NULL,
4422}; 4422};
4423 4423
4424static unsigned char u2c_00[512] = {
4425 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */
4426 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */
4427 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0B */
4428 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0C-0x0F */
4429 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x13 */
4430 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x14-0x17 */
4431 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1B */
4432 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1C-0x1F */
4433 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x23 */
4434 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */
4435 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2B */
4436 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2C-0x2F */
4437 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x33 */
4438 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x34-0x37 */
4439 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */
4440 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3C-0x3F */
4441 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x43 */
4442 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x44-0x47 */
4443 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4B */
4444 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x4C-0x4F */
4445 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x53 */
4446 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x54-0x57 */
4447 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */
4448 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x5C-0x5F */
4449 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x63 */
4450 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x64-0x67 */
4451 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6B */
4452 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x6C-0x6F */
4453 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x73 */
4454 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x74-0x77 */
4455 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7B */
4456 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x7C-0x7F */
4457 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x83 */
4458 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x84-0x87 */
4459 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8B */
4460 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x8C-0x8F */
4461 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x93 */
4462 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x94-0x97 */
4463 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9B */
4464 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9C-0x9F */
4465 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA0-0xA3 */
4466 0xA1, 0xE8, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xEC, /* 0xA4-0xA7 */
4467 0xA1, 0xA7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8-0xAB */
4468 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xAC-0xAF */
4469 0xA1, 0xE3, 0xA1, 0xC0, 0x00, 0x00, 0x00, 0x00, /* 0xB0-0xB3 */
4470 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xA4, /* 0xB4-0xB7 */
4471 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB8-0xBB */
4472 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xBC-0xBF */
4473 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC0-0xC3 */
4474 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC4-0xC7 */
4475 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC8-0xCB */
4476 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xCC-0xCF */
4477 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD0-0xD3 */
4478 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xC1, /* 0xD4-0xD7 */
4479 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD8-0xDB */
4480 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xDC-0xDF */
4481 0xA8, 0xA4, 0xA8, 0xA2, 0x00, 0x00, 0x00, 0x00, /* 0xE0-0xE3 */
4482 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE4-0xE7 */
4483 0xA8, 0xA8, 0xA8, 0xA6, 0xA8, 0xBA, 0x00, 0x00, /* 0xE8-0xEB */
4484 0xA8, 0xAC, 0xA8, 0xAA, 0x00, 0x00, 0x00, 0x00, /* 0xEC-0xEF */
4485 0x00, 0x00, 0x00, 0x00, 0xA8, 0xB0, 0xA8, 0xAE, /* 0xF0-0xF3 */
4486 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xC2, /* 0xF4-0xF7 */
4487 0x00, 0x00, 0xA8, 0xB4, 0xA8, 0xB2, 0x00, 0x00, /* 0xF8-0xFB */
4488 0xA8, 0xB9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xFC-0xFF */
4489};
4490
4424static unsigned char u2c_01[512] = { 4491static unsigned char u2c_01[512] = {
4425 0xA8, 0xA1, 0xA8, 0xA1, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ 4492 0xA8, 0xA1, 0xA8, 0xA1, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */
4426 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ 4493 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */
@@ -10825,7 +10892,7 @@ static unsigned char u2c_FF[512] = {
10825}; 10892};
10826 10893
10827static unsigned char *page_uni2charset[256] = { 10894static unsigned char *page_uni2charset[256] = {
10828 NULL, u2c_01, u2c_02, u2c_03, u2c_04, NULL, NULL, NULL, 10895 u2c_00, u2c_01, u2c_02, u2c_03, u2c_04, NULL, NULL, NULL,
10829 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 10896 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
10830 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 10897 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
10831 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 10898 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -10936,11 +11003,34 @@ static int uni2char(const wchar_t uni,
10936 unsigned char *uni2charset; 11003 unsigned char *uni2charset;
10937 unsigned char cl = uni&0xFF; 11004 unsigned char cl = uni&0xFF;
10938 unsigned char ch = (uni>>8)&0xFF; 11005 unsigned char ch = (uni>>8)&0xFF;
10939 int n; 11006 unsigned char out0,out1;
10940 11007
10941 if (boundlen <= 0) 11008 if (boundlen <= 0)
10942 return -ENAMETOOLONG; 11009 return -ENAMETOOLONG;
10943 11010
11011 if (uni == 0x20ac) {/* Euro symbol.The only exception with a non-ascii unicode */
11012 out[0] = 0x80;
11013 return 1;
11014 }
11015
11016 if (ch == 0) { /* handle the U00 plane*/
11017 /* if (cl == 0) return -EINVAL;*/ /*U0000 is legal in cp936*/
11018 out0 = u2c_00[cl*2];
11019 out1 = u2c_00[cl*2+1];
11020 if (out0 == 0x00 && out1 == 0x00) {
11021 if (cl<0x80) {
11022 out[0] = cl;
11023 return 1;
11024 }
11025 return -EINVAL;
11026 } else {
11027 if (boundlen <= 1)
11028 return -ENAMETOOLONG;
11029 out[0] = out0;
11030 out[1] = out1;
11031 return 2;
11032 }
11033 }
10944 11034
10945 uni2charset = page_uni2charset[ch]; 11035 uni2charset = page_uni2charset[ch];
10946 if (uni2charset) { 11036 if (uni2charset) {
@@ -10950,15 +11040,10 @@ static int uni2char(const wchar_t uni,
10950 out[1] = uni2charset[cl*2+1]; 11040 out[1] = uni2charset[cl*2+1];
10951 if (out[0] == 0x00 && out[1] == 0x00) 11041 if (out[0] == 0x00 && out[1] == 0x00)
10952 return -EINVAL; 11042 return -EINVAL;
10953 n = 2; 11043 return 2;
10954 } else if (ch==0 && cl) {
10955 out[0] = cl;
10956 n = 1;
10957 } 11044 }
10958 else 11045 else
10959 return -EINVAL; 11046 return -EINVAL;
10960
10961 return n;
10962} 11047}
10963 11048
10964static int char2uni(const unsigned char *rawstring, int boundlen, 11049static int char2uni(const unsigned char *rawstring, int boundlen,
@@ -10972,7 +11057,11 @@ static int char2uni(const unsigned char *rawstring, int boundlen,
10972 return -ENAMETOOLONG; 11057 return -ENAMETOOLONG;
10973 11058
10974 if (boundlen == 1) { 11059 if (boundlen == 1) {
10975 *uni = rawstring[0]; 11060 if (rawstring[0]==0x80) { /* Euro symbol.The only exception with a non-ascii unicode */
11061 *uni = 0x20ac;
11062 } else {
11063 *uni = rawstring[0];
11064 }
10976 return 1; 11065 return 1;
10977 } 11066 }
10978 11067
@@ -10986,7 +11075,11 @@ static int char2uni(const unsigned char *rawstring, int boundlen,
10986 return -EINVAL; 11075 return -EINVAL;
10987 n = 2; 11076 n = 2;
10988 } else{ 11077 } else{
10989 *uni = ch; 11078 if (ch==0x80) {/* Euro symbol.The only exception with a non-ascii unicode */
11079 *uni = 0x20ac;
11080 } else {
11081 *uni = ch;
11082 }
10990 n = 1; 11083 n = 1;
10991 } 11084 }
10992 return n; 11085 return n;