diff options
author | Jun Chen <jimcgnu@yahoo.com> | 2006-12-06 23:40:37 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.osdl.org> | 2006-12-07 11:39:46 -0500 |
commit | f46ba2235feab5e686b1234c328a0577cde86e21 (patch) | |
tree | 91b800c09f4097462838a930a61adfa93286b2d9 /fs/nls/nls_cp936.c | |
parent | 15ad7cdcfd76450d4beebc789ec646664238184d (diff) |
[PATCH] fs: make nls_cp936.c handle some U00XY characters and U20AC correctly
Twenty characters in cp936 are not correctly handled. They're all in the
U00 plane. nls_cp936 converts all U00XY to XY but this is not correct for
some characters (e.g. U00B7 -> A1A4, U00A8 -> A1A7).
This problem is fixed by generating u2c_00 based on all c2u_xx and changing
uni2char() to give the U00 plane special handling. The "€" (U20AC, 0x80 in
cp936) is also handled properly.
Acked-by: Gang Chen <cgdlut@gmail.com>
Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs/nls/nls_cp936.c')
-rw-r--r-- | fs/nls/nls_cp936.c | 113 |
1 files changed, 103 insertions, 10 deletions
diff --git a/fs/nls/nls_cp936.c b/fs/nls/nls_cp936.c index 046fde8170ea..65e640c61c8b 100644 --- a/fs/nls/nls_cp936.c +++ b/fs/nls/nls_cp936.c | |||
@@ -4421,6 +4421,73 @@ static wchar_t *page_charset2uni[256] = { | |||
4421 | c2u_F8, c2u_F9, c2u_FA, c2u_FB, c2u_FC, c2u_FD, c2u_FE, NULL, | 4421 | c2u_F8, c2u_F9, c2u_FA, c2u_FB, c2u_FC, c2u_FD, c2u_FE, NULL, |
4422 | }; | 4422 | }; |
4423 | 4423 | ||
4424 | static unsigned char u2c_00[512] = { | ||
4425 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ | ||
4426 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ | ||
4427 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0B */ | ||
4428 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0C-0x0F */ | ||
4429 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x13 */ | ||
4430 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x14-0x17 */ | ||
4431 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1B */ | ||
4432 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1C-0x1F */ | ||
4433 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x23 */ | ||
4434 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */ | ||
4435 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2B */ | ||
4436 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2C-0x2F */ | ||
4437 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x33 */ | ||
4438 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x34-0x37 */ | ||
4439 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */ | ||
4440 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3C-0x3F */ | ||
4441 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x43 */ | ||
4442 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x44-0x47 */ | ||
4443 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4B */ | ||
4444 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x4C-0x4F */ | ||
4445 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x53 */ | ||
4446 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x54-0x57 */ | ||
4447 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */ | ||
4448 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x5C-0x5F */ | ||
4449 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x63 */ | ||
4450 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x64-0x67 */ | ||
4451 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6B */ | ||
4452 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x6C-0x6F */ | ||
4453 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x73 */ | ||
4454 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x74-0x77 */ | ||
4455 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7B */ | ||
4456 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x7C-0x7F */ | ||
4457 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x83 */ | ||
4458 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x84-0x87 */ | ||
4459 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8B */ | ||
4460 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x8C-0x8F */ | ||
4461 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x93 */ | ||
4462 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x94-0x97 */ | ||
4463 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9B */ | ||
4464 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9C-0x9F */ | ||
4465 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA0-0xA3 */ | ||
4466 | 0xA1, 0xE8, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xEC, /* 0xA4-0xA7 */ | ||
4467 | 0xA1, 0xA7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8-0xAB */ | ||
4468 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xAC-0xAF */ | ||
4469 | 0xA1, 0xE3, 0xA1, 0xC0, 0x00, 0x00, 0x00, 0x00, /* 0xB0-0xB3 */ | ||
4470 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xA4, /* 0xB4-0xB7 */ | ||
4471 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB8-0xBB */ | ||
4472 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xBC-0xBF */ | ||
4473 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC0-0xC3 */ | ||
4474 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC4-0xC7 */ | ||
4475 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC8-0xCB */ | ||
4476 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xCC-0xCF */ | ||
4477 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD0-0xD3 */ | ||
4478 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xC1, /* 0xD4-0xD7 */ | ||
4479 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD8-0xDB */ | ||
4480 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xDC-0xDF */ | ||
4481 | 0xA8, 0xA4, 0xA8, 0xA2, 0x00, 0x00, 0x00, 0x00, /* 0xE0-0xE3 */ | ||
4482 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE4-0xE7 */ | ||
4483 | 0xA8, 0xA8, 0xA8, 0xA6, 0xA8, 0xBA, 0x00, 0x00, /* 0xE8-0xEB */ | ||
4484 | 0xA8, 0xAC, 0xA8, 0xAA, 0x00, 0x00, 0x00, 0x00, /* 0xEC-0xEF */ | ||
4485 | 0x00, 0x00, 0x00, 0x00, 0xA8, 0xB0, 0xA8, 0xAE, /* 0xF0-0xF3 */ | ||
4486 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xC2, /* 0xF4-0xF7 */ | ||
4487 | 0x00, 0x00, 0xA8, 0xB4, 0xA8, 0xB2, 0x00, 0x00, /* 0xF8-0xFB */ | ||
4488 | 0xA8, 0xB9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xFC-0xFF */ | ||
4489 | }; | ||
4490 | |||
4424 | static unsigned char u2c_01[512] = { | 4491 | static unsigned char u2c_01[512] = { |
4425 | 0xA8, 0xA1, 0xA8, 0xA1, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ | 4492 | 0xA8, 0xA1, 0xA8, 0xA1, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */ |
4426 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ | 4493 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */ |
@@ -10825,7 +10892,7 @@ static unsigned char u2c_FF[512] = { | |||
10825 | }; | 10892 | }; |
10826 | 10893 | ||
10827 | static unsigned char *page_uni2charset[256] = { | 10894 | static unsigned char *page_uni2charset[256] = { |
10828 | NULL, u2c_01, u2c_02, u2c_03, u2c_04, NULL, NULL, NULL, | 10895 | u2c_00, u2c_01, u2c_02, u2c_03, u2c_04, NULL, NULL, NULL, |
10829 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 10896 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
10830 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 10897 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
10831 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 10898 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
@@ -10936,11 +11003,34 @@ static int uni2char(const wchar_t uni, | |||
10936 | unsigned char *uni2charset; | 11003 | unsigned char *uni2charset; |
10937 | unsigned char cl = uni&0xFF; | 11004 | unsigned char cl = uni&0xFF; |
10938 | unsigned char ch = (uni>>8)&0xFF; | 11005 | unsigned char ch = (uni>>8)&0xFF; |
10939 | int n; | 11006 | unsigned char out0,out1; |
10940 | 11007 | ||
10941 | if (boundlen <= 0) | 11008 | if (boundlen <= 0) |
10942 | return -ENAMETOOLONG; | 11009 | return -ENAMETOOLONG; |
10943 | 11010 | ||
11011 | if (uni == 0x20ac) {/* Euro symbol.The only exception with a non-ascii unicode */ | ||
11012 | out[0] = 0x80; | ||
11013 | return 1; | ||
11014 | } | ||
11015 | |||
11016 | if (ch == 0) { /* handle the U00 plane*/ | ||
11017 | /* if (cl == 0) return -EINVAL;*/ /*U0000 is legal in cp936*/ | ||
11018 | out0 = u2c_00[cl*2]; | ||
11019 | out1 = u2c_00[cl*2+1]; | ||
11020 | if (out0 == 0x00 && out1 == 0x00) { | ||
11021 | if (cl<0x80) { | ||
11022 | out[0] = cl; | ||
11023 | return 1; | ||
11024 | } | ||
11025 | return -EINVAL; | ||
11026 | } else { | ||
11027 | if (boundlen <= 1) | ||
11028 | return -ENAMETOOLONG; | ||
11029 | out[0] = out0; | ||
11030 | out[1] = out1; | ||
11031 | return 2; | ||
11032 | } | ||
11033 | } | ||
10944 | 11034 | ||
10945 | uni2charset = page_uni2charset[ch]; | 11035 | uni2charset = page_uni2charset[ch]; |
10946 | if (uni2charset) { | 11036 | if (uni2charset) { |
@@ -10950,15 +11040,10 @@ static int uni2char(const wchar_t uni, | |||
10950 | out[1] = uni2charset[cl*2+1]; | 11040 | out[1] = uni2charset[cl*2+1]; |
10951 | if (out[0] == 0x00 && out[1] == 0x00) | 11041 | if (out[0] == 0x00 && out[1] == 0x00) |
10952 | return -EINVAL; | 11042 | return -EINVAL; |
10953 | n = 2; | 11043 | return 2; |
10954 | } else if (ch==0 && cl) { | ||
10955 | out[0] = cl; | ||
10956 | n = 1; | ||
10957 | } | 11044 | } |
10958 | else | 11045 | else |
10959 | return -EINVAL; | 11046 | return -EINVAL; |
10960 | |||
10961 | return n; | ||
10962 | } | 11047 | } |
10963 | 11048 | ||
10964 | static int char2uni(const unsigned char *rawstring, int boundlen, | 11049 | static int char2uni(const unsigned char *rawstring, int boundlen, |
@@ -10972,7 +11057,11 @@ static int char2uni(const unsigned char *rawstring, int boundlen, | |||
10972 | return -ENAMETOOLONG; | 11057 | return -ENAMETOOLONG; |
10973 | 11058 | ||
10974 | if (boundlen == 1) { | 11059 | if (boundlen == 1) { |
10975 | *uni = rawstring[0]; | 11060 | if (rawstring[0]==0x80) { /* Euro symbol.The only exception with a non-ascii unicode */ |
11061 | *uni = 0x20ac; | ||
11062 | } else { | ||
11063 | *uni = rawstring[0]; | ||
11064 | } | ||
10976 | return 1; | 11065 | return 1; |
10977 | } | 11066 | } |
10978 | 11067 | ||
@@ -10986,7 +11075,11 @@ static int char2uni(const unsigned char *rawstring, int boundlen, | |||
10986 | return -EINVAL; | 11075 | return -EINVAL; |
10987 | n = 2; | 11076 | n = 2; |
10988 | } else{ | 11077 | } else{ |
10989 | *uni = ch; | 11078 | if (ch==0x80) {/* Euro symbol.The only exception with a non-ascii unicode */ |
11079 | *uni = 0x20ac; | ||
11080 | } else { | ||
11081 | *uni = ch; | ||
11082 | } | ||
10990 | n = 1; | 11083 | n = 1; |
10991 | } | 11084 | } |
10992 | return n; | 11085 | return n; |