diff options
author | Adam Tlalka <atlka@pg.gda.pl> | 2006-09-29 04:59:53 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-09-29 12:18:11 -0400 |
commit | d4328b40af16bae62ff8f854060d33daad237093 (patch) | |
tree | 7fdb3bb354da8a04efc19ec0eb9cdabe7adef98e /drivers/char/vt.c | |
parent | 08c67d2a5d1c97b7997dc9589f702d875c63de07 (diff) |
[PATCH] console utf-8 mode fixes
Fix utf-8 mode so that alternate charset modes always work according to the
control sequences interpreted in the do_con_trol function, preserving backward
US-ASCII and VT100 semigraphics compatibility.
Malformed utf-8 sequences are represented as sequences of replacement
glyphs, original codes or '?' as a last resort.
unicode-xterm, gnome-terminal, kconsole and other terminal emulators in
utf-8 mode respect acsc, enacs, rmacs sequences. Also I found that some
important system programs (from the Debian distro) use acsc in utf-8 mode -
dselect, aptitude, w3m for example.
Signed-off-by: Adam Tlalka <atlka@pg.gda.pl>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/char/vt.c')
-rw-r--r-- | drivers/char/vt.c | 79 |
1 files changed, 52 insertions, 27 deletions
diff --git a/drivers/char/vt.c b/drivers/char/vt.c index d7d880f8147b..0fca83ededff 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c | |||
@@ -63,6 +63,13 @@ | |||
63 | * | 63 | * |
64 | * Removed console_lock, enabled interrupts across all console operations | 64 | * Removed console_lock, enabled interrupts across all console operations |
65 | * 13 March 2001, Andrew Morton | 65 | * 13 March 2001, Andrew Morton |
66 | * | ||
67 | * Fixed UTF-8 mode so alternate charset modes always work according | ||
68 | * to control sequences interpreted in do_con_trol function | ||
69 | * preserving backward VT100 semigraphics compatibility, | ||
70 | * malformed UTF sequences represented as sequences of replacement glyphs, | ||
71 | * original codes or '?' as a last resort if replacement glyph is undefined | ||
72 | * by Adam Tla/lka <atlka@pg.gda.pl>, Aug 2006 | ||
66 | */ | 73 | */ |
67 | 74 | ||
68 | #include <linux/module.h> | 75 | #include <linux/module.h> |
@@ -2005,17 +2012,23 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co | |||
2005 | /* Do no translation at all in control states */ | 2012 | /* Do no translation at all in control states */ |
2006 | if (vc->vc_state != ESnormal) { | 2013 | if (vc->vc_state != ESnormal) { |
2007 | tc = c; | 2014 | tc = c; |
2008 | } else if (vc->vc_utf) { | 2015 | } else if (vc->vc_utf && !vc->vc_disp_ctrl) { |
2009 | /* Combine UTF-8 into Unicode */ | 2016 | /* Combine UTF-8 into Unicode */ |
2010 | /* Incomplete characters silently ignored */ | 2017 | /* Malformed sequences as sequences of replacement glyphs */ |
2018 | rescan_last_byte: | ||
2011 | if(c > 0x7f) { | 2019 | if(c > 0x7f) { |
2012 | if (vc->vc_utf_count > 0 && (c & 0xc0) == 0x80) { | 2020 | if (vc->vc_utf_count) { |
2013 | vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f); | 2021 | if ((c & 0xc0) == 0x80) { |
2014 | vc->vc_utf_count--; | 2022 | vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f); |
2015 | if (vc->vc_utf_count == 0) | 2023 | if (--vc->vc_utf_count) { |
2016 | tc = c = vc->vc_utf_char; | 2024 | vc->vc_npar++; |
2017 | else continue; | 2025 | continue; |
2026 | } | ||
2027 | tc = c = vc->vc_utf_char; | ||
2028 | } else | ||
2029 | goto replacement_glyph; | ||
2018 | } else { | 2030 | } else { |
2031 | vc->vc_npar = 0; | ||
2019 | if ((c & 0xe0) == 0xc0) { | 2032 | if ((c & 0xe0) == 0xc0) { |
2020 | vc->vc_utf_count = 1; | 2033 | vc->vc_utf_count = 1; |
2021 | vc->vc_utf_char = (c & 0x1f); | 2034 | vc->vc_utf_char = (c & 0x1f); |
@@ -2032,14 +2045,15 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co | |||
2032 | vc->vc_utf_count = 5; | 2045 | vc->vc_utf_count = 5; |
2033 | vc->vc_utf_char = (c & 0x01); | 2046 | vc->vc_utf_char = (c & 0x01); |
2034 | } else | 2047 | } else |
2035 | vc->vc_utf_count = 0; | 2048 | goto replacement_glyph; |
2036 | continue; | 2049 | continue; |
2037 | } | 2050 | } |
2038 | } else { | 2051 | } else { |
2052 | if (vc->vc_utf_count) | ||
2053 | goto replacement_glyph; | ||
2039 | tc = c; | 2054 | tc = c; |
2040 | vc->vc_utf_count = 0; | ||
2041 | } | 2055 | } |
2042 | } else { /* no utf */ | 2056 | } else { /* no utf or alternate charset mode */ |
2043 | tc = vc->vc_translate[vc->vc_toggle_meta ? (c | 0x80) : c]; | 2057 | tc = vc->vc_translate[vc->vc_toggle_meta ? (c | 0x80) : c]; |
2044 | } | 2058 | } |
2045 | 2059 | ||
@@ -2054,31 +2068,33 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co | |||
2054 | * direct-to-font zone in UTF-8 mode. | 2068 | * direct-to-font zone in UTF-8 mode. |
2055 | */ | 2069 | */ |
2056 | ok = tc && (c >= 32 || | 2070 | ok = tc && (c >= 32 || |
2057 | (!vc->vc_utf && !(((vc->vc_disp_ctrl ? CTRL_ALWAYS | 2071 | !(vc->vc_disp_ctrl ? (CTRL_ALWAYS >> c) & 1 : |
2058 | : CTRL_ACTION) >> c) & 1))) | 2072 | vc->vc_utf || ((CTRL_ACTION >> c) & 1))) |
2059 | && (c != 127 || vc->vc_disp_ctrl) | 2073 | && (c != 127 || vc->vc_disp_ctrl) |
2060 | && (c != 128+27); | 2074 | && (c != 128+27); |
2061 | 2075 | ||
2062 | if (vc->vc_state == ESnormal && ok) { | 2076 | if (vc->vc_state == ESnormal && ok) { |
2063 | /* Now try to find out how to display it */ | 2077 | /* Now try to find out how to display it */ |
2064 | tc = conv_uni_to_pc(vc, tc); | 2078 | tc = conv_uni_to_pc(vc, tc); |
2065 | if ( tc == -4 ) { | 2079 | if (tc & ~charmask) { |
2080 | if ( tc == -4 ) { | ||
2066 | /* If we got -4 (not found) then see if we have | 2081 | /* If we got -4 (not found) then see if we have |
2067 | defined a replacement character (U+FFFD) */ | 2082 | defined a replacement character (U+FFFD) */ |
2068 | tc = conv_uni_to_pc(vc, 0xfffd); | 2083 | replacement_glyph: |
2069 | 2084 | tc = conv_uni_to_pc(vc, 0xfffd); | |
2070 | /* One reason for the -4 can be that we just | 2085 | if (!(tc & ~charmask)) |
2071 | did a clear_unimap(); | 2086 | goto display_glyph; |
2072 | try at least to show something. */ | 2087 | } else if ( tc != -3 ) |
2073 | if (tc == -4) | 2088 | continue; /* nothing to display */ |
2074 | tc = c; | 2089 | /* no hash table or no replacement -- |
2075 | } else if ( tc == -3 ) { | 2090 | * hope for the best */ |
2076 | /* Bad hash table -- hope for the best */ | 2091 | if ( c & ~charmask ) |
2077 | tc = c; | 2092 | tc = '?'; |
2078 | } | 2093 | else |
2079 | if (tc & ~charmask) | 2094 | tc = c; |
2080 | continue; /* Conversion failed */ | 2095 | } |
2081 | 2096 | ||
2097 | display_glyph: | ||
2082 | if (vc->vc_need_wrap || vc->vc_decim) | 2098 | if (vc->vc_need_wrap || vc->vc_decim) |
2083 | FLUSH | 2099 | FLUSH |
2084 | if (vc->vc_need_wrap) { | 2100 | if (vc->vc_need_wrap) { |
@@ -2102,6 +2118,15 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co | |||
2102 | vc->vc_x++; | 2118 | vc->vc_x++; |
2103 | draw_to = (vc->vc_pos += 2); | 2119 | draw_to = (vc->vc_pos += 2); |
2104 | } | 2120 | } |
2121 | if (vc->vc_utf_count) { | ||
2122 | if (vc->vc_npar) { | ||
2123 | vc->vc_npar--; | ||
2124 | goto display_glyph; | ||
2125 | } | ||
2126 | vc->vc_utf_count = 0; | ||
2127 | c = orig; | ||
2128 | goto rescan_last_byte; | ||
2129 | } | ||
2105 | continue; | 2130 | continue; |
2106 | } | 2131 | } |
2107 | FLUSH | 2132 | FLUSH |