aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Adam Tlalka <atlka@pg.gda.pl> 2006-09-29 04:59:53 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-09-29 12:18:11 -0400
commit d4328b40af16bae62ff8f854060d33daad237093 (patch)
tree 7fdb3bb354da8a04efc19ec0eb9cdabe7adef98e
parent 08c67d2a5d1c97b7997dc9589f702d875c63de07 (diff)
[PATCH] console utf-8 mode fixes
Fix utf-8 mode so alternate charset modes always work according to control sequences interpreted in the do_con_trol function, preserving backward US-ASCII and VT100 semigraphics compatibility. Malformed utf-8 sequences are represented as sequences of replacement glyphs, original codes or '?' as a last resort. unicode-xterm, gnome-terminal, kconsole and other terminal emulators in utf-8 mode respect acsc, enacs, rmacs sequences. I also found that some important system programs (from the Debian distro) use acsc in utf-8 mode - dselect, aptitude, w3m for example. Signed-off-by: Adam Tlalka <atlka@pg.gda.pl> Acked-by: Alan Cox <alan@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- drivers/char/vt.c 79
1 files changed, 52 insertions, 27 deletions
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index d7d880f8147b..0fca83ededff 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -63,6 +63,13 @@
63 * 63 *
64 * Removed console_lock, enabled interrupts across all console operations 64 * Removed console_lock, enabled interrupts across all console operations
65 * 13 March 2001, Andrew Morton 65 * 13 March 2001, Andrew Morton
66 *
67 * Fixed UTF-8 mode so alternate charset modes always work according
68 * to control sequences interpreted in do_con_trol function
69 * preserving backward VT100 semigraphics compatibility,
70 * malformed UTF sequences represented as sequences of replacement glyphs,
71 * original codes or '?' as a last resort if replacement glyph is undefined
72 * by Adam Tla/lka <atlka@pg.gda.pl>, Aug 2006
66 */ 73 */
67 74
68#include <linux/module.h> 75#include <linux/module.h>
@@ -2005,17 +2012,23 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
2005 /* Do no translation at all in control states */ 2012 /* Do no translation at all in control states */
2006 if (vc->vc_state != ESnormal) { 2013 if (vc->vc_state != ESnormal) {
2007 tc = c; 2014 tc = c;
2008 } else if (vc->vc_utf) { 2015 } else if (vc->vc_utf && !vc->vc_disp_ctrl) {
2009 /* Combine UTF-8 into Unicode */ 2016 /* Combine UTF-8 into Unicode */
2010 /* Incomplete characters silently ignored */ 2017 /* Malformed sequences as sequences of replacement glyphs */
2018rescan_last_byte:
2011 if(c > 0x7f) { 2019 if(c > 0x7f) {
2012 if (vc->vc_utf_count > 0 && (c & 0xc0) == 0x80) { 2020 if (vc->vc_utf_count) {
2013 vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f); 2021 if ((c & 0xc0) == 0x80) {
2014 vc->vc_utf_count--; 2022 vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f);
2015 if (vc->vc_utf_count == 0) 2023 if (--vc->vc_utf_count) {
2016 tc = c = vc->vc_utf_char; 2024 vc->vc_npar++;
2017 else continue; 2025 continue;
2026 }
2027 tc = c = vc->vc_utf_char;
2028 } else
2029 goto replacement_glyph;
2018 } else { 2030 } else {
2031 vc->vc_npar = 0;
2019 if ((c & 0xe0) == 0xc0) { 2032 if ((c & 0xe0) == 0xc0) {
2020 vc->vc_utf_count = 1; 2033 vc->vc_utf_count = 1;
2021 vc->vc_utf_char = (c & 0x1f); 2034 vc->vc_utf_char = (c & 0x1f);
@@ -2032,14 +2045,15 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
2032 vc->vc_utf_count = 5; 2045 vc->vc_utf_count = 5;
2033 vc->vc_utf_char = (c & 0x01); 2046 vc->vc_utf_char = (c & 0x01);
2034 } else 2047 } else
2035 vc->vc_utf_count = 0; 2048 goto replacement_glyph;
2036 continue; 2049 continue;
2037 } 2050 }
2038 } else { 2051 } else {
2052 if (vc->vc_utf_count)
2053 goto replacement_glyph;
2039 tc = c; 2054 tc = c;
2040 vc->vc_utf_count = 0;
2041 } 2055 }
2042 } else { /* no utf */ 2056 } else { /* no utf or alternate charset mode */
2043 tc = vc->vc_translate[vc->vc_toggle_meta ? (c | 0x80) : c]; 2057 tc = vc->vc_translate[vc->vc_toggle_meta ? (c | 0x80) : c];
2044 } 2058 }
2045 2059
@@ -2054,31 +2068,33 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
2054 * direct-to-font zone in UTF-8 mode. 2068 * direct-to-font zone in UTF-8 mode.
2055 */ 2069 */
2056 ok = tc && (c >= 32 || 2070 ok = tc && (c >= 32 ||
2057 (!vc->vc_utf && !(((vc->vc_disp_ctrl ? CTRL_ALWAYS 2071 !(vc->vc_disp_ctrl ? (CTRL_ALWAYS >> c) & 1 :
2058 : CTRL_ACTION) >> c) & 1))) 2072 vc->vc_utf || ((CTRL_ACTION >> c) & 1)))
2059 && (c != 127 || vc->vc_disp_ctrl) 2073 && (c != 127 || vc->vc_disp_ctrl)
2060 && (c != 128+27); 2074 && (c != 128+27);
2061 2075
2062 if (vc->vc_state == ESnormal && ok) { 2076 if (vc->vc_state == ESnormal && ok) {
2063 /* Now try to find out how to display it */ 2077 /* Now try to find out how to display it */
2064 tc = conv_uni_to_pc(vc, tc); 2078 tc = conv_uni_to_pc(vc, tc);
2065 if ( tc == -4 ) { 2079 if (tc & ~charmask) {
2080 if ( tc == -4 ) {
2066 /* If we got -4 (not found) then see if we have 2081 /* If we got -4 (not found) then see if we have
2067 defined a replacement character (U+FFFD) */ 2082 defined a replacement character (U+FFFD) */
2068 tc = conv_uni_to_pc(vc, 0xfffd); 2083replacement_glyph:
2069 2084 tc = conv_uni_to_pc(vc, 0xfffd);
2070 /* One reason for the -4 can be that we just 2085 if (!(tc & ~charmask))
2071 did a clear_unimap(); 2086 goto display_glyph;
2072 try at least to show something. */ 2087 } else if ( tc != -3 )
2073 if (tc == -4) 2088 continue; /* nothing to display */
2074 tc = c; 2089 /* no hash table or no replacement --
2075 } else if ( tc == -3 ) { 2090 * hope for the best */
2076 /* Bad hash table -- hope for the best */ 2091 if ( c & ~charmask )
2077 tc = c; 2092 tc = '?';
2078 } 2093 else
2079 if (tc & ~charmask) 2094 tc = c;
2080 continue; /* Conversion failed */ 2095 }
2081 2096
2097display_glyph:
2082 if (vc->vc_need_wrap || vc->vc_decim) 2098 if (vc->vc_need_wrap || vc->vc_decim)
2083 FLUSH 2099 FLUSH
2084 if (vc->vc_need_wrap) { 2100 if (vc->vc_need_wrap) {
@@ -2102,6 +2118,15 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
2102 vc->vc_x++; 2118 vc->vc_x++;
2103 draw_to = (vc->vc_pos += 2); 2119 draw_to = (vc->vc_pos += 2);
2104 } 2120 }
2121 if (vc->vc_utf_count) {
2122 if (vc->vc_npar) {
2123 vc->vc_npar--;
2124 goto display_glyph;
2125 }
2126 vc->vc_utf_count = 0;
2127 c = orig;
2128 goto rescan_last_byte;
2129 }
2105 continue; 2130 continue;
2106 } 2131 }
2107 FLUSH 2132 FLUSH