about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/char/consolemap.c 6
-rw-r--r-- drivers/char/vt.c 257
2 files changed, 182 insertions, 81 deletions
diff --git a/drivers/char/consolemap.c b/drivers/char/consolemap.c
index b99b7561260d..fd40b959afdd 100644
--- a/drivers/char/consolemap.c
+++ b/drivers/char/consolemap.c
@@ -626,10 +626,10 @@ conv_uni_to_pc(struct vc_data *conp, long ucs)
626 626
627 /* Only 16-bit codes supported at this time */ 627 /* Only 16-bit codes supported at this time */
628 if (ucs > 0xffff) 628 if (ucs > 0xffff)
629 ucs = 0xfffd; /* U+FFFD: REPLACEMENT CHARACTER */ 629 return -4; /* Not found */
630 else if (ucs < 0x20 || ucs >= 0xfffe) 630 else if (ucs < 0x20)
631 return -1; /* Not a printable character */ 631 return -1; /* Not a printable character */
632 else if (ucs == 0xfeff || (ucs >= 0x200a && ucs <= 0x200f)) 632 else if (ucs == 0xfeff || (ucs >= 0x200b && ucs <= 0x200f))
633 return -2; /* Zero-width space */ 633 return -2; /* Zero-width space */
634 /* 634 /*
635 * UNI_DIRECT_BASE indicates the start of the region in the User Zone 635 * UNI_DIRECT_BASE indicates the start of the region in the User Zone
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 1bbb45b937fd..afd00464184e 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -1932,6 +1932,46 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c)
1932char con_buf[CON_BUF_SIZE]; 1932char con_buf[CON_BUF_SIZE];
1933DECLARE_MUTEX(con_buf_sem); 1933DECLARE_MUTEX(con_buf_sem);
1934 1934
/* is_double_width() is based on the wcwidth() implementation by
 * Markus Kuhn -- 2003-05-20 (Unicode 4.0)
 * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
 */

/* One closed range of values, [first, last], both ends inclusive. */
struct interval {
	uint32_t first;
	uint32_t last;
};

/*
 * bisearch - binary search for a value in a table of intervals
 * @ucs:   value to look up
 * @table: array of intervals; the bisection below is only meaningful if the
 *         entries are sorted in ascending order and do not overlap
 * @max:   index of the LAST entry in @table (i.e. entry count minus one)
 *
 * Returns 1 if @ucs lies inside one of the intervals, 0 otherwise.
 * An empty table (@max < 0) or a NULL @table yields 0 instead of reading
 * outside the array.
 */
static int bisearch(uint32_t ucs, const struct interval *table, int max)
{
	int min = 0;
	int mid;

	/* Nothing to search: must be checked before touching table[0]/table[max]. */
	if (!table || max < 0)
		return 0;

	/* Quick reject for values outside the overall span of the table. */
	if (ucs < table[0].first || ucs > table[max].last)
		return 0;

	while (max >= min) {
		/* Written this way so that min + max cannot overflow int. */
		mid = min + (max - min) / 2;
		if (ucs > table[mid].last)
			min = mid + 1;
		else if (ucs < table[mid].first)
			max = mid - 1;
		else
			return 1;
	}
	return 0;
}
1962
1963static int is_double_width(uint32_t ucs)
1964{
1965 static const struct interval double_width[] = {
1966 { 0x1100, 0x115F }, { 0x2329, 0x232A }, { 0x2E80, 0x303E },
1967 { 0x3040, 0xA4CF }, { 0xAC00, 0xD7A3 }, { 0xF900, 0xFAFF },
1968 { 0xFE30, 0xFE6F }, { 0xFF00, 0xFF60 }, { 0xFFE0, 0xFFE6 },
1969 { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD }
1970 };
1971 return bisearch(ucs, double_width,
1972 sizeof(double_width) / sizeof(*double_width) - 1);
1973}
1974
1935/* acquires console_sem */ 1975/* acquires console_sem */
1936static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int count) 1976static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int count)
1937{ 1977{
@@ -1948,6 +1988,10 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
1948 unsigned int currcons; 1988 unsigned int currcons;
1949 unsigned long draw_from = 0, draw_to = 0; 1989 unsigned long draw_from = 0, draw_to = 0;
1950 struct vc_data *vc; 1990 struct vc_data *vc;
1991 unsigned char vc_attr;
1992 uint8_t rescan;
1993 uint8_t inverse;
1994 uint8_t width;
1951 u16 himask, charmask; 1995 u16 himask, charmask;
1952 const unsigned char *orig_buf = NULL; 1996 const unsigned char *orig_buf = NULL;
1953 int orig_count; 1997 int orig_count;
@@ -2010,53 +2054,86 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
2010 buf++; 2054 buf++;
2011 n++; 2055 n++;
2012 count--; 2056 count--;
2057 rescan = 0;
2058 inverse = 0;
2059 width = 1;
2013 2060
2014 /* Do no translation at all in control states */ 2061 /* Do no translation at all in control states */
2015 if (vc->vc_state != ESnormal) { 2062 if (vc->vc_state != ESnormal) {
2016 tc = c; 2063 tc = c;
2017 } else if (vc->vc_utf && !vc->vc_disp_ctrl) { 2064 } else if (vc->vc_utf && !vc->vc_disp_ctrl) {
2018 /* Combine UTF-8 into Unicode */ 2065 /* Combine UTF-8 into Unicode in vc_utf_char.
2019 /* Malformed sequences as sequences of replacement glyphs */ 2066 * vc_utf_count is the number of continuation bytes still
2067 * expected to arrive.
2068 * vc_npar is the number of continuation bytes arrived so
2069 * far
2070 */
2020rescan_last_byte: 2071rescan_last_byte:
2021 if(c > 0x7f) { 2072 if ((c & 0xc0) == 0x80) {
2073 /* Continuation byte received */
2074 static const uint32_t utf8_length_changes[] = { 0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff };
2022 if (vc->vc_utf_count) { 2075 if (vc->vc_utf_count) {
2023 if ((c & 0xc0) == 0x80) { 2076 vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f);
2024 vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f); 2077 vc->vc_npar++;
2025 if (--vc->vc_utf_count) { 2078 if (--vc->vc_utf_count) {
2026 vc->vc_npar++; 2079 /* Still need some bytes */
2027 continue;
2028 }
2029 tc = c = vc->vc_utf_char;
2030 } else
2031 goto replacement_glyph;
2032 } else {
2033 vc->vc_npar = 0;
2034 if ((c & 0xe0) == 0xc0) {
2035 vc->vc_utf_count = 1;
2036 vc->vc_utf_char = (c & 0x1f);
2037 } else if ((c & 0xf0) == 0xe0) {
2038 vc->vc_utf_count = 2;
2039 vc->vc_utf_char = (c & 0x0f);
2040 } else if ((c & 0xf8) == 0xf0) {
2041 vc->vc_utf_count = 3;
2042 vc->vc_utf_char = (c & 0x07);
2043 } else if ((c & 0xfc) == 0xf8) {
2044 vc->vc_utf_count = 4;
2045 vc->vc_utf_char = (c & 0x03);
2046 } else if ((c & 0xfe) == 0xfc) {
2047 vc->vc_utf_count = 5;
2048 vc->vc_utf_char = (c & 0x01);
2049 } else
2050 goto replacement_glyph;
2051 continue; 2080 continue;
2052 } 2081 }
2082 /* Got a whole character */
2083 c = vc->vc_utf_char;
2084 /* Reject overlong sequences */
2085 if (c <= utf8_length_changes[vc->vc_npar - 1] ||
2086 c > utf8_length_changes[vc->vc_npar])
2087 c = 0xfffd;
2088 } else {
2089 /* Unexpected continuation byte */
2090 vc->vc_utf_count = 0;
2091 c = 0xfffd;
2092 }
2053 } else { 2093 } else {
2054 if (vc->vc_utf_count) 2094 /* Single ASCII byte or first byte of a sequence received */
2055 goto replacement_glyph; 2095 if (vc->vc_utf_count) {
2056 tc = c; 2096 /* Continuation byte expected */
2097 rescan = 1;
2098 vc->vc_utf_count = 0;
2099 c = 0xfffd;
2100 } else if (c > 0x7f) {
2101 /* First byte of a multibyte sequence received */
2102 vc->vc_npar = 0;
2103 if ((c & 0xe0) == 0xc0) {
2104 vc->vc_utf_count = 1;
2105 vc->vc_utf_char = (c & 0x1f);
2106 } else if ((c & 0xf0) == 0xe0) {
2107 vc->vc_utf_count = 2;
2108 vc->vc_utf_char = (c & 0x0f);
2109 } else if ((c & 0xf8) == 0xf0) {
2110 vc->vc_utf_count = 3;
2111 vc->vc_utf_char = (c & 0x07);
2112 } else if ((c & 0xfc) == 0xf8) {
2113 vc->vc_utf_count = 4;
2114 vc->vc_utf_char = (c & 0x03);
2115 } else if ((c & 0xfe) == 0xfc) {
2116 vc->vc_utf_count = 5;
2117 vc->vc_utf_char = (c & 0x01);
2118 } else {
2119 /* 254 and 255 are invalid */
2120 c = 0xfffd;
2121 }
2122 if (vc->vc_utf_count) {
2123 /* Still need some bytes */
2124 continue;
2125 }
2126 }
2127 /* Nothing to do if an ASCII byte was received */
2057 } 2128 }
2129 /* End of UTF-8 decoding. */
2130 /* c is the received character, or U+FFFD for invalid sequences. */
2131 /* Replace invalid Unicode code points with U+FFFD too */
2132 if ((c >= 0xd800 && c <= 0xdfff) || c == 0xfffe || c == 0xffff)
2133 c = 0xfffd;
2134 tc = c;
2058 } else { /* no utf or alternate charset mode */ 2135 } else { /* no utf or alternate charset mode */
2059 tc = vc->vc_translate[vc->vc_toggle_meta ? (c | 0x80) : c]; 2136 tc = vc->vc_translate[vc->vc_toggle_meta ? (c | 0x80) : c];
2060 } 2137 }
2061 2138
2062 /* If the original code was a control character we 2139 /* If the original code was a control character we
@@ -2076,56 +2153,80 @@ rescan_last_byte:
2076 && (c != 128+27); 2153 && (c != 128+27);
2077 2154
2078 if (vc->vc_state == ESnormal && ok) { 2155 if (vc->vc_state == ESnormal && ok) {
2156 if (vc->vc_utf && !vc->vc_disp_ctrl) {
2157 if (is_double_width(c))
2158 width = 2;
2159 }
2079 /* Now try to find out how to display it */ 2160 /* Now try to find out how to display it */
2080 tc = conv_uni_to_pc(vc, tc); 2161 tc = conv_uni_to_pc(vc, tc);
2081 if (tc & ~charmask) { 2162 if (tc & ~charmask) {
2082 if ( tc == -4 ) { 2163 if (tc == -1 || tc == -2) {
2083 /* If we got -4 (not found) then see if we have 2164 continue; /* nothing to display */
2084 defined a replacement character (U+FFFD) */ 2165 }
2085replacement_glyph: 2166 /* Glyph not found */
2086 tc = conv_uni_to_pc(vc, 0xfffd); 2167 if (!(vc->vc_utf && !vc->vc_disp_ctrl) && !(c & ~charmask)) {
2087 if (!(tc & ~charmask)) 2168 /* In legacy mode use the glyph we get by a 1:1 mapping.
2088 goto display_glyph; 2169 This would make absolutely no sense with Unicode in mind. */
2089 } else if ( tc != -3 ) 2170 tc = c;
2090 continue; /* nothing to display */ 2171 } else {
2091 /* no hash table or no replacement -- 2172 /* Display U+FFFD. If it's not found, display an inverse question mark. */
2092 * hope for the best */ 2173 tc = conv_uni_to_pc(vc, 0xfffd);
2093 if ( c & ~charmask ) 2174 if (tc < 0) {
2094 tc = '?'; 2175 inverse = 1;
2095 else 2176 tc = conv_uni_to_pc(vc, '?');
2096 tc = c; 2177 if (tc < 0) tc = '?';
2178 }
2179 }
2097 } 2180 }
2098 2181
2099display_glyph: 2182 if (!inverse) {
2100 if (vc->vc_need_wrap || vc->vc_decim) 2183 vc_attr = vc->vc_attr;
2101 FLUSH
2102 if (vc->vc_need_wrap) {
2103 cr(vc);
2104 lf(vc);
2105 }
2106 if (vc->vc_decim)
2107 insert_char(vc, 1);
2108 scr_writew(himask ?
2109 ((vc->vc_attr << 8) & ~himask) + ((tc & 0x100) ? himask : 0) + (tc & 0xff) :
2110 (vc->vc_attr << 8) + tc,
2111 (u16 *) vc->vc_pos);
2112 if (DO_UPDATE(vc) && draw_x < 0) {
2113 draw_x = vc->vc_x;
2114 draw_from = vc->vc_pos;
2115 }
2116 if (vc->vc_x == vc->vc_cols - 1) {
2117 vc->vc_need_wrap = vc->vc_decawm;
2118 draw_to = vc->vc_pos + 2;
2119 } else { 2184 } else {
2120 vc->vc_x++; 2185 /* invert vc_attr */
2121 draw_to = (vc->vc_pos += 2); 2186 if (!vc->vc_can_do_color) {
2187 vc_attr = (vc->vc_attr) ^ 0x08;
2188 } else if (vc->vc_hi_font_mask == 0x100) {
2189 vc_attr = ((vc->vc_attr) & 0x11) | (((vc->vc_attr) & 0xe0) >> 4) | (((vc->vc_attr) & 0x0e) << 4);
2190 } else {
2191 vc_attr = ((vc->vc_attr) & 0x88) | (((vc->vc_attr) & 0x70) >> 4) | (((vc->vc_attr) & 0x07) << 4);
2192 }
2122 } 2193 }
2123 if (vc->vc_utf_count) { 2194
2124 if (vc->vc_npar) { 2195 while (1) {
2125 vc->vc_npar--; 2196 if (vc->vc_need_wrap || vc->vc_decim)
2126 goto display_glyph; 2197 FLUSH
2198 if (vc->vc_need_wrap) {
2199 cr(vc);
2200 lf(vc);
2201 }
2202 if (vc->vc_decim)
2203 insert_char(vc, 1);
2204 scr_writew(himask ?
2205 ((vc_attr << 8) & ~himask) + ((tc & 0x100) ? himask : 0) + (tc & 0xff) :
2206 (vc_attr << 8) + tc,
2207 (u16 *) vc->vc_pos);
2208 if (DO_UPDATE(vc) && draw_x < 0) {
2209 draw_x = vc->vc_x;
2210 draw_from = vc->vc_pos;
2211 }
2212 if (vc->vc_x == vc->vc_cols - 1) {
2213 vc->vc_need_wrap = vc->vc_decawm;
2214 draw_to = vc->vc_pos + 2;
2215 } else {
2216 vc->vc_x++;
2217 draw_to = (vc->vc_pos += 2);
2127 } 2218 }
2128 vc->vc_utf_count = 0; 2219
2220 if (!--width) break;
2221
2222 tc = conv_uni_to_pc(vc, ' '); /* A space is printed in the second column */
2223 if (tc < 0) tc = ' ';
2224 }
2225
2226 if (rescan) {
2227 rescan = 0;
2228 inverse = 0;
2229 width = 1;
2129 c = orig; 2230 c = orig;
2130 goto rescan_last_byte; 2231 goto rescan_last_byte;
2131 } 2232 }