aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Egmont Koblinger <egmont@uhulinux.hu> 2007-06-23 20:16:27 -0400
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-06-24 11:59:10 -0400
commit: 1ed8a2b3c501bedd4b35130c8a52662ccf78abad (patch)
tree: d168ca2105cc946550643c2bf5364a6b1c8c89a0
parent: 4e71e474c784dc274f28ec8bb22a5dbabc6dc7c5 (diff)
console UTF-8 fixes (fix)
Recently my console UTF-8 patch went mainline. Here is an additional patch that fixes two nasty issues and improves a third one, namely: 1. My patch changed the behavior if a glyph is not found in the Unicode mapping table. Previously for Unicode values less than 256 or 512 the kernel tried to display the glyph from that position of the glyph table, which could lead to a different accented letter being displayed. I removed this fallback possibility and changed it to display the replacement symbol. As Behdad pointed out, some fonts (e.g. sun12x22 from the kbd package) lack Unicode mapping information, hence all you get is lots of question marks. Though theoretically it's actually a user-space bug (the font should be fixed), Behdad and I both believe that it'd be good to work around it in the kernel by re-introducing the fallback solution for ASCII characters only. This sounds like a quite reasonable decision, since all fonts ship the ASCII characters in the first 128 positions. This way users won't be surprised by lots of question marks just because they issued a not-so-perfectly parameterized setfont command. As this fallback is only re-introduced for code points below 128, you still won't see an accented letter replaced by another, but at least you'll always get the English letters right. 2. My patch introduced "question mark with inverted color attributes" as a last resort fallback glyph. Though it works perfectly on VGA console, on framebuffer you may end up with question marks that are highlighted but shouldn't be, and normal characters that are accidentally highlighted. This is caused by missing FLUSHes when changing the color attribute. 3. I've updated the table of double-width characters based on Markus's updated version. Only ten new code points (one interval) are added. Signed-off-by: Egmont Koblinger <egmont@uhulinux.hu> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- drivers/char/vt.c 18
1 files changed, 13 insertions, 5 deletions
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index bbd9fc412877..6650ae1c088f 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -1956,7 +1956,7 @@ char con_buf[CON_BUF_SIZE];
1956DEFINE_MUTEX(con_buf_mtx); 1956DEFINE_MUTEX(con_buf_mtx);
1957 1957
1958/* is_double_width() is based on the wcwidth() implementation by 1958/* is_double_width() is based on the wcwidth() implementation by
1959 * Markus Kuhn -- 2003-05-20 (Unicode 4.0) 1959 * Markus Kuhn -- 2007-05-26 (Unicode 5.0)
1960 * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c 1960 * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
1961 */ 1961 */
1962struct interval { 1962struct interval {
@@ -1988,8 +1988,8 @@ static int is_double_width(uint32_t ucs)
1988 static const struct interval double_width[] = { 1988 static const struct interval double_width[] = {
1989 { 0x1100, 0x115F }, { 0x2329, 0x232A }, { 0x2E80, 0x303E }, 1989 { 0x1100, 0x115F }, { 0x2329, 0x232A }, { 0x2E80, 0x303E },
1990 { 0x3040, 0xA4CF }, { 0xAC00, 0xD7A3 }, { 0xF900, 0xFAFF }, 1990 { 0x3040, 0xA4CF }, { 0xAC00, 0xD7A3 }, { 0xF900, 0xFAFF },
1991 { 0xFE30, 0xFE6F }, { 0xFF00, 0xFF60 }, { 0xFFE0, 0xFFE6 }, 1991 { 0xFE10, 0xFE19 }, { 0xFE30, 0xFE6F }, { 0xFF00, 0xFF60 },
1992 { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD } 1992 { 0xFFE0, 0xFFE6 }, { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD }
1993 }; 1993 };
1994 return bisearch(ucs, double_width, 1994 return bisearch(ucs, double_width,
1995 sizeof(double_width) / sizeof(*double_width) - 1); 1995 sizeof(double_width) / sizeof(*double_width) - 1);
@@ -2187,9 +2187,12 @@ rescan_last_byte:
2187 continue; /* nothing to display */ 2187 continue; /* nothing to display */
2188 } 2188 }
2189 /* Glyph not found */ 2189 /* Glyph not found */
2190 if (!(vc->vc_utf && !vc->vc_disp_ctrl) && !(c & ~charmask)) { 2190 if ((!(vc->vc_utf && !vc->vc_disp_ctrl) || c < 128) && !(c & ~charmask)) {
2191 /* In legacy mode use the glyph we get by a 1:1 mapping. 2191 /* In legacy mode use the glyph we get by a 1:1 mapping.
2192 This would make absolutely no sense with Unicode in mind. */ 2192 This would make absolutely no sense with Unicode in mind,
2193 but do this for ASCII characters since a font may lack
2194 Unicode mapping info and we don't want to end up with
2195 having question marks only. */
2193 tc = c; 2196 tc = c;
2194 } else { 2197 } else {
2195 /* Display U+FFFD. If it's not found, display an inverse question mark. */ 2198 /* Display U+FFFD. If it's not found, display an inverse question mark. */
@@ -2213,6 +2216,7 @@ rescan_last_byte:
2213 } else { 2216 } else {
2214 vc_attr = ((vc->vc_attr) & 0x88) | (((vc->vc_attr) & 0x70) >> 4) | (((vc->vc_attr) & 0x07) << 4); 2217 vc_attr = ((vc->vc_attr) & 0x88) | (((vc->vc_attr) & 0x70) >> 4) | (((vc->vc_attr) & 0x07) << 4);
2215 } 2218 }
2219 FLUSH
2216 } 2220 }
2217 2221
2218 while (1) { 2222 while (1) {
@@ -2246,6 +2250,10 @@ rescan_last_byte:
2246 if (tc < 0) tc = ' '; 2250 if (tc < 0) tc = ' ';
2247 } 2251 }
2248 2252
2253 if (inverse) {
2254 FLUSH
2255 }
2256
2249 if (rescan) { 2257 if (rescan) {
2250 rescan = 0; 2258 rescan = 0;
2251 inverse = 0; 2259 inverse = 0;