aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jan Engelhardt <jengelh@linux01.gwdg.de> 2007-07-16 02:40:40 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-07-16 12:05:46 -0400
commit 759448f459234bfcf34b82471f0dba77a9aca498 (patch)
tree 61cbf8501bdad78c03e034072791fbf3e0436d43
parent aa0ac36518be648dda3a32f0b37a8b2b546e1b24 (diff)
Kernel utf-8 handling
This patch fixes dead keys and copy/paste of non-ASCII characters in UTF-8 mode on the Linux console.

See more details about the original patch at:
http://chris.heathens.co.nz/linux/utf8.html

Already posted on:
(Oldest) http://lkml.org/lkml/2003/5/31/148
http://lkml.org/lkml/2005/12/24/69
(Recent) http://lkml.org/lkml/2006/8/7/75

[bunk@stusta.de: make drivers/char/selection.c:store_utf8() static]
Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Cc: Alexander E. Patrakov <patrakov@ums.usu.ru>
Cc: Dmitry Torokhov <dtor@mail.ru>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- drivers/char/consolemap.c 78
-rw-r--r-- drivers/char/keyboard.c 26
-rw-r--r-- drivers/char/selection.c 48
-rw-r--r-- include/linux/consolemap.h 5
4 files changed, 134 insertions, 23 deletions
diff --git a/drivers/char/consolemap.c b/drivers/char/consolemap.c
index fd40b959afdd..4b3916f54909 100644
--- a/drivers/char/consolemap.c
+++ b/drivers/char/consolemap.c
@@ -177,6 +177,7 @@ struct uni_pagedir {
177 unsigned long refcount; 177 unsigned long refcount;
178 unsigned long sum; 178 unsigned long sum;
179 unsigned char *inverse_translations[4]; 179 unsigned char *inverse_translations[4];
180 u16 *inverse_trans_unicode;
180 int readonly; 181 int readonly;
181}; 182};
182 183
@@ -207,6 +208,41 @@ static void set_inverse_transl(struct vc_data *conp, struct uni_pagedir *p, int
207 } 208 }
208} 209}
209 210
211static void set_inverse_trans_unicode(struct vc_data *conp,
212 struct uni_pagedir *p)
213{
214 int i, j, k, glyph;
215 u16 **p1, *p2;
216 u16 *q;
217
218 if (!p) return;
219 q = p->inverse_trans_unicode;
220 if (!q) {
221 q = p->inverse_trans_unicode =
222 kmalloc(MAX_GLYPH * sizeof(u16), GFP_KERNEL);
223 if (!q)
224 return;
225 }
226 memset(q, 0, MAX_GLYPH * sizeof(u16));
227
228 for (i = 0; i < 32; i++) {
229 p1 = p->uni_pgdir[i];
230 if (!p1)
231 continue;
232 for (j = 0; j < 32; j++) {
233 p2 = p1[j];
234 if (!p2)
235 continue;
236 for (k = 0; k < 64; k++) {
237 glyph = p2[k];
238 if (glyph >= 0 && glyph < MAX_GLYPH
239 && q[glyph] < 32)
240 q[glyph] = (i << 11) + (j << 6) + k;
241 }
242 }
243 }
244}
245
210unsigned short *set_translate(int m, struct vc_data *vc) 246unsigned short *set_translate(int m, struct vc_data *vc)
211{ 247{
212 inv_translate[vc->vc_num] = m; 248 inv_translate[vc->vc_num] = m;
@@ -217,19 +253,29 @@ unsigned short *set_translate(int m, struct vc_data *vc)
217 * Inverse translation is impossible for several reasons: 253 * Inverse translation is impossible for several reasons:
218 * 1. The font<->character maps are not 1-1. 254 * 1. The font<->character maps are not 1-1.
219 * 2. The text may have been written while a different translation map 255 * 2. The text may have been written while a different translation map
220 * was active, or using Unicode. 256 * was active.
221 * Still, it is now possible to a certain extent to cut and paste non-ASCII. 257 * Still, it is now possible to a certain extent to cut and paste non-ASCII.
222 */ 258 */
223unsigned char inverse_translate(struct vc_data *conp, int glyph) 259u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode)
224{ 260{
225 struct uni_pagedir *p; 261 struct uni_pagedir *p;
262 int m;
226 if (glyph < 0 || glyph >= MAX_GLYPH) 263 if (glyph < 0 || glyph >= MAX_GLYPH)
227 return 0; 264 return 0;
228 else if (!(p = (struct uni_pagedir *)*conp->vc_uni_pagedir_loc) || 265 else if (!(p = (struct uni_pagedir *)*conp->vc_uni_pagedir_loc))
229 !p->inverse_translations[inv_translate[conp->vc_num]])
230 return glyph; 266 return glyph;
231 else 267 else if (use_unicode) {
232 return p->inverse_translations[inv_translate[conp->vc_num]][glyph]; 268 if (!p->inverse_trans_unicode)
269 return glyph;
270 else
271 return p->inverse_trans_unicode[glyph];
272 } else {
273 m = inv_translate[conp->vc_num];
274 if (!p->inverse_translations[m])
275 return glyph;
276 else
277 return p->inverse_translations[m][glyph];
278 }
233} 279}
234 280
235static void update_user_maps(void) 281static void update_user_maps(void)
@@ -243,6 +289,7 @@ static void update_user_maps(void)
243 p = (struct uni_pagedir *)*vc_cons[i].d->vc_uni_pagedir_loc; 289 p = (struct uni_pagedir *)*vc_cons[i].d->vc_uni_pagedir_loc;
244 if (p && p != q) { 290 if (p && p != q) {
245 set_inverse_transl(vc_cons[i].d, p, USER_MAP); 291 set_inverse_transl(vc_cons[i].d, p, USER_MAP);
292 set_inverse_trans_unicode(vc_cons[i].d, p);
246 q = p; 293 q = p;
247 } 294 }
248 } 295 }
@@ -353,6 +400,10 @@ static void con_release_unimap(struct uni_pagedir *p)
353 kfree(p->inverse_translations[i]); 400 kfree(p->inverse_translations[i]);
354 p->inverse_translations[i] = NULL; 401 p->inverse_translations[i] = NULL;
355 } 402 }
403 if (p->inverse_trans_unicode) {
404 kfree(p->inverse_trans_unicode);
405 p->inverse_trans_unicode = NULL;
406 }
356} 407}
357 408
358void con_free_unimap(struct vc_data *vc) 409void con_free_unimap(struct vc_data *vc)
@@ -511,6 +562,7 @@ int con_set_unimap(struct vc_data *vc, ushort ct, struct unipair __user *list)
511 562
512 for (i = 0; i <= 3; i++) 563 for (i = 0; i <= 3; i++)
513 set_inverse_transl(vc, p, i); /* Update all inverse translations */ 564 set_inverse_transl(vc, p, i); /* Update all inverse translations */
565 set_inverse_trans_unicode(vc, p);
514 566
515 return err; 567 return err;
516} 568}
@@ -561,6 +613,7 @@ int con_set_default_unimap(struct vc_data *vc)
561 613
562 for (i = 0; i <= 3; i++) 614 for (i = 0; i <= 3; i++)
563 set_inverse_transl(vc, p, i); /* Update all inverse translations */ 615 set_inverse_transl(vc, p, i); /* Update all inverse translations */
616 set_inverse_trans_unicode(vc, p);
564 dflt = p; 617 dflt = p;
565 return err; 618 return err;
566} 619}
@@ -617,6 +670,19 @@ void con_protect_unimap(struct vc_data *vc, int rdonly)
617 p->readonly = rdonly; 670 p->readonly = rdonly;
618} 671}
619 672
673/* may be called during an interrupt */
674u32 conv_8bit_to_uni(unsigned char c)
675{
676 /*
677 * Always use USER_MAP. This function is used by the keyboard,
678 * which shouldn't be affected by G0/G1 switching, etc.
679 * If the user map still contains default values, i.e. the
680 * direct-to-font mapping, then assume user is using Latin1.
681 */
682 unsigned short uni = translations[USER_MAP][c];
683 return uni == (0xf000 | c) ? c : uni;
684}
685
620int 686int
621conv_uni_to_pc(struct vc_data *conp, long ucs) 687conv_uni_to_pc(struct vc_data *conp, long ucs)
622{ 688{
diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c
index 90965b4def5c..2ce0af1bd588 100644
--- a/drivers/char/keyboard.c
+++ b/drivers/char/keyboard.c
@@ -24,6 +24,7 @@
24 * 21-08-02: Converted to input API, major cleanup. (Vojtech Pavlik) 24 * 21-08-02: Converted to input API, major cleanup. (Vojtech Pavlik)
25 */ 25 */
26 26
27#include <linux/consolemap.h>
27#include <linux/module.h> 28#include <linux/module.h>
28#include <linux/sched.h> 29#include <linux/sched.h>
29#include <linux/tty.h> 30#include <linux/tty.h>
@@ -308,10 +309,9 @@ static void applkey(struct vc_data *vc, int key, char mode)
308 * Many other routines do put_queue, but I think either 309 * Many other routines do put_queue, but I think either
309 * they produce ASCII, or they produce some user-assigned 310 * they produce ASCII, or they produce some user-assigned
310 * string, and in both cases we might assume that it is 311 * string, and in both cases we might assume that it is
311 * in utf-8 already. UTF-8 is defined for words of up to 31 bits, 312 * in utf-8 already.
312 * but we need only 16 bits here
313 */ 313 */
314static void to_utf8(struct vc_data *vc, ushort c) 314static void to_utf8(struct vc_data *vc, uint c)
315{ 315{
316 if (c < 0x80) 316 if (c < 0x80)
317 /* 0******* */ 317 /* 0******* */
@@ -320,11 +320,21 @@ static void to_utf8(struct vc_data *vc, ushort c)
320 /* 110***** 10****** */ 320 /* 110***** 10****** */
321 put_queue(vc, 0xc0 | (c >> 6)); 321 put_queue(vc, 0xc0 | (c >> 6));
322 put_queue(vc, 0x80 | (c & 0x3f)); 322 put_queue(vc, 0x80 | (c & 0x3f));
323 } else { 323 } else if (c < 0x10000) {
324 if (c >= 0xD800 && c < 0xE000)
325 return;
326 if (c == 0xFFFF)
327 return;
324 /* 1110**** 10****** 10****** */ 328 /* 1110**** 10****** 10****** */
325 put_queue(vc, 0xe0 | (c >> 12)); 329 put_queue(vc, 0xe0 | (c >> 12));
326 put_queue(vc, 0x80 | ((c >> 6) & 0x3f)); 330 put_queue(vc, 0x80 | ((c >> 6) & 0x3f));
327 put_queue(vc, 0x80 | (c & 0x3f)); 331 put_queue(vc, 0x80 | (c & 0x3f));
332 } else if (c < 0x110000) {
333 /* 11110*** 10****** 10****** 10****** */
334 put_queue(vc, 0xf0 | (c >> 18));
335 put_queue(vc, 0x80 | ((c >> 12) & 0x3f));
336 put_queue(vc, 0x80 | ((c >> 6) & 0x3f));
337 put_queue(vc, 0x80 | (c & 0x3f));
328 } 338 }
329} 339}
330 340
@@ -393,7 +403,7 @@ static unsigned int handle_diacr(struct vc_data *vc, unsigned int ch)
393 return d; 403 return d;
394 404
395 if (kbd->kbdmode == VC_UNICODE) 405 if (kbd->kbdmode == VC_UNICODE)
396 to_utf8(vc, d); 406 to_utf8(vc, conv_8bit_to_uni(d));
397 else if (d < 0x100) 407 else if (d < 0x100)
398 put_queue(vc, d); 408 put_queue(vc, d);
399 409
@@ -407,7 +417,7 @@ static void fn_enter(struct vc_data *vc)
407{ 417{
408 if (diacr) { 418 if (diacr) {
409 if (kbd->kbdmode == VC_UNICODE) 419 if (kbd->kbdmode == VC_UNICODE)
410 to_utf8(vc, diacr); 420 to_utf8(vc, conv_8bit_to_uni(diacr));
411 else if (diacr < 0x100) 421 else if (diacr < 0x100)
412 put_queue(vc, diacr); 422 put_queue(vc, diacr);
413 diacr = 0; 423 diacr = 0;
@@ -617,7 +627,7 @@ static void k_unicode(struct vc_data *vc, unsigned int value, char up_flag)
617 return; 627 return;
618 } 628 }
619 if (kbd->kbdmode == VC_UNICODE) 629 if (kbd->kbdmode == VC_UNICODE)
620 to_utf8(vc, value); 630 to_utf8(vc, conv_8bit_to_uni(value));
621 else if (value < 0x100) 631 else if (value < 0x100)
622 put_queue(vc, value); 632 put_queue(vc, value);
623} 633}
@@ -775,7 +785,7 @@ static void k_shift(struct vc_data *vc, unsigned char value, char up_flag)
775 /* kludge */ 785 /* kludge */
776 if (up_flag && shift_state != old_state && npadch != -1) { 786 if (up_flag && shift_state != old_state && npadch != -1) {
777 if (kbd->kbdmode == VC_UNICODE) 787 if (kbd->kbdmode == VC_UNICODE)
778 to_utf8(vc, npadch & 0xffff); 788 to_utf8(vc, npadch);
779 else 789 else
780 put_queue(vc, npadch & 0xff); 790 put_queue(vc, npadch & 0xff);
781 npadch = -1; 791 npadch = -1;
diff --git a/drivers/char/selection.c b/drivers/char/selection.c
index a69f094d1ed3..d63f5ccc29e6 100644
--- a/drivers/char/selection.c
+++ b/drivers/char/selection.c
@@ -20,6 +20,7 @@
20 20
21#include <asm/uaccess.h> 21#include <asm/uaccess.h>
22 22
23#include <linux/kbd_kern.h>
23#include <linux/vt_kern.h> 24#include <linux/vt_kern.h>
24#include <linux/consolemap.h> 25#include <linux/consolemap.h>
25#include <linux/selection.h> 26#include <linux/selection.h>
@@ -34,6 +35,7 @@ extern void poke_blanked_console(void);
34/* Variables for selection control. */ 35/* Variables for selection control. */
35/* Use a dynamic buffer, instead of static (Dec 1994) */ 36/* Use a dynamic buffer, instead of static (Dec 1994) */
36struct vc_data *sel_cons; /* must not be deallocated */ 37struct vc_data *sel_cons; /* must not be deallocated */
38static int use_unicode;
37static volatile int sel_start = -1; /* cleared by clear_selection */ 39static volatile int sel_start = -1; /* cleared by clear_selection */
38static int sel_end; 40static int sel_end;
39static int sel_buffer_lth; 41static int sel_buffer_lth;
@@ -54,10 +56,11 @@ static inline void highlight_pointer(const int where)
54 complement_pos(sel_cons, where); 56 complement_pos(sel_cons, where);
55} 57}
56 58
57static unsigned char 59static u16
58sel_pos(int n) 60sel_pos(int n)
59{ 61{
60 return inverse_translate(sel_cons, screen_glyph(sel_cons, n)); 62 return inverse_translate(sel_cons, screen_glyph(sel_cons, n),
63 use_unicode);
61} 64}
62 65
63/* remove the current selection highlight, if any, 66/* remove the current selection highlight, if any,
@@ -86,8 +89,8 @@ static u32 inwordLut[8]={
86 0xFF7FFFFF /* latin-1 accented letters, not division sign */ 89 0xFF7FFFFF /* latin-1 accented letters, not division sign */
87}; 90};
88 91
89static inline int inword(const unsigned char c) { 92static inline int inword(const u16 c) {
90 return ( inwordLut[c>>5] >> (c & 0x1F) ) & 1; 93 return c > 0xff || (( inwordLut[c>>5] >> (c & 0x1F) ) & 1);
91} 94}
92 95
93/* set inwordLut contents. Invoked by ioctl(). */ 96/* set inwordLut contents. Invoked by ioctl(). */
@@ -108,13 +111,36 @@ static inline unsigned short limit(const unsigned short v, const unsigned short
108 return (v > u) ? u : v; 111 return (v > u) ? u : v;
109} 112}
110 113
114/* stores the char in UTF8 and returns the number of bytes used (1-3) */
115static int store_utf8(u16 c, char *p)
116{
117 if (c < 0x80) {
118 /* 0******* */
119 p[0] = c;
120 return 1;
121 } else if (c < 0x800) {
122 /* 110***** 10****** */
123 p[0] = 0xc0 | (c >> 6);
124 p[1] = 0x80 | (c & 0x3f);
125 return 2;
126 } else {
127 /* 1110**** 10****** 10****** */
128 p[0] = 0xe0 | (c >> 12);
129 p[1] = 0x80 | ((c >> 6) & 0x3f);
130 p[2] = 0x80 | (c & 0x3f);
131 return 3;
132 }
133}
134
111/* set the current selection. Invoked by ioctl() or by kernel code. */ 135/* set the current selection. Invoked by ioctl() or by kernel code. */
112int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *tty) 136int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *tty)
113{ 137{
114 struct vc_data *vc = vc_cons[fg_console].d; 138 struct vc_data *vc = vc_cons[fg_console].d;
115 int sel_mode, new_sel_start, new_sel_end, spc; 139 int sel_mode, new_sel_start, new_sel_end, spc;
116 char *bp, *obp; 140 char *bp, *obp;
117 int i, ps, pe; 141 int i, ps, pe, multiplier;
142 u16 c;
143 struct kbd_struct *kbd = kbd_table + fg_console;
118 144
119 poke_blanked_console(); 145 poke_blanked_console();
120 146
@@ -158,6 +184,7 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t
158 clear_selection(); 184 clear_selection();
159 sel_cons = vc_cons[fg_console].d; 185 sel_cons = vc_cons[fg_console].d;
160 } 186 }
187 use_unicode = kbd && kbd->kbdmode == VC_UNICODE;
161 188
162 switch (sel_mode) 189 switch (sel_mode)
163 { 190 {
@@ -240,7 +267,8 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t
240 sel_end = new_sel_end; 267 sel_end = new_sel_end;
241 268
242 /* Allocate a new buffer before freeing the old one ... */ 269 /* Allocate a new buffer before freeing the old one ... */
243 bp = kmalloc((sel_end-sel_start)/2+1, GFP_KERNEL); 270 multiplier = use_unicode ? 3 : 1; /* chars can take up to 3 bytes */
271 bp = kmalloc((sel_end-sel_start)/2*multiplier+1, GFP_KERNEL);
244 if (!bp) { 272 if (!bp) {
245 printk(KERN_WARNING "selection: kmalloc() failed\n"); 273 printk(KERN_WARNING "selection: kmalloc() failed\n");
246 clear_selection(); 274 clear_selection();
@@ -251,8 +279,12 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t
251 279
252 obp = bp; 280 obp = bp;
253 for (i = sel_start; i <= sel_end; i += 2) { 281 for (i = sel_start; i <= sel_end; i += 2) {
254 *bp = sel_pos(i); 282 c = sel_pos(i);
255 if (!isspace(*bp++)) 283 if (use_unicode)
284 bp += store_utf8(c, bp);
285 else
286 *bp++ = c;
287 if (!isspace(c))
256 obp = bp; 288 obp = bp;
257 if (! ((i + 2) % vc->vc_size_row)) { 289 if (! ((i + 2) % vc->vc_size_row)) {
258 /* strip trailing blanks from line and add newline, 290 /* strip trailing blanks from line and add newline,
diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h
index 82c9a1f11020..06b2768c603f 100644
--- a/include/linux/consolemap.h
+++ b/include/linux/consolemap.h
@@ -8,9 +8,12 @@
8#define IBMPC_MAP 2 8#define IBMPC_MAP 2
9#define USER_MAP 3 9#define USER_MAP 3
10 10
11#include <linux/types.h>
12
11struct vc_data; 13struct vc_data;
12 14
13extern unsigned char inverse_translate(struct vc_data *conp, int glyph); 15extern u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode);
14extern unsigned short *set_translate(int m, struct vc_data *vc); 16extern unsigned short *set_translate(int m, struct vc_data *vc);
15extern int conv_uni_to_pc(struct vc_data *conp, long ucs); 17extern int conv_uni_to_pc(struct vc_data *conp, long ucs);
18extern u32 conv_8bit_to_uni(unsigned char c);
16void console_map_init(void); 19void console_map_init(void);