aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorZachary Amsden <zach@vmware.com>2005-09-03 18:56:44 -0400
committerLinus Torvalds <torvalds@evo.osdl.org>2005-09-05 03:06:12 -0400
commita5201129307f414890f9a4410e38da205f5d7359 (patch)
treead70c5f1d3b336ef5665a7fd5ad8707aaec23847
parent0998e4228aca046fbd747c3fed909791d52e88eb (diff)
[PATCH] x86: make IOPL explicit
The pushf/popf in switch_to are ONLY used to switch IOPL. Making this explicit in C code is more clear. This pushf/popf pair was added as a bugfix for leaking IOPL to unprivileged processes when using sysenter/sysexit based system calls (sysexit does not restore flags). When requesting an IOPL change in sys_iopl(), it is just as easy to change the current flags and the flags in the stack image (in case an IRET is required), but there is no reason to force an IRET if we came in from the SYSENTER path. This change is the minimal solution for supporting a paravirtualized Linux kernel that allows user processes to run with I/O privilege. Other solutions require radical rewrites of part of the low level fault / system call handling code, or do not fully support sysenter based system calls. Unfortunately, this added one field to the thread_struct. But as a bonus, on P4, the fastest time measured for switch_to() went from 312 to 260 cycles, a win of about 17% in the fast case through this performance critical path. Signed-off-by: Zachary Amsden <zach@vmware.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--arch/i386/kernel/ioport.c7
-rw-r--r--arch/i386/kernel/process.c6
-rw-r--r--include/asm-i386/processor.h16
-rw-r--r--include/asm-i386/system.h4
4 files changed, 27 insertions, 6 deletions
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c
index 8b25160393c1..f2b37654777f 100644
--- a/arch/i386/kernel/ioport.c
+++ b/arch/i386/kernel/ioport.c
@@ -132,6 +132,7 @@ asmlinkage long sys_iopl(unsigned long unused)
132 volatile struct pt_regs * regs = (struct pt_regs *) &unused; 132 volatile struct pt_regs * regs = (struct pt_regs *) &unused;
133 unsigned int level = regs->ebx; 133 unsigned int level = regs->ebx;
134 unsigned int old = (regs->eflags >> 12) & 3; 134 unsigned int old = (regs->eflags >> 12) & 3;
135 struct thread_struct *t = &current->thread;
135 136
136 if (level > 3) 137 if (level > 3)
137 return -EINVAL; 138 return -EINVAL;
@@ -140,8 +141,8 @@ asmlinkage long sys_iopl(unsigned long unused)
140 if (!capable(CAP_SYS_RAWIO)) 141 if (!capable(CAP_SYS_RAWIO))
141 return -EPERM; 142 return -EPERM;
142 } 143 }
143 regs->eflags = (regs->eflags &~ 0x3000UL) | (level << 12); 144 t->iopl = level << 12;
144 /* Make sure we return the long way (not sysenter) */ 145 regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl;
145 set_thread_flag(TIF_IRET); 146 set_iopl_mask(t->iopl);
146 return 0; 147 return 0;
147} 148}
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 660997800393..b45cbf93d439 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -712,6 +712,12 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
712 loadsegment(gs, next->gs); 712 loadsegment(gs, next->gs);
713 713
714 /* 714 /*
715 * Restore IOPL if needed.
716 */
717 if (unlikely(prev->iopl != next->iopl))
718 set_iopl_mask(next->iopl);
719
720 /*
715 * Now maybe reload the debug registers 721 * Now maybe reload the debug registers
716 */ 722 */
717 if (unlikely(next->debugreg[7])) { 723 if (unlikely(next->debugreg[7])) {
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index 992224bff549..37bef8ed7bed 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -455,6 +455,7 @@ struct thread_struct {
455 unsigned int saved_fs, saved_gs; 455 unsigned int saved_fs, saved_gs;
456/* IO permissions */ 456/* IO permissions */
457 unsigned long *io_bitmap_ptr; 457 unsigned long *io_bitmap_ptr;
458 unsigned long iopl;
458/* max allowed port in the bitmap, in bytes: */ 459/* max allowed port in the bitmap, in bytes: */
459 unsigned long io_bitmap_max; 460 unsigned long io_bitmap_max;
460}; 461};
@@ -511,6 +512,21 @@ static inline void load_esp0(struct tss_struct *tss, struct thread_struct *threa
511 : /* no output */ \ 512 : /* no output */ \
512 :"r" (value)) 513 :"r" (value))
513 514
515/*
516 * Set IOPL bits in EFLAGS from given mask
517 */
518static inline void set_iopl_mask(unsigned mask)
519{
520 unsigned int reg;
521 __asm__ __volatile__ ("pushfl;"
522 "popl %0;"
523 "andl %1, %0;"
524 "orl %2, %0;"
525 "pushl %0;"
526 "popfl"
527 : "=&r" (reg)
528 : "i" (~X86_EFLAGS_IOPL), "r" (mask));
529}
514 530
515/* Forward declaration, a strange C thing */ 531/* Forward declaration, a strange C thing */
516struct task_struct; 532struct task_struct;
diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h
index 37fd2f8c7196..acd5c26b69ba 100644
--- a/include/asm-i386/system.h
+++ b/include/asm-i386/system.h
@@ -14,8 +14,7 @@ extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struc
14 14
15#define switch_to(prev,next,last) do { \ 15#define switch_to(prev,next,last) do { \
16 unsigned long esi,edi; \ 16 unsigned long esi,edi; \
17 asm volatile("pushfl\n\t" \ 17 asm volatile("pushl %%ebp\n\t" \
18 "pushl %%ebp\n\t" \
19 "movl %%esp,%0\n\t" /* save ESP */ \ 18 "movl %%esp,%0\n\t" /* save ESP */ \
20 "movl %5,%%esp\n\t" /* restore ESP */ \ 19 "movl %5,%%esp\n\t" /* restore ESP */ \
21 "movl $1f,%1\n\t" /* save EIP */ \ 20 "movl $1f,%1\n\t" /* save EIP */ \
@@ -23,7 +22,6 @@ extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struc
23 "jmp __switch_to\n" \ 22 "jmp __switch_to\n" \
24 "1:\t" \ 23 "1:\t" \
25 "popl %%ebp\n\t" \ 24 "popl %%ebp\n\t" \
26 "popfl" \
27 :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \ 25 :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \
28 "=a" (last),"=S" (esi),"=D" (edi) \ 26 "=a" (last),"=S" (esi),"=D" (edi) \
29 :"m" (next->thread.esp),"m" (next->thread.eip), \ 27 :"m" (next->thread.esp),"m" (next->thread.eip), \