aboutsummaryrefslogtreecommitdiffstats
path: root/arch/i386/kernel
diff options
context:
space:
mode:
authorZachary Amsden <zach@vmware.com>2005-09-03 18:56:44 -0400
committerLinus Torvalds <torvalds@evo.osdl.org>2005-09-05 03:06:12 -0400
commita5201129307f414890f9a4410e38da205f5d7359 (patch)
treead70c5f1d3b336ef5665a7fd5ad8707aaec23847 /arch/i386/kernel
parent0998e4228aca046fbd747c3fed909791d52e88eb (diff)
[PATCH] x86: make IOPL explicit
The pushf/popf in switch_to are ONLY used to switch IOPL. Making this explicit in C code is more clear. This pushf/popf pair was added as a bugfix for leaking IOPL to unprivileged processes when using sysenter/sysexit based system calls (sysexit does not restore flags). When requesting an IOPL change in sys_iopl(), it is just as easy to change the current flags and the flags in the stack image (in case an IRET is required), but there is no reason to force an IRET if we came in from the SYSENTER path. This change is the minimal solution for supporting a paravirtualized Linux kernel that allows user processes to run with I/O privilege. Other solutions require radical rewrites of part of the low level fault / system call handling code, or do not fully support sysenter based system calls. Unfortunately, this added one field to the thread_struct. But as a bonus, on P4, the fastest time measured for switch_to() went from 312 to 260 cycles, a win of about 17% in the fast case through this performance critical path. Signed-off-by: Zachary Amsden <zach@vmware.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r--arch/i386/kernel/ioport.c7
-rw-r--r--arch/i386/kernel/process.c6
2 files changed, 10 insertions, 3 deletions
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c
index 8b25160393c1..f2b37654777f 100644
--- a/arch/i386/kernel/ioport.c
+++ b/arch/i386/kernel/ioport.c
@@ -132,6 +132,7 @@ asmlinkage long sys_iopl(unsigned long unused)
132 volatile struct pt_regs * regs = (struct pt_regs *) &unused; 132 volatile struct pt_regs * regs = (struct pt_regs *) &unused;
133 unsigned int level = regs->ebx; 133 unsigned int level = regs->ebx;
134 unsigned int old = (regs->eflags >> 12) & 3; 134 unsigned int old = (regs->eflags >> 12) & 3;
135 struct thread_struct *t = &current->thread;
135 136
136 if (level > 3) 137 if (level > 3)
137 return -EINVAL; 138 return -EINVAL;
@@ -140,8 +141,8 @@ asmlinkage long sys_iopl(unsigned long unused)
140 if (!capable(CAP_SYS_RAWIO)) 141 if (!capable(CAP_SYS_RAWIO))
141 return -EPERM; 142 return -EPERM;
142 } 143 }
143 regs->eflags = (regs->eflags &~ 0x3000UL) | (level << 12); 144 t->iopl = level << 12;
144 /* Make sure we return the long way (not sysenter) */ 145 regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl;
145 set_thread_flag(TIF_IRET); 146 set_iopl_mask(t->iopl);
146 return 0; 147 return 0;
147} 148}
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 660997800393..b45cbf93d439 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -712,6 +712,12 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
712 loadsegment(gs, next->gs); 712 loadsegment(gs, next->gs);
713 713
714 /* 714 /*
715 * Restore IOPL if needed.
716 */
717 if (unlikely(prev->iopl != next->iopl))
718 set_iopl_mask(next->iopl);
719
720 /*
715 * Now maybe reload the debug registers 721 * Now maybe reload the debug registers
716 */ 722 */
717 if (unlikely(next->debugreg[7])) { 723 if (unlikely(next->debugreg[7])) {