aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Rusty Russell <rusty@rustcorp.com.au> 2006-09-26 04:52:39 -0400
committer Andi Kleen <andi@basil.nowhere.org> 2006-09-26 04:52:39 -0400
commit 78be3706b21a232310590fe00258b224177ac05f (patch)
tree 57dbe444e57241613067145b04c0e15c98278cd6
parent 0da5db313317e3195482d3e660a1074857374a89 (diff)
[PATCH] i386: Allow a kernel not to be in ring 0
We allow for the fact that the guest kernel may not run in ring 0. This requires some abstraction in a few places when setting %cs or checking privilege level (user vs kernel).

This is Chris' [RFC PATCH 15/33] move segment checks to subarch, except rather than using #define USER_MODE_MASK which depends on a config option, we use Zach's more flexible approach of assuming ring 3 == userspace. I also used "get_kernel_rpl()" over "get_kernel_cs()" because I think it reads better in the code...

1) Remove the hardcoded 3 and introduce #define SEGMENT_RPL_MASK 3
2) Add a get_kernel_rpl() macro, and don't assume it's zero.

And:

Clean up of patch for letting kernel run other than ring 0:

a. Add some comments about the SEGMENT_IS_*_CODE() macros.
b. Add a USER_RPL macro. (Code was comparing a value to a mask in some places and to the magic number 3 in other places.)
c. Add macros for table indicator field and use them.
d. Change the entry.S tests for LDT stack segment to use the macros.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Andi Kleen <ak@suse.de>
-rw-r--r-- arch/i386/kernel/entry.S 9
-rw-r--r-- arch/i386/kernel/process.c 2
-rw-r--r-- arch/i386/mm/extable.c 2
-rw-r--r-- arch/i386/mm/fault.c 11
-rw-r--r-- include/asm-i386/ptrace.h 5
-rw-r--r-- include/asm-i386/segment.h 17
6 files changed, 31 insertions, 15 deletions
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 3872fca5c74a..284f2e908ad0 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -240,8 +240,9 @@ ret_from_intr:
240check_userspace: 240check_userspace:
241 movl EFLAGS(%esp), %eax # mix EFLAGS and CS 241 movl EFLAGS(%esp), %eax # mix EFLAGS and CS
242 movb CS(%esp), %al 242 movb CS(%esp), %al
243 testl $(VM_MASK | 3), %eax 243 andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
244 jz resume_kernel 244 cmpl $USER_RPL, %eax
245 jb resume_kernel # not returning to v8086 or userspace
245ENTRY(resume_userspace) 246ENTRY(resume_userspace)
246 DISABLE_INTERRUPTS # make sure we don't miss an interrupt 247 DISABLE_INTERRUPTS # make sure we don't miss an interrupt
247 # setting need_resched or sigpending 248 # setting need_resched or sigpending
@@ -377,8 +378,8 @@ restore_all:
377 # See comments in process.c:copy_thread() for details. 378 # See comments in process.c:copy_thread() for details.
378 movb OLDSS(%esp), %ah 379 movb OLDSS(%esp), %ah
379 movb CS(%esp), %al 380 movb CS(%esp), %al
380 andl $(VM_MASK | (4 << 8) | 3), %eax 381 andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
381 cmpl $((4 << 8) | 3), %eax 382 cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
382 CFI_REMEMBER_STATE 383 CFI_REMEMBER_STATE
383 je ldt_ss # returning to user-space with LDT SS 384 je ldt_ss # returning to user-space with LDT SS
384restore_nocheck: 385restore_nocheck:
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 220aeca59c3a..8c190ca7ae44 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -338,7 +338,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
338 regs.xes = __USER_DS; 338 regs.xes = __USER_DS;
339 regs.orig_eax = -1; 339 regs.orig_eax = -1;
340 regs.eip = (unsigned long) kernel_thread_helper; 340 regs.eip = (unsigned long) kernel_thread_helper;
341 regs.xcs = __KERNEL_CS; 341 regs.xcs = __KERNEL_CS | get_kernel_rpl();
342 regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; 342 regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
343 343
344 /* Ok, create the new process.. */ 344 /* Ok, create the new process.. */
diff --git a/arch/i386/mm/extable.c b/arch/i386/mm/extable.c
index de03c5430abc..0ce4f22a2635 100644
--- a/arch/i386/mm/extable.c
+++ b/arch/i386/mm/extable.c
@@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs)
11 const struct exception_table_entry *fixup; 11 const struct exception_table_entry *fixup;
12 12
13#ifdef CONFIG_PNPBIOS 13#ifdef CONFIG_PNPBIOS
14 if (unlikely((regs->xcs & ~15) == (GDT_ENTRY_PNPBIOS_BASE << 3))) 14 if (unlikely(SEGMENT_IS_PNP_CODE(regs->xcs)))
15 { 15 {
16 extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp; 16 extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
17 extern u32 pnp_bios_is_utter_crap; 17 extern u32 pnp_bios_is_utter_crap;
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index 0ce86168a0b1..5e17a3f43b41 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -27,6 +27,7 @@
27#include <asm/uaccess.h> 27#include <asm/uaccess.h>
28#include <asm/desc.h> 28#include <asm/desc.h>
29#include <asm/kdebug.h> 29#include <asm/kdebug.h>
30#include <asm/segment.h>
30 31
31extern void die(const char *,struct pt_regs *,long); 32extern void die(const char *,struct pt_regs *,long);
32 33
@@ -113,10 +114,10 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
113 } 114 }
114 115
115 /* The standard kernel/user address space limit. */ 116 /* The standard kernel/user address space limit. */
116 *eip_limit = (seg & 3) ? USER_DS.seg : KERNEL_DS.seg; 117 *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
117 118
118 /* By far the most common cases. */ 119 /* By far the most common cases. */
119 if (likely(seg == __USER_CS || seg == __KERNEL_CS)) 120 if (likely(SEGMENT_IS_FLAT_CODE(seg)))
120 return eip; 121 return eip;
121 122
122 /* Check the segment exists, is within the current LDT/GDT size, 123 /* Check the segment exists, is within the current LDT/GDT size,
@@ -430,11 +431,7 @@ good_area:
430 write = 0; 431 write = 0;
431 switch (error_code & 3) { 432 switch (error_code & 3) {
432 default: /* 3: write, present */ 433 default: /* 3: write, present */
433#ifdef TEST_VERIFY_AREA 434 /* fall through */
434 if (regs->cs == KERNEL_CS)
435 printk("WP fault at %08lx\n", regs->eip);
436#endif
437 /* fall through */
438 case 2: /* write, not present */ 435 case 2: /* write, not present */
439 if (!(vma->vm_flags & VM_WRITE)) 436 if (!(vma->vm_flags & VM_WRITE))
440 goto bad_area; 437 goto bad_area;
diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h
index 30a442ec2059..21bb91679c82 100644
--- a/include/asm-i386/ptrace.h
+++ b/include/asm-i386/ptrace.h
@@ -60,6 +60,7 @@ struct pt_regs {
60#ifdef __KERNEL__ 60#ifdef __KERNEL__
61 61
62#include <asm/vm86.h> 62#include <asm/vm86.h>
63#include <asm/segment.h>
63 64
64struct task_struct; 65struct task_struct;
65extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code); 66extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code);
@@ -73,11 +74,11 @@ extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int erro
73 */ 74 */
74static inline int user_mode(struct pt_regs *regs) 75static inline int user_mode(struct pt_regs *regs)
75{ 76{
76 return (regs->xcs & 3) != 0; 77 return (regs->xcs & SEGMENT_RPL_MASK) == USER_RPL;
77} 78}
78static inline int user_mode_vm(struct pt_regs *regs) 79static inline int user_mode_vm(struct pt_regs *regs)
79{ 80{
80 return ((regs->xcs & 3) | (regs->eflags & VM_MASK)) != 0; 81 return ((regs->xcs & SEGMENT_RPL_MASK) | (regs->eflags & VM_MASK)) >= USER_RPL;
81} 82}
82#define instruction_pointer(regs) ((regs)->eip) 83#define instruction_pointer(regs) ((regs)->eip)
83extern unsigned long profile_pc(struct pt_regs *regs); 84extern unsigned long profile_pc(struct pt_regs *regs);
diff --git a/include/asm-i386/segment.h b/include/asm-i386/segment.h
index faf995307b9e..b7ab59685ba7 100644
--- a/include/asm-i386/segment.h
+++ b/include/asm-i386/segment.h
@@ -83,6 +83,11 @@
83 83
84#define GDT_SIZE (GDT_ENTRIES * 8) 84#define GDT_SIZE (GDT_ENTRIES * 8)
85 85
86/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
87#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)
88/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
89#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
90
86/* Simple and small GDT entries for booting only */ 91/* Simple and small GDT entries for booting only */
87 92
88#define GDT_ENTRY_BOOT_CS 2 93#define GDT_ENTRY_BOOT_CS 2
@@ -112,4 +117,16 @@
112 */ 117 */
113#define IDT_ENTRIES 256 118#define IDT_ENTRIES 256
114 119
120/* Bottom two bits of selector give the ring privilege level */
121#define SEGMENT_RPL_MASK 0x3
122/* Bit 2 is table indicator (LDT/GDT) */
123#define SEGMENT_TI_MASK 0x4
124
125/* User mode is privilege level 3 */
126#define USER_RPL 0x3
127/* LDT segment has TI set, GDT has it cleared */
128#define SEGMENT_LDT 0x4
129#define SEGMENT_GDT 0x0
130
131#define get_kernel_rpl() 0
115#endif 132#endif