diff options
author | Rusty Russell <rusty@rustcorp.com.au> | 2006-09-26 04:52:39 -0400 |
---|---|---|
committer | Andi Kleen <andi@basil.nowhere.org> | 2006-09-26 04:52:39 -0400 |
commit | 78be3706b21a232310590fe00258b224177ac05f (patch) | |
tree | 57dbe444e57241613067145b04c0e15c98278cd6 | |
parent | 0da5db313317e3195482d3e660a1074857374a89 (diff) |
[PATCH] i386: Allow a kernel not to be in ring 0
We allow for the fact that the guest kernel may not run in ring 0. This
requires some abstraction in a few places when setting %cs or checking
privilege level (user vs kernel).
This is Chris' [RFC PATCH 15/33] move segment checks to subarch, except rather
than using #define USER_MODE_MASK which depends on a config option, we use
Zach's more flexible approach of assuming ring 3 == userspace. I also used
"get_kernel_rpl()" over "get_kernel_cs()" because I think it reads better in
the code...
1) Remove the hardcoded 3 and introduce #define SEGMENT_RPL_MASK 3.
2) Add a get_kernel_rpl() macro, and don't assume it's zero.
And:
Cleanup of the patch that lets the kernel run in a ring other than ring 0:
a. Add some comments about the SEGMENT_IS_*_CODE() macros.
b. Add a USER_RPL macro. (Code was comparing a value to a mask
in some places and to the magic number 3 in other places.)
c. Add macros for table indicator field and use them.
d. Change the entry.S tests for LDT stack segment to use the macros.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Andi Kleen <ak@suse.de>
-rw-r--r-- | arch/i386/kernel/entry.S | 9 | ||||
-rw-r--r-- | arch/i386/kernel/process.c | 2 | ||||
-rw-r--r-- | arch/i386/mm/extable.c | 2 | ||||
-rw-r--r-- | arch/i386/mm/fault.c | 11 | ||||
-rw-r--r-- | include/asm-i386/ptrace.h | 5 | ||||
-rw-r--r-- | include/asm-i386/segment.h | 17 |
6 files changed, 31 insertions, 15 deletions
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 3872fca5c74a..284f2e908ad0 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S | |||
@@ -240,8 +240,9 @@ ret_from_intr: | |||
240 | check_userspace: | 240 | check_userspace: |
241 | movl EFLAGS(%esp), %eax # mix EFLAGS and CS | 241 | movl EFLAGS(%esp), %eax # mix EFLAGS and CS |
242 | movb CS(%esp), %al | 242 | movb CS(%esp), %al |
243 | testl $(VM_MASK | 3), %eax | 243 | andl $(VM_MASK | SEGMENT_RPL_MASK), %eax |
244 | jz resume_kernel | 244 | cmpl $USER_RPL, %eax |
245 | jb resume_kernel # not returning to v8086 or userspace | ||
245 | ENTRY(resume_userspace) | 246 | ENTRY(resume_userspace) |
246 | DISABLE_INTERRUPTS # make sure we don't miss an interrupt | 247 | DISABLE_INTERRUPTS # make sure we don't miss an interrupt |
247 | # setting need_resched or sigpending | 248 | # setting need_resched or sigpending |
@@ -377,8 +378,8 @@ restore_all: | |||
377 | # See comments in process.c:copy_thread() for details. | 378 | # See comments in process.c:copy_thread() for details. |
378 | movb OLDSS(%esp), %ah | 379 | movb OLDSS(%esp), %ah |
379 | movb CS(%esp), %al | 380 | movb CS(%esp), %al |
380 | andl $(VM_MASK | (4 << 8) | 3), %eax | 381 | andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax |
381 | cmpl $((4 << 8) | 3), %eax | 382 | cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax |
382 | CFI_REMEMBER_STATE | 383 | CFI_REMEMBER_STATE |
383 | je ldt_ss # returning to user-space with LDT SS | 384 | je ldt_ss # returning to user-space with LDT SS |
384 | restore_nocheck: | 385 | restore_nocheck: |
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 220aeca59c3a..8c190ca7ae44 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c | |||
@@ -338,7 +338,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) | |||
338 | regs.xes = __USER_DS; | 338 | regs.xes = __USER_DS; |
339 | regs.orig_eax = -1; | 339 | regs.orig_eax = -1; |
340 | regs.eip = (unsigned long) kernel_thread_helper; | 340 | regs.eip = (unsigned long) kernel_thread_helper; |
341 | regs.xcs = __KERNEL_CS; | 341 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); |
342 | regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; | 342 | regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; |
343 | 343 | ||
344 | /* Ok, create the new process.. */ | 344 | /* Ok, create the new process.. */ |
diff --git a/arch/i386/mm/extable.c b/arch/i386/mm/extable.c index de03c5430abc..0ce4f22a2635 100644 --- a/arch/i386/mm/extable.c +++ b/arch/i386/mm/extable.c | |||
@@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs) | |||
11 | const struct exception_table_entry *fixup; | 11 | const struct exception_table_entry *fixup; |
12 | 12 | ||
13 | #ifdef CONFIG_PNPBIOS | 13 | #ifdef CONFIG_PNPBIOS |
14 | if (unlikely((regs->xcs & ~15) == (GDT_ENTRY_PNPBIOS_BASE << 3))) | 14 | if (unlikely(SEGMENT_IS_PNP_CODE(regs->xcs))) |
15 | { | 15 | { |
16 | extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp; | 16 | extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp; |
17 | extern u32 pnp_bios_is_utter_crap; | 17 | extern u32 pnp_bios_is_utter_crap; |
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index 0ce86168a0b1..5e17a3f43b41 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <asm/uaccess.h> | 27 | #include <asm/uaccess.h> |
28 | #include <asm/desc.h> | 28 | #include <asm/desc.h> |
29 | #include <asm/kdebug.h> | 29 | #include <asm/kdebug.h> |
30 | #include <asm/segment.h> | ||
30 | 31 | ||
31 | extern void die(const char *,struct pt_regs *,long); | 32 | extern void die(const char *,struct pt_regs *,long); |
32 | 33 | ||
@@ -113,10 +114,10 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs, | |||
113 | } | 114 | } |
114 | 115 | ||
115 | /* The standard kernel/user address space limit. */ | 116 | /* The standard kernel/user address space limit. */ |
116 | *eip_limit = (seg & 3) ? USER_DS.seg : KERNEL_DS.seg; | 117 | *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg; |
117 | 118 | ||
118 | /* By far the most common cases. */ | 119 | /* By far the most common cases. */ |
119 | if (likely(seg == __USER_CS || seg == __KERNEL_CS)) | 120 | if (likely(SEGMENT_IS_FLAT_CODE(seg))) |
120 | return eip; | 121 | return eip; |
121 | 122 | ||
122 | /* Check the segment exists, is within the current LDT/GDT size, | 123 | /* Check the segment exists, is within the current LDT/GDT size, |
@@ -430,11 +431,7 @@ good_area: | |||
430 | write = 0; | 431 | write = 0; |
431 | switch (error_code & 3) { | 432 | switch (error_code & 3) { |
432 | default: /* 3: write, present */ | 433 | default: /* 3: write, present */ |
433 | #ifdef TEST_VERIFY_AREA | 434 | /* fall through */ |
434 | if (regs->cs == KERNEL_CS) | ||
435 | printk("WP fault at %08lx\n", regs->eip); | ||
436 | #endif | ||
437 | /* fall through */ | ||
438 | case 2: /* write, not present */ | 435 | case 2: /* write, not present */ |
439 | if (!(vma->vm_flags & VM_WRITE)) | 436 | if (!(vma->vm_flags & VM_WRITE)) |
440 | goto bad_area; | 437 | goto bad_area; |
diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h index 30a442ec2059..21bb91679c82 100644 --- a/include/asm-i386/ptrace.h +++ b/include/asm-i386/ptrace.h | |||
@@ -60,6 +60,7 @@ struct pt_regs { | |||
60 | #ifdef __KERNEL__ | 60 | #ifdef __KERNEL__ |
61 | 61 | ||
62 | #include <asm/vm86.h> | 62 | #include <asm/vm86.h> |
63 | #include <asm/segment.h> | ||
63 | 64 | ||
64 | struct task_struct; | 65 | struct task_struct; |
65 | extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code); | 66 | extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code); |
@@ -73,11 +74,11 @@ extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int erro | |||
73 | */ | 74 | */ |
74 | static inline int user_mode(struct pt_regs *regs) | 75 | static inline int user_mode(struct pt_regs *regs) |
75 | { | 76 | { |
76 | return (regs->xcs & 3) != 0; | 77 | return (regs->xcs & SEGMENT_RPL_MASK) == USER_RPL; |
77 | } | 78 | } |
78 | static inline int user_mode_vm(struct pt_regs *regs) | 79 | static inline int user_mode_vm(struct pt_regs *regs) |
79 | { | 80 | { |
80 | return ((regs->xcs & 3) | (regs->eflags & VM_MASK)) != 0; | 81 | return ((regs->xcs & SEGMENT_RPL_MASK) | (regs->eflags & VM_MASK)) >= USER_RPL; |
81 | } | 82 | } |
82 | #define instruction_pointer(regs) ((regs)->eip) | 83 | #define instruction_pointer(regs) ((regs)->eip) |
83 | extern unsigned long profile_pc(struct pt_regs *regs); | 84 | extern unsigned long profile_pc(struct pt_regs *regs); |
diff --git a/include/asm-i386/segment.h b/include/asm-i386/segment.h index faf995307b9e..b7ab59685ba7 100644 --- a/include/asm-i386/segment.h +++ b/include/asm-i386/segment.h | |||
@@ -83,6 +83,11 @@ | |||
83 | 83 | ||
84 | #define GDT_SIZE (GDT_ENTRIES * 8) | 84 | #define GDT_SIZE (GDT_ENTRIES * 8) |
85 | 85 | ||
86 | /* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */ | ||
87 | #define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8) | ||
88 | /* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ | ||
89 | #define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) | ||
90 | |||
86 | /* Simple and small GDT entries for booting only */ | 91 | /* Simple and small GDT entries for booting only */ |
87 | 92 | ||
88 | #define GDT_ENTRY_BOOT_CS 2 | 93 | #define GDT_ENTRY_BOOT_CS 2 |
@@ -112,4 +117,16 @@ | |||
112 | */ | 117 | */ |
113 | #define IDT_ENTRIES 256 | 118 | #define IDT_ENTRIES 256 |
114 | 119 | ||
120 | /* Bottom two bits of selector give the ring privilege level */ | ||
121 | #define SEGMENT_RPL_MASK 0x3 | ||
122 | /* Bit 2 is table indicator (LDT/GDT) */ | ||
123 | #define SEGMENT_TI_MASK 0x4 | ||
124 | |||
125 | /* User mode is privilege level 3 */ | ||
126 | #define USER_RPL 0x3 | ||
127 | /* LDT segment has TI set, GDT has it cleared */ | ||
128 | #define SEGMENT_LDT 0x4 | ||
129 | #define SEGMENT_GDT 0x0 | ||
130 | |||
131 | #define get_kernel_rpl() 0 | ||
115 | #endif | 132 | #endif |