path: root/arch/x86/mm/fault_32.c
author    Harvey Harrison <harvey.harrison@gmail.com>  2008-01-30 07:32:19 -0500
committer Ingo Molnar <mingo@elte.hu>                  2008-01-30 07:32:19 -0500
commit    33cb52438341d6e2d7e06e3b64ed776bc54a2ca4 (patch)
tree      f251dcde40e15e6a4ebb6f1918c8c110ed6135a4 /arch/x86/mm/fault_32.c
parent    1d16b53e387b255d8e30f00594220b23b1290e6b (diff)
x86: cosmetic fixes fault_{32|64}.c

First step towards unifying these files.

- Checkpatch trailing whitespace fixes
- Checkpatch indentation of switch statement fixes
- Checkpatch single statement ifs need no braces fixes
- Checkpatch consistent spacing after comma fixes
- Introduce defines for pagefault error bits from X86_64 and add useful
  comment from X86_32. Use these defines in X86_32 where obvious.
- Unify comments between 32|64 bit
- Small ifdef movement for CONFIG_KPROBES in notify_page_fault()
- Introduce X86_64 only case statement

No functional changes.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
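The defines introduced here turn the magic numbers used throughout
fault_32.c (error_code & 4, error_code & 3) into named bits. As a quick
reference, a minimal, hypothetical decoder built from the new PF_* bits
might look like the sketch below; the helper name and message text are
invented for illustration and are not part of the patch:

    /*
     * Illustration only: decode a page-fault error code using the
     * PF_* bits this patch introduces. Not part of the patch.
     */
    static void show_fault_bits(unsigned long error_code)
    {
    	printk("fault: %s, %s, %s%s%s\n",
    	       (error_code & PF_PROT)  ? "protection" : "no page found",
    	       (error_code & PF_WRITE) ? "write" : "read",
    	       (error_code & PF_USER)  ? "user-mode" : "kernel",
    	       (error_code & PF_RSVD)  ? ", reserved bit" : "",
    	       (error_code & PF_INSTR) ? ", instruction fetch" : "");
    }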
Diffstat (limited to 'arch/x86/mm/fault_32.c')
-rw-r--r--  arch/x86/mm/fault_32.c  148
1 file changed, 83 insertions(+), 65 deletions(-)
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index db8d748814e4..bfb0917d699d 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -1,6 +1,4 @@
 /*
- * linux/arch/i386/mm/fault.c
- *
  * Copyright (C) 1995 Linus Torvalds
  */
 
@@ -30,11 +28,25 @@
 #include <asm/desc.h>
 #include <asm/segment.h>
 
-extern void die(const char *,struct pt_regs *,long);
+/*
+ * Page fault error code bits
+ * bit 0 == 0 means no page found, 1 means protection fault
+ * bit 1 == 0 means read, 1 means write
+ * bit 2 == 0 means kernel, 1 means user-mode
+ * bit 3 == 1 means use of reserved bit detected
+ * bit 4 == 1 means fault was an instruction fetch
+ */
+#define PF_PROT (1<<0)
+#define PF_WRITE (1<<1)
+#define PF_USER (1<<2)
+#define PF_RSVD (1<<3)
+#define PF_INSTR (1<<4)
+
+extern void die(const char *, struct pt_regs *, long);
 
-#ifdef CONFIG_KPROBES
 static inline int notify_page_fault(struct pt_regs *regs)
 {
+#ifdef CONFIG_KPROBES
 	int ret = 0;
 
 	/* kprobe_running() needs smp_processor_id() */
@@ -46,13 +58,10 @@ static inline int notify_page_fault(struct pt_regs *regs)
 	}
 
 	return ret;
-}
 #else
-static inline int notify_page_fault(struct pt_regs *regs)
-{
 	return 0;
-}
 #endif
+}
 
 /*
  * Return EIP plus the CS segment base. The segment limit is also
@@ -65,7 +74,7 @@ static inline int notify_page_fault(struct pt_regs *regs)
  * If CS is no longer a valid code segment, or if EIP is beyond the
  * limit, or if it is a kernel address when CS is not a kernel segment,
  * then the returned value will be greater than *eip_limit.
- * 
+ *
  * This is slow, but is very rarely executed.
  */
 static inline unsigned long get_segment_eip(struct pt_regs *regs,
@@ -84,7 +93,7 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
 
 	/* The standard kernel/user address space limit. */
 	*eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
- 
+
 	/* By far the most common cases. */
 	if (likely(SEGMENT_IS_FLAT_CODE(seg)))
 		return ip;
@@ -99,7 +108,7 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
 		return 1;	/* So that returned ip > *eip_limit. */
 	}
 
-	/* Get the GDT/LDT descriptor base. 
+	/* Get the GDT/LDT descriptor base.
 	   When you look for races in this code remember that
 	   LDT and other horrors are only used in user space. */
 	if (seg & (1<<2)) {
@@ -109,16 +118,16 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
 		desc = (void *)desc + (seg & ~7);
 	} else {
 		/* Must disable preemption while reading the GDT. */
- 		desc = (u32 *)get_cpu_gdt_table(get_cpu());
+		desc = (u32 *)get_cpu_gdt_table(get_cpu());
 		desc = (void *)desc + (seg & ~7);
 	}
 
 	/* Decode the code segment base from the descriptor */
 	base = get_desc_base((struct desc_struct *)desc);
 
-	if (seg & (1<<2)) { 
+	if (seg & (1<<2))
 		mutex_unlock(&current->mm->context.lock);
-	} else
+	else
 		put_cpu();
 
 	/* Adjust EIP and segment limit, and clamp at the kernel limit.
@@ -129,19 +138,19 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
 	return ip + base;
 }
 
-/* 
+/*
  * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
  * Check that here and ignore it.
  */
 static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
-{ 
+{
 	unsigned long limit;
-	unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit);
+	unsigned char *instr = (unsigned char *)get_segment_eip(regs, &limit);
 	int scan_more = 1;
-	int prefetch = 0; 
+	int prefetch = 0;
 	int i;
 
-	for (i = 0; scan_more && i < 15; i++) { 
+	for (i = 0; scan_more && i < 15; i++) {
 		unsigned char opcode;
 		unsigned char instr_hi;
 		unsigned char instr_lo;
@@ -149,27 +158,43 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
 		if (instr > (unsigned char *)limit)
 			break;
 		if (probe_kernel_address(instr, opcode))
 			break;
 
 		instr_hi = opcode & 0xf0;
 		instr_lo = opcode & 0x0f;
 		instr++;
 
 		switch (instr_hi) {
 		case 0x20:
 		case 0x30:
-			/* Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. */
+			/*
+			 * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
+			 * In X86_64 long mode, the CPU will signal invalid
+			 * opcode if some of these prefixes are present so
+			 * X86_64 will never get here anyway
+			 */
 			scan_more = ((instr_lo & 7) == 0x6);
 			break;
-			
+#ifdef CONFIG_X86_64
+		case 0x40:
+			/*
+			 * In AMD64 long mode 0x40..0x4F are valid REX prefixes
+			 * Need to figure out under what instruction mode the
+			 * instruction was issued. Could check the LDT for lm,
+			 * but for now it's good enough to assume that long
+			 * mode only uses well known segments or kernel.
+			 */
+			scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
+			break;
+#endif
 		case 0x60:
 			/* 0x64 thru 0x67 are valid prefixes in all modes. */
 			scan_more = (instr_lo & 0xC) == 0x4;
 			break;
 		case 0xF0:
-			/* 0xF0, 0xF2, and 0xF3 are valid prefixes */
+			/* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
 			scan_more = !instr_lo || (instr_lo>>1) == 1;
 			break;
 		case 0x00:
 			/* Prefetch instruction is 0x0F0D or 0x0F18 */
 			scan_more = 0;
@@ -179,11 +204,11 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
 				break;
 			prefetch = (instr_lo == 0xF) &&
 				(opcode == 0x0D || opcode == 0x18);
-			break; 
+			break;
 		default:
 			scan_more = 0;
 			break;
-		} 
+		}
 	}
 	return prefetch;
 }
@@ -199,7 +224,7 @@ static inline int is_prefetch(struct pt_regs *regs, unsigned long addr,
 		return __is_prefetch(regs, addr);
 	}
 	return 0;
-} 
+}
 
 static noinline void force_sig_info_fault(int si_signo, int si_code,
 			unsigned long address, struct task_struct *tsk)
@@ -284,19 +309,12 @@ int show_unhandled_signals = 1;
  * This routine handles page faults. It determines the address,
  * and the problem, and then passes it off to one of the appropriate
  * routines.
- *
- * error_code:
- * bit 0 == 0 means no page found, 1 means protection fault
- * bit 1 == 0 means read, 1 means write
- * bit 2 == 0 means kernel, 1 means user-mode
- * bit 3 == 1 means use of reserved bit detected
- * bit 4 == 1 means fault was an instruction fetch
  */
 void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
 	struct task_struct *tsk;
 	struct mm_struct *mm;
-	struct vm_area_struct * vma;
+	struct vm_area_struct *vma;
 	unsigned long address;
 	int write, si_code;
 	int fault;
@@ -307,7 +325,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	trace_hardirqs_fixup();
 
 	/* get the address */
- 	address = read_cr2();
+	address = read_cr2();
 
 	tsk = current;
 
@@ -350,7 +368,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 
 	/*
 	 * If we're in an interrupt, have no user context or are running in an
-	 * atomic region then we must not take the fault..
+	 * atomic region then we must not take the fault.
 	 */
 	if (in_atomic() || !mm)
 		goto bad_area_nosemaphore;
@@ -371,7 +389,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	 * thus avoiding the deadlock.
 	 */
 	if (!down_read_trylock(&mm->mmap_sem)) {
-		if ((error_code & 4) == 0 &&
+		if ((error_code & PF_USER) == 0 &&
 		    !search_exception_tables(regs->ip))
 			goto bad_area_nosemaphore;
 		down_read(&mm->mmap_sem);
@@ -384,7 +402,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		goto good_area;
 	if (!(vma->vm_flags & VM_GROWSDOWN))
 		goto bad_area;
-	if (error_code & 4) {
+	if (error_code & PF_USER) {
 		/*
 		 * Accessing the stack below %sp is always a bug.
 		 * The large cushion allows instructions like enter
@@ -403,19 +421,19 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 good_area:
 	si_code = SEGV_ACCERR;
 	write = 0;
-	switch (error_code & 3) {
+	switch (error_code & (PF_PROT|PF_WRITE)) {
 	default:	/* 3: write, present */
 		/* fall through */
-	case 2:		/* write, not present */
+	case PF_WRITE:		/* write, not present */
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
 		write++;
 		break;
-	case 1:		/* read, present */
+	case PF_PROT:		/* read, present */
+		goto bad_area;
+	case 0:			/* read, not present */
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
 			goto bad_area;
-	case 0:		/* read, not present */
-		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
-			goto bad_area;
 	}
 
 survive:
@@ -457,14 +475,14 @@ bad_area:
 
 bad_area_nosemaphore:
 	/* User mode accesses just cause a SIGSEGV */
-	if (error_code & 4) {
+	if (error_code & PF_USER) {
 		/*
 		 * It's possible to have interrupts off here.
 		 */
 		local_irq_enable();
 
-		/* 
+		/*
 		 * Valid to do another page fault here because this one came
 		 * from user space.
 		 */
 		if (is_prefetch(regs, address, error_code))
@@ -492,7 +510,7 @@ bad_area_nosemaphore:
 	 */
 	if (boot_cpu_data.f00f_bug) {
 		unsigned long nr;
- 
+
 		nr = (address - idt_descr.address) >> 3;
 
 		if (nr == 6) {
@@ -507,13 +525,13 @@ no_context:
 	if (fixup_exception(regs))
 		return;
 
-	/* 
+	/*
 	 * Valid to do another page fault here, because if this fault
-	 * had been triggered by is_prefetch fixup_exception would have 
+	 * had been triggered by is_prefetch fixup_exception would have
 	 * handled it.
 	 */
- 	if (is_prefetch(regs, address, error_code))
- 		return;
+	if (is_prefetch(regs, address, error_code))
+		return;
 
 /*
  * Oops. The kernel tried to access some bad page. We'll have to
@@ -541,7 +559,7 @@ no_context:
 	else
 		printk(KERN_ALERT "BUG: unable to handle kernel paging"
 			" request");
-	printk(" at virtual address %08lx\n",address);
+	printk(" at virtual address %08lx\n", address);
 	printk(KERN_ALERT "printing ip: %08lx ", regs->ip);
 
 	page = read_cr3();
@@ -605,7 +623,7 @@ do_sigbus:
 	up_read(&mm->mmap_sem);
 
 	/* Kernel mode? Handle exceptions or die */
-	if (!(error_code & 4))
+	if (!(error_code & PF_USER))
 		goto no_context;
 
 	/* User space => ok to do another page fault */
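Read with the new defines, the good_area switch above enumerates the four
possible values of error_code & (PF_PROT|PF_WRITE). A sketch restating its
logic as commentary (not code from the patch):

    /*
     * error_code & (PF_PROT|PF_WRITE) in the good_area switch:
     *   PF_PROT|PF_WRITE (3): write, present     -> vma must allow VM_WRITE
     *   PF_WRITE         (2): write, not present -> vma must allow VM_WRITE
     *   PF_PROT          (1): read, present      -> protection fault, always bad_area
     *   0                   : read, not present  -> vma must allow VM_READ/VM_EXEC/VM_WRITE
     */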