author		Harvey Harrison <harvey.harrison@gmail.com>	2008-01-30 07:32:19 -0500
committer	Ingo Molnar <mingo@elte.hu>	2008-01-30 07:32:19 -0500
commit		33cb52438341d6e2d7e06e3b64ed776bc54a2ca4 (patch)
tree		f251dcde40e15e6a4ebb6f1918c8c110ed6135a4 /arch/x86/mm/fault_64.c
parent		1d16b53e387b255d8e30f00594220b23b1290e6b (diff)
x86: cosmetic fixes fault_{32|64}.c
First step towards unifying these files.

- Checkpatch trailing whitespace fixes
- Checkpatch indentation of switch statement fixes
- Checkpatch single statement ifs need no braces fixes
- Checkpatch consistent spacing after comma fixes
- Introduce defines for pagefault error bits from X86_64 and add
  useful comment from X86_32. Use these defines in X86_32 where
  obvious.
- Unify comments between 32|64 bit
- Small ifdef movement for CONFIG_KPROBES in notify_page_fault()
- Introduce X86_64 only case statement

No Functional Changes.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
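The bits named by the new PF_* defines come straight from the hardware: the CPU pushes this error code when it raises a page fault. As a quick orientation, here is a minimal user-space sketch of how the bits decode (not part of the patch; decode_fault() is a hypothetical helper):

#include <stdio.h>

/* Page fault error code bits, as introduced by the patch below. */
#define PF_PROT		(1<<0)	/* 0: no page found, 1: protection fault */
#define PF_WRITE	(1<<1)	/* 0: read access,   1: write access */
#define PF_USER		(1<<2)	/* 0: kernel mode,   1: user mode */
#define PF_RSVD		(1<<3)	/* 1: reserved bit detected in page table */
#define PF_INSTR	(1<<4)	/* 1: fault was an instruction fetch */

/* Hypothetical helper, not in the kernel: pretty-print an error code. */
static void decode_fault(unsigned long error_code)
{
	printf("%s on %s from %s mode%s%s\n",
	       error_code & PF_PROT  ? "protection fault" : "missing page",
	       error_code & PF_WRITE ? "write" : "read",
	       error_code & PF_USER  ? "user" : "kernel",
	       error_code & PF_RSVD  ? ", reserved bit set" : "",
	       error_code & PF_INSTR ? ", instruction fetch" : "");
}

int main(void)
{
	decode_fault(PF_USER | PF_WRITE);	/* user write to an unmapped page */
	decode_fault(PF_PROT | PF_WRITE);	/* kernel write to a present page */
	decode_fault(PF_USER | PF_PROT | PF_INSTR); /* e.g. a user NX violation */
	return 0;
}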
Diffstat (limited to 'arch/x86/mm/fault_64.c')
-rw-r--r--	arch/x86/mm/fault_64.c	151
1 file changed, 77 insertions(+), 74 deletions(-)
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 3a94941578fa..7e98a7691283 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -1,6 +1,4 @@
 /*
- * linux/arch/x86-64/mm/fault.c
- *
  * Copyright (C) 1995 Linus Torvalds
  * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
  */
@@ -33,16 +31,23 @@
 #include <asm/proto.h>
 #include <asm-generic/sections.h>
 
-/* Page fault error code bits */
-#define PF_PROT	(1<<0)		/* or no page found */
+/*
+ * Page fault error code bits
+ *	bit 0 == 0 means no page found, 1 means protection fault
+ *	bit 1 == 0 means read, 1 means write
+ *	bit 2 == 0 means kernel, 1 means user-mode
+ *	bit 3 == 1 means use of reserved bit detected
+ *	bit 4 == 1 means fault was an instruction fetch
+ */
+#define PF_PROT	(1<<0)
 #define PF_WRITE	(1<<1)
 #define PF_USER	(1<<2)
 #define PF_RSVD	(1<<3)
 #define PF_INSTR	(1<<4)
 
-#ifdef CONFIG_KPROBES
 static inline int notify_page_fault(struct pt_regs *regs)
 {
+#ifdef CONFIG_KPROBES
 	int ret = 0;
 
 	/* kprobe_running() needs smp_processor_id() */
@@ -54,75 +59,75 @@ static inline int notify_page_fault(struct pt_regs *regs)
 	}
 
 	return ret;
-}
 #else
-static inline int notify_page_fault(struct pt_regs *regs)
-{
 	return 0;
-}
 #endif
+}
 
 /* Sometimes the CPU reports invalid exceptions on prefetch.
    Check that here and ignore.
    Opcode checker based on code by Richard Brunner */
 static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
 				unsigned long error_code)
 {
 	unsigned char *instr;
 	int scan_more = 1;
 	int prefetch = 0;
 	unsigned char *max_instr;
 
 	/* If it was a exec fault ignore */
 	if (error_code & PF_INSTR)
 		return 0;
 
 	instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
 	max_instr = instr + 15;
 
 	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
 		return 0;
 
 	while (scan_more && instr < max_instr) {
 		unsigned char opcode;
 		unsigned char instr_hi;
 		unsigned char instr_lo;
 
 		if (probe_kernel_address(instr, opcode))
 			break;
 
 		instr_hi = opcode & 0xf0;
 		instr_lo = opcode & 0x0f;
 		instr++;
 
 		switch (instr_hi) {
 		case 0x20:
 		case 0x30:
-			/* Values 0x26,0x2E,0x36,0x3E are valid x86
-			   prefixes. In long mode, the CPU will signal
-			   invalid opcode if some of these prefixes are
-			   present so we will never get here anyway */
+			/*
+			 * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
+			 * In X86_64 long mode, the CPU will signal invalid
+			 * opcode if some of these prefixes are present so
+			 * X86_64 will never get here anyway
+			 */
 			scan_more = ((instr_lo & 7) == 0x6);
 			break;
-
+#ifdef CONFIG_X86_64
 		case 0x40:
-			/* In AMD64 long mode, 0x40 to 0x4F are valid REX prefixes
-			   Need to figure out under what instruction mode the
-			   instruction was issued ... */
-			/* Could check the LDT for lm, but for now it's good
-			   enough to assume that long mode only uses well known
-			   segments or kernel. */
+			/*
+			 * In AMD64 long mode 0x40..0x4F are valid REX prefixes
+			 * Need to figure out under what instruction mode the
+			 * instruction was issued. Could check the LDT for lm,
+			 * but for now it's good enough to assume that long
+			 * mode only uses well known segments or kernel.
+			 */
 			scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
 			break;
-
+#endif
 		case 0x60:
 			/* 0x64 thru 0x67 are valid prefixes in all modes. */
 			scan_more = (instr_lo & 0xC) == 0x4;
 			break;
 		case 0xF0:
 			/* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */
 			scan_more = !instr_lo || (instr_lo>>1) == 1;
 			break;
 		case 0x00:
 			/* Prefetch instruction is 0x0F0D or 0x0F18 */
 			scan_more = 0;
@@ -130,20 +135,20 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
 				break;
 			prefetch = (instr_lo == 0xF) &&
 				(opcode == 0x0D || opcode == 0x18);
 			break;
 		default:
 			scan_more = 0;
 			break;
 		}
 	}
 	return prefetch;
 }
 
 static int bad_address(void *p)
 {
 	unsigned long dummy;
 	return probe_kernel_address((unsigned long *)p, dummy);
 }
 
 void dump_pagetable(unsigned long address)
 {
@@ -154,11 +159,11 @@ void dump_pagetable(unsigned long address)
 
 	pgd = (pgd_t *)read_cr3();
 
 	pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
 	pgd += pgd_index(address);
 	if (bad_address(pgd)) goto bad;
 	printk("PGD %lx ", pgd_val(*pgd));
 	if (!pgd_present(*pgd)) goto ret;
 
 	pud = pud_offset(pgd, address);
 	if (bad_address(pud)) goto bad;
@@ -172,7 +177,7 @@ void dump_pagetable(unsigned long address)
 
 	pte = pte_offset_kernel(pmd, address);
 	if (bad_address(pte)) goto bad;
 	printk("PTE %lx", pte_val(*pte));
 ret:
 	printk("\n");
 	return;
@@ -180,7 +185,7 @@ bad:
180 printk("BAD\n"); 185 printk("BAD\n");
181} 186}
182 187
183static const char errata93_warning[] = 188static const char errata93_warning[] =
184KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" 189KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
185KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" 190KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
186KERN_ERR "******* Please consider a BIOS update.\n" 191KERN_ERR "******* Please consider a BIOS update.\n"
@@ -188,31 +193,31 @@ KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
 
 /* Workaround for K8 erratum #93 & buggy BIOS.
    BIOS SMM functions are required to use a specific workaround
    to avoid corruption of the 64bit RIP register on C stepping K8.
    A lot of BIOS that didn't get tested properly miss this.
    The OS sees this as a page fault with the upper 32bits of RIP cleared.
    Try to work around it here.
    Note we only handle faults in kernel here. */
 
 static int is_errata93(struct pt_regs *regs, unsigned long address)
 {
 	static int warned;
 	if (address != regs->ip)
 		return 0;
 	if ((address >> 32) != 0)
 		return 0;
 	address |= 0xffffffffUL << 32;
 	if ((address >= (u64)_stext && address <= (u64)_etext) ||
 	    (address >= MODULES_VADDR && address <= MODULES_END)) {
 		if (!warned) {
 			printk(errata93_warning);
 			warned = 1;
 		}
 		regs->ip = address;
 		return 1;
 	}
 	return 0;
 }
 
 static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 				 unsigned long error_code)
@@ -296,7 +301,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 {
 	struct task_struct *tsk;
 	struct mm_struct *mm;
-	struct vm_area_struct * vma;
+	struct vm_area_struct *vma;
 	unsigned long address;
 	int write, fault;
 	unsigned long flags;
@@ -360,8 +365,8 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 		pgtable_bad(address, regs, error_code);
 
 	/*
-	 * If we're in an interrupt or have no user
-	 * context, we must not take the fault..
+	 * If we're in an interrupt, have no user context or are running in an
+	 * atomic region then we must not take the fault.
 	 */
 	if (unlikely(in_atomic() || !mm))
 		goto bad_area_nosemaphore;
@@ -403,7 +408,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 		goto good_area;
 	if (!(vma->vm_flags & VM_GROWSDOWN))
 		goto bad_area;
-	if (error_code & 4) {
+	if (error_code & PF_USER) {
 		/* Allow userspace just enough access below the stack pointer
 		 * to let the 'enter' instruction work.
 		 */
@@ -420,18 +425,18 @@ good_area:
 	info.si_code = SEGV_ACCERR;
 	write = 0;
 	switch (error_code & (PF_PROT|PF_WRITE)) {
-		default:	/* 3: write, present */
-			/* fall through */
-		case PF_WRITE:		/* write, not present */
-			if (!(vma->vm_flags & VM_WRITE))
-				goto bad_area;
-			write++;
-			break;
-		case PF_PROT:		/* read, present */
+	default:	/* 3: write, present */
+		/* fall through */
+	case PF_WRITE:		/* write, not present */
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+		write++;
+		break;
+	case PF_PROT:		/* read, present */
+		goto bad_area;
+	case 0:			/* read, not present */
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
 			goto bad_area;
-		case 0:			/* read, not present */
-			if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
-				goto bad_area;
 	}
 
 	/*
@@ -491,7 +496,7 @@ bad_area_nosemaphore:
 		       tsk->comm, tsk->pid, address, regs->ip,
 		       regs->sp, error_code);
 	}
 
 	tsk->thread.cr2 = address;
 	/* Kernel addresses are always protection faults */
 	tsk->thread.error_code = error_code | (address >= TASK_SIZE);
@@ -505,21 +510,19 @@ bad_area_nosemaphore:
 	}
 
 no_context:
-
 	/* Are we prepared to handle this kernel fault? */
-	if (fixup_exception(regs)) {
+	if (fixup_exception(regs))
 		return;
-	}
 
 	/*
 	 * Hall of shame of CPU/BIOS bugs.
 	 */
 
 	if (is_prefetch(regs, address, error_code))
 		return;
 
 	if (is_errata93(regs, address))
 		return;
 
 /*
  * Oops. The kernel tried to access some bad page. We'll have to
@@ -532,7 +535,7 @@ no_context:
 		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
 	else
 		printk(KERN_ALERT "Unable to handle kernel paging request");
-	printk(" at %016lx RIP: \n" KERN_ALERT,address);
+	printk(" at %016lx RIP: \n" KERN_ALERT, address);
 	printk_address(regs->ip);
 	dump_pagetable(address);
 	tsk->thread.cr2 = address;
@@ -582,7 +585,7 @@ LIST_HEAD(pgd_list);
 
 void vmalloc_sync_all(void)
 {
 	/* Note that races in the updates of insync and start aren't
 	   problematic:
 	   insync can only get set bits added, and updates to start are only
 	   improving performance (without affecting correctness if undone). */
@@ -614,6 +617,6 @@ void vmalloc_sync_all(void)
 	}
 	/* Check that there is no need to do the same for the modules area. */
 	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
 	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
 				(__START_KERNEL & PGDIR_MASK)));
 }
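The rewritten good_area switch above classifies the four combinations of PF_PROT and PF_WRITE against the VMA's access rights. The same decision table as a standalone sketch (a user-space mock-up, not kernel code; the VM_* values and the access_ok_for_vma() helper are stand-ins):

#include <stdio.h>

#define PF_PROT  (1<<0)
#define PF_WRITE (1<<1)

/* Hypothetical stand-ins for the kernel's vm_flags bits. */
#define VM_READ  (1<<0)
#define VM_WRITE (1<<1)
#define VM_EXEC  (1<<2)

/* Mirror of the good_area switch: return 1 if the access is allowed. */
static int access_ok_for_vma(unsigned long error_code, unsigned long vm_flags)
{
	switch (error_code & (PF_PROT|PF_WRITE)) {
	default:		/* 3: write, present - fall through */
	case PF_WRITE:		/* write, not present */
		return !!(vm_flags & VM_WRITE);
	case PF_PROT:		/* read, present: always an access error */
		return 0;
	case 0:			/* read, not present */
		return !!(vm_flags & (VM_READ | VM_EXEC | VM_WRITE));
	}
}

int main(void)
{
	/* A read-only mapping allows reads of not-present pages... */
	printf("%d\n", access_ok_for_vma(0, VM_READ));			/* 1 */
	/* ...but rejects writes, whether the page is present or not. */
	printf("%d\n", access_ok_for_vma(PF_WRITE, VM_READ));		/* 0 */
	printf("%d\n", access_ok_for_vma(PF_PROT|PF_WRITE, VM_READ));	/* 0 */
	return 0;
}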