diff options
Diffstat (limited to 'arch/powerpc/mm/fault.c')
-rw-r--r-- | arch/powerpc/mm/fault.c | 391 |
1 files changed, 391 insertions, 0 deletions
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c new file mode 100644 index 000000000000..3df641fa789d --- /dev/null +++ b/arch/powerpc/mm/fault.c | |||
@@ -0,0 +1,391 @@ | |||
1 | /* | ||
2 | * arch/ppc/mm/fault.c | ||
3 | * | ||
4 | * PowerPC version | ||
5 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | ||
6 | * | ||
7 | * Derived from "arch/i386/mm/fault.c" | ||
8 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | ||
9 | * | ||
10 | * Modified by Cort Dougan and Paul Mackerras. | ||
11 | * | ||
12 | * Modified for PPC64 by Dave Engebretsen (engebret@ibm.com) | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or | ||
15 | * modify it under the terms of the GNU General Public License | ||
16 | * as published by the Free Software Foundation; either version | ||
17 | * 2 of the License, or (at your option) any later version. | ||
18 | */ | ||
19 | |||
20 | #include <linux/config.h> | ||
21 | #include <linux/signal.h> | ||
22 | #include <linux/sched.h> | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/errno.h> | ||
25 | #include <linux/string.h> | ||
26 | #include <linux/types.h> | ||
27 | #include <linux/ptrace.h> | ||
28 | #include <linux/mman.h> | ||
29 | #include <linux/mm.h> | ||
30 | #include <linux/interrupt.h> | ||
31 | #include <linux/highmem.h> | ||
32 | #include <linux/module.h> | ||
33 | #include <linux/kprobes.h> | ||
34 | |||
35 | #include <asm/page.h> | ||
36 | #include <asm/pgtable.h> | ||
37 | #include <asm/mmu.h> | ||
38 | #include <asm/mmu_context.h> | ||
39 | #include <asm/system.h> | ||
40 | #include <asm/uaccess.h> | ||
41 | #include <asm/tlbflush.h> | ||
42 | #include <asm/kdebug.h> | ||
43 | #include <asm/siginfo.h> | ||
44 | |||
45 | /* | ||
46 | * Check whether the instruction at regs->nip is a store using | ||
47 | * an update addressing form which will update r1. | ||
48 | */ | ||
49 | static int store_updates_sp(struct pt_regs *regs) | ||
50 | { | ||
51 | unsigned int inst; | ||
52 | |||
53 | if (get_user(inst, (unsigned int __user *)regs->nip)) | ||
54 | return 0; | ||
55 | /* check for 1 in the rA field */ | ||
56 | if (((inst >> 16) & 0x1f) != 1) | ||
57 | return 0; | ||
58 | /* check major opcode */ | ||
59 | switch (inst >> 26) { | ||
60 | case 37: /* stwu */ | ||
61 | case 39: /* stbu */ | ||
62 | case 45: /* sthu */ | ||
63 | case 53: /* stfsu */ | ||
64 | case 55: /* stfdu */ | ||
65 | return 1; | ||
66 | case 62: /* std or stdu */ | ||
67 | return (inst & 3) == 1; | ||
68 | case 31: | ||
69 | /* check minor opcode */ | ||
70 | switch ((inst >> 1) & 0x3ff) { | ||
71 | case 181: /* stdux */ | ||
72 | case 183: /* stwux */ | ||
73 | case 247: /* stbux */ | ||
74 | case 439: /* sthux */ | ||
75 | case 695: /* stfsux */ | ||
76 | case 759: /* stfdux */ | ||
77 | return 1; | ||
78 | } | ||
79 | } | ||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | static void do_dabr(struct pt_regs *regs, unsigned long error_code) | ||
84 | { | ||
85 | siginfo_t info; | ||
86 | |||
87 | if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, | ||
88 | 11, SIGSEGV) == NOTIFY_STOP) | ||
89 | return; | ||
90 | |||
91 | if (debugger_dabr_match(regs)) | ||
92 | return; | ||
93 | |||
94 | /* Clear the DABR */ | ||
95 | set_dabr(0); | ||
96 | |||
97 | /* Deliver the signal to userspace */ | ||
98 | info.si_signo = SIGTRAP; | ||
99 | info.si_errno = 0; | ||
100 | info.si_code = TRAP_HWBKPT; | ||
101 | info.si_addr = (void __user *)regs->nip; | ||
102 | force_sig_info(SIGTRAP, &info, current); | ||
103 | } | ||
104 | |||
105 | /* | ||
106 | * For 600- and 800-family processors, the error_code parameter is DSISR | ||
107 | * for a data fault, SRR1 for an instruction fault. For 400-family processors | ||
108 | * the error_code parameter is ESR for a data fault, 0 for an instruction | ||
109 | * fault. | ||
110 | * For 64-bit processors, the error_code parameter is | ||
111 | * - DSISR for a non-SLB data access fault, | ||
112 | * - SRR1 & 0x08000000 for a non-SLB instruction access fault | ||
113 | * - 0 for any SLB fault. | |
114 | * | ||
115 | * The return value is 0 if the fault was handled, or the signal | ||
116 | * number if this is a kernel fault that can't be handled here. | ||
117 | */ | ||
118 | int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, | ||
119 | unsigned long error_code) | ||
120 | { | ||
121 | struct vm_area_struct * vma; | ||
122 | struct mm_struct *mm = current->mm; | ||
123 | siginfo_t info; | ||
124 | int code = SEGV_MAPERR; | ||
125 | int is_write = 0; | ||
126 | int trap = TRAP(regs); | ||
127 | int is_exec = trap == 0x400; | ||
128 | |||
129 | #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) | ||
130 | /* | ||
131 | * Fortunately the bit assignments in SRR1 for an instruction | ||
132 | * fault and DSISR for a data fault are mostly the same for the | ||
133 | * bits we are interested in. But there are some bits which | ||
134 | * indicate errors in DSISR but can validly be set in SRR1. | ||
135 | */ | ||
136 | if (trap == 0x400) | ||
137 | error_code &= 0x48200000; | ||
138 | else | ||
139 | is_write = error_code & DSISR_ISSTORE; | ||
140 | #else | ||
141 | is_write = error_code & ESR_DST; | ||
142 | #endif /* CONFIG_4xx || CONFIG_BOOKE */ | ||
143 | |||
144 | if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, error_code, | ||
145 | 11, SIGSEGV) == NOTIFY_STOP) | ||
146 | return 0; | ||
147 | |||
148 | if (trap == 0x300) { | ||
149 | if (debugger_fault_handler(regs)) | ||
150 | return 0; | ||
151 | } | ||
152 | |||
153 | /* On a kernel SLB miss we can only check for a valid exception entry */ | ||
154 | if (!user_mode(regs) && (address >= TASK_SIZE)) | ||
155 | return SIGSEGV; | ||
156 | |||
157 | #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) | ||
158 | if (error_code & DSISR_DABRMATCH) { | ||
159 | /* DABR match */ | ||
160 | do_dabr(regs, error_code); | ||
161 | return 0; | ||
162 | } | ||
163 | #endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/ | ||
164 | |||
165 | if (in_atomic() || mm == NULL) { | ||
166 | if (!user_mode(regs)) | ||
167 | return SIGSEGV; | ||
168 | /* in_atomic() in user mode is really bad, | ||
169 | as is current->mm == NULL. */ | ||
170 | printk(KERN_EMERG "Page fault in user mode with" | ||
171 | "in_atomic() = %d mm = %p\n", in_atomic(), mm); | ||
172 | printk(KERN_EMERG "NIP = %lx MSR = %lx\n", | ||
173 | regs->nip, regs->msr); | ||
174 | die("Weird page fault", regs, SIGSEGV); | ||
175 | } | ||
176 | |||
177 | /* When running in the kernel we expect faults to occur only to | ||
178 | * addresses in user space. All other faults represent errors in the | ||
179 | * kernel and should generate an OOPS. Unfortunatly, in the case of an | ||
180 | * erroneous fault occuring in a code path which already holds mmap_sem | ||
181 | * we will deadlock attempting to validate the fault against the | ||
182 | * address space. Luckily the kernel only validly references user | ||
183 | * space from well defined areas of code, which are listed in the | ||
184 | * exceptions table. | ||
185 | * | ||
186 | * As the vast majority of faults will be valid we will only perform | ||
187 | * the source reference check when there is a possibilty of a deadlock. | ||
188 | * Attempt to lock the address space, if we cannot we then validate the | ||
189 | * source. If this is invalid we can skip the address space check, | ||
190 | * thus avoiding the deadlock. | ||
191 | */ | ||
192 | if (!down_read_trylock(&mm->mmap_sem)) { | ||
193 | if (!user_mode(regs) && !search_exception_tables(regs->nip)) | ||
194 | goto bad_area_nosemaphore; | ||
195 | |||
196 | down_read(&mm->mmap_sem); | ||
197 | } | ||
198 | |||
199 | vma = find_vma(mm, address); | ||
200 | if (!vma) | ||
201 | goto bad_area; | ||
202 | if (vma->vm_start <= address) | ||
203 | goto good_area; | ||
204 | if (!(vma->vm_flags & VM_GROWSDOWN)) | ||
205 | goto bad_area; | ||
206 | |||
207 | /* | ||
208 | * N.B. The POWER/Open ABI allows programs to access up to | ||
209 | * 288 bytes below the stack pointer. | ||
210 | * The kernel signal delivery code writes up to about 1.5kB | ||
211 | * below the stack pointer (r1) before decrementing it. | ||
212 | * The exec code can write slightly over 640kB to the stack | ||
213 | * before setting the user r1. Thus we allow the stack to | ||
214 | * expand to 1MB without further checks. | ||
215 | */ | ||
216 | if (address + 0x100000 < vma->vm_end) { | ||
217 | /* get user regs even if this fault is in kernel mode */ | ||
218 | struct pt_regs *uregs = current->thread.regs; | ||
219 | if (uregs == NULL) | ||
220 | goto bad_area; | ||
221 | |||
222 | /* | ||
223 | * A user-mode access to an address a long way below | ||
224 | * the stack pointer is only valid if the instruction | ||
225 | * is one which would update the stack pointer to the | ||
226 | * address accessed if the instruction completed, | ||
227 | * i.e. either stwu rs,n(r1) or stwux rs,r1,rb | ||
228 | * (or the byte, halfword, float or double forms). | ||
229 | * | ||
230 | * If we don't check this then any write to the area | ||
231 | * between the last mapped region and the stack will | ||
232 | * expand the stack rather than segfaulting. | ||
233 | */ | ||
234 | if (address + 2048 < uregs->gpr[1] | ||
235 | && (!user_mode(regs) || !store_updates_sp(regs))) | ||
236 | goto bad_area; | ||
237 | } | ||
238 | if (expand_stack(vma, address)) | ||
239 | goto bad_area; | ||
240 | |||
241 | good_area: | ||
242 | code = SEGV_ACCERR; | ||
243 | #if defined(CONFIG_6xx) | ||
244 | if (error_code & 0x95700000) | ||
245 | /* an error such as lwarx to I/O controller space, | ||
246 | address matching DABR, eciwx, etc. */ | ||
247 | goto bad_area; | ||
248 | #endif /* CONFIG_6xx */ | ||
249 | #if defined(CONFIG_8xx) | ||
250 | /* The MPC8xx seems to always set 0x80000000, which is | ||
251 | * "undefined". Of those that can be set, this is the only | ||
252 | * one which seems bad. | ||
253 | */ | ||
254 | if (error_code & 0x10000000) | ||
255 | /* Guarded storage error. */ | ||
256 | goto bad_area; | ||
257 | #endif /* CONFIG_8xx */ | ||
258 | |||
259 | if (is_exec) { | ||
260 | #ifdef CONFIG_PPC64 | ||
261 | /* protection fault */ | ||
262 | if (error_code & DSISR_PROTFAULT) | ||
263 | goto bad_area; | ||
264 | if (!(vma->vm_flags & VM_EXEC)) | ||
265 | goto bad_area; | ||
266 | #endif | ||
267 | #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) | ||
268 | pte_t *ptep; | ||
269 | |||
270 | /* Since 4xx/Book-E supports per-page execute permission, | ||
271 | * we lazily flush dcache to icache. */ | ||
272 | ptep = NULL; | ||
273 | if (get_pteptr(mm, address, &ptep) && pte_present(*ptep)) { | ||
274 | struct page *page = pte_page(*ptep); | ||
275 | |||
276 | if (! test_bit(PG_arch_1, &page->flags)) { | ||
277 | flush_dcache_icache_page(page); | ||
278 | set_bit(PG_arch_1, &page->flags); | ||
279 | } | ||
280 | pte_update(ptep, 0, _PAGE_HWEXEC); | ||
281 | _tlbie(address); | ||
282 | pte_unmap(ptep); | ||
283 | up_read(&mm->mmap_sem); | ||
284 | return 0; | ||
285 | } | ||
286 | if (ptep != NULL) | ||
287 | pte_unmap(ptep); | ||
288 | #endif | ||
289 | /* a write */ | ||
290 | } else if (is_write) { | ||
291 | if (!(vma->vm_flags & VM_WRITE)) | ||
292 | goto bad_area; | ||
293 | /* a read */ | ||
294 | } else { | ||
295 | /* protection fault */ | ||
296 | if (error_code & 0x08000000) | ||
297 | goto bad_area; | ||
298 | if (!(vma->vm_flags & (VM_READ | VM_EXEC))) | ||
299 | goto bad_area; | ||
300 | } | ||
301 | |||
302 | /* | ||
303 | * If for any reason at all we couldn't handle the fault, | ||
304 | * make sure we exit gracefully rather than endlessly redo | ||
305 | * the fault. | ||
306 | */ | ||
307 | survive: | ||
308 | switch (handle_mm_fault(mm, vma, address, is_write)) { | ||
309 | |||
310 | case VM_FAULT_MINOR: | ||
311 | current->min_flt++; | ||
312 | break; | ||
313 | case VM_FAULT_MAJOR: | ||
314 | current->maj_flt++; | ||
315 | break; | ||
316 | case VM_FAULT_SIGBUS: | ||
317 | goto do_sigbus; | ||
318 | case VM_FAULT_OOM: | ||
319 | goto out_of_memory; | ||
320 | default: | ||
321 | BUG(); | ||
322 | } | ||
323 | |||
324 | up_read(&mm->mmap_sem); | ||
325 | return 0; | ||
326 | |||
327 | bad_area: | ||
328 | up_read(&mm->mmap_sem); | ||
329 | |||
330 | bad_area_nosemaphore: | ||
331 | /* User mode accesses cause a SIGSEGV */ | ||
332 | if (user_mode(regs)) { | ||
333 | _exception(SIGSEGV, regs, code, address); | ||
334 | return 0; | ||
335 | } | ||
336 | |||
337 | if (is_exec && (error_code & DSISR_PROTFAULT) | ||
338 | && printk_ratelimit()) | ||
339 | printk(KERN_CRIT "kernel tried to execute NX-protected" | ||
340 | " page (%lx) - exploit attempt? (uid: %d)\n", | ||
341 | address, current->uid); | ||
342 | |||
343 | return SIGSEGV; | ||
344 | |||
345 | /* | ||
346 | * We ran out of memory, or some other thing happened to us that made | ||
347 | * us unable to handle the page fault gracefully. | ||
348 | */ | ||
349 | out_of_memory: | ||
350 | up_read(&mm->mmap_sem); | ||
351 | if (current->pid == 1) { | ||
352 | yield(); | ||
353 | down_read(&mm->mmap_sem); | ||
354 | goto survive; | ||
355 | } | ||
356 | printk("VM: killing process %s\n", current->comm); | ||
357 | if (user_mode(regs)) | ||
358 | do_exit(SIGKILL); | ||
359 | return SIGKILL; | ||
360 | |||
361 | do_sigbus: | ||
362 | up_read(&mm->mmap_sem); | ||
363 | if (user_mode(regs)) { | ||
364 | info.si_signo = SIGBUS; | ||
365 | info.si_errno = 0; | ||
366 | info.si_code = BUS_ADRERR; | ||
367 | info.si_addr = (void __user *)address; | ||
368 | force_sig_info(SIGBUS, &info, current); | ||
369 | return 0; | ||
370 | } | ||
371 | return SIGBUS; | ||
372 | } | ||
373 | |||
374 | /* | ||
375 | * bad_page_fault is called when we have a bad access from the kernel. | ||
376 | * It is called from the DSI and ISI handlers in head.S and from some | ||
377 | * of the procedures in traps.c. | ||
378 | */ | ||
379 | void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) | ||
380 | { | ||
381 | const struct exception_table_entry *entry; | ||
382 | |||
383 | /* Are we prepared to handle this fault? */ | ||
384 | if ((entry = search_exception_tables(regs->nip)) != NULL) { | ||
385 | regs->nip = entry->fixup; | ||
386 | return; | ||
387 | } | ||
388 | |||
389 | /* kernel has accessed a bad area */ | ||
390 | die("Kernel access of bad area", regs, sig); | ||
391 | } | ||