author     Linus Torvalds <torvalds@ppc970.osdl.org>   2005-04-16 18:20:36 -0400
committer  Linus Torvalds <torvalds@ppc970.osdl.org>   2005-04-16 18:20:36 -0400
commit     1da177e4c3f41524e886b7f1b8a0c1fc7321cac2
tree       0bba044c4ce775e45a88a51686b5d9f90697ea9d   /arch/ppc64/mm/fault.c

Linux-2.6.12-rc2  (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/ppc64/mm/fault.c')
-rw-r--r--    arch/ppc64/mm/fault.c    312
1 files changed, 312 insertions, 0 deletions
diff --git a/arch/ppc64/mm/fault.c b/arch/ppc64/mm/fault.c
new file mode 100644
index 000000000000..20b0f37e8bf8
--- /dev/null
+++ b/arch/ppc64/mm/fault.c
@@ -0,0 +1,312 @@
/*
 * arch/ppc/mm/fault.c
 *
 * PowerPC version
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Derived from "arch/i386/mm/fault.c"
 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 *
 * Modified by Cort Dougan and Paul Mackerras.
 *
 * Modified for PPC64 by Dave Engebretsen (engebret@ibm.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/smp_lock.h>
#include <linux/module.h>

#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/kdebug.h>

/*
 * Check whether the instruction at regs->nip is a store using
 * an update addressing form which will update r1.
 */
static int store_updates_sp(struct pt_regs *regs)
{
        unsigned int inst;

        if (get_user(inst, (unsigned int __user *)regs->nip))
                return 0;
        /* check for 1 in the rA field */
        if (((inst >> 16) & 0x1f) != 1)
                return 0;
        /* check major opcode */
        switch (inst >> 26) {
        case 37:        /* stwu */
        case 39:        /* stbu */
        case 45:        /* sthu */
        case 53:        /* stfsu */
        case 55:        /* stfdu */
                return 1;
        case 62:        /* std or stdu */
                return (inst & 3) == 1;
        case 31:
                /* check minor opcode */
                switch ((inst >> 1) & 0x3ff) {
                case 181:       /* stdux */
                case 183:       /* stwux */
                case 247:       /* stbux */
                case 439:       /* sthux */
                case 695:       /* stfsux */
                case 759:       /* stfdux */
                        return 1;
                }
        }
        return 0;
}

/*
 * The error_code parameter is
 *  - DSISR for a non-SLB data access fault,
 *  - SRR1 & 0x08000000 for a non-SLB instruction access fault
 *  - 0 any SLB fault.
 * The return value is 0 if the fault was handled, or the signal
 * number if this is a kernel fault that can't be handled here.
 */
int do_page_fault(struct pt_regs *regs, unsigned long address,
                  unsigned long error_code)
{
        struct vm_area_struct * vma;
        struct mm_struct *mm = current->mm;
        siginfo_t info;
        unsigned long code = SEGV_MAPERR;
        unsigned long is_write = error_code & DSISR_ISSTORE;
        unsigned long trap = TRAP(regs);
        unsigned long is_exec = trap == 0x400;

        BUG_ON((trap == 0x380) || (trap == 0x480));

        if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, error_code,
                       11, SIGSEGV) == NOTIFY_STOP)
                return 0;

        if (trap == 0x300) {
                if (debugger_fault_handler(regs))
                        return 0;
        }

        /* On a kernel SLB miss we can only check for a valid exception entry */
        if (!user_mode(regs) && (address >= TASK_SIZE))
                return SIGSEGV;

        if (error_code & DSISR_DABRMATCH) {
                if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
                               11, SIGSEGV) == NOTIFY_STOP)
                        return 0;
                if (debugger_dabr_match(regs))
                        return 0;
        }

        if (in_atomic() || mm == NULL) {
                if (!user_mode(regs))
                        return SIGSEGV;
                /* in_atomic() in user mode is really bad,
                   as is current->mm == NULL. */
                printk(KERN_EMERG "Page fault in user mode with "
                       "in_atomic() = %d mm = %p\n", in_atomic(), mm);
                printk(KERN_EMERG "NIP = %lx MSR = %lx\n",
                       regs->nip, regs->msr);
                die("Weird page fault", regs, SIGSEGV);
        }
132 | |||
133 | /* When running in the kernel we expect faults to occur only to | ||
134 | * addresses in user space. All other faults represent errors in the | ||
135 | * kernel and should generate an OOPS. Unfortunatly, in the case of an | ||
136 | * erroneous fault occuring in a code path which already holds mmap_sem | ||
137 | * we will deadlock attempting to validate the fault against the | ||
138 | * address space. Luckily the kernel only validly references user | ||
139 | * space from well defined areas of code, which are listed in the | ||
140 | * exceptions table. | ||
141 | * | ||
142 | * As the vast majority of faults will be valid we will only perform | ||
143 | * the source reference check when there is a possibilty of a deadlock. | ||
144 | * Attempt to lock the address space, if we cannot we then validate the | ||
145 | * source. If this is invalid we can skip the address space check, | ||
146 | * thus avoiding the deadlock. | ||
147 | */ | ||
        if (!down_read_trylock(&mm->mmap_sem)) {
                if (!user_mode(regs) && !search_exception_tables(regs->nip))
                        goto bad_area_nosemaphore;

                down_read(&mm->mmap_sem);
        }

        vma = find_vma(mm, address);
        if (!vma)
                goto bad_area;

        if (vma->vm_start <= address) {
                goto good_area;
        }
        if (!(vma->vm_flags & VM_GROWSDOWN))
                goto bad_area;

        /*
         * N.B. The POWER/Open ABI allows programs to access up to
         * 288 bytes below the stack pointer.
         * The kernel signal delivery code writes up to about 1.5kB
         * below the stack pointer (r1) before decrementing it.
         * The exec code can write slightly over 640kB to the stack
         * before setting the user r1. Thus we allow the stack to
         * expand to 1MB without further checks.
         */
        if (address + 0x100000 < vma->vm_end) {
                /* get user regs even if this fault is in kernel mode */
                struct pt_regs *uregs = current->thread.regs;
                if (uregs == NULL)
                        goto bad_area;

                /*
                 * A user-mode access to an address a long way below
                 * the stack pointer is only valid if the instruction
                 * is one which would update the stack pointer to the
                 * address accessed if the instruction completed,
                 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
                 * (or the byte, halfword, float or double forms).
                 *
                 * If we don't check this then any write to the area
                 * between the last mapped region and the stack will
                 * expand the stack rather than segfaulting.
                 */
                if (address + 2048 < uregs->gpr[1]
                    && (!user_mode(regs) || !store_updates_sp(regs)))
                        goto bad_area;
        }

        if (expand_stack(vma, address))
                goto bad_area;

good_area:
        code = SEGV_ACCERR;

        if (is_exec) {
                /* protection fault */
                if (error_code & DSISR_PROTFAULT)
                        goto bad_area;
                if (!(vma->vm_flags & VM_EXEC))
                        goto bad_area;
        /* a write */
        } else if (is_write) {
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
        /* a read */
        } else {
                if (!(vma->vm_flags & VM_READ))
                        goto bad_area;
        }

survive:
        /*
         * If for any reason at all we couldn't handle the fault,
         * make sure we exit gracefully rather than endlessly redo
         * the fault.
         */
        switch (handle_mm_fault(mm, vma, address, is_write)) {

        case VM_FAULT_MINOR:
                current->min_flt++;
                break;
        case VM_FAULT_MAJOR:
                current->maj_flt++;
                break;
        case VM_FAULT_SIGBUS:
                goto do_sigbus;
        case VM_FAULT_OOM:
                goto out_of_memory;
        default:
                BUG();
        }

        up_read(&mm->mmap_sem);
        return 0;

bad_area:
        up_read(&mm->mmap_sem);

bad_area_nosemaphore:
        /* User mode accesses cause a SIGSEGV */
        if (user_mode(regs)) {
                info.si_signo = SIGSEGV;
                info.si_errno = 0;
                info.si_code = code;
                info.si_addr = (void __user *) address;
                force_sig_info(SIGSEGV, &info, current);
                return 0;
        }

        if (trap == 0x400 && (error_code & DSISR_PROTFAULT)
            && printk_ratelimit())
                printk(KERN_CRIT "kernel tried to execute NX-protected"
                       " page (%lx) - exploit attempt? (uid: %d)\n",
                       address, current->uid);

        return SIGSEGV;

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
        up_read(&mm->mmap_sem);
        if (current->pid == 1) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
        }
        printk("VM: killing process %s\n", current->comm);
        if (user_mode(regs))
                do_exit(SIGKILL);
        return SIGKILL;

do_sigbus:
        up_read(&mm->mmap_sem);
        if (user_mode(regs)) {
                info.si_signo = SIGBUS;
                info.si_errno = 0;
                info.si_code = BUS_ADRERR;
                info.si_addr = (void __user *)address;
                force_sig_info(SIGBUS, &info, current);
                return 0;
        }
        return SIGBUS;
}

/*
 * bad_page_fault is called when we have a bad access from the kernel.
 * It is called from do_page_fault above and from some of the procedures
 * in traps.c.
 */
void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
{
        const struct exception_table_entry *entry;

        /* Are we prepared to handle this fault? */
        if ((entry = search_exception_tables(regs->nip)) != NULL) {
                regs->nip = entry->fixup;
                return;
        }

        /* kernel has accessed a bad area */
        die("Kernel access of bad area", regs, sig);
}
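
Editor's note: the opcode checks in store_updates_sp() above follow directly from the PowerPC D-form store-with-update encoding (6-bit major opcode, 5-bit RS, 5-bit RA, 16-bit displacement), with rA = 1 meaning the base register being updated is r1. The stand-alone user-space sketch below is illustrative only and not part of the kernel source: it hand-encodes a stwu r0,-16(r1) instruction and runs it through a copy of the same decode logic (minus the get_user() step).

#include <stdio.h>

/* Hand-encode "stwu r0,-16(r1)": D-form, major opcode 37,
 * RS = r0, RA = r1, 16-bit signed displacement -16 (0xfff0). */
static unsigned int encode_stwu(void)
{
        return (37u << 26) | (0u << 21) | (1u << 16) | 0xfff0u;
}

/* Same decode as store_updates_sp(), without the user-memory read. */
static int updates_sp(unsigned int inst)
{
        if (((inst >> 16) & 0x1f) != 1)         /* rA must be r1 */
                return 0;
        switch (inst >> 26) {
        case 37: case 39: case 45: case 53: case 55:
                return 1;                       /* stwu/stbu/sthu/stfsu/stfdu */
        case 62:
                return (inst & 3) == 1;         /* stdu (DS-form) */
        case 31:
                switch ((inst >> 1) & 0x3ff) {
                case 181: case 183: case 247:
                case 439: case 695: case 759:
                        return 1;               /* indexed with-update forms */
                }
        }
        return 0;
}

int main(void)
{
        unsigned int inst = encode_stwu();

        /* Prints 0x9401fff0 and confirms the decode recognises it. */
        printf("stwu r0,-16(r1) = 0x%08x, updates sp: %d\n",
               inst, updates_sp(inst));
        return 0;
}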
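Editor's note: the stack-expansion policy in do_page_fault() above reduces to two numeric tests: a fault within 1MB of the top of the stack vma grows the stack unconditionally, and a fault further down is only honoured if it lands within 2048 bytes of r1 or the faulting instruction is a store-with-update of r1. The sketch below replays those tests on invented values (VM_END and SP are hypothetical, and the store-with-update case is assumed to be false) purely to show which faults would be allowed to grow the stack.

#include <stdio.h>

/* Hypothetical layout, for illustration only: top of the stack vma
 * and the user stack pointer r1. */
#define VM_END  0x10000000UL
#define SP      0x0fe00000UL

/* Re-run the two tests from do_page_fault() for a faulting address,
 * assuming the instruction is NOT a store-with-update of r1. */
static const char *stack_growth_verdict(unsigned long address)
{
        if (address + 0x100000 >= VM_END)
                return "within 1MB of vm_end: grow stack, no further checks";
        if (address + 2048 >= SP)
                return "within 2kB of r1: grow stack";
        return "far below r1 and not a store-with-update of r1: bad_area";
}

int main(void)
{
        unsigned long addrs[] = { 0x0fff0000UL, 0x0fdffff0UL, 0x0fd00000UL };

        for (int i = 0; i < 3; i++)
                printf("%#lx -> %s\n", addrs[i], stack_growth_verdict(addrs[i]));
        return 0;
}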
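Editor's note: bad_page_fault() above recovers from kernel-mode faults by consulting the exception table: if the faulting instruction address (regs->nip) has a registered fixup, execution is redirected there instead of oopsing. The toy sketch below models only that lookup-and-redirect idea with an invented table and struct; in the real kernel the table entries are emitted by the uaccess macros' inline assembly, not built by hand like this.

#include <stdio.h>
#include <stddef.h>

/* Toy model of an exception table entry: "if the fault happened at
 * insn, resume at fixup". Names and layout are illustrative only. */
struct toy_exception_entry {
        unsigned long insn;
        unsigned long fixup;
};

static const struct toy_exception_entry toy_table[] = {
        { 0x1000, 0x2000 },     /* a faulting access with a recovery stub */
        { 0x1010, 0x2040 },
};

/* Linear-search stand-in for search_exception_tables(). */
static const struct toy_exception_entry *toy_search(unsigned long nip)
{
        for (size_t i = 0; i < sizeof(toy_table) / sizeof(toy_table[0]); i++)
                if (toy_table[i].insn == nip)
                        return &toy_table[i];
        return NULL;
}

int main(void)
{
        unsigned long nip = 0x1010;     /* pretend this instruction faulted */
        const struct toy_exception_entry *entry = toy_search(nip);

        if (entry)
                nip = entry->fixup;     /* same idea as regs->nip = entry->fixup */
        printf("resume at %#lx\n", nip);
        return 0;
}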