Diffstat (limited to 'arch/ppc/mm/fault.c')
-rw-r--r--  arch/ppc/mm/fault.c  |  440
1 file changed, 440 insertions, 0 deletions
diff --git a/arch/ppc/mm/fault.c b/arch/ppc/mm/fault.c
new file mode 100644
index 000000000000..57d9930843ac
--- /dev/null
+++ b/arch/ppc/mm/fault.c
@@ -0,0 +1,440 @@
/*
 * arch/ppc/mm/fault.c
 *
 * PowerPC version
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Derived from "arch/i386/mm/fault.c"
 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 *
 * Modified by Cort Dougan and Paul Mackerras.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/highmem.h>
#include <linux/module.h>

#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>

#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
extern void (*debugger)(struct pt_regs *);
extern void (*debugger_fault_handler)(struct pt_regs *);
extern int (*debugger_dabr_match)(struct pt_regs *);
int debugger_kernel_faults = 1;
#endif

unsigned long htab_reloads;	/* updated by hashtable.S:hash_page() */
unsigned long htab_evicts;	/* updated by hashtable.S:hash_page() */
unsigned long htab_preloads;	/* updated by hashtable.S:add_hash_page() */
unsigned long pte_misses;	/* updated by do_page_fault() */
unsigned long pte_errors;	/* updated by do_page_fault() */
unsigned int probingmem;

/*
 * Check whether the instruction at regs->nip is a store using
 * an update addressing form which will update r1.
 */
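/*
 * For example, stwu r1,-16(r1) encodes as 0x9421fff0: the major
 * opcode (inst >> 26) is 37 and the rA field ((inst >> 16) & 0x1f)
 * is 1, so it is treated as a store that updates the stack pointer.
 */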
static int store_updates_sp(struct pt_regs *regs)
{
	unsigned int inst;

	if (get_user(inst, (unsigned int __user *)regs->nip))
		return 0;
	/* check for 1 in the rA field */
	if (((inst >> 16) & 0x1f) != 1)
		return 0;
	/* check major opcode */
	switch (inst >> 26) {
	case 37:	/* stwu */
	case 39:	/* stbu */
	case 45:	/* sthu */
	case 53:	/* stfsu */
	case 55:	/* stfdu */
		return 1;
	case 31:
		/* check minor opcode */
		switch ((inst >> 1) & 0x3ff) {
		case 183:	/* stwux */
		case 247:	/* stbux */
		case 439:	/* sthux */
		case 695:	/* stfsux */
		case 759:	/* stfdux */
			return 1;
		}
	}
	return 0;
}

/*
 * For 600- and 800-family processors, the error_code parameter is DSISR
 * for a data fault, SRR1 for an instruction fault. For 400-family processors
 * the error_code parameter is ESR for a data fault, 0 for an instruction
 * fault.
 */
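/*
 * The DSISR bits tested below are the architected ones: 0x02000000
 * is set when the faulting access was a store, and 0x08000000 when
 * the access hit a protection violation.
 */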
int do_page_fault(struct pt_regs *regs, unsigned long address,
		  unsigned long error_code)
{
	struct vm_area_struct * vma;
	struct mm_struct *mm = current->mm;
	siginfo_t info;
	int code = SEGV_MAPERR;
#if defined(CONFIG_4xx) || defined (CONFIG_BOOKE)
	int is_write = error_code & ESR_DST;
#else
	int is_write = 0;

	/*
	 * Fortunately the bit assignments in SRR1 for an instruction
	 * fault and DSISR for a data fault are mostly the same for the
	 * bits we are interested in. But there are some bits which
	 * indicate errors in DSISR but can validly be set in SRR1.
	 */
	if (TRAP(regs) == 0x400)
		error_code &= 0x48200000;
	else
		is_write = error_code & 0x02000000;
#endif /* CONFIG_4xx || CONFIG_BOOKE */

#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
	if (debugger_fault_handler && TRAP(regs) == 0x300) {
		debugger_fault_handler(regs);
		return 0;
	}
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
	if (error_code & 0x00400000) {
		/* DABR match */
		if (debugger_dabr_match(regs))
			return 0;
	}
#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
#endif /* CONFIG_XMON || CONFIG_KGDB */

	if (in_atomic() || mm == NULL)
		return SIGSEGV;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (!is_write)
		goto bad_area;

	/*
	 * N.B. The rs6000/xcoff ABI allows programs to access up to
	 * a few hundred bytes below the stack pointer.
	 * The kernel signal delivery code writes up to about 1.5kB
	 * below the stack pointer (r1) before decrementing it.
	 * The exec code can write slightly over 640kB to the stack
	 * before setting the user r1. Thus we allow the stack to
	 * expand to 1MB without further checks.
	 */
	if (address + 0x100000 < vma->vm_end) {
		/* get user regs even if this fault is in kernel mode */
		struct pt_regs *uregs = current->thread.regs;
		if (uregs == NULL)
			goto bad_area;

		/*
		 * A user-mode access to an address a long way below
		 * the stack pointer is only valid if the instruction
		 * is one which would update the stack pointer to the
		 * address accessed if the instruction completed,
		 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
		 * (or the byte, halfword, float or double forms).
		 *
		 * If we don't check this then any write to the area
		 * between the last mapped region and the stack will
		 * expand the stack rather than segfaulting.
		 */
		if (address + 2048 < uregs->gpr[1]
		    && (!user_mode(regs) || !store_updates_sp(regs)))
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;

good_area:
	code = SEGV_ACCERR;
#if defined(CONFIG_6xx)
	if (error_code & 0x95700000)
		/* an error such as lwarx to I/O controller space,
		   address matching DABR, eciwx, etc. */
		goto bad_area;
#endif /* CONFIG_6xx */
#if defined(CONFIG_8xx)
	/* The MPC8xx seems to always set 0x80000000, which is
	 * "undefined". Of those that can be set, this is the only
	 * one which seems bad.
	 */
	if (error_code & 0x10000000)
		/* Guarded storage error. */
		goto bad_area;
#endif /* CONFIG_8xx */

	/* a write */
	if (is_write) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
	/* an exec - 4xx/Book-E allows for per-page execute permission */
	} else if (TRAP(regs) == 0x400) {
		pte_t *ptep;

#if 0
		/* It would be nice to actually enforce the VM execute
		   permission on CPUs which can do so, but far too
		   much stuff in userspace doesn't get the permissions
		   right, so we let any page be executed for now. */
		if (! (vma->vm_flags & VM_EXEC))
			goto bad_area;
#endif

		/* Since 4xx/Book-E supports per-page execute permission,
		 * we lazily flush dcache to icache. */
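		/* PG_arch_1 marks pages whose icache is already
		 * coherent with the dcache, so the flush below happens
		 * at most once per page; _PAGE_HWEXEC then grants the
		 * hardware execute permission. */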
		ptep = NULL;
		if (get_pteptr(mm, address, &ptep) && pte_present(*ptep)) {
			struct page *page = pte_page(*ptep);

			if (! test_bit(PG_arch_1, &page->flags)) {
				flush_dcache_icache_page(page);
				set_bit(PG_arch_1, &page->flags);
			}
			pte_update(ptep, 0, _PAGE_HWEXEC);
			_tlbie(address);
			pte_unmap(ptep);
			up_read(&mm->mmap_sem);
			return 0;
		}
		if (ptep != NULL)
			pte_unmap(ptep);
#endif
	/* a read */
	} else {
		/* protection fault */
		if (error_code & 0x08000000)
			goto bad_area;
		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
survive:
	switch (handle_mm_fault(mm, vma, address, is_write)) {
	case VM_FAULT_MINOR:
		current->min_flt++;
		break;
	case VM_FAULT_MAJOR:
		current->maj_flt++;
		break;
	case VM_FAULT_SIGBUS:
		goto do_sigbus;
	case VM_FAULT_OOM:
		goto out_of_memory;
	default:
		BUG();
	}

	up_read(&mm->mmap_sem);
	/*
	 * keep track of tlb+htab misses that are good addrs but
	 * just need pte's created via handle_mm_fault()
	 * -- Cort
	 */
	pte_misses++;
	return 0;

bad_area:
	up_read(&mm->mmap_sem);
	pte_errors++;

	/* User mode accesses cause a SIGSEGV */
	if (user_mode(regs)) {
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		info.si_code = code;
		info.si_addr = (void __user *) address;
		force_sig_info(SIGSEGV, &info, current);
		return 0;
	}

	return SIGSEGV;

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (current->pid == 1) {
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	printk("VM: killing process %s\n", current->comm);
	if (user_mode(regs))
		do_exit(SIGKILL);
	return SIGKILL;

do_sigbus:
	up_read(&mm->mmap_sem);
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void __user *)address;
	force_sig_info (SIGBUS, &info, current);
	if (!user_mode(regs))
		return SIGBUS;
	return 0;
}

/*
 * bad_page_fault is called when we have a bad access from the kernel.
 * It is called from the DSI and ISI handlers in head.S and from some
 * of the procedures in traps.c.
 */
void
bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
{
	const struct exception_table_entry *entry;

	/* Are we prepared to handle this fault? */
	if ((entry = search_exception_tables(regs->nip)) != NULL) {
		regs->nip = entry->fixup;
		return;
	}

	/* kernel has accessed a bad area */
#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
	if (debugger_kernel_faults)
		debugger(regs);
#endif
	die("kernel access of bad area", regs, sig);
}

#ifdef CONFIG_8xx

/* The pgtable.h claims some functions generically exist, but I
 * can't find them......
 */
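/* va_to_pte() walks the kernel (init_mm) page tables for a kernel
 * virtual address and returns a pointer to the pte if it is present,
 * or NULL otherwise.
 */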
pte_t *va_to_pte(unsigned long address)
{
	pgd_t *dir;
	pmd_t *pmd;
	pte_t *pte;

	if (address < TASK_SIZE)
		return NULL;

	dir = pgd_offset(&init_mm, address);
	if (dir) {
		pmd = pmd_offset(dir, address & PAGE_MASK);
		if (pmd && pmd_present(*pmd)) {
			pte = pte_offset_kernel(pmd, address & PAGE_MASK);
			if (pte && pte_present(*pte))
				return(pte);
		}
	}
	return NULL;
}

unsigned long va_to_phys(unsigned long address)
{
	pte_t *pte;

	pte = va_to_pte(address);
	if (pte)
		return(((unsigned long)(pte_val(*pte)) & PAGE_MASK) | (address & ~(PAGE_MASK)));
	return (0);
}

void
print_8xx_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t * pgd;
	pmd_t * pmd;
	pte_t * pte;

	printk(" pte @ 0x%8lx: ", addr);
	pgd = pgd_offset(mm, addr & PAGE_MASK);
	if (pgd) {
		pmd = pmd_offset(pgd, addr & PAGE_MASK);
		if (pmd && pmd_present(*pmd)) {
			pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
			if (pte) {
				printk(" (0x%08lx)->(0x%08lx)->0x%08lx\n",
					(long)pgd, (long)pte, (long)pte_val(*pte));
#define pp ((long)pte_val(*pte))
				printk(" RPN: %05lx PP: %lx SPS: %lx SH: %lx "
				       "CI: %lx v: %lx\n",
				       pp>>12,     /* rpn */
				       (pp>>10)&3, /* pp */
				       (pp>>3)&1,  /* small */
				       (pp>>2)&1,  /* shared */
				       (pp>>1)&1,  /* cache inhibit */
				       pp&1        /* valid */
				       );
#undef pp
			}
			else {
				printk("no pte\n");
			}
		}
		else {
			printk("no pmd\n");
		}
	}
	else {
		printk("no pgd\n");
	}
}

int
get_8xx_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t * pgd;
	pmd_t * pmd;
	pte_t * pte;
	int retval = 0;

	pgd = pgd_offset(mm, addr & PAGE_MASK);
	if (pgd) {
		pmd = pmd_offset(pgd, addr & PAGE_MASK);
		if (pmd && pmd_present(*pmd)) {
			pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
			if (pte) {
				retval = (int)pte_val(*pte);
			}
		}
	}
	return(retval);
}
#endif /* CONFIG_8xx */