aboutsummaryrefslogtreecommitdiffstats
path: root/arch/ppc/mm/fault.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/ppc/mm/fault.c
Linux-2.6.12-rc2 v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'arch/ppc/mm/fault.c')
-rw-r--r-- arch/ppc/mm/fault.c 440
1 files changed, 440 insertions, 0 deletions
diff --git a/arch/ppc/mm/fault.c b/arch/ppc/mm/fault.c
new file mode 100644
index 000000000000..57d9930843ac
--- /dev/null
+++ b/arch/ppc/mm/fault.c
@@ -0,0 +1,440 @@
1/*
2 * arch/ppc/mm/fault.c
3 *
4 * PowerPC version
5 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
6 *
7 * Derived from "arch/i386/mm/fault.c"
8 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
9 *
10 * Modified by Cort Dougan and Paul Mackerras.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18#include <linux/config.h>
19#include <linux/signal.h>
20#include <linux/sched.h>
21#include <linux/kernel.h>
22#include <linux/errno.h>
23#include <linux/string.h>
24#include <linux/types.h>
25#include <linux/ptrace.h>
26#include <linux/mman.h>
27#include <linux/mm.h>
28#include <linux/interrupt.h>
29#include <linux/highmem.h>
30#include <linux/module.h>
31
32#include <asm/page.h>
33#include <asm/pgtable.h>
34#include <asm/mmu.h>
35#include <asm/mmu_context.h>
36#include <asm/system.h>
37#include <asm/uaccess.h>
38#include <asm/tlbflush.h>
39
#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
/*
 * Debugger hooks.  These are function pointers defined outside this
 * file; the fault paths below call through them when a kernel debugger
 * is configured.
 */
extern void (*debugger)(struct pt_regs *);
extern void (*debugger_fault_handler)(struct pt_regs *);
extern int (*debugger_dabr_match)(struct pt_regs *);

/* Non-zero: bad_page_fault() enters the debugger before calling die(). */
int debugger_kernel_faults = 1;
#endif

/* Fault statistics counters. */
unsigned long htab_reloads;	/* updated by hashtable.S:hash_page() */
unsigned long htab_evicts;	/* updated by hashtable.S:hash_page() */
unsigned long htab_preloads;	/* updated by hashtable.S:add_hash_page() */
unsigned long pte_misses;	/* updated by do_page_fault() */
unsigned long pte_errors;	/* updated by do_page_fault() */

/*
 * Not referenced anywhere else in this file.
 * NOTE(review): presumably a flag used by memory-probing code - confirm.
 */
unsigned int probingmem;
53
54/*
55 * Check whether the instruction at regs->nip is a store using
56 * an update addressing form which will update r1.
57 */
58static int store_updates_sp(struct pt_regs *regs)
59{
60 unsigned int inst;
61
62 if (get_user(inst, (unsigned int __user *)regs->nip))
63 return 0;
64 /* check for 1 in the rA field */
65 if (((inst >> 16) & 0x1f) != 1)
66 return 0;
67 /* check major opcode */
68 switch (inst >> 26) {
69 case 37: /* stwu */
70 case 39: /* stbu */
71 case 45: /* sthu */
72 case 53: /* stfsu */
73 case 55: /* stfdu */
74 return 1;
75 case 31:
76 /* check minor opcode */
77 switch ((inst >> 1) & 0x3ff) {
78 case 183: /* stwux */
79 case 247: /* stbux */
80 case 439: /* sthux */
81 case 695: /* stfsux */
82 case 759: /* stfdux */
83 return 1;
84 }
85 }
86 return 0;
87}
88
89/*
90 * For 600- and 800-family processors, the error_code parameter is DSISR
91 * for a data fault, SRR1 for an instruction fault. For 400-family processors
92 * the error_code parameter is ESR for a data fault, 0 for an instruction
93 * fault.
94 */
95int do_page_fault(struct pt_regs *regs, unsigned long address,
96 unsigned long error_code)
97{
98 struct vm_area_struct * vma;
99 struct mm_struct *mm = current->mm;
100 siginfo_t info;
101 int code = SEGV_MAPERR;
102#if defined(CONFIG_4xx) || defined (CONFIG_BOOKE)
103 int is_write = error_code & ESR_DST;
104#else
105 int is_write = 0;
106
107 /*
108 * Fortunately the bit assignments in SRR1 for an instruction
109 * fault and DSISR for a data fault are mostly the same for the
110 * bits we are interested in. But there are some bits which
111 * indicate errors in DSISR but can validly be set in SRR1.
112 */
113 if (TRAP(regs) == 0x400)
114 error_code &= 0x48200000;
115 else
116 is_write = error_code & 0x02000000;
117#endif /* CONFIG_4xx || CONFIG_BOOKE */
118
119#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
120 if (debugger_fault_handler && TRAP(regs) == 0x300) {
121 debugger_fault_handler(regs);
122 return 0;
123 }
124#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
125 if (error_code & 0x00400000) {
126 /* DABR match */
127 if (debugger_dabr_match(regs))
128 return 0;
129 }
130#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
131#endif /* CONFIG_XMON || CONFIG_KGDB */
132
133 if (in_atomic() || mm == NULL)
134 return SIGSEGV;
135
136 down_read(&mm->mmap_sem);
137 vma = find_vma(mm, address);
138 if (!vma)
139 goto bad_area;
140 if (vma->vm_start <= address)
141 goto good_area;
142 if (!(vma->vm_flags & VM_GROWSDOWN))
143 goto bad_area;
144 if (!is_write)
145 goto bad_area;
146
147 /*
148 * N.B. The rs6000/xcoff ABI allows programs to access up to
149 * a few hundred bytes below the stack pointer.
150 * The kernel signal delivery code writes up to about 1.5kB
151 * below the stack pointer (r1) before decrementing it.
152 * The exec code can write slightly over 640kB to the stack
153 * before setting the user r1. Thus we allow the stack to
154 * expand to 1MB without further checks.
155 */
156 if (address + 0x100000 < vma->vm_end) {
157 /* get user regs even if this fault is in kernel mode */
158 struct pt_regs *uregs = current->thread.regs;
159 if (uregs == NULL)
160 goto bad_area;
161
162 /*
163 * A user-mode access to an address a long way below
164 * the stack pointer is only valid if the instruction
165 * is one which would update the stack pointer to the
166 * address accessed if the instruction completed,
167 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
168 * (or the byte, halfword, float or double forms).
169 *
170 * If we don't check this then any write to the area
171 * between the last mapped region and the stack will
172 * expand the stack rather than segfaulting.
173 */
174 if (address + 2048 < uregs->gpr[1]
175 && (!user_mode(regs) || !store_updates_sp(regs)))
176 goto bad_area;
177 }
178 if (expand_stack(vma, address))
179 goto bad_area;
180
181good_area:
182 code = SEGV_ACCERR;
183#if defined(CONFIG_6xx)
184 if (error_code & 0x95700000)
185 /* an error such as lwarx to I/O controller space,
186 address matching DABR, eciwx, etc. */
187 goto bad_area;
188#endif /* CONFIG_6xx */
189#if defined(CONFIG_8xx)
190 /* The MPC8xx seems to always set 0x80000000, which is
191 * "undefined". Of those that can be set, this is the only
192 * one which seems bad.
193 */
194 if (error_code & 0x10000000)
195 /* Guarded storage error. */
196 goto bad_area;
197#endif /* CONFIG_8xx */
198
199 /* a write */
200 if (is_write) {
201 if (!(vma->vm_flags & VM_WRITE))
202 goto bad_area;
203#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
204 /* an exec - 4xx/Book-E allows for per-page execute permission */
205 } else if (TRAP(regs) == 0x400) {
206 pte_t *ptep;
207
208#if 0
209 /* It would be nice to actually enforce the VM execute
210 permission on CPUs which can do so, but far too
211 much stuff in userspace doesn't get the permissions
212 right, so we let any page be executed for now. */
213 if (! (vma->vm_flags & VM_EXEC))
214 goto bad_area;
215#endif
216
217 /* Since 4xx/Book-E supports per-page execute permission,
218 * we lazily flush dcache to icache. */
219 ptep = NULL;
220 if (get_pteptr(mm, address, &ptep) && pte_present(*ptep)) {
221 struct page *page = pte_page(*ptep);
222
223 if (! test_bit(PG_arch_1, &page->flags)) {
224 flush_dcache_icache_page(page);
225 set_bit(PG_arch_1, &page->flags);
226 }
227 pte_update(ptep, 0, _PAGE_HWEXEC);
228 _tlbie(address);
229 pte_unmap(ptep);
230 up_read(&mm->mmap_sem);
231 return 0;
232 }
233 if (ptep != NULL)
234 pte_unmap(ptep);
235#endif
236 /* a read */
237 } else {
238 /* protection fault */
239 if (error_code & 0x08000000)
240 goto bad_area;
241 if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
242 goto bad_area;
243 }
244
245 /*
246 * If for any reason at all we couldn't handle the fault,
247 * make sure we exit gracefully rather than endlessly redo
248 * the fault.
249 */
250 survive:
251 switch (handle_mm_fault(mm, vma, address, is_write)) {
252 case VM_FAULT_MINOR:
253 current->min_flt++;
254 break;
255 case VM_FAULT_MAJOR:
256 current->maj_flt++;
257 break;
258 case VM_FAULT_SIGBUS:
259 goto do_sigbus;
260 case VM_FAULT_OOM:
261 goto out_of_memory;
262 default:
263 BUG();
264 }
265
266 up_read(&mm->mmap_sem);
267 /*
268 * keep track of tlb+htab misses that are good addrs but
269 * just need pte's created via handle_mm_fault()
270 * -- Cort
271 */
272 pte_misses++;
273 return 0;
274
275bad_area:
276 up_read(&mm->mmap_sem);
277 pte_errors++;
278
279 /* User mode accesses cause a SIGSEGV */
280 if (user_mode(regs)) {
281 info.si_signo = SIGSEGV;
282 info.si_errno = 0;
283 info.si_code = code;
284 info.si_addr = (void __user *) address;
285 force_sig_info(SIGSEGV, &info, current);
286 return 0;
287 }
288
289 return SIGSEGV;
290
291/*
292 * We ran out of memory, or some other thing happened to us that made
293 * us unable to handle the page fault gracefully.
294 */
295out_of_memory:
296 up_read(&mm->mmap_sem);
297 if (current->pid == 1) {
298 yield();
299 down_read(&mm->mmap_sem);
300 goto survive;
301 }
302 printk("VM: killing process %s\n", current->comm);
303 if (user_mode(regs))
304 do_exit(SIGKILL);
305 return SIGKILL;
306
307do_sigbus:
308 up_read(&mm->mmap_sem);
309 info.si_signo = SIGBUS;
310 info.si_errno = 0;
311 info.si_code = BUS_ADRERR;
312 info.si_addr = (void __user *)address;
313 force_sig_info (SIGBUS, &info, current);
314 if (!user_mode(regs))
315 return SIGBUS;
316 return 0;
317}
318
319/*
320 * bad_page_fault is called when we have a bad access from the kernel.
321 * It is called from the DSI and ISI handlers in head.S and from some
322 * of the procedures in traps.c.
323 */
324void
325bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
326{
327 const struct exception_table_entry *entry;
328
329 /* Are we prepared to handle this fault? */
330 if ((entry = search_exception_tables(regs->nip)) != NULL) {
331 regs->nip = entry->fixup;
332 return;
333 }
334
335 /* kernel has accessed a bad area */
336#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
337 if (debugger_kernel_faults)
338 debugger(regs);
339#endif
340 die("kernel access of bad area", regs, sig);
341}
342
343#ifdef CONFIG_8xx
344
345/* The pgtable.h claims some functions generically exist, but I
346 * can't find them......
347 */
348pte_t *va_to_pte(unsigned long address)
349{
350 pgd_t *dir;
351 pmd_t *pmd;
352 pte_t *pte;
353
354 if (address < TASK_SIZE)
355 return NULL;
356
357 dir = pgd_offset(&init_mm, address);
358 if (dir) {
359 pmd = pmd_offset(dir, address & PAGE_MASK);
360 if (pmd && pmd_present(*pmd)) {
361 pte = pte_offset_kernel(pmd, address & PAGE_MASK);
362 if (pte && pte_present(*pte))
363 return(pte);
364 }
365 }
366 return NULL;
367}
368
369unsigned long va_to_phys(unsigned long address)
370{
371 pte_t *pte;
372
373 pte = va_to_pte(address);
374 if (pte)
375 return(((unsigned long)(pte_val(*pte)) & PAGE_MASK) | (address & ~(PAGE_MASK)));
376 return (0);
377}
378
379void
380print_8xx_pte(struct mm_struct *mm, unsigned long addr)
381{
382 pgd_t * pgd;
383 pmd_t * pmd;
384 pte_t * pte;
385
386 printk(" pte @ 0x%8lx: ", addr);
387 pgd = pgd_offset(mm, addr & PAGE_MASK);
388 if (pgd) {
389 pmd = pmd_offset(pgd, addr & PAGE_MASK);
390 if (pmd && pmd_present(*pmd)) {
391 pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
392 if (pte) {
393 printk(" (0x%08lx)->(0x%08lx)->0x%08lx\n",
394 (long)pgd, (long)pte, (long)pte_val(*pte));
395#define pp ((long)pte_val(*pte))
396 printk(" RPN: %05lx PP: %lx SPS: %lx SH: %lx "
397 "CI: %lx v: %lx\n",
398 pp>>12, /* rpn */
399 (pp>>10)&3, /* pp */
400 (pp>>3)&1, /* small */
401 (pp>>2)&1, /* shared */
402 (pp>>1)&1, /* cache inhibit */
403 pp&1 /* valid */
404 );
405#undef pp
406 }
407 else {
408 printk("no pte\n");
409 }
410 }
411 else {
412 printk("no pmd\n");
413 }
414 }
415 else {
416 printk("no pgd\n");
417 }
418}
419
420int
421get_8xx_pte(struct mm_struct *mm, unsigned long addr)
422{
423 pgd_t * pgd;
424 pmd_t * pmd;
425 pte_t * pte;
426 int retval = 0;
427
428 pgd = pgd_offset(mm, addr & PAGE_MASK);
429 if (pgd) {
430 pmd = pmd_offset(pgd, addr & PAGE_MASK);
431 if (pmd && pmd_present(*pmd)) {
432 pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
433 if (pte) {
434 retval = (int)pte_val(*pte);
435 }
436 }
437 }
438 return(retval);
439}
440#endif /* CONFIG_8xx */