diff options
author | Michal Simek <monstr@monstr.eu> | 2009-05-26 10:30:13 -0400 |
---|---|---|
committer | Michal Simek <monstr@monstr.eu> | 2009-05-26 10:45:15 -0400 |
commit | 5de96121009f4de43ffeb7160109e23132278c07 (patch) | |
tree | 6d8418195f2a68e185ef993471a6e8ffd8bcb613 /arch/microblaze/mm/fault.c | |
parent | 23098649e0f8861ea69fac62cf6ba721b83065dc (diff) |
microblaze_mmu_v2: Page fault handling high level - fault.c
Signed-off-by: Michal Simek <monstr@monstr.eu>
Diffstat (limited to 'arch/microblaze/mm/fault.c')
-rw-r--r-- | arch/microblaze/mm/fault.c | 304 |
1 files changed, 304 insertions, 0 deletions
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c new file mode 100644 index 000000000000..5e67cd1fab40 --- /dev/null +++ b/arch/microblaze/mm/fault.c | |||
@@ -0,0 +1,304 @@ | |||
1 | /* | ||
2 | * arch/microblaze/mm/fault.c | ||
3 | * | ||
4 | * Copyright (C) 2007 Xilinx, Inc. All rights reserved. | ||
5 | * | ||
6 | * Derived from "arch/ppc/mm/fault.c" | ||
7 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) | ||
8 | * | ||
9 | * Derived from "arch/i386/mm/fault.c" | ||
10 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | ||
11 | * | ||
12 | * Modified by Cort Dougan and Paul Mackerras. | ||
13 | * | ||
14 | * This file is subject to the terms and conditions of the GNU General | ||
15 | * Public License. See the file COPYING in the main directory of this | ||
16 | * archive for more details. | ||
17 | * | ||
18 | */ | ||
19 | |||
20 | #include <linux/module.h> | ||
21 | #include <linux/signal.h> | ||
22 | #include <linux/sched.h> | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/errno.h> | ||
25 | #include <linux/string.h> | ||
26 | #include <linux/types.h> | ||
27 | #include <linux/ptrace.h> | ||
28 | #include <linux/mman.h> | ||
29 | #include <linux/mm.h> | ||
30 | #include <linux/interrupt.h> | ||
31 | |||
32 | #include <asm/page.h> | ||
33 | #include <asm/pgtable.h> | ||
34 | #include <asm/mmu.h> | ||
35 | #include <asm/mmu_context.h> | ||
36 | #include <asm/system.h> | ||
37 | #include <linux/uaccess.h> | ||
38 | #include <asm/exceptions.h> | ||
39 | |||
#ifdef CONFIG_KGDB
/* When non-zero, bad_page_fault() calls debugger() before die(). */
int debugger_kernel_faults = 1;
#endif

/* Fault counters; both are only ever incremented by do_page_fault(). */
static unsigned long pte_errors;	/* faults that ended in the bad_area path */
static unsigned long pte_misses;	/* faults handle_mm_fault() resolved */
46 | |||
47 | /* | ||
48 | * Check whether the instruction at regs->pc is a store using | ||
49 | * an update addressing form which will update r1. | ||
50 | */ | ||
51 | static int store_updates_sp(struct pt_regs *regs) | ||
52 | { | ||
53 | unsigned int inst; | ||
54 | |||
55 | if (get_user(inst, (unsigned int *)regs->pc)) | ||
56 | return 0; | ||
57 | /* check for 1 in the rD field */ | ||
58 | if (((inst >> 21) & 0x1f) != 1) | ||
59 | return 0; | ||
60 | /* check for store opcodes */ | ||
61 | if ((inst & 0xd0000000) == 0xd0000000) | ||
62 | return 1; | ||
63 | return 0; | ||
64 | } | ||
65 | |||
66 | |||
67 | /* | ||
68 | * bad_page_fault is called when we have a bad access from the kernel. | ||
69 | * It is called from do_page_fault above and from some of the procedures | ||
70 | * in traps.c. | ||
71 | */ | ||
72 | static void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) | ||
73 | { | ||
74 | const struct exception_table_entry *fixup; | ||
75 | /* MS: no context */ | ||
76 | /* Are we prepared to handle this fault? */ | ||
77 | fixup = search_exception_tables(regs->pc); | ||
78 | if (fixup) { | ||
79 | regs->pc = fixup->fixup; | ||
80 | return; | ||
81 | } | ||
82 | |||
83 | /* kernel has accessed a bad area */ | ||
84 | #if defined(CONFIG_KGDB) | ||
85 | if (debugger_kernel_faults) | ||
86 | debugger(regs); | ||
87 | #endif | ||
88 | die("kernel access of bad area", regs, sig); | ||
89 | } | ||
90 | |||
91 | /* | ||
92 | * The error_code parameter is ESR for a data fault, | ||
93 | * 0 for an instruction fault. | ||
94 | */ | ||
95 | void do_page_fault(struct pt_regs *regs, unsigned long address, | ||
96 | unsigned long error_code) | ||
97 | { | ||
98 | struct vm_area_struct *vma; | ||
99 | struct mm_struct *mm = current->mm; | ||
100 | siginfo_t info; | ||
101 | int code = SEGV_MAPERR; | ||
102 | int is_write = error_code & ESR_S; | ||
103 | int fault; | ||
104 | |||
105 | regs->ear = address; | ||
106 | regs->esr = error_code; | ||
107 | |||
108 | /* On a kernel SLB miss we can only check for a valid exception entry */ | ||
109 | if (kernel_mode(regs) && (address >= TASK_SIZE)) { | ||
110 | printk(KERN_WARNING "kernel task_size exceed"); | ||
111 | _exception(SIGSEGV, regs, code, address); | ||
112 | } | ||
113 | |||
114 | /* for instr TLB miss and instr storage exception ESR_S is undefined */ | ||
115 | if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11) | ||
116 | is_write = 0; | ||
117 | |||
118 | #if defined(CONFIG_KGDB) | ||
119 | if (debugger_fault_handler && regs->trap == 0x300) { | ||
120 | debugger_fault_handler(regs); | ||
121 | return; | ||
122 | } | ||
123 | #endif /* CONFIG_KGDB */ | ||
124 | |||
125 | if (in_atomic() || mm == NULL) { | ||
126 | /* FIXME */ | ||
127 | if (kernel_mode(regs)) { | ||
128 | printk(KERN_EMERG | ||
129 | "Page fault in kernel mode - Oooou!!! pid %d\n", | ||
130 | current->pid); | ||
131 | _exception(SIGSEGV, regs, code, address); | ||
132 | return; | ||
133 | } | ||
134 | /* in_atomic() in user mode is really bad, | ||
135 | as is current->mm == NULL. */ | ||
136 | printk(KERN_EMERG "Page fault in user mode with " | ||
137 | "in_atomic(), mm = %p\n", mm); | ||
138 | printk(KERN_EMERG "r15 = %lx MSR = %lx\n", | ||
139 | regs->r15, regs->msr); | ||
140 | die("Weird page fault", regs, SIGSEGV); | ||
141 | } | ||
142 | |||
143 | /* When running in the kernel we expect faults to occur only to | ||
144 | * addresses in user space. All other faults represent errors in the | ||
145 | * kernel and should generate an OOPS. Unfortunately, in the case of an | ||
146 | * erroneous fault occurring in a code path which already holds mmap_sem | ||
147 | * we will deadlock attempting to validate the fault against the | ||
148 | * address space. Luckily the kernel only validly references user | ||
149 | * space from well defined areas of code, which are listed in the | ||
150 | * exceptions table. | ||
151 | * | ||
152 | * As the vast majority of faults will be valid we will only perform | ||
153 | * the source reference check when there is a possibility of a deadlock. | ||
154 | * Attempt to lock the address space, if we cannot we then validate the | ||
155 | * source. If this is invalid we can skip the address space check, | ||
156 | * thus avoiding the deadlock. | ||
157 | */ | ||
158 | if (!down_read_trylock(&mm->mmap_sem)) { | ||
159 | if (kernel_mode(regs) && !search_exception_tables(regs->pc)) | ||
160 | goto bad_area_nosemaphore; | ||
161 | |||
162 | down_read(&mm->mmap_sem); | ||
163 | } | ||
164 | |||
165 | vma = find_vma(mm, address); | ||
166 | if (!vma) | ||
167 | goto bad_area; | ||
168 | |||
169 | if (vma->vm_start <= address) | ||
170 | goto good_area; | ||
171 | |||
172 | if (!(vma->vm_flags & VM_GROWSDOWN)) | ||
173 | goto bad_area; | ||
174 | |||
175 | if (!is_write) | ||
176 | goto bad_area; | ||
177 | |||
178 | /* | ||
179 | * N.B. The ABI allows programs to access up to | ||
180 | * a few hundred bytes below the stack pointer (TBD). | ||
181 | * The kernel signal delivery code writes up to about 1.5kB | ||
182 | * below the stack pointer (r1) before decrementing it. | ||
183 | * The exec code can write slightly over 640kB to the stack | ||
184 | * before setting the user r1. Thus we allow the stack to | ||
185 | * expand to 1MB without further checks. | ||
186 | */ | ||
187 | if (address + 0x100000 < vma->vm_end) { | ||
188 | |||
189 | /* get user regs even if this fault is in kernel mode */ | ||
190 | struct pt_regs *uregs = current->thread.regs; | ||
191 | if (uregs == NULL) | ||
192 | goto bad_area; | ||
193 | |||
194 | /* | ||
195 | * A user-mode access to an address a long way below | ||
196 | * the stack pointer is only valid if the instruction | ||
197 | * is one which would update the stack pointer to the | ||
198 | * address accessed if the instruction completed, | ||
199 | * i.e. either stwu rs,n(r1) or stwux rs,r1,rb | ||
200 | * (or the byte, halfword, float or double forms). | ||
201 | * | ||
202 | * If we don't check this then any write to the area | ||
203 | * between the last mapped region and the stack will | ||
204 | * expand the stack rather than segfaulting. | ||
205 | */ | ||
206 | if (address + 2048 < uregs->r1 | ||
207 | && (kernel_mode(regs) || !store_updates_sp(regs))) | ||
208 | goto bad_area; | ||
209 | } | ||
210 | if (expand_stack(vma, address)) | ||
211 | goto bad_area; | ||
212 | |||
213 | good_area: | ||
214 | code = SEGV_ACCERR; | ||
215 | |||
216 | /* a write */ | ||
217 | if (is_write) { | ||
218 | if (!(vma->vm_flags & VM_WRITE)) | ||
219 | goto bad_area; | ||
220 | /* a read */ | ||
221 | } else { | ||
222 | /* protection fault */ | ||
223 | if (error_code & 0x08000000) | ||
224 | goto bad_area; | ||
225 | if (!(vma->vm_flags & (VM_READ | VM_EXEC))) | ||
226 | goto bad_area; | ||
227 | } | ||
228 | |||
229 | /* | ||
230 | * If for any reason at all we couldn't handle the fault, | ||
231 | * make sure we exit gracefully rather than endlessly redo | ||
232 | * the fault. | ||
233 | */ | ||
234 | survive: | ||
235 | fault = handle_mm_fault(mm, vma, address, is_write); | ||
236 | if (unlikely(fault & VM_FAULT_ERROR)) { | ||
237 | if (fault & VM_FAULT_OOM) | ||
238 | goto out_of_memory; | ||
239 | else if (fault & VM_FAULT_SIGBUS) | ||
240 | goto do_sigbus; | ||
241 | BUG(); | ||
242 | } | ||
243 | if (fault & VM_FAULT_MAJOR) | ||
244 | current->maj_flt++; | ||
245 | else | ||
246 | current->min_flt++; | ||
247 | up_read(&mm->mmap_sem); | ||
248 | /* | ||
249 | * keep track of tlb+htab misses that are good addrs but | ||
250 | * just need pte's created via handle_mm_fault() | ||
251 | * -- Cort | ||
252 | */ | ||
253 | pte_misses++; | ||
254 | return; | ||
255 | |||
256 | bad_area: | ||
257 | up_read(&mm->mmap_sem); | ||
258 | |||
259 | bad_area_nosemaphore: | ||
260 | pte_errors++; | ||
261 | |||
262 | /* User mode accesses cause a SIGSEGV */ | ||
263 | if (user_mode(regs)) { | ||
264 | _exception(SIGSEGV, regs, code, address); | ||
265 | /* info.si_signo = SIGSEGV; | ||
266 | info.si_errno = 0; | ||
267 | info.si_code = code; | ||
268 | info.si_addr = (void *) address; | ||
269 | force_sig_info(SIGSEGV, &info, current);*/ | ||
270 | return; | ||
271 | } | ||
272 | |||
273 | bad_page_fault(regs, address, SIGSEGV); | ||
274 | return; | ||
275 | |||
276 | /* | ||
277 | * We ran out of memory, or some other thing happened to us that made | ||
278 | * us unable to handle the page fault gracefully. | ||
279 | */ | ||
280 | out_of_memory: | ||
281 | if (current->pid == 1) { | ||
282 | yield(); | ||
283 | down_read(&mm->mmap_sem); | ||
284 | goto survive; | ||
285 | } | ||
286 | up_read(&mm->mmap_sem); | ||
287 | printk(KERN_WARNING "VM: killing process %s\n", current->comm); | ||
288 | if (user_mode(regs)) | ||
289 | do_exit(SIGKILL); | ||
290 | bad_page_fault(regs, address, SIGKILL); | ||
291 | return; | ||
292 | |||
293 | do_sigbus: | ||
294 | up_read(&mm->mmap_sem); | ||
295 | if (user_mode(regs)) { | ||
296 | info.si_signo = SIGBUS; | ||
297 | info.si_errno = 0; | ||
298 | info.si_code = BUS_ADRERR; | ||
299 | info.si_addr = (void __user *)address; | ||
300 | force_sig_info(SIGBUS, &info, current); | ||
301 | return; | ||
302 | } | ||
303 | bad_page_fault(regs, address, SIGBUS); | ||
304 | } | ||