microblaze_mmu_v2: Page fault handling high level - fault.c

Signed-off-by: Michal Simek <monstr@monstr.eu>
author: Michal Simek <monstr@monstr.eu> 2009-05-26 10:30:13 -0400
committer: Michal Simek <monstr@monstr.eu> 2009-05-26 10:45:15 -0400
commit: 5de96121009f4de43ffeb7160109e23132278c07 (patch)
tree: 6d8418195f2a68e185ef993471a6e8ffd8bcb613
parent: 23098649e0f8861ea69fac62cf6ba721b83065dc (diff)
1 files changed, 304 insertions, 0 deletions
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
new file mode 100644
index 000000000000..5e67cd1fab40
--- /dev/null
+++ b/arch/microblaze/mm/fault.c
@@ -0,0 +1,304 @@
+/*
+ *  arch/microblaze/mm/fault.c
+ *
+ *    Copyright (C) 2007 Xilinx, Inc.  All rights reserved.
+ *
+ *  Derived from "arch/ppc/mm/fault.c"
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Derived from "arch/i386/mm/fault.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  Modified by Cort Dougan and Paul Mackerras.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License.  See the file COPYING in the main directory of this
+ * archive for more details.
+ *
+ */
+#include <linux/module.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/system.h>
+#include <linux/uaccess.h>
+#include <asm/exceptions.h>
+#if defined(CONFIG_KGDB)
+int debugger_kernel_faults = 1;
+#endif
+static unsigned long pte_misses;        /* updated by do_page_fault() */
+static unsigned long pte_errors;        /* updated by do_page_fault() */
+/*
+ * store_updates_sp - classify the instruction that caused the fault.
+ * @regs: register state of the fault; only regs->pc is read.
+ *
+ * Reads the 32-bit instruction word at regs->pc with get_user() and
+ * returns 1 when both of the following hold, 0 otherwise (including when
+ * the instruction word cannot be read):
+ *   - the 5-bit rD field (bits 21..25) names r1, and
+ *   - every bit of the 0xd0000000 opcode mask is set (store opcodes).
+ *
+ * NOTE(review): the "update addressing form" wording is inherited from the
+ * code this file derives from; confirm against the MicroBlaze ISA that the
+ * mask above selects the intended store instructions.
+ */
+static int store_updates_sp(struct pt_regs *regs)
+{
+        unsigned int insn;
+        unsigned int rd_field;
+
+        /* An unreadable instruction word can never justify stack growth. */
+        if (get_user(insn, (unsigned int *)regs->pc) != 0)
+                return 0;
+
+        rd_field = (insn >> 21) & 0x1f;
+
+        /* r1 in the rD field together with a store opcode. */
+        return rd_field == 1 && (insn & 0xd0000000) == 0xd0000000;
+}
+/*
+ * bad_page_fault - handle a bad access made from kernel mode.
+ * @regs:    register state of the fault; regs->pc is looked up in the
+ *           exception tables and rewritten when a fixup exists
+ * @address: faulting address (not used by this function)
+ * @sig:     signal number handed to die() when no fixup exists
+ *
+ * Called from do_page_fault() in this file.  If the faulting pc has an
+ * exception table entry, execution is redirected to its fixup handler;
+ * otherwise the fault is reported through die().
+ */
+static void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
+{
+        const struct exception_table_entry *entry;
+
+        /* MS: no context */
+        entry = search_exception_tables(regs->pc);
+        if (!entry) {
+                /* No fixup registered: the kernel has accessed a bad area. */
+#if defined(CONFIG_KGDB)
+                if (debugger_kernel_faults)
+                        debugger(regs);
+#endif
+                die("kernel access of bad area", regs, sig);
+                return;
+        }
+
+        /* We are prepared for this fault: resume at the fixup handler. */
+        regs->pc = entry->fixup;
+}
+/*
+ * do_page_fault - high level page fault handler.
+ * @regs:       register state at the time of the fault; regs->ear and
+ *              regs->esr are overwritten with @address and @error_code
+ * @address:    faulting address
+ * @error_code: ESR for a data fault, 0 for an instruction fault
+ *
+ * Finds the VMA covering @address (growing a VM_GROWSDOWN mapping when the
+ * write qualifies as a stack access), checks the VMA permissions against
+ * the access type and passes the fault to handle_mm_fault().
+ *
+ * Failure handling:
+ *   - bad address / bad permission: _exception(SIGSEGV) for user mode,
+ *     bad_page_fault() for kernel mode;
+ *   - VM_FAULT_SIGBUS: SIGBUS for user mode, bad_page_fault() otherwise;
+ *   - VM_FAULT_OOM: pid 1 yields and retries, any other user task is
+ *     killed, kernel mode goes through bad_page_fault().
+ *
+ * mm->mmap_sem is held for reading from the lookup until one of the exit
+ * labels releases it; every exit path releases it exactly once.
+ */
+void do_page_fault(struct pt_regs *regs, unsigned long address,
+                   unsigned long error_code)
+{
+        struct vm_area_struct *vma;
+        struct mm_struct *mm = current->mm;
+        siginfo_t info;
+        int code = SEGV_MAPERR;
+        int is_write = error_code & ESR_S;
+        int fault;
+
+        regs->ear = address;
+        regs->esr = error_code;
+
+        /*
+         * A kernel-mode access at or above TASK_SIZE is reported at once.
+         * NOTE(review): there is no return after _exception(), so this
+         * relies on _exception() not returning for kernel-mode faults -
+         * confirm against its definition.
+         */
+        if (kernel_mode(regs) && (address >= TASK_SIZE)) {
+                printk(KERN_WARNING "kernel task_size exceed\n");
+                _exception(SIGSEGV, regs, code, address);
+        }
+
+        /* for instr TLB miss and instr storage exception ESR_S is undefined */
+        if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11)
+                is_write = 0;
+
+#if defined(CONFIG_KGDB)
+        if (debugger_fault_handler && regs->trap == 0x300) {
+                debugger_fault_handler(regs);
+                return;
+        }
+#endif /* CONFIG_KGDB */
+
+        if (in_atomic() || mm == NULL) {
+                /* FIXME */
+                if (kernel_mode(regs)) {
+                        printk(KERN_EMERG
+                                "Page fault in kernel mode - Oooou!!! pid %d\n",
+                                current->pid);
+                        _exception(SIGSEGV, regs, code, address);
+                        return;
+                }
+                /* in_atomic() in user mode is really bad,
+                   as is current->mm == NULL. */
+                printk(KERN_EMERG "Page fault in user mode with "
+                       "in_atomic(), mm = %p\n", mm);
+                printk(KERN_EMERG "r15 = %lx  MSR = %lx\n",
+                       regs->r15, regs->msr);
+                /*
+                 * NOTE(review): execution continues below if die() returns,
+                 * and mm may be NULL at that point - confirm die() never
+                 * returns here.
+                 */
+                die("Weird page fault", regs, SIGSEGV);
+        }
+
+        /* When running in the kernel we expect faults to occur only to
+         * addresses in user space.  All other faults represent errors in the
+         * kernel and should generate an OOPS.  Unfortunately, in the case of an
+         * erroneous fault occurring in a code path which already holds mmap_sem
+         * we will deadlock attempting to validate the fault against the
+         * address space.  Luckily the kernel only validly references user
+         * space from well defined areas of code, which are listed in the
+         * exceptions table.
+         *
+         * As the vast majority of faults will be valid we will only perform
+         * the source reference check when there is a possibility of a deadlock.
+         * Attempt to lock the address space, if we cannot we then validate the
+         * source.  If this is invalid we can skip the address space check,
+         * thus avoiding the deadlock.
+         */
+        if (!down_read_trylock(&mm->mmap_sem)) {
+                if (kernel_mode(regs) && !search_exception_tables(regs->pc))
+                        goto bad_area_nosemaphore;
+
+                down_read(&mm->mmap_sem);
+        }
+
+        vma = find_vma(mm, address);
+        if (!vma)
+                goto bad_area;
+
+        if (vma->vm_start <= address)
+                goto good_area;
+
+        /* Below the VMA: only a write into a grows-down mapping may pass. */
+        if (!(vma->vm_flags & VM_GROWSDOWN))
+                goto bad_area;
+
+        if (!is_write)
+                goto bad_area;
+
+        /*
+         * N.B. The ABI allows programs to access up to
+         * a few hundred bytes below the stack pointer (TBD).
+         * The kernel signal delivery code writes up to about 1.5kB
+         * below the stack pointer (r1) before decrementing it.
+         * The exec code can write slightly over 640kB to the stack
+         * before setting the user r1.  Thus we allow the stack to
+         * expand to 1MB without further checks.
+         */
+        if (address + 0x100000 < vma->vm_end) {
+                /* get user regs even if this fault is in kernel mode */
+                struct pt_regs *uregs = current->thread.regs;
+
+                if (uregs == NULL)
+                        goto bad_area;
+
+                /*
+                 * A user-mode access to an address a long way below
+                 * the stack pointer is only valid if the instruction
+                 * is one which would update the stack pointer to the
+                 * address accessed if the instruction completed
+                 * (see store_updates_sp()).
+                 *
+                 * If we don't check this then any write to the area
+                 * between the last mapped region and the stack will
+                 * expand the stack rather than segfaulting.
+                 */
+                if (address + 2048 < uregs->r1
+                        && (kernel_mode(regs) || !store_updates_sp(regs)))
+                                goto bad_area;
+        }
+        if (expand_stack(vma, address))
+                goto bad_area;
+
+good_area:
+        /* From here on a failure is a permission problem, not a bad map. */
+        code = SEGV_ACCERR;
+
+        /* a write */
+        if (is_write) {
+                if (!(vma->vm_flags & VM_WRITE))
+                        goto bad_area;
+        /* a read */
+        } else {
+                /*
+                 * protection fault
+                 * NOTE(review): confirm that bit 0x08000000 is meaningful
+                 * in the error code passed to this handler.
+                 */
+                if (error_code & 0x08000000)
+                        goto bad_area;
+                if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+                        goto bad_area;
+        }
+
+        /*
+         * If for any reason at all we couldn't handle the fault,
+         * make sure we exit gracefully rather than endlessly redo
+         * the fault.
+         */
+survive:
+        fault = handle_mm_fault(mm, vma, address, is_write);
+        if (unlikely(fault & VM_FAULT_ERROR)) {
+                if (fault & VM_FAULT_OOM)
+                        goto out_of_memory;
+                else if (fault & VM_FAULT_SIGBUS)
+                        goto do_sigbus;
+                BUG();
+        }
+        if (fault & VM_FAULT_MAJOR)
+                current->maj_flt++;
+        else
+                current->min_flt++;
+        up_read(&mm->mmap_sem);
+        /*
+         * keep track of tlb+htab misses that are good addrs but
+         * just need pte's created via handle_mm_fault()
+         * -- Cort
+         */
+        pte_misses++;
+        return;
+
+bad_area:
+        up_read(&mm->mmap_sem);
+
+bad_area_nosemaphore:
+        pte_errors++;
+
+        /* User mode accesses cause a SIGSEGV */
+        if (user_mode(regs)) {
+                _exception(SIGSEGV, regs, code, address);
+                return;
+        }
+
+        bad_page_fault(regs, address, SIGSEGV);
+        return;
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+        /*
+         * Every path to this label still holds mmap_sem for reading, so
+         * release it before doing anything else.  The retry below takes it
+         * again; acquiring it a second time without this release would
+         * leave the read lock held twice but released only once, and would
+         * hold it across yield().
+         */
+        up_read(&mm->mmap_sem);
+        if (current->pid == 1) {
+                yield();
+                down_read(&mm->mmap_sem);
+                /*
+                 * NOTE(review): vma was looked up before the lock was
+                 * dropped and is reused by the retry - confirm that is
+                 * acceptable for pid 1.
+                 */
+                goto survive;
+        }
+        printk(KERN_WARNING "VM: killing process %s\n", current->comm);
+        if (user_mode(regs))
+                do_exit(SIGKILL);
+        bad_page_fault(regs, address, SIGKILL);
+        return;
+
+do_sigbus:
+        up_read(&mm->mmap_sem);
+        if (user_mode(regs)) {
+                info.si_signo = SIGBUS;
+                info.si_errno = 0;
+                info.si_code = BUS_ADRERR;
+                info.si_addr = (void __user *)address;
+                force_sig_info(SIGBUS, &info, current);
+                return;
+        }
+        bad_page_fault(regs, address, SIGBUS);
+}
author	Michal Simek <monstr@monstr.eu>	2009-05-26 10:30:13 -0400
committer	Michal Simek <monstr@monstr.eu>	2009-05-26 10:45:15 -0400
commit	5de96121009f4de43ffeb7160109e23132278c07 (patch)
tree	6d8418195f2a68e185ef993471a6e8ffd8bcb613
parent	23098649e0f8861ea69fac62cf6ba721b83065dc (diff)

diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c new file mode 100644 index 000000000000..5e67cd1fab40 --- /dev/null +++ b/arch/microblaze/mm/fault.c
@@ -0,0 +1,304 @@
	1	/*
	2	* arch/microblaze/mm/fault.c
	3	*
	4	* Copyright (C) 2007 Xilinx, Inc. All rights reserved.
	5	*
	6	* Derived from "arch/ppc/mm/fault.c"
	7	* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
	8	*
	9	* Derived from "arch/i386/mm/fault.c"
	10	* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
	11	*
	12	* Modified by Cort Dougan and Paul Mackerras.
	13	*
	14	* This file is subject to the terms and conditions of the GNU General
	15	* Public License. See the file COPYING in the main directory of this
	16	* archive for more details.
	17	*
	18	*/
	19
	20	#include <linux/module.h>
	21	#include <linux/signal.h>
	22	#include <linux/sched.h>
	23	#include <linux/kernel.h>
	24	#include <linux/errno.h>
	25	#include <linux/string.h>
	26	#include <linux/types.h>
	27	#include <linux/ptrace.h>
	28	#include <linux/mman.h>
	29	#include <linux/mm.h>
	30	#include <linux/interrupt.h>
	31
	32	#include <asm/page.h>
	33	#include <asm/pgtable.h>
	34	#include <asm/mmu.h>
	35	#include <asm/mmu_context.h>
	36	#include <asm/system.h>
	37	#include <linux/uaccess.h>
	38	#include <asm/exceptions.h>
	39
	40	#if defined(CONFIG_KGDB)
	41	int debugger_kernel_faults = 1;
	42	#endif
	43
	44	static unsigned long pte_misses; /* updated by do_page_fault() */
	45	static unsigned long pte_errors; /* updated by do_page_fault() */
	46
	47	/*
	48	* Check whether the instruction at regs->pc is a store using
	49	* an update addressing form which will update r1.
	50	*/
	51	static int store_updates_sp(struct pt_regs *regs)
	52	{
	53	unsigned int inst;
	54
	55	if (get_user(inst, (unsigned int *)regs->pc))
	56	return 0;
	57	/* check for 1 in the rD field */
	58	if (((inst >> 21) & 0x1f) != 1)
	59	return 0;
	60	/* check for store opcodes */
	61	if ((inst & 0xd0000000) == 0xd0000000)
	62	return 1;
	63	return 0;
	64	}
	65
	66
	67	/*
	68	* bad_page_fault is called when we have a bad access from the kernel.
	69	* It is called from do_page_fault above and from some of the procedures
	70	* in traps.c.
	71	*/
	72	static void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
	73	{
	74	const struct exception_table_entry *fixup;
	75	/* MS: no context */
	76	/* Are we prepared to handle this fault? */
	77	fixup = search_exception_tables(regs->pc);
	78	if (fixup) {
	79	regs->pc = fixup->fixup;
	80	return;
	81	}
	82
	83	/* kernel has accessed a bad area */
	84	#if defined(CONFIG_KGDB)
	85	if (debugger_kernel_faults)
	86	debugger(regs);
	87	#endif
	88	die("kernel access of bad area", regs, sig);
	89	}
	90
	91	/*
	92	* The error_code parameter is ESR for a data fault,
	93	* 0 for an instruction fault.
	94	*/
	95	void do_page_fault(struct pt_regs *regs, unsigned long address,
	96	unsigned long error_code)
	97	{
	98	struct vm_area_struct *vma;
	99	struct mm_struct *mm = current->mm;
	100	siginfo_t info;
	101	int code = SEGV_MAPERR;
	102	int is_write = error_code & ESR_S;
	103	int fault;
	104
	105	regs->ear = address;
	106	regs->esr = error_code;
	107
	108	/* On a kernel SLB miss we can only check for a valid exception entry */
	109	if (kernel_mode(regs) && (address >= TASK_SIZE)) {
	110	printk(KERN_WARNING "kernel task_size exceed");
	111	_exception(SIGSEGV, regs, code, address);
	112	}
	113
	114	/* for instr TLB miss and instr storage exception ESR_S is undefined */
	115	if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11)
	116	is_write = 0;
	117
	118	#if defined(CONFIG_KGDB)
	119	if (debugger_fault_handler && regs->trap == 0x300) {
	120	debugger_fault_handler(regs);
	121	return;
	122	}
	123	#endif /* CONFIG_KGDB */
	124
	125	if (in_atomic() || mm == NULL) {
	126	/* FIXME */
	127	if (kernel_mode(regs)) {
	128	printk(KERN_EMERG
	129	"Page fault in kernel mode - Oooou!!! pid %d\n",
	130	current->pid);
	131	_exception(SIGSEGV, regs, code, address);
	132	return;
	133	}
	134	/* in_atomic() in user mode is really bad,
	135	as is current->mm == NULL. */
	136	printk(KERN_EMERG "Page fault in user mode with "
	137	"in_atomic(), mm = %p\n", mm);
	138	printk(KERN_EMERG "r15 = %lx MSR = %lx\n",
	139	regs->r15, regs->msr);
	140	die("Weird page fault", regs, SIGSEGV);
	141	}
	142
	143	/* When running in the kernel we expect faults to occur only to
	144	* addresses in user space. All other faults represent errors in the
	145	* kernel and should generate an OOPS. Unfortunately, in the case of an
	146	* erroneous fault occurring in a code path which already holds mmap_sem
	147	* we will deadlock attempting to validate the fault against the
	148	* address space. Luckily the kernel only validly references user
	149	* space from well defined areas of code, which are listed in the
	150	* exceptions table.
	151	*
	152	* As the vast majority of faults will be valid we will only perform
	153	* the source reference check when there is a possibility of a deadlock.
	154	* Attempt to lock the address space, if we cannot we then validate the
	155	* source. If this is invalid we can skip the address space check,
	156	* thus avoiding the deadlock.
	157	*/
	158	if (!down_read_trylock(&mm->mmap_sem)) {
	159	if (kernel_mode(regs) && !search_exception_tables(regs->pc))
	160	goto bad_area_nosemaphore;
	161
	162	down_read(&mm->mmap_sem);
	163	}
	164
	165	vma = find_vma(mm, address);
	166	if (!vma)
	167	goto bad_area;
	168
	169	if (vma->vm_start <= address)
	170	goto good_area;
	171
	172	if (!(vma->vm_flags & VM_GROWSDOWN))
	173	goto bad_area;
	174
	175	if (!is_write)
	176	goto bad_area;
	177
	178	/*
	179	* N.B. The ABI allows programs to access up to
	180	* a few hundred bytes below the stack pointer (TBD).
	181	* The kernel signal delivery code writes up to about 1.5kB
	182	* below the stack pointer (r1) before decrementing it.
	183	* The exec code can write slightly over 640kB to the stack
	184	* before setting the user r1. Thus we allow the stack to
	185	* expand to 1MB without further checks.
	186	*/
	187	if (address + 0x100000 < vma->vm_end) {
	188
	189	/* get user regs even if this fault is in kernel mode */
	190	struct pt_regs *uregs = current->thread.regs;
	191	if (uregs == NULL)
	192	goto bad_area;
	193
	194	/*
	195	* A user-mode access to an address a long way below
	196	* the stack pointer is only valid if the instruction
	197	* is one which would update the stack pointer to the
	198	* address accessed if the instruction completed,
	199	* i.e. either stwu rs,n(r1) or stwux rs,r1,rb
	200	* (or the byte, halfword, float or double forms).
	201	*
	202	* If we don't check this then any write to the area
	203	* between the last mapped region and the stack will
	204	* expand the stack rather than segfaulting.
	205	*/
	206	if (address + 2048 < uregs->r1
	207	&& (kernel_mode(regs) || !store_updates_sp(regs)))
	208	goto bad_area;
	209	}
	210	if (expand_stack(vma, address))
	211	goto bad_area;
	212
	213	good_area:
	214	code = SEGV_ACCERR;
	215
	216	/* a write */
	217	if (is_write) {
	218	if (!(vma->vm_flags & VM_WRITE))
	219	goto bad_area;
	220	/* a read */
	221	} else {
	222	/* protection fault */
	223	if (error_code & 0x08000000)
	224	goto bad_area;
	225	if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
	226	goto bad_area;
	227	}
	228
	229	/*
	230	* If for any reason at all we couldn't handle the fault,
	231	* make sure we exit gracefully rather than endlessly redo
	232	* the fault.
	233	*/
	234	survive:
	235	fault = handle_mm_fault(mm, vma, address, is_write);
	236	if (unlikely(fault & VM_FAULT_ERROR)) {
	237	if (fault & VM_FAULT_OOM)
	238	goto out_of_memory;
	239	else if (fault & VM_FAULT_SIGBUS)
	240	goto do_sigbus;
	241	BUG();
	242	}
	243	if (fault & VM_FAULT_MAJOR)
	244	current->maj_flt++;
	245	else
	246	current->min_flt++;
	247	up_read(&mm->mmap_sem);
	248	/*
	249	* keep track of tlb+htab misses that are good addrs but
	250	* just need pte's created via handle_mm_fault()
	251	* -- Cort
	252	*/
	253	pte_misses++;
	254	return;
	255
	256	bad_area:
	257	up_read(&mm->mmap_sem);
	258
	259	bad_area_nosemaphore:
	260	pte_errors++;
	261
	262	/* User mode accesses cause a SIGSEGV */
	263	if (user_mode(regs)) {
	264	_exception(SIGSEGV, regs, code, address);
	265	/* info.si_signo = SIGSEGV;
	266	info.si_errno = 0;
	267	info.si_code = code;
	268	info.si_addr = (void *) address;
	269	force_sig_info(SIGSEGV, &info, current);*/
	270	return;
	271	}
	272
	273	bad_page_fault(regs, address, SIGSEGV);
	274	return;
	275
	276	/*
	277	* We ran out of memory, or some other thing happened to us that made
	278	* us unable to handle the page fault gracefully.
	279	*/
	280	out_of_memory:
	281	if (current->pid == 1) {
	282	yield();
	283	down_read(&mm->mmap_sem);
	284	goto survive;
	285	}
	286	up_read(&mm->mmap_sem);
	287	printk(KERN_WARNING "VM: killing process %s\n", current->comm);
	288	if (user_mode(regs))
	289	do_exit(SIGKILL);
	290	bad_page_fault(regs, address, SIGKILL);
	291	return;
	292
	293	do_sigbus:
	294	up_read(&mm->mmap_sem);
	295	if (user_mode(regs)) {
	296	info.si_signo = SIGBUS;
	297	info.si_errno = 0;
	298	info.si_code = BUS_ADRERR;
	299	info.si_addr = (void __user *)address;
	300	force_sig_info(SIGBUS, &info, current);
	301	return;
	302	}
	303	bad_page_fault(regs, address, SIGBUS);
	304	}