author     Linus Torvalds <torvalds@ppc970.osdl.org>   2005-04-16 18:20:36 -0400
committer  Linus Torvalds <torvalds@ppc970.osdl.org>   2005-04-16 18:20:36 -0400
commit     1da177e4c3f41524e886b7f1b8a0c1fc7321cac2
tree       0bba044c4ce775e45a88a51686b5d9f90697ea9d   /arch/ppc64/mm/fault.c

Linux-2.6.12-rc2  (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/ppc64/mm/fault.c')
-rw-r--r--    arch/ppc64/mm/fault.c    312
1 files changed, 312 insertions, 0 deletions
diff --git a/arch/ppc64/mm/fault.c b/arch/ppc64/mm/fault.c
new file mode 100644
index 000000000000..20b0f37e8bf8
--- /dev/null
+++ b/arch/ppc64/mm/fault.c
@@ -0,0 +1,312 @@
/*
 * arch/ppc/mm/fault.c
 *
 * PowerPC version
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Derived from "arch/i386/mm/fault.c"
 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 *
 * Modified by Cort Dougan and Paul Mackerras.
 *
 * Modified for PPC64 by Dave Engebretsen (engebret@ibm.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/smp_lock.h>
#include <linux/module.h>

#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/kdebug.h>

/*
 * Check whether the instruction at regs->nip is a store using
 * an update addressing form which will update r1.
 */
static int store_updates_sp(struct pt_regs *regs)
{
        unsigned int inst;

        if (get_user(inst, (unsigned int __user *)regs->nip))
                return 0;
        /* check for 1 in the rA field */
        if (((inst >> 16) & 0x1f) != 1)
                return 0;
        /* check major opcode */
        switch (inst >> 26) {
        case 37:        /* stwu */
        case 39:        /* stbu */
        case 45:        /* sthu */
        case 53:        /* stfsu */
        case 55:        /* stfdu */
                return 1;
        case 62:        /* std or stdu */
                return (inst & 3) == 1;
        case 31:
                /* check minor opcode */
                switch ((inst >> 1) & 0x3ff) {
                case 181:       /* stdux */
                case 183:       /* stwux */
                case 247:       /* stbux */
                case 439:       /* sthux */
                case 695:       /* stfsux */
                case 759:       /* stfdux */
                        return 1;
                }
        }
        return 0;
}

/*
 * The error_code parameter is
 *  - DSISR for a non-SLB data access fault,
 *  - SRR1 & 0x08000000 for a non-SLB instruction access fault
 *  - 0 any SLB fault.
 * The return value is 0 if the fault was handled, or the signal
 * number if this is a kernel fault that can't be handled here.
 */
int do_page_fault(struct pt_regs *regs, unsigned long address,
                  unsigned long error_code)
{
        struct vm_area_struct * vma;
        struct mm_struct *mm = current->mm;
        siginfo_t info;
        unsigned long code = SEGV_MAPERR;
        unsigned long is_write = error_code & DSISR_ISSTORE;
        unsigned long trap = TRAP(regs);
        unsigned long is_exec = trap == 0x400;

        BUG_ON((trap == 0x380) || (trap == 0x480));

        if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, error_code,
                       11, SIGSEGV) == NOTIFY_STOP)
                return 0;

        if (trap == 0x300) {
                if (debugger_fault_handler(regs))
                        return 0;
        }

        /* On a kernel SLB miss we can only check for a valid exception entry */
        if (!user_mode(regs) && (address >= TASK_SIZE))
                return SIGSEGV;

        if (error_code & DSISR_DABRMATCH) {
                if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
                               11, SIGSEGV) == NOTIFY_STOP)
                        return 0;
                if (debugger_dabr_match(regs))
                        return 0;
        }

        if (in_atomic() || mm == NULL) {
                if (!user_mode(regs))
                        return SIGSEGV;
                /* in_atomic() in user mode is really bad,
                   as is current->mm == NULL. */
                printk(KERN_EMERG "Page fault in user mode with "
                       "in_atomic() = %d mm = %p\n", in_atomic(), mm);
                printk(KERN_EMERG "NIP = %lx MSR = %lx\n",
                       regs->nip, regs->msr);
                die("Weird page fault", regs, SIGSEGV);
        }
132 | |||
133 | /* When running in the kernel we expect faults to occur only to | ||
134 | * addresses in user space. All other faults represent errors in the | ||
135 | * kernel and should generate an OOPS. Unfortunatly, in the case of an | ||
136 | * erroneous fault occuring in a code path which already holds mmap_sem | ||
137 | * we will deadlock attempting to validate the fault against the | ||
138 | * address space. Luckily the kernel only validly references user | ||
139 | * space from well defined areas of code, which are listed in the | ||
140 | * exceptions table. | ||
141 | * | ||
142 | * As the vast majority of faults will be valid we will only perform | ||
143 | * the source reference check when there is a possibilty of a deadlock. | ||
144 | * Attempt to lock the address space, if we cannot we then validate the | ||
145 | * source. If this is invalid we can skip the address space check, | ||
146 | * thus avoiding the deadlock. | ||
147 | */ | ||
        if (!down_read_trylock(&mm->mmap_sem)) {
                if (!user_mode(regs) && !search_exception_tables(regs->nip))
                        goto bad_area_nosemaphore;

                down_read(&mm->mmap_sem);
        }

        vma = find_vma(mm, address);
        if (!vma)
                goto bad_area;

        if (vma->vm_start <= address) {
                goto good_area;
        }
        if (!(vma->vm_flags & VM_GROWSDOWN))
                goto bad_area;

        /*
         * N.B. The POWER/Open ABI allows programs to access up to
         * 288 bytes below the stack pointer.
         * The kernel signal delivery code writes up to about 1.5kB
         * below the stack pointer (r1) before decrementing it.
         * The exec code can write slightly over 640kB to the stack
         * before setting the user r1. Thus we allow the stack to
         * expand to 1MB without further checks.
         */
        if (address + 0x100000 < vma->vm_end) {
                /* get user regs even if this fault is in kernel mode */
                struct pt_regs *uregs = current->thread.regs;
                if (uregs == NULL)
                        goto bad_area;

                /*
                 * A user-mode access to an address a long way below
                 * the stack pointer is only valid if the instruction
                 * is one which would update the stack pointer to the
                 * address accessed if the instruction completed,
                 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
                 * (or the byte, halfword, float or double forms).
                 *
                 * If we don't check this then any write to the area
                 * between the last mapped region and the stack will
                 * expand the stack rather than segfaulting.
                 */
                if (address + 2048 < uregs->gpr[1]
                    && (!user_mode(regs) || !store_updates_sp(regs)))
                        goto bad_area;
        }

        if (expand_stack(vma, address))
                goto bad_area;

good_area:
        code = SEGV_ACCERR;

        if (is_exec) {
                /* protection fault */
                if (error_code & DSISR_PROTFAULT)
                        goto bad_area;
                if (!(vma->vm_flags & VM_EXEC))
                        goto bad_area;
        /* a write */
        } else if (is_write) {
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
        /* a read */
        } else {
                if (!(vma->vm_flags & VM_READ))
                        goto bad_area;
        }

survive:
        /*
         * If for any reason at all we couldn't handle the fault,
         * make sure we exit gracefully rather than endlessly redo
         * the fault.
         */
        switch (handle_mm_fault(mm, vma, address, is_write)) {

        case VM_FAULT_MINOR:
                current->min_flt++;
                break;
        case VM_FAULT_MAJOR:
                current->maj_flt++;
                break;
        case VM_FAULT_SIGBUS:
                goto do_sigbus;
        case VM_FAULT_OOM:
                goto out_of_memory;
        default:
                BUG();
        }

        up_read(&mm->mmap_sem);
        return 0;

bad_area:
        up_read(&mm->mmap_sem);

bad_area_nosemaphore:
        /* User mode accesses cause a SIGSEGV */
        if (user_mode(regs)) {
                info.si_signo = SIGSEGV;
                info.si_errno = 0;
                info.si_code = code;
                info.si_addr = (void __user *) address;
                force_sig_info(SIGSEGV, &info, current);
                return 0;
        }

        if (trap == 0x400 && (error_code & DSISR_PROTFAULT)
            && printk_ratelimit())
                printk(KERN_CRIT "kernel tried to execute NX-protected"
                       " page (%lx) - exploit attempt? (uid: %d)\n",
                       address, current->uid);

        return SIGSEGV;

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
        up_read(&mm->mmap_sem);
        if (current->pid == 1) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
        }
        printk("VM: killing process %s\n", current->comm);
        if (user_mode(regs))
                do_exit(SIGKILL);
        return SIGKILL;

do_sigbus:
        up_read(&mm->mmap_sem);
        if (user_mode(regs)) {
                info.si_signo = SIGBUS;
                info.si_errno = 0;
                info.si_code = BUS_ADRERR;
                info.si_addr = (void __user *)address;
                force_sig_info(SIGBUS, &info, current);
                return 0;
        }
        return SIGBUS;
}

/*
 * bad_page_fault is called when we have a bad access from the kernel.
 * It is called from do_page_fault above and from some of the procedures
 * in traps.c.
 */
void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
{
        const struct exception_table_entry *entry;

        /* Are we prepared to handle this fault? */
        if ((entry = search_exception_tables(regs->nip)) != NULL) {
                regs->nip = entry->fixup;
                return;
        }

        /* kernel has accessed a bad area */
        die("Kernel access of bad area", regs, sig);
}
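
Editor's note: the opcode checks in store_updates_sp() above follow directly from the PowerPC D-form store-with-update encoding (6-bit major opcode, 5-bit RS, 5-bit RA, 16-bit displacement), with rA = 1 meaning the base register being updated is r1. The stand-alone user-space sketch below is illustrative only and not part of the kernel source: it hand-encodes a stwu r0,-16(r1) instruction and runs it through a copy of the same decode logic (minus the get_user() step).

#include <stdio.h>

/* Hand-encode "stwu r0,-16(r1)": D-form, major opcode 37,
 * RS = r0, RA = r1, 16-bit signed displacement -16 (0xfff0). */
static unsigned int encode_stwu(void)
{
        return (37u << 26) | (0u << 21) | (1u << 16) | 0xfff0u;
}

/* Same decode as store_updates_sp(), without the user-memory read. */
static int updates_sp(unsigned int inst)
{
        if (((inst >> 16) & 0x1f) != 1)         /* rA must be r1 */
                return 0;
        switch (inst >> 26) {
        case 37: case 39: case 45: case 53: case 55:
                return 1;                       /* stwu/stbu/sthu/stfsu/stfdu */
        case 62:
                return (inst & 3) == 1;         /* stdu (DS-form) */
        case 31:
                switch ((inst >> 1) & 0x3ff) {
                case 181: case 183: case 247:
                case 439: case 695: case 759:
                        return 1;               /* indexed with-update forms */
                }
        }
        return 0;
}

int main(void)
{
        unsigned int inst = encode_stwu();

        /* Prints 0x9401fff0 and confirms the decode recognises it. */
        printf("stwu r0,-16(r1) = 0x%08x, updates sp: %d\n",
               inst, updates_sp(inst));
        return 0;
}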
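Editor's note: the stack-expansion policy in do_page_fault() above reduces to two numeric tests: a fault within 1MB of the top of the stack vma grows the stack unconditionally, and a fault further down is only honoured if it lands within 2048 bytes of r1 or the faulting instruction is a store-with-update of r1. The sketch below replays those tests on invented values (VM_END and SP are hypothetical, and the store-with-update case is assumed to be false) purely to show which faults would be allowed to grow the stack.

#include <stdio.h>

/* Hypothetical layout, for illustration only: top of the stack vma
 * and the user stack pointer r1. */
#define VM_END  0x10000000UL
#define SP      0x0fe00000UL

/* Re-run the two tests from do_page_fault() for a faulting address,
 * assuming the instruction is NOT a store-with-update of r1. */
static const char *stack_growth_verdict(unsigned long address)
{
        if (address + 0x100000 >= VM_END)
                return "within 1MB of vm_end: grow stack, no further checks";
        if (address + 2048 >= SP)
                return "within 2kB of r1: grow stack";
        return "far below r1 and not a store-with-update of r1: bad_area";
}

int main(void)
{
        unsigned long addrs[] = { 0x0fff0000UL, 0x0fdffff0UL, 0x0fd00000UL };

        for (int i = 0; i < 3; i++)
                printf("%#lx -> %s\n", addrs[i], stack_growth_verdict(addrs[i]));
        return 0;
}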
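Editor's note: bad_page_fault() above recovers from kernel-mode faults by consulting the exception table: if the faulting instruction address (regs->nip) has a registered fixup, execution is redirected there instead of oopsing. The toy sketch below models only that lookup-and-redirect idea with an invented table and struct; in the real kernel the table entries are emitted by the uaccess macros' inline assembly, not built by hand like this.

#include <stdio.h>
#include <stddef.h>

/* Toy model of an exception table entry: "if the fault happened at
 * insn, resume at fixup". Names and layout are illustrative only. */
struct toy_exception_entry {
        unsigned long insn;
        unsigned long fixup;
};

static const struct toy_exception_entry toy_table[] = {
        { 0x1000, 0x2000 },     /* a faulting access with a recovery stub */
        { 0x1010, 0x2040 },
};

/* Linear-search stand-in for search_exception_tables(). */
static const struct toy_exception_entry *toy_search(unsigned long nip)
{
        for (size_t i = 0; i < sizeof(toy_table) / sizeof(toy_table[0]); i++)
                if (toy_table[i].insn == nip)
                        return &toy_table[i];
        return NULL;
}

int main(void)
{
        unsigned long nip = 0x1010;     /* pretend this instruction faulted */
        const struct toy_exception_entry *entry = toy_search(nip);

        if (entry)
                nip = entry->fixup;     /* same idea as regs->nip = entry->fixup */
        printf("resume at %#lx\n", nip);
        return 0;
}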