OpenRISC: Memory management

Signed-off-by: Jonas Bonn <jonas@southpole.se> Reviewed-by: Arnd Bergmann <arnd@arndb.de>
author: Jonas Bonn <jonas@southpole.se> 2011-06-04 04:06:11 -0400
committer: Jonas Bonn <jonas@southpole.se> 2011-07-22 12:46:28 -0400
commit: 61e85e367535a7b6385b404bef93928768140f96 (patch)
tree: a0b8cb40dff683d3d09268f55080b5539d25b9a5 /arch/openrisc/mm/fault.c
parent: 4f246ba30e1a9a31fcfd91d2ab8f5c75f1362bbf (diff)
1 files changed, 338 insertions, 0 deletions
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
new file mode 100644
index 000000000000..a5dce82f864b
--- /dev/null
+++ b/arch/openrisc/mm/fault.c
@@ -0,0 +1,338 @@
+/*
+ * OpenRISC fault.c
+ *
+ * Linux architectural port borrowing liberally from similar works of
+ * others.  All original copyrights apply as per the original source
+ * declaration.
+ *
+ * Modifications for the OpenRISC architecture:
+ * Copyright (C) 2003 Matjaz Breskvar <phoenix@bsemi.com>
+ * Copyright (C) 2010-2011 Jonas Bonn <jonas@southpole.se>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <asm/uaccess.h>
+#include <asm/siginfo.h>
+#include <asm/signal.h>
+/*
+ * Size used by TLB_OFFSET() to build its index mask; presumably the number
+ * of entries in the hardware TLB - confirm.  The mask (NUM_TLB_ENTRIES-1)
+ * only yields a valid 0..NUM_TLB_ENTRIES-1 index when this is a power of two.
+ */
+#define NUM_TLB_ENTRIES 64
+/* Page number of 'add' (add >> PAGE_SHIFT) reduced to 0..NUM_TLB_ENTRIES-1. */
+#define TLB_OFFSET(add) (((add) >> PAGE_SHIFT) & (NUM_TLB_ENTRIES-1))
+/*
+ * NOTE(review): the trailing comments say do_page_fault() updates these
+ * counters, but the do_page_fault() defined below never writes them -
+ * confirm where (or whether) they are maintained.
+ */
+unsigned long pte_misses;       /* updated by do_page_fault() */
+unsigned long pte_errors;       /* updated by do_page_fault() */
+/* __PHX__ :: - check the vmalloc_fault in do_page_fault()
+ *            - also look into include/asm-or32/mmu_context.h
+ */
+/*
+ * Top-level page table that the vmalloc_fault path of do_page_fault()
+ * synchronizes against init_mm.pgd.  That path uses this pointer instead
+ * of tsk->active_mm->pgd.  It is not assigned anywhere in this file.
+ */
+volatile pgd_t *current_pgd;
+/*
+ * Defined elsewhere; do_page_fault() calls it as die("Oops", regs, write_acc)
+ * when a kernel-mode fault has no exception-table fixup.
+ */
+extern void die(char *, struct pt_regs *, long);
+/*
+ * do_page_fault() - handle a page fault.
+ *
+ * @regs:      register state saved when the exception was taken
+ * @address:   faulting virtual address
+ * @vector:    exception vector; 0x300 and 0x400 are treated as protection
+ *             errors (never taken through the vmalloc fast path) and 0x400
+ *             is additionally checked against _PAGE_EXEC
+ * @write_acc: non-zero when the faulting access was a write
+ *
+ * The routine determines the address and the problem and dispatches to the
+ * appropriate handling.  It returns nothing; the possible outcomes are:
+ *   - the fault is resolved (handle_mm_fault(), or the vmalloc pmd sync),
+ *   - SIGSEGV or SIGBUS is forced on the current task,
+ *   - a kernel-mode fault is redirected to its exception-table fixup,
+ *   - die() is called and the task exits with SIGKILL.
+ */
+asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
+                              unsigned long vector, int write_acc)
+{
+        struct task_struct *tsk;
+        struct mm_struct *mm;
+        struct vm_area_struct *vma;
+        siginfo_t info;
+        int fault;
+        tsk = current;
+        /*
+         * We fault-in kernel-space virtual memory on-demand. The
+         * 'reference' page table is init_mm.pgd.
+         *
+         * NOTE! We MUST NOT take any locks for this case. We may
+         * be in an interrupt or a critical region, and should
+         * only copy the information from the master page table,
+         * nothing more.
+         *
+         * NOTE2: This is done so that, when updating the vmalloc
+         * mappings we don't have to walk all processes pgdirs and
+         * add the high mappings all at once. Instead we do it as they
+         * are used. However vmalloc'ed page entries have the PAGE_GLOBAL
+         * bit set so sometimes the TLB can use a lingering entry.
+         *
+         * This verifies that the fault happens in kernel space
+         * and that the fault was not a protection error.
+         */
+        if (address >= VMALLOC_START &&
+            (vector != 0x300 && vector != 0x400) &&
+            !user_mode(regs))
+                goto vmalloc_fault;
+        /* If exceptions were enabled, we can reenable them here */
+        if (user_mode(regs)) {
+                /* Exception was in userspace: reenable interrupts */
+                local_irq_enable();
+        } else {
+                /* If exception was in a syscall, then IRQ's may have
+                 * been enabled or disabled.  If they were enabled,
+                 * reenable them.
+                 *
+                 * This must be a bitwise test of the saved SR: a
+                 * logical '&&' is true for any non-zero SR and would
+                 * reenable interrupts even when they were disabled.
+                 */
+                if (regs->sr & (SPR_SR_IEE | SPR_SR_TEE))
+                        local_irq_enable();
+        }
+        mm = tsk->mm;
+        /* Default signal code; overridden once a covering vma is found. */
+        info.si_code = SEGV_MAPERR;
+        /*
+         * If we're in an interrupt or have no user
+         * context, we must not take the fault..
+         */
+        if (in_interrupt() || !mm)
+                goto no_context;
+        down_read(&mm->mmap_sem);
+        vma = find_vma(mm, address);
+        if (!vma)
+                goto bad_area;
+        if (vma->vm_start <= address)
+                goto good_area;
+        /* Address lies below the vma: only acceptable for a growable stack. */
+        if (!(vma->vm_flags & VM_GROWSDOWN))
+                goto bad_area;
+        if (user_mode(regs)) {
+                /*
+                 * accessing the stack below usp is always a bug.
+                 * we get page-aligned addresses so we can only check
+                 * if we're within a page from usp, but that might be
+                 * enough to catch brutal errors at least.
+                 */
+                if (address + PAGE_SIZE < regs->sp)
+                        goto bad_area;
+        }
+        if (expand_stack(vma, address))
+                goto bad_area;
+        /*
+         * Ok, we have a good vm_area for this memory access, so
+         * we can handle it..
+         */
+good_area:
+        /* From here on a failure is a permission problem, not a missing map. */
+        info.si_code = SEGV_ACCERR;
+        /* first do some preliminary protection checks */
+        if (write_acc) {
+                if (!(vma->vm_flags & VM_WRITE))
+                        goto bad_area;
+        } else {
+                /* not present */
+                if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+                        goto bad_area;
+        }
+        /* are we trying to execute nonexecutable area */
+        if ((vector == 0x400) && !(vma->vm_page_prot.pgprot & _PAGE_EXEC))
+                goto bad_area;
+        /*
+         * If for any reason at all we couldn't handle the fault,
+         * make sure we exit gracefully rather than endlessly redo
+         * the fault.
+         *
+         * NOTE(review): write_acc is handed to handle_mm_fault() unchanged
+         * as its last argument - confirm its encoding matches what that
+         * function expects.
+         */
+        fault = handle_mm_fault(mm, vma, address, write_acc);
+        if (unlikely(fault & VM_FAULT_ERROR)) {
+                if (fault & VM_FAULT_OOM)
+                        goto out_of_memory;
+                else if (fault & VM_FAULT_SIGBUS)
+                        goto do_sigbus;
+                /* Any other error bit is unexpected here. */
+                BUG();
+        }
+        /*RGD modeled on Cris */
+        if (fault & VM_FAULT_MAJOR)
+                tsk->maj_flt++;
+        else
+                tsk->min_flt++;
+        up_read(&mm->mmap_sem);
+        return;
+        /*
+         * Something tried to access memory that isn't in our memory map..
+         * Fix it, but check if it's kernel or user first..
+         */
+bad_area:
+        up_read(&mm->mmap_sem);
+bad_area_nosemaphore:
+        /* User mode accesses just cause a SIGSEGV */
+        if (user_mode(regs)) {
+                info.si_signo = SIGSEGV;
+                info.si_errno = 0;
+                /* info.si_code has been set above */
+                info.si_addr = (void *)address;
+                force_sig_info(SIGSEGV, &info, tsk);
+                return;
+        }
+no_context:
+        /* Are we prepared to handle this kernel fault?
+         *
+         * (The kernel has valid exception-points in the source
+         *  when it accesses user-memory. When it fails in one
+         *  of those points, we find it in a table and do a jump
+         *  to some fixup code that loads an appropriate error
+         *  code)
+         */
+        {
+                const struct exception_table_entry *entry;
+                /*
+                 * NOTE(review): l.nop with a non-zero immediate looks
+                 * like a simulator/debug marker - confirm it is meant
+                 * to stay.
+                 */
+                __asm__ __volatile__("l.nop 42");
+                entry = search_exception_tables(regs->pc);
+                if (entry != NULL) {
+                        /* Adjust the instruction pointer in the stackframe */
+                        regs->pc = entry->fixup;
+                        return;
+                }
+        }
+        /*
+         * Oops. The kernel tried to access some bad page. We'll have to
+         * terminate things with extreme prejudice.
+         */
+        if ((unsigned long)(address) < PAGE_SIZE)
+                printk(KERN_ALERT
+                       "Unable to handle kernel NULL pointer dereference");
+        else
+                printk(KERN_ALERT "Unable to handle kernel access");
+        printk(" at virtual address 0x%08lx\n", address);
+        die("Oops", regs, write_acc);
+        do_exit(SIGKILL);
+        /*
+         * We ran out of memory, or some other thing happened to us that made
+         * us unable to handle the page fault gracefully.
+         */
+out_of_memory:
+        /*
+         * NOTE(review): these two l.nop instructions look like
+         * simulator/debug markers - confirm they are meant to stay.
+         */
+        __asm__ __volatile__("l.nop 42");
+        __asm__ __volatile__("l.nop 1");
+        up_read(&mm->mmap_sem);
+        printk("VM: killing process %s\n", tsk->comm);
+        if (user_mode(regs))
+                do_exit(SIGKILL);
+        /* Kernel-mode OOM: try the exception table, otherwise oops. */
+        goto no_context;
+do_sigbus:
+        up_read(&mm->mmap_sem);
+        /*
+         * Send a sigbus, regardless of whether we were in kernel
+         * or user mode.
+         */
+        info.si_signo = SIGBUS;
+        info.si_errno = 0;
+        info.si_code = BUS_ADRERR;
+        info.si_addr = (void *)address;
+        force_sig_info(SIGBUS, &info, tsk);
+        /* Kernel mode? Handle exceptions or die */
+        if (!user_mode(regs))
+                goto no_context;
+        return;
+vmalloc_fault:
+        {
+                /*
+                 * Synchronize this task's top level page-table
+                 * with the 'reference' page table.
+                 *
+                 * Use current_pgd instead of tsk->active_mm->pgd
+                 * since the latter might be unavailable if this
+                 * code is executed in a misfortunately run irq
+                 * (like inside schedule() between switch_mm and
+                 *  switch_to...).
+                 *
+                 * NOTE(review): a warning that used to sit here (disabled)
+                 * said this path cannot work unless current_pgd is
+                 * assigned a proper value somewhere; nothing in this file
+                 * assigns it - confirm it is maintained elsewhere.
+                 */
+                int offset = pgd_index(address);
+                pgd_t *pgd, *pgd_k;
+                pud_t *pud, *pud_k;
+                pmd_t *pmd, *pmd_k;
+                pte_t *pte_k;
+                pgd = (pgd_t *)current_pgd + offset;
+                pgd_k = init_mm.pgd + offset;
+                /* Since we're two-level, we don't need to do both
+                 * set_pgd and set_pmd (they do the same thing). If
+                 * we go three-level at some point, do the right thing
+                 * with pgd_present and set_pgd here.
+                 *
+                 * Also, since the vmalloc area is global, we don't
+                 * need to copy individual PTE's, it is enough to
+                 * copy the pgd pointer into the pte page of the
+                 * root task. If that is there, we'll find our pte if
+                 * it exists.
+                 */
+                pud = pud_offset(pgd, address);
+                pud_k = pud_offset(pgd_k, address);
+                if (!pud_present(*pud_k))
+                        goto no_context;
+                pmd = pmd_offset(pud, address);
+                pmd_k = pmd_offset(pud_k, address);
+                /*
+                 * This path is only entered for kernel-mode faults, so
+                 * bad_area_nosemaphore falls through to no_context.
+                 */
+                if (!pmd_present(*pmd_k))
+                        goto bad_area_nosemaphore;
+                set_pmd(pmd, *pmd_k);
+                /* Make sure the actual PTE exists as well to
+                 * catch kernel vmalloc-area accesses to non-mapped
+                 * addresses. If we don't do this, this will just
+                 * silently loop forever.
+                 */
+                pte_k = pte_offset_kernel(pmd_k, address);
+                if (!pte_present(*pte_k))
+                        goto no_context;
+                return;
+        }
+}
author	Jonas Bonn <jonas@southpole.se>	2011-06-04 04:06:11 -0400
committer	Jonas Bonn <jonas@southpole.se>	2011-07-22 12:46:28 -0400
commit	61e85e367535a7b6385b404bef93928768140f96 (patch)
tree	a0b8cb40dff683d3d09268f55080b5539d25b9a5 /arch/openrisc/mm/fault.c
parent	4f246ba30e1a9a31fcfd91d2ab8f5c75f1362bbf (diff)

diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c new file mode 100644 index 000000000000..a5dce82f864b --- /dev/null +++ b/arch/openrisc/mm/fault.c
@@ -0,0 +1,338 @@
	1	/*
	2	* OpenRISC fault.c
	3	*
	4	* Linux architectural port borrowing liberally from similar works of
	5	* others. All original copyrights apply as per the original source
	6	* declaration.
	7	*
	8	* Modifications for the OpenRISC architecture:
	9	* Copyright (C) 2003 Matjaz Breskvar <phoenix@bsemi.com>
	10	* Copyright (C) 2010-2011 Jonas Bonn <jonas@southpole.se>
	11	*
	12	* This program is free software; you can redistribute it and/or
	13	* modify it under the terms of the GNU General Public License
	14	* as published by the Free Software Foundation; either version
	15	* 2 of the License, or (at your option) any later version.
	16	*/
	17
	18	#include <linux/mm.h>
	19	#include <linux/interrupt.h>
	20	#include <linux/module.h>
	21	#include <linux/sched.h>
	22
	23	#include <asm/uaccess.h>
	24	#include <asm/siginfo.h>
	25	#include <asm/signal.h>
	26
	27	#define NUM_TLB_ENTRIES 64
	28	#define TLB_OFFSET(add) (((add) >> PAGE_SHIFT) & (NUM_TLB_ENTRIES-1))
	29
	30	unsigned long pte_misses; /* updated by do_page_fault() */
	31	unsigned long pte_errors; /* updated by do_page_fault() */
	32
	33	/* __PHX__ :: - check the vmalloc_fault in do_page_fault()
	34	* - also look into include/asm-or32/mmu_context.h
	35	*/
	36	volatile pgd_t *current_pgd;
	37
	38	extern void die(char *, struct pt_regs *, long);
	39
	40	/*
	41	* This routine handles page faults. It determines the address,
	42	* and the problem, and then passes it off to one of the appropriate
	43	* routines.
	44	*
	45	* The routine returns nothing: a bad access ends in a signal, an
	46	* exception-table fixup, or an oops.
	47	*/
	48
	49	asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
	50	unsigned long vector, int write_acc)
	51	{
	52	struct task_struct *tsk;
	53	struct mm_struct *mm;
	54	struct vm_area_struct *vma;
	55	siginfo_t info;
	56	int fault;
	57
	58	tsk = current;
	59
	60	/*
	61	* We fault-in kernel-space virtual memory on-demand. The
	62	* 'reference' page table is init_mm.pgd.
	63	*
	64	* NOTE! We MUST NOT take any locks for this case. We may
	65	* be in an interrupt or a critical region, and should
	66	* only copy the information from the master page table,
	67	* nothing more.
	68	*
	69	* NOTE2: This is done so that, when updating the vmalloc
	70	* mappings we don't have to walk all processes pgdirs and
	71	* add the high mappings all at once. Instead we do it as they
	72	* are used. However vmalloc'ed page entries have the PAGE_GLOBAL
	73	* bit set so sometimes the TLB can use a lingering entry.
	74	*
	75	* This verifies that the fault happens in kernel space
	76	* and that the fault was not a protection error.
	77	*/
	78
	79	if (address >= VMALLOC_START &&
	80	(vector != 0x300 && vector != 0x400) &&
	81	!user_mode(regs))
	82	goto vmalloc_fault;
	83
	84	/* If exceptions were enabled, we can reenable them here */
	85	if (user_mode(regs)) {
	86	/* Exception was in userspace: reenable interrupts */
	87	local_irq_enable();
	88	} else {
	89	/* If exception was in a syscall, then IRQ's may have
	90	* been enabled or disabled. If they were enabled,
	91	* reenable them.
	92	*/
	93	if (regs->sr && (SPR_SR_IEE | SPR_SR_TEE))
	94	local_irq_enable();
	95	}
	96
	97	mm = tsk->mm;
	98	info.si_code = SEGV_MAPERR;
	99
	100	/*
	101	* If we're in an interrupt or have no user
	102	* context, we must not take the fault..
	103	*/
	104
	105	if (in_interrupt() || !mm)
	106	goto no_context;
	107
	108	down_read(&mm->mmap_sem);
	109	vma = find_vma(mm, address);
	110
	111	if (!vma)
	112	goto bad_area;
	113
	114	if (vma->vm_start <= address)
	115	goto good_area;
	116
	117	if (!(vma->vm_flags & VM_GROWSDOWN))
	118	goto bad_area;
	119
	120	if (user_mode(regs)) {
	121	/*
	122	* accessing the stack below usp is always a bug.
	123	* we get page-aligned addresses so we can only check
	124	* if we're within a page from usp, but that might be
	125	* enough to catch brutal errors at least.
	126	*/
	127	if (address + PAGE_SIZE < regs->sp)
	128	goto bad_area;
	129	}
	130	if (expand_stack(vma, address))
	131	goto bad_area;
	132
	133	/*
	134	* Ok, we have a good vm_area for this memory access, so
	135	* we can handle it..
	136	*/
	137
	138	good_area:
	139	info.si_code = SEGV_ACCERR;
	140
	141	/* first do some preliminary protection checks */
	142
	143	if (write_acc) {
	144	if (!(vma->vm_flags & VM_WRITE))
	145	goto bad_area;
	146	} else {
	147	/* not present */
	148	if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
	149	goto bad_area;
	150	}
	151
	152	/* are we trying to execute nonexecutable area */
	153	if ((vector == 0x400) && !(vma->vm_page_prot.pgprot & _PAGE_EXEC))
	154	goto bad_area;
	155
	156	/*
	157	* If for any reason at all we couldn't handle the fault,
	158	* make sure we exit gracefully rather than endlessly redo
	159	* the fault.
	160	*/
	161
	162	fault = handle_mm_fault(mm, vma, address, write_acc);
	163	if (unlikely(fault & VM_FAULT_ERROR)) {
	164	if (fault & VM_FAULT_OOM)
	165	goto out_of_memory;
	166	else if (fault & VM_FAULT_SIGBUS)
	167	goto do_sigbus;
	168	BUG();
	169	}
	170	/*RGD modeled on Cris */
	171	if (fault & VM_FAULT_MAJOR)
	172	tsk->maj_flt++;
	173	else
	174	tsk->min_flt++;
	175
	176	up_read(&mm->mmap_sem);
	177	return;
	178
	179	/*
	180	* Something tried to access memory that isn't in our memory map..
	181	* Fix it, but check if it's kernel or user first..
	182	*/
	183
	184	bad_area:
	185	up_read(&mm->mmap_sem);
	186
	187	bad_area_nosemaphore:
	188
	189	/* User mode accesses just cause a SIGSEGV */
	190
	191	if (user_mode(regs)) {
	192	info.si_signo = SIGSEGV;
	193	info.si_errno = 0;
	194	/* info.si_code has been set above */
	195	info.si_addr = (void *)address;
	196	force_sig_info(SIGSEGV, &info, tsk);
	197	return;
	198	}
	199
	200	no_context:
	201
	202	/* Are we prepared to handle this kernel fault?
	203	*
	204	* (The kernel has valid exception-points in the source
	205	* when it acesses user-memory. When it fails in one
	206	* of those points, we find it in a table and do a jump
	207	* to some fixup code that loads an appropriate error
	208	* code)
	209	*/
	210
	211	{
	212	const struct exception_table_entry *entry;
	213
	214	__asm__ __volatile__("l.nop 42");
	215
	216	if ((entry = search_exception_tables(regs->pc)) != NULL) {
	217	/* Adjust the instruction pointer in the stackframe */
	218	regs->pc = entry->fixup;
	219	return;
	220	}
	221	}
	222
	223	/*
	224	* Oops. The kernel tried to access some bad page. We'll have to
	225	* terminate things with extreme prejudice.
	226	*/
	227
	228	if ((unsigned long)(address) < PAGE_SIZE)
	229	printk(KERN_ALERT
	230	"Unable to handle kernel NULL pointer dereference");
	231	else
	232	printk(KERN_ALERT "Unable to handle kernel access");
	233	printk(" at virtual address 0x%08lx\n", address);
	234
	235	die("Oops", regs, write_acc);
	236
	237	do_exit(SIGKILL);
	238
	239	/*
	240	* We ran out of memory, or some other thing happened to us that made
	241	* us unable to handle the page fault gracefully.
	242	*/
	243
	244	out_of_memory:
	245	__asm__ __volatile__("l.nop 42");
	246	__asm__ __volatile__("l.nop 1");
	247
	248	up_read(&mm->mmap_sem);
	249	printk("VM: killing process %s\n", tsk->comm);
	250	if (user_mode(regs))
	251	do_exit(SIGKILL);
	252	goto no_context;
	253
	254	do_sigbus:
	255	up_read(&mm->mmap_sem);
	256
	257	/*
	258	* Send a sigbus, regardless of whether we were in kernel
	259	* or user mode.
	260	*/
	261	info.si_signo = SIGBUS;
	262	info.si_errno = 0;
	263	info.si_code = BUS_ADRERR;
	264	info.si_addr = (void *)address;
	265	force_sig_info(SIGBUS, &info, tsk);
	266
	267	/* Kernel mode? Handle exceptions or die */
	268	if (!user_mode(regs))
	269	goto no_context;
	270	return;
	271
	272	vmalloc_fault:
	273	{
	274	/*
	275	* Synchronize this task's top level page-table
	276	* with the 'reference' page table.
	277	*
	278	* Use current_pgd instead of tsk->active_mm->pgd
	279	* since the latter might be unavailable if this
	280	* code is executed in a misfortunately run irq
	281	* (like inside schedule() between switch_mm and
	282	* switch_to...).
	283	*/
	284
	285	int offset = pgd_index(address);
	286	pgd_t *pgd, *pgd_k;
	287	pud_t *pud, *pud_k;
	288	pmd_t *pmd, *pmd_k;
	289	pte_t *pte_k;
	290
	291	/*
	292	phx_warn("do_page_fault(): vmalloc_fault will not work, "
	293	"since current_pgd assign a proper value somewhere\n"
	294	"anyhow we don't need this at the moment\n");
	295
	296	phx_mmu("vmalloc_fault");
	297	*/
	298	pgd = (pgd_t *)current_pgd + offset;
	299	pgd_k = init_mm.pgd + offset;
	300
	301	/* Since we're two-level, we don't need to do both
	302	* set_pgd and set_pmd (they do the same thing). If
	303	* we go three-level at some point, do the right thing
	304	* with pgd_present and set_pgd here.
	305	*
	306	* Also, since the vmalloc area is global, we don't
	307	* need to copy individual PTE's, it is enough to
	308	* copy the pgd pointer into the pte page of the
	309	* root task. If that is there, we'll find our pte if
	310	* it exists.
	311	*/
	312
	313	pud = pud_offset(pgd, address);
	314	pud_k = pud_offset(pgd_k, address);
	315	if (!pud_present(*pud_k))
	316	goto no_context;
	317
	318	pmd = pmd_offset(pud, address);
	319	pmd_k = pmd_offset(pud_k, address);
	320
	321	if (!pmd_present(*pmd_k))
	322	goto bad_area_nosemaphore;
	323
	324	set_pmd(pmd, *pmd_k);
	325
	326	/* Make sure the actual PTE exists as well to
	327	* catch kernel vmalloc-area accesses to non-mapped
	328	* addresses. If we don't do this, this will just
	329	* silently loop forever.
	330	*/
	331
	332	pte_k = pte_offset_kernel(pmd_k, address);
	333	if (!pte_present(*pte_k))
	334	goto no_context;
	335
	336	return;
	337	}
	338	}