sparc64: implement get_user_pages_fast()

Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: David S. Miller <davem@davemloft.net> 2011-07-25 20:12:22 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-07-25 23:57:10 -0400
commit: df077ac4687500e02a273a628057ff5ab17dc19f (patch)
tree: 3a28c3e9f2f0f529caa5da4770111dc78db071ba /arch/sparc/mm
parent: 683d2fa672da5e3b4fe96f13c43eba32b068d64b (diff)
2 files changed, 182 insertions, 1 deletions
diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile
index 79836a7dd00c..e3cda21b5ee9 100644
--- a/arch/sparc/mm/Makefile
+++ b/arch/sparc/mm/Makefile
@@ -4,7 +4,7 @@
 asflags-y := -ansi
 ccflags-y := -Werror
-obj-$(CONFIG_SPARC64)   += ultra.o tlb.o tsb.o
+obj-$(CONFIG_SPARC64)   += ultra.o tlb.o tsb.o gup.o
 obj-y                   += fault_$(BITS).o
 obj-y                   += init_$(BITS).o
 obj-$(CONFIG_SPARC32)   += loadmmu.o
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
new file mode 100644
index 000000000000..a986b5d05712
--- /dev/null
+++ b/arch/sparc/mm/gup.c
@@ -0,0 +1,181 @@
+/*
+ * Lockless get_user_pages_fast for sparc, cribbed from powerpc
+ *
+ * Copyright (C) 2008 Nick Piggin
+ * Copyright (C) 2008 Novell Inc.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
+#include <linux/pagemap.h>
+#include <linux/rwsem.h>
+#include <asm/pgtable.h>
+/*
+ * The performance critical leaf functions are made noinline otherwise gcc
+ * inlines everything into a single function which results in too much
+ * register pressure.
+ *
+ * gup_pte_range() walks the ptes that cover [addr, end) below the given
+ * pmd.  For every pte it takes a speculative reference on the head of the
+ * backing page, stores the page in pages[*nr] and increments *nr.
+ *
+ * Returns 1 once every pte in the range has been handled.  Returns 0 at
+ * the first pte that does not carry all the bits collected in 'result',
+ * that has _PAGE_SPECIAL set, whose page reference could not be taken, or
+ * whose value changed while the reference was being taken.  Pages stored
+ * before that point stay in 'pages' and stay counted in *nr.
+ *
+ * The loop body runs once before "addr != end" is tested, so the range
+ * passed in must not be empty.
+ */
+static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
+                unsigned long end, int write, struct page **pages, int *nr)
+{
+        unsigned long mask, result;
+        pte_t *ptep;
+        if (tlb_type == hypervisor) { /* _4V bit constants, else the _4U ones */
+                result = _PAGE_PRESENT_4V|_PAGE_P_4V;
+                if (write)
+                        result |= _PAGE_WRITE_4V;
+        } else {
+                result = _PAGE_PRESENT_4U|_PAGE_P_4U;
+                if (write)
+                        result |= _PAGE_WRITE_4U;
+        }
+        mask = result | _PAGE_SPECIAL; /* _PAGE_SPECIAL has to be clear */
+        ptep = pte_offset_kernel(&pmd, addr);
+        do {
+                struct page *page, *head;
+                pte_t pte = *ptep; /* snapshot, compared against *ptep below */
+                if ((pte_val(pte) & mask) != result)
+                        return 0;
+                VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+                /* The hugepage case is simplified on sparc64 because
+                 * we encode the sub-page pfn offsets into the
+                 * hugepage PTEs.  In the future we could optimize this
+                 * to use page_cache_add_speculative() for the hugepage
+                 * case.
+                 */
+                page = pte_page(pte);
+                head = compound_head(page);
+                if (!page_cache_get_speculative(head))
+                        return 0;
+                if (unlikely(pte_val(pte) != pte_val(*ptep))) { /* pte changed */
+                        put_page(head); /* give back the reference just taken */
+                        return 0;
+                }
+                pages[*nr] = page;
+                (*nr)++;
+        } while (ptep++, addr += PAGE_SIZE, addr != end);
+        return 1;
+}
+/*
+ * Walk the pmd entries that cover [addr, end) below the given pud and pass
+ * each populated one on to gup_pte_range().
+ *
+ * Returns 1 when the whole range was processed, 0 as soon as an empty pmd
+ * is found or gup_pte_range() returns 0.
+ */
+static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
+                int write, struct page **pages, int *nr)
+{
+        pmd_t *pmdp = pmd_offset(&pud, addr);
+        unsigned long boundary;
+        for (;;) {
+                /* Operate on a copy of the entry, not on the live slot. */
+                pmd_t entry = *pmdp;
+                boundary = pmd_addr_end(addr, end);
+                if (pmd_none(entry) ||
+                    !gup_pte_range(entry, addr, boundary, write, pages, nr))
+                        return 0;
+                if (boundary == end)
+                        break;
+                pmdp++;
+                addr = boundary;
+        }
+        return 1;
+}
+/*
+ * Walk the pud entries that cover [addr, end) below the given pgd and pass
+ * each populated one on to gup_pmd_range().
+ *
+ * Returns 1 when the whole range was processed, 0 as soon as an empty pud
+ * is found or gup_pmd_range() returns 0.
+ */
+static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
+                int write, struct page **pages, int *nr)
+{
+        pud_t *pudp = pud_offset(&pgd, addr);
+        unsigned long boundary;
+        for (;;) {
+                /* Operate on a copy of the entry, not on the live slot. */
+                pud_t entry = *pudp;
+                boundary = pud_addr_end(addr, end);
+                if (pud_none(entry) ||
+                    !gup_pmd_range(entry, addr, boundary, write, pages, nr))
+                        return 0;
+                if (boundary == end)
+                        break;
+                pudp++;
+                addr = boundary;
+        }
+        return 1;
+}
+/*
+ * get_user_pages_fast() - collect the pages backing a user address range,
+ * trying a lockless page table walk before falling back to
+ * get_user_pages().
+ *
+ * @start:    user virtual address; rounded down to a page boundary
+ * @nr_pages: number of pages to collect, counted from @start
+ * @write:    non-zero if the pages have to be writable
+ * @pages:    array that receives the pages
+ *
+ * Returns the number of pages stored in @pages, which may be fewer than
+ * @nr_pages.  If the lockless walk collected nothing, the return value of
+ * get_user_pages() is passed through unchanged.  A non-positive @nr_pages
+ * yields 0.
+ */
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+                        struct page **pages)
+{
+        struct mm_struct *mm = current->mm;
+        unsigned long addr, len, end;
+        unsigned long next;
+        pgd_t *pgdp;
+        int nr = 0;
+        int ret;
+        /*
+         * The walkers are do/while loops that handle one entry before
+         * testing "addr != end", so an empty range would make them run
+         * past its end.  There is nothing to do for such a request.
+         */
+        if (nr_pages <= 0)
+                return 0;
+        start &= PAGE_MASK;
+        addr = start;
+        len = (unsigned long) nr_pages << PAGE_SHIFT;
+        end = start + len;
+        /*
+         * A range that wraps around the top of the address space is not
+         * walked here; it is handed to get_user_pages() instead.
+         * Interrupts have not been disabled yet at this point.
+         */
+        if (end < start)
+                goto slow_irqon;
+        /*
+         * XXX: batch / limit 'nr', to avoid large irq off latency
+         * needs some instrumenting to determine the common sizes used by
+         * important workloads (eg. DB2), and whether limiting the batch size
+         * will decrease performance.
+         *
+         * It seems like we're in the clear for the moment. Direct-IO is
+         * the main guy that batches up lots of get_user_pages, and even
+         * they are limited to 64-at-a-time which is not so many.
+         */
+        /*
+         * This doesn't prevent pagetable teardown, but does prevent
+         * the pagetables from being freed on sparc.
+         *
+         * So long as we atomically load page table pointers versus teardown,
+         * we can follow the address down to the page and take a ref on it.
+         */
+        local_irq_disable();
+        pgdp = pgd_offset(mm, addr);
+        do {
+                pgd_t pgd = *pgdp;
+                next = pgd_addr_end(addr, end);
+                if (pgd_none(pgd))
+                        goto slow;
+                if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+                        goto slow;
+        } while (pgdp++, addr = next, addr != end);
+        local_irq_enable();
+        VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
+        return nr;
+slow:
+        local_irq_enable();
+slow_irqon:
+        /*
+         * Try to get the remaining pages with get_user_pages.  Widen 'nr'
+         * before shifting: "nr << PAGE_SHIFT" would be evaluated as int.
+         */
+        start += (unsigned long) nr << PAGE_SHIFT;
+        pages += nr;
+        down_read(&mm->mmap_sem);
+        ret = get_user_pages(current, mm, start,
+                (end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
+        up_read(&mm->mmap_sem);
+        /*
+         * Have to be a bit careful with return values: pages already
+         * collected by the lockless walk must still be reported even if
+         * get_user_pages() failed.
+         */
+        if (nr > 0) {
+                if (ret < 0)
+                        ret = nr;
+                else
+                        ret += nr;
+        }
+        return ret;
+}
author	David S. Miller <davem@davemloft.net>	2011-07-25 20:12:22 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-07-25 23:57:10 -0400
commit	df077ac4687500e02a273a628057ff5ab17dc19f (patch)
tree	3a28c3e9f2f0f529caa5da4770111dc78db071ba /arch/sparc/mm
parent	683d2fa672da5e3b4fe96f13c43eba32b068d64b (diff)

diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile index 79836a7dd00c..e3cda21b5ee9 100644 --- a/arch/sparc/mm/Makefile +++ b/arch/sparc/mm/Makefile
@@ -4,7 +4,7 @@
4	asflags-y := -ansi	4	asflags-y := -ansi
5	ccflags-y := -Werror	5	ccflags-y := -Werror
6		6
7	obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o	7	obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o gup.o
8	obj-y += fault_$(BITS).o	8	obj-y += fault_$(BITS).o
9	obj-y += init_$(BITS).o	9	obj-y += init_$(BITS).o
10	obj-$(CONFIG_SPARC32) += loadmmu.o	10	obj-$(CONFIG_SPARC32) += loadmmu.o


diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c new file mode 100644 index 000000000000..a986b5d05712 --- /dev/null +++ b/arch/sparc/mm/gup.c
@@ -0,0 +1,181 @@
		1	/*
		2	* Lockless get_user_pages_fast for sparc, cribbed from powerpc
		3	*
		4	* Copyright (C) 2008 Nick Piggin
		5	* Copyright (C) 2008 Novell Inc.
		6	*/
		7
		8	#include <linux/sched.h>
		9	#include <linux/mm.h>
		10	#include <linux/vmstat.h>
		11	#include <linux/pagemap.h>
		12	#include <linux/rwsem.h>
		13	#include <asm/pgtable.h>
		14
		15	/*
		16	* The performance critical leaf functions are made noinline otherwise gcc
		17	* inlines everything into a single function which results in too much
		18	* register pressure.
		19	*/
		20	static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
		21	unsigned long end, int write, struct page **pages, int *nr)
		22	{
		23	unsigned long mask, result;
		24	pte_t *ptep;
		25
		26	if (tlb_type == hypervisor) {
		27	result = _PAGE_PRESENT_4V|_PAGE_P_4V;
		28	if (write)
		29	result |= _PAGE_WRITE_4V;
		30	} else {
		31	result = _PAGE_PRESENT_4U|_PAGE_P_4U;
		32	if (write)
		33	result |= _PAGE_WRITE_4U;
		34	}
		35	mask = result | _PAGE_SPECIAL;
		36
		37	ptep = pte_offset_kernel(&pmd, addr);
		38	do {
		39	struct page *page, *head;
		40	pte_t pte = *ptep;
		41
		42	if ((pte_val(pte) & mask) != result)
		43	return 0;
		44	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
		45
		46	/* The hugepage case is simplified on sparc64 because
		47	* we encode the sub-page pfn offsets into the
		48	* hugepage PTEs. We could optimize this in the future
		49	* use page_cache_add_speculative() for the hugepage case.
		50	*/
		51	page = pte_page(pte);
		52	head = compound_head(page);
		53	if (!page_cache_get_speculative(head))
		54	return 0;
		55	if (unlikely(pte_val(pte) != pte_val(*ptep))) {
		56	put_page(head);
		57	return 0;
		58	}
		59
		60	pages[*nr] = page;
		61	(*nr)++;
		62	} while (ptep++, addr += PAGE_SIZE, addr != end);
		63
		64	return 1;
		65	}
		66
		67	static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
		68	int write, struct page **pages, int *nr)
		69	{
		70	unsigned long next;
		71	pmd_t *pmdp;
		72
		73	pmdp = pmd_offset(&pud, addr);
		74	do {
		75	pmd_t pmd = *pmdp;
		76
		77	next = pmd_addr_end(addr, end);
		78	if (pmd_none(pmd))
		79	return 0;
		80	if (!gup_pte_range(pmd, addr, next, write, pages, nr))
		81	return 0;
		82	} while (pmdp++, addr = next, addr != end);
		83
		84	return 1;
		85	}
		86
		87	static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
		88	int write, struct page *pages, int nr)
		89	{
		90	unsigned long next;
		91	pud_t *pudp;
		92
		93	pudp = pud_offset(&pgd, addr);
		94	do {
		95	pud_t pud = *pudp;
		96
		97	next = pud_addr_end(addr, end);
		98	if (pud_none(pud))
		99	return 0;
		100	if (!gup_pmd_range(pud, addr, next, write, pages, nr))
		101	return 0;
		102	} while (pudp++, addr = next, addr != end);
		103
		104	return 1;
		105	}
		106
		107	int get_user_pages_fast(unsigned long start, int nr_pages, int write,
		108	struct page **pages)
		109	{
		110	struct mm_struct *mm = current->mm;
		111	unsigned long addr, len, end;
		112	unsigned long next;
		113	pgd_t *pgdp;
		114	int nr = 0;
		115
		116	start &= PAGE_MASK;
		117	addr = start;
		118	len = (unsigned long) nr_pages << PAGE_SHIFT;
		119	end = start + len;
		120
		121	/*
		122	* XXX: batch / limit 'nr', to avoid large irq off latency
		123	* needs some instrumenting to determine the common sizes used by
		124	* important workloads (eg. DB2), and whether limiting the batch size
		125	* will decrease performance.
		126	*
		127	* It seems like we're in the clear for the moment. Direct-IO is
		128	* the main guy that batches up lots of get_user_pages, and even
		129	* they are limited to 64-at-a-time which is not so many.
		130	*/
		131	/*
		132	* This doesn't prevent pagetable teardown, but does prevent
		133	* the pagetables from being freed on sparc.
		134	*
		135	* So long as we atomically load page table pointers versus teardown,
		136	* we can follow the address down to the page and take a ref on it.
		137	*/
		138	local_irq_disable();
		139
		140	pgdp = pgd_offset(mm, addr);
		141	do {
		142	pgd_t pgd = *pgdp;
		143
		144	next = pgd_addr_end(addr, end);
		145	if (pgd_none(pgd))
		146	goto slow;
		147	if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
		148	goto slow;
		149	} while (pgdp++, addr = next, addr != end);
		150
		151	local_irq_enable();
		152
		153	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
		154	return nr;
		155
		156	{
		157	int ret;
		158
		159	slow:
		160	local_irq_enable();
		161
		162	/* Try to get the remaining pages with get_user_pages */
		163	start += nr << PAGE_SHIFT;
		164	pages += nr;
		165
		166	down_read(&mm->mmap_sem);
		167	ret = get_user_pages(current, mm, start,
		168	(end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
		169	up_read(&mm->mmap_sem);
		170
		171	/* Have to be a bit careful with return values */
		172	if (nr > 0) {
		173	if (ret < 0)
		174	ret = nr;
		175	else
		176	ret += nr;
		177	}
		178
		179	return ret;
		180	}
		181	}