author      Ralf Baechle <ralf@linux-mips.org>  2012-01-11 09:41:47 -0500
committer   Ralf Baechle <ralf@linux-mips.org>  2012-01-11 09:41:47 -0500
commit      39b741431af7f6f46b2e0e7f7f13ea2351fb4a5f (patch)
tree        89355f4ae7bbb874537bb65f71ba0d19b3d468e1 /arch/mips/mm/gup.c
parent      5b0ec2efb7d373faa7b1a7632c459b93895d45cd (diff)
parent      d7a887a73dec6c387b02a966a71aac767bbd9ce6 (diff)
Merge branch 'next/generic' into mips-for-linux-next
Diffstat (limited to 'arch/mips/mm/gup.c')
-rw-r--r--  arch/mips/mm/gup.c  315
1 file changed, 315 insertions, 0 deletions
diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c
new file mode 100644
index 000000000000..33aadbcf170b
--- /dev/null
+++ b/arch/mips/mm/gup.c
@@ -0,0 +1,315 @@
/*
 * Lockless get_user_pages_fast for MIPS
 *
 * Copyright (C) 2008 Nick Piggin
 * Copyright (C) 2008 Novell Inc.
 * Copyright (C) 2011 Ralf Baechle
 */
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmstat.h>
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/hugetlb.h>

#include <asm/pgtable.h>
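/*
 * With a two-word PTE (CONFIG_64BIT_PHYS_ADDR on a 32-bit CPU) the entry
 * cannot be loaded atomically: read low, high, then low again and retry
 * until the low word is stable, yielding a consistent snapshot without
 * taking the page table lock.
 */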
static inline pte_t gup_get_pte(pte_t *ptep)
{
#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32)
        pte_t pte;

retry:
        pte.pte_low = ptep->pte_low;
        smp_rmb();
        pte.pte_high = ptep->pte_high;
        smp_rmb();
        if (unlikely(pte.pte_low != ptep->pte_low))
                goto retry;

        return pte;
#else
        return ACCESS_ONCE(*ptep);
#endif
}
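/*
 * Walk the PTEs mapping [addr, end) within one pmd, taking a reference
 * on each page.  Returns 0 (fall back to the slow path) on any entry
 * that is not present, is special, or lacks write permission when a
 * writable mapping was requested.
 */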
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
                        int write, struct page **pages, int *nr)
{
        pte_t *ptep = pte_offset_map(&pmd, addr);
        do {
                pte_t pte = gup_get_pte(ptep);
                struct page *page;

                if (!pte_present(pte) ||
                    pte_special(pte) || (write && !pte_write(pte))) {
                        pte_unmap(ptep);
                        return 0;
                }
                VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
                page = pte_page(pte);
                get_page(page);
                SetPageReferenced(page);
                pages[*nr] = page;
                (*nr)++;

        } while (ptep++, addr += PAGE_SIZE, addr != end);

        pte_unmap(ptep - 1);
        return 1;
}
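/*
 * Take @nr additional references on a compound page's head page with a
 * single atomic add.
 */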
static inline void get_head_page_multiple(struct page *page, int nr)
{
        VM_BUG_ON(page != compound_head(page));
        VM_BUG_ON(page_count(page) == 0);
        atomic_add(nr, &page->_count);
        SetPageReferenced(page);
}
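/*
 * Pin a pmd-level huge page: store each constituent small page in
 * @pages, bump tail-page counts where needed, then take all of the
 * head-page references in one go.
 */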
static int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end,
                        int write, struct page **pages, int *nr)
{
        pte_t pte = *(pte_t *)&pmd;
        struct page *head, *page;
        int refs;

        if (write && !pte_write(pte))
                return 0;
        /* hugepages are never "special" */
        VM_BUG_ON(pte_special(pte));
        VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

        refs = 0;
        head = pte_page(pte);
        page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
        do {
                VM_BUG_ON(compound_head(page) != head);
                pages[*nr] = page;
                if (PageTail(page))
                        get_huge_page_tail(page);
                (*nr)++;
                page++;
                refs++;
        } while (addr += PAGE_SIZE, addr != end);

        get_head_page_multiple(head, refs);
        return 1;
}
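/*
 * Walk the pmd entries covering [addr, end), dispatching to the huge or
 * normal PTE handler as appropriate.
 */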
static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
                        int write, struct page **pages, int *nr)
{
        unsigned long next;
        pmd_t *pmdp;

        pmdp = pmd_offset(&pud, addr);
        do {
                pmd_t pmd = *pmdp;

                next = pmd_addr_end(addr, end);
                /*
                 * The pmd_trans_splitting() check below explains why
                 * pmdp_splitting_flush has to flush the tlb, to stop
                 * this gup-fast code from running while we set the
                 * splitting bit in the pmd. Returning zero will take
                 * the slow path that will call wait_split_huge_page()
                 * if the pmd is still in splitting state. gup-fast
                 * can't because it has irqs disabled and
                 * wait_split_huge_page() would never return as the
                 * tlb flush IPI wouldn't run.
                 */
                if (pmd_none(pmd) || pmd_trans_splitting(pmd))
                        return 0;
                if (unlikely(pmd_huge(pmd))) {
                        if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
                                return 0;
                } else {
                        if (!gup_pte_range(pmd, addr, next, write, pages, nr))
                                return 0;
                }
        } while (pmdp++, addr = next, addr != end);

        return 1;
}
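/*
 * Pin a pud-level huge page, mirroring gup_huge_pmd() one level up.
 */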
static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end,
                        int write, struct page **pages, int *nr)
{
        pte_t pte = *(pte_t *)&pud;
        struct page *head, *page;
        int refs;

        if (write && !pte_write(pte))
                return 0;
        /* hugepages are never "special" */
        VM_BUG_ON(pte_special(pte));
        VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

        refs = 0;
        head = pte_page(pte);
        page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
        do {
                VM_BUG_ON(compound_head(page) != head);
                pages[*nr] = page;
                (*nr)++;
                page++;
                refs++;
        } while (addr += PAGE_SIZE, addr != end);

        get_head_page_multiple(head, refs);
        return 1;
}
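/*
 * Walk the pud entries covering [addr, end), dispatching to the huge or
 * pmd-level handler as appropriate.
 */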
static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
                        int write, struct page **pages, int *nr)
{
        unsigned long next;
        pud_t *pudp;

        pudp = pud_offset(&pgd, addr);
        do {
                pud_t pud = *pudp;

                next = pud_addr_end(addr, end);
                if (pud_none(pud))
                        return 0;
                if (unlikely(pud_huge(pud))) {
                        if (!gup_huge_pud(pud, addr, next, write, pages, nr))
                                return 0;
                } else {
                        if (!gup_pmd_range(pud, addr, next, write, pages, nr))
                                return 0;
                }
        } while (pudp++, addr = next, addr != end);

        return 1;
}
/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
 * back to the regular GUP.
 */
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
                        struct page **pages)
{
        struct mm_struct *mm = current->mm;
        unsigned long addr, len, end;
        unsigned long next;
        unsigned long flags;
        pgd_t *pgdp;
        int nr = 0;

        start &= PAGE_MASK;
        addr = start;
        len = (unsigned long) nr_pages << PAGE_SHIFT;
        end = start + len;
        if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
                                        (void __user *)start, len)))
                return 0;

        /*
         * XXX: batch / limit 'nr', to avoid large irq off latency
         * needs some instrumenting to determine the common sizes used by
         * important workloads (eg. DB2), and whether limiting the batch
         * size will decrease performance.
         *
         * It seems like we're in the clear for the moment. Direct-IO is
         * the main guy that batches up lots of get_user_pages, and even
         * they are limited to 64-at-a-time which is not so many.
         */
        /*
         * This doesn't prevent pagetable teardown, but does prevent
         * the pagetables and pages from being freed.
         *
         * So long as we atomically load page table pointers versus teardown,
         * we can follow the address down to the page and take a ref on it.
         */
        local_irq_save(flags);
        pgdp = pgd_offset(mm, addr);
        do {
                pgd_t pgd = *pgdp;

                next = pgd_addr_end(addr, end);
                if (pgd_none(pgd))
                        break;
                if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
                        break;
        } while (pgdp++, addr = next, addr != end);
        local_irq_restore(flags);

        return nr;
}
/**
 * get_user_pages_fast() - pin user pages in memory
 * @start:      starting user address
 * @nr_pages:   number of pages from start to pin
 * @write:      whether pages will be written to
 * @pages:      array that receives pointers to the pages pinned.
 *              Should be at least nr_pages long.
 *
 * Attempt to pin user pages in memory without taking mm->mmap_sem.
 * If not successful, it will fall back to taking the lock and
 * calling get_user_pages().
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno.
 */
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
                        struct page **pages)
{
        struct mm_struct *mm = current->mm;
        unsigned long addr, len, end;
        unsigned long next;
        pgd_t *pgdp;
        int ret, nr = 0;

        start &= PAGE_MASK;
        addr = start;
        len = (unsigned long) nr_pages << PAGE_SHIFT;

        end = start + len;
        if (end < start)
                goto slow_irqon;

        /* XXX: batch / limit 'nr' */
        local_irq_disable();
        pgdp = pgd_offset(mm, addr);
        do {
                pgd_t pgd = *pgdp;

                next = pgd_addr_end(addr, end);
                if (pgd_none(pgd))
                        goto slow;
                if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
                        goto slow;
        } while (pgdp++, addr = next, addr != end);
        local_irq_enable();

        VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
        return nr;
slow:
        local_irq_enable();

slow_irqon:
        /* Try to get the remaining pages with get_user_pages */
        start += nr << PAGE_SHIFT;
        pages += nr;

        down_read(&mm->mmap_sem);
        ret = get_user_pages(current, mm, start,
                                (end - start) >> PAGE_SHIFT,
                                write, 0, pages, NULL);
        up_read(&mm->mmap_sem);

        /* Have to be a bit careful with return values */
        if (nr > 0) {
                if (ret < 0)
                        ret = nr;
                else
                        ret += nr;
        }
        return ret;
}
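As a usage sketch only (the caller below, pin_user_buffer(), is hypothetical and not part of this commit), a typical consumer pairs get_user_pages_fast() with put_page() on every page actually pinned, remembering that fewer than nr_pages may be returned:

/* Hypothetical caller: pin the pages backing a user buffer, then release. */
static int pin_user_buffer(unsigned long uaddr, int nr_pages,
                        struct page **pages)
{
        int i, got;

        /* Fast path first; get_user_pages_fast() falls back internally. */
        got = get_user_pages_fast(uaddr, nr_pages, 1 /* write */, pages);
        if (got < 0)
                return got;             /* nothing pinned: -errno */

        /* ... access the pinned pages here, e.g. set up DMA ... */

        /* Drop the reference get_user_pages_fast() took on each page. */
        for (i = 0; i < got; i++)
                put_page(pages[i]);

        return got;                     /* may be fewer than nr_pages */
}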