Merge branch 'linus' into x86/cleanups

author: Ingo Molnar <mingo@elte.hu> 2008-08-11 06:57:01 -0400
committer: Ingo Molnar <mingo@elte.hu> 2008-08-11 06:57:01 -0400
commit: 6de9c70882ecdee63a652d493bf2353963bd4c22 (patch)
tree: 9d219e705492331c97f5f7dccce3b0b1a29251bf /arch/x86/mm
parent: d406d21d90dce2e66c7eb4a44605aac947fe55fb (diff)
parent: 796aadeb1b2db9b5d463946766c5bbfd7717158c (diff)
4 files changed, 299 insertions, 84 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 1fbb844c3d7..2977ea37791 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,6 +1,7 @@
 obj-y   :=  init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
            pat.o pgtable.o
+obj-$(CONFIG_HAVE_GET_USER_PAGES_FAST) += gup.o
 obj-$(CONFIG_X86_32)            += pgtable_32.o
 obj-$(CONFIG_HUGETLB_PAGE)      += hugetlbpage.o
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
new file mode 100644
index 00000000000..007bb06c750
--- /dev/null
+++ b/arch/x86/mm/gup.c
@@ -0,0 +1,298 @@
+/*
+ * Lockless get_user_pages_fast for x86
+ *
+ * Copyright (C) 2008 Nick Piggin
+ * Copyright (C) 2008 Novell Inc.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
+#include <linux/highmem.h>
+#include <asm/pgtable.h>
+static inline pte_t gup_get_pte(pte_t *ptep)
+{       /* Read the pte at ptep for the lockless walk; PAE needs care, see below. */
+#ifndef CONFIG_X86_PAE
+        return *ptep;
+#else
+        /*
+         * With get_user_pages_fast, we walk down the pagetables without taking
+         * any locks.  For this we would like to load the pointers atomically,
+         * but that is not possible (without expensive cmpxchg8b) on PAE.  What
+         * we do have is the guarantee that a pte will only either go from not
+         * present to present, or present to not present or both -- it will not
+         * switch to a completely different present page without a TLB flush in
+         * between; something that we are blocking by holding interrupts off.
+         *
+         * Setting ptes from not present to present goes:
+         * ptep->pte_high = h;
+         * smp_wmb();
+         * ptep->pte_low = l;
+         *
+         * And present to not present goes:
+         * ptep->pte_low = 0;
+         * smp_wmb();
+         * ptep->pte_high = 0;
+         *
+         * We must ensure here that the load of pte_low sees l iff pte_high
+         * sees h. We load pte_high *after* loading pte_low, which ensures we
+         * don't see an older value of pte_high.  *Then* we recheck pte_low,
+         * which ensures that we haven't picked up a changed pte high. We might
+         * have got rubbish values from pte_low and pte_high, but we are
+         * guaranteed that pte_low will not have the present bit set *unless*
+         * it is 'l'. And get_user_pages_fast only operates on present ptes, so
+         * we're safe.
+         *
+         * gup_get_pte should not be used or copied outside gup.c without being
+         * very careful -- it does not atomically load the pte or anything that
+         * is likely to be useful for you.
+         */
+        pte_t pte;
+retry:
+        pte.pte_low = ptep->pte_low;
+        smp_rmb();      /* order the pte_low load before the pte_high load */
+        pte.pte_high = ptep->pte_high;
+        smp_rmb();      /* order the pte_high load before the pte_low recheck */
+        if (unlikely(pte.pte_low != ptep->pte_low))
+                goto retry;     /* pte_low changed while pte_high was read; start over */
+        return pte;
+#endif
+}
+/*
+ * The performance critical leaf functions are made noinline otherwise gcc
+ * inlines everything into a single function which results in too much
+ * register pressure.
+ *
+ * gup_pte_range walks the ptes under pmd that cover [addr, end).  A pte is
+ * accepted when _PAGE_PRESENT and _PAGE_USER (plus _PAGE_RW if write is
+ * non-zero) are all set and _PAGE_SPECIAL is clear.  For each accepted pte a
+ * reference is taken with get_page() and the page is stored at pages[*nr],
+ * after which *nr is incremented.
+ *
+ * Returns 1 if every pte in the range was accepted, or 0 at the first pte
+ * that was not; pages already stored and counted in *nr are left in place.
+ */
+static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
+                unsigned long end, int write, struct page **pages, int *nr)
+{
+        unsigned long mask;
+        pte_t *ptep;
+        mask = _PAGE_PRESENT|_PAGE_USER;
+        if (write)
+                mask |= _PAGE_RW;
+        ptep = pte_offset_map(&pmd, addr);
+        do {
+                pte_t pte = gup_get_pte(ptep);
+                struct page *page;
+                if ((pte_val(pte) & (mask | _PAGE_SPECIAL)) != mask) {
+                        pte_unmap(ptep);
+                        return 0;
+                }
+                VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+                page = pte_page(pte);
+                get_page(page);
+                pages[*nr] = page;
+                (*nr)++;
+        } while (ptep++, addr += PAGE_SIZE, addr != end);
+        pte_unmap(ptep - 1);    /* the loop stepped ptep one past the last pte it read */
+        return 1;
+}
+static inline void get_head_page_multiple(struct page *page, int nr)
+{       /* Add nr references to page with a single atomic_add on its _count. */
+        VM_BUG_ON(page != compound_head(page)); /* page must be its own compound head */
+        VM_BUG_ON(page_count(page) == 0);       /* the count must already be non-zero */
+        atomic_add(nr, &page->_count);
+}
+static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
+                unsigned long end, int write, struct page **pages, int *nr)
+{       /*
+         * Handle a large pmd: the entry itself is reinterpreted as a pte.
+         * Every PAGE_SIZE step of [addr, end) is stored in pages[] and
+         * counted in *nr, then the head page receives all the references in
+         * one go.  Returns 0, storing nothing, unless _PAGE_PRESENT and
+         * _PAGE_USER (plus _PAGE_RW if write is non-zero) are set; returns 1
+         * otherwise.
+         */
+        unsigned long mask;
+        pte_t pte = *(pte_t *)&pmd;
+        struct page *head, *page;
+        int refs;
+        mask = _PAGE_PRESENT|_PAGE_USER;
+        if (write)
+                mask |= _PAGE_RW;
+        if ((pte_val(pte) & mask) != mask)
+                return 0;
+        /* hugepages are never "special" */
+        VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL);
+        VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+        refs = 0;
+        head = pte_page(pte);
+        page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);       /* page for addr: offset within the pmd, in pages */
+        do {
+                VM_BUG_ON(compound_head(page) != head);
+                pages[*nr] = page;
+                (*nr)++;
+                page++;
+                refs++;
+        } while (addr += PAGE_SIZE, addr != end);
+        get_head_page_multiple(head, refs);     /* one atomic add instead of one per page */
+        return 1;
+}
+static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
+                int write, struct page **pages, int *nr)
+{       /*
+         * Step through the pmd entries under pud that cover [addr, end).
+         * Returns 1 when every entry was handled, or 0 as soon as an entry is
+         * empty or its handler returns 0.
+         */
+        unsigned long next;
+        pmd_t *pmdp;
+        pmdp = pmd_offset(&pud, addr);
+        do {
+                pmd_t pmd = *pmdp;      /* work on a local copy of the entry */
+                next = pmd_addr_end(addr, end);
+                if (pmd_none(pmd))
+                        return 0;
+                if (unlikely(pmd_large(pmd))) {
+                        if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
+                                return 0;
+                } else {
+                        if (!gup_pte_range(pmd, addr, next, write, pages, nr))
+                                return 0;
+                }
+        } while (pmdp++, addr = next, addr != end);
+        return 1;
+}
+static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
+                unsigned long end, int write, struct page **pages, int *nr)
+{       /*
+         * Handle a large pud: the entry itself is reinterpreted as a pte.
+         * Every PAGE_SIZE step of [addr, end) is stored in pages[] and
+         * counted in *nr, then the head page receives all the references in
+         * one go.  Returns 0, storing nothing, unless _PAGE_PRESENT and
+         * _PAGE_USER (plus _PAGE_RW if write is non-zero) are set; returns 1
+         * otherwise.
+         */
+        unsigned long mask;
+        pte_t pte = *(pte_t *)&pud;
+        struct page *head, *page;
+        int refs;
+        mask = _PAGE_PRESENT|_PAGE_USER;
+        if (write)
+                mask |= _PAGE_RW;
+        if ((pte_val(pte) & mask) != mask)
+                return 0;
+        /* hugepages are never "special" */
+        VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL);
+        VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+        refs = 0;
+        head = pte_page(pte);
+        page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);       /* page for addr: offset within the pud, in pages */
+        do {
+                VM_BUG_ON(compound_head(page) != head);
+                pages[*nr] = page;
+                (*nr)++;
+                page++;
+                refs++;
+        } while (addr += PAGE_SIZE, addr != end);
+        get_head_page_multiple(head, refs);     /* one atomic add instead of one per page */
+        return 1;
+}
+static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
+                        int write, struct page **pages, int *nr)
+{       /*
+         * Step through the pud entries under pgd that cover [addr, end).
+         * Returns 1 when every entry was handled, or 0 as soon as an entry is
+         * empty or its handler returns 0.
+         */
+        unsigned long next;
+        pud_t *pudp;
+        pudp = pud_offset(&pgd, addr);
+        do {
+                pud_t pud = *pudp;      /* work on a local copy of the entry */
+                next = pud_addr_end(addr, end);
+                if (pud_none(pud))
+                        return 0;
+                if (unlikely(pud_large(pud))) {
+                        if (!gup_huge_pud(pud, addr, next, write, pages, nr))
+                                return 0;
+                } else {
+                        if (!gup_pmd_range(pud, addr, next, write, pages, nr))
+                                return 0;
+                }
+        } while (pudp++, addr = next, addr != end);
+        return 1;
+}
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+                        struct page **pages)
+{       /*
+         * Collect nr_pages pages of current->mm, beginning at start rounded
+         * down to a page boundary, into pages[].  The page tables are first
+         * walked with interrupts disabled and no locks taken; if the range
+         * fails access_ok() or the walk stops early, the rest of the range is
+         * requested from get_user_pages() under mmap_sem.
+         *
+         * Returns nr when the lockless walk covered the whole range.
+         * Otherwise returns the get_user_pages() result plus nr, or just nr
+         * if that result is negative and nr is greater than zero.
+         */
+        struct mm_struct *mm = current->mm;
+        unsigned long addr, len, end;
+        unsigned long next;
+        pgd_t *pgdp;
+        int nr = 0;
+        start &= PAGE_MASK;
+        addr = start;
+        len = (unsigned long) nr_pages << PAGE_SHIFT;
+        end = start + len;
+        if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+                                        start, len)))
+                goto slow_irqon;
+        /*
+         * XXX: batch / limit 'nr', to avoid large irq off latency
+         * needs some instrumenting to determine the common sizes used by
+         * important workloads (eg. DB2), and whether limiting the batch size
+         * will decrease performance.
+         *
+         * It seems like we're in the clear for the moment. Direct-IO is
+         * the main guy that batches up lots of get_user_pages, and even
+         * they are limited to 64-at-a-time which is not so many.
+         */
+        /*
+         * This doesn't prevent pagetable teardown, but does prevent
+         * the pagetables and pages from being freed on x86.
+         *
+         * So long as we atomically load page table pointers versus teardown
+         * (which we do on x86, with the above PAE exception), we can follow the
+         * address down to the page and take a ref on it.
+         */
+        local_irq_disable();
+        pgdp = pgd_offset(mm, addr);
+        do {
+                pgd_t pgd = *pgdp;
+                next = pgd_addr_end(addr, end);
+                if (pgd_none(pgd))
+                        goto slow;
+                if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+                        goto slow;
+        } while (pgdp++, addr = next, addr != end);
+        local_irq_enable();
+        VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
+        return nr;
+        {
+                int ret;
+slow:
+                local_irq_enable();
+slow_irqon:
+                /*
+                 * Try to get the remaining pages with get_user_pages.
+                 * Skip the nr pages the lockless walk already stored.
+                 * NOTE(review): nr is an int, so nr << PAGE_SHIFT is
+                 * computed in int width -- confirm callers keep it small.
+                 */
+                start += nr << PAGE_SHIFT;
+                pages += nr;
+                down_read(&mm->mmap_sem);
+                ret = get_user_pages(current, mm, start,
+                        (end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
+                up_read(&mm->mmap_sem);
+                /*
+                 * Have to be a bit careful with return values: when the
+                 * lockless walk already stored pages, a negative result is
+                 * replaced by nr, and any other result has nr added to it.
+                 */
+                if (nr > 0) {
+                        if (ret < 0)
+                                ret = nr;
+                        else
+                                ret += nr;
+                }
+                return ret;
+        }
+}
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e4805771b5b..08a20e6a15c 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -86,43 +86,6 @@ early_param("gbpages", parse_direct_gbpages_on);
 * around without checking the pgd every time.
 */
-void show_mem(void)
-{
-        long i, total = 0, reserved = 0;
-        long shared = 0, cached = 0;
-        struct page *page;
-        pg_data_t *pgdat;
-        printk(KERN_INFO "Mem-info:\n");
-        show_free_areas();
-        for_each_online_pgdat(pgdat) {
-                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
-                        /*
-                         * This loop can take a while with 256 GB and
-                         * 4k pages so defer the NMI watchdog:
-                         */
-                        if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
-                                touch_nmi_watchdog();
-                        if (!pfn_valid(pgdat->node_start_pfn + i))
-                                continue;
-                        page = pfn_to_page(pgdat->node_start_pfn + i);
-                        total++;
-                        if (PageReserved(page))
-                                reserved++;
-                        else if (PageSwapCache(page))
-                                cached++;
-                        else if (page_count(page))
-                                shared += page_count(page) - 1;
-                }
-        }
-        printk(KERN_INFO "%lu pages of RAM\n",          total);
-        printk(KERN_INFO "%lu reserved pages\n",        reserved);
-        printk(KERN_INFO "%lu pages shared\n",          shared);
-        printk(KERN_INFO "%lu pages swap cached\n",     cached);
-}
 int after_bootmem;
 static __init void *spp_getpage(void)
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index b4becbf8c57..cab0abbd1eb 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -20,53 +20,6 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
-void show_mem(void)
-{
-        int total = 0, reserved = 0;
-        int shared = 0, cached = 0;
-        int highmem = 0;
-        struct page *page;
-        pg_data_t *pgdat;
-        unsigned long i;
-        unsigned long flags;
-        printk(KERN_INFO "Mem-info:\n");
-        show_free_areas();
-        for_each_online_pgdat(pgdat) {
-                pgdat_resize_lock(pgdat, &flags);
-                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
-                        if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
-                                touch_nmi_watchdog();
-                        page = pgdat_page_nr(pgdat, i);
-                        total++;
-                        if (PageHighMem(page))
-                                highmem++;
-                        if (PageReserved(page))
-                                reserved++;
-                        else if (PageSwapCache(page))
-                                cached++;
-                        else if (page_count(page))
-                                shared += page_count(page) - 1;
-                }
-                pgdat_resize_unlock(pgdat, &flags);
-        }
-        printk(KERN_INFO "%d pages of RAM\n", total);
-        printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
-        printk(KERN_INFO "%d reserved pages\n", reserved);
-        printk(KERN_INFO "%d pages shared\n", shared);
-        printk(KERN_INFO "%d pages swap cached\n", cached);
-        printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY));
-        printk(KERN_INFO "%lu pages writeback\n",
-                                        global_page_state(NR_WRITEBACK));
-        printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
-        printk(KERN_INFO "%lu pages slab\n",
-                global_page_state(NR_SLAB_RECLAIMABLE) +
-                global_page_state(NR_SLAB_UNRECLAIMABLE));
-        printk(KERN_INFO "%lu pages pagetables\n",
-                                        global_page_state(NR_PAGETABLE));
-}
 /*
 * Associate a virtual page frame with a given physical page frame 
 * and protection flags for that frame.
author	Ingo Molnar <mingo@elte.hu>	2008-08-11 06:57:01 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-08-11 06:57:01 -0400
commit	6de9c70882ecdee63a652d493bf2353963bd4c22 (patch)
tree	9d219e705492331c97f5f7dccce3b0b1a29251bf /arch/x86/mm
parent	d406d21d90dce2e66c7eb4a44605aac947fe55fb (diff)
parent	796aadeb1b2db9b5d463946766c5bbfd7717158c (diff)

diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 1fbb844c3d7..2977ea37791 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile
@@ -1,6 +1,7 @@
1	obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \	1	obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
2	pat.o pgtable.o	2	pat.o pgtable.o
3		3
		4	obj-$(CONFIG_HAVE_GET_USER_PAGES_FAST) += gup.o
4	obj-$(CONFIG_X86_32) += pgtable_32.o	5	obj-$(CONFIG_X86_32) += pgtable_32.o
5		6
6	obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o	7	obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o


diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c new file mode 100644 index 00000000000..007bb06c750 --- /dev/null +++ b/arch/x86/mm/gup.c
@@ -0,0 +1,298 @@
		1	/*
		2	* Lockless get_user_pages_fast for x86
		3	*
		4	* Copyright (C) 2008 Nick Piggin
		5	* Copyright (C) 2008 Novell Inc.
		6	*/
		7	#include <linux/sched.h>
		8	#include <linux/mm.h>
		9	#include <linux/vmstat.h>
		10	#include <linux/highmem.h>
		11
		12	#include <asm/pgtable.h>
		13
		14	static inline pte_t gup_get_pte(pte_t *ptep)
		15	{
		16	#ifndef CONFIG_X86_PAE
		17	return *ptep;
		18	#else
		19	/*
		20	* With get_user_pages_fast, we walk down the pagetables without taking
		21	* any locks. For this we would like to load the pointers atomically,
		22	* but that is not possible (without expensive cmpxchg8b) on PAE. What
		23	* we do have is the guarantee that a pte will only either go from not
		24	* present to present, or present to not present or both -- it will not
		25	* switch to a completely different present page without a TLB flush in
		26	* between; something that we are blocking by holding interrupts off.
		27	*
		28	* Setting ptes from not present to present goes:
		29	* ptep->pte_high = h;
		30	* smp_wmb();
		31	* ptep->pte_low = l;
		32	*
		33	* And present to not present goes:
		34	* ptep->pte_low = 0;
		35	* smp_wmb();
		36	* ptep->pte_high = 0;
		37	*
		38	* We must ensure here that the load of pte_low sees l iff pte_high
		39	* sees h. We load pte_high *after* loading pte_low, which ensures we
		40	* don't see an older value of pte_high. *Then* we recheck pte_low,
		41	* which ensures that we haven't picked up a changed pte high. We might
		42	* have got rubbish values from pte_low and pte_high, but we are
		43	* guaranteed that pte_low will not have the present bit set *unless*
		44	* it is 'l'. And get_user_pages_fast only operates on present ptes, so
		45	* we're safe.
		46	*
		47	* gup_get_pte should not be used or copied outside gup.c without being
		48	* very careful -- it does not atomically load the pte or anything that
		49	* is likely to be useful for you.
		50	*/
		51	pte_t pte;
		52
		53	retry:
		54	pte.pte_low = ptep->pte_low;
		55	smp_rmb();
		56	pte.pte_high = ptep->pte_high;
		57	smp_rmb();
		58	if (unlikely(pte.pte_low != ptep->pte_low))
		59	goto retry;
		60
		61	return pte;
		62	#endif
		63	}
		64
		65	/*
		66	* The performance critical leaf functions are made noinline otherwise gcc
		67	* inlines everything into a single function which results in too much
		68	* register pressure.
		69	*/
		70	static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
		71	unsigned long end, int write, struct page **pages, int *nr)
		72	{
		73	unsigned long mask;
		74	pte_t *ptep;
		75
		76	mask = _PAGE_PRESENT|_PAGE_USER;
		77	if (write)
		78	mask |= _PAGE_RW;
		79
		80	ptep = pte_offset_map(&pmd, addr);
		81	do {
		82	pte_t pte = gup_get_pte(ptep);
		83	struct page *page;
		84
		85	if ((pte_val(pte) & (mask | _PAGE_SPECIAL)) != mask) {
		86	pte_unmap(ptep);
		87	return 0;
		88	}
		89	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
		90	page = pte_page(pte);
		91	get_page(page);
		92	pages[*nr] = page;
		93	(*nr)++;
		94
		95	} while (ptep++, addr += PAGE_SIZE, addr != end);
		96	pte_unmap(ptep - 1);
		97
		98	return 1;
		99	}
		100
		101	static inline void get_head_page_multiple(struct page *page, int nr)
		102	{
		103	VM_BUG_ON(page != compound_head(page));
		104	VM_BUG_ON(page_count(page) == 0);
		105	atomic_add(nr, &page->_count);
		106	}
		107
		108	static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
		109	unsigned long end, int write, struct page **pages, int *nr)
		110	{
		111	unsigned long mask;
		112	pte_t pte = *(pte_t *)&pmd;
		113	struct page *head, *page;
		114	int refs;
		115
		116	mask = _PAGE_PRESENT|_PAGE_USER;
		117	if (write)
		118	mask |= _PAGE_RW;
		119	if ((pte_val(pte) & mask) != mask)
		120	return 0;
		121	/* hugepages are never "special" */
		122	VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL);
		123	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
		124
		125	refs = 0;
		126	head = pte_page(pte);
		127	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
		128	do {
		129	VM_BUG_ON(compound_head(page) != head);
		130	pages[*nr] = page;
		131	(*nr)++;
		132	page++;
		133	refs++;
		134	} while (addr += PAGE_SIZE, addr != end);
		135	get_head_page_multiple(head, refs);
		136
		137	return 1;
		138	}
		139
		140	static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
		141	int write, struct page **pages, int *nr)
		142	{
		143	unsigned long next;
		144	pmd_t *pmdp;
		145
		146	pmdp = pmd_offset(&pud, addr);
		147	do {
		148	pmd_t pmd = *pmdp;
		149
		150	next = pmd_addr_end(addr, end);
		151	if (pmd_none(pmd))
		152	return 0;
		153	if (unlikely(pmd_large(pmd))) {
		154	if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
		155	return 0;
		156	} else {
		157	if (!gup_pte_range(pmd, addr, next, write, pages, nr))
		158	return 0;
		159	}
		160	} while (pmdp++, addr = next, addr != end);
		161
		162	return 1;
		163	}
		164
		165	static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
		166	unsigned long end, int write, struct page **pages, int *nr)
		167	{
		168	unsigned long mask;
		169	pte_t pte = *(pte_t *)&pud;
		170	struct page *head, *page;
		171	int refs;
		172
		173	mask = _PAGE_PRESENT|_PAGE_USER;
		174	if (write)
		175	mask |= _PAGE_RW;
		176	if ((pte_val(pte) & mask) != mask)
		177	return 0;
		178	/* hugepages are never "special" */
		179	VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL);
		180	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
		181
		182	refs = 0;
		183	head = pte_page(pte);
		184	page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
		185	do {
		186	VM_BUG_ON(compound_head(page) != head);
		187	pages[*nr] = page;
		188	(*nr)++;
		189	page++;
		190	refs++;
		191	} while (addr += PAGE_SIZE, addr != end);
		192	get_head_page_multiple(head, refs);
		193
		194	return 1;
		195	}
		196
		197	static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
		198	int write, struct page **pages, int *nr)
		199	{
		200	unsigned long next;
		201	pud_t *pudp;
		202
		203	pudp = pud_offset(&pgd, addr);
		204	do {
		205	pud_t pud = *pudp;
		206
		207	next = pud_addr_end(addr, end);
		208	if (pud_none(pud))
		209	return 0;
		210	if (unlikely(pud_large(pud))) {
		211	if (!gup_huge_pud(pud, addr, next, write, pages, nr))
		212	return 0;
		213	} else {
		214	if (!gup_pmd_range(pud, addr, next, write, pages, nr))
		215	return 0;
		216	}
		217	} while (pudp++, addr = next, addr != end);
		218
		219	return 1;
		220	}
		221
		222	int get_user_pages_fast(unsigned long start, int nr_pages, int write,
		223	struct page **pages)
		224	{
		225	struct mm_struct *mm = current->mm;
		226	unsigned long addr, len, end;
		227	unsigned long next;
		228	pgd_t *pgdp;
		229	int nr = 0;
		230
		231	start &= PAGE_MASK;
		232	addr = start;
		233	len = (unsigned long) nr_pages << PAGE_SHIFT;
		234	end = start + len;
		235	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
		236	start, len)))
		237	goto slow_irqon;
		238
		239	/*
		240	* XXX: batch / limit 'nr', to avoid large irq off latency
		241	* needs some instrumenting to determine the common sizes used by
		242	* important workloads (eg. DB2), and whether limiting the batch size
		243	* will decrease performance.
		244	*
		245	* It seems like we're in the clear for the moment. Direct-IO is
		246	* the main guy that batches up lots of get_user_pages, and even
		247	* they are limited to 64-at-a-time which is not so many.
		248	*/
		249	/*
		250	* This doesn't prevent pagetable teardown, but does prevent
		251	* the pagetables and pages from being freed on x86.
		252	*
		253	* So long as we atomically load page table pointers versus teardown
		254	* (which we do on x86, with the above PAE exception), we can follow the
		255	* address down to the page and take a ref on it.
		256	*/
		257	local_irq_disable();
		258	pgdp = pgd_offset(mm, addr);
		259	do {
		260	pgd_t pgd = *pgdp;
		261
		262	next = pgd_addr_end(addr, end);
		263	if (pgd_none(pgd))
		264	goto slow;
		265	if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
		266	goto slow;
		267	} while (pgdp++, addr = next, addr != end);
		268	local_irq_enable();
		269
		270	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
		271	return nr;
		272
		273	{
		274	int ret;
		275
		276	slow:
		277	local_irq_enable();
		278	slow_irqon:
		279	/* Try to get the remaining pages with get_user_pages */
		280	start += nr << PAGE_SHIFT;
		281	pages += nr;
		282
		283	down_read(&mm->mmap_sem);
		284	ret = get_user_pages(current, mm, start,
		285	(end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
		286	up_read(&mm->mmap_sem);
		287
		288	/* Have to be a bit careful with return values */
		289	if (nr > 0) {
		290	if (ret < 0)
		291	ret = nr;
		292	else
		293	ret += nr;
		294	}
		295
		296	return ret;
		297	}
		298	}


diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index e4805771b5b..08a20e6a15c 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c
@@ -86,43 +86,6 @@ early_param("gbpages", parse_direct_gbpages_on);
86	* around without checking the pgd every time.	86	* around without checking the pgd every time.
87	*/	87	*/
88		88
89	void show_mem(void)
90	{
91	long i, total = 0, reserved = 0;
92	long shared = 0, cached = 0;
93	struct page *page;
94	pg_data_t *pgdat;
95
96	printk(KERN_INFO "Mem-info:\n");
97	show_free_areas();
98	for_each_online_pgdat(pgdat) {
99	for (i = 0; i < pgdat->node_spanned_pages; ++i) {
100	/*
101	* This loop can take a while with 256 GB and
102	* 4k pages so defer the NMI watchdog:
103	*/
104	if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
105	touch_nmi_watchdog();
106
107	if (!pfn_valid(pgdat->node_start_pfn + i))
108	continue;
109
110	page = pfn_to_page(pgdat->node_start_pfn + i);
111	total++;
112	if (PageReserved(page))
113	reserved++;
114	else if (PageSwapCache(page))
115	cached++;
116	else if (page_count(page))
117	shared += page_count(page) - 1;
118	}
119	}
120	printk(KERN_INFO "%lu pages of RAM\n", total);
121	printk(KERN_INFO "%lu reserved pages\n", reserved);
122	printk(KERN_INFO "%lu pages shared\n", shared);
123	printk(KERN_INFO "%lu pages swap cached\n", cached);
124	}
125
126	int after_bootmem;	89	int after_bootmem;
127		90
128	static __init void *spp_getpage(void)	91	static __init void *spp_getpage(void)


diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index b4becbf8c57..cab0abbd1eb 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c
@@ -20,53 +20,6 @@
20	#include <asm/tlb.h>	20	#include <asm/tlb.h>
21	#include <asm/tlbflush.h>	21	#include <asm/tlbflush.h>
22		22
23	void show_mem(void)
24	{
25	int total = 0, reserved = 0;
26	int shared = 0, cached = 0;
27	int highmem = 0;
28	struct page *page;
29	pg_data_t *pgdat;
30	unsigned long i;
31	unsigned long flags;
32
33	printk(KERN_INFO "Mem-info:\n");
34	show_free_areas();
35	for_each_online_pgdat(pgdat) {
36	pgdat_resize_lock(pgdat, &flags);
37	for (i = 0; i < pgdat->node_spanned_pages; ++i) {
38	if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
39	touch_nmi_watchdog();
40	page = pgdat_page_nr(pgdat, i);
41	total++;
42	if (PageHighMem(page))
43	highmem++;
44	if (PageReserved(page))
45	reserved++;
46	else if (PageSwapCache(page))
47	cached++;
48	else if (page_count(page))
49	shared += page_count(page) - 1;
50	}
51	pgdat_resize_unlock(pgdat, &flags);
52	}
53	printk(KERN_INFO "%d pages of RAM\n", total);
54	printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
55	printk(KERN_INFO "%d reserved pages\n", reserved);
56	printk(KERN_INFO "%d pages shared\n", shared);
57	printk(KERN_INFO "%d pages swap cached\n", cached);
58
59	printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY));
60	printk(KERN_INFO "%lu pages writeback\n",
61	global_page_state(NR_WRITEBACK));
62	printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
63	printk(KERN_INFO "%lu pages slab\n",
64	global_page_state(NR_SLAB_RECLAIMABLE) +
65	global_page_state(NR_SLAB_UNRECLAIMABLE));
66	printk(KERN_INFO "%lu pages pagetables\n",
67	global_page_state(NR_PAGETABLE));
68	}
69
70	/*	23	/*
71	* Associate a virtual page frame with a given physical page frame	24	* Associate a virtual page frame with a given physical page frame
72	* and protection flags for that frame.	25	* and protection flags for that frame.