10 files changed, 115 insertions, 26 deletions
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
index f4026bae6eed..05f2b4009ccc 100644
--- a/mm/allocpercpu.c
+++ b/mm/allocpercpu.c
@@ -1,7 +1,7 @@
 /*
 * linux/mm/allocpercpu.c
 *
- * Separated from slab.c August 11, 2006 Christoph Lameter <clameter@sgi.com>
+ * Separated from slab.c August 11, 2006 Christoph Lameter
 */
 #include <linux/mm.h>
 #include <linux/module.h>
diff --git a/mm/bootmem.c b/mm/bootmem.c
index e8fb927392b9..8d9f60e06f62 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -442,15 +442,17 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
        return init_bootmem_core(pgdat, freepfn, startpfn, endpfn);
 }
-void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
+int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
                                 unsigned long size, int flags)
 {
        int ret;
        ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
        if (ret < 0)
-                return;
+                return -ENOMEM;
        reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
+        return 0;
 }
 void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
diff --git a/mm/memory.c b/mm/memory.c
index 19e0ae9beecb..2302d228fe04 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -999,17 +999,15 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
                goto no_page_table;
        ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
-        if (!ptep)
-                goto out;
        pte = *ptep;
        if (!pte_present(pte))
-                goto unlock;
+                goto no_page;
        if ((flags & FOLL_WRITE) && !pte_write(pte))
                goto unlock;
        page = vm_normal_page(vma, address, pte);
        if (unlikely(!page))
-                goto unlock;
+                goto bad_page;
        if (flags & FOLL_GET)
                get_page(page);
@@ -1024,6 +1022,15 @@ unlock:
 out:
        return page;
+bad_page:
+        pte_unmap_unlock(ptep, ptl);
+        return ERR_PTR(-EFAULT);
+no_page:
+        pte_unmap_unlock(ptep, ptl);
+        if (!pte_none(pte))
+                return page;
+        /* Fall through to ZERO_PAGE handling */
 no_page_table:
        /*
         * When core dumping an enormous anonymous area that nobody
@@ -1038,6 +1045,26 @@ no_page_table:
        return page;
 }
+/* Can we do the FOLL_ANON optimization? */
+static inline int use_zero_page(struct vm_area_struct *vma)
+{
+        /*
+         * We don't want to optimize FOLL_ANON for make_pages_present()
+         * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
+         * we want to get the page from the page tables to make sure
+         * that we serialize and update with any other user of that
+         * mapping.
+         */
+        if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
+                return 0;
+        /*
+         * And if we have a fault or a nopfn routine, it's not an
+         * anonymous region.
+         */
+        return !vma->vm_ops ||
+                (!vma->vm_ops->fault && !vma->vm_ops->nopfn);
+}
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                unsigned long start, int len, int write, int force,
                struct page **pages, struct vm_area_struct **vmas)
@@ -1112,8 +1139,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                foll_flags = FOLL_TOUCH;
                if (pages)
                        foll_flags |= FOLL_GET;
-                if (!write && !(vma->vm_flags & VM_LOCKED) &&
-                    (!vma->vm_ops || !vma->vm_ops->fault))
+                if (!write && use_zero_page(vma))
                        foll_flags |= FOLL_ANON;
                do {
@@ -1125,7 +1151,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                         * be processed until returning to user space.
                         */
                        if (unlikely(test_tsk_thread_flag(tsk, TIF_MEMDIE)))
-                                return -ENOMEM;
+                                return i ? i : -ENOMEM;
                        if (write)
                                foll_flags |= FOLL_WRITE;
@@ -1159,6 +1185,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                cond_resched();
                        }
+                        if (IS_ERR(page))
+                                return i ? i : PTR_ERR(page);
                        if (pages) {
                                pages[i] = page;
@@ -1669,8 +1697,19 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
        struct page *dirty_page = NULL;
        old_page = vm_normal_page(vma, address, orig_pte);
-        if (!old_page)
+        if (!old_page) {
+                /*
+                 * VM_MIXEDMAP !pfn_valid() case
+                 *
+                 * We should not cow pages in a shared writeable mapping.
+                 * Just mark the pages writable as we can't do any dirty
+                 * accounting on raw pfn maps.
+                 */
+                if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
+                                     (VM_WRITE|VM_SHARED))
+                        goto reuse;
                goto gotten;
+        }
        /*
         * Take out anonymous pages first, anonymous shared vmas are
@@ -1723,6 +1762,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
        }
        if (reuse) {
+reuse:
                flush_cache_page(vma, address, pte_pfn(orig_pte));
                entry = pte_mkyoung(orig_pte);
                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -1757,7 +1797,6 @@ gotten:
        page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
        if (likely(pte_same(*page_table, orig_pte))) {
                if (old_page) {
-                        page_remove_rmap(old_page, vma);
                        if (!PageAnon(old_page)) {
                                dec_mm_counter(mm, file_rss);
                                inc_mm_counter(mm, anon_rss);
@@ -1779,6 +1818,32 @@ gotten:
                lru_cache_add_active(new_page);
                page_add_new_anon_rmap(new_page, vma, address);
+                if (old_page) {
+                        /*
+                         * Only after switching the pte to the new page may
+                         * we remove the mapcount here. Otherwise another
+                         * process may come and find the rmap count decremented
+                         * before the pte is switched to the new page, and
+                         * "reuse" the old page writing into it while our pte
+                         * here still points into it and can be read by other
+                         * threads.
+                         *
+                         * The critical issue is to order this
+                         * page_remove_rmap with the ptep_clear_flush above.
+                         * Those stores are ordered by (if nothing else,)
+                         * the barrier present in the atomic_add_negative
+                         * in page_remove_rmap.
+                         *
+                         * Then the TLB flush in ptep_clear_flush ensures that
+                         * no process can access the old page before the
+                         * decremented mapcount is visible. And the old page
+                         * cannot be reused until after the decremented
+                         * mapcount is visible. So transitively, TLBs to
+                         * old page will be flushed before it can be reused.
+                         */
+                        page_remove_rmap(old_page, vma);
+                }
                /* Free the old page.. */
                new_page = old_page;
                ret |= VM_FAULT_WRITE;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a37a5034f63d..c94e58b192c3 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -729,7 +729,11 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
        } else {
                *policy = pol == &default_policy ? MPOL_DEFAULT :
                                                pol->mode;
-                *policy |= pol->flags;
+                /*
+                 * Internal mempolicy flags must be masked off before exposing
+                 * the policy to userspace.
+                 */
+                *policy |= (pol->flags & MPOL_MODE_FLAGS);
        }
        if (vma) {
diff --git a/mm/migrate.c b/mm/migrate.c
index 449d77d409f5..55bd355d170d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -9,7 +9,7 @@
 * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
 * Hirokazu Takahashi <taka@valinux.co.jp>
 * Dave Hansen <haveblue@us.ibm.com>
- * Christoph Lameter <clameter@sgi.com>
+ * Christoph Lameter
 */
 #include <linux/migrate.h>
@@ -865,6 +865,11 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
                        goto set_status;
                page = follow_page(vma, pp->addr, FOLL_GET);
+                err = PTR_ERR(page);
+                if (IS_ERR(page))
+                        goto set_status;
                err = -ENOENT;
                if (!page)
                        goto set_status;
@@ -928,6 +933,11 @@ static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
                        goto set_status;
                page = follow_page(vma, pm->addr, 0);
+                err = PTR_ERR(page);
+                if (IS_ERR(page))
+                        goto set_status;
                err = -ENOENT;
                /* Use PageReserved to check for zero page */
                if (!page || PageReserved(page))
diff --git a/mm/mprotect.c b/mm/mprotect.c
index a5bf31c27375..acfe7c8d72fc 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -47,19 +47,17 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
                if (pte_present(oldpte)) {
                        pte_t ptent;
-                        /* Avoid an SMP race with hardware updated dirty/clean
-                         * bits by wiping the pte and then setting the new pte
-                         * into place.
-                         */
-                        ptent = ptep_get_and_clear(mm, addr, pte);
+                        ptent = ptep_modify_prot_start(mm, addr, pte);
                        ptent = pte_modify(ptent, newprot);
                        /*
                         * Avoid taking write faults for pages we know to be
                         * dirty.
                         */
                        if (dirty_accountable && pte_dirty(ptent))
                                ptent = pte_mkwrite(ptent);
-                        set_pte_at(mm, addr, pte, ptent);
+                        ptep_modify_prot_commit(mm, addr, pte, ptent);
 #ifdef CONFIG_MIGRATION
                } else if (!pte_file(oldpte)) {
                        swp_entry_t entry = pte_to_swp_entry(oldpte);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2f552955a02f..f32fae3121f0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2328,7 +2328,6 @@ static void build_zonelists(pg_data_t *pgdat)
 static void build_zonelist_cache(pg_data_t *pgdat)
 {
        pgdat->node_zonelists[0].zlcache_ptr = NULL;
-        pgdat->node_zonelists[1].zlcache_ptr = NULL;
 }
 #endif  /* CONFIG_NUMA */
diff --git a/mm/slab.c b/mm/slab.c
index 06236e4ddc1b..046607f05f3e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3263,9 +3263,12 @@ retry:
                if (cpuset_zone_allowed_hardwall(zone, flags) &&
                        cache->nodelists[nid] &&
-                        cache->nodelists[nid]->free_objects)
+                        cache->nodelists[nid]->free_objects) {
                                obj = ____cache_alloc_node(cache,
                                        flags | GFP_THISNODE, nid);
+                                if (obj)
+                                        break;
+                }
        }
        if (!obj) {
diff --git a/mm/slub.c b/mm/slub.c
index 0987d1cd943c..1a427c0ae83b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5,7 +5,7 @@
 * The allocator synchronizes using per slab locks and only
 * uses a centralized lock to manage a pool of partial slabs.
 *
- * (C) 2007 SGI, Christoph Lameter <clameter@sgi.com>
+ * (C) 2007 SGI, Christoph Lameter
 */
 #include <linux/mm.h>
@@ -2995,8 +2995,6 @@ void __init kmem_cache_init(void)
                create_kmalloc_cache(&kmalloc_caches[1],
                                "kmalloc-96", 96, GFP_KERNEL);
                caches++;
-        }
-        if (KMALLOC_MIN_SIZE <= 128) {
                create_kmalloc_cache(&kmalloc_caches[2],
                                "kmalloc-192", 192, GFP_KERNEL);
                caches++;
@@ -3026,6 +3024,16 @@ void __init kmem_cache_init(void)
        for (i = 8; i < KMALLOC_MIN_SIZE; i += 8)
                size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW;
+        if (KMALLOC_MIN_SIZE == 128) {
+                /*
+                 * The 192 byte sized cache is not used if the alignment
+                 * is 128 byte. Redirect kmalloc to use the 256 byte cache
+                 * instead.
+                 */
+                for (i = 128 + 8; i <= 192; i += 8)
+                        size_index[(i - 1) / 8] = 8;
+        }
        slab_state = UP;
        /* Provide the correct kmalloc names now that the caches are up */
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 99c4f36eb8a3..a91b5f8fcaf6 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -1,7 +1,7 @@
 /*
 * Virtual Memory Map support
 *
- * (C) 2007 sgi. Christoph Lameter <clameter@sgi.com>.
+ * (C) 2007 sgi. Christoph Lameter.
 *
 * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
 * virt_to_page, page_address() to be implemented as a base offset

diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c index f4026bae6eed..05f2b4009ccc 100644 --- a/mm/allocpercpu.c +++ b/mm/allocpercpu.c
@@ -1,7 +1,7 @@
1	/*	1	/*
2	* linux/mm/allocpercpu.c	2	* linux/mm/allocpercpu.c
3	*	3	*
4	* Separated from slab.c August 11, 2006 Christoph Lameter <clameter@sgi.com>	4	* Separated from slab.c August 11, 2006 Christoph Lameter
5	*/	5	*/
6	#include <linux/mm.h>	6	#include <linux/mm.h>
7	#include <linux/module.h>	7	#include <linux/module.h>


diff --git a/mm/bootmem.c b/mm/bootmem.c index e8fb927392b9..8d9f60e06f62 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c
@@ -442,15 +442,17 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
442	return init_bootmem_core(pgdat, freepfn, startpfn, endpfn);	442	return init_bootmem_core(pgdat, freepfn, startpfn, endpfn);
443	}	443	}
444		444
445	void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,	445	int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
446	unsigned long size, int flags)	446	unsigned long size, int flags)
447	{	447	{
448	int ret;	448	int ret;
449		449
450	ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);	450	ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
451	if (ret < 0)	451	if (ret < 0)
452	return;	452	return -ENOMEM;
453	reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);	453	reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
		454
		455	return 0;
454	}	456	}
455		457
456	void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,	458	void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,


diff --git a/mm/memory.c b/mm/memory.c index 19e0ae9beecb..2302d228fe04 100644 --- a/mm/memory.c +++ b/mm/memory.c
@@ -999,17 +999,15 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
999	goto no_page_table;	999	goto no_page_table;
1000		1000
1001	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);	1001	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
1002	if (!ptep)
1003	goto out;
1004		1002
1005	pte = *ptep;	1003	pte = *ptep;
1006	if (!pte_present(pte))	1004	if (!pte_present(pte))
1007	goto unlock;	1005	goto no_page;
1008	if ((flags & FOLL_WRITE) && !pte_write(pte))	1006	if ((flags & FOLL_WRITE) && !pte_write(pte))
1009	goto unlock;	1007	goto unlock;
1010	page = vm_normal_page(vma, address, pte);	1008	page = vm_normal_page(vma, address, pte);
1011	if (unlikely(!page))	1009	if (unlikely(!page))
1012	goto unlock;	1010	goto bad_page;
1013		1011
1014	if (flags & FOLL_GET)	1012	if (flags & FOLL_GET)
1015	get_page(page);	1013	get_page(page);
@@ -1024,6 +1022,15 @@ unlock:
1024	out:	1022	out:
1025	return page;	1023	return page;
1026		1024
		1025	bad_page:
		1026	pte_unmap_unlock(ptep, ptl);
		1027	return ERR_PTR(-EFAULT);
		1028
		1029	no_page:
		1030	pte_unmap_unlock(ptep, ptl);
		1031	if (!pte_none(pte))
		1032	return page;
		1033	/* Fall through to ZERO_PAGE handling */
1027	no_page_table:	1034	no_page_table:
1028	/*	1035	/*
1029	* When core dumping an enormous anonymous area that nobody	1036	* When core dumping an enormous anonymous area that nobody
@@ -1038,6 +1045,26 @@ no_page_table:
1038	return page;	1045	return page;
1039	}	1046	}
1040		1047
		1048	/* Can we do the FOLL_ANON optimization? */
		1049	static inline int use_zero_page(struct vm_area_struct *vma)
		1050	{
		1051	/*
		1052	* We don't want to optimize FOLL_ANON for make_pages_present()
		1053	* when it tries to page in a VM_LOCKED region. As to VM_SHARED,
		1054	* we want to get the page from the page tables to make sure
		1055	* that we serialize and update with any other user of that
		1056	* mapping.
		1057	*/
		1058	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
		1059	return 0;
		1060	/*
		1061	* And if we have a fault or a nopfn routine, it's not an
		1062	* anonymous region.
		1063	*/
		1064	return !vma->vm_ops ||
		1065	(!vma->vm_ops->fault && !vma->vm_ops->nopfn);
		1066	}
		1067
1041	int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,	1068	int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1042	unsigned long start, int len, int write, int force,	1069	unsigned long start, int len, int write, int force,
1043	struct page **pages, struct vm_area_struct **vmas)	1070	struct page **pages, struct vm_area_struct **vmas)
@@ -1112,8 +1139,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1112	foll_flags = FOLL_TOUCH;	1139	foll_flags = FOLL_TOUCH;
1113	if (pages)	1140	if (pages)
1114	foll_flags |= FOLL_GET;	1141	foll_flags |= FOLL_GET;
1115	if (!write && !(vma->vm_flags & VM_LOCKED) &&	1142	if (!write && use_zero_page(vma))
1116	(!vma->vm_ops || !vma->vm_ops->fault))
1117	foll_flags |= FOLL_ANON;	1143	foll_flags |= FOLL_ANON;
1118		1144
1119	do {	1145	do {
@@ -1125,7 +1151,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1125	* be processed until returning to user space.	1151	* be processed until returning to user space.
1126	*/	1152	*/
1127	if (unlikely(test_tsk_thread_flag(tsk, TIF_MEMDIE)))	1153	if (unlikely(test_tsk_thread_flag(tsk, TIF_MEMDIE)))
1128	return -ENOMEM;	1154	return i ? i : -ENOMEM;
1129		1155
1130	if (write)	1156	if (write)
1131	foll_flags |= FOLL_WRITE;	1157	foll_flags |= FOLL_WRITE;
@@ -1159,6 +1185,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1159		1185
1160	cond_resched();	1186	cond_resched();
1161	}	1187	}
		1188	if (IS_ERR(page))
		1189	return i ? i : PTR_ERR(page);
1162	if (pages) {	1190	if (pages) {
1163	pages[i] = page;	1191	pages[i] = page;
1164		1192
@@ -1669,8 +1697,19 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1669	struct page *dirty_page = NULL;	1697	struct page *dirty_page = NULL;
1670		1698
1671	old_page = vm_normal_page(vma, address, orig_pte);	1699	old_page = vm_normal_page(vma, address, orig_pte);
1672	if (!old_page)	1700	if (!old_page) {
		1701	/*
		1702	* VM_MIXEDMAP !pfn_valid() case
		1703	*
		1704	* We should not cow pages in a shared writeable mapping.
		1705	* Just mark the pages writable as we can't do any dirty
		1706	* accounting on raw pfn maps.
		1707	*/
		1708	if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
		1709	(VM_WRITE|VM_SHARED))
		1710	goto reuse;
1673	goto gotten;	1711	goto gotten;
		1712	}
1674		1713
1675	/*	1714	/*
1676	* Take out anonymous pages first, anonymous shared vmas are	1715	* Take out anonymous pages first, anonymous shared vmas are
@@ -1723,6 +1762,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1723	}	1762	}
1724		1763
1725	if (reuse) {	1764	if (reuse) {
		1765	reuse:
1726	flush_cache_page(vma, address, pte_pfn(orig_pte));	1766	flush_cache_page(vma, address, pte_pfn(orig_pte));
1727	entry = pte_mkyoung(orig_pte);	1767	entry = pte_mkyoung(orig_pte);
1728	entry = maybe_mkwrite(pte_mkdirty(entry), vma);	1768	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -1757,7 +1797,6 @@ gotten:
1757	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);	1797	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
1758	if (likely(pte_same(*page_table, orig_pte))) {	1798	if (likely(pte_same(*page_table, orig_pte))) {
1759	if (old_page) {	1799	if (old_page) {
1760	page_remove_rmap(old_page, vma);
1761	if (!PageAnon(old_page)) {	1800	if (!PageAnon(old_page)) {
1762	dec_mm_counter(mm, file_rss);	1801	dec_mm_counter(mm, file_rss);
1763	inc_mm_counter(mm, anon_rss);	1802	inc_mm_counter(mm, anon_rss);
@@ -1779,6 +1818,32 @@ gotten:
1779	lru_cache_add_active(new_page);	1818	lru_cache_add_active(new_page);
1780	page_add_new_anon_rmap(new_page, vma, address);	1819	page_add_new_anon_rmap(new_page, vma, address);
1781		1820
		1821	if (old_page) {
		1822	/*
		1823	* Only after switching the pte to the new page may
		1824	* we remove the mapcount here. Otherwise another
		1825	* process may come and find the rmap count decremented
		1826	* before the pte is switched to the new page, and
		1827	* "reuse" the old page writing into it while our pte
		1828	* here still points into it and can be read by other
		1829	* threads.
		1830	*
		1831	* The critical issue is to order this
		1832	* page_remove_rmap with the ptep_clear_flush above.
		1833	* Those stores are ordered by (if nothing else,)
		1834	* the barrier present in the atomic_add_negative
		1835	* in page_remove_rmap.
		1836	*
		1837	* Then the TLB flush in ptep_clear_flush ensures that
		1838	* no process can access the old page before the
		1839	* decremented mapcount is visible. And the old page
		1840	* cannot be reused until after the decremented
		1841	* mapcount is visible. So transitively, TLBs to
		1842	* old page will be flushed before it can be reused.
		1843	*/
		1844	page_remove_rmap(old_page, vma);
		1845	}
		1846
1782	/* Free the old page.. */	1847	/* Free the old page.. */
1783	new_page = old_page;	1848	new_page = old_page;
1784	ret |= VM_FAULT_WRITE;	1849	ret |= VM_FAULT_WRITE;


diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a37a5034f63d..c94e58b192c3 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c
@@ -729,7 +729,11 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
729	} else {	729	} else {
730	*policy = pol == &default_policy ? MPOL_DEFAULT :	730	*policy = pol == &default_policy ? MPOL_DEFAULT :
731	pol->mode;	731	pol->mode;
732	*policy |= pol->flags;	732	/*
		733	* Internal mempolicy flags must be masked off before exposing
		734	* the policy to userspace.
		735	*/
		736	*policy |= (pol->flags & MPOL_MODE_FLAGS);
733	}	737	}
734		738
735	if (vma) {	739	if (vma) {


diff --git a/mm/migrate.c b/mm/migrate.c index 449d77d409f5..55bd355d170d 100644 --- a/mm/migrate.c +++ b/mm/migrate.c
@@ -9,7 +9,7 @@
9	* IWAMOTO Toshihiro <iwamoto@valinux.co.jp>	9	* IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
10	* Hirokazu Takahashi <taka@valinux.co.jp>	10	* Hirokazu Takahashi <taka@valinux.co.jp>
11	* Dave Hansen <haveblue@us.ibm.com>	11	* Dave Hansen <haveblue@us.ibm.com>
12	* Christoph Lameter <clameter@sgi.com>	12	* Christoph Lameter
13	*/	13	*/
14		14
15	#include <linux/migrate.h>	15	#include <linux/migrate.h>
@@ -865,6 +865,11 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
865	goto set_status;	865	goto set_status;
866		866
867	page = follow_page(vma, pp->addr, FOLL_GET);	867	page = follow_page(vma, pp->addr, FOLL_GET);
		868
		869	err = PTR_ERR(page);
		870	if (IS_ERR(page))
		871	goto set_status;
		872
868	err = -ENOENT;	873	err = -ENOENT;
869	if (!page)	874	if (!page)
870	goto set_status;	875	goto set_status;
@@ -928,6 +933,11 @@ static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
928	goto set_status;	933	goto set_status;
929		934
930	page = follow_page(vma, pm->addr, 0);	935	page = follow_page(vma, pm->addr, 0);
		936
		937	err = PTR_ERR(page);
		938	if (IS_ERR(page))
		939	goto set_status;
		940
931	err = -ENOENT;	941	err = -ENOENT;
932	/* Use PageReserved to check for zero page */	942	/* Use PageReserved to check for zero page */
933	if (!page || PageReserved(page))	943	if (!page || PageReserved(page))


diff --git a/mm/mprotect.c b/mm/mprotect.c index a5bf31c27375..acfe7c8d72fc 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c
@@ -47,19 +47,17 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
47	if (pte_present(oldpte)) {	47	if (pte_present(oldpte)) {
48	pte_t ptent;	48	pte_t ptent;
49		49
50	/* Avoid an SMP race with hardware updated dirty/clean	50	ptent = ptep_modify_prot_start(mm, addr, pte);
51	* bits by wiping the pte and then setting the new pte
52	* into place.
53	*/
54	ptent = ptep_get_and_clear(mm, addr, pte);
55	ptent = pte_modify(ptent, newprot);	51	ptent = pte_modify(ptent, newprot);
		52
56	/*	53	/*
57	* Avoid taking write faults for pages we know to be	54	* Avoid taking write faults for pages we know to be
58	* dirty.	55	* dirty.
59	*/	56	*/
60	if (dirty_accountable && pte_dirty(ptent))	57	if (dirty_accountable && pte_dirty(ptent))
61	ptent = pte_mkwrite(ptent);	58	ptent = pte_mkwrite(ptent);
62	set_pte_at(mm, addr, pte, ptent);	59
		60	ptep_modify_prot_commit(mm, addr, pte, ptent);
63	#ifdef CONFIG_MIGRATION	61	#ifdef CONFIG_MIGRATION
64	} else if (!pte_file(oldpte)) {	62	} else if (!pte_file(oldpte)) {
65	swp_entry_t entry = pte_to_swp_entry(oldpte);	63	swp_entry_t entry = pte_to_swp_entry(oldpte);


diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2f552955a02f..f32fae3121f0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c
@@ -2328,7 +2328,6 @@ static void build_zonelists(pg_data_t *pgdat)
2328	static void build_zonelist_cache(pg_data_t *pgdat)	2328	static void build_zonelist_cache(pg_data_t *pgdat)
2329	{	2329	{
2330	pgdat->node_zonelists[0].zlcache_ptr = NULL;	2330	pgdat->node_zonelists[0].zlcache_ptr = NULL;
2331	pgdat->node_zonelists[1].zlcache_ptr = NULL;
2332	}	2331	}
2333		2332
2334	#endif /* CONFIG_NUMA */	2333	#endif /* CONFIG_NUMA */


diff --git a/mm/slab.c b/mm/slab.c index 06236e4ddc1b..046607f05f3e 100644 --- a/mm/slab.c +++ b/mm/slab.c
@@ -3263,9 +3263,12 @@ retry:
3263		3263
3264	if (cpuset_zone_allowed_hardwall(zone, flags) &&	3264	if (cpuset_zone_allowed_hardwall(zone, flags) &&
3265	cache->nodelists[nid] &&	3265	cache->nodelists[nid] &&
3266	cache->nodelists[nid]->free_objects)	3266	cache->nodelists[nid]->free_objects) {
3267	obj = ____cache_alloc_node(cache,	3267	obj = ____cache_alloc_node(cache,
3268	flags | GFP_THISNODE, nid);	3268	flags | GFP_THISNODE, nid);
		3269	if (obj)
		3270	break;
		3271	}
3269	}	3272	}
3270		3273
3271	if (!obj) {	3274	if (!obj) {


diff --git a/mm/slub.c b/mm/slub.c index 0987d1cd943c..1a427c0ae83b 100644 --- a/mm/slub.c +++ b/mm/slub.c
@@ -5,7 +5,7 @@
5	* The allocator synchronizes using per slab locks and only	5	* The allocator synchronizes using per slab locks and only
6	* uses a centralized lock to manage a pool of partial slabs.	6	* uses a centralized lock to manage a pool of partial slabs.
7	*	7	*
8	* (C) 2007 SGI, Christoph Lameter <clameter@sgi.com>	8	* (C) 2007 SGI, Christoph Lameter
9	*/	9	*/
10		10
11	#include <linux/mm.h>	11	#include <linux/mm.h>
@@ -2995,8 +2995,6 @@ void __init kmem_cache_init(void)
2995	create_kmalloc_cache(&kmalloc_caches[1],	2995	create_kmalloc_cache(&kmalloc_caches[1],
2996	"kmalloc-96", 96, GFP_KERNEL);	2996	"kmalloc-96", 96, GFP_KERNEL);
2997	caches++;	2997	caches++;
2998	}
2999	if (KMALLOC_MIN_SIZE <= 128) {
3000	create_kmalloc_cache(&kmalloc_caches[2],	2998	create_kmalloc_cache(&kmalloc_caches[2],
3001	"kmalloc-192", 192, GFP_KERNEL);	2999	"kmalloc-192", 192, GFP_KERNEL);
3002	caches++;	3000	caches++;
@@ -3026,6 +3024,16 @@ void __init kmem_cache_init(void)
3026	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8)	3024	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8)
3027	size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW;	3025	size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW;
3028		3026
		3027	if (KMALLOC_MIN_SIZE == 128) {
		3028	/*
		3029	* The 192 byte sized cache is not used if the alignment
		3030	* is 128 byte. Redirect kmalloc to use the 256 byte cache
		3031	* instead.
		3032	*/
		3033	for (i = 128 + 8; i <= 192; i += 8)
		3034	size_index[(i - 1) / 8] = 8;
		3035	}
		3036
3029	slab_state = UP;	3037	slab_state = UP;
3030		3038
3031	/* Provide the correct kmalloc names now that the caches are up */	3039	/* Provide the correct kmalloc names now that the caches are up */


diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 99c4f36eb8a3..a91b5f8fcaf6 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c
@@ -1,7 +1,7 @@
1	/*	1	/*
2	* Virtual Memory Map support	2	* Virtual Memory Map support
3	*	3	*
4	* (C) 2007 sgi. Christoph Lameter <clameter@sgi.com>.	4	* (C) 2007 sgi. Christoph Lameter.
5	*	5	*
6	* Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,	6	* Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
7	* virt_to_page, page_address() to be implemented as a base offset	7	* virt_to_page, page_address() to be implemented as a base offset