author	Shachar Raindel <raindel@mellanox.com>	2015-04-14 18:46:32 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-04-14 19:49:03 -0400
commit	2f38ab2c3c7fef04dca0313fd89d91f142ca9281 (patch)
tree	0f749c9d45d2177903ac5bcc29457e35961f64b2 /mm/memory.c
parent	28766805275c12c2298883cece3f98505ac764b4 (diff)
mm: refactor do_wp_page, extract the page copy flow
In some cases, do_wp_page had to copy the page suffering a write fault to a new location. If the function's logic decided to do this, it was done by jumping with a "goto" operation to the relevant code block. This made the code really hard to understand. It is also against the kernel coding style guidelines.

This patch extracts the page copy and page table update logic to a separate function. It also cleans up the naming, from "gotten" to "wp_page_copy", and adds a few comments.

Signed-off-by: Shachar Raindel <raindel@mellanox.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Haggai Eran <haggaie@mellanox.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Matthew Wilcox <matthew.r.wilcox@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Peter Feiner <pfeiner@google.com>
Cc: Michel Lespinasse <walken@google.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
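The shape of the control-flow change described above can be sketched, in deliberately simplified form, with the standalone C program below. The types and helpers (fault_ctx, reuse_page, copy_to_new_page, handle_write_fault_*) are hypothetical stand-ins for illustration only, not kernel APIs; only the structure mirrors what the patch does with wp_page_copy().

/* Simplified sketch of the refactoring; hypothetical stand-ins, not kernel code. */
#include <stdio.h>

struct fault_ctx {
	int page_is_normal;	/* stand-in for "old_page != NULL" */
	int shared_writable;	/* stand-in for the VM_WRITE|VM_SHARED reuse case */
};

static int reuse_page(struct fault_ctx *ctx)       { (void)ctx; return 0; }
static int copy_to_new_page(struct fault_ctx *ctx) { (void)ctx; return 1; }

/* Before: the copy path is a labelled block reached via "goto gotten". */
static int handle_write_fault_old(struct fault_ctx *ctx)
{
	if (!ctx->page_is_normal)
		goto gotten;		/* early path jumps into the copy block */
	if (ctx->shared_writable)
		return reuse_page(ctx);
	/* fall through to the copy block */
gotten:
	return copy_to_new_page(ctx);	/* copy + page-table update lived inline here */
}

/* After: the copy flow is an ordinary helper call, as wp_page_copy() is. */
static int handle_write_fault_new(struct fault_ctx *ctx)
{
	if (!ctx->page_is_normal)
		return copy_to_new_page(ctx);
	if (ctx->shared_writable)
		return reuse_page(ctx);
	return copy_to_new_page(ctx);
}

int main(void)
{
	struct fault_ctx ctx = { .page_is_normal = 1, .shared_writable = 0 };

	printf("old: %d, new: %d\n",
	       handle_write_fault_old(&ctx), handle_write_fault_new(&ctx));
	return 0;
}

Both versions handle the same cases; the extracted-helper form simply makes the copy path a function that can be read, called, and reasoned about on its own.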
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 265
1 file changed, 147 insertions(+), 118 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 0e28fddafdaf..cfd3c78f00fe 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2042,6 +2042,146 @@ static inline int wp_page_reuse(struct mm_struct *mm,
 }
 
 /*
+ * Handle the case of a page which we actually need to copy to a new page.
+ *
+ * Called with mmap_sem locked and the old page referenced, but
+ * without the ptl held.
+ *
+ * High level logic flow:
+ *
+ * - Allocate a page, copy the content of the old page to the new one.
+ * - Handle book keeping and accounting - cgroups, mmu-notifiers, etc.
+ * - Take the PTL. If the pte changed, bail out and release the allocated page
+ * - If the pte is still the way we remember it, update the page table and all
+ *   relevant references. This includes dropping the reference the page-table
+ *   held to the old page, as well as updating the rmap.
+ * - In any case, unlock the PTL and drop the reference we took to the old page.
+ */
+static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
+			unsigned long address, pte_t *page_table, pmd_t *pmd,
+			pte_t orig_pte, struct page *old_page)
+{
+	struct page *new_page = NULL;
+	spinlock_t *ptl = NULL;
+	pte_t entry;
+	int page_copied = 0;
+	const unsigned long mmun_start = address & PAGE_MASK;	/* For mmu_notifiers */
+	const unsigned long mmun_end = mmun_start + PAGE_SIZE;	/* For mmu_notifiers */
+	struct mem_cgroup *memcg;
+
+	if (unlikely(anon_vma_prepare(vma)))
+		goto oom;
+
+	if (is_zero_pfn(pte_pfn(orig_pte))) {
+		new_page = alloc_zeroed_user_highpage_movable(vma, address);
+		if (!new_page)
+			goto oom;
+	} else {
+		new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+		if (!new_page)
+			goto oom;
+		cow_user_page(new_page, old_page, address, vma);
+	}
+	__SetPageUptodate(new_page);
+
+	if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg))
+		goto oom_free_new;
+
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
+	/*
+	 * Re-check the pte - we dropped the lock
+	 */
+	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+	if (likely(pte_same(*page_table, orig_pte))) {
+		if (old_page) {
+			if (!PageAnon(old_page)) {
+				dec_mm_counter_fast(mm, MM_FILEPAGES);
+				inc_mm_counter_fast(mm, MM_ANONPAGES);
+			}
+		} else {
+			inc_mm_counter_fast(mm, MM_ANONPAGES);
+		}
+		flush_cache_page(vma, address, pte_pfn(orig_pte));
+		entry = mk_pte(new_page, vma->vm_page_prot);
+		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		/*
+		 * Clear the pte entry and flush it first, before updating the
+		 * pte with the new entry. This will avoid a race condition
+		 * seen in the presence of one thread doing SMC and another
+		 * thread doing COW.
+		 */
+		ptep_clear_flush_notify(vma, address, page_table);
+		page_add_new_anon_rmap(new_page, vma, address);
+		mem_cgroup_commit_charge(new_page, memcg, false);
+		lru_cache_add_active_or_unevictable(new_page, vma);
+		/*
+		 * We call the notify macro here because, when using secondary
+		 * mmu page tables (such as kvm shadow page tables), we want the
+		 * new page to be mapped directly into the secondary page table.
+		 */
+		set_pte_at_notify(mm, address, page_table, entry);
+		update_mmu_cache(vma, address, page_table);
+		if (old_page) {
+			/*
+			 * Only after switching the pte to the new page may
+			 * we remove the mapcount here. Otherwise another
+			 * process may come and find the rmap count decremented
+			 * before the pte is switched to the new page, and
+			 * "reuse" the old page writing into it while our pte
+			 * here still points into it and can be read by other
+			 * threads.
+			 *
+			 * The critical issue is to order this
+			 * page_remove_rmap with the ptp_clear_flush above.
+			 * Those stores are ordered by (if nothing else,)
+			 * the barrier present in the atomic_add_negative
+			 * in page_remove_rmap.
+			 *
+			 * Then the TLB flush in ptep_clear_flush ensures that
+			 * no process can access the old page before the
+			 * decremented mapcount is visible. And the old page
+			 * cannot be reused until after the decremented
+			 * mapcount is visible. So transitively, TLBs to
+			 * old page will be flushed before it can be reused.
+			 */
+			page_remove_rmap(old_page);
+		}
+
+		/* Free the old page.. */
+		new_page = old_page;
+		page_copied = 1;
+	} else {
+		mem_cgroup_cancel_charge(new_page, memcg);
+	}
+
+	if (new_page)
+		page_cache_release(new_page);
+
+	pte_unmap_unlock(page_table, ptl);
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+	if (old_page) {
+		/*
+		 * Don't let another task, with possibly unlocked vma,
+		 * keep the mlocked page.
+		 */
+		if (page_copied && (vma->vm_flags & VM_LOCKED)) {
+			lock_page(old_page);	/* LRU manipulation */
+			munlock_vma_page(old_page);
+			unlock_page(old_page);
+		}
+		page_cache_release(old_page);
+	}
+	return page_copied ? VM_FAULT_WRITE : 0;
+oom_free_new:
+	page_cache_release(new_page);
+oom:
+	if (old_page)
+		page_cache_release(old_page);
+	return VM_FAULT_OOM;
+}
+
+/*
  * This routine handles present pages, when users try to write
  * to a shared page. It is done by copying the page to a new address
  * and decrementing the shared-page counter for the old page.
@@ -2064,12 +2204,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		spinlock_t *ptl, pte_t orig_pte)
 	__releases(ptl)
 {
-	struct page *old_page, *new_page = NULL;
-	pte_t entry;
-	int page_copied = 0;
-	unsigned long mmun_start = 0;	/* For mmu_notifiers */
-	unsigned long mmun_end = 0;	/* For mmu_notifiers */
-	struct mem_cgroup *memcg;
+	struct page *old_page;
 
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page) {
@@ -2085,7 +2220,10 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				     (VM_WRITE|VM_SHARED))
 			return wp_page_reuse(mm, vma, address, page_table, ptl,
 					     orig_pte, old_page, 0, 0);
-		goto gotten;
+
+		pte_unmap_unlock(page_table, ptl);
+		return wp_page_copy(mm, vma, address, page_table, pmd,
+				    orig_pte, old_page);
 	}
 
 	/*
@@ -2165,119 +2303,10 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * Ok, we need to copy. Oh, well..
 	 */
 	page_cache_get(old_page);
-gotten:
-	pte_unmap_unlock(page_table, ptl);
-
-	if (unlikely(anon_vma_prepare(vma)))
-		goto oom;
-
-	if (is_zero_pfn(pte_pfn(orig_pte))) {
-		new_page = alloc_zeroed_user_highpage_movable(vma, address);
-		if (!new_page)
-			goto oom;
-	} else {
-		new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
-		if (!new_page)
-			goto oom;
-		cow_user_page(new_page, old_page, address, vma);
-	}
-	__SetPageUptodate(new_page);
-
-	if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg))
-		goto oom_free_new;
-
-	mmun_start = address & PAGE_MASK;
-	mmun_end = mmun_start + PAGE_SIZE;
-	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
-
-	/*
-	 * Re-check the pte - we dropped the lock
-	 */
-	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-	if (likely(pte_same(*page_table, orig_pte))) {
-		if (old_page) {
-			if (!PageAnon(old_page)) {
-				dec_mm_counter_fast(mm, MM_FILEPAGES);
-				inc_mm_counter_fast(mm, MM_ANONPAGES);
-			}
-		} else
-			inc_mm_counter_fast(mm, MM_ANONPAGES);
-		flush_cache_page(vma, address, pte_pfn(orig_pte));
-		entry = mk_pte(new_page, vma->vm_page_prot);
-		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-		/*
-		 * Clear the pte entry and flush it first, before updating the
-		 * pte with the new entry. This will avoid a race condition
-		 * seen in the presence of one thread doing SMC and another
-		 * thread doing COW.
-		 */
-		ptep_clear_flush_notify(vma, address, page_table);
-		page_add_new_anon_rmap(new_page, vma, address);
-		mem_cgroup_commit_charge(new_page, memcg, false);
-		lru_cache_add_active_or_unevictable(new_page, vma);
-		/*
-		 * We call the notify macro here because, when using secondary
-		 * mmu page tables (such as kvm shadow page tables), we want the
-		 * new page to be mapped directly into the secondary page table.
-		 */
-		set_pte_at_notify(mm, address, page_table, entry);
-		update_mmu_cache(vma, address, page_table);
-		if (old_page) {
-			/*
-			 * Only after switching the pte to the new page may
-			 * we remove the mapcount here. Otherwise another
-			 * process may come and find the rmap count decremented
-			 * before the pte is switched to the new page, and
-			 * "reuse" the old page writing into it while our pte
-			 * here still points into it and can be read by other
-			 * threads.
-			 *
-			 * The critical issue is to order this
-			 * page_remove_rmap with the ptp_clear_flush above.
-			 * Those stores are ordered by (if nothing else,)
-			 * the barrier present in the atomic_add_negative
-			 * in page_remove_rmap.
-			 *
-			 * Then the TLB flush in ptep_clear_flush ensures that
-			 * no process can access the old page before the
-			 * decremented mapcount is visible. And the old page
-			 * cannot be reused until after the decremented
-			 * mapcount is visible. So transitively, TLBs to
-			 * old page will be flushed before it can be reused.
-			 */
-			page_remove_rmap(old_page);
-		}
-
-		/* Free the old page.. */
-		new_page = old_page;
-		page_copied = 1;
-	} else
-		mem_cgroup_cancel_charge(new_page, memcg);
-
-	if (new_page)
-		page_cache_release(new_page);
 
 	pte_unmap_unlock(page_table, ptl);
-	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
-	if (old_page) {
-		/*
-		 * Don't let another task, with possibly unlocked vma,
-		 * keep the mlocked page.
-		 */
-		if (page_copied && (vma->vm_flags & VM_LOCKED)) {
-			lock_page(old_page);	/* LRU manipulation */
-			munlock_vma_page(old_page);
-			unlock_page(old_page);
-		}
-		page_cache_release(old_page);
-	}
-	return page_copied ? VM_FAULT_WRITE : 0;
-oom_free_new:
-	page_cache_release(new_page);
-oom:
-	if (old_page)
-		page_cache_release(old_page);
-	return VM_FAULT_OOM;
+	return wp_page_copy(mm, vma, address, page_table, pmd,
+			    orig_pte, old_page);
 }
 
 static void unmap_mapping_range_vma(struct vm_area_struct *vma,
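The central correctness idea spelled out in the comment above wp_page_copy(), namely doing the allocation and copy without the page-table lock and then retaking the PTL and committing only if the pte still matches the value observed at fault time, can be modelled in isolation by the minimal sketch below. pte_lock, current_pte and copy_and_commit are hypothetical stand-ins (a pthread mutex and a plain variable), not kernel APIs.

/* Minimal sketch of the "re-check the pte under the lock" pattern; stand-ins only. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t pte_lock = PTHREAD_MUTEX_INITIALIZER;	/* stand-in for the PTL */
static unsigned long current_pte = 0x1000;			/* stand-in for a pte slot */

static int copy_and_commit(unsigned long orig_pte)
{
	int committed = 0;

	/* The expensive part (allocate + copy) would happen here, without the lock held. */

	pthread_mutex_lock(&pte_lock);		/* analogue of pte_offset_map_lock() */
	if (current_pte == orig_pte) {		/* analogue of the pte_same() re-check */
		current_pte = 0x2000;		/* analogue of installing the new pte */
		committed = 1;
	}
	pthread_mutex_unlock(&pte_lock);

	/* If !committed, the caller drops the new page and the fault is simply retried. */
	return committed;
}

int main(void)
{
	printf("first attempt committed: %d\n", copy_and_commit(0x1000));
	printf("stale attempt committed: %d\n", copy_and_commit(0x1000));
	return 0;
}

The second call sees a pte that no longer matches its stale snapshot and bails out: the speculative work may be thrown away, but a stale mapping is never installed.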