mm: fix fault vs invalidate race for linear mappings

Fix the race between invalidate_inode_pages and do_no_page. Andrea Arcangeli identified a subtle race between invalidation of pages from pagecache with userspace mappings, and do_no_page. The issue is that invalidation has to shoot down all mappings to the page, before it can be discarded from the pagecache. Between shooting down ptes to a particular page, and actually dropping the struct page from the pagecache, do_no_page from any process might fault on that page and establish a new mapping to the page just before it gets discarded from the pagecache. The most common case where such invalidation is used is in file truncation. This case was catered for by doing a sort of open-coded seqlock between the file's i_size, and its truncate_count. Truncation will decrease i_size, then increment truncate_count before unmapping userspace pages; do_no_page will read truncate_count, then find the page if it is within i_size, and then check truncate_count under the page table lock and back out and retry if it had subsequently been changed (ptl will serialise against unmapping, and ensure a potentially updated truncate_count is actually visible). Complexity and documentation issues aside, the locking protocol fails in the case where we would like to invalidate pagecache inside i_size. do_no_page can come in anytime and filemap_nopage is not aware of the invalidation in progress (as it is when it is outside i_size). The end result is that dangling (->mapping == NULL) pages that appear to be from a particular file may be mapped into userspace with nonsense data. Valid mappings to the same place will see a different page. Andrea implemented two working fixes, one using a real seqlock, another using a page->flags bit. He also proposed using the page lock in do_no_page, but that was initially considered too heavyweight. 
However, it is not a global or per-file lock, and the page cacheline is modified in do_no_page to increment _count and _mapcount anyway, so a further modification should not be a large performance hit. Scalability is not an issue. This patch implements this latter approach. ->nopage implementations return with the page locked if it is possible for their underlying file to be invalidated (in that case, they must set a special vm_flags bit, VM_CAN_INVALIDATE, to indicate so). do_no_page only unlocks the page after setting up the mapping completely. Invalidation is excluded because it holds the page lock during invalidation of each page (and ensures that the page is not mapped while holding the lock). This also allows significant simplifications in do_no_page, because we have the page locked in the right place in the pagecache from the start. Signed-off-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Nick Piggin <npiggin@suse.de> 2007-07-19 04:46:57 -0400
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-07-19 13:04:41 -0400
commit: d00806b183152af6d24f46f0c33f14162ca1262a (patch)
tree: 36f829cf13d5410374a3f00b56ec0b1f8dc3ce3c /mm/memory.c
parent: 589f1e81bde732dd0b1bc5d01b6bddd4bcb4527b (diff)
1 files changed, 73 insertions, 80 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 9c6ff7fffdc8..e6c99f6b5649 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1831,6 +1831,13 @@ static int unmap_mapping_range_vma(struct vm_area_struct *vma,
        unsigned long restart_addr;
        int need_break;
+        /*
+         * files that support invalidating or truncating portions of the
+         * file from under mmaped areas must set the VM_CAN_INVALIDATE flag, and
+         * have their .nopage function return the page locked.
+         */
+        BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
 again:
        restart_addr = vma->vm_truncate_count;
        if (is_restart_addr(restart_addr) && start_addr < restart_addr) {
@@ -1959,17 +1966,8 @@ void unmap_mapping_range(struct address_space *mapping,
        spin_lock(&mapping->i_mmap_lock);
-        /* serialize i_size write against truncate_count write */
+        /* Protect against endless unmapping loops */
-        smp_wmb();
-        /* Protect against page faults, and endless unmapping loops */
        mapping->truncate_count++;
-        /*
-         * For archs where spin_lock has inclusive semantics like ia64
-         * this smp_mb() will prevent to read pagetable contents
-         * before the truncate_count increment is visible to
-         * other cpus.
-         */
-        smp_mb();
        if (unlikely(is_restart_addr(mapping->truncate_count))) {
                if (mapping->truncate_count == 0)
                        reset_vma_truncate_counts(mapping);
@@ -2008,8 +2006,18 @@ int vmtruncate(struct inode * inode, loff_t offset)
        if (IS_SWAPFILE(inode))
                goto out_busy;
        i_size_write(inode, offset);
+        /*
+         * unmap_mapping_range is called twice, first simply for efficiency
+         * so that truncate_inode_pages does fewer single-page unmaps. However
+         * after this first call, and before truncate_inode_pages finishes,
+         * it is possible for private pages to be COWed, which remain after
+         * truncate_inode_pages finishes, hence the second unmap_mapping_range
+         * call must be made for correctness.
+         */
        unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
        truncate_inode_pages(mapping, offset);
+        unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
        goto out_truncate;
 do_expand:
@@ -2049,6 +2057,7 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
        down_write(&inode->i_alloc_sem);
        unmap_mapping_range(mapping, offset, (end - offset), 1);
        truncate_inode_pages_range(mapping, offset, end);
+        unmap_mapping_range(mapping, offset, (end - offset), 1);
        inode->i_op->truncate_range(inode, offset, end);
        up_write(&inode->i_alloc_sem);
        mutex_unlock(&inode->i_mutex);
@@ -2206,7 +2215,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
        /* No need to invalidate - it was non-present before */
        update_mmu_cache(vma, address, pte);
-        lazy_mmu_prot_update(pte);
 unlock:
        pte_unmap_unlock(page_table, ptl);
 out:
@@ -2297,10 +2305,8 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
                int write_access)
 {
        spinlock_t *ptl;
-        struct page *new_page;
+        struct page *page, *nopage_page;
-        struct address_space *mapping = NULL;
        pte_t entry;
-        unsigned int sequence = 0;
        int ret = VM_FAULT_MINOR;
        int anon = 0;
        struct page *dirty_page = NULL;
@@ -2308,74 +2314,53 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
        pte_unmap(page_table);
        BUG_ON(vma->vm_flags & VM_PFNMAP);
-        if (vma->vm_file) {
+        nopage_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
-                mapping = vma->vm_file->f_mapping;
-                sequence = mapping->truncate_count;
-                smp_rmb(); /* serializes i_size against truncate_count */
-        }
-retry:
-        new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
-        /*
-         * No smp_rmb is needed here as long as there's a full
-         * spin_lock/unlock sequence inside the ->nopage callback
-         * (for the pagecache lookup) that acts as an implicit
-         * smp_mb() and prevents the i_size read to happen
-         * after the next truncate_count read.
-         */
        /* no page was available -- either SIGBUS, OOM or REFAULT */
-        if (unlikely(new_page == NOPAGE_SIGBUS))
+        if (unlikely(nopage_page == NOPAGE_SIGBUS))
                return VM_FAULT_SIGBUS;
-        else if (unlikely(new_page == NOPAGE_OOM))
+        else if (unlikely(nopage_page == NOPAGE_OOM))
                return VM_FAULT_OOM;
-        else if (unlikely(new_page == NOPAGE_REFAULT))
+        else if (unlikely(nopage_page == NOPAGE_REFAULT))
                return VM_FAULT_MINOR;
+        BUG_ON(vma->vm_flags & VM_CAN_INVALIDATE && !PageLocked(nopage_page));
+        /*
+         * For consistency in subsequent calls, make the nopage_page always
+         * locked.
+         */
+        if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE)))
+                lock_page(nopage_page);
        /*
         * Should we do an early C-O-W break?
         */
+        page = nopage_page;
        if (write_access) {
                if (!(vma->vm_flags & VM_SHARED)) {
-                        struct page *page;
+                        if (unlikely(anon_vma_prepare(vma))) {
+                                ret = VM_FAULT_OOM;
-                        if (unlikely(anon_vma_prepare(vma)))
+                                goto out_error;
-                                goto oom;
+                        }
-                        page = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
+                        page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
-                                                vma, address);
+                        if (!page) {
-                        if (!page)
+                                ret = VM_FAULT_OOM;
-                                goto oom;
+                                goto out_error;
-                        copy_user_highpage(page, new_page, address, vma);
+                        }
-                        page_cache_release(new_page);
+                        copy_user_highpage(page, nopage_page, address, vma);
-                        new_page = page;
                        anon = 1;
                } else {
                        /* if the page will be shareable, see if the backing
                         * address space wants to know that the page is about
                         * to become writable */
                        if (vma->vm_ops->page_mkwrite &&
-                            vma->vm_ops->page_mkwrite(vma, new_page) < 0
+                            vma->vm_ops->page_mkwrite(vma, page) < 0) {
-                            ) {
+                                ret = VM_FAULT_SIGBUS;
-                                page_cache_release(new_page);
+                                goto out_error;
-                                return VM_FAULT_SIGBUS;
                        }
                }
        }
        page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-        /*
-         * For a file-backed vma, someone could have truncated or otherwise
-         * invalidated this page.  If unmap_mapping_range got called,
-         * retry getting the page.
-         */
-        if (mapping && unlikely(sequence != mapping->truncate_count)) {
-                pte_unmap_unlock(page_table, ptl);
-                page_cache_release(new_page);
-                cond_resched();
-                sequence = mapping->truncate_count;
-                smp_rmb();
-                goto retry;
-        }
        /*
         * This silly early PAGE_DIRTY setting removes a race
@@ -2388,43 +2373,51 @@ retry:
         * handle that later.
         */
        /* Only go through if we didn't race with anybody else... */
-        if (pte_none(*page_table)) {
+        if (likely(pte_none(*page_table))) {
-                flush_icache_page(vma, new_page);
+                flush_icache_page(vma, page);
-                entry = mk_pte(new_page, vma->vm_page_prot);
+                entry = mk_pte(page, vma->vm_page_prot);
                if (write_access)
                        entry = maybe_mkwrite(pte_mkdirty(entry), vma);
                set_pte_at(mm, address, page_table, entry);
                if (anon) {
-                        inc_mm_counter(mm, anon_rss);
+                        inc_mm_counter(mm, anon_rss);
-                        lru_cache_add_active(new_page);
+                        lru_cache_add_active(page);
-                        page_add_new_anon_rmap(new_page, vma, address);
+                        page_add_new_anon_rmap(page, vma, address);
                } else {
                        inc_mm_counter(mm, file_rss);
-                        page_add_file_rmap(new_page);
+                        page_add_file_rmap(page);
                        if (write_access) {
-                                dirty_page = new_page;
+                                dirty_page = page;
                                get_page(dirty_page);
                        }
                }
+                /* no need to invalidate: a not-present page won't be cached */
+                update_mmu_cache(vma, address, entry);
+                lazy_mmu_prot_update(entry);
        } else {
-                /* One of our sibling threads was faster, back out. */
+                if (anon)
-                page_cache_release(new_page);
+                        page_cache_release(page);
-                goto unlock;
+                else
+                        anon = 1; /* not anon, but release nopage_page */
        }
-        /* no need to invalidate: a not-present page shouldn't be cached */
-        update_mmu_cache(vma, address, entry);
-        lazy_mmu_prot_update(entry);
-unlock:
        pte_unmap_unlock(page_table, ptl);
-        if (dirty_page) {
+out:
+        unlock_page(nopage_page);
+        if (anon)
+                page_cache_release(nopage_page);
+        else if (dirty_page) {
                set_page_dirty_balance(dirty_page);
                put_page(dirty_page);
        }
        return ret;
-oom:
-        page_cache_release(new_page);
+out_error:
-        return VM_FAULT_OOM;
+        anon = 1; /* relase nopage_page */
+        goto out;
 }
 /*
author	Nick Piggin <npiggin@suse.de>	2007-07-19 04:46:57 -0400
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-07-19 13:04:41 -0400
commit	d00806b183152af6d24f46f0c33f14162ca1262a (patch)
tree	36f829cf13d5410374a3f00b56ec0b1f8dc3ce3c /mm/memory.c
parent	589f1e81bde732dd0b1bc5d01b6bddd4bcb4527b (diff)

diff --git a/mm/memory.c b/mm/memory.c
index 9c6ff7fffdc8..e6c99f6b5649 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1831,6 +1831,13 @@ static int unmap_mapping_range_vma(struct vm_area_struct *vma,
1831	unsigned long restart_addr;	1831	unsigned long restart_addr;
1832	int need_break;	1832	int need_break;
1833		1833
		1834	/*
		1835	* files that support invalidating or truncating portions of the
		1836	* file from under mmaped areas must set the VM_CAN_INVALIDATE flag, and
		1837	* have their .nopage function return the page locked.
		1838	*/
		1839	BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
		1840
1834	again:	1841	again:
1835	restart_addr = vma->vm_truncate_count;	1842	restart_addr = vma->vm_truncate_count;
1836	if (is_restart_addr(restart_addr) && start_addr < restart_addr) {	1843	if (is_restart_addr(restart_addr) && start_addr < restart_addr) {
@@ -1959,17 +1966,8 @@ void unmap_mapping_range(struct address_space *mapping,
1959		1966
1960	spin_lock(&mapping->i_mmap_lock);	1967	spin_lock(&mapping->i_mmap_lock);
1961		1968
1962	/* serialize i_size write against truncate_count write */	1969	/* Protect against endless unmapping loops */
1963	smp_wmb();
1964	/* Protect against page faults, and endless unmapping loops */
1965	mapping->truncate_count++;	1970	mapping->truncate_count++;
1966	/*
1967	* For archs where spin_lock has inclusive semantics like ia64
1968	* this smp_mb() will prevent to read pagetable contents
1969	* before the truncate_count increment is visible to
1970	* other cpus.
1971	*/
1972	smp_mb();
1973	if (unlikely(is_restart_addr(mapping->truncate_count))) {	1971	if (unlikely(is_restart_addr(mapping->truncate_count))) {
1974	if (mapping->truncate_count == 0)	1972	if (mapping->truncate_count == 0)
1975	reset_vma_truncate_counts(mapping);	1973	reset_vma_truncate_counts(mapping);
@@ -2008,8 +2006,18 @@ int vmtruncate(struct inode * inode, loff_t offset)
2008	if (IS_SWAPFILE(inode))	2006	if (IS_SWAPFILE(inode))
2009	goto out_busy;	2007	goto out_busy;
2010	i_size_write(inode, offset);	2008	i_size_write(inode, offset);
		2009
		2010	/*
		2011	* unmap_mapping_range is called twice, first simply for efficiency
		2012	* so that truncate_inode_pages does fewer single-page unmaps. However
		2013	* after this first call, and before truncate_inode_pages finishes,
		2014	* it is possible for private pages to be COWed, which remain after
		2015	* truncate_inode_pages finishes, hence the second unmap_mapping_range
		2016	* call must be made for correctness.
		2017	*/
2011	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);	2018	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
2012	truncate_inode_pages(mapping, offset);	2019	truncate_inode_pages(mapping, offset);
		2020	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
2013	goto out_truncate;	2021	goto out_truncate;
2014		2022
2015	do_expand:	2023	do_expand:
@@ -2049,6 +2057,7 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
2049	down_write(&inode->i_alloc_sem);	2057	down_write(&inode->i_alloc_sem);
2050	unmap_mapping_range(mapping, offset, (end - offset), 1);	2058	unmap_mapping_range(mapping, offset, (end - offset), 1);
2051	truncate_inode_pages_range(mapping, offset, end);	2059	truncate_inode_pages_range(mapping, offset, end);
		2060	unmap_mapping_range(mapping, offset, (end - offset), 1);
2052	inode->i_op->truncate_range(inode, offset, end);	2061	inode->i_op->truncate_range(inode, offset, end);
2053	up_write(&inode->i_alloc_sem);	2062	up_write(&inode->i_alloc_sem);
2054	mutex_unlock(&inode->i_mutex);	2063	mutex_unlock(&inode->i_mutex);
@@ -2206,7 +2215,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2206		2215
2207	/* No need to invalidate - it was non-present before */	2216	/* No need to invalidate - it was non-present before */
2208	update_mmu_cache(vma, address, pte);	2217	update_mmu_cache(vma, address, pte);
2209	lazy_mmu_prot_update(pte);
2210	unlock:	2218	unlock:
2211	pte_unmap_unlock(page_table, ptl);	2219	pte_unmap_unlock(page_table, ptl);
2212	out:	2220	out:
@@ -2297,10 +2305,8 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
2297	int write_access)	2305	int write_access)
2298	{	2306	{
2299	spinlock_t *ptl;	2307	spinlock_t *ptl;
2300	struct page *new_page;	2308	struct page *page, *nopage_page;
2301	struct address_space *mapping = NULL;
2302	pte_t entry;	2309	pte_t entry;
2303	unsigned int sequence = 0;
2304	int ret = VM_FAULT_MINOR;	2310	int ret = VM_FAULT_MINOR;
2305	int anon = 0;	2311	int anon = 0;
2306	struct page *dirty_page = NULL;	2312	struct page *dirty_page = NULL;
@@ -2308,74 +2314,53 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
2308	pte_unmap(page_table);	2314	pte_unmap(page_table);
2309	BUG_ON(vma->vm_flags & VM_PFNMAP);	2315	BUG_ON(vma->vm_flags & VM_PFNMAP);
2310		2316
2311	if (vma->vm_file) {	2317	nopage_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
2312	mapping = vma->vm_file->f_mapping;
2313	sequence = mapping->truncate_count;
2314	smp_rmb(); /* serializes i_size against truncate_count */
2315	}
2316	retry:
2317	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
2318	/*
2319	* No smp_rmb is needed here as long as there's a full
2320	* spin_lock/unlock sequence inside the ->nopage callback
2321	* (for the pagecache lookup) that acts as an implicit
2322	* smp_mb() and prevents the i_size read to happen
2323	* after the next truncate_count read.
2324	*/
2325
2326	/* no page was available -- either SIGBUS, OOM or REFAULT */	2318	/* no page was available -- either SIGBUS, OOM or REFAULT */
2327	if (unlikely(new_page == NOPAGE_SIGBUS))	2319	if (unlikely(nopage_page == NOPAGE_SIGBUS))
2328	return VM_FAULT_SIGBUS;	2320	return VM_FAULT_SIGBUS;
2329	else if (unlikely(new_page == NOPAGE_OOM))	2321	else if (unlikely(nopage_page == NOPAGE_OOM))
2330	return VM_FAULT_OOM;	2322	return VM_FAULT_OOM;
2331	else if (unlikely(new_page == NOPAGE_REFAULT))	2323	else if (unlikely(nopage_page == NOPAGE_REFAULT))
2332	return VM_FAULT_MINOR;	2324	return VM_FAULT_MINOR;
2333		2325
		2326	BUG_ON(vma->vm_flags & VM_CAN_INVALIDATE && !PageLocked(nopage_page));
		2327	/*
		2328	* For consistency in subsequent calls, make the nopage_page always
		2329	* locked.
		2330	*/
		2331	if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE)))
		2332	lock_page(nopage_page);
		2333
2334	/*	2334	/*
2335	* Should we do an early C-O-W break?	2335	* Should we do an early C-O-W break?
2336	*/	2336	*/
		2337	page = nopage_page;
2337	if (write_access) {	2338	if (write_access) {
2338	if (!(vma->vm_flags & VM_SHARED)) {	2339	if (!(vma->vm_flags & VM_SHARED)) {
2339	struct page *page;	2340	if (unlikely(anon_vma_prepare(vma))) {
2340		2341	ret = VM_FAULT_OOM;
2341	if (unlikely(anon_vma_prepare(vma)))	2342	goto out_error;
2342	goto oom;	2343	}
2343	page = alloc_page_vma(GFP_HIGHUSER_MOVABLE,	2344	page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
2344	vma, address);	2345	if (!page) {
2345	if (!page)	2346	ret = VM_FAULT_OOM;
2346	goto oom;	2347	goto out_error;
2347	copy_user_highpage(page, new_page, address, vma);	2348	}
2348	page_cache_release(new_page);	2349	copy_user_highpage(page, nopage_page, address, vma);
2349	new_page = page;
2350	anon = 1;	2350	anon = 1;
2351
2352	} else {	2351	} else {
2353	/* if the page will be shareable, see if the backing	2352	/* if the page will be shareable, see if the backing
2354	* address space wants to know that the page is about	2353	* address space wants to know that the page is about
2355	* to become writable */	2354	* to become writable */
2356	if (vma->vm_ops->page_mkwrite &&	2355	if (vma->vm_ops->page_mkwrite &&
2357	vma->vm_ops->page_mkwrite(vma, new_page) < 0	2356	vma->vm_ops->page_mkwrite(vma, page) < 0) {
2358	) {	2357	ret = VM_FAULT_SIGBUS;
2359	page_cache_release(new_page);	2358	goto out_error;
2360	return VM_FAULT_SIGBUS;
2361	}	2359	}
2362	}	2360	}
2363	}	2361	}
2364		2362
2365	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);	2363	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
2366	/*
2367	* For a file-backed vma, someone could have truncated or otherwise
2368	* invalidated this page. If unmap_mapping_range got called,
2369	* retry getting the page.
2370	*/
2371	if (mapping && unlikely(sequence != mapping->truncate_count)) {
2372	pte_unmap_unlock(page_table, ptl);
2373	page_cache_release(new_page);
2374	cond_resched();
2375	sequence = mapping->truncate_count;
2376	smp_rmb();
2377	goto retry;
2378	}
2379		2364
2380	/*	2365	/*
2381	* This silly early PAGE_DIRTY setting removes a race	2366	* This silly early PAGE_DIRTY setting removes a race
@@ -2388,43 +2373,51 @@ retry:
2388	* handle that later.	2373	* handle that later.
2389	*/	2374	*/
2390	/* Only go through if we didn't race with anybody else... */	2375	/* Only go through if we didn't race with anybody else... */
2391	if (pte_none(*page_table)) {	2376	if (likely(pte_none(*page_table))) {
2392	flush_icache_page(vma, new_page);	2377	flush_icache_page(vma, page);
2393	entry = mk_pte(new_page, vma->vm_page_prot);	2378	entry = mk_pte(page, vma->vm_page_prot);
2394	if (write_access)	2379	if (write_access)
2395	entry = maybe_mkwrite(pte_mkdirty(entry), vma);	2380	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2396	set_pte_at(mm, address, page_table, entry);	2381	set_pte_at(mm, address, page_table, entry);
2397	if (anon) {	2382	if (anon) {
2398	inc_mm_counter(mm, anon_rss);	2383	inc_mm_counter(mm, anon_rss);
2399	lru_cache_add_active(new_page);	2384	lru_cache_add_active(page);
2400	page_add_new_anon_rmap(new_page, vma, address);	2385	page_add_new_anon_rmap(page, vma, address);
2401	} else {	2386	} else {
2402	inc_mm_counter(mm, file_rss);	2387	inc_mm_counter(mm, file_rss);
2403	page_add_file_rmap(new_page);	2388	page_add_file_rmap(page);
2404	if (write_access) {	2389	if (write_access) {
2405	dirty_page = new_page;	2390	dirty_page = page;
2406	get_page(dirty_page);	2391	get_page(dirty_page);
2407	}	2392	}
2408	}	2393	}
		2394
		2395	/* no need to invalidate: a not-present page won't be cached */
		2396	update_mmu_cache(vma, address, entry);
		2397	lazy_mmu_prot_update(entry);
2409	} else {	2398	} else {
2410	/* One of our sibling threads was faster, back out. */	2399	if (anon)
2411	page_cache_release(new_page);	2400	page_cache_release(page);
2412	goto unlock;	2401	else
		2402	anon = 1; /* not anon, but release nopage_page */
2413	}	2403	}
2414		2404
2415	/* no need to invalidate: a not-present page shouldn't be cached */
2416	update_mmu_cache(vma, address, entry);
2417	lazy_mmu_prot_update(entry);
2418	unlock:
2419	pte_unmap_unlock(page_table, ptl);	2405	pte_unmap_unlock(page_table, ptl);
2420	if (dirty_page) {	2406
		2407	out:
		2408	unlock_page(nopage_page);
		2409	if (anon)
		2410	page_cache_release(nopage_page);
		2411	else if (dirty_page) {
2421	set_page_dirty_balance(dirty_page);	2412	set_page_dirty_balance(dirty_page);
2422	put_page(dirty_page);	2413	put_page(dirty_page);
2423	}	2414	}
		2415
2424	return ret;	2416	return ret;
2425	oom:	2417
2426	page_cache_release(new_page);	2418	out_error:
2427	return VM_FAULT_OOM;	2419	anon = 1; /* relase nopage_page */
		2420	goto out;
2428	}	2421	}
2429		2422
2430	/*	2423	/*