Diffstat (limited to 'mm/shmem.c')
-rw-r--r--  mm/shmem.c  256
1 file changed, 99 insertions(+), 157 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index c244e93a70fa..d4e184e2a38e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -264,46 +264,55 @@ static int shmem_radix_tree_replace(struct address_space *mapping,
 }
 
 /*
+ * Sometimes, before we decide whether to proceed or to fail, we must check
+ * that an entry was not already brought back from swap by a racing thread.
+ *
+ * Checking page is not enough: by the time a SwapCache page is locked, it
+ * might be reused, and again be SwapCache, using the same swap as before.
+ */
+static bool shmem_confirm_swap(struct address_space *mapping,
+                               pgoff_t index, swp_entry_t swap)
+{
+        void *item;
+
+        rcu_read_lock();
+        item = radix_tree_lookup(&mapping->page_tree, index);
+        rcu_read_unlock();
+        return item == swp_to_radix_entry(swap);
+}
+
+/*
  * Like add_to_page_cache_locked, but error if expected item has gone.
  */
 static int shmem_add_to_page_cache(struct page *page,
                                    struct address_space *mapping,
                                    pgoff_t index, gfp_t gfp, void *expected)
 {
-        int error = 0;
+        int error;
 
         VM_BUG_ON(!PageLocked(page));
         VM_BUG_ON(!PageSwapBacked(page));
 
+        page_cache_get(page);
+        page->mapping = mapping;
+        page->index = index;
+
+        spin_lock_irq(&mapping->tree_lock);
         if (!expected)
-                error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
+                error = radix_tree_insert(&mapping->page_tree, index, page);
+        else
+                error = shmem_radix_tree_replace(mapping, index, expected,
+                                                 page);
         if (!error) {
-                page_cache_get(page);
-                page->mapping = mapping;
-                page->index = index;
-
-                spin_lock_irq(&mapping->tree_lock);
-                if (!expected)
-                        error = radix_tree_insert(&mapping->page_tree,
-                                                  index, page);
-                else
-                        error = shmem_radix_tree_replace(mapping, index,
-                                                         expected, page);
-                if (!error) {
-                        mapping->nrpages++;
-                        __inc_zone_page_state(page, NR_FILE_PAGES);
-                        __inc_zone_page_state(page, NR_SHMEM);
-                        spin_unlock_irq(&mapping->tree_lock);
-                } else {
-                        page->mapping = NULL;
-                        spin_unlock_irq(&mapping->tree_lock);
-                        page_cache_release(page);
-                }
-                if (!expected)
-                        radix_tree_preload_end();
+                mapping->nrpages++;
+                __inc_zone_page_state(page, NR_FILE_PAGES);
+                __inc_zone_page_state(page, NR_SHMEM);
+                spin_unlock_irq(&mapping->tree_lock);
+        } else {
+                page->mapping = NULL;
+                spin_unlock_irq(&mapping->tree_lock);
+                page_cache_release(page);
         }
-        if (error)
-                mem_cgroup_uncharge_cache_page(page);
         return error;
 }
 
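The hunk above adds shmem_confirm_swap() as a cheap, lockless recheck: look up the radix_tree slot under RCU and compare it with the swap entry we expect, so a caller about to reinstate a swapped-out page can bail out if a racing thread already brought it back. As a rough userspace analogy (nothing below is from mm/shmem.c; slot, slot_confirm() and slot_replace() are invented names, and a C11 compare-exchange stands in for the replace done under tree_lock):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uintptr_t slot;          /* stands in for one radix_tree slot */

static bool slot_confirm(uintptr_t expected)
{
        /* cheap lockless read, like radix_tree_lookup() under rcu_read_lock() */
        return atomic_load(&slot) == expected;
}

static bool slot_replace(uintptr_t expected, uintptr_t newval)
{
        /* the commit step: succeeds only if nobody raced with us */
        return atomic_compare_exchange_strong(&slot, &expected, newval);
}

int main(void)
{
        uintptr_t swap_entry = (0x2a << 1) | 1; /* pretend exceptional entry */
        uintptr_t page = 0x1000;                /* pretend struct page pointer */

        atomic_store(&slot, swap_entry);

        if (slot_confirm(swap_entry) && slot_replace(swap_entry, page))
                puts("swap entry still in place: replaced it with the page");
        else
                puts("raced: another thread already brought the page back");
        return 0;
}

The point is the ordering: the cheap unlocked peek filters out the common race early, and the final replace still verifies the expected value before committing.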
@@ -683,10 +692,21 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
                 mutex_lock(&shmem_swaplist_mutex);
                 /*
                  * We needed to drop mutex to make that restrictive page
-                 * allocation; but the inode might already be freed by now,
-                 * and we cannot refer to inode or mapping or info to check.
-                 * However, we do hold page lock on the PageSwapCache page,
-                 * so can check if that still has our reference remaining.
+                 * allocation, but the inode might have been freed while we
+                 * dropped it: although a racing shmem_evict_inode() cannot
+                 * complete without emptying the radix_tree, our page lock
+                 * on this swapcache page is not enough to prevent that -
+                 * free_swap_and_cache() of our swap entry will only
+                 * trylock_page(), removing swap from radix_tree whatever.
+                 *
+                 * We must not proceed to shmem_add_to_page_cache() if the
+                 * inode has been freed, but of course we cannot rely on
+                 * inode or mapping or info to check that. However, we can
+                 * safely check if our swap entry is still in use (and here
+                 * it can't have got reused for another page): if it's still
+                 * in use, then the inode cannot have been freed yet, and we
+                 * can safely proceed (if it's no longer in use, that tells
+                 * nothing about the inode, but we don't need to unuse swap).
                  */
                 if (!page_swapcount(*pagep))
                         error = -ENOENT;
@@ -730,9 +750,9 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
 
         /*
          * There's a faint possibility that swap page was replaced before
-         * caller locked it: it will come back later with the right page.
+         * caller locked it: caller will come back later with the right page.
          */
-        if (unlikely(!PageSwapCache(page)))
+        if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val))
                 goto out;
 
         /*
@@ -909,7 +929,8 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
 
         /* Create a pseudo vma that just contains the policy */
         pvma.vm_start = 0;
-        pvma.vm_pgoff = index;
+        /* Bias interleave by inode number to distribute better across nodes */
+        pvma.vm_pgoff = index + info->vfs_inode.i_ino;
         pvma.vm_ops = NULL;
         pvma.vm_policy = spol;
         return swapin_readahead(swap, gfp, &pvma, 0);
@@ -922,7 +943,8 @@ static struct page *shmem_alloc_page(gfp_t gfp,
 
         /* Create a pseudo vma that just contains the policy */
         pvma.vm_start = 0;
-        pvma.vm_pgoff = index;
+        /* Bias interleave by inode number to distribute better across nodes */
+        pvma.vm_pgoff = index + info->vfs_inode.i_ino;
         pvma.vm_ops = NULL;
         pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index);
 
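Both pseudo-vma hunks above add the same one-line bias: vm_pgoff becomes index + i_ino instead of index. With MPOL_INTERLEAVE the target node is derived from the page offset, so without the bias page 0 of every small file tends to land on the same node. A tiny standalone C sketch of the arithmetic (the four-node count and inode numbers are invented, and the plain modulo is only an approximation of the kernel's interleave calculation):

#include <stdio.h>

int main(void)
{
        const unsigned long nr_nodes = 4;
        const unsigned long inodes[] = { 1001, 1002, 1003, 1004, 1005 };
        const unsigned long index = 0;  /* first page of each small file */

        for (unsigned int i = 0; i < sizeof(inodes) / sizeof(inodes[0]); i++) {
                unsigned long ino = inodes[i];
                printf("ino %lu: plain index -> node %lu, index + ino -> node %lu\n",
                       ino,
                       index % nr_nodes,          /* always node 0 */
                       (index + ino) % nr_nodes); /* spread across nodes */
        }
        return 0;
}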
@@ -995,21 +1017,15 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
         newpage = shmem_alloc_page(gfp, info, index);
         if (!newpage)
                 return -ENOMEM;
-        VM_BUG_ON(shmem_should_replace_page(newpage, gfp));
 
-        *pagep = newpage;
         page_cache_get(newpage);
         copy_highpage(newpage, oldpage);
+        flush_dcache_page(newpage);
 
-        VM_BUG_ON(!PageLocked(oldpage));
         __set_page_locked(newpage);
-        VM_BUG_ON(!PageUptodate(oldpage));
         SetPageUptodate(newpage);
-        VM_BUG_ON(!PageSwapBacked(oldpage));
         SetPageSwapBacked(newpage);
-        VM_BUG_ON(!swap_index);
         set_page_private(newpage, swap_index);
-        VM_BUG_ON(!PageSwapCache(oldpage));
         SetPageSwapCache(newpage);
 
         /*
@@ -1019,13 +1035,24 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
         spin_lock_irq(&swap_mapping->tree_lock);
         error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
                                                                    newpage);
-        __inc_zone_page_state(newpage, NR_FILE_PAGES);
-        __dec_zone_page_state(oldpage, NR_FILE_PAGES);
+        if (!error) {
+                __inc_zone_page_state(newpage, NR_FILE_PAGES);
+                __dec_zone_page_state(oldpage, NR_FILE_PAGES);
+        }
         spin_unlock_irq(&swap_mapping->tree_lock);
-        BUG_ON(error);
 
-        mem_cgroup_replace_page_cache(oldpage, newpage);
-        lru_cache_add_anon(newpage);
+        if (unlikely(error)) {
+                /*
+                 * Is this possible?  I think not, now that our callers check
+                 * both PageSwapCache and page_private after getting page lock;
+                 * but be defensive.  Reverse old to newpage for clear and free.
+                 */
+                oldpage = newpage;
+        } else {
+                mem_cgroup_replace_page_cache(oldpage, newpage);
+                lru_cache_add_anon(newpage);
+                *pagep = newpage;
+        }
 
         ClearPageSwapCache(oldpage);
         set_page_private(oldpage, 0);
@@ -1033,7 +1060,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
         unlock_page(oldpage);
         page_cache_release(oldpage);
         page_cache_release(oldpage);
-        return 0;
+        return error;
 }
 
 /*
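The error path added to shmem_replace_page() above uses a small trick worth spelling out: when the radix_tree replacement fails, "oldpage = newpage" redirects the single teardown path that follows, so it clears and releases the page we no longer want while the caller keeps the untouched original. A hedged userspace sketch of the same shape (struct buf, make_buf() and replace_buf() are invented for illustration):

#include <stdio.h>
#include <stdlib.h>

struct buf { char data[32]; };

static struct buf *make_buf(const char *s)
{
        struct buf *b = malloc(sizeof(*b));
        if (b)
                snprintf(b->data, sizeof(b->data), "%s", s);
        return b;
}

static int replace_buf(struct buf **slot, int simulate_failure)
{
        struct buf *oldbuf = *slot;
        struct buf *newbuf = make_buf("replacement");
        int error;

        if (!newbuf)
                return -1;

        error = simulate_failure ? -1 : 0;      /* stands in for the tree swap */
        if (error) {
                /* reverse old to new: the common teardown below frees newbuf,
                   and the caller keeps using the untouched *slot */
                oldbuf = newbuf;
        } else {
                *slot = newbuf;                 /* commit: caller now sees newbuf */
        }

        /* common teardown, taken on both paths */
        free(oldbuf);
        return error;
}

int main(void)
{
        struct buf *slot = make_buf("original");

        if (!slot)
                return 1;
        printf("forced failure: %d, slot still \"%s\"\n",
               replace_buf(&slot, 1), slot->data);
        printf("success: %d, slot now \"%s\"\n",
               replace_buf(&slot, 0), slot->data);
        free(slot);
        return 0;
}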
@@ -1107,9 +1134,10 @@ repeat:
 
                 /* We have to do this with page locked to prevent races */
                 lock_page(page);
-                if (!PageSwapCache(page) || page->mapping) {
+                if (!PageSwapCache(page) || page_private(page) != swap.val ||
+                    !shmem_confirm_swap(mapping, index, swap)) {
                         error = -EEXIST;        /* try again */
-                        goto failed;
+                        goto unlock;
                 }
                 if (!PageUptodate(page)) {
                         error = -EIO;
@@ -1125,9 +1153,12 @@ repeat:
 
                 error = mem_cgroup_cache_charge(page, current->mm,
                                                 gfp & GFP_RECLAIM_MASK);
-                if (!error)
+                if (!error) {
                         error = shmem_add_to_page_cache(page, mapping, index,
                                                 gfp, swp_to_radix_entry(swap));
+                        /* We already confirmed swap, and make no allocation */
+                        VM_BUG_ON(error);
+                }
                 if (error)
                         goto failed;
 
@@ -1164,11 +1195,18 @@ repeat:
                 __set_page_locked(page);
                 error = mem_cgroup_cache_charge(page, current->mm,
                                                 gfp & GFP_RECLAIM_MASK);
-                if (!error)
-                        error = shmem_add_to_page_cache(page, mapping, index,
-                                                gfp, NULL);
                 if (error)
                         goto decused;
+                error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
+                if (!error) {
+                        error = shmem_add_to_page_cache(page, mapping, index,
+                                                        gfp, NULL);
+                        radix_tree_preload_end();
+                }
+                if (error) {
+                        mem_cgroup_uncharge_cache_page(page);
+                        goto decused;
+                }
                 lru_cache_add_anon(page);
 
                 spin_lock(&info->lock);
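This hunk moves radix_tree_preload() out of shmem_add_to_page_cache() and into the caller: the node allocation, which may sleep, now happens before the spinlocked insert, and radix_tree_preload_end() plus the memcg uncharge on failure become the caller's job. The general shape, allocate while you may still sleep, then insert under a non-sleeping lock without allocating, looks like this in a plain userspace sketch (the linked list and pthread spinlock are illustrative only; compile with -pthread):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
        int value;
        struct node *next;
};

static struct node *head;
static pthread_spinlock_t list_lock;

static int insert_value(int value)
{
        /* "preload": do the allocation while sleeping is still allowed */
        struct node *n = malloc(sizeof(*n));
        if (!n)
                return -1;
        n->value = value;

        /* the critical section itself never allocates */
        pthread_spin_lock(&list_lock);
        n->next = head;
        head = n;
        pthread_spin_unlock(&list_lock);
        return 0;
}

int main(void)
{
        pthread_spin_init(&list_lock, PTHREAD_PROCESS_PRIVATE);
        for (int i = 0; i < 3; i++)
                insert_value(i);
        for (struct node *n = head; n; n = n->next)
                printf("%d\n", n->value);
        return 0;
}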
@@ -1228,14 +1266,10 @@ decused:
 unacct:
         shmem_unacct_blocks(info->flags, 1);
 failed:
-        if (swap.val && error != -EINVAL) {
-                struct page *test = find_get_page(mapping, index);
-                if (test && !radix_tree_exceptional_entry(test))
-                        page_cache_release(test);
-                /* Have another try if the entry has changed */
-                if (test != swp_to_radix_entry(swap))
-                        error = -EEXIST;
-        }
+        if (swap.val && error != -EINVAL &&
+            !shmem_confirm_swap(mapping, index, swap))
+                error = -EEXIST;
+unlock:
         if (page) {
                 unlock_page(page);
                 page_cache_release(page);
@@ -1247,7 +1281,7 @@ failed:
                 spin_unlock(&info->lock);
                 goto repeat;
         }
-        if (error == -EEXIST)
+        if (error == -EEXIST)   /* from above or from radix_tree_insert */
                 goto repeat;
         return error;
 }
@@ -1675,98 +1709,6 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
         return error;
 }
 
-/*
- * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
- */
-static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
-                                    pgoff_t index, pgoff_t end, int origin)
-{
-        struct page *page;
-        struct pagevec pvec;
-        pgoff_t indices[PAGEVEC_SIZE];
-        bool done = false;
-        int i;
-
-        pagevec_init(&pvec, 0);
-        pvec.nr = 1;            /* start small: we may be there already */
-        while (!done) {
-                pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
-                                        pvec.nr, pvec.pages, indices);
-                if (!pvec.nr) {
-                        if (origin == SEEK_DATA)
-                                index = end;
-                        break;
-                }
-                for (i = 0; i < pvec.nr; i++, index++) {
-                        if (index < indices[i]) {
-                                if (origin == SEEK_HOLE) {
-                                        done = true;
-                                        break;
-                                }
-                                index = indices[i];
-                        }
-                        page = pvec.pages[i];
-                        if (page && !radix_tree_exceptional_entry(page)) {
-                                if (!PageUptodate(page))
-                                        page = NULL;
-                        }
-                        if (index >= end ||
-                            (page && origin == SEEK_DATA) ||
-                            (!page && origin == SEEK_HOLE)) {
-                                done = true;
-                                break;
-                        }
-                }
-                shmem_deswap_pagevec(&pvec);
-                pagevec_release(&pvec);
-                pvec.nr = PAGEVEC_SIZE;
-                cond_resched();
-        }
-        return index;
-}
-
-static loff_t shmem_file_llseek(struct file *file, loff_t offset, int origin)
-{
-        struct address_space *mapping;
-        struct inode *inode;
-        pgoff_t start, end;
-        loff_t new_offset;
-
-        if (origin != SEEK_DATA && origin != SEEK_HOLE)
-                return generic_file_llseek_size(file, offset, origin,
-                                                MAX_LFS_FILESIZE);
-        mapping = file->f_mapping;
-        inode = mapping->host;
-        mutex_lock(&inode->i_mutex);
-        /* We're holding i_mutex so we can access i_size directly */
-
-        if (offset < 0)
-                offset = -EINVAL;
-        else if (offset >= inode->i_size)
-                offset = -ENXIO;
-        else {
-                start = offset >> PAGE_CACHE_SHIFT;
-                end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-                new_offset = shmem_seek_hole_data(mapping, start, end, origin);
-                new_offset <<= PAGE_CACHE_SHIFT;
-                if (new_offset > offset) {
-                        if (new_offset < inode->i_size)
-                                offset = new_offset;
-                        else if (origin == SEEK_DATA)
-                                offset = -ENXIO;
-                        else
-                                offset = inode->i_size;
-                }
-        }
-
-        if (offset >= 0 && offset != file->f_pos) {
-                file->f_pos = offset;
-                file->f_version = 0;
-        }
-        mutex_unlock(&inode->i_mutex);
-        return offset;
-}
-
 static long shmem_fallocate(struct file *file, int mode, loff_t offset,
                             loff_t len)
 {
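The block removed above implemented lseek(2)'s SEEK_DATA and SEEK_HOLE for tmpfs by walking the radix_tree; with .llseek switched to generic_file_llseek (last hunk below), tmpfs falls back to the generic implementation, which reports the whole file as data. For reference, the interface that code served looks like this from userspace (a hedged sketch, not from mm/shmem.c; /tmp/probe is an arbitrary path and the whence values need a kernel and filesystem that support them):

#define _GNU_SOURCE             /* for SEEK_DATA / SEEK_HOLE */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/tmp/probe", O_RDWR | O_CREAT | O_TRUNC, 0600);
        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* make a 1MiB file with data only in its last byte */
        if (pwrite(fd, "x", 1, 1024 * 1024 - 1) != 1) {
                perror("pwrite");
                return 1;
        }
        off_t data = lseek(fd, 0, SEEK_DATA);
        off_t hole = lseek(fd, 0, SEEK_HOLE);
        printf("first data at %lld, first hole at %lld\n",
               (long long)data, (long long)hole);
        close(fd);
        unlink("/tmp/probe");
        return 0;
}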
@@ -1937,7 +1879,7 @@ static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 }
 
 static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                        struct nameidata *nd)
+                        bool excl)
 {
         return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
 }
@@ -2770,7 +2712,7 @@ static const struct address_space_operations shmem_aops = {
 static const struct file_operations shmem_file_operations = {
         .mmap           = shmem_mmap,
 #ifdef CONFIG_TMPFS
-        .llseek         = shmem_file_llseek,
+        .llseek         = generic_file_llseek,
         .read           = do_sync_read,
         .write          = do_sync_write,
         .aio_read       = shmem_file_aio_read,