Diffstat (limited to 'mm/filemap.c')
-rw-r--r--  mm/filemap.c  297
1 file changed, 38 insertions, 259 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 876bc595d0f8..f3e5f8944d17 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -33,6 +33,7 @@
 #include <linux/cpuset.h>
 #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
 #include <linux/memcontrol.h>
+#include <linux/mm_inline.h> /* for page_is_file_cache() */
 #include "internal.h"
 
 /*
@@ -115,12 +116,12 @@ void __remove_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 
-	mem_cgroup_uncharge_cache_page(page);
 	radix_tree_delete(&mapping->page_tree, page->index);
 	page->mapping = NULL;
 	mapping->nrpages--;
 	__dec_zone_page_state(page, NR_FILE_PAGES);
 	BUG_ON(page_mapped(page));
+	mem_cgroup_uncharge_cache_page(page);
 
 	/*
 	 * Some filesystems seem to re-dirty the page even after
@@ -492,9 +493,24 @@ EXPORT_SYMBOL(add_to_page_cache_locked);
 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 				pgoff_t offset, gfp_t gfp_mask)
 {
-	int ret = add_to_page_cache(page, mapping, offset, gfp_mask);
-	if (ret == 0)
-		lru_cache_add(page);
+	int ret;
+
+	/*
+	 * Splice_read and readahead add shmem/tmpfs pages into the page cache
+	 * before shmem_readpage has a chance to mark them as SwapBacked: they
+	 * need to go on the active_anon lru below, and mem_cgroup_cache_charge
+	 * (called in add_to_page_cache) needs to know where they're going too.
+	 */
+	if (mapping_cap_swap_backed(mapping))
+		SetPageSwapBacked(page);
+
+	ret = add_to_page_cache(page, mapping, offset, gfp_mask);
+	if (ret == 0) {
+		if (page_is_file_cache(page))
+			lru_cache_add_file(page);
+		else
+			lru_cache_add_active_anon(page);
+	}
 	return ret;
 }
 
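As the new comment explains, PageSwapBacked must be set before the charge taken in add_to_page_cache(), so that both the memory controller and the LRU choice see shmem/tmpfs pages as swap-backed. For orientation, here is a minimal caller-side sketch, loosely modelled on the readahead path (read_pages() in mm/readahead.c); the example_read_one() wrapper and the assumption that filp and index come from a surrounding loop are illustrative only:

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>

/* Illustrative caller: allocate a page, insert it at @index and let
 * add_to_page_cache_lru() route it to the file LRU or, for a shmem/tmpfs
 * mapping, the active anon LRU, then start the read. */
static int example_read_one(struct file *filp, struct address_space *mapping,
			    pgoff_t index)
{
	struct page *page = page_cache_alloc_cold(mapping);

	if (!page)
		return -ENOMEM;

	if (!add_to_page_cache_lru(page, mapping, index, GFP_KERNEL))
		mapping->a_ops->readpage(filp, page);	/* page was inserted locked; ->readpage() unlocks it */
	page_cache_release(page);
	return 0;
}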
@@ -557,17 +573,14 @@ EXPORT_SYMBOL(wait_on_page_bit);
  * mechananism between PageLocked pages and PageWriteback pages is shared.
  * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
  *
- * The first mb is necessary to safely close the critical section opened by the
- * test_and_set_bit() to lock the page; the second mb is necessary to enforce
- * ordering between the clear_bit and the read of the waitqueue (to avoid SMP
- * races with a parallel wait_on_page_locked()).
+ * The mb is necessary to enforce ordering between the clear_bit and the read
+ * of the waitqueue (to avoid SMP races with a parallel wait_on_page_locked()).
  */
 void unlock_page(struct page *page)
 {
-	smp_mb__before_clear_bit();
-	if (!test_and_clear_bit(PG_locked, &page->flags))
-		BUG();
-	smp_mb__after_clear_bit();
+	VM_BUG_ON(!PageLocked(page));
+	clear_bit_unlock(PG_locked, &page->flags);
+	smp_mb__after_clear_bit();
 	wake_up_page(page, PG_locked);
 }
 EXPORT_SYMBOL(unlock_page);
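The rewritten unlock_page() relies on clear_bit_unlock(), which has release semantics, so the old smp_mb__before_clear_bit() is no longer needed; only the barrier ordering the bit clear against the waitqueue read survives. The acquire side pairs with it through the _lock bitop. A hedged sketch of that pairing (the real helper lives in include/linux/pagemap.h; example_trylock_page() is an approximation, not a quote):

/* Acquire side: test_and_set_bit_lock() has acquire semantics, so no
 * extra barrier is needed once PG_locked is taken; returns non-zero on
 * success, like a trylock. */
static inline int example_trylock_page(struct page *page)
{
	return !test_and_set_bit_lock(PG_locked, &page->flags);
}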
@@ -1100,8 +1113,9 @@ page_ok:
 
 page_not_up_to_date:
 	/* Get exclusive access to the page ... */
-	if (lock_page_killable(page))
-		goto readpage_eio;
+	error = lock_page_killable(page);
+	if (unlikely(error))
+		goto readpage_error;
 
 page_not_up_to_date_locked:
 	/* Did it get truncated before we got the lock? */
@@ -1130,8 +1144,9 @@ readpage:
 		}
 
 		if (!PageUptodate(page)) {
-			if (lock_page_killable(page))
-				goto readpage_eio;
+			error = lock_page_killable(page);
+			if (unlikely(error))
+				goto readpage_error;
 			if (!PageUptodate(page)) {
 				if (page->mapping == NULL) {
 					/*
@@ -1143,15 +1158,14 @@ readpage:
 				}
 				unlock_page(page);
 				shrink_readahead_size_eio(filp, ra);
-				goto readpage_eio;
+				error = -EIO;
+				goto readpage_error;
 			}
 			unlock_page(page);
 		}
 
 		goto page_ok;
 
-readpage_eio:
-		error = -EIO;
 readpage_error:
 		/* UHHUH! A synchronous read error occurred. Report it */
 		desc->error = error;
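With the readpage_eio label gone, the return value of lock_page_killable() (for example -EINTR when a fatal signal interrupts the sleep) now reaches the read descriptor unchanged, and -EIO is reserved for a page that genuinely failed to become uptodate. A hedged sketch of the helper's contract, approximating the include/linux/pagemap.h definition:

/* Returns 0 with the page locked, or a negative errno (such as -EINTR
 * from a fatal signal) without it; the caller above forwards that errno. */
static inline int example_lock_page_killable(struct page *page)
{
	might_sleep();
	if (!trylock_page(page))
		return __lock_page_killable(page);
	return 0;
}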
@@ -1186,8 +1200,7 @@ out:
 	ra->prev_pos |= prev_offset;
 
 	*ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
-	if (filp)
-		file_accessed(filp);
+	file_accessed(filp);
 }
 
 int file_read_actor(read_descriptor_t *desc, struct page *page,
@@ -2016,48 +2029,8 @@ int pagecache_write_begin(struct file *file, struct address_space *mapping,
 {
 	const struct address_space_operations *aops = mapping->a_ops;
 
-	if (aops->write_begin) {
-		return aops->write_begin(file, mapping, pos, len, flags,
+	return aops->write_begin(file, mapping, pos, len, flags,
 							pagep, fsdata);
-	} else {
-		int ret;
-		pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-		unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
-		struct inode *inode = mapping->host;
-		struct page *page;
-again:
-		page = __grab_cache_page(mapping, index);
-		*pagep = page;
-		if (!page)
-			return -ENOMEM;
-
-		if (flags & AOP_FLAG_UNINTERRUPTIBLE && !PageUptodate(page)) {
-			/*
-			 * There is no way to resolve a short write situation
-			 * for a !Uptodate page (except by double copying in
-			 * the caller done by generic_perform_write_2copy).
-			 *
-			 * Instead, we have to bring it uptodate here.
-			 */
-			ret = aops->readpage(file, page);
-			page_cache_release(page);
-			if (ret) {
-				if (ret == AOP_TRUNCATED_PAGE)
-					goto again;
-				return ret;
-			}
-			goto again;
-		}
-
-		ret = aops->prepare_write(file, page, offset, offset+len);
-		if (ret) {
-			unlock_page(page);
-			page_cache_release(page);
-			if (pos + len > inode->i_size)
-				vmtruncate(inode, inode->i_size);
-		}
-		return ret;
-	}
 }
 EXPORT_SYMBOL(pagecache_write_begin);
 
@@ -2066,32 +2039,9 @@ int pagecache_write_end(struct file *file, struct address_space *mapping,
 		struct page *page, void *fsdata)
 {
 	const struct address_space_operations *aops = mapping->a_ops;
-	int ret;
-
-	if (aops->write_end) {
-		mark_page_accessed(page);
-		ret = aops->write_end(file, mapping, pos, len, copied,
-							page, fsdata);
-	} else {
-		unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
-		struct inode *inode = mapping->host;
-
-		flush_dcache_page(page);
-		ret = aops->commit_write(file, page, offset, offset+len);
-		unlock_page(page);
-		mark_page_accessed(page);
-		page_cache_release(page);
 
-		if (ret < 0) {
-			if (pos + len > inode->i_size)
-				vmtruncate(inode, inode->i_size);
-		} else if (ret > 0)
-			ret = min_t(size_t, copied, ret);
-		else
-			ret = copied;
-	}
-
-	return ret;
+	mark_page_accessed(page);
+	return aops->write_end(file, mapping, pos, len, copied, page, fsdata);
 }
 EXPORT_SYMBOL(pagecache_write_end);
 
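With the prepare_write/commit_write fallbacks removed from both helpers, any filesystem reached through pagecache_write_begin()/pagecache_write_end() has to supply ->write_begin and ->write_end itself. For a simple block-device-backed filesystem the buffer-layer helpers usually do the heavy lifting; the sketch below is illustrative only, and the examplefs_* names (including the get_block, readpage and writepage callbacks, declared but not shown) are invented:

#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/pagemap.h>

static int examplefs_get_block(struct inode *, sector_t, struct buffer_head *, int);
static int examplefs_readpage(struct file *, struct page *);
static int examplefs_writepage(struct page *, struct writeback_control *);

static int examplefs_write_begin(struct file *file, struct address_space *mapping,
				 loff_t pos, unsigned len, unsigned flags,
				 struct page **pagep, void **fsdata)
{
	*pagep = NULL;
	return block_write_begin(file, mapping, pos, len, flags,
				 pagep, fsdata, examplefs_get_block);
}

static const struct address_space_operations examplefs_aops = {
	.readpage	= examplefs_readpage,
	.writepage	= examplefs_writepage,
	.write_begin	= examplefs_write_begin,
	.write_end	= generic_write_end,	/* generic buffer-layer commit */
};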
@@ -2213,174 +2163,6 @@ repeat:
 }
 EXPORT_SYMBOL(__grab_cache_page);
 
-static ssize_t generic_perform_write_2copy(struct file *file,
-		struct iov_iter *i, loff_t pos)
-{
-	struct address_space *mapping = file->f_mapping;
-	const struct address_space_operations *a_ops = mapping->a_ops;
-	struct inode *inode = mapping->host;
-	long status = 0;
-	ssize_t written = 0;
-
-	do {
-		struct page *src_page;
-		struct page *page;
-		pgoff_t index;		/* Pagecache index for current page */
-		unsigned long offset;	/* Offset into pagecache page */
-		unsigned long bytes;	/* Bytes to write to page */
-		size_t copied;		/* Bytes copied from user */
-
-		offset = (pos & (PAGE_CACHE_SIZE - 1));
-		index = pos >> PAGE_CACHE_SHIFT;
-		bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
-						iov_iter_count(i));
-
-		/*
-		 * a non-NULL src_page indicates that we're doing the
-		 * copy via get_user_pages and kmap.
-		 */
-		src_page = NULL;
-
-		/*
-		 * Bring in the user page that we will copy from _first_.
-		 * Otherwise there's a nasty deadlock on copying from the
-		 * same page as we're writing to, without it being marked
-		 * up-to-date.
-		 *
-		 * Not only is this an optimisation, but it is also required
-		 * to check that the address is actually valid, when atomic
-		 * usercopies are used, below.
-		 */
-		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
-			status = -EFAULT;
-			break;
-		}
-
-		page = __grab_cache_page(mapping, index);
-		if (!page) {
-			status = -ENOMEM;
-			break;
-		}
-
-		/*
-		 * non-uptodate pages cannot cope with short copies, and we
-		 * cannot take a pagefault with the destination page locked.
-		 * So pin the source page to copy it.
-		 */
-		if (!PageUptodate(page) && !segment_eq(get_fs(), KERNEL_DS)) {
-			unlock_page(page);
-
-			src_page = alloc_page(GFP_KERNEL);
-			if (!src_page) {
-				page_cache_release(page);
-				status = -ENOMEM;
-				break;
-			}
-
-			/*
-			 * Cannot get_user_pages with a page locked for the
-			 * same reason as we can't take a page fault with a
-			 * page locked (as explained below).
-			 */
-			copied = iov_iter_copy_from_user(src_page, i,
-							offset, bytes);
-			if (unlikely(copied == 0)) {
-				status = -EFAULT;
-				page_cache_release(page);
-				page_cache_release(src_page);
-				break;
-			}
-			bytes = copied;
-
-			lock_page(page);
-			/*
-			 * Can't handle the page going uptodate here, because
-			 * that means we would use non-atomic usercopies, which
-			 * zero out the tail of the page, which can cause
-			 * zeroes to become transiently visible. We could just
-			 * use a non-zeroing copy, but the APIs aren't too
-			 * consistent.
-			 */
-			if (unlikely(!page->mapping || PageUptodate(page))) {
-				unlock_page(page);
-				page_cache_release(page);
-				page_cache_release(src_page);
-				continue;
-			}
-		}
-
-		status = a_ops->prepare_write(file, page, offset, offset+bytes);
-		if (unlikely(status))
-			goto fs_write_aop_error;
-
-		if (!src_page) {
-			/*
-			 * Must not enter the pagefault handler here, because
-			 * we hold the page lock, so we might recursively
-			 * deadlock on the same lock, or get an ABBA deadlock
-			 * against a different lock, or against the mmap_sem
-			 * (which nests outside the page lock). So increment
-			 * preempt count, and use _atomic usercopies.
-			 *
-			 * The page is uptodate so we are OK to encounter a
-			 * short copy: if unmodified parts of the page are
-			 * marked dirty and written out to disk, it doesn't
-			 * really matter.
-			 */
-			pagefault_disable();
-			copied = iov_iter_copy_from_user_atomic(page, i,
-							offset, bytes);
-			pagefault_enable();
-		} else {
-			void *src, *dst;
-			src = kmap_atomic(src_page, KM_USER0);
-			dst = kmap_atomic(page, KM_USER1);
-			memcpy(dst + offset, src + offset, bytes);
-			kunmap_atomic(dst, KM_USER1);
-			kunmap_atomic(src, KM_USER0);
-			copied = bytes;
-		}
-		flush_dcache_page(page);
-
-		status = a_ops->commit_write(file, page, offset, offset+bytes);
-		if (unlikely(status < 0))
-			goto fs_write_aop_error;
-		if (unlikely(status > 0)) /* filesystem did partial write */
-			copied = min_t(size_t, copied, status);
-
-		unlock_page(page);
-		mark_page_accessed(page);
-		page_cache_release(page);
-		if (src_page)
-			page_cache_release(src_page);
-
-		iov_iter_advance(i, copied);
-		pos += copied;
-		written += copied;
-
-		balance_dirty_pages_ratelimited(mapping);
-		cond_resched();
-		continue;
-
-fs_write_aop_error:
-		unlock_page(page);
-		page_cache_release(page);
-		if (src_page)
-			page_cache_release(src_page);
-
-		/*
-		 * prepare_write() may have instantiated a few blocks
-		 * outside i_size. Trim these off again. Don't need
-		 * i_size_read because we hold i_mutex.
-		 */
-		if (pos + bytes > inode->i_size)
-			vmtruncate(inode, inode->i_size);
-		break;
-	} while (iov_iter_count(i));
-
-	return written ? written : status;
-}
-
 static ssize_t generic_perform_write(struct file *file,
 		struct iov_iter *i, loff_t pos)
 {
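The removed double-copy path existed only to drive prepare_write()/commit_write() filesystems. The surviving generic_perform_write() avoids the same deadlock (copying from the very page being written) by faulting the source page in with iov_iter_fault_in_readable() first and then copying with page faults disabled. An abridged, hedged sketch of that surviving loop body follows (retry and error handling trimmed; an approximation, not a verbatim quote of mm/filemap.c):

		/* Abridged: one iteration of the ->write_begin/->write_end loop. */
		status = a_ops->write_begin(file, mapping, pos, bytes, flags,
						&page, &fsdata);
		if (unlikely(status))
			break;

		pagefault_disable();	/* no recursive faults under the page lock */
		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
		pagefault_enable();
		flush_dcache_page(page);

		status = a_ops->write_end(file, mapping, pos, bytes, copied,
						page, fsdata);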
@@ -2481,10 +2263,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 	struct iov_iter i;
 
 	iov_iter_init(&i, iov, nr_segs, count, written);
-	if (a_ops->write_begin)
-		status = generic_perform_write(file, &i, pos);
-	else
-		status = generic_perform_write_2copy(file, &i, pos);
+	status = generic_perform_write(file, &i, pos);
 
 	if (likely(status >= 0)) {
 		written += status;