fs: introduce write_begin, write_end, and perform_write aops

These are intended to replace prepare_write and commit_write with more
flexible alternatives that are also able to avoid the buffered write
deadlock problems efficiently (which prepare_write is unable to do).

[mark.fasheh@oracle.com: API design contributions, code review and fixes]
[akpm@linux-foundation.org: various fixes]
[dmonakhov@sw.ru: new aop block_write_begin fix]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Dmitriy Monakhov <dmonakhov@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Nick Piggin <npiggin@suse.de> 2007-10-16 04:25:01 -0400
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-10-16 12:42:55 -0400
commit: afddba49d18f346e5cc2938b6ed7c512db18ca68 (patch)
tree: 4726e3d3b0e9e8e5b5d3b2b0cccb36446bbdf3ca /fs
parent: 637aff46f94a754207c80c8c64bf1b74f24b967d (diff)
4 files changed, 231 insertions, 129 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 9ece6c2086d0..68b8fbdc1b28 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1770,6 +1770,48 @@ recover:
        goto done;
 }
+/*
+ * If a page has any new buffers, zero them out here, and mark them uptodate
+ * and dirty so they'll be written out (in order to prevent uninitialised
+ * block data from leaking). And clear the new bit.
+ */
+void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
+{
+        unsigned int block_start, block_end;
+        struct buffer_head *head, *bh;
+        BUG_ON(!PageLocked(page));
+        if (!page_has_buffers(page))
+                return;
+        bh = head = page_buffers(page);
+        block_start = 0;
+        do {
+                block_end = block_start + bh->b_size;
+                if (buffer_new(bh)) {
+                        if (block_end > from && block_start < to) {
+                                if (!PageUptodate(page)) {
+                                        unsigned start, size;
+                                        start = max(from, block_start);
+                                        size = min(to, block_end) - start;
+                                        zero_user_page(page, start, size, KM_USER0);
+                                        set_buffer_uptodate(bh);
+                                }
+                                clear_buffer_new(bh);
+                                mark_buffer_dirty(bh);
+                        }
+                }
+                block_start = block_end;
+                bh = bh->b_this_page;
+        } while (bh != head);
+}
+EXPORT_SYMBOL(page_zero_new_buffers);
 static int __block_prepare_write(struct inode *inode, struct page *page,
                unsigned from, unsigned to, get_block_t *get_block)
 {
@@ -1854,38 +1896,8 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
                if (!buffer_uptodate(*wait_bh))
                        err = -EIO;
        }
-        if (!err) {
+        if (unlikely(err))
-                bh = head;
+                page_zero_new_buffers(page, from, to);
-                do {
-                        if (buffer_new(bh))
-                                clear_buffer_new(bh);
-                } while ((bh = bh->b_this_page) != head);
-                return 0;
-        }
-        /* Error case: */
-        /*
-         * Zero out any newly allocated blocks to avoid exposing stale
-         * data.  If BH_New is set, we know that the block was newly
-         * allocated in the above loop.
-         */
-        bh = head;
-        block_start = 0;
-        do {
-                block_end = block_start+blocksize;
-                if (block_end <= from)
-                        goto next_bh;
-                if (block_start >= to)
-                        break;
-                if (buffer_new(bh)) {
-                        clear_buffer_new(bh);
-                        zero_user_page(page, block_start, bh->b_size, KM_USER0);
-                        set_buffer_uptodate(bh);
-                        mark_buffer_dirty(bh);
-                }
-next_bh:
-                block_start = block_end;
-                bh = bh->b_this_page;
-        } while (bh != head);
        return err;
 }
@@ -1910,6 +1922,7 @@ static int __block_commit_write(struct inode *inode, struct page *page,
                        set_buffer_uptodate(bh);
                        mark_buffer_dirty(bh);
                }
+                clear_buffer_new(bh);
        }
        /*
@@ -1924,6 +1937,130 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 }
 /*
+ * block_write_begin takes care of the basic task of block allocation and
+ * bringing partial write blocks uptodate first.
+ *
+ * If *pagep is not NULL, then block_write_begin uses the locked page
+ * at *pagep rather than allocating its own. In this case, the page will
+ * not be unlocked or deallocated on failure.
+ */
+int block_write_begin(struct file *file, struct address_space *mapping,
+                        loff_t pos, unsigned len, unsigned flags,
+                        struct page **pagep, void **fsdata,
+                        get_block_t *get_block)
+{
+        struct inode *inode = mapping->host;
+        int status = 0;
+        struct page *page;
+        pgoff_t index;
+        unsigned start, end;
+        int ownpage = 0;
+        index = pos >> PAGE_CACHE_SHIFT;
+        start = pos & (PAGE_CACHE_SIZE - 1);
+        end = start + len;
+        page = *pagep;
+        if (page == NULL) {
+                ownpage = 1;
+                page = __grab_cache_page(mapping, index);
+                if (!page) {
+                        status = -ENOMEM;
+                        goto out;
+                }
+                *pagep = page;
+        } else
+                BUG_ON(!PageLocked(page));
+        status = __block_prepare_write(inode, page, start, end, get_block);
+        if (unlikely(status)) {
+                ClearPageUptodate(page);
+                if (ownpage) {
+                        unlock_page(page);
+                        page_cache_release(page);
+                        *pagep = NULL;
+                        /*
+                         * prepare_write() may have instantiated a few blocks
+                         * outside i_size.  Trim these off again. Don't need
+                         * i_size_read because we hold i_mutex.
+                         */
+                        if (pos + len > inode->i_size)
+                                vmtruncate(inode, inode->i_size);
+                }
+                goto out;
+        }
+out:
+        return status;
+}
+EXPORT_SYMBOL(block_write_begin);
+int block_write_end(struct file *file, struct address_space *mapping,
+                        loff_t pos, unsigned len, unsigned copied,
+                        struct page *page, void *fsdata)
+{
+        struct inode *inode = mapping->host;
+        unsigned start;
+        start = pos & (PAGE_CACHE_SIZE - 1);
+        if (unlikely(copied < len)) {
+                /*
+                 * The buffers that were written will now be uptodate, so we
+                 * don't have to worry about a readpage reading them and
+                 * overwriting a partial write. However if we have encountered
+                 * a short write and only partially written into a buffer, it
+                 * will not be marked uptodate, so a readpage might come in and
+                 * destroy our partial write.
+                 *
+                 * Do the simplest thing, and just treat any short write to a
+                 * non uptodate page as a zero-length write, and force the
+                 * caller to redo the whole thing.
+                 */
+                if (!PageUptodate(page))
+                        copied = 0;
+                page_zero_new_buffers(page, start+copied, start+len);
+        }
+        flush_dcache_page(page);
+        /* This could be a short (even 0-length) commit */
+        __block_commit_write(inode, page, start, start+copied);
+        return copied;
+}
+EXPORT_SYMBOL(block_write_end);
+int generic_write_end(struct file *file, struct address_space *mapping,
+                        loff_t pos, unsigned len, unsigned copied,
+                        struct page *page, void *fsdata)
+{
+        struct inode *inode = mapping->host;
+        copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+        /*
+         * No need to use i_size_read() here, the i_size
+         * cannot change under us because we hold i_mutex.
+         *
+         * But it's important to update i_size while still holding page lock:
+         * page writeout could otherwise come in and zero beyond i_size.
+         */
+        if (pos+copied > inode->i_size) {
+                i_size_write(inode, pos+copied);
+                mark_inode_dirty(inode);
+        }
+        unlock_page(page);
+        page_cache_release(page);
+        return copied;
+}
+EXPORT_SYMBOL(generic_write_end);
+/*
 * Generic "read page" function for block devices that have the normal
 * get_block functionality. This is most of the block device filesystems.
 * Reads the page asynchronously --- the unlock_buffer() and
diff --git a/fs/libfs.c b/fs/libfs.c
index 5294de1f40c4..f2b32d3a9093 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -351,6 +351,26 @@ int simple_prepare_write(struct file *file, struct page *page,
        return 0;
 }
+int simple_write_begin(struct file *file, struct address_space *mapping,
+                        loff_t pos, unsigned len, unsigned flags,
+                        struct page **pagep, void **fsdata)
+{
+        struct page *page;
+        pgoff_t index;
+        unsigned from;
+        index = pos >> PAGE_CACHE_SHIFT;
+        from = pos & (PAGE_CACHE_SIZE - 1);
+        page = __grab_cache_page(mapping, index);
+        if (!page)
+                return -ENOMEM;
+        *pagep = page;
+        return simple_prepare_write(file, page, from, from+len);
+}
 int simple_commit_write(struct file *file, struct page *page,
                        unsigned from, unsigned to)
 {
@@ -369,6 +389,28 @@ int simple_commit_write(struct file *file, struct page *page,
        return 0;
 }
+int simple_write_end(struct file *file, struct address_space *mapping,
+                        loff_t pos, unsigned len, unsigned copied,
+                        struct page *page, void *fsdata)
+{
+        unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+        /* zero the stale part of the page if we did a short copy */
+        if (copied < len) {
+                void *kaddr = kmap_atomic(page, KM_USER0);
+                memset(kaddr + from + copied, 0, len - copied);
+                flush_dcache_page(page);
+                kunmap_atomic(kaddr, KM_USER0);
+        }
+        simple_commit_write(file, page, from, from+copied);
+        unlock_page(page);
+        page_cache_release(page);
+        return copied;
+}
 /*
 * the inodes created here are not hashed. If you use iunique to generate
 * unique inode values later for this filesystem, then you must take care
@@ -642,6 +684,8 @@ EXPORT_SYMBOL(dcache_dir_open);
 EXPORT_SYMBOL(dcache_readdir);
 EXPORT_SYMBOL(generic_read_dir);
 EXPORT_SYMBOL(get_sb_pseudo);
+EXPORT_SYMBOL(simple_write_begin);
+EXPORT_SYMBOL(simple_write_end);
 EXPORT_SYMBOL(simple_commit_write);
 EXPORT_SYMBOL(simple_dir_inode_operations);
 EXPORT_SYMBOL(simple_dir_operations);
diff --git a/fs/namei.c b/fs/namei.c
index a83160acd748..b40b8084eefc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2729,53 +2729,29 @@ int __page_symlink(struct inode *inode, const char *symname, int len,
 {
        struct address_space *mapping = inode->i_mapping;
        struct page *page;
+        void *fsdata;
        int err;
        char *kaddr;
 retry:
-        err = -ENOMEM;
+        err = pagecache_write_begin(NULL, mapping, 0, len-1,
-        page = find_or_create_page(mapping, 0, gfp_mask);
+                                AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
-        if (!page)
-                goto fail;
-        err = mapping->a_ops->prepare_write(NULL, page, 0, len-1);
-        if (err == AOP_TRUNCATED_PAGE) {
-                page_cache_release(page);
-                goto retry;
-        }
        if (err)
-                goto fail_map;
+                goto fail;
        kaddr = kmap_atomic(page, KM_USER0);
        memcpy(kaddr, symname, len-1);
        kunmap_atomic(kaddr, KM_USER0);
-        err = mapping->a_ops->commit_write(NULL, page, 0, len-1);
-        if (err == AOP_TRUNCATED_PAGE) {
+        err = pagecache_write_end(NULL, mapping, 0, len-1, len-1,
-                page_cache_release(page);
+                                                        page, fsdata);
-                goto retry;
-        }
-        if (err)
-                goto fail_map;
-        /*
-         * Notice that we are _not_ going to block here - end of page is
-         * unmapped, so this will only try to map the rest of page, see
-         * that it is unmapped (typically even will not look into inode -
-         * ->i_size will be enough for everything) and zero it out.
-         * OTOH it's obviously correct and should make the page up-to-date.
-         */
-        if (!PageUptodate(page)) {
-                err = mapping->a_ops->readpage(NULL, page);
-                if (err != AOP_TRUNCATED_PAGE)
-                        wait_on_page_locked(page);
-        } else {
-                unlock_page(page);
-        }
-        page_cache_release(page);
        if (err < 0)
                goto fail;
+        if (err < len-1)
+                goto retry;
        mark_inode_dirty(inode);
        return 0;
-fail_map:
-        unlock_page(page);
-        page_cache_release(page);
 fail:
        return err;
 }
diff --git a/fs/splice.c b/fs/splice.c
index 2df6be43c667..a7568bcc0f99 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -563,7 +563,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
        struct address_space *mapping = file->f_mapping;
        unsigned int offset, this_len;
        struct page *page;
-        pgoff_t index;
+        void *fsdata;
        int ret;
        /*
@@ -573,49 +573,16 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
        if (unlikely(ret))
                return ret;
-        index = sd->pos >> PAGE_CACHE_SHIFT;
        offset = sd->pos & ~PAGE_CACHE_MASK;
        this_len = sd->len;
        if (this_len + offset > PAGE_CACHE_SIZE)
                this_len = PAGE_CACHE_SIZE - offset;
-find_page:
+        ret = pagecache_write_begin(file, mapping, sd->pos, this_len,
-        page = find_lock_page(mapping, index);
+                                AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
-        if (!page) {
+        if (unlikely(ret))
-                ret = -ENOMEM;
+                goto out;
-                page = page_cache_alloc_cold(mapping);
-                if (unlikely(!page))
-                        goto out_ret;
-                /*
-                 * This will also lock the page
-                 */
-                ret = add_to_page_cache_lru(page, mapping, index,
-                                            GFP_KERNEL);
-                if (unlikely(ret))
-                        goto out_release;
-        }
-        ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
-        if (unlikely(ret)) {
-                loff_t isize = i_size_read(mapping->host);
-                if (ret != AOP_TRUNCATED_PAGE)
-                        unlock_page(page);
-                page_cache_release(page);
-                if (ret == AOP_TRUNCATED_PAGE)
-                        goto find_page;
-                /*
-                 * prepare_write() may have instantiated a few blocks
-                 * outside i_size.  Trim these off again.
-                 */
-                if (sd->pos + this_len > isize)
-                        vmtruncate(mapping->host, isize);
-                goto out_ret;
-        }
        if (buf->page != page) {
                /*
@@ -629,31 +596,9 @@ find_page:
                kunmap_atomic(dst, KM_USER1);
                buf->ops->unmap(pipe, buf, src);
        }
+        ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
-        ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
+                                page, fsdata);
-        if (ret) {
-                if (ret == AOP_TRUNCATED_PAGE) {
-                        page_cache_release(page);
-                        goto find_page;
-                }
-                if (ret < 0)
-                        goto out;
-                /*
-                 * Partial write has happened, so 'ret' already initialized by
-                 * number of bytes written, Where is nothing we have to do here.
-                 */
-        } else
-                ret = this_len;
-        /*
-         * Return the number of bytes written and mark page as
-         * accessed, we are now done!
-         */
-        mark_page_accessed(page);
 out:
-        unlock_page(page);
-out_release:
-        page_cache_release(page);
-out_ret:
        return ret;
 }
author	Nick Piggin <npiggin@suse.de>	2007-10-16 04:25:01 -0400
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-10-16 12:42:55 -0400
commit	afddba49d18f346e5cc2938b6ed7c512db18ca68 (patch)
tree	4726e3d3b0e9e8e5b5d3b2b0cccb36446bbdf3ca /fs
parent	637aff46f94a754207c80c8c64bf1b74f24b967d (diff)

diff --git a/fs/buffer.c b/fs/buffer.c
index 9ece6c2086d0..68b8fbdc1b28 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1770,6 +1770,48 @@ recover:
1770	goto done;	1770	goto done;
1771	}	1771	}
1772		1772
		1773	/*
		1774	* If a page has any new buffers, zero them out here, and mark them uptodate
		1775	* and dirty so they'll be written out (in order to prevent uninitialised
		1776	* block data from leaking). And clear the new bit.
		1777	*/
		1778	void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
		1779	{
		1780	unsigned int block_start, block_end;
		1781	struct buffer_head *head, *bh;
		1782
		1783	BUG_ON(!PageLocked(page));
		1784	if (!page_has_buffers(page))
		1785	return;
		1786
		1787	bh = head = page_buffers(page);
		1788	block_start = 0;
		1789	do {
		1790	block_end = block_start + bh->b_size;
		1791
		1792	if (buffer_new(bh)) {
		1793	if (block_end > from && block_start < to) {
		1794	if (!PageUptodate(page)) {
		1795	unsigned start, size;
		1796
		1797	start = max(from, block_start);
		1798	size = min(to, block_end) - start;
		1799
		1800	zero_user_page(page, start, size, KM_USER0);
		1801	set_buffer_uptodate(bh);
		1802	}
		1803
		1804	clear_buffer_new(bh);
		1805	mark_buffer_dirty(bh);
		1806	}
		1807	}
		1808
		1809	block_start = block_end;
		1810	bh = bh->b_this_page;
		1811	} while (bh != head);
		1812	}
		1813	EXPORT_SYMBOL(page_zero_new_buffers);
		1814
1773	static int __block_prepare_write(struct inode *inode, struct page *page,	1815	static int __block_prepare_write(struct inode *inode, struct page *page,
1774	unsigned from, unsigned to, get_block_t *get_block)	1816	unsigned from, unsigned to, get_block_t *get_block)
1775	{	1817	{
@@ -1854,38 +1896,8 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
1854	if (!buffer_uptodate(*wait_bh))	1896	if (!buffer_uptodate(*wait_bh))
1855	err = -EIO;	1897	err = -EIO;
1856	}	1898	}
1857	if (!err) {	1899	if (unlikely(err))
1858	bh = head;	1900	page_zero_new_buffers(page, from, to);
1859	do {
1860	if (buffer_new(bh))
1861	clear_buffer_new(bh);
1862	} while ((bh = bh->b_this_page) != head);
1863	return 0;
1864	}
1865	/* Error case: */
1866	/*
1867	* Zero out any newly allocated blocks to avoid exposing stale
1868	* data. If BH_New is set, we know that the block was newly
1869	* allocated in the above loop.
1870	*/
1871	bh = head;
1872	block_start = 0;
1873	do {
1874	block_end = block_start+blocksize;
1875	if (block_end <= from)
1876	goto next_bh;
1877	if (block_start >= to)
1878	break;
1879	if (buffer_new(bh)) {
1880	clear_buffer_new(bh);
1881	zero_user_page(page, block_start, bh->b_size, KM_USER0);
1882	set_buffer_uptodate(bh);
1883	mark_buffer_dirty(bh);
1884	}
1885	next_bh:
1886	block_start = block_end;
1887	bh = bh->b_this_page;
1888	} while (bh != head);
1889	return err;	1901	return err;
1890	}	1902	}
1891		1903
@@ -1910,6 +1922,7 @@ static int __block_commit_write(struct inode *inode, struct page *page,
1910	set_buffer_uptodate(bh);	1922	set_buffer_uptodate(bh);
1911	mark_buffer_dirty(bh);	1923	mark_buffer_dirty(bh);
1912	}	1924	}
		1925	clear_buffer_new(bh);
1913	}	1926	}
1914		1927
1915	/*	1928	/*
@@ -1924,6 +1937,130 @@ static int __block_commit_write(struct inode *inode, struct page *page,
1924	}	1937	}
1925		1938
1926	/*	1939	/*
		1940	* block_write_begin takes care of the basic task of block allocation and
		1941	* bringing partial write blocks uptodate first.
		1942	*
		1943	* If *pagep is not NULL, then block_write_begin uses the locked page
		1944	* at *pagep rather than allocating its own. In this case, the page will
		1945	* not be unlocked or deallocated on failure.
		1946	*/
		1947	int block_write_begin(struct file *file, struct address_space *mapping,
		1948	loff_t pos, unsigned len, unsigned flags,
		1949	struct page **pagep, void **fsdata,
		1950	get_block_t *get_block)
		1951	{
		1952	struct inode *inode = mapping->host;
		1953	int status = 0;
		1954	struct page *page;
		1955	pgoff_t index;
		1956	unsigned start, end;
		1957	int ownpage = 0;
		1958
		1959	index = pos >> PAGE_CACHE_SHIFT;
		1960	start = pos & (PAGE_CACHE_SIZE - 1);
		1961	end = start + len;
		1962
		1963	page = *pagep;
		1964	if (page == NULL) {
		1965	ownpage = 1;
		1966	page = __grab_cache_page(mapping, index);
		1967	if (!page) {
		1968	status = -ENOMEM;
		1969	goto out;
		1970	}
		1971	*pagep = page;
		1972	} else
		1973	BUG_ON(!PageLocked(page));
		1974
		1975	status = __block_prepare_write(inode, page, start, end, get_block);
		1976	if (unlikely(status)) {
		1977	ClearPageUptodate(page);
		1978
		1979	if (ownpage) {
		1980	unlock_page(page);
		1981	page_cache_release(page);
		1982	*pagep = NULL;
		1983
		1984	/*
		1985	* prepare_write() may have instantiated a few blocks
		1986	* outside i_size. Trim these off again. Don't need
		1987	* i_size_read because we hold i_mutex.
		1988	*/
		1989	if (pos + len > inode->i_size)
		1990	vmtruncate(inode, inode->i_size);
		1991	}
		1992	goto out;
		1993	}
		1994
		1995	out:
		1996	return status;
		1997	}
		1998	EXPORT_SYMBOL(block_write_begin);
		1999
		2000	int block_write_end(struct file *file, struct address_space *mapping,
		2001	loff_t pos, unsigned len, unsigned copied,
		2002	struct page *page, void *fsdata)
		2003	{
		2004	struct inode *inode = mapping->host;
		2005	unsigned start;
		2006
		2007	start = pos & (PAGE_CACHE_SIZE - 1);
		2008
		2009	if (unlikely(copied < len)) {
		2010	/*
		2011	* The buffers that were written will now be uptodate, so we
		2012	* don't have to worry about a readpage reading them and
		2013	* overwriting a partial write. However if we have encountered
		2014	* a short write and only partially written into a buffer, it
		2015	* will not be marked uptodate, so a readpage might come in and
		2016	* destroy our partial write.
		2017	*
		2018	* Do the simplest thing, and just treat any short write to a
		2019	* non uptodate page as a zero-length write, and force the
		2020	* caller to redo the whole thing.
		2021	*/
		2022	if (!PageUptodate(page))
		2023	copied = 0;
		2024
		2025	page_zero_new_buffers(page, start+copied, start+len);
		2026	}
		2027	flush_dcache_page(page);
		2028
		2029	/* This could be a short (even 0-length) commit */
		2030	__block_commit_write(inode, page, start, start+copied);
		2031
		2032	return copied;
		2033	}
		2034	EXPORT_SYMBOL(block_write_end);
		2035
		2036	int generic_write_end(struct file *file, struct address_space *mapping,
		2037	loff_t pos, unsigned len, unsigned copied,
		2038	struct page *page, void *fsdata)
		2039	{
		2040	struct inode *inode = mapping->host;
		2041
		2042	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
		2043
		2044	/*
		2045	* No need to use i_size_read() here, the i_size
		2046	* cannot change under us because we hold i_mutex.
		2047	*
		2048	* But it's important to update i_size while still holding page lock:
		2049	* page writeout could otherwise come in and zero beyond i_size.
		2050	*/
		2051	if (pos+copied > inode->i_size) {
		2052	i_size_write(inode, pos+copied);
		2053	mark_inode_dirty(inode);
		2054	}
		2055
		2056	unlock_page(page);
		2057	page_cache_release(page);
		2058
		2059	return copied;
		2060	}
		2061	EXPORT_SYMBOL(generic_write_end);
		2062
		2063	/*
1927	* Generic "read page" function for block devices that have the normal	2064	* Generic "read page" function for block devices that have the normal
1928	* get_block functionality. This is most of the block device filesystems.	2065	* get_block functionality. This is most of the block device filesystems.
1929	* Reads the page asynchronously --- the unlock_buffer() and	2066	* Reads the page asynchronously --- the unlock_buffer() and


diff --git a/fs/libfs.c b/fs/libfs.c
index 5294de1f40c4..f2b32d3a9093 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -351,6 +351,26 @@ int simple_prepare_write(struct file *file, struct page *page,
351	return 0;	351	return 0;
352	}	352	}
353		353
		354	int simple_write_begin(struct file *file, struct address_space *mapping,
		355	loff_t pos, unsigned len, unsigned flags,
		356	struct page **pagep, void **fsdata)
		357	{
		358	struct page *page;
		359	pgoff_t index;
		360	unsigned from;
		361
		362	index = pos >> PAGE_CACHE_SHIFT;
		363	from = pos & (PAGE_CACHE_SIZE - 1);
		364
		365	page = __grab_cache_page(mapping, index);
		366	if (!page)
		367	return -ENOMEM;
		368
		369	*pagep = page;
		370
		371	return simple_prepare_write(file, page, from, from+len);
		372	}
		373
354	int simple_commit_write(struct file *file, struct page *page,	374	int simple_commit_write(struct file *file, struct page *page,
355	unsigned from, unsigned to)	375	unsigned from, unsigned to)
356	{	376	{
@@ -369,6 +389,28 @@ int simple_commit_write(struct file *file, struct page *page,
369	return 0;	389	return 0;
370	}	390	}
371		391
		392	int simple_write_end(struct file *file, struct address_space *mapping,
		393	loff_t pos, unsigned len, unsigned copied,
		394	struct page *page, void *fsdata)
		395	{
		396	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
		397
		398	/* zero the stale part of the page if we did a short copy */
		399	if (copied < len) {
		400	void *kaddr = kmap_atomic(page, KM_USER0);
		401	memset(kaddr + from + copied, 0, len - copied);
		402	flush_dcache_page(page);
		403	kunmap_atomic(kaddr, KM_USER0);
		404	}
		405
		406	simple_commit_write(file, page, from, from+copied);
		407
		408	unlock_page(page);
		409	page_cache_release(page);
		410
		411	return copied;
		412	}
		413
372	/*	414	/*
373	* the inodes created here are not hashed. If you use iunique to generate	415	* the inodes created here are not hashed. If you use iunique to generate
374	* unique inode values later for this filesystem, then you must take care	416	* unique inode values later for this filesystem, then you must take care
@@ -642,6 +684,8 @@ EXPORT_SYMBOL(dcache_dir_open);
642	EXPORT_SYMBOL(dcache_readdir);	684	EXPORT_SYMBOL(dcache_readdir);
643	EXPORT_SYMBOL(generic_read_dir);	685	EXPORT_SYMBOL(generic_read_dir);
644	EXPORT_SYMBOL(get_sb_pseudo);	686	EXPORT_SYMBOL(get_sb_pseudo);
		687	EXPORT_SYMBOL(simple_write_begin);
		688	EXPORT_SYMBOL(simple_write_end);
645	EXPORT_SYMBOL(simple_commit_write);	689	EXPORT_SYMBOL(simple_commit_write);
646	EXPORT_SYMBOL(simple_dir_inode_operations);	690	EXPORT_SYMBOL(simple_dir_inode_operations);
647	EXPORT_SYMBOL(simple_dir_operations);	691	EXPORT_SYMBOL(simple_dir_operations);


diff --git a/fs/namei.c b/fs/namei.c
index a83160acd748..b40b8084eefc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2729,53 +2729,29 @@ int __page_symlink(struct inode *inode, const char *symname, int len,
2729	{	2729	{
2730	struct address_space *mapping = inode->i_mapping;	2730	struct address_space *mapping = inode->i_mapping;
2731	struct page *page;	2731	struct page *page;
		2732	void *fsdata;
2732	int err;	2733	int err;
2733	char *kaddr;	2734	char *kaddr;
2734		2735
2735	retry:	2736	retry:
2736	err = -ENOMEM;	2737	err = pagecache_write_begin(NULL, mapping, 0, len-1,
2737	page = find_or_create_page(mapping, 0, gfp_mask);	2738	AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
2738	if (!page)
2739	goto fail;
2740	err = mapping->a_ops->prepare_write(NULL, page, 0, len-1);
2741	if (err == AOP_TRUNCATED_PAGE) {
2742	page_cache_release(page);
2743	goto retry;
2744	}
2745	if (err)	2739	if (err)
2746	goto fail_map;	2740	goto fail;
		2741
2747	kaddr = kmap_atomic(page, KM_USER0);	2742	kaddr = kmap_atomic(page, KM_USER0);
2748	memcpy(kaddr, symname, len-1);	2743	memcpy(kaddr, symname, len-1);
2749	kunmap_atomic(kaddr, KM_USER0);	2744	kunmap_atomic(kaddr, KM_USER0);
2750	err = mapping->a_ops->commit_write(NULL, page, 0, len-1);	2745
2751	if (err == AOP_TRUNCATED_PAGE) {	2746	err = pagecache_write_end(NULL, mapping, 0, len-1, len-1,
2752	page_cache_release(page);	2747	page, fsdata);
2753	goto retry;
2754	}
2755	if (err)
2756	goto fail_map;
2757	/*
2758	* Notice that we are _not_ going to block here - end of page is
2759	* unmapped, so this will only try to map the rest of page, see
2760	* that it is unmapped (typically even will not look into inode -
2761	* ->i_size will be enough for everything) and zero it out.
2762	* OTOH it's obviously correct and should make the page up-to-date.
2763	*/
2764	if (!PageUptodate(page)) {
2765	err = mapping->a_ops->readpage(NULL, page);
2766	if (err != AOP_TRUNCATED_PAGE)
2767	wait_on_page_locked(page);
2768	} else {
2769	unlock_page(page);
2770	}
2771	page_cache_release(page);
2772	if (err < 0)	2748	if (err < 0)
2773	goto fail;	2749	goto fail;
		2750	if (err < len-1)
		2751	goto retry;
		2752
2774	mark_inode_dirty(inode);	2753	mark_inode_dirty(inode);
2775	return 0;	2754	return 0;
2776	fail_map:
2777	unlock_page(page);
2778	page_cache_release(page);
2779	fail:	2755	fail:
2780	return err;	2756	return err;
2781	}	2757	}


diff --git a/fs/splice.c b/fs/splice.c
index 2df6be43c667..a7568bcc0f99 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -563,7 +563,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
563	struct address_space *mapping = file->f_mapping;	563	struct address_space *mapping = file->f_mapping;
564	unsigned int offset, this_len;	564	unsigned int offset, this_len;
565	struct page *page;	565	struct page *page;
566	pgoff_t index;	566	void *fsdata;
567	int ret;	567	int ret;
568		568
569	/*	569	/*
@@ -573,49 +573,16 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
573	if (unlikely(ret))	573	if (unlikely(ret))
574	return ret;	574	return ret;
575		575
576	index = sd->pos >> PAGE_CACHE_SHIFT;
577	offset = sd->pos & ~PAGE_CACHE_MASK;	576	offset = sd->pos & ~PAGE_CACHE_MASK;
578		577
579	this_len = sd->len;	578	this_len = sd->len;
580	if (this_len + offset > PAGE_CACHE_SIZE)	579	if (this_len + offset > PAGE_CACHE_SIZE)
581	this_len = PAGE_CACHE_SIZE - offset;	580	this_len = PAGE_CACHE_SIZE - offset;
582		581
583	find_page:	582	ret = pagecache_write_begin(file, mapping, sd->pos, this_len,
584	page = find_lock_page(mapping, index);	583	AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
585	if (!page) {	584	if (unlikely(ret))
586	ret = -ENOMEM;	585	goto out;
587	page = page_cache_alloc_cold(mapping);
588	if (unlikely(!page))
589	goto out_ret;
590
591	/*
592	* This will also lock the page
593	*/
594	ret = add_to_page_cache_lru(page, mapping, index,
595	GFP_KERNEL);
596	if (unlikely(ret))
597	goto out_release;
598	}
599
600	ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
601	if (unlikely(ret)) {
602	loff_t isize = i_size_read(mapping->host);
603
604	if (ret != AOP_TRUNCATED_PAGE)
605	unlock_page(page);
606	page_cache_release(page);
607	if (ret == AOP_TRUNCATED_PAGE)
608	goto find_page;
609
610	/*
611	* prepare_write() may have instantiated a few blocks
612	* outside i_size. Trim these off again.
613	*/
614	if (sd->pos + this_len > isize)
615	vmtruncate(mapping->host, isize);
616
617	goto out_ret;
618	}
619		586
620	if (buf->page != page) {	587	if (buf->page != page) {
621	/*	588	/*
@@ -629,31 +596,9 @@ find_page:
629	kunmap_atomic(dst, KM_USER1);	596	kunmap_atomic(dst, KM_USER1);
630	buf->ops->unmap(pipe, buf, src);	597	buf->ops->unmap(pipe, buf, src);
631	}	598	}
632		599	ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
633	ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);	600	page, fsdata);
634	if (ret) {
635	if (ret == AOP_TRUNCATED_PAGE) {
636	page_cache_release(page);
637	goto find_page;
638	}
639	if (ret < 0)
640	goto out;
641	/*
642	* Partial write has happened, so 'ret' already initialized by
643	* number of bytes written, Where is nothing we have to do here.
644	*/
645	} else
646	ret = this_len;
647	/*
648	* Return the number of bytes written and mark page as
649	* accessed, we are now done!
650	*/
651	mark_page_accessed(page);
652	out:	601	out:
653	unlock_page(page);
654	out_release:
655	page_cache_release(page);
656	out_ret:
657	return ret;	602	return ret;
658	}	603	}
659		604