Diffstat (limited to 'fs/buffer.c')
-rw-r--r--  fs/buffer.c  265
1 file changed, 74 insertions(+), 191 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index d54812b198e9..5930e382959b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -156,7 +156,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 	if (uptodate) {
 		set_buffer_uptodate(bh);
 	} else {
-		if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) {
+		if (!quiet_error(bh)) {
 			buffer_io_error(bh);
 			printk(KERN_WARNING "lost page write due to "
 					"I/O error on %s\n",
@@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 				spin_unlock(lock);
 				/*
 				 * Ensure any pending I/O completes so that
-				 * ll_rw_block() actually writes the current
-				 * contents - it is a noop if I/O is still in
-				 * flight on potentially older contents.
+				 * write_dirty_buffer() actually writes the
+				 * current contents - it is a noop if I/O is
+				 * still in flight on potentially older
+				 * contents.
 				 */
-				ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh);
+				write_dirty_buffer(bh, WRITE_SYNC_PLUG);
 
 				/*
 				 * Kick off IO for the previous mapping. Note
@@ -904,7 +905,6 @@ try_again:
 
 	bh->b_state = 0;
 	atomic_set(&bh->b_count, 0);
-	bh->b_private = NULL;
 	bh->b_size = size;
 
 	/* Link the buffer to its page */
@@ -1705,7 +1705,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	 * and kswapd activity, but those code paths have their own
 	 * higher-level throttling.
 	 */
-	if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
+	if (wbc->sync_mode != WB_SYNC_NONE) {
 		lock_buffer(bh);
 	} else if (!trylock_buffer(bh)) {
 		redirty_page_for_writepage(wbc, page);
@@ -1833,9 +1833,12 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
 }
 EXPORT_SYMBOL(page_zero_new_buffers);
 
-static int __block_prepare_write(struct inode *inode, struct page *page,
-		unsigned from, unsigned to, get_block_t *get_block)
+int __block_write_begin(struct page *page, loff_t pos, unsigned len,
+		get_block_t *get_block)
 {
+	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned to = from + len;
+	struct inode *inode = page->mapping->host;
 	unsigned block_start, block_end;
 	sector_t block;
 	int err = 0;
@@ -1908,10 +1911,13 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 		if (!buffer_uptodate(*wait_bh))
 			err = -EIO;
 	}
-	if (unlikely(err))
+	if (unlikely(err)) {
 		page_zero_new_buffers(page, from, to);
+		ClearPageUptodate(page);
+	}
 	return err;
 }
+EXPORT_SYMBOL(__block_write_begin);
 
 static int __block_commit_write(struct inode *inode, struct page *page,
 		unsigned from, unsigned to)
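Since __block_write_begin() is now exported, a filesystem that already holds a locked page can prepare a byte range directly; the block_page_mkwrite() hunk further down does exactly this. A minimal sketch under that assumption (the myfs_ names are hypothetical; the page must be locked and backed by a mapping):

static int myfs_prepare_locked_page(struct page *page, unsigned end)
{
	int ret;

	/* Allocate blocks and bring partial blocks uptodate for [0, end). */
	ret = __block_write_begin(page, 0, end, myfs_get_block);
	if (!ret)
		/* Mark the range uptodate and dirty, as block_page_mkwrite() does. */
		ret = block_commit_write(page, 0, end);
	return ret;
}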
@@ -1949,90 +1955,32 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 }
 
 /*
- * Filesystems implementing the new truncate sequence should use the
- * _newtrunc postfix variant which won't incorrectly call vmtruncate.
+ * block_write_begin takes care of the basic task of block allocation and
+ * bringing partial write blocks uptodate first.
+ *
  * The filesystem needs to handle block truncation upon failure.
  */
-int block_write_begin_newtrunc(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block)
+int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
+			unsigned flags, struct page **pagep, get_block_t *get_block)
 {
-	struct inode *inode = mapping->host;
-	int status = 0;
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 	struct page *page;
-	pgoff_t index;
-	unsigned start, end;
-	int ownpage = 0;
+	int status;
 
-	index = pos >> PAGE_CACHE_SHIFT;
-	start = pos & (PAGE_CACHE_SIZE - 1);
-	end = start + len;
-
-	page = *pagep;
-	if (page == NULL) {
-		ownpage = 1;
-		page = grab_cache_page_write_begin(mapping, index, flags);
-		if (!page) {
-			status = -ENOMEM;
-			goto out;
-		}
-		*pagep = page;
-	} else
-		BUG_ON(!PageLocked(page));
+	page = grab_cache_page_write_begin(mapping, index, flags);
+	if (!page)
+		return -ENOMEM;
 
-	status = __block_prepare_write(inode, page, start, end, get_block);
+	status = __block_write_begin(page, pos, len, get_block);
 	if (unlikely(status)) {
-		ClearPageUptodate(page);
-
-		if (ownpage) {
-			unlock_page(page);
-			page_cache_release(page);
-			*pagep = NULL;
-		}
+		unlock_page(page);
+		page_cache_release(page);
+		page = NULL;
 	}
 
-out:
+	*pagep = page;
 	return status;
 }
-EXPORT_SYMBOL(block_write_begin_newtrunc);
-
-/*
- * block_write_begin takes care of the basic task of block allocation and
- * bringing partial write blocks uptodate first.
- *
- * If *pagep is not NULL, then block_write_begin uses the locked page
- * at *pagep rather than allocating its own. In this case, the page will
- * not be unlocked or deallocated on failure.
- */
-int block_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block)
-{
-	int ret;
-
-	ret = block_write_begin_newtrunc(file, mapping, pos, len, flags,
-					pagep, fsdata, get_block);
-
-	/*
-	 * prepare_write() may have instantiated a few blocks
-	 * outside i_size. Trim these off again. Don't need
-	 * i_size_read because we hold i_mutex.
-	 *
-	 * Filesystems which pass down their own page also cannot
-	 * call into vmtruncate here because it would lead to lock
-	 * inversion problems (*pagep is locked). This is a further
-	 * example of where the old truncate sequence is inadequate.
-	 */
-	if (unlikely(ret) && *pagep == NULL) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
-
-	return ret;
-}
 EXPORT_SYMBOL(block_write_begin);
 
 int block_write_end(struct file *file, struct address_space *mapping,
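Converted callers lose both the vmtruncate() fallback and the file/fsdata arguments, so trimming blocks instantiated past i_size on failure moves into the filesystem, as the updated comment says. A hedged sketch of a converted ->write_begin (myfs_get_block and myfs_write_failed are hypothetical names):

static int myfs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int ret;

	ret = block_write_begin(mapping, pos, len, flags, pagep,
				myfs_get_block);
	if (unlikely(ret) && pos + len > mapping->host->i_size)
		/* The helper no longer calls vmtruncate(); trim ourselves. */
		myfs_write_failed(mapping, pos + len);
	return ret;
}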
@@ -2351,7 +2299,7 @@ out:
  * For moronic filesystems that do not allow holes in file.
  * We may have to extend the file.
  */
-int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
+int cont_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata,
 			get_block_t *get_block, loff_t *bytes)
@@ -2363,7 +2311,7 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
 
 	err = cont_expand_zero(file, mapping, pos, bytes);
 	if (err)
-		goto out;
+		return err;
 
 	zerofrom = *bytes & ~PAGE_CACHE_MASK;
 	if (pos+len > *bytes && zerofrom & (blocksize-1)) {
@@ -2371,44 +2319,10 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
 		(*bytes)++;
 	}
 
-	*pagep = NULL;
-	err = block_write_begin_newtrunc(file, mapping, pos, len,
-				flags, pagep, fsdata, get_block);
-out:
-	return err;
-}
-EXPORT_SYMBOL(cont_write_begin_newtrunc);
-
-int cont_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block, loff_t *bytes)
-{
-	int ret;
-
-	ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags,
-					pagep, fsdata, get_block, bytes);
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
-
-	return ret;
+	return block_write_begin(mapping, pos, len, flags, pagep, get_block);
 }
 EXPORT_SYMBOL(cont_write_begin);
 
-int block_prepare_write(struct page *page, unsigned from, unsigned to,
-			get_block_t *get_block)
-{
-	struct inode *inode = page->mapping->host;
-	int err = __block_prepare_write(inode, page, from, to, get_block);
-	if (err)
-		ClearPageUptodate(page);
-	return err;
-}
-EXPORT_SYMBOL(block_prepare_write);
-
 int block_commit_write(struct page *page, unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
@@ -2457,7 +2371,7 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 	else
 		end = PAGE_CACHE_SIZE;
 
-	ret = block_prepare_write(page, 0, end, get_block);
+	ret = __block_write_begin(page, 0, end, get_block);
 	if (!ret)
 		ret = block_commit_write(page, 0, end);
 
@@ -2510,11 +2424,11 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
 }
 
 /*
- * Filesystems implementing the new truncate sequence should use the
- * _newtrunc postfix variant which won't incorrectly call vmtruncate.
+ * On entry, the page is fully not uptodate.
+ * On exit the page is fully uptodate in the areas outside (from,to)
  * The filesystem needs to handle block truncation upon failure.
  */
-int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
+int nobh_write_begin(struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata,
 			get_block_t *get_block)
@@ -2544,11 +2458,10 @@ int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
 	*fsdata = NULL;
 
 	if (page_has_buffers(page)) {
-		unlock_page(page);
-		page_cache_release(page);
-		*pagep = NULL;
-		return block_write_begin_newtrunc(file, mapping, pos, len,
-					flags, pagep, fsdata, get_block);
+		ret = __block_write_begin(page, pos, len, get_block);
+		if (unlikely(ret))
+			goto out_release;
+		return ret;
 	}
 
 	if (PageMappedToDisk(page))
@@ -2654,35 +2567,6 @@ out_release:
 
 	return ret;
 }
-EXPORT_SYMBOL(nobh_write_begin_newtrunc);
-
-/*
- * On entry, the page is fully not uptodate.
- * On exit the page is fully uptodate in the areas outside (from,to)
- */
-int nobh_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block)
-{
-	int ret;
-
-	ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags,
-					pagep, fsdata, get_block);
-
-	/*
-	 * prepare_write() may have instantiated a few blocks
-	 * outside i_size. Trim these off again. Don't need
-	 * i_size_read because we hold i_mutex.
-	 */
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
-
-	return ret;
-}
 EXPORT_SYMBOL(nobh_write_begin);
 
 int nobh_write_end(struct file *file, struct address_space *mapping,
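The nobh path follows the same conversion: the _newtrunc body becomes nobh_write_begin() itself, minus the struct file argument, and failure truncation is again the caller's job. A sketch of a converted caller (hypothetical myfs_ names, same pattern as the block_write_begin sketch above):

static int myfs_nobh_write_begin(struct file *file,
			struct address_space *mapping, loff_t pos,
			unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int ret;

	ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata,
				myfs_get_block);
	if (unlikely(ret) && pos + len > mapping->host->i_size)
		/* Assumed helper: trim blocks instantiated past i_size. */
		myfs_write_failed(mapping, pos + len);
	return ret;
}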
@@ -2998,7 +2882,6 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
 
 	if (err == -EOPNOTSUPP) {
 		set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-		set_bit(BH_Eopnotsupp, &bh->b_state);
 	}
 
 	if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
@@ -3020,13 +2903,6 @@ int submit_bh(int rw, struct buffer_head * bh)
 	BUG_ON(buffer_unwritten(bh));
 
 	/*
-	 * Mask in barrier bit for a write (could be either a WRITE or a
-	 * WRITE_SYNC
-	 */
-	if (buffer_ordered(bh) && (rw & WRITE))
-		rw |= WRITE_BARRIER;
-
-	/*
 	 * Only clear out a write error when rewriting
 	 */
 	if (test_set_buffer_req(bh) && (rw & WRITE))
@@ -3064,22 +2940,21 @@ EXPORT_SYMBOL(submit_bh);
 
 /**
  * ll_rw_block: low-level access to block devices (DEPRECATED)
- * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead)
+ * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
  * @nr: number of &struct buffer_heads in the array
  * @bhs: array of pointers to &struct buffer_head
  *
  * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
  * requests an I/O operation on them, either a %READ or a %WRITE. The third
- * %SWRITE is like %WRITE only we make sure that the *current* data in buffers
- * are sent to disk. The fourth %READA option is described in the documentation
- * for generic_make_request() which ll_rw_block() calls.
+ * %READA option is described in the documentation for generic_make_request()
+ * which ll_rw_block() calls.
  *
  * This function drops any buffer that it cannot get a lock on (with the
- * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be
- * clean when doing a write request, and any buffer that appears to be
- * up-to-date when doing read request. Further it marks as clean buffers that
- * are processed for writing (the buffer cache won't assume that they are
- * actually clean until the buffer gets unlocked).
+ * BH_Lock state bit), any buffer that appears to be clean when doing a write
+ * request, and any buffer that appears to be up-to-date when doing read
+ * request. Further it marks as clean buffers that are processed for
+ * writing (the buffer cache won't assume that they are actually clean
+ * until the buffer gets unlocked).
  *
  * ll_rw_block sets b_end_io to simple completion handler that marks
  * the buffer up-to-date (if approriate), unlocks the buffer and wakes
@@ -3095,20 +2970,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
 	for (i = 0; i < nr; i++) {
 		struct buffer_head *bh = bhs[i];
 
-		if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG)
-			lock_buffer(bh);
-		else if (!trylock_buffer(bh))
+		if (!trylock_buffer(bh))
 			continue;
-
-		if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC ||
-		    rw == SWRITE_SYNC_PLUG) {
+		if (rw == WRITE) {
 			if (test_clear_buffer_dirty(bh)) {
 				bh->b_end_io = end_buffer_write_sync;
 				get_bh(bh);
-				if (rw == SWRITE_SYNC)
-					submit_bh(WRITE_SYNC, bh);
-				else
-					submit_bh(WRITE, bh);
+				submit_bh(WRITE, bh);
 				continue;
 			}
 		} else {
@@ -3124,12 +2992,25 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
 }
 EXPORT_SYMBOL(ll_rw_block);
 
+void write_dirty_buffer(struct buffer_head *bh, int rw)
+{
+	lock_buffer(bh);
+	if (!test_clear_buffer_dirty(bh)) {
+		unlock_buffer(bh);
+		return;
+	}
+	bh->b_end_io = end_buffer_write_sync;
+	get_bh(bh);
+	submit_bh(rw, bh);
+}
+EXPORT_SYMBOL(write_dirty_buffer);
+
 /*
  * For a data-integrity writeout, we need to wait upon any in-progress I/O
  * and then start new I/O and then wait upon it. The caller must have a ref on
  * the buffer_head.
  */
-int sync_dirty_buffer(struct buffer_head *bh)
+int __sync_dirty_buffer(struct buffer_head *bh, int rw)
 {
 	int ret = 0;
 
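write_dirty_buffer() is the replacement for the removed SWRITE* modes: it takes the buffer lock unconditionally, waiting out any in-flight I/O, so the buffer's current contents are what reach disk, which is the guarantee the fsync_buffers_list() hunk above depends on. A hedged usage sketch:

/* Where ll_rw_block(SWRITE, 1, &bh) was used before: */
write_dirty_buffer(bh, WRITE);

/* Or with sync semantics, as fsync_buffers_list() now does: */
write_dirty_buffer(bh, WRITE_SYNC_PLUG);

/* The caller keeps its own reference; completion runs
 * end_buffer_write_sync(), which drops the reference taken here. */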
@@ -3138,12 +3019,8 @@ int sync_dirty_buffer(struct buffer_head *bh)
 	if (test_clear_buffer_dirty(bh)) {
 		get_bh(bh);
 		bh->b_end_io = end_buffer_write_sync;
-		ret = submit_bh(WRITE_SYNC, bh);
+		ret = submit_bh(rw, bh);
 		wait_on_buffer(bh);
-		if (buffer_eopnotsupp(bh)) {
-			clear_buffer_eopnotsupp(bh);
-			ret = -EOPNOTSUPP;
-		}
 		if (!ret && !buffer_uptodate(bh))
 			ret = -EIO;
 	} else {
@@ -3151,6 +3028,12 @@ int sync_dirty_buffer(struct buffer_head *bh)
 	}
 	return ret;
 }
+EXPORT_SYMBOL(__sync_dirty_buffer);
+
+int sync_dirty_buffer(struct buffer_head *bh)
+{
+	return __sync_dirty_buffer(bh, WRITE_SYNC);
+}
 EXPORT_SYMBOL(sync_dirty_buffer);
 
 /*
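__sync_dirty_buffer() factors the write-and-wait logic out of sync_dirty_buffer() so callers can pick the request type themselves. A sketch of a caller that wants data-integrity semantics without the WRITE_SYNC hint (assumption: plain WRITE is acceptable for its ordering needs):

	int err;

	/* The caller must hold a reference on bh, per the comment above. */
	err = __sync_dirty_buffer(bh, WRITE);
	if (err)
		printk(KERN_ERR "buffer writeout failed: %d\n", err);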