 fs/ext4/inode.c | 201 ++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 113 insertions(+), 88 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ffc95ba48859..8dd22eade42c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -41,6 +41,8 @@
 #include "acl.h"
 #include "ext4_extents.h"
 
+#define MPAGE_DA_EXTENT_TAIL 0x01
+
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
					      loff_t new_size)
 {
@@ -1626,11 +1628,13 @@ struct mpage_da_data {
	unsigned long first_page, next_page;	/* extent of pages */
	get_block_t *get_block;
	struct writeback_control *wbc;
+	int io_done;
+	long pages_written;
 };
 
 /*
  * mpage_da_submit_io - walks through extent of pages and try to write
- * them with __mpage_writepage()
+ * them with the writepage() callback
  *
  * @mpd->inode: inode
  * @mpd->first_page: first page of the extent
@@ -1645,18 +1649,11 @@ struct mpage_da_data {
 static int mpage_da_submit_io(struct mpage_da_data *mpd)
 {
	struct address_space *mapping = mpd->inode->i_mapping;
-	struct mpage_data mpd_pp = {
-		.bio = NULL,
-		.last_block_in_bio = 0,
-		.get_block = mpd->get_block,
-		.use_writepage = 1,
-	};
	int ret = 0, err, nr_pages, i;
	unsigned long index, end;
	struct pagevec pvec;
 
	BUG_ON(mpd->next_page <= mpd->first_page);
-
	pagevec_init(&pvec, 0);
	index = mpd->first_page;
	end = mpd->next_page - 1;
@@ -1674,8 +1671,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
				break;
			index++;
 
-			err = __mpage_writepage(page, mpd->wbc, &mpd_pp);
-
+			err = mapping->a_ops->writepage(page, mpd->wbc);
+			if (!err)
+				mpd->pages_written++;
			/*
			 * In error case, we have to continue because
			 * remaining pages are still locked
@@ -1686,9 +1684,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
		}
		pagevec_release(&pvec);
	}
-	if (mpd_pp.bio)
-		mpage_bio_submit(WRITE, mpd_pp.bio);
-
	return ret;
 }
 
@@ -1711,7 +1706,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
	int blocks = exbh->b_size >> inode->i_blkbits;
	sector_t pblock = exbh->b_blocknr, cur_logical;
	struct buffer_head *head, *bh;
-	unsigned long index, end;
+	pgoff_t index, end;
	struct pagevec pvec;
	int nr_pages, i;
 
@@ -1796,13 +1791,11 @@ static inline void __unmap_underlying_blocks(struct inode *inode,
  *
  * The function skips space we know is already mapped to disk blocks.
  *
- * The function ignores errors ->get_block() returns, thus real
- * error handling is postponed to __mpage_writepage()
  */
 static void mpage_da_map_blocks(struct mpage_da_data *mpd)
 {
+	int err = 0;
	struct buffer_head *lbh = &mpd->lbh;
-	int err = 0, remain = lbh->b_size;
	sector_t next = lbh->b_blocknr;
	struct buffer_head new;
 
@@ -1812,35 +1805,32 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
	if (buffer_mapped(lbh) && !buffer_delay(lbh))
		return;
 
-	while (remain) {
-		new.b_state = lbh->b_state;
-		new.b_blocknr = 0;
-		new.b_size = remain;
-		err = mpd->get_block(mpd->inode, next, &new, 1);
-		if (err) {
-			/*
-			 * Rather than implement own error handling
-			 * here, we just leave remaining blocks
-			 * unallocated and try again with ->writepage()
-			 */
-			break;
-		}
-		BUG_ON(new.b_size == 0);
+	new.b_state = lbh->b_state;
+	new.b_blocknr = 0;
+	new.b_size = lbh->b_size;
 
-		if (buffer_new(&new))
-			__unmap_underlying_blocks(mpd->inode, &new);
+	/*
+	 * If we didn't accumulate anything
+	 * to write simply return
+	 */
+	if (!new.b_size)
+		return;
+	err = mpd->get_block(mpd->inode, next, &new, 1);
+	if (err)
+		return;
+	BUG_ON(new.b_size == 0);
 
-		/*
-		 * If blocks are delayed marked, we need to
-		 * put actual blocknr and drop delayed bit
-		 */
-		if (buffer_delay(lbh) || buffer_unwritten(lbh))
-			mpage_put_bnr_to_bhs(mpd, next, &new);
+	if (buffer_new(&new))
+		__unmap_underlying_blocks(mpd->inode, &new);
 
-		/* go for the remaining blocks */
-		next += new.b_size >> mpd->inode->i_blkbits;
-		remain -= new.b_size;
-	}
+	/*
+	 * If blocks are delayed marked, we need to
+	 * put actual blocknr and drop delayed bit
+	 */
+	if (buffer_delay(lbh) || buffer_unwritten(lbh))
+		mpage_put_bnr_to_bhs(mpd, next, &new);
+
+	return;
 }
 
 #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
@@ -1886,13 +1876,9 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
	 * need to flush current extent and start new one
	 */
	mpage_da_map_blocks(mpd);
-
-	/*
-	 * Now start a new extent
-	 */
-	lbh->b_size = bh->b_size;
-	lbh->b_state = bh->b_state & BH_FLAGS;
-	lbh->b_blocknr = logical;
+	mpage_da_submit_io(mpd);
+	mpd->io_done = 1;
+	return;
 }
 
 /*
@@ -1912,17 +1898,35 @@ static int __mpage_da_writepage(struct page *page,
	struct buffer_head *bh, *head, fake;
	sector_t logical;
 
+	if (mpd->io_done) {
+		/*
+		 * Rest of the pages in the page_vec:
+		 * redirty them and skip them. We will
+		 * try to write them again after
+		 * starting a new transaction
+		 */
+		redirty_page_for_writepage(wbc, page);
+		unlock_page(page);
+		return MPAGE_DA_EXTENT_TAIL;
+	}
	/*
	 * Can we merge this page to current extent?
	 */
	if (mpd->next_page != page->index) {
		/*
		 * Nope, we can't. So, we map non-allocated blocks
-		 * and start IO on them using __mpage_writepage()
+		 * and start IO on them using writepage()
		 */
		if (mpd->next_page != mpd->first_page) {
			mpage_da_map_blocks(mpd);
			mpage_da_submit_io(mpd);
+			/*
+			 * skip rest of the pages in the page_vec
+			 */
+			mpd->io_done = 1;
+			redirty_page_for_writepage(wbc, page);
+			unlock_page(page);
+			return MPAGE_DA_EXTENT_TAIL;
		}
 
		/*
@@ -1953,6 +1957,8 @@ static int __mpage_da_writepage(struct page *page,
		set_buffer_dirty(bh);
		set_buffer_uptodate(bh);
		mpage_add_bh_to_extent(mpd, logical, bh);
+		if (mpd->io_done)
+			return MPAGE_DA_EXTENT_TAIL;
	} else {
		/*
		 * Page with regular buffer heads, just add all dirty ones
@@ -1961,8 +1967,12 @@ static int __mpage_da_writepage(struct page *page,
		bh = head;
		do {
			BUG_ON(buffer_locked(bh));
-			if (buffer_dirty(bh))
+			if (buffer_dirty(bh) &&
+				(!buffer_mapped(bh) || buffer_delay(bh))) {
				mpage_add_bh_to_extent(mpd, logical, bh);
+				if (mpd->io_done)
+					return MPAGE_DA_EXTENT_TAIL;
+			}
			logical++;
		} while ((bh = bh->b_this_page) != head);
	}
@@ -1981,22 +1991,13 @@ static int __mpage_da_writepage(struct page *page,
  *
  * This is a library function, which implements the writepages()
  * address_space_operation.
- *
- * In order to avoid duplication of logic that deals with partial pages,
- * multiple bio per page, etc, we find non-allocated blocks, allocate
- * them with minimal calls to ->get_block() and re-use __mpage_writepage()
- *
- * It's important that we call __mpage_writepage() only once for each
- * involved page, otherwise we'd have to implement more complicated logic
- * to deal with pages w/o PG_lock or w/ PG_writeback and so on.
- *
- * See comments to mpage_writepages()
  */
 static int mpage_da_writepages(struct address_space *mapping,
			       struct writeback_control *wbc,
			       get_block_t get_block)
 {
	struct mpage_da_data mpd;
+	long to_write;
	int ret;
 
	if (!get_block)
@@ -2010,17 +2011,22 @@ static int mpage_da_writepages(struct address_space *mapping,
	mpd.first_page = 0;
	mpd.next_page = 0;
	mpd.get_block = get_block;
+	mpd.io_done = 0;
+	mpd.pages_written = 0;
+
+	to_write = wbc->nr_to_write;
 
	ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
 
	/*
	 * Handle last extent of pages
	 */
-	if (mpd.next_page != mpd.first_page) {
+	if (!mpd.io_done && mpd.next_page != mpd.first_page) {
		mpage_da_map_blocks(&mpd);
		mpage_da_submit_io(&mpd);
	}
 
+	wbc->nr_to_write = to_write - mpd.pages_written;
	return ret;
 }
 
@@ -2238,7 +2244,7 @@ static int ext4_da_writepage(struct page *page,
 #define EXT4_MAX_WRITEBACK_CREDITS 25
 
 static int ext4_da_writepages(struct address_space *mapping,
-			struct writeback_control *wbc)
+				struct writeback_control *wbc)
 {
	struct inode *inode = mapping->host;
	handle_t *handle = NULL;
@@ -2246,42 +2252,53 @@ static int ext4_da_writepages(struct address_space *mapping,
	int ret = 0;
	long to_write;
	loff_t range_start = 0;
+	long pages_skipped = 0;
 
	/*
	 * No pages to write? This is mainly a kludge to avoid starting
	 * a transaction for special inodes like journal inode on last iput()
	 * because that could violate lock ordering on umount
	 */
-	if (!mapping->nrpages)
+	if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		return 0;
 
-	/*
-	 * Estimate the worse case needed credits to write out
-	 * EXT4_MAX_BUF_BLOCKS pages
-	 */
-	needed_blocks = EXT4_MAX_WRITEBACK_CREDITS;
-
-	to_write = wbc->nr_to_write;
-	if (!wbc->range_cyclic) {
+	if (!wbc->range_cyclic)
		/*
		 * If range_cyclic is not set force range_cont
		 * and save the old writeback_index
		 */
		wbc->range_cont = 1;
-		range_start = wbc->range_start;
-	}
 
-	while (!ret && to_write) {
+	range_start = wbc->range_start;
+	pages_skipped = wbc->pages_skipped;
+
+restart_loop:
+	to_write = wbc->nr_to_write;
+	while (!ret && to_write > 0) {
+
+		/*
+		 * We insert one extent at a time, so we need
+		 * the credits for a single extent allocation.
+		 * Journalled mode is currently not supported
+		 * by delalloc.
+		 */
+		BUG_ON(ext4_should_journal_data(inode));
+		needed_blocks = EXT4_DATA_TRANS_BLOCKS(inode->i_sb);
+
		/* start a new transaction*/
		handle = ext4_journal_start(inode, needed_blocks);
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
+			printk(KERN_EMERG "%s: jbd2_start: "
+			       "%ld pages, ino %lu; err %d\n", __func__,
+			       wbc->nr_to_write, inode->i_ino, ret);
+			dump_stack();
			goto out_writepages;
		}
		if (ext4_should_order_data(inode)) {
			/*
			 * With ordered mode we need to add
			 * the inode to the journal handle
			 * when we do block allocation.
			 */
			ret = ext4_jbd2_file_inode(handle, inode);
@@ -2289,20 +2306,20 @@ static int ext4_da_writepages(struct address_space *mapping,
				ext4_journal_stop(handle);
				goto out_writepages;
			}
-
		}
-		/*
-		 * set the max dirty pages could be write at a time
-		 * to fit into the reserved transaction credits
-		 */
-		if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES)
-			wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES;
 
		to_write -= wbc->nr_to_write;
		ret = mpage_da_writepages(mapping, wbc,
					ext4_da_get_block_write);
		ext4_journal_stop(handle);
-		if (wbc->nr_to_write) {
+		if (ret == MPAGE_DA_EXTENT_TAIL) {
+			/*
+			 * got one extent now try with
+			 * rest of the pages
+			 */
+			to_write += wbc->nr_to_write;
+			ret = 0;
+		} else if (wbc->nr_to_write) {
			/*
			 * There is no more writeout needed
			 * or we requested for a noblocking writeout
@@ -2314,10 +2331,18 @@ static int ext4_da_writepages(struct address_space *mapping,
		wbc->nr_to_write = to_write;
	}
 
+	if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
+		/* We skipped pages in this loop */
+		wbc->range_start = range_start;
+		wbc->nr_to_write = to_write +
+				wbc->pages_skipped - pages_skipped;
+		wbc->pages_skipped = pages_skipped;
+		goto restart_loop;
+	}
+
 out_writepages:
	wbc->nr_to_write = to_write;
-	if (range_start)
-		wbc->range_start = range_start;
+	wbc->range_start = range_start;
	return ret;
 }
 
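The patch reads more easily once the new control flow is seen end to end: __mpage_da_writepage() accumulates exactly one extent of contiguous dirty pages, and the moment the run breaks it maps the blocks, submits the IO, sets mpd->io_done and bubbles MPAGE_DA_EXTENT_TAIL up through mpage_da_writepages(), so that ext4_da_writepages() can stop the current transaction and start a fresh one for the remaining pages. Below is a minimal user-space sketch of that loop, not kernel code; everything except the names mpd, io_done, pages_written and MPAGE_DA_EXTENT_TAIL is invented for illustration.

/*
 * Standalone user-space model, NOT kernel code: the page model, the
 * dirty[] stream and main() are made up; only the field and macro
 * names mirror the patch above.
 */
#include <stdio.h>

#define MPAGE_DA_EXTENT_TAIL 0x01

struct mpd {				/* stand-in for struct mpage_da_data */
	long first_page, next_page;	/* current extent of pages */
	int io_done;
	long pages_written;
};

/* invented stream of dirty page indices; a gap breaks the extent */
static const long dirty[] = { 3, 4, 5, 9, 10, 42 };
static const int ndirty = sizeof(dirty) / sizeof(dirty[0]);

/* models mpage_da_map_blocks() + mpage_da_submit_io() */
static void map_and_submit(struct mpd *m)
{
	printf("  submit extent [%ld..%ld)\n", m->first_page, m->next_page);
	m->pages_written += m->next_page - m->first_page;
}

/* models __mpage_da_writepage(): flush on extent break, then refuse */
static int da_writepage(struct mpd *m, long index)
{
	if (m->io_done)
		return MPAGE_DA_EXTENT_TAIL;	/* kernel: redirty + skip */
	if (m->first_page == m->next_page) {
		m->first_page = index;		/* start the first extent */
	} else if (m->next_page != index) {
		map_and_submit(m);		/* extent broke: flush it */
		m->io_done = 1;
		return MPAGE_DA_EXTENT_TAIL;
	}
	m->next_page = index + 1;
	return 0;
}

int main(void)
{
	int start = 0;

	while (start < ndirty) {
		/* one ext4_journal_start()..ext4_journal_stop() round */
		struct mpd m = { 0, 0, 0, 0 };
		int i;

		printf("new transaction from page %ld\n", dirty[start]);
		for (i = start; i < ndirty; i++) {
			if (da_writepage(&m, dirty[i]) == MPAGE_DA_EXTENT_TAIL)
				break;
			start = i + 1;
		}
		if (!m.io_done && m.next_page != m.first_page)
			map_and_submit(&m);	/* handle the last extent */
		printf("  %ld pages written in this round\n", m.pages_written);
	}
	return 0;
}

Built with any C compiler, the sketch prints one transaction round per contiguous run ([3..6), [9..11), [42..43) here), which is the shape of the restart_loop/while interplay the patch adds: credits are reserved for a single extent allocation per journal transaction instead of a worst-case reservation up front.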