aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/ext4/inode.c201
1 files changed, 113 insertions, 88 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ffc95ba48859..8dd22eade42c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -41,6 +41,8 @@
41#include "acl.h" 41#include "acl.h"
42#include "ext4_extents.h" 42#include "ext4_extents.h"
43 43
44#define MPAGE_DA_EXTENT_TAIL 0x01
45
44static inline int ext4_begin_ordered_truncate(struct inode *inode, 46static inline int ext4_begin_ordered_truncate(struct inode *inode,
45 loff_t new_size) 47 loff_t new_size)
46{ 48{
@@ -1626,11 +1628,13 @@ struct mpage_da_data {
1626 unsigned long first_page, next_page; /* extent of pages */ 1628 unsigned long first_page, next_page; /* extent of pages */
1627 get_block_t *get_block; 1629 get_block_t *get_block;
1628 struct writeback_control *wbc; 1630 struct writeback_control *wbc;
1631 int io_done;
1632 long pages_written;
1629}; 1633};
1630 1634
1631/* 1635/*
1632 * mpage_da_submit_io - walks through extent of pages and try to write 1636 * mpage_da_submit_io - walks through extent of pages and try to write
1633 * them with __mpage_writepage() 1637 * them with writepage() call back
1634 * 1638 *
1635 * @mpd->inode: inode 1639 * @mpd->inode: inode
1636 * @mpd->first_page: first page of the extent 1640 * @mpd->first_page: first page of the extent
@@ -1645,18 +1649,11 @@ struct mpage_da_data {
1645static int mpage_da_submit_io(struct mpage_da_data *mpd) 1649static int mpage_da_submit_io(struct mpage_da_data *mpd)
1646{ 1650{
1647 struct address_space *mapping = mpd->inode->i_mapping; 1651 struct address_space *mapping = mpd->inode->i_mapping;
1648 struct mpage_data mpd_pp = {
1649 .bio = NULL,
1650 .last_block_in_bio = 0,
1651 .get_block = mpd->get_block,
1652 .use_writepage = 1,
1653 };
1654 int ret = 0, err, nr_pages, i; 1652 int ret = 0, err, nr_pages, i;
1655 unsigned long index, end; 1653 unsigned long index, end;
1656 struct pagevec pvec; 1654 struct pagevec pvec;
1657 1655
1658 BUG_ON(mpd->next_page <= mpd->first_page); 1656 BUG_ON(mpd->next_page <= mpd->first_page);
1659
1660 pagevec_init(&pvec, 0); 1657 pagevec_init(&pvec, 0);
1661 index = mpd->first_page; 1658 index = mpd->first_page;
1662 end = mpd->next_page - 1; 1659 end = mpd->next_page - 1;
@@ -1674,8 +1671,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1674 break; 1671 break;
1675 index++; 1672 index++;
1676 1673
1677 err = __mpage_writepage(page, mpd->wbc, &mpd_pp); 1674 err = mapping->a_ops->writepage(page, mpd->wbc);
1678 1675 if (!err)
1676 mpd->pages_written++;
1679 /* 1677 /*
1680 * In error case, we have to continue because 1678 * In error case, we have to continue because
1681 * remaining pages are still locked 1679 * remaining pages are still locked
@@ -1686,9 +1684,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1686 } 1684 }
1687 pagevec_release(&pvec); 1685 pagevec_release(&pvec);
1688 } 1686 }
1689 if (mpd_pp.bio)
1690 mpage_bio_submit(WRITE, mpd_pp.bio);
1691
1692 return ret; 1687 return ret;
1693} 1688}
1694 1689
@@ -1711,7 +1706,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
1711 int blocks = exbh->b_size >> inode->i_blkbits; 1706 int blocks = exbh->b_size >> inode->i_blkbits;
1712 sector_t pblock = exbh->b_blocknr, cur_logical; 1707 sector_t pblock = exbh->b_blocknr, cur_logical;
1713 struct buffer_head *head, *bh; 1708 struct buffer_head *head, *bh;
1714 unsigned long index, end; 1709 pgoff_t index, end;
1715 struct pagevec pvec; 1710 struct pagevec pvec;
1716 int nr_pages, i; 1711 int nr_pages, i;
1717 1712
@@ -1796,13 +1791,11 @@ static inline void __unmap_underlying_blocks(struct inode *inode,
1796 * 1791 *
1797 * The function skips space we know is already mapped to disk blocks. 1792 * The function skips space we know is already mapped to disk blocks.
1798 * 1793 *
1799 * The function ignores errors ->get_block() returns, thus real
1800 * error handling is postponed to __mpage_writepage()
1801 */ 1794 */
1802static void mpage_da_map_blocks(struct mpage_da_data *mpd) 1795static void mpage_da_map_blocks(struct mpage_da_data *mpd)
1803{ 1796{
1797 int err = 0;
1804 struct buffer_head *lbh = &mpd->lbh; 1798 struct buffer_head *lbh = &mpd->lbh;
1805 int err = 0, remain = lbh->b_size;
1806 sector_t next = lbh->b_blocknr; 1799 sector_t next = lbh->b_blocknr;
1807 struct buffer_head new; 1800 struct buffer_head new;
1808 1801
@@ -1812,35 +1805,32 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
1812 if (buffer_mapped(lbh) && !buffer_delay(lbh)) 1805 if (buffer_mapped(lbh) && !buffer_delay(lbh))
1813 return; 1806 return;
1814 1807
1815 while (remain) { 1808 new.b_state = lbh->b_state;
1816 new.b_state = lbh->b_state; 1809 new.b_blocknr = 0;
1817 new.b_blocknr = 0; 1810 new.b_size = lbh->b_size;
1818 new.b_size = remain;
1819 err = mpd->get_block(mpd->inode, next, &new, 1);
1820 if (err) {
1821 /*
1822 * Rather than implement own error handling
1823 * here, we just leave remaining blocks
1824 * unallocated and try again with ->writepage()
1825 */
1826 break;
1827 }
1828 BUG_ON(new.b_size == 0);
1829 1811
1830 if (buffer_new(&new)) 1812 /*
1831 __unmap_underlying_blocks(mpd->inode, &new); 1813 * If we didn't accumulate anything
1814 * to write simply return
1815 */
1816 if (!new.b_size)
1817 return;
1818 err = mpd->get_block(mpd->inode, next, &new, 1);
1819 if (err)
1820 return;
1821 BUG_ON(new.b_size == 0);
1832 1822
1833 /* 1823 if (buffer_new(&new))
1834 * If blocks are delayed marked, we need to 1824 __unmap_underlying_blocks(mpd->inode, &new);
1835 * put actual blocknr and drop delayed bit
1836 */
1837 if (buffer_delay(lbh) || buffer_unwritten(lbh))
1838 mpage_put_bnr_to_bhs(mpd, next, &new);
1839 1825
1840 /* go for the remaining blocks */ 1826 /*
1841 next += new.b_size >> mpd->inode->i_blkbits; 1827 * If blocks are delayed marked, we need to
1842 remain -= new.b_size; 1828 * put actual blocknr and drop delayed bit
1843 } 1829 */
1830 if (buffer_delay(lbh) || buffer_unwritten(lbh))
1831 mpage_put_bnr_to_bhs(mpd, next, &new);
1832
1833 return;
1844} 1834}
1845 1835
1846#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ 1836#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
@@ -1886,13 +1876,9 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
1886 * need to flush current extent and start new one 1876 * need to flush current extent and start new one
1887 */ 1877 */
1888 mpage_da_map_blocks(mpd); 1878 mpage_da_map_blocks(mpd);
1889 1879 mpage_da_submit_io(mpd);
1890 /* 1880 mpd->io_done = 1;
1891 * Now start a new extent 1881 return;
1892 */
1893 lbh->b_size = bh->b_size;
1894 lbh->b_state = bh->b_state & BH_FLAGS;
1895 lbh->b_blocknr = logical;
1896} 1882}
1897 1883
1898/* 1884/*
@@ -1912,17 +1898,35 @@ static int __mpage_da_writepage(struct page *page,
1912 struct buffer_head *bh, *head, fake; 1898 struct buffer_head *bh, *head, fake;
1913 sector_t logical; 1899 sector_t logical;
1914 1900
1901 if (mpd->io_done) {
1902 /*
1903 * Rest of the pages in the page_vec:
1904 * redirty them and skip them. We will
1905 * try to write them again after
1906 * starting a new transaction
1907 */
1908 redirty_page_for_writepage(wbc, page);
1909 unlock_page(page);
1910 return MPAGE_DA_EXTENT_TAIL;
1911 }
1915 /* 1912 /*
1916 * Can we merge this page to current extent? 1913 * Can we merge this page to current extent?
1917 */ 1914 */
1918 if (mpd->next_page != page->index) { 1915 if (mpd->next_page != page->index) {
1919 /* 1916 /*
1920 * Nope, we can't. So, we map non-allocated blocks 1917 * Nope, we can't. So, we map non-allocated blocks
1921 * and start IO on them using __mpage_writepage() 1918 * and start IO on them using writepage()
1922 */ 1919 */
1923 if (mpd->next_page != mpd->first_page) { 1920 if (mpd->next_page != mpd->first_page) {
1924 mpage_da_map_blocks(mpd); 1921 mpage_da_map_blocks(mpd);
1925 mpage_da_submit_io(mpd); 1922 mpage_da_submit_io(mpd);
1923 /*
1924 * skip the rest of the pages in the page_vec
1925 */
1926 mpd->io_done = 1;
1927 redirty_page_for_writepage(wbc, page);
1928 unlock_page(page);
1929 return MPAGE_DA_EXTENT_TAIL;
1926 } 1930 }
1927 1931
1928 /* 1932 /*
@@ -1953,6 +1957,8 @@ static int __mpage_da_writepage(struct page *page,
1953 set_buffer_dirty(bh); 1957 set_buffer_dirty(bh);
1954 set_buffer_uptodate(bh); 1958 set_buffer_uptodate(bh);
1955 mpage_add_bh_to_extent(mpd, logical, bh); 1959 mpage_add_bh_to_extent(mpd, logical, bh);
1960 if (mpd->io_done)
1961 return MPAGE_DA_EXTENT_TAIL;
1956 } else { 1962 } else {
1957 /* 1963 /*
1958 * Page with regular buffer heads, just add all dirty ones 1964 * Page with regular buffer heads, just add all dirty ones
@@ -1961,8 +1967,12 @@ static int __mpage_da_writepage(struct page *page,
1961 bh = head; 1967 bh = head;
1962 do { 1968 do {
1963 BUG_ON(buffer_locked(bh)); 1969 BUG_ON(buffer_locked(bh));
1964 if (buffer_dirty(bh)) 1970 if (buffer_dirty(bh) &&
1971 (!buffer_mapped(bh) || buffer_delay(bh))) {
1965 mpage_add_bh_to_extent(mpd, logical, bh); 1972 mpage_add_bh_to_extent(mpd, logical, bh);
1973 if (mpd->io_done)
1974 return MPAGE_DA_EXTENT_TAIL;
1975 }
1966 logical++; 1976 logical++;
1967 } while ((bh = bh->b_this_page) != head); 1977 } while ((bh = bh->b_this_page) != head);
1968 } 1978 }
@@ -1981,22 +1991,13 @@ static int __mpage_da_writepage(struct page *page,
1981 * 1991 *
1982 * This is a library function, which implements the writepages() 1992 * This is a library function, which implements the writepages()
1983 * address_space_operation. 1993 * address_space_operation.
1984 *
1985 * In order to avoid duplication of logic that deals with partial pages,
1986 * multiple bio per page, etc, we find non-allocated blocks, allocate
1987 * them with minimal calls to ->get_block() and re-use __mpage_writepage()
1988 *
1989 * It's important that we call __mpage_writepage() only once for each
1990 * involved page, otherwise we'd have to implement more complicated logic
1991 * to deal with pages w/o PG_lock or w/ PG_writeback and so on.
1992 *
1993 * See comments to mpage_writepages()
1994 */ 1994 */
1995static int mpage_da_writepages(struct address_space *mapping, 1995static int mpage_da_writepages(struct address_space *mapping,
1996 struct writeback_control *wbc, 1996 struct writeback_control *wbc,
1997 get_block_t get_block) 1997 get_block_t get_block)
1998{ 1998{
1999 struct mpage_da_data mpd; 1999 struct mpage_da_data mpd;
2000 long to_write;
2000 int ret; 2001 int ret;
2001 2002
2002 if (!get_block) 2003 if (!get_block)
@@ -2010,17 +2011,22 @@ static int mpage_da_writepages(struct address_space *mapping,
2010 mpd.first_page = 0; 2011 mpd.first_page = 0;
2011 mpd.next_page = 0; 2012 mpd.next_page = 0;
2012 mpd.get_block = get_block; 2013 mpd.get_block = get_block;
2014 mpd.io_done = 0;
2015 mpd.pages_written = 0;
2016
2017 to_write = wbc->nr_to_write;
2013 2018
2014 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); 2019 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
2015 2020
2016 /* 2021 /*
2017 * Handle last extent of pages 2022 * Handle last extent of pages
2018 */ 2023 */
2019 if (mpd.next_page != mpd.first_page) { 2024 if (!mpd.io_done && mpd.next_page != mpd.first_page) {
2020 mpage_da_map_blocks(&mpd); 2025 mpage_da_map_blocks(&mpd);
2021 mpage_da_submit_io(&mpd); 2026 mpage_da_submit_io(&mpd);
2022 } 2027 }
2023 2028
2029 wbc->nr_to_write = to_write - mpd.pages_written;
2024 return ret; 2030 return ret;
2025} 2031}
2026 2032
@@ -2238,7 +2244,7 @@ static int ext4_da_writepage(struct page *page,
2238#define EXT4_MAX_WRITEBACK_CREDITS 25 2244#define EXT4_MAX_WRITEBACK_CREDITS 25
2239 2245
2240static int ext4_da_writepages(struct address_space *mapping, 2246static int ext4_da_writepages(struct address_space *mapping,
2241 struct writeback_control *wbc) 2247 struct writeback_control *wbc)
2242{ 2248{
2243 struct inode *inode = mapping->host; 2249 struct inode *inode = mapping->host;
2244 handle_t *handle = NULL; 2250 handle_t *handle = NULL;
@@ -2246,42 +2252,53 @@ static int ext4_da_writepages(struct address_space *mapping,
2246 int ret = 0; 2252 int ret = 0;
2247 long to_write; 2253 long to_write;
2248 loff_t range_start = 0; 2254 loff_t range_start = 0;
2255 long pages_skipped = 0;
2249 2256
2250 /* 2257 /*
2251 * No pages to write? This is mainly a kludge to avoid starting 2258 * No pages to write? This is mainly a kludge to avoid starting
2252 * a transaction for special inodes like journal inode on last iput() 2259 * a transaction for special inodes like journal inode on last iput()
2253 * because that could violate lock ordering on umount 2260 * because that could violate lock ordering on umount
2254 */ 2261 */
2255 if (!mapping->nrpages) 2262 if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
2256 return 0; 2263 return 0;
2257 2264
2258 /* 2265 if (!wbc->range_cyclic)
2259 * Estimate the worse case needed credits to write out
2260 * EXT4_MAX_BUF_BLOCKS pages
2261 */
2262 needed_blocks = EXT4_MAX_WRITEBACK_CREDITS;
2263
2264 to_write = wbc->nr_to_write;
2265 if (!wbc->range_cyclic) {
2266 /* 2266 /*
2267 * If range_cyclic is not set force range_cont 2267 * If range_cyclic is not set force range_cont
2268 * and save the old writeback_index 2268 * and save the old writeback_index
2269 */ 2269 */
2270 wbc->range_cont = 1; 2270 wbc->range_cont = 1;
2271 range_start = wbc->range_start;
2272 }
2273 2271
2274 while (!ret && to_write) { 2272 range_start = wbc->range_start;
2273 pages_skipped = wbc->pages_skipped;
2274
2275restart_loop:
2276 to_write = wbc->nr_to_write;
2277 while (!ret && to_write > 0) {
2278
2279 /*
2280 * We insert one extent at a time, so we need
2281 * the credits required for a single extent allocation.
2282 * Journalled mode is currently not supported
2283 * by delalloc.
2284 */
2285 BUG_ON(ext4_should_journal_data(inode));
2286 needed_blocks = EXT4_DATA_TRANS_BLOCKS(inode->i_sb);
2287
2275 /* start a new transaction*/ 2288 /* start a new transaction*/
2276 handle = ext4_journal_start(inode, needed_blocks); 2289 handle = ext4_journal_start(inode, needed_blocks);
2277 if (IS_ERR(handle)) { 2290 if (IS_ERR(handle)) {
2278 ret = PTR_ERR(handle); 2291 ret = PTR_ERR(handle);
2292 printk(KERN_EMERG "%s: jbd2_start: "
2293 "%ld pages, ino %lu; err %d\n", __func__,
2294 wbc->nr_to_write, inode->i_ino, ret);
2295 dump_stack();
2279 goto out_writepages; 2296 goto out_writepages;
2280 } 2297 }
2281 if (ext4_should_order_data(inode)) { 2298 if (ext4_should_order_data(inode)) {
2282 /* 2299 /*
2283 * With ordered mode we need to add 2300 * With ordered mode we need to add
2284 * the inode to the journal handle 2301 * the inode to the journal handle
2285 * when we do block allocation. 2302 * when we do block allocation.
2286 */ 2303 */
2287 ret = ext4_jbd2_file_inode(handle, inode); 2304 ret = ext4_jbd2_file_inode(handle, inode);
@@ -2289,20 +2306,20 @@ static int ext4_da_writepages(struct address_space *mapping,
2289 ext4_journal_stop(handle); 2306 ext4_journal_stop(handle);
2290 goto out_writepages; 2307 goto out_writepages;
2291 } 2308 }
2292
2293 } 2309 }
2294 /*
2295 * set the max dirty pages could be write at a time
2296 * to fit into the reserved transaction credits
2297 */
2298 if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES)
2299 wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES;
2300 2310
2301 to_write -= wbc->nr_to_write; 2311 to_write -= wbc->nr_to_write;
2302 ret = mpage_da_writepages(mapping, wbc, 2312 ret = mpage_da_writepages(mapping, wbc,
2303 ext4_da_get_block_write); 2313 ext4_da_get_block_write);
2304 ext4_journal_stop(handle); 2314 ext4_journal_stop(handle);
2305 if (wbc->nr_to_write) { 2315 if (ret == MPAGE_DA_EXTENT_TAIL) {
2316 /*
2317 * got one extent now try with
2318 * rest of the pages
2319 */
2320 to_write += wbc->nr_to_write;
2321 ret = 0;
2322 } else if (wbc->nr_to_write) {
2306 /* 2323 /*
2307 * There is no more writeout needed 2324 * There is no more writeout needed
2308 * or we requested for a noblocking writeout 2325 * or we requested for a noblocking writeout
@@ -2314,10 +2331,18 @@ static int ext4_da_writepages(struct address_space *mapping,
2314 wbc->nr_to_write = to_write; 2331 wbc->nr_to_write = to_write;
2315 } 2332 }
2316 2333
2334 if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
2335 /* We skipped pages in this loop */
2336 wbc->range_start = range_start;
2337 wbc->nr_to_write = to_write +
2338 wbc->pages_skipped - pages_skipped;
2339 wbc->pages_skipped = pages_skipped;
2340 goto restart_loop;
2341 }
2342
2317out_writepages: 2343out_writepages:
2318 wbc->nr_to_write = to_write; 2344 wbc->nr_to_write = to_write;
2319 if (range_start) 2345 wbc->range_start = range_start;
2320 wbc->range_start = range_start;
2321 return ret; 2346 return ret;
2322} 2347}
2323 2348