 fs/ext4/inode.c | 201 ++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 113 insertions(+), 88 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ffc95ba48859..8dd22eade42c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -41,6 +41,8 @@
 #include "acl.h"
 #include "ext4_extents.h"
 
+#define MPAGE_DA_EXTENT_TAIL 0x01
+
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
					      loff_t new_size)
 {
@@ -1626,11 +1628,13 @@ struct mpage_da_data {
	unsigned long first_page, next_page;	/* extent of pages */
	get_block_t *get_block;
	struct writeback_control *wbc;
+	int io_done;
+	long pages_written;
 };
 
 /*
  * mpage_da_submit_io - walks through extent of pages and try to write
- * them with __mpage_writepage()
+ * them with the writepage() callback
  *
  * @mpd->inode: inode
  * @mpd->first_page: first page of the extent
@@ -1645,18 +1649,11 @@ struct mpage_da_data {
 static int mpage_da_submit_io(struct mpage_da_data *mpd)
 {
	struct address_space *mapping = mpd->inode->i_mapping;
-	struct mpage_data mpd_pp = {
-		.bio = NULL,
-		.last_block_in_bio = 0,
-		.get_block = mpd->get_block,
-		.use_writepage = 1,
-	};
	int ret = 0, err, nr_pages, i;
	unsigned long index, end;
	struct pagevec pvec;
 
	BUG_ON(mpd->next_page <= mpd->first_page);
-
	pagevec_init(&pvec, 0);
	index = mpd->first_page;
	end = mpd->next_page - 1;
@@ -1674,8 +1671,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
				break;
			index++;
 
-			err = __mpage_writepage(page, mpd->wbc, &mpd_pp);
-
+			err = mapping->a_ops->writepage(page, mpd->wbc);
+			if (!err)
+				mpd->pages_written++;
			/*
			 * In error case, we have to continue because
			 * remaining pages are still locked
@@ -1686,9 +1684,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
		}
		pagevec_release(&pvec);
	}
-	if (mpd_pp.bio)
-		mpage_bio_submit(WRITE, mpd_pp.bio);
-
	return ret;
 }
 
@@ -1711,7 +1706,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
	int blocks = exbh->b_size >> inode->i_blkbits;
	sector_t pblock = exbh->b_blocknr, cur_logical;
	struct buffer_head *head, *bh;
-	unsigned long index, end;
+	pgoff_t index, end;
	struct pagevec pvec;
	int nr_pages, i;
 
@@ -1796,13 +1791,11 @@ static inline void __unmap_underlying_blocks(struct inode *inode,
  *
  * The function skips space we know is already mapped to disk blocks.
  *
- * The function ignores errors ->get_block() returns, thus real
- * error handling is postponed to __mpage_writepage()
  */
 static void mpage_da_map_blocks(struct mpage_da_data *mpd)
 {
+	int err = 0;
	struct buffer_head *lbh = &mpd->lbh;
-	int err = 0, remain = lbh->b_size;
	sector_t next = lbh->b_blocknr;
	struct buffer_head new;
 
@@ -1812,35 +1805,32 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
	if (buffer_mapped(lbh) && !buffer_delay(lbh))
		return;
 
-	while (remain) {
-		new.b_state = lbh->b_state;
-		new.b_blocknr = 0;
-		new.b_size = remain;
-		err = mpd->get_block(mpd->inode, next, &new, 1);
-		if (err) {
-			/*
-			 * Rather than implement own error handling
-			 * here, we just leave remaining blocks
-			 * unallocated and try again with ->writepage()
-			 */
-			break;
-		}
-		BUG_ON(new.b_size == 0);
+	new.b_state = lbh->b_state;
+	new.b_blocknr = 0;
+	new.b_size = lbh->b_size;
 
-		if (buffer_new(&new))
-			__unmap_underlying_blocks(mpd->inode, &new);
+	/*
+	 * If we didn't accumulate anything
+	 * to write simply return
+	 */
+	if (!new.b_size)
+		return;
+	err = mpd->get_block(mpd->inode, next, &new, 1);
+	if (err)
+		return;
+	BUG_ON(new.b_size == 0);
 
-		/*
-		 * If blocks are delayed marked, we need to
-		 * put actual blocknr and drop delayed bit
-		 */
-		if (buffer_delay(lbh) || buffer_unwritten(lbh))
-			mpage_put_bnr_to_bhs(mpd, next, &new);
+	if (buffer_new(&new))
+		__unmap_underlying_blocks(mpd->inode, &new);
 
-		/* go for the remaining blocks */
-		next += new.b_size >> mpd->inode->i_blkbits;
-		remain -= new.b_size;
-	}
+	/*
+	 * If blocks are delayed marked, we need to
+	 * put actual blocknr and drop delayed bit
+	 */
+	if (buffer_delay(lbh) || buffer_unwritten(lbh))
+		mpage_put_bnr_to_bhs(mpd, next, &new);
+
+	return;
 }
 
 #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
@@ -1886,13 +1876,9 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
	 * need to flush current extent and start new one
	 */
	mpage_da_map_blocks(mpd);
-
-	/*
-	 * Now start a new extent
-	 */
-	lbh->b_size = bh->b_size;
-	lbh->b_state = bh->b_state & BH_FLAGS;
-	lbh->b_blocknr = logical;
+	mpage_da_submit_io(mpd);
+	mpd->io_done = 1;
+	return;
 }
 
 /*
@@ -1912,17 +1898,35 @@ static int __mpage_da_writepage(struct page *page,
	struct buffer_head *bh, *head, fake;
	sector_t logical;
 
+	if (mpd->io_done) {
+		/*
+		 * Rest of the pages in the page_vec:
+		 * redirty them and skip them. We will
+		 * try to write them again after
+		 * starting a new transaction
+		 */
+		redirty_page_for_writepage(wbc, page);
+		unlock_page(page);
+		return MPAGE_DA_EXTENT_TAIL;
+	}
	/*
	 * Can we merge this page to current extent?
	 */
	if (mpd->next_page != page->index) {
		/*
		 * Nope, we can't. So, we map non-allocated blocks
-		 * and start IO on them using __mpage_writepage()
+		 * and start IO on them using writepage()
		 */
		if (mpd->next_page != mpd->first_page) {
			mpage_da_map_blocks(mpd);
			mpage_da_submit_io(mpd);
+			/*
+			 * skip rest of the pages in the page_vec
+			 */
+			mpd->io_done = 1;
+			redirty_page_for_writepage(wbc, page);
+			unlock_page(page);
+			return MPAGE_DA_EXTENT_TAIL;
		}
 
		/*
@@ -1953,6 +1957,8 @@ static int __mpage_da_writepage(struct page *page,
		set_buffer_dirty(bh);
		set_buffer_uptodate(bh);
		mpage_add_bh_to_extent(mpd, logical, bh);
+		if (mpd->io_done)
+			return MPAGE_DA_EXTENT_TAIL;
	} else {
		/*
		 * Page with regular buffer heads, just add all dirty ones
@@ -1961,8 +1967,12 @@ static int __mpage_da_writepage(struct page *page,
		bh = head;
		do {
			BUG_ON(buffer_locked(bh));
-			if (buffer_dirty(bh))
+			if (buffer_dirty(bh) &&
+				(!buffer_mapped(bh) || buffer_delay(bh))) {
				mpage_add_bh_to_extent(mpd, logical, bh);
+				if (mpd->io_done)
+					return MPAGE_DA_EXTENT_TAIL;
+			}
			logical++;
		} while ((bh = bh->b_this_page) != head);
	}
@@ -1981,22 +1991,13 @@ static int __mpage_da_writepage(struct page *page,
  *
  * This is a library function, which implements the writepages()
  * address_space_operation.
- *
- * In order to avoid duplication of logic that deals with partial pages,
- * multiple bio per page, etc, we find non-allocated blocks, allocate
- * them with minimal calls to ->get_block() and re-use __mpage_writepage()
- *
- * It's important that we call __mpage_writepage() only once for each
- * involved page, otherwise we'd have to implement more complicated logic
- * to deal with pages w/o PG_lock or w/ PG_writeback and so on.
- *
- * See comments to mpage_writepages()
  */
 static int mpage_da_writepages(struct address_space *mapping,
			       struct writeback_control *wbc,
			       get_block_t get_block)
 {
	struct mpage_da_data mpd;
+	long to_write;
	int ret;
 
	if (!get_block)
@@ -2010,17 +2011,22 @@ static int mpage_da_writepages(struct address_space *mapping,
	mpd.first_page = 0;
	mpd.next_page = 0;
	mpd.get_block = get_block;
+	mpd.io_done = 0;
+	mpd.pages_written = 0;
+
+	to_write = wbc->nr_to_write;
 
	ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
 
	/*
	 * Handle last extent of pages
	 */
-	if (mpd.next_page != mpd.first_page) {
+	if (!mpd.io_done && mpd.next_page != mpd.first_page) {
		mpage_da_map_blocks(&mpd);
		mpage_da_submit_io(&mpd);
	}
 
+	wbc->nr_to_write = to_write - mpd.pages_written;
	return ret;
 }
 
@@ -2238,7 +2244,7 @@ static int ext4_da_writepage(struct page *page,
 #define EXT4_MAX_WRITEBACK_CREDITS 25
 
 static int ext4_da_writepages(struct address_space *mapping,
-			struct writeback_control *wbc)
+				struct writeback_control *wbc)
 {
	struct inode *inode = mapping->host;
	handle_t *handle = NULL;
@@ -2246,42 +2252,53 @@ static int ext4_da_writepages(struct address_space *mapping,
	int ret = 0;
	long to_write;
	loff_t range_start = 0;
+	long pages_skipped = 0;
 
	/*
	 * No pages to write? This is mainly a kludge to avoid starting
	 * a transaction for special inodes like journal inode on last iput()
	 * because that could violate lock ordering on umount
	 */
-	if (!mapping->nrpages)
+	if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		return 0;
 
-	/*
-	 * Estimate the worse case needed credits to write out
-	 * EXT4_MAX_BUF_BLOCKS pages
-	 */
-	needed_blocks = EXT4_MAX_WRITEBACK_CREDITS;
-
-	to_write = wbc->nr_to_write;
-	if (!wbc->range_cyclic) {
+	if (!wbc->range_cyclic)
		/*
		 * If range_cyclic is not set force range_cont
		 * and save the old writeback_index
		 */
		wbc->range_cont = 1;
-		range_start = wbc->range_start;
-	}
 
-	while (!ret && to_write) {
+	range_start = wbc->range_start;
+	pages_skipped = wbc->pages_skipped;
+
+restart_loop:
+	to_write = wbc->nr_to_write;
+	while (!ret && to_write > 0) {
+
+		/*
+		 * We insert one extent at a time, so we need
+		 * the credits for a single extent allocation.
+		 * Journalled mode is currently not supported
+		 * by delalloc.
+		 */
+		BUG_ON(ext4_should_journal_data(inode));
+		needed_blocks = EXT4_DATA_TRANS_BLOCKS(inode->i_sb);
+
		/* start a new transaction*/
		handle = ext4_journal_start(inode, needed_blocks);
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
+			printk(KERN_EMERG "%s: jbd2_start: "
+			       "%ld pages, ino %lu; err %d\n", __func__,
+			       wbc->nr_to_write, inode->i_ino, ret);
+			dump_stack();
			goto out_writepages;
		}
		if (ext4_should_order_data(inode)) {
			/*
			 * With ordered mode we need to add
			 * the inode to the journal handle
			 * when we do block allocation.
			 */
			ret = ext4_jbd2_file_inode(handle, inode);
@@ -2289,20 +2306,20 @@ static int ext4_da_writepages(struct address_space *mapping,
				ext4_journal_stop(handle);
				goto out_writepages;
			}
-
		}
-		/*
-		 * set the max dirty pages could be write at a time
-		 * to fit into the reserved transaction credits
-		 */
-		if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES)
-			wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES;
 
		to_write -= wbc->nr_to_write;
		ret = mpage_da_writepages(mapping, wbc,
					ext4_da_get_block_write);
		ext4_journal_stop(handle);
-		if (wbc->nr_to_write) {
+		if (ret == MPAGE_DA_EXTENT_TAIL) {
+			/*
+			 * got one extent now try with
+			 * rest of the pages
+			 */
+			to_write += wbc->nr_to_write;
+			ret = 0;
+		} else if (wbc->nr_to_write) {
			/*
			 * There is no more writeout needed
			 * or we requested for a noblocking writeout
@@ -2314,10 +2331,18 @@ static int ext4_da_writepages(struct address_space *mapping,
		wbc->nr_to_write = to_write;
	}
 
+	if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
+		/* We skipped pages in this loop */
+		wbc->range_start = range_start;
+		wbc->nr_to_write = to_write +
+				wbc->pages_skipped - pages_skipped;
+		wbc->pages_skipped = pages_skipped;
+		goto restart_loop;
+	}
+
 out_writepages:
	wbc->nr_to_write = to_write;
-	if (range_start)
-		wbc->range_start = range_start;
+	wbc->range_start = range_start;
	return ret;
 }
 
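The patch reads more easily once the new control flow is seen end to end: __mpage_da_writepage() accumulates exactly one extent of contiguous dirty pages, and the moment the run breaks it maps the blocks, submits the IO, sets mpd->io_done and bubbles MPAGE_DA_EXTENT_TAIL up through mpage_da_writepages(), so that ext4_da_writepages() can stop the current transaction and start a fresh one for the remaining pages. Below is a minimal user-space sketch of that loop, not kernel code; everything except the names mpd, io_done, pages_written and MPAGE_DA_EXTENT_TAIL is invented for illustration.

/*
 * Standalone user-space model, NOT kernel code: the page model, the
 * dirty[] stream and main() are made up; only the field and macro
 * names mirror the patch above.
 */
#include <stdio.h>

#define MPAGE_DA_EXTENT_TAIL 0x01

struct mpd {				/* stand-in for struct mpage_da_data */
	long first_page, next_page;	/* current extent of pages */
	int io_done;
	long pages_written;
};

/* invented stream of dirty page indices; a gap breaks the extent */
static const long dirty[] = { 3, 4, 5, 9, 10, 42 };
static const int ndirty = sizeof(dirty) / sizeof(dirty[0]);

/* models mpage_da_map_blocks() + mpage_da_submit_io() */
static void map_and_submit(struct mpd *m)
{
	printf("  submit extent [%ld..%ld)\n", m->first_page, m->next_page);
	m->pages_written += m->next_page - m->first_page;
}

/* models __mpage_da_writepage(): flush on extent break, then refuse */
static int da_writepage(struct mpd *m, long index)
{
	if (m->io_done)
		return MPAGE_DA_EXTENT_TAIL;	/* kernel: redirty + skip */
	if (m->first_page == m->next_page) {
		m->first_page = index;		/* start the first extent */
	} else if (m->next_page != index) {
		map_and_submit(m);		/* extent broke: flush it */
		m->io_done = 1;
		return MPAGE_DA_EXTENT_TAIL;
	}
	m->next_page = index + 1;
	return 0;
}

int main(void)
{
	int start = 0;

	while (start < ndirty) {
		/* one ext4_journal_start()..ext4_journal_stop() round */
		struct mpd m = { 0, 0, 0, 0 };
		int i;

		printf("new transaction from page %ld\n", dirty[start]);
		for (i = start; i < ndirty; i++) {
			if (da_writepage(&m, dirty[i]) == MPAGE_DA_EXTENT_TAIL)
				break;
			start = i + 1;
		}
		if (!m.io_done && m.next_page != m.first_page)
			map_and_submit(&m);	/* handle the last extent */
		printf("  %ld pages written in this round\n", m.pages_written);
	}
	return 0;
}

Built with any C compiler, the sketch prints one transaction round per contiguous run ([3..6), [9..11), [42..43) here), which is the shape of the restart_loop/while interplay the patch adds: credits are reserved for a single extent allocation per journal transaction instead of a worst-case reservation up front.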