about summary refs log tree commit diff stats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--	fs/ext4/inode.c	293
1 files changed, 162 insertions, 131 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c2ca04e67a4f..9115f2807515 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -553,7 +553,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
553 } 553 }
554 if (retval > 0) { 554 if (retval > 0) {
555 int ret; 555 int ret;
556 unsigned long long status; 556 unsigned int status;
557 557
558 if (unlikely(retval != map->m_len)) { 558 if (unlikely(retval != map->m_len)) {
559 ext4_warning(inode->i_sb, 559 ext4_warning(inode->i_sb,
@@ -653,7 +653,7 @@ found:
653 653
654 if (retval > 0) { 654 if (retval > 0) {
655 int ret; 655 int ret;
656 unsigned long long status; 656 unsigned int status;
657 657
658 if (unlikely(retval != map->m_len)) { 658 if (unlikely(retval != map->m_len)) {
659 ext4_warning(inode->i_sb, 659 ext4_warning(inode->i_sb,
@@ -969,7 +969,8 @@ retry_journal:
969 ext4_journal_stop(handle); 969 ext4_journal_stop(handle);
970 goto retry_grab; 970 goto retry_grab;
971 } 971 }
972 wait_on_page_writeback(page); 972 /* In case writeback began while the page was unlocked */
973 wait_for_stable_page(page);
973 974
974 if (ext4_should_dioread_nolock(inode)) 975 if (ext4_should_dioread_nolock(inode))
975 ret = __block_write_begin(page, pos, len, ext4_get_block_write); 976 ret = __block_write_begin(page, pos, len, ext4_get_block_write);
@@ -1633,7 +1634,7 @@ add_delayed:
1633 set_buffer_delay(bh); 1634 set_buffer_delay(bh);
1634 } else if (retval > 0) { 1635 } else if (retval > 0) {
1635 int ret; 1636 int ret;
1636 unsigned long long status; 1637 unsigned int status;
1637 1638
1638 if (unlikely(retval != map->m_len)) { 1639 if (unlikely(retval != map->m_len)) {
1639 ext4_warning(inode->i_sb, 1640 ext4_warning(inode->i_sb,
@@ -1890,12 +1891,32 @@ static int ext4_writepage(struct page *page,
1890 return ret; 1891 return ret;
1891} 1892}
1892 1893
1894static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
1895{
1896 int len;
1897 loff_t size = i_size_read(mpd->inode);
1898 int err;
1899
1900 BUG_ON(page->index != mpd->first_page);
1901 if (page->index == size >> PAGE_CACHE_SHIFT)
1902 len = size & ~PAGE_CACHE_MASK;
1903 else
1904 len = PAGE_CACHE_SIZE;
1905 clear_page_dirty_for_io(page);
1906 err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
1907 if (!err)
1908 mpd->wbc->nr_to_write--;
1909 mpd->first_page++;
1910
1911 return err;
1912}
1913
1893#define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay)) 1914#define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay))
1894 1915
1895/* 1916/*
1896 * mballoc gives us at most this number of blocks... 1917 * mballoc gives us at most this number of blocks...
1897 * XXX: That seems to be only a limitation of ext4_mb_normalize_request(). 1918 * XXX: That seems to be only a limitation of ext4_mb_normalize_request().
1898 * The rest of mballoc seems to handle chunks upto full group size. 1919 * The rest of mballoc seems to handle chunks up to full group size.
1899 */ 1920 */
1900#define MAX_WRITEPAGES_EXTENT_LEN 2048 1921#define MAX_WRITEPAGES_EXTENT_LEN 2048
1901 1922
@@ -1904,82 +1925,94 @@ static int ext4_writepage(struct page *page,
1904 * 1925 *
1905 * @mpd - extent of blocks 1926 * @mpd - extent of blocks
1906 * @lblk - logical number of the block in the file 1927 * @lblk - logical number of the block in the file
1907 * @b_state - b_state of the buffer head added 1928 * @bh - buffer head we want to add to the extent
1908 * 1929 *
1909 * the function is used to collect contig. blocks in same state 1930 * The function is used to collect contig. blocks in the same state. If the
1931 * buffer doesn't require mapping for writeback and we haven't started the
1932 * extent of buffers to map yet, the function returns 'true' immediately - the
1933 * caller can write the buffer right away. Otherwise the function returns true
1934 * if the block has been added to the extent, false if the block couldn't be
1935 * added.
1910 */ 1936 */
1911static int mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk, 1937static bool mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk,
1912 unsigned long b_state) 1938 struct buffer_head *bh)
1913{ 1939{
1914 struct ext4_map_blocks *map = &mpd->map; 1940 struct ext4_map_blocks *map = &mpd->map;
1915 1941
1916 /* Don't go larger than mballoc is willing to allocate */ 1942 /* Buffer that doesn't need mapping for writeback? */
1917 if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN) 1943 if (!buffer_dirty(bh) || !buffer_mapped(bh) ||
1918 return 0; 1944 (!buffer_delay(bh) && !buffer_unwritten(bh))) {
1945 /* So far no extent to map => we write the buffer right away */
1946 if (map->m_len == 0)
1947 return true;
1948 return false;
1949 }
1919 1950
1920 /* First block in the extent? */ 1951 /* First block in the extent? */
1921 if (map->m_len == 0) { 1952 if (map->m_len == 0) {
1922 map->m_lblk = lblk; 1953 map->m_lblk = lblk;
1923 map->m_len = 1; 1954 map->m_len = 1;
1924 map->m_flags = b_state & BH_FLAGS; 1955 map->m_flags = bh->b_state & BH_FLAGS;
1925 return 1; 1956 return true;
1926 } 1957 }
1927 1958
1959 /* Don't go larger than mballoc is willing to allocate */
1960 if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN)
1961 return false;
1962
1928 /* Can we merge the block to our big extent? */ 1963 /* Can we merge the block to our big extent? */
1929 if (lblk == map->m_lblk + map->m_len && 1964 if (lblk == map->m_lblk + map->m_len &&
1930 (b_state & BH_FLAGS) == map->m_flags) { 1965 (bh->b_state & BH_FLAGS) == map->m_flags) {
1931 map->m_len++; 1966 map->m_len++;
1932 return 1; 1967 return true;
1933 } 1968 }
1934 return 0; 1969 return false;
1935} 1970}
1936 1971
1937static bool add_page_bufs_to_extent(struct mpage_da_data *mpd, 1972/*
1938 struct buffer_head *head, 1973 * mpage_process_page_bufs - submit page buffers for IO or add them to extent
1939 struct buffer_head *bh, 1974 *
1940 ext4_lblk_t lblk) 1975 * @mpd - extent of blocks for mapping
1976 * @head - the first buffer in the page
1977 * @bh - buffer we should start processing from
1978 * @lblk - logical number of the block in the file corresponding to @bh
1979 *
1980 * Walk through page buffers from @bh upto @head (exclusive) and either submit
1981 * the page for IO if all buffers in this page were mapped and there's no
1982 * accumulated extent of buffers to map or add buffers in the page to the
1983 * extent of buffers to map. The function returns 1 if the caller can continue
1984 * by processing the next page, 0 if it should stop adding buffers to the
1985 * extent to map because we cannot extend it anymore. It can also return value
1986 * < 0 in case of error during IO submission.
1987 */
1988static int mpage_process_page_bufs(struct mpage_da_data *mpd,
1989 struct buffer_head *head,
1990 struct buffer_head *bh,
1991 ext4_lblk_t lblk)
1941{ 1992{
1942 struct inode *inode = mpd->inode; 1993 struct inode *inode = mpd->inode;
1994 int err;
1943 ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) 1995 ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
1944 >> inode->i_blkbits; 1996 >> inode->i_blkbits;
1945 1997
1946 do { 1998 do {
1947 BUG_ON(buffer_locked(bh)); 1999 BUG_ON(buffer_locked(bh));
1948 2000
1949 if (!buffer_dirty(bh) || !buffer_mapped(bh) || 2001 if (lblk >= blocks || !mpage_add_bh_to_extent(mpd, lblk, bh)) {
1950 (!buffer_delay(bh) && !buffer_unwritten(bh)) ||
1951 lblk >= blocks) {
1952 /* Found extent to map? */ 2002 /* Found extent to map? */
1953 if (mpd->map.m_len) 2003 if (mpd->map.m_len)
1954 return false; 2004 return 0;
1955 if (lblk >= blocks) 2005 /* Everything mapped so far and we hit EOF */
1956 return true; 2006 break;
1957 continue;
1958 } 2007 }
1959 if (!mpage_add_bh_to_extent(mpd, lblk, bh->b_state))
1960 return false;
1961 } while (lblk++, (bh = bh->b_this_page) != head); 2008 } while (lblk++, (bh = bh->b_this_page) != head);
1962 return true; 2009 /* So far everything mapped? Submit the page for IO. */
1963} 2010 if (mpd->map.m_len == 0) {
1964 2011 err = mpage_submit_page(mpd, head->b_page);
1965static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) 2012 if (err < 0)
1966{ 2013 return err;
1967 int len; 2014 }
1968 loff_t size = i_size_read(mpd->inode); 2015 return lblk < blocks;
1969 int err;
1970
1971 BUG_ON(page->index != mpd->first_page);
1972 if (page->index == size >> PAGE_CACHE_SHIFT)
1973 len = size & ~PAGE_CACHE_MASK;
1974 else
1975 len = PAGE_CACHE_SIZE;
1976 clear_page_dirty_for_io(page);
1977 err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
1978 if (!err)
1979 mpd->wbc->nr_to_write--;
1980 mpd->first_page++;
1981
1982 return err;
1983} 2016}
1984 2017
1985/* 2018/*
@@ -2003,8 +2036,6 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
2003 struct inode *inode = mpd->inode; 2036 struct inode *inode = mpd->inode;
2004 struct buffer_head *head, *bh; 2037 struct buffer_head *head, *bh;
2005 int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits; 2038 int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits;
2006 ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
2007 >> inode->i_blkbits;
2008 pgoff_t start, end; 2039 pgoff_t start, end;
2009 ext4_lblk_t lblk; 2040 ext4_lblk_t lblk;
2010 sector_t pblock; 2041 sector_t pblock;
@@ -2026,7 +2057,7 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
2026 2057
2027 if (page->index > end) 2058 if (page->index > end)
2028 break; 2059 break;
2029 /* Upto 'end' pages must be contiguous */ 2060 /* Up to 'end' pages must be contiguous */
2030 BUG_ON(page->index != start); 2061 BUG_ON(page->index != start);
2031 bh = head = page_buffers(page); 2062 bh = head = page_buffers(page);
2032 do { 2063 do {
@@ -2039,18 +2070,26 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
2039 */ 2070 */
2040 mpd->map.m_len = 0; 2071 mpd->map.m_len = 0;
2041 mpd->map.m_flags = 0; 2072 mpd->map.m_flags = 0;
2042 add_page_bufs_to_extent(mpd, head, bh, 2073 /*
2043 lblk); 2074 * FIXME: If dioread_nolock supports
2075 * blocksize < pagesize, we need to make
2076 * sure we add size mapped so far to
2077 * io_end->size as the following call
2078 * can submit the page for IO.
2079 */
2080 err = mpage_process_page_bufs(mpd, head,
2081 bh, lblk);
2044 pagevec_release(&pvec); 2082 pagevec_release(&pvec);
2045 return 0; 2083 if (err > 0)
2084 err = 0;
2085 return err;
2046 } 2086 }
2047 if (buffer_delay(bh)) { 2087 if (buffer_delay(bh)) {
2048 clear_buffer_delay(bh); 2088 clear_buffer_delay(bh);
2049 bh->b_blocknr = pblock++; 2089 bh->b_blocknr = pblock++;
2050 } 2090 }
2051 clear_buffer_unwritten(bh); 2091 clear_buffer_unwritten(bh);
2052 } while (++lblk < blocks && 2092 } while (lblk++, (bh = bh->b_this_page) != head);
2053 (bh = bh->b_this_page) != head);
2054 2093
2055 /* 2094 /*
2056 * FIXME: This is going to break if dioread_nolock 2095 * FIXME: This is going to break if dioread_nolock
@@ -2199,12 +2238,10 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2199 2238
2200 /* Update on-disk size after IO is submitted */ 2239 /* Update on-disk size after IO is submitted */
2201 disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; 2240 disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
2202 if (disksize > i_size_read(inode))
2203 disksize = i_size_read(inode);
2204 if (disksize > EXT4_I(inode)->i_disksize) { 2241 if (disksize > EXT4_I(inode)->i_disksize) {
2205 int err2; 2242 int err2;
2206 2243
2207 ext4_update_i_disksize(inode, disksize); 2244 ext4_wb_update_i_disksize(inode, disksize);
2208 err2 = ext4_mark_inode_dirty(handle, inode); 2245 err2 = ext4_mark_inode_dirty(handle, inode);
2209 if (err2) 2246 if (err2)
2210 ext4_error(inode->i_sb, 2247 ext4_error(inode->i_sb,
@@ -2219,7 +2256,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2219/* 2256/*
2220 * Calculate the total number of credits to reserve for one writepages 2257 * Calculate the total number of credits to reserve for one writepages
2221 * iteration. This is called from ext4_writepages(). We map an extent of 2258 * iteration. This is called from ext4_writepages(). We map an extent of
2222 * upto MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping 2259 * up to MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping
2223 * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN + 2260 * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN +
2224 * bpp - 1 blocks in bpp different extents. 2261 * bpp - 1 blocks in bpp different extents.
2225 */ 2262 */
@@ -2319,14 +2356,10 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
2319 lblk = ((ext4_lblk_t)page->index) << 2356 lblk = ((ext4_lblk_t)page->index) <<
2320 (PAGE_CACHE_SHIFT - blkbits); 2357 (PAGE_CACHE_SHIFT - blkbits);
2321 head = page_buffers(page); 2358 head = page_buffers(page);
2322 if (!add_page_bufs_to_extent(mpd, head, head, lblk)) 2359 err = mpage_process_page_bufs(mpd, head, head, lblk);
2360 if (err <= 0)
2323 goto out; 2361 goto out;
2324 /* So far everything mapped? Submit the page for IO. */ 2362 err = 0;
2325 if (mpd->map.m_len == 0) {
2326 err = mpage_submit_page(mpd, page);
2327 if (err < 0)
2328 goto out;
2329 }
2330 2363
2331 /* 2364 /*
2332 * Accumulated enough dirty pages? This doesn't apply 2365 * Accumulated enough dirty pages? This doesn't apply
@@ -2410,7 +2443,7 @@ static int ext4_writepages(struct address_space *mapping,
2410 2443
2411 if (ext4_should_dioread_nolock(inode)) { 2444 if (ext4_should_dioread_nolock(inode)) {
2412 /* 2445 /*
2413 * We may need to convert upto one extent per block in 2446 * We may need to convert up to one extent per block in
2414 * the page and we may dirty the inode. 2447 * the page and we may dirty the inode.
2415 */ 2448 */
2416 rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits); 2449 rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits);
@@ -2646,7 +2679,7 @@ retry_journal:
2646 goto retry_grab; 2679 goto retry_grab;
2647 } 2680 }
2648 /* In case writeback began while the page was unlocked */ 2681 /* In case writeback began while the page was unlocked */
2649 wait_on_page_writeback(page); 2682 wait_for_stable_page(page);
2650 2683
2651 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); 2684 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
2652 if (ret < 0) { 2685 if (ret < 0) {
@@ -4566,7 +4599,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4566 ext4_journal_stop(handle); 4599 ext4_journal_stop(handle);
4567 } 4600 }
4568 4601
4569 if (attr->ia_valid & ATTR_SIZE) { 4602 if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
4603 handle_t *handle;
4604 loff_t oldsize = inode->i_size;
4570 4605
4571 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { 4606 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4572 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 4607 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -4574,73 +4609,69 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4574 if (attr->ia_size > sbi->s_bitmap_maxbytes) 4609 if (attr->ia_size > sbi->s_bitmap_maxbytes)
4575 return -EFBIG; 4610 return -EFBIG;
4576 } 4611 }
4577 } 4612 if (S_ISREG(inode->i_mode) &&
4578 4613 (attr->ia_size < inode->i_size)) {
4579 if (S_ISREG(inode->i_mode) && 4614 if (ext4_should_order_data(inode)) {
4580 attr->ia_valid & ATTR_SIZE && 4615 error = ext4_begin_ordered_truncate(inode,
4581 (attr->ia_size < inode->i_size)) {
4582 handle_t *handle;
4583
4584 handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
4585 if (IS_ERR(handle)) {
4586 error = PTR_ERR(handle);
4587 goto err_out;
4588 }
4589 if (ext4_handle_valid(handle)) {
4590 error = ext4_orphan_add(handle, inode);
4591 orphan = 1;
4592 }
4593 EXT4_I(inode)->i_disksize = attr->ia_size;
4594 rc = ext4_mark_inode_dirty(handle, inode);
4595 if (!error)
4596 error = rc;
4597 ext4_journal_stop(handle);
4598
4599 if (ext4_should_order_data(inode)) {
4600 error = ext4_begin_ordered_truncate(inode,
4601 attr->ia_size); 4616 attr->ia_size);
4602 if (error) { 4617 if (error)
4603 /* Do as much error cleanup as possible */
4604 handle = ext4_journal_start(inode,
4605 EXT4_HT_INODE, 3);
4606 if (IS_ERR(handle)) {
4607 ext4_orphan_del(NULL, inode);
4608 goto err_out; 4618 goto err_out;
4609 } 4619 }
4610 ext4_orphan_del(handle, inode); 4620 handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
4611 orphan = 0; 4621 if (IS_ERR(handle)) {
4612 ext4_journal_stop(handle); 4622 error = PTR_ERR(handle);
4613 goto err_out; 4623 goto err_out;
4614 } 4624 }
4615 } 4625 if (ext4_handle_valid(handle)) {
4616 } 4626 error = ext4_orphan_add(handle, inode);
4617 4627 orphan = 1;
4618 if (attr->ia_valid & ATTR_SIZE) {
4619 if (attr->ia_size != inode->i_size) {
4620 loff_t oldsize = inode->i_size;
4621
4622 i_size_write(inode, attr->ia_size);
4623 /*
4624 * Blocks are going to be removed from the inode. Wait
4625 * for dio in flight. Temporarily disable
4626 * dioread_nolock to prevent livelock.
4627 */
4628 if (orphan) {
4629 if (!ext4_should_journal_data(inode)) {
4630 ext4_inode_block_unlocked_dio(inode);
4631 inode_dio_wait(inode);
4632 ext4_inode_resume_unlocked_dio(inode);
4633 } else
4634 ext4_wait_for_tail_page_commit(inode);
4635 } 4628 }
4629 down_write(&EXT4_I(inode)->i_data_sem);
4630 EXT4_I(inode)->i_disksize = attr->ia_size;
4631 rc = ext4_mark_inode_dirty(handle, inode);
4632 if (!error)
4633 error = rc;
4636 /* 4634 /*
4637 * Truncate pagecache after we've waited for commit 4635 * We have to update i_size under i_data_sem together
4638 * in data=journal mode to make pages freeable. 4636 * with i_disksize to avoid races with writeback code
4637 * running ext4_wb_update_i_disksize().
4639 */ 4638 */
4640 truncate_pagecache(inode, oldsize, inode->i_size); 4639 if (!error)
4640 i_size_write(inode, attr->ia_size);
4641 up_write(&EXT4_I(inode)->i_data_sem);
4642 ext4_journal_stop(handle);
4643 if (error) {
4644 ext4_orphan_del(NULL, inode);
4645 goto err_out;
4646 }
4647 } else
4648 i_size_write(inode, attr->ia_size);
4649
4650 /*
4651 * Blocks are going to be removed from the inode. Wait
4652 * for dio in flight. Temporarily disable
4653 * dioread_nolock to prevent livelock.
4654 */
4655 if (orphan) {
4656 if (!ext4_should_journal_data(inode)) {
4657 ext4_inode_block_unlocked_dio(inode);
4658 inode_dio_wait(inode);
4659 ext4_inode_resume_unlocked_dio(inode);
4660 } else
4661 ext4_wait_for_tail_page_commit(inode);
4641 } 4662 }
4642 ext4_truncate(inode); 4663 /*
4664 * Truncate pagecache after we've waited for commit
4665 * in data=journal mode to make pages freeable.
4666 */
4667 truncate_pagecache(inode, oldsize, inode->i_size);
4643 } 4668 }
4669 /*
4670 * We want to call ext4_truncate() even if attr->ia_size ==
4671 * inode->i_size for cases like truncation of fallocated space
4672 */
4673 if (attr->ia_valid & ATTR_SIZE)
4674 ext4_truncate(inode);
4644 4675
4645 if (!rc) { 4676 if (!rc) {
4646 setattr_copy(inode, attr); 4677 setattr_copy(inode, attr);