Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--  fs/ext4/inode.c  471
1 file changed, 306 insertions(+), 165 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 71d3ecd5db79..2a9ffd528dd1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -371,6 +371,34 @@ static int ext4_block_to_path(struct inode *inode,
         return n;
 }
 
+static int __ext4_check_blockref(const char *function, struct inode *inode,
+                                 __le32 *p, unsigned int max) {
+
+        unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es);
+        __le32 *bref = p;
+        while (bref < p+max) {
+                if (unlikely(le32_to_cpu(*bref) >= maxblocks)) {
+                        ext4_error(inode->i_sb, function,
+                                   "block reference %u >= max (%u) "
+                                   "in inode #%lu, offset=%d",
+                                   le32_to_cpu(*bref), maxblocks,
+                                   inode->i_ino, (int)(bref-p));
+                        return -EIO;
+                }
+                bref++;
+        }
+        return 0;
+}
+
+
+#define ext4_check_indirect_blockref(inode, bh)                         \
+        __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data,  \
+                              EXT4_ADDR_PER_BLOCK((inode)->i_sb))
+
+#define ext4_check_inode_blockref(inode)                                \
+        __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data,   \
+                              EXT4_NDIR_BLOCKS)
+
 /**
  * ext4_get_branch - read the chain of indirect blocks leading to data
  * @inode: inode in question
@@ -415,9 +443,22 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
         if (!p->key)
                 goto no_block;
         while (--depth) {
-                bh = sb_bread(sb, le32_to_cpu(p->key));
-                if (!bh)
+                bh = sb_getblk(sb, le32_to_cpu(p->key));
+                if (unlikely(!bh))
                         goto failure;
+
+                if (!bh_uptodate_or_lock(bh)) {
+                        if (bh_submit_read(bh) < 0) {
+                                put_bh(bh);
+                                goto failure;
+                        }
+                        /* validate block references */
+                        if (ext4_check_indirect_blockref(inode, bh)) {
+                                put_bh(bh);
+                                goto failure;
+                        }
+                }
+
                 add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
                 /* Reader: end */
                 if (!p->key)
@@ -459,6 +500,8 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
         ext4_fsblk_t bg_start;
         ext4_fsblk_t last_block;
         ext4_grpblk_t colour;
+        ext4_group_t block_group;
+        int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
 
         /* Try to find previous block */
         for (p = ind->p - 1; p >= start; p--) {
@@ -474,9 +517,22 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
          * It is going to be referred to from the inode itself? OK, just put it
          * into the same cylinder group then.
          */
-        bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group);
+        block_group = ei->i_block_group;
+        if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
+                block_group &= ~(flex_size-1);
+                if (S_ISREG(inode->i_mode))
+                        block_group++;
+        }
+        bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
         last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
 
+        /*
+         * If we are doing delayed allocation, we don't need to take
+         * colour into account.
+         */
+        if (test_opt(inode->i_sb, DELALLOC))
+                return bg_start;
+
         if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
                 colour = (current->pid % 16) *
                         (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
@@ -1052,9 +1108,16 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
         /*
          * free those over-booking quota for metadata blocks
          */
-
         if (mdb_free)
                 vfs_dq_release_reservation_block(inode, mdb_free);
+
+        /*
+         * If we have done all the pending block allocations and if
+         * there aren't any writers on the inode, we can discard the
+         * inode's preallocations.
+         */
+        if (!total && (atomic_read(&inode->i_writecount) == 0))
+                ext4_discard_preallocations(inode);
 }
 
 /*
@@ -1086,6 +1149,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
         int retval;
 
         clear_buffer_mapped(bh);
+        clear_buffer_unwritten(bh);
 
         /*
          * Try to see if we can get the block without requesting
@@ -1116,6 +1180,18 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
                 return retval;
 
         /*
+         * When we call get_blocks without the create flag, the
+         * BH_Unwritten flag could have gotten set if the blocks
+         * requested were part of an uninitialized extent. We need to
+         * clear this flag now that we are committed to convert all or
+         * part of the uninitialized extent to be an initialized
+         * extent. This is because we need to avoid the combination
+         * of BH_Unwritten and BH_Mapped flags being simultaneously
+         * set on the buffer_head.
+         */
+        clear_buffer_unwritten(bh);
+
+        /*
          * New blocks allocate and/or writing to uninitialized extent
          * will possibly result in updating i_data, so we take
          * the write lock of i_data_sem, and call get_blocks()
@@ -1688,9 +1764,10 @@ static void ext4_da_page_release_reservation(struct page *page,
 
 struct mpage_da_data {
         struct inode *inode;
-        struct buffer_head lbh;                 /* extent of blocks */
+        sector_t b_blocknr;             /* start block number of extent */
+        size_t b_size;                  /* size of extent */
+        unsigned long b_state;          /* state of the extent */
         unsigned long first_page, next_page;    /* extent of pages */
-        get_block_t *get_block;
         struct writeback_control *wbc;
         int io_done;
         int pages_written;
@@ -1704,7 +1781,6 @@ struct mpage_da_data {
  * @mpd->inode: inode
  * @mpd->first_page: first page of the extent
  * @mpd->next_page: page after the last page of the extent
- * @mpd->get_block: the filesystem's block mapper function
  *
  * By the time mpage_da_submit_io() is called we expect all blocks
  * to be allocated. This may be wrong if allocation failed.
@@ -1724,7 +1800,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
         /*
          * We need to start from the first_page to the next_page - 1
          * to make sure we also write the mapped dirty buffer_heads.
-         * If we look at mpd->lbh.b_blocknr we would only be looking
+         * If we look at mpd->b_blocknr we would only be looking
          * at the currently mapped buffer_heads.
          */
         index = mpd->first_page;
@@ -1914,68 +1990,111 @@ static void ext4_print_free_blocks(struct inode *inode)
         return;
 }
 
+#define EXT4_DELALLOC_RSVED 1
+static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
+                                   struct buffer_head *bh_result, int create)
+{
+        int ret;
+        unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
+        loff_t disksize = EXT4_I(inode)->i_disksize;
+        handle_t *handle = NULL;
+
+        handle = ext4_journal_current_handle();
+        BUG_ON(!handle);
+        ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
+                                   bh_result, create, 0, EXT4_DELALLOC_RSVED);
+        if (ret <= 0)
+                return ret;
+
+        bh_result->b_size = (ret << inode->i_blkbits);
+
+        if (ext4_should_order_data(inode)) {
+                int retval;
+                retval = ext4_jbd2_file_inode(handle, inode);
+                if (retval)
+                        /*
+                         * Failed to add inode for ordered mode. Don't
+                         * update file size
+                         */
+                        return retval;
+        }
+
+        /*
+         * Update on-disk size along with block allocation we don't
+         * use 'extend_disksize' as size may change within already
+         * allocated block -bzzz
+         */
+        disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
+        if (disksize > i_size_read(inode))
+                disksize = i_size_read(inode);
+        if (disksize > EXT4_I(inode)->i_disksize) {
+                ext4_update_i_disksize(inode, disksize);
+                ret = ext4_mark_inode_dirty(handle, inode);
+                return ret;
+        }
+        return 0;
+}
+
 /*
  * mpage_da_map_blocks - go through given space
  *
- * @mpd->lbh - bh describing space
- * @mpd->get_block - the filesystem's block mapper function
+ * @mpd - bh describing space
  *
  * The function skips space we know is already mapped to disk blocks.
  *
  */
 static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 {
         int err = 0;
         struct buffer_head new;
-        struct buffer_head *lbh = &mpd->lbh;
         sector_t next;
 
         /*
          * We consider only non-mapped and non-allocated blocks
          */
-        if (buffer_mapped(lbh) && !buffer_delay(lbh))
+        if ((mpd->b_state & (1 << BH_Mapped)) &&
+                !(mpd->b_state & (1 << BH_Delay)))
                 return 0;
-        new.b_state = lbh->b_state;
+        new.b_state = mpd->b_state;
         new.b_blocknr = 0;
-        new.b_size = lbh->b_size;
-        next = lbh->b_blocknr;
+        new.b_size = mpd->b_size;
+        next = mpd->b_blocknr;
         /*
          * If we didn't accumulate anything
          * to write simply return
          */
         if (!new.b_size)
                 return 0;
-        err = mpd->get_block(mpd->inode, next, &new, 1);
-        if (err) {
 
-                /* If get block returns with error
-                 * we simply return. Later writepage
-                 * will redirty the page and writepages
-                 * will find the dirty page again
+        err = ext4_da_get_block_write(mpd->inode, next, &new, 1);
+        if (err) {
+                /*
+                 * If get block returns with error we simply
+                 * return. Later writepage will redirty the page and
+                 * writepages will find the dirty page again
                  */
                 if (err == -EAGAIN)
                         return 0;
 
                 if (err == -ENOSPC &&
                     ext4_count_free_blocks(mpd->inode->i_sb)) {
                         mpd->retval = err;
                         return 0;
                 }
 
                 /*
-                 * get block failure will cause us
-                 * to loop in writepages. Because
-                 * a_ops->writepage won't be able to
-                 * make progress. The page will be redirtied
-                 * by writepage and writepages will again
-                 * try to write the same.
+                 * get block failure will cause us to loop in
+                 * writepages, because a_ops->writepage won't be able
+                 * to make progress. The page will be redirtied by
+                 * writepage and writepages will again try to write
+                 * the same.
                  */
                 printk(KERN_EMERG "%s block allocation failed for inode %lu "
                                   "at logical offset %llu with max blocks "
                                   "%zd with error %d\n",
                                   __func__, mpd->inode->i_ino,
                                   (unsigned long long)next,
-                                  lbh->b_size >> mpd->inode->i_blkbits, err);
+                                  mpd->b_size >> mpd->inode->i_blkbits, err);
                 printk(KERN_EMERG "This should not happen.!! "
                                         "Data will be lost\n");
                 if (err == -ENOSPC) {
@@ -1983,7 +2102,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
                 }
                 /* invalidate all the pages */
                 ext4_da_block_invalidatepages(mpd, next,
-                                lbh->b_size >> mpd->inode->i_blkbits);
+                                mpd->b_size >> mpd->inode->i_blkbits);
                 return err;
         }
         BUG_ON(new.b_size == 0);
@@ -1995,7 +2114,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
          * If blocks are delayed marked, we need to
          * put actual blocknr and drop delayed bit
          */
-        if (buffer_delay(lbh) || buffer_unwritten(lbh))
+        if ((mpd->b_state & (1 << BH_Delay)) ||
+            (mpd->b_state & (1 << BH_Unwritten)))
                 mpage_put_bnr_to_bhs(mpd, next, &new);
 
         return 0;
@@ -2014,12 +2134,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
  * the function is used to collect contig. blocks in same state
  */
 static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
-                                   sector_t logical, struct buffer_head *bh)
+                                   sector_t logical, size_t b_size,
+                                   unsigned long b_state)
 {
         sector_t next;
-        size_t b_size = bh->b_size;
-        struct buffer_head *lbh = &mpd->lbh;
-        int nrblocks = lbh->b_size >> mpd->inode->i_blkbits;
+        int nrblocks = mpd->b_size >> mpd->inode->i_blkbits;
 
         /* check if the reserved journal credits might overflow */
         if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) {
@@ -2046,19 +2165,19 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
         /*
          * First block in the extent
          */
-        if (lbh->b_size == 0) {
-                lbh->b_blocknr = logical;
-                lbh->b_size = b_size;
-                lbh->b_state = bh->b_state & BH_FLAGS;
+        if (mpd->b_size == 0) {
+                mpd->b_blocknr = logical;
+                mpd->b_size = b_size;
+                mpd->b_state = b_state & BH_FLAGS;
                 return;
         }
 
-        next = lbh->b_blocknr + nrblocks;
+        next = mpd->b_blocknr + nrblocks;
         /*
          * Can we merge the block to our big extent?
          */
-        if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) {
-                lbh->b_size += b_size;
+        if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) {
+                mpd->b_size += b_size;
                 return;
         }
 
@@ -2087,7 +2206,7 @@ static int __mpage_da_writepage(struct page *page,
 {
         struct mpage_da_data *mpd = data;
         struct inode *inode = mpd->inode;
-        struct buffer_head *bh, *head, fake;
+        struct buffer_head *bh, *head;
         sector_t logical;
 
         if (mpd->io_done) {
@@ -2129,9 +2248,9 @@ static int __mpage_da_writepage(struct page *page,
                 /*
                  * ... and blocks
                  */
-                mpd->lbh.b_size = 0;
-                mpd->lbh.b_state = 0;
-                mpd->lbh.b_blocknr = 0;
+                mpd->b_size = 0;
+                mpd->b_state = 0;
+                mpd->b_blocknr = 0;
         }
 
         mpd->next_page = page->index + 1;
@@ -2139,16 +2258,8 @@ static int __mpage_da_writepage(struct page *page,
                   (PAGE_CACHE_SHIFT - inode->i_blkbits);
 
         if (!page_has_buffers(page)) {
-                /*
-                 * There are no attached buffer heads yet (mmap?)
-                 * we treat the page as full of dirty blocks
-                 */
-                bh = &fake;
-                bh->b_size = PAGE_CACHE_SIZE;
-                bh->b_state = 0;
-                set_buffer_dirty(bh);
-                set_buffer_uptodate(bh);
-                mpage_add_bh_to_extent(mpd, logical, bh);
+                mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE,
+                                       (1 << BH_Dirty) | (1 << BH_Uptodate));
                 if (mpd->io_done)
                         return MPAGE_DA_EXTENT_TAIL;
         } else {
@@ -2166,8 +2277,10 @@ static int __mpage_da_writepage(struct page *page,
                          * with the page in ext4_da_writepage
                          */
                         if (buffer_dirty(bh) &&
                             (!buffer_mapped(bh) || buffer_delay(bh))) {
-                                mpage_add_bh_to_extent(mpd, logical, bh);
+                                mpage_add_bh_to_extent(mpd, logical,
+                                                       bh->b_size,
+                                                       bh->b_state);
                                 if (mpd->io_done)
                                         return MPAGE_DA_EXTENT_TAIL;
                         } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
@@ -2179,9 +2292,8 @@ static int __mpage_da_writepage(struct page *page,
                                  * unmapped buffer_head later we need to
                                  * use the b_state flag of that buffer_head.
                                  */
-                                if (mpd->lbh.b_size == 0)
-                                        mpd->lbh.b_state =
-                                                bh->b_state & BH_FLAGS;
+                                if (mpd->b_size == 0)
+                                        mpd->b_state = bh->b_state & BH_FLAGS;
                         }
                         logical++;
                 } while ((bh = bh->b_this_page) != head);
@@ -2191,51 +2303,6 @@ static int __mpage_da_writepage(struct page *page,
 }
 
 /*
- * mpage_da_writepages - walk the list of dirty pages of the given
- * address space, allocates non-allocated blocks, maps newly-allocated
- * blocks to existing bhs and issues IO on them
- *
- * @mapping: address space structure to write
- * @wbc: subtract the number of written pages from *@wbc->nr_to_write
- * @get_block: the filesystem's block mapper function.
- *
- * This is a library function, which implements the writepages()
- * address_space_operation.
- */
-static int mpage_da_writepages(struct address_space *mapping,
-                               struct writeback_control *wbc,
-                               struct mpage_da_data *mpd)
-{
-        int ret;
-
-        if (!mpd->get_block)
-                return generic_writepages(mapping, wbc);
-
-        mpd->lbh.b_size = 0;
-        mpd->lbh.b_state = 0;
-        mpd->lbh.b_blocknr = 0;
-        mpd->first_page = 0;
-        mpd->next_page = 0;
-        mpd->io_done = 0;
-        mpd->pages_written = 0;
-        mpd->retval = 0;
-
-        ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
-        /*
-         * Handle last extent of pages
-         */
-        if (!mpd->io_done && mpd->next_page != mpd->first_page) {
-                if (mpage_da_map_blocks(mpd) == 0)
-                        mpage_da_submit_io(mpd);
-
-                mpd->io_done = 1;
-                ret = MPAGE_DA_EXTENT_TAIL;
-        }
-        wbc->nr_to_write -= mpd->pages_written;
-        return ret;
-}
-
-/*
  * this is a special callback for ->write_begin() only
  * its intention is to return mapped block or reserve space
  */
@@ -2243,6 +2310,10 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
                            struct buffer_head *bh_result, int create)
 {
         int ret = 0;
+        sector_t invalid_block = ~((sector_t) 0xffff);
+
+        if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
+                invalid_block = ~0;
 
         BUG_ON(create == 0);
         BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
@@ -2264,59 +2335,21 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
                 /* not enough space to reserve */
                 return ret;
 
-                map_bh(bh_result, inode->i_sb, 0);
+                map_bh(bh_result, inode->i_sb, invalid_block);
                 set_buffer_new(bh_result);
                 set_buffer_delay(bh_result);
         } else if (ret > 0) {
                 bh_result->b_size = (ret << inode->i_blkbits);
-                ret = 0;
-        }
-
-        return ret;
-}
-#define EXT4_DELALLOC_RSVED 1
-static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
-                                   struct buffer_head *bh_result, int create)
-{
-        int ret;
-        unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
-        loff_t disksize = EXT4_I(inode)->i_disksize;
-        handle_t *handle = NULL;
-
-        handle = ext4_journal_current_handle();
-        BUG_ON(!handle);
-        ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
-                                   bh_result, create, 0, EXT4_DELALLOC_RSVED);
-        if (ret > 0) {
-
-                bh_result->b_size = (ret << inode->i_blkbits);
-
-                if (ext4_should_order_data(inode)) {
-                        int retval;
-                        retval = ext4_jbd2_file_inode(handle, inode);
-                        if (retval)
-                                /*
-                                 * Failed to add inode for ordered
-                                 * mode. Don't update file size
-                                 */
-                                return retval;
-                }
-
                 /*
-                 * Update on-disk size along with block allocation
-                 * we don't use 'extend_disksize' as size may change
-                 * within already allocated block -bzzz
+                 * With sub-block writes into unwritten extents
+                 * we also need to mark the buffer as new so that
+                 * the unwritten parts of the buffer get correctly zeroed.
                  */
-                disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
-                if (disksize > i_size_read(inode))
-                        disksize = i_size_read(inode);
-                if (disksize > EXT4_I(inode)->i_disksize) {
-                        ext4_update_i_disksize(inode, disksize);
-                        ret = ext4_mark_inode_dirty(handle, inode);
-                        return ret;
-                }
+                if (buffer_unwritten(bh_result))
+                        set_buffer_new(bh_result);
                 ret = 0;
         }
+
         return ret;
 }
 
@@ -2569,8 +2602,38 @@ retry:
                         dump_stack();
                         goto out_writepages;
                 }
-                mpd.get_block = ext4_da_get_block_write;
-                ret = mpage_da_writepages(mapping, wbc, &mpd);
+
+                /*
+                 * Now call __mpage_da_writepage to find the next
+                 * contiguous region of logical blocks that need
+                 * blocks to be allocated by ext4.  We don't actually
+                 * submit the blocks for I/O here, even though
+                 * write_cache_pages thinks it will, and will set the
+                 * pages as clean for write before calling
+                 * __mpage_da_writepage().
+                 */
+                mpd.b_size = 0;
+                mpd.b_state = 0;
+                mpd.b_blocknr = 0;
+                mpd.first_page = 0;
+                mpd.next_page = 0;
+                mpd.io_done = 0;
+                mpd.pages_written = 0;
+                mpd.retval = 0;
+                ret = write_cache_pages(mapping, wbc, __mpage_da_writepage,
+                                        &mpd);
+                /*
+                 * If we have a contiguous extent of pages and we
+                 * haven't done the I/O yet, map the blocks and submit
+                 * them for I/O.
+                 */
+                if (!mpd.io_done && mpd.next_page != mpd.first_page) {
+                        if (mpage_da_map_blocks(&mpd) == 0)
+                                mpage_da_submit_io(&mpd);
+                        mpd.io_done = 1;
+                        ret = MPAGE_DA_EXTENT_TAIL;
+                }
+                wbc->nr_to_write -= mpd.pages_written;
 
                 ext4_journal_stop(handle);
 
@@ -2846,6 +2909,48 @@ out:
         return;
 }
 
+/*
+ * Force all delayed allocation blocks to be allocated for a given inode.
+ */
+int ext4_alloc_da_blocks(struct inode *inode)
+{
+        if (!EXT4_I(inode)->i_reserved_data_blocks &&
+            !EXT4_I(inode)->i_reserved_meta_blocks)
+                return 0;
+
+        /*
+         * We do something simple for now.  The filemap_flush() will
+         * also start triggering a write of the data blocks, which is
+         * not strictly speaking necessary (and for users of
+         * laptop_mode, not even desirable).  However, to do otherwise
+         * would require replicating code paths in:
+         *
+         * ext4_da_writepages() ->
+         *    write_cache_pages()  ---> (via passed in callback function)
+         *        __mpage_da_writepage() -->
+         *           mpage_add_bh_to_extent()
+         *           mpage_da_map_blocks()
+         *
+         * The problem is that write_cache_pages(), located in
+         * mm/page-writeback.c, marks pages clean in preparation for
+         * doing I/O, which is not desirable if we're not planning on
+         * doing I/O at all.
+         *
+         * We could call write_cache_pages(), and then redirty all of
+         * the pages by calling redirty_page_for_writeback() but that
+         * would be ugly in the extreme.  So instead we would need to
+         * replicate parts of the code in the above functions,
+         * simplifying them because we wouldn't actually intend to
+         * write out the pages, but rather only collect contiguous
+         * logical block extents, call the multi-block allocator, and
+         * then update the buffer heads with the block allocations.
+         *
+         * For now, though, we'll cheat by calling filemap_flush(),
+         * which will map the blocks, and start the I/O, but not
+         * actually wait for the I/O to complete.
+         */
+        return filemap_flush(inode->i_mapping);
+}
 
 /*
  * bmap() is special. It gets used by applications such as lilo and by
@@ -3868,6 +3973,9 @@ void ext4_truncate(struct inode *inode)
         if (!ext4_can_truncate(inode))
                 return;
 
+        if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
+                ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
+
         if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
                 ext4_ext_truncate(inode);
                 return;
@@ -4110,12 +4218,7 @@ make_io:
                         unsigned num;
 
                         table = ext4_inode_table(sb, gdp);
-                        /* Make sure s_inode_readahead_blks is a power of 2 */
-                        while (EXT4_SB(sb)->s_inode_readahead_blks &
-                               (EXT4_SB(sb)->s_inode_readahead_blks-1))
-                                EXT4_SB(sb)->s_inode_readahead_blks =
-                                   (EXT4_SB(sb)->s_inode_readahead_blks &
-                                    (EXT4_SB(sb)->s_inode_readahead_blks-1));
+                        /* s_inode_readahead_blks is always a power of 2 */
                         b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
                         if (table > b)
                                 b = table;
@@ -4278,15 +4381,14 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
         ei->i_flags = le32_to_cpu(raw_inode->i_flags);
         inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
         ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
-        if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
-            cpu_to_le32(EXT4_OS_HURD)) {
+        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))
                 ei->i_file_acl |=
                         ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
-        }
         inode->i_size = ext4_isize(raw_inode);
         ei->i_disksize = inode->i_size;
         inode->i_generation = le32_to_cpu(raw_inode->i_generation);
         ei->i_block_group = iloc.block_group;
+        ei->i_last_alloc_group = ~0;
         /*
          * NOTE! The in-memory inode i_data array is in little-endian order
          * even on big-endian machines: we do NOT byteswap the block numbers!
@@ -4329,6 +4431,34 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                         (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
         }
 
+        ret = 0;
+        if (ei->i_file_acl &&
+            ((ei->i_file_acl <
+              (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
+               EXT4_SB(sb)->s_gdb_count)) ||
+             (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
+                ext4_error(sb, __func__,
+                           "bad extended attribute block %llu in inode #%lu",
+                           ei->i_file_acl, inode->i_ino);
+                ret = -EIO;
+                goto bad_inode;
+        } else if (ei->i_flags & EXT4_EXTENTS_FL) {
+                if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+                    (S_ISLNK(inode->i_mode) &&
+                     !ext4_inode_is_fast_symlink(inode)))
+                        /* Validate extent which is part of inode */
+                        ret = ext4_ext_check_inode(inode);
+        } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+                   (S_ISLNK(inode->i_mode) &&
+                    !ext4_inode_is_fast_symlink(inode))) {
+                /* Validate block references which are part of inode */
+                ret = ext4_check_inode_blockref(inode);
+        }
+        if (ret) {
+                brelse(bh);
+                goto bad_inode;
+        }
+
         if (S_ISREG(inode->i_mode)) {
                 inode->i_op = &ext4_file_inode_operations;
                 inode->i_fop = &ext4_file_operations;
@@ -4345,7 +4475,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                 inode->i_op = &ext4_symlink_inode_operations;
                 ext4_set_aops(inode);
                 }
-        } else {
+        } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+                   S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
                 inode->i_op = &ext4_special_inode_operations;
                 if (raw_inode->i_block[0])
                         init_special_inode(inode, inode->i_mode,
@@ -4353,6 +4484,13 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                 else
                         init_special_inode(inode, inode->i_mode,
                                 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
+        } else {
+                brelse(bh);
+                ret = -EIO;
+                ext4_error(inode->i_sb, __func__,
+                           "bogus i_mode (%o) for inode=%lu",
+                           inode->i_mode, inode->i_ino);
+                goto bad_inode;
         }
         brelse(iloc.bh);
         ext4_set_inode_flags(inode);
@@ -5146,8 +5284,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
         return !buffer_mapped(bh);
 }
 
-int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
+        struct page *page = vmf->page;
         loff_t size;
         unsigned long len;
         int ret = -EINVAL;
@@ -5199,6 +5338,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
                 goto out_unlock;
         ret = 0;
 out_unlock:
+        if (ret)
+                ret = VM_FAULT_SIGBUS;
         up_read(&inode->i_alloc_sem);
         return ret;
 }
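
The validation theme running through this patch is the bounds check in __ext4_check_blockref(): every on-disk block reference is stored little-endian and must fall below the filesystem's block count, otherwise the inode is declared corrupt and -EIO is returned. The following is a minimal userspace sketch of that same pattern, for illustration only: it assumes glibc's le32toh()/htole32() from <endian.h> in place of the kernel's le32_to_cpu()/cpu_to_le32(), and a caller-supplied fs_blocks_count in place of ext4_blocks_count().

#include <endian.h>   /* le32toh, htole32 (glibc byte-order helpers) */
#include <stdint.h>
#include <stdio.h>

/*
 * Scan an i_data-style array of little-endian block references and
 * reject any reference at or beyond the filesystem's block count.
 * A reference of 0 (a hole) passes, as in the kernel helper.
 * Returns 0 if all references are sane, -1 on the first bad one.
 */
static int check_blockrefs(const uint32_t *p, unsigned int max,
                           uint32_t fs_blocks_count)
{
        const uint32_t *bref;

        for (bref = p; bref < p + max; bref++) {
                uint32_t block = le32toh(*bref);

                if (block >= fs_blocks_count) {
                        fprintf(stderr,
                                "block reference %u >= max (%u), offset=%d\n",
                                block, fs_blocks_count, (int)(bref - p));
                        return -1;
                }
        }
        return 0;
}

int main(void)
{
        /* Two plausible references and one past the end of a 32768-block fs. */
        uint32_t refs[3] = { htole32(12), htole32(0), htole32(99999) };

        if (check_blockrefs(refs, 3, 32768) < 0)
                fprintf(stderr, "corrupt block reference detected\n");
        return 0;
}

In the patch itself this check is applied at two points visible above: ext4_iget() validates an inode's direct block references via the ext4_check_inode_blockref() macro when the inode is loaded, and ext4_get_branch() applies the indirect-block variant the first time each indirect block is read, so a corrupted reference is caught before it can be dereferenced.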