about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- fs/ext4/inode.c 465
1 files changed, 320 insertions, 145 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e11952404e02..f977aade0d1b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -38,6 +38,7 @@
38#include <linux/uio.h> 38#include <linux/uio.h>
39#include <linux/bio.h> 39#include <linux/bio.h>
40#include <linux/workqueue.h> 40#include <linux/workqueue.h>
41#include <linux/kernel.h>
41 42
42#include "ext4_jbd2.h" 43#include "ext4_jbd2.h"
43#include "xattr.h" 44#include "xattr.h"
@@ -194,7 +195,7 @@ void ext4_delete_inode(struct inode *inode)
194 inode->i_size = 0; 195 inode->i_size = 0;
195 err = ext4_mark_inode_dirty(handle, inode); 196 err = ext4_mark_inode_dirty(handle, inode);
196 if (err) { 197 if (err) {
197 ext4_warning(inode->i_sb, __func__, 198 ext4_warning(inode->i_sb,
198 "couldn't mark inode dirty (err %d)", err); 199 "couldn't mark inode dirty (err %d)", err);
199 goto stop_handle; 200 goto stop_handle;
200 } 201 }
@@ -212,7 +213,7 @@ void ext4_delete_inode(struct inode *inode)
212 if (err > 0) 213 if (err > 0)
213 err = ext4_journal_restart(handle, 3); 214 err = ext4_journal_restart(handle, 3);
214 if (err != 0) { 215 if (err != 0) {
215 ext4_warning(inode->i_sb, __func__, 216 ext4_warning(inode->i_sb,
216 "couldn't extend journal (err %d)", err); 217 "couldn't extend journal (err %d)", err);
217 stop_handle: 218 stop_handle:
218 ext4_journal_stop(handle); 219 ext4_journal_stop(handle);
@@ -323,8 +324,7 @@ static int ext4_block_to_path(struct inode *inode,
323 offsets[n++] = i_block & (ptrs - 1); 324 offsets[n++] = i_block & (ptrs - 1);
324 final = ptrs; 325 final = ptrs;
325 } else { 326 } else {
326 ext4_warning(inode->i_sb, "ext4_block_to_path", 327 ext4_warning(inode->i_sb, "block %lu > max in inode %lu",
327 "block %lu > max in inode %lu",
328 i_block + direct_blocks + 328 i_block + direct_blocks +
329 indirect_blocks + double_blocks, inode->i_ino); 329 indirect_blocks + double_blocks, inode->i_ino);
330 } 330 }
@@ -344,7 +344,7 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
344 if (blk && 344 if (blk &&
345 unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), 345 unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
346 blk, 1))) { 346 blk, 1))) {
347 ext4_error(inode->i_sb, function, 347 __ext4_error(inode->i_sb, function,
348 "invalid block reference %u " 348 "invalid block reference %u "
349 "in inode #%lu", blk, inode->i_ino); 349 "in inode #%lu", blk, inode->i_ino);
350 return -EIO; 350 return -EIO;
@@ -607,7 +607,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
607 if (*err) 607 if (*err)
608 goto failed_out; 608 goto failed_out;
609 609
610 BUG_ON(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS); 610 if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) {
611 EXT4_ERROR_INODE(inode,
612 "current_block %llu + count %lu > %d!",
613 current_block, count,
614 EXT4_MAX_BLOCK_FILE_PHYS);
615 *err = -EIO;
616 goto failed_out;
617 }
611 618
612 target -= count; 619 target -= count;
613 /* allocate blocks for indirect blocks */ 620 /* allocate blocks for indirect blocks */
@@ -643,7 +650,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
643 ar.flags = EXT4_MB_HINT_DATA; 650 ar.flags = EXT4_MB_HINT_DATA;
644 651
645 current_block = ext4_mb_new_blocks(handle, &ar, err); 652 current_block = ext4_mb_new_blocks(handle, &ar, err);
646 BUG_ON(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS); 653 if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) {
654 EXT4_ERROR_INODE(inode,
655 "current_block %llu + ar.len %d > %d!",
656 current_block, ar.len,
657 EXT4_MAX_BLOCK_FILE_PHYS);
658 *err = -EIO;
659 goto failed_out;
660 }
647 661
648 if (*err && (target == blks)) { 662 if (*err && (target == blks)) {
649 /* 663 /*
@@ -1061,6 +1075,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
1061 int mdb_free = 0, allocated_meta_blocks = 0; 1075 int mdb_free = 0, allocated_meta_blocks = 0;
1062 1076
1063 spin_lock(&ei->i_block_reservation_lock); 1077 spin_lock(&ei->i_block_reservation_lock);
1078 trace_ext4_da_update_reserve_space(inode, used);
1064 if (unlikely(used > ei->i_reserved_data_blocks)) { 1079 if (unlikely(used > ei->i_reserved_data_blocks)) {
1065 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " 1080 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
1066 "with only %d reserved data blocks\n", 1081 "with only %d reserved data blocks\n",
@@ -1124,7 +1139,7 @@ static int check_block_validity(struct inode *inode, const char *msg,
1124 sector_t logical, sector_t phys, int len) 1139 sector_t logical, sector_t phys, int len)
1125{ 1140{
1126 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { 1141 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
1127 ext4_error(inode->i_sb, msg, 1142 __ext4_error(inode->i_sb, msg,
1128 "inode #%lu logical block %llu mapped to %llu " 1143 "inode #%lu logical block %llu mapped to %llu "
1129 "(size %d)", inode->i_ino, 1144 "(size %d)", inode->i_ino,
1130 (unsigned long long) logical, 1145 (unsigned long long) logical,
@@ -1306,7 +1321,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
1306 * i_data's format changing. Force the migrate 1321 * i_data's format changing. Force the migrate
1307 * to fail by clearing migrate flags 1322 * to fail by clearing migrate flags
1308 */ 1323 */
1309 EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; 1324 ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
1310 } 1325 }
1311 1326
1312 /* 1327 /*
@@ -1534,6 +1549,8 @@ static void ext4_truncate_failed_write(struct inode *inode)
1534 ext4_truncate(inode); 1549 ext4_truncate(inode);
1535} 1550}
1536 1551
1552static int ext4_get_block_write(struct inode *inode, sector_t iblock,
1553 struct buffer_head *bh_result, int create);
1537static int ext4_write_begin(struct file *file, struct address_space *mapping, 1554static int ext4_write_begin(struct file *file, struct address_space *mapping,
1538 loff_t pos, unsigned len, unsigned flags, 1555 loff_t pos, unsigned len, unsigned flags,
1539 struct page **pagep, void **fsdata) 1556 struct page **pagep, void **fsdata)
@@ -1575,8 +1592,12 @@ retry:
1575 } 1592 }
1576 *pagep = page; 1593 *pagep = page;
1577 1594
1578 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 1595 if (ext4_should_dioread_nolock(inode))
1579 ext4_get_block); 1596 ret = block_write_begin(file, mapping, pos, len, flags, pagep,
1597 fsdata, ext4_get_block_write);
1598 else
1599 ret = block_write_begin(file, mapping, pos, len, flags, pagep,
1600 fsdata, ext4_get_block);
1580 1601
1581 if (!ret && ext4_should_journal_data(inode)) { 1602 if (!ret && ext4_should_journal_data(inode)) {
1582 ret = walk_page_buffers(handle, page_buffers(page), 1603 ret = walk_page_buffers(handle, page_buffers(page),
@@ -1793,7 +1814,7 @@ static int ext4_journalled_write_end(struct file *file,
1793 new_i_size = pos + copied; 1814 new_i_size = pos + copied;
1794 if (new_i_size > inode->i_size) 1815 if (new_i_size > inode->i_size)
1795 i_size_write(inode, pos+copied); 1816 i_size_write(inode, pos+copied);
1796 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; 1817 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
1797 if (new_i_size > EXT4_I(inode)->i_disksize) { 1818 if (new_i_size > EXT4_I(inode)->i_disksize) {
1798 ext4_update_i_disksize(inode, new_i_size); 1819 ext4_update_i_disksize(inode, new_i_size);
1799 ret2 = ext4_mark_inode_dirty(handle, inode); 1820 ret2 = ext4_mark_inode_dirty(handle, inode);
@@ -1846,6 +1867,7 @@ repeat:
1846 spin_lock(&ei->i_block_reservation_lock); 1867 spin_lock(&ei->i_block_reservation_lock);
1847 md_reserved = ei->i_reserved_meta_blocks; 1868 md_reserved = ei->i_reserved_meta_blocks;
1848 md_needed = ext4_calc_metadata_amount(inode, lblock); 1869 md_needed = ext4_calc_metadata_amount(inode, lblock);
1870 trace_ext4_da_reserve_space(inode, md_needed);
1849 spin_unlock(&ei->i_block_reservation_lock); 1871 spin_unlock(&ei->i_block_reservation_lock);
1850 1872
1851 /* 1873 /*
@@ -2091,6 +2113,8 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
2091 } else if (buffer_mapped(bh)) 2113 } else if (buffer_mapped(bh))
2092 BUG_ON(bh->b_blocknr != pblock); 2114 BUG_ON(bh->b_blocknr != pblock);
2093 2115
2116 if (buffer_uninit(exbh))
2117 set_buffer_uninit(bh);
2094 cur_logical++; 2118 cur_logical++;
2095 pblock++; 2119 pblock++;
2096 } while ((bh = bh->b_this_page) != head); 2120 } while ((bh = bh->b_this_page) != head);
@@ -2133,17 +2157,16 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
2133 break; 2157 break;
2134 for (i = 0; i < nr_pages; i++) { 2158 for (i = 0; i < nr_pages; i++) {
2135 struct page *page = pvec.pages[i]; 2159 struct page *page = pvec.pages[i];
2136 index = page->index; 2160 if (page->index > end)
2137 if (index > end)
2138 break; 2161 break;
2139 index++;
2140
2141 BUG_ON(!PageLocked(page)); 2162 BUG_ON(!PageLocked(page));
2142 BUG_ON(PageWriteback(page)); 2163 BUG_ON(PageWriteback(page));
2143 block_invalidatepage(page, 0); 2164 block_invalidatepage(page, 0);
2144 ClearPageUptodate(page); 2165 ClearPageUptodate(page);
2145 unlock_page(page); 2166 unlock_page(page);
2146 } 2167 }
2168 index = pvec.pages[nr_pages - 1]->index + 1;
2169 pagevec_release(&pvec);
2147 } 2170 }
2148 return; 2171 return;
2149} 2172}
@@ -2220,6 +2243,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2220 */ 2243 */
2221 new.b_state = 0; 2244 new.b_state = 0;
2222 get_blocks_flags = EXT4_GET_BLOCKS_CREATE; 2245 get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
2246 if (ext4_should_dioread_nolock(mpd->inode))
2247 get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
2223 if (mpd->b_state & (1 << BH_Delay)) 2248 if (mpd->b_state & (1 << BH_Delay))
2224 get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; 2249 get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
2225 2250
@@ -2630,11 +2655,14 @@ static int __ext4_journalled_writepage(struct page *page,
2630 ret = err; 2655 ret = err;
2631 2656
2632 walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); 2657 walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one);
2633 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; 2658 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
2634out: 2659out:
2635 return ret; 2660 return ret;
2636} 2661}
2637 2662
2663static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
2664static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
2665
2638/* 2666/*
2639 * Note that we don't need to start a transaction unless we're journaling data 2667 * Note that we don't need to start a transaction unless we're journaling data
2640 * because we should have holes filled from ext4_page_mkwrite(). We even don't 2668 * because we should have holes filled from ext4_page_mkwrite(). We even don't
@@ -2682,7 +2710,7 @@ static int ext4_writepage(struct page *page,
2682 int ret = 0; 2710 int ret = 0;
2683 loff_t size; 2711 loff_t size;
2684 unsigned int len; 2712 unsigned int len;
2685 struct buffer_head *page_bufs; 2713 struct buffer_head *page_bufs = NULL;
2686 struct inode *inode = page->mapping->host; 2714 struct inode *inode = page->mapping->host;
2687 2715
2688 trace_ext4_writepage(inode, page); 2716 trace_ext4_writepage(inode, page);
@@ -2758,7 +2786,11 @@ static int ext4_writepage(struct page *page,
2758 2786
2759 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) 2787 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
2760 ret = nobh_writepage(page, noalloc_get_block_write, wbc); 2788 ret = nobh_writepage(page, noalloc_get_block_write, wbc);
2761 else 2789 else if (page_bufs && buffer_uninit(page_bufs)) {
2790 ext4_set_bh_endio(page_bufs, inode);
2791 ret = block_write_full_page_endio(page, noalloc_get_block_write,
2792 wbc, ext4_end_io_buffer_write);
2793 } else
2762 ret = block_write_full_page(page, noalloc_get_block_write, 2794 ret = block_write_full_page(page, noalloc_get_block_write,
2763 wbc); 2795 wbc);
2764 2796
@@ -3301,7 +3333,8 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
3301 filemap_write_and_wait(mapping); 3333 filemap_write_and_wait(mapping);
3302 } 3334 }
3303 3335
3304 if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { 3336 if (EXT4_JOURNAL(inode) &&
3337 ext4_test_inode_state(inode, EXT4_STATE_JDATA)) {
3305 /* 3338 /*
3306 * This is a REALLY heavyweight approach, but the use of 3339 * This is a REALLY heavyweight approach, but the use of
3307 * bmap on dirty files is expected to be extremely rare: 3340 * bmap on dirty files is expected to be extremely rare:
@@ -3320,7 +3353,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
3320 * everything they get. 3353 * everything they get.
3321 */ 3354 */
3322 3355
3323 EXT4_I(inode)->i_state &= ~EXT4_STATE_JDATA; 3356 ext4_clear_inode_state(inode, EXT4_STATE_JDATA);
3324 journal = EXT4_JOURNAL(inode); 3357 journal = EXT4_JOURNAL(inode);
3325 jbd2_journal_lock_updates(journal); 3358 jbd2_journal_lock_updates(journal);
3326 err = jbd2_journal_flush(journal); 3359 err = jbd2_journal_flush(journal);
@@ -3345,11 +3378,45 @@ ext4_readpages(struct file *file, struct address_space *mapping,
3345 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); 3378 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
3346} 3379}
3347 3380
3381static void ext4_free_io_end(ext4_io_end_t *io)
3382{
3383 BUG_ON(!io);
3384 if (io->page)
3385 put_page(io->page);
3386 iput(io->inode);
3387 kfree(io);
3388}
3389
3390static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
3391{
3392 struct buffer_head *head, *bh;
3393 unsigned int curr_off = 0;
3394
3395 if (!page_has_buffers(page))
3396 return;
3397 head = bh = page_buffers(page);
3398 do {
3399 if (offset <= curr_off && test_clear_buffer_uninit(bh)
3400 && bh->b_private) {
3401 ext4_free_io_end(bh->b_private);
3402 bh->b_private = NULL;
3403 bh->b_end_io = NULL;
3404 }
3405 curr_off = curr_off + bh->b_size;
3406 bh = bh->b_this_page;
3407 } while (bh != head);
3408}
3409
3348static void ext4_invalidatepage(struct page *page, unsigned long offset) 3410static void ext4_invalidatepage(struct page *page, unsigned long offset)
3349{ 3411{
3350 journal_t *journal = EXT4_JOURNAL(page->mapping->host); 3412 journal_t *journal = EXT4_JOURNAL(page->mapping->host);
3351 3413
3352 /* 3414 /*
3415 * free any io_end structure allocated for buffers to be discarded
3416 */
3417 if (ext4_should_dioread_nolock(page->mapping->host))
3418 ext4_invalidatepage_free_endio(page, offset);
3419 /*
3353 * If it's a full truncate we just forget about the pending dirtying 3420 * If it's a full truncate we just forget about the pending dirtying
3354 */ 3421 */
3355 if (offset == 0) 3422 if (offset == 0)
@@ -3420,7 +3487,14 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
3420 } 3487 }
3421 3488
3422retry: 3489retry:
3423 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 3490 if (rw == READ && ext4_should_dioread_nolock(inode))
3491 ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
3492 inode->i_sb->s_bdev, iov,
3493 offset, nr_segs,
3494 ext4_get_block, NULL);
3495 else
3496 ret = blockdev_direct_IO(rw, iocb, inode,
3497 inode->i_sb->s_bdev, iov,
3424 offset, nr_segs, 3498 offset, nr_segs,
3425 ext4_get_block, NULL); 3499 ext4_get_block, NULL);
3426 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3500 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -3436,6 +3510,9 @@ retry:
3436 * but cannot extend i_size. Bail out and pretend 3510 * but cannot extend i_size. Bail out and pretend
3437 * the write failed... */ 3511 * the write failed... */
3438 ret = PTR_ERR(handle); 3512 ret = PTR_ERR(handle);
3513 if (inode->i_nlink)
3514 ext4_orphan_del(NULL, inode);
3515
3439 goto out; 3516 goto out;
3440 } 3517 }
3441 if (inode->i_nlink) 3518 if (inode->i_nlink)
@@ -3463,75 +3540,63 @@ out:
3463 return ret; 3540 return ret;
3464} 3541}
3465 3542
3466static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock, 3543static int ext4_get_block_write(struct inode *inode, sector_t iblock,
3467 struct buffer_head *bh_result, int create) 3544 struct buffer_head *bh_result, int create)
3468{ 3545{
3469 handle_t *handle = NULL; 3546 handle_t *handle = ext4_journal_current_handle();
3470 int ret = 0; 3547 int ret = 0;
3471 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; 3548 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
3472 int dio_credits; 3549 int dio_credits;
3550 int started = 0;
3473 3551
3474 ext4_debug("ext4_get_block_dio_write: inode %lu, create flag %d\n", 3552 ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
3475 inode->i_ino, create); 3553 inode->i_ino, create);
3476 /* 3554 /*
3477 * DIO VFS code passes create = 0 flag for write to 3555 * ext4_get_block in preparation for a DIO write or buffer write.
3478 * the middle of file. It does this to avoid block 3556 * We allocate an uninitialized extent if blocks haven't been allocated.
3479 * allocation for holes, to prevent expose stale data 3557 * The extent will be converted to initialized after IO complete.
3480 * out when there is parallel buffered read (which does
3481 * not hold the i_mutex lock) while direct IO write has
3482 * not completed. DIO request on holes finally falls back
3483 * to buffered IO for this reason.
3484 *
3485 * For ext4 extent based file, since we support fallocate,
3486 * new allocated extent as uninitialized, for holes, we
3487 * could fallocate blocks for holes, thus parallel
3488 * buffered IO read will zero out the page when read on
3489 * a hole while parallel DIO write to the hole has not completed.
3490 *
3491 * when we come here, we know it's a direct IO write to
3492 * to the middle of file (<i_size)
3493 * so it's safe to override the create flag from VFS.
3494 */ 3558 */
3495 create = EXT4_GET_BLOCKS_DIO_CREATE_EXT; 3559 create = EXT4_GET_BLOCKS_IO_CREATE_EXT;
3496 3560
3497 if (max_blocks > DIO_MAX_BLOCKS) 3561 if (!handle) {
3498 max_blocks = DIO_MAX_BLOCKS; 3562 if (max_blocks > DIO_MAX_BLOCKS)
3499 dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); 3563 max_blocks = DIO_MAX_BLOCKS;
3500 handle = ext4_journal_start(inode, dio_credits); 3564 dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
3501 if (IS_ERR(handle)) { 3565 handle = ext4_journal_start(inode, dio_credits);
3502 ret = PTR_ERR(handle); 3566 if (IS_ERR(handle)) {
3503 goto out; 3567 ret = PTR_ERR(handle);
3568 goto out;
3569 }
3570 started = 1;
3504 } 3571 }
3572
3505 ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, 3573 ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
3506 create); 3574 create);
3507 if (ret > 0) { 3575 if (ret > 0) {
3508 bh_result->b_size = (ret << inode->i_blkbits); 3576 bh_result->b_size = (ret << inode->i_blkbits);
3509 ret = 0; 3577 ret = 0;
3510 } 3578 }
3511 ext4_journal_stop(handle); 3579 if (started)
3580 ext4_journal_stop(handle);
3512out: 3581out:
3513 return ret; 3582 return ret;
3514} 3583}
3515 3584
3516static void ext4_free_io_end(ext4_io_end_t *io) 3585static void dump_completed_IO(struct inode * inode)
3517{
3518 BUG_ON(!io);
3519 iput(io->inode);
3520 kfree(io);
3521}
3522static void dump_aio_dio_list(struct inode * inode)
3523{ 3586{
3524#ifdef EXT4_DEBUG 3587#ifdef EXT4_DEBUG
3525 struct list_head *cur, *before, *after; 3588 struct list_head *cur, *before, *after;
3526 ext4_io_end_t *io, *io0, *io1; 3589 ext4_io_end_t *io, *io0, *io1;
3590 unsigned long flags;
3527 3591
3528 if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ 3592 if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
3529 ext4_debug("inode %lu aio dio list is empty\n", inode->i_ino); 3593 ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
3530 return; 3594 return;
3531 } 3595 }
3532 3596
3533 ext4_debug("Dump inode %lu aio_dio_completed_IO list \n", inode->i_ino); 3597 ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
3534 list_for_each_entry(io, &EXT4_I(inode)->i_aio_dio_complete_list, list){ 3598 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
3599 list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
3535 cur = &io->list; 3600 cur = &io->list;
3536 before = cur->prev; 3601 before = cur->prev;
3537 io0 = container_of(before, ext4_io_end_t, list); 3602 io0 = container_of(before, ext4_io_end_t, list);
@@ -3541,32 +3606,31 @@ static void dump_aio_dio_list(struct inode * inode)
3541 ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", 3606 ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
3542 io, inode->i_ino, io0, io1); 3607 io, inode->i_ino, io0, io1);
3543 } 3608 }
3609 spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
3544#endif 3610#endif
3545} 3611}
3546 3612
3547/* 3613/*
3548 * check a range of space and convert unwritten extents to written. 3614 * check a range of space and convert unwritten extents to written.
3549 */ 3615 */
3550static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) 3616static int ext4_end_io_nolock(ext4_io_end_t *io)
3551{ 3617{
3552 struct inode *inode = io->inode; 3618 struct inode *inode = io->inode;
3553 loff_t offset = io->offset; 3619 loff_t offset = io->offset;
3554 size_t size = io->size; 3620 ssize_t size = io->size;
3555 int ret = 0; 3621 int ret = 0;
3556 3622
3557 ext4_debug("end_aio_dio_onlock: io 0x%p from inode %lu,list->next 0x%p," 3623 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
3558 "list->prev 0x%p\n", 3624 "list->prev 0x%p\n",
3559 io, inode->i_ino, io->list.next, io->list.prev); 3625 io, inode->i_ino, io->list.next, io->list.prev);
3560 3626
3561 if (list_empty(&io->list)) 3627 if (list_empty(&io->list))
3562 return ret; 3628 return ret;
3563 3629
3564 if (io->flag != DIO_AIO_UNWRITTEN) 3630 if (io->flag != EXT4_IO_UNWRITTEN)
3565 return ret; 3631 return ret;
3566 3632
3567 if (offset + size <= i_size_read(inode)) 3633 ret = ext4_convert_unwritten_extents(inode, offset, size);
3568 ret = ext4_convert_unwritten_extents(inode, offset, size);
3569
3570 if (ret < 0) { 3634 if (ret < 0) {
3571 printk(KERN_EMERG "%s: failed to convert unwritten" 3635 printk(KERN_EMERG "%s: failed to convert unwritten"
3572 "extents to written extents, error is %d" 3636 "extents to written extents, error is %d"
@@ -3579,50 +3643,64 @@ static int ext4_end_aio_dio_nolock(ext4_io_end_t *io)
3579 io->flag = 0; 3643 io->flag = 0;
3580 return ret; 3644 return ret;
3581} 3645}
3646
3582/* 3647/*
3583 * work on completed aio dio IO, to convert unwritten extents to extents 3648 * work on completed aio dio IO, to convert unwritten extents to extents
3584 */ 3649 */
3585static void ext4_end_aio_dio_work(struct work_struct *work) 3650static void ext4_end_io_work(struct work_struct *work)
3586{ 3651{
3587 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); 3652 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
3588 struct inode *inode = io->inode; 3653 struct inode *inode = io->inode;
3589 int ret = 0; 3654 struct ext4_inode_info *ei = EXT4_I(inode);
3655 unsigned long flags;
3656 int ret;
3590 3657
3591 mutex_lock(&inode->i_mutex); 3658 mutex_lock(&inode->i_mutex);
3592 ret = ext4_end_aio_dio_nolock(io); 3659 ret = ext4_end_io_nolock(io);
3593 if (ret >= 0) { 3660 if (ret < 0) {
3594 if (!list_empty(&io->list)) 3661 mutex_unlock(&inode->i_mutex);
3595 list_del_init(&io->list); 3662 return;
3596 ext4_free_io_end(io);
3597 } 3663 }
3664
3665 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3666 if (!list_empty(&io->list))
3667 list_del_init(&io->list);
3668 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3598 mutex_unlock(&inode->i_mutex); 3669 mutex_unlock(&inode->i_mutex);
3670 ext4_free_io_end(io);
3599} 3671}
3672
3600/* 3673/*
3601 * This function is called from ext4_sync_file(). 3674 * This function is called from ext4_sync_file().
3602 * 3675 *
3603 * When AIO DIO IO is completed, the work to convert unwritten 3676 * When IO is completed, the work to convert unwritten extents to
3604 * extents to written is queued on workqueue but may not get immediately 3677 * written is queued on workqueue but may not get immediately
3605 * scheduled. When fsync is called, we need to ensure the 3678 * scheduled. When fsync is called, we need to ensure the
3606 * conversion is complete before fsync returns. 3679 * conversion is complete before fsync returns.
3607 * The inode keeps track of a list of completed AIO from DIO path 3680 * The inode keeps track of a list of pending/completed IO that
3608 * that might needs to do the conversion. This function walks through 3681 * might needs to do the conversion. This function walks through
3609 * the list and convert the related unwritten extents to written. 3682 * the list and convert the related unwritten extents for completed IO
3683 * to written.
3684 * The function returns 0 on success, or a negative error code if a conversion failed.
3610 */ 3685 */
3611int flush_aio_dio_completed_IO(struct inode *inode) 3686int flush_completed_IO(struct inode *inode)
3612{ 3687{
3613 ext4_io_end_t *io; 3688 ext4_io_end_t *io;
3689 struct ext4_inode_info *ei = EXT4_I(inode);
3690 unsigned long flags;
3614 int ret = 0; 3691 int ret = 0;
3615 int ret2 = 0; 3692 int ret2 = 0;
3616 3693
3617 if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)) 3694 if (list_empty(&ei->i_completed_io_list))
3618 return ret; 3695 return ret;
3619 3696
3620 dump_aio_dio_list(inode); 3697 dump_completed_IO(inode);
3621 while (!list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ 3698 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3622 io = list_entry(EXT4_I(inode)->i_aio_dio_complete_list.next, 3699 while (!list_empty(&ei->i_completed_io_list)){
3700 io = list_entry(ei->i_completed_io_list.next,
3623 ext4_io_end_t, list); 3701 ext4_io_end_t, list);
3624 /* 3702 /*
3625 * Calling ext4_end_aio_dio_nolock() to convert completed 3703 * Calling ext4_end_io_nolock() to convert completed
3626 * IO to written. 3704 * IO to written.
3627 * 3705 *
3628 * When ext4_sync_file() is called, run_queue() may already 3706 * When ext4_sync_file() is called, run_queue() may already
@@ -3635,20 +3713,23 @@ int flush_aio_dio_completed_IO(struct inode *inode)
3635 * avoid double converting from both fsync and background work 3713 * avoid double converting from both fsync and background work
3636 * queue work. 3714 * queue work.
3637 */ 3715 */
3638 ret = ext4_end_aio_dio_nolock(io); 3716 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3717 ret = ext4_end_io_nolock(io);
3718 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3639 if (ret < 0) 3719 if (ret < 0)
3640 ret2 = ret; 3720 ret2 = ret;
3641 else 3721 else
3642 list_del_init(&io->list); 3722 list_del_init(&io->list);
3643 } 3723 }
3724 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3644 return (ret2 < 0) ? ret2 : 0; 3725 return (ret2 < 0) ? ret2 : 0;
3645} 3726}
3646 3727
3647static ext4_io_end_t *ext4_init_io_end (struct inode *inode) 3728static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
3648{ 3729{
3649 ext4_io_end_t *io = NULL; 3730 ext4_io_end_t *io = NULL;
3650 3731
3651 io = kmalloc(sizeof(*io), GFP_NOFS); 3732 io = kmalloc(sizeof(*io), flags);
3652 3733
3653 if (io) { 3734 if (io) {
3654 igrab(inode); 3735 igrab(inode);
@@ -3656,8 +3737,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode)
3656 io->flag = 0; 3737 io->flag = 0;
3657 io->offset = 0; 3738 io->offset = 0;
3658 io->size = 0; 3739 io->size = 0;
3659 io->error = 0; 3740 io->page = NULL;
3660 INIT_WORK(&io->work, ext4_end_aio_dio_work); 3741 INIT_WORK(&io->work, ext4_end_io_work);
3661 INIT_LIST_HEAD(&io->list); 3742 INIT_LIST_HEAD(&io->list);
3662 } 3743 }
3663 3744
@@ -3669,6 +3750,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3669{ 3750{
3670 ext4_io_end_t *io_end = iocb->private; 3751 ext4_io_end_t *io_end = iocb->private;
3671 struct workqueue_struct *wq; 3752 struct workqueue_struct *wq;
3753 unsigned long flags;
3754 struct ext4_inode_info *ei;
3672 3755
3673 /* if not async direct IO or dio with 0 bytes write, just return */ 3756 /* if not async direct IO or dio with 0 bytes write, just return */
3674 if (!io_end || !size) 3757 if (!io_end || !size)
@@ -3680,7 +3763,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3680 size); 3763 size);
3681 3764
3682 /* if not aio dio with unwritten extents, just free io and return */ 3765 /* if not aio dio with unwritten extents, just free io and return */
3683 if (io_end->flag != DIO_AIO_UNWRITTEN){ 3766 if (io_end->flag != EXT4_IO_UNWRITTEN){
3684 ext4_free_io_end(io_end); 3767 ext4_free_io_end(io_end);
3685 iocb->private = NULL; 3768 iocb->private = NULL;
3686 return; 3769 return;
@@ -3688,16 +3771,85 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3688 3771
3689 io_end->offset = offset; 3772 io_end->offset = offset;
3690 io_end->size = size; 3773 io_end->size = size;
3774 io_end->flag = EXT4_IO_UNWRITTEN;
3691 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; 3775 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
3692 3776
3693 /* queue the work to convert unwritten extents to written */ 3777 /* queue the work to convert unwritten extents to written */
3694 queue_work(wq, &io_end->work); 3778 queue_work(wq, &io_end->work);
3695 3779
3696 /* Add the io_end to per-inode completed aio dio list*/ 3780 /* Add the io_end to per-inode completed aio dio list*/
3697 list_add_tail(&io_end->list, 3781 ei = EXT4_I(io_end->inode);
3698 &EXT4_I(io_end->inode)->i_aio_dio_complete_list); 3782 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3783 list_add_tail(&io_end->list, &ei->i_completed_io_list);
3784 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3699 iocb->private = NULL; 3785 iocb->private = NULL;
3700} 3786}
3787
3788static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
3789{
3790 ext4_io_end_t *io_end = bh->b_private;
3791 struct workqueue_struct *wq;
3792 struct inode *inode;
3793 unsigned long flags;
3794
3795 if (!test_clear_buffer_uninit(bh) || !io_end)
3796 goto out;
3797
3798 if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) {
3799 printk("sb umounted, discard end_io request for inode %lu\n",
3800 io_end->inode->i_ino);
3801 ext4_free_io_end(io_end);
3802 goto out;
3803 }
3804
3805 io_end->flag = EXT4_IO_UNWRITTEN;
3806 inode = io_end->inode;
3807
3808 /* Add the io_end to per-inode completed io list*/
3809 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
3810 list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
3811 spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
3812
3813 wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
3814 /* queue the work to convert unwritten extents to written */
3815 queue_work(wq, &io_end->work);
3816out:
3817 bh->b_private = NULL;
3818 bh->b_end_io = NULL;
3819 clear_buffer_uninit(bh);
3820 end_buffer_async_write(bh, uptodate);
3821}
3822
3823static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
3824{
3825 ext4_io_end_t *io_end;
3826 struct page *page = bh->b_page;
3827 loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT;
3828 size_t size = bh->b_size;
3829
3830retry:
3831 io_end = ext4_init_io_end(inode, GFP_ATOMIC);
3832 if (!io_end) {
3833 if (printk_ratelimit())
3834 printk(KERN_WARNING "%s: allocation fail\n", __func__);
3835 schedule();
3836 goto retry;
3837 }
3838 io_end->offset = offset;
3839 io_end->size = size;
3840 /*
3841 * We need to hold a reference to the page to make sure it
3842 * doesn't get evicted before ext4_end_io_work() has a chance
3843 * to convert the extent from unwritten to written.
3844 */
3845 io_end->page = page;
3846 get_page(io_end->page);
3847
3848 bh->b_private = io_end;
3849 bh->b_end_io = ext4_end_io_buffer_write;
3850 return 0;
3851}
3852
3701/* 3853/*
3702 * For ext4 extent files, ext4 will do direct-io write to holes, 3854 * For ext4 extent files, ext4 will do direct-io write to holes,
3703 * preallocated extents, and those write extend the file, no need to 3855 * preallocated extents, and those write extend the file, no need to
@@ -3751,7 +3903,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3751 iocb->private = NULL; 3903 iocb->private = NULL;
3752 EXT4_I(inode)->cur_aio_dio = NULL; 3904 EXT4_I(inode)->cur_aio_dio = NULL;
3753 if (!is_sync_kiocb(iocb)) { 3905 if (!is_sync_kiocb(iocb)) {
3754 iocb->private = ext4_init_io_end(inode); 3906 iocb->private = ext4_init_io_end(inode, GFP_NOFS);
3755 if (!iocb->private) 3907 if (!iocb->private)
3756 return -ENOMEM; 3908 return -ENOMEM;
3757 /* 3909 /*
@@ -3767,7 +3919,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3767 ret = blockdev_direct_IO(rw, iocb, inode, 3919 ret = blockdev_direct_IO(rw, iocb, inode,
3768 inode->i_sb->s_bdev, iov, 3920 inode->i_sb->s_bdev, iov,
3769 offset, nr_segs, 3921 offset, nr_segs,
3770 ext4_get_block_dio_write, 3922 ext4_get_block_write,
3771 ext4_end_io_dio); 3923 ext4_end_io_dio);
3772 if (iocb->private) 3924 if (iocb->private)
3773 EXT4_I(inode)->cur_aio_dio = NULL; 3925 EXT4_I(inode)->cur_aio_dio = NULL;
@@ -3788,8 +3940,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3788 if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { 3940 if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
3789 ext4_free_io_end(iocb->private); 3941 ext4_free_io_end(iocb->private);
3790 iocb->private = NULL; 3942 iocb->private = NULL;
3791 } else if (ret > 0 && (EXT4_I(inode)->i_state & 3943 } else if (ret > 0 && ext4_test_inode_state(inode,
3792 EXT4_STATE_DIO_UNWRITTEN)) { 3944 EXT4_STATE_DIO_UNWRITTEN)) {
3793 int err; 3945 int err;
3794 /* 3946 /*
3795 * for non AIO case, since the IO is already 3947 * for non AIO case, since the IO is already
@@ -3799,7 +3951,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3799 offset, ret); 3951 offset, ret);
3800 if (err < 0) 3952 if (err < 0)
3801 ret = err; 3953 ret = err;
3802 EXT4_I(inode)->i_state &= ~EXT4_STATE_DIO_UNWRITTEN; 3954 ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3803 } 3955 }
3804 return ret; 3956 return ret;
3805 } 3957 }
@@ -4130,18 +4282,27 @@ no_top:
4130 * We release `count' blocks on disk, but (last - first) may be greater 4282 * We release `count' blocks on disk, but (last - first) may be greater
4131 * than `count' because there can be holes in there. 4283 * than `count' because there can be holes in there.
4132 */ 4284 */
4133static void ext4_clear_blocks(handle_t *handle, struct inode *inode, 4285static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
4134 struct buffer_head *bh, 4286 struct buffer_head *bh,
4135 ext4_fsblk_t block_to_free, 4287 ext4_fsblk_t block_to_free,
4136 unsigned long count, __le32 *first, 4288 unsigned long count, __le32 *first,
4137 __le32 *last) 4289 __le32 *last)
4138{ 4290{
4139 __le32 *p; 4291 __le32 *p;
4140 int flags = EXT4_FREE_BLOCKS_FORGET; 4292 int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED;
4141 4293
4142 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 4294 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
4143 flags |= EXT4_FREE_BLOCKS_METADATA; 4295 flags |= EXT4_FREE_BLOCKS_METADATA;
4144 4296
4297 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
4298 count)) {
4299 ext4_error(inode->i_sb, "inode #%lu: "
4300 "attempt to clear blocks %llu len %lu, invalid",
4301 inode->i_ino, (unsigned long long) block_to_free,
4302 count);
4303 return 1;
4304 }
4305
4145 if (try_to_extend_transaction(handle, inode)) { 4306 if (try_to_extend_transaction(handle, inode)) {
4146 if (bh) { 4307 if (bh) {
4147 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 4308 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
@@ -4160,6 +4321,7 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
4160 *p = 0; 4321 *p = 0;
4161 4322
4162 ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); 4323 ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
4324 return 0;
4163} 4325}
4164 4326
4165/** 4327/**
@@ -4215,9 +4377,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
4215 } else if (nr == block_to_free + count) { 4377 } else if (nr == block_to_free + count) {
4216 count++; 4378 count++;
4217 } else { 4379 } else {
4218 ext4_clear_blocks(handle, inode, this_bh, 4380 if (ext4_clear_blocks(handle, inode, this_bh,
4219 block_to_free, 4381 block_to_free, count,
4220 count, block_to_free_p, p); 4382 block_to_free_p, p))
4383 break;
4221 block_to_free = nr; 4384 block_to_free = nr;
4222 block_to_free_p = p; 4385 block_to_free_p = p;
4223 count = 1; 4386 count = 1;
@@ -4241,7 +4404,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
4241 if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) 4404 if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
4242 ext4_handle_dirty_metadata(handle, inode, this_bh); 4405 ext4_handle_dirty_metadata(handle, inode, this_bh);
4243 else 4406 else
4244 ext4_error(inode->i_sb, __func__, 4407 ext4_error(inode->i_sb,
4245 "circular indirect block detected, " 4408 "circular indirect block detected, "
4246 "inode=%lu, block=%llu", 4409 "inode=%lu, block=%llu",
4247 inode->i_ino, 4410 inode->i_ino,
@@ -4281,6 +4444,16 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4281 if (!nr) 4444 if (!nr)
4282 continue; /* A hole */ 4445 continue; /* A hole */
4283 4446
4447 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
4448 nr, 1)) {
4449 ext4_error(inode->i_sb,
4450 "indirect mapped block in inode "
4451 "#%lu invalid (level %d, blk #%lu)",
4452 inode->i_ino, depth,
4453 (unsigned long) nr);
4454 break;
4455 }
4456
4284 /* Go read the buffer for the next level down */ 4457 /* Go read the buffer for the next level down */
4285 bh = sb_bread(inode->i_sb, nr); 4458 bh = sb_bread(inode->i_sb, nr);
4286 4459
@@ -4289,7 +4462,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4289 * (should be rare). 4462 * (should be rare).
4290 */ 4463 */
4291 if (!bh) { 4464 if (!bh) {
4292 ext4_error(inode->i_sb, "ext4_free_branches", 4465 ext4_error(inode->i_sb,
4293 "Read failure, inode=%lu, block=%llu", 4466 "Read failure, inode=%lu, block=%llu",
4294 inode->i_ino, nr); 4467 inode->i_ino, nr);
4295 continue; 4468 continue;
@@ -4433,8 +4606,10 @@ void ext4_truncate(struct inode *inode)
4433 if (!ext4_can_truncate(inode)) 4606 if (!ext4_can_truncate(inode))
4434 return; 4607 return;
4435 4608
4609 EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
4610
4436 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) 4611 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
4437 ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; 4612 ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
4438 4613
4439 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 4614 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
4440 ext4_ext_truncate(inode); 4615 ext4_ext_truncate(inode);
@@ -4604,9 +4779,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
4604 4779
4605 bh = sb_getblk(sb, block); 4780 bh = sb_getblk(sb, block);
4606 if (!bh) { 4781 if (!bh) {
4607 ext4_error(sb, "ext4_get_inode_loc", "unable to read " 4782 ext4_error(sb, "unable to read inode block - "
4608 "inode block - inode=%lu, block=%llu", 4783 "inode=%lu, block=%llu", inode->i_ino, block);
4609 inode->i_ino, block);
4610 return -EIO; 4784 return -EIO;
4611 } 4785 }
4612 if (!buffer_uptodate(bh)) { 4786 if (!buffer_uptodate(bh)) {
@@ -4704,9 +4878,8 @@ make_io:
4704 submit_bh(READ_META, bh); 4878 submit_bh(READ_META, bh);
4705 wait_on_buffer(bh); 4879 wait_on_buffer(bh);
4706 if (!buffer_uptodate(bh)) { 4880 if (!buffer_uptodate(bh)) {
4707 ext4_error(sb, __func__, 4881 ext4_error(sb, "unable to read inode block - inode=%lu,"
4708 "unable to read inode block - inode=%lu, " 4882 " block=%llu", inode->i_ino, block);
4709 "block=%llu", inode->i_ino, block);
4710 brelse(bh); 4883 brelse(bh);
4711 return -EIO; 4884 return -EIO;
4712 } 4885 }
@@ -4720,7 +4893,7 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
4720{ 4893{
4721 /* We have all inode data except xattrs in memory here. */ 4894 /* We have all inode data except xattrs in memory here. */
4722 return __ext4_get_inode_loc(inode, iloc, 4895 return __ext4_get_inode_loc(inode, iloc,
4723 !(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)); 4896 !ext4_test_inode_state(inode, EXT4_STATE_XATTR));
4724} 4897}
4725 4898
4726void ext4_set_inode_flags(struct inode *inode) 4899void ext4_set_inode_flags(struct inode *inode)
@@ -4814,7 +4987,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4814 } 4987 }
4815 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); 4988 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
4816 4989
4817 ei->i_state = 0; 4990 ei->i_state_flags = 0;
4818 ei->i_dir_start_lookup = 0; 4991 ei->i_dir_start_lookup = 0;
4819 ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); 4992 ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
4820 /* We now have enough fields to check if the inode was active or not. 4993 /* We now have enough fields to check if the inode was active or not.
@@ -4897,7 +5070,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4897 EXT4_GOOD_OLD_INODE_SIZE + 5070 EXT4_GOOD_OLD_INODE_SIZE +
4898 ei->i_extra_isize; 5071 ei->i_extra_isize;
4899 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) 5072 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
4900 ei->i_state |= EXT4_STATE_XATTR; 5073 ext4_set_inode_state(inode, EXT4_STATE_XATTR);
4901 } 5074 }
4902 } else 5075 } else
4903 ei->i_extra_isize = 0; 5076 ei->i_extra_isize = 0;
@@ -4917,8 +5090,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4917 ret = 0; 5090 ret = 0;
4918 if (ei->i_file_acl && 5091 if (ei->i_file_acl &&
4919 !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { 5092 !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
4920 ext4_error(sb, __func__, 5093 ext4_error(sb, "bad extended attribute block %llu inode #%lu",
4921 "bad extended attribute block %llu in inode #%lu",
4922 ei->i_file_acl, inode->i_ino); 5094 ei->i_file_acl, inode->i_ino);
4923 ret = -EIO; 5095 ret = -EIO;
4924 goto bad_inode; 5096 goto bad_inode;
@@ -4964,8 +5136,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4964 new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); 5136 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
4965 } else { 5137 } else {
4966 ret = -EIO; 5138 ret = -EIO;
4967 ext4_error(inode->i_sb, __func__, 5139 ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu",
4968 "bogus i_mode (%o) for inode=%lu",
4969 inode->i_mode, inode->i_ino); 5140 inode->i_mode, inode->i_ino);
4970 goto bad_inode; 5141 goto bad_inode;
4971 } 5142 }
@@ -5037,7 +5208,7 @@ static int ext4_do_update_inode(handle_t *handle,
5037 5208
5038 /* For fields not not tracking in the in-memory inode, 5209 /* For fields not not tracking in the in-memory inode,
5039 * initialise them to zero for new inodes. */ 5210 * initialise them to zero for new inodes. */
5040 if (ei->i_state & EXT4_STATE_NEW) 5211 if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
5041 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); 5212 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
5042 5213
5043 ext4_get_inode_flags(ei); 5214 ext4_get_inode_flags(ei);
@@ -5101,7 +5272,7 @@ static int ext4_do_update_inode(handle_t *handle,
5101 EXT4_FEATURE_RO_COMPAT_LARGE_FILE); 5272 EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
5102 sb->s_dirt = 1; 5273 sb->s_dirt = 1;
5103 ext4_handle_sync(handle); 5274 ext4_handle_sync(handle);
5104 err = ext4_handle_dirty_metadata(handle, inode, 5275 err = ext4_handle_dirty_metadata(handle, NULL,
5105 EXT4_SB(sb)->s_sbh); 5276 EXT4_SB(sb)->s_sbh);
5106 } 5277 }
5107 } 5278 }
@@ -5130,10 +5301,10 @@ static int ext4_do_update_inode(handle_t *handle,
5130 } 5301 }
5131 5302
5132 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 5303 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
5133 rc = ext4_handle_dirty_metadata(handle, inode, bh); 5304 rc = ext4_handle_dirty_metadata(handle, NULL, bh);
5134 if (!err) 5305 if (!err)
5135 err = rc; 5306 err = rc;
5136 ei->i_state &= ~EXT4_STATE_NEW; 5307 ext4_clear_inode_state(inode, EXT4_STATE_NEW);
5137 5308
5138 ext4_update_inode_fsync_trans(handle, inode, 0); 5309 ext4_update_inode_fsync_trans(handle, inode, 0);
5139out_brelse: 5310out_brelse:
@@ -5177,7 +5348,7 @@ out_brelse:
5177 * `stuff()' is running, and the new i_size will be lost. Plus the inode 5348 * `stuff()' is running, and the new i_size will be lost. Plus the inode
5178 * will no longer be on the superblock's dirty inode list. 5349 * will no longer be on the superblock's dirty inode list.
5179 */ 5350 */
5180int ext4_write_inode(struct inode *inode, int wait) 5351int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
5181{ 5352{
5182 int err; 5353 int err;
5183 5354
@@ -5191,7 +5362,7 @@ int ext4_write_inode(struct inode *inode, int wait)
5191 return -EIO; 5362 return -EIO;
5192 } 5363 }
5193 5364
5194 if (!wait) 5365 if (wbc->sync_mode != WB_SYNC_ALL)
5195 return 0; 5366 return 0;
5196 5367
5197 err = ext4_force_commit(inode->i_sb); 5368 err = ext4_force_commit(inode->i_sb);
@@ -5201,13 +5372,11 @@ int ext4_write_inode(struct inode *inode, int wait)
5201 err = ext4_get_inode_loc(inode, &iloc); 5372 err = ext4_get_inode_loc(inode, &iloc);
5202 if (err) 5373 if (err)
5203 return err; 5374 return err;
5204 if (wait) 5375 if (wbc->sync_mode == WB_SYNC_ALL)
5205 sync_dirty_buffer(iloc.bh); 5376 sync_dirty_buffer(iloc.bh);
5206 if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { 5377 if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
5207 ext4_error(inode->i_sb, __func__, 5378 ext4_error(inode->i_sb, "IO error syncing inode, "
5208 "IO error syncing inode, " 5379 "inode=%lu, block=%llu", inode->i_ino,
5209 "inode=%lu, block=%llu",
5210 inode->i_ino,
5211 (unsigned long long)iloc.bh->b_blocknr); 5380 (unsigned long long)iloc.bh->b_blocknr);
5212 err = -EIO; 5381 err = -EIO;
5213 } 5382 }
@@ -5288,7 +5457,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5288 } 5457 }
5289 5458
5290 if (S_ISREG(inode->i_mode) && 5459 if (S_ISREG(inode->i_mode) &&
5291 attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { 5460 attr->ia_valid & ATTR_SIZE &&
5461 (attr->ia_size < inode->i_size ||
5462 (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) {
5292 handle_t *handle; 5463 handle_t *handle;
5293 5464
5294 handle = ext4_journal_start(inode, 3); 5465 handle = ext4_journal_start(inode, 3);
@@ -5319,6 +5490,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5319 goto err_out; 5490 goto err_out;
5320 } 5491 }
5321 } 5492 }
5493 /* ext4_truncate will clear the flag */
5494 if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))
5495 ext4_truncate(inode);
5322 } 5496 }
5323 5497
5324 rc = inode_setattr(inode, attr); 5498 rc = inode_setattr(inode, attr);
@@ -5557,8 +5731,8 @@ static int ext4_expand_extra_isize(struct inode *inode,
5557 entry = IFIRST(header); 5731 entry = IFIRST(header);
5558 5732
5559 /* No extended attributes present */ 5733 /* No extended attributes present */
5560 if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR) || 5734 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
5561 header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { 5735 header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
5562 memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, 5736 memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0,
5563 new_extra_isize); 5737 new_extra_isize);
5564 EXT4_I(inode)->i_extra_isize = new_extra_isize; 5738 EXT4_I(inode)->i_extra_isize = new_extra_isize;
@@ -5602,7 +5776,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
5602 err = ext4_reserve_inode_write(handle, inode, &iloc); 5776 err = ext4_reserve_inode_write(handle, inode, &iloc);
5603 if (ext4_handle_valid(handle) && 5777 if (ext4_handle_valid(handle) &&
5604 EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && 5778 EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
5605 !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) { 5779 !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
5606 /* 5780 /*
5607 * We need extra buffer credits since we may write into EA block 5781 * We need extra buffer credits since we may write into EA block
5608 * with this same handle. If journal_extend fails, then it will 5782 * with this same handle. If journal_extend fails, then it will
@@ -5616,10 +5790,11 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
5616 sbi->s_want_extra_isize, 5790 sbi->s_want_extra_isize,
5617 iloc, handle); 5791 iloc, handle);
5618 if (ret) { 5792 if (ret) {
5619 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; 5793 ext4_set_inode_state(inode,
5794 EXT4_STATE_NO_EXPAND);
5620 if (mnt_count != 5795 if (mnt_count !=
5621 le16_to_cpu(sbi->s_es->s_mnt_count)) { 5796 le16_to_cpu(sbi->s_es->s_mnt_count)) {
5622 ext4_warning(inode->i_sb, __func__, 5797 ext4_warning(inode->i_sb,
5623 "Unable to expand inode %lu. Delete" 5798 "Unable to expand inode %lu. Delete"
5624 " some EAs or run e2fsck.", 5799 " some EAs or run e2fsck.",
5625 inode->i_ino); 5800 inode->i_ino);
@@ -5683,7 +5858,7 @@ static int ext4_pin_inode(handle_t *handle, struct inode *inode)
5683 err = jbd2_journal_get_write_access(handle, iloc.bh); 5858 err = jbd2_journal_get_write_access(handle, iloc.bh);
5684 if (!err) 5859 if (!err)
5685 err = ext4_handle_dirty_metadata(handle, 5860 err = ext4_handle_dirty_metadata(handle,
5686 inode, 5861 NULL,
5687 iloc.bh); 5862 iloc.bh);
5688 brelse(iloc.bh); 5863 brelse(iloc.bh);
5689 } 5864 }