aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--fs/ext4/inode.c465
1 files changed, 320 insertions, 145 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index bec222ca9ba4..986120f30066 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -38,6 +38,7 @@
38#include <linux/uio.h> 38#include <linux/uio.h>
39#include <linux/bio.h> 39#include <linux/bio.h>
40#include <linux/workqueue.h> 40#include <linux/workqueue.h>
41#include <linux/kernel.h>
41 42
42#include "ext4_jbd2.h" 43#include "ext4_jbd2.h"
43#include "xattr.h" 44#include "xattr.h"
@@ -197,7 +198,7 @@ void ext4_delete_inode(struct inode *inode)
197 inode->i_size = 0; 198 inode->i_size = 0;
198 err = ext4_mark_inode_dirty(handle, inode); 199 err = ext4_mark_inode_dirty(handle, inode);
199 if (err) { 200 if (err) {
200 ext4_warning(inode->i_sb, __func__, 201 ext4_warning(inode->i_sb,
201 "couldn't mark inode dirty (err %d)", err); 202 "couldn't mark inode dirty (err %d)", err);
202 goto stop_handle; 203 goto stop_handle;
203 } 204 }
@@ -215,7 +216,7 @@ void ext4_delete_inode(struct inode *inode)
215 if (err > 0) 216 if (err > 0)
216 err = ext4_journal_restart(handle, 3); 217 err = ext4_journal_restart(handle, 3);
217 if (err != 0) { 218 if (err != 0) {
218 ext4_warning(inode->i_sb, __func__, 219 ext4_warning(inode->i_sb,
219 "couldn't extend journal (err %d)", err); 220 "couldn't extend journal (err %d)", err);
220 stop_handle: 221 stop_handle:
221 ext4_journal_stop(handle); 222 ext4_journal_stop(handle);
@@ -326,8 +327,7 @@ static int ext4_block_to_path(struct inode *inode,
326 offsets[n++] = i_block & (ptrs - 1); 327 offsets[n++] = i_block & (ptrs - 1);
327 final = ptrs; 328 final = ptrs;
328 } else { 329 } else {
329 ext4_warning(inode->i_sb, "ext4_block_to_path", 330 ext4_warning(inode->i_sb, "block %lu > max in inode %lu",
330 "block %lu > max in inode %lu",
331 i_block + direct_blocks + 331 i_block + direct_blocks +
332 indirect_blocks + double_blocks, inode->i_ino); 332 indirect_blocks + double_blocks, inode->i_ino);
333 } 333 }
@@ -347,7 +347,7 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
347 if (blk && 347 if (blk &&
348 unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), 348 unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
349 blk, 1))) { 349 blk, 1))) {
350 ext4_error(inode->i_sb, function, 350 __ext4_error(inode->i_sb, function,
351 "invalid block reference %u " 351 "invalid block reference %u "
352 "in inode #%lu", blk, inode->i_ino); 352 "in inode #%lu", blk, inode->i_ino);
353 return -EIO; 353 return -EIO;
@@ -610,7 +610,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
610 if (*err) 610 if (*err)
611 goto failed_out; 611 goto failed_out;
612 612
613 BUG_ON(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS); 613 if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) {
614 EXT4_ERROR_INODE(inode,
615 "current_block %llu + count %lu > %d!",
616 current_block, count,
617 EXT4_MAX_BLOCK_FILE_PHYS);
618 *err = -EIO;
619 goto failed_out;
620 }
614 621
615 target -= count; 622 target -= count;
616 /* allocate blocks for indirect blocks */ 623 /* allocate blocks for indirect blocks */
@@ -646,7 +653,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
646 ar.flags = EXT4_MB_HINT_DATA; 653 ar.flags = EXT4_MB_HINT_DATA;
647 654
648 current_block = ext4_mb_new_blocks(handle, &ar, err); 655 current_block = ext4_mb_new_blocks(handle, &ar, err);
649 BUG_ON(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS); 656 if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) {
657 EXT4_ERROR_INODE(inode,
658 "current_block %llu + ar.len %d > %d!",
659 current_block, ar.len,
660 EXT4_MAX_BLOCK_FILE_PHYS);
661 *err = -EIO;
662 goto failed_out;
663 }
650 664
651 if (*err && (target == blks)) { 665 if (*err && (target == blks)) {
652 /* 666 /*
@@ -1064,6 +1078,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
1064 int mdb_free = 0, allocated_meta_blocks = 0; 1078 int mdb_free = 0, allocated_meta_blocks = 0;
1065 1079
1066 spin_lock(&ei->i_block_reservation_lock); 1080 spin_lock(&ei->i_block_reservation_lock);
1081 trace_ext4_da_update_reserve_space(inode, used);
1067 if (unlikely(used > ei->i_reserved_data_blocks)) { 1082 if (unlikely(used > ei->i_reserved_data_blocks)) {
1068 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " 1083 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
1069 "with only %d reserved data blocks\n", 1084 "with only %d reserved data blocks\n",
@@ -1127,7 +1142,7 @@ static int check_block_validity(struct inode *inode, const char *msg,
1127 sector_t logical, sector_t phys, int len) 1142 sector_t logical, sector_t phys, int len)
1128{ 1143{
1129 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { 1144 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
1130 ext4_error(inode->i_sb, msg, 1145 __ext4_error(inode->i_sb, msg,
1131 "inode #%lu logical block %llu mapped to %llu " 1146 "inode #%lu logical block %llu mapped to %llu "
1132 "(size %d)", inode->i_ino, 1147 "(size %d)", inode->i_ino,
1133 (unsigned long long) logical, 1148 (unsigned long long) logical,
@@ -1309,7 +1324,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
1309 * i_data's format changing. Force the migrate 1324 * i_data's format changing. Force the migrate
1310 * to fail by clearing migrate flags 1325 * to fail by clearing migrate flags
1311 */ 1326 */
1312 EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; 1327 ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
1313 } 1328 }
1314 1329
1315 /* 1330 /*
@@ -1537,6 +1552,8 @@ static void ext4_truncate_failed_write(struct inode *inode)
1537 ext4_truncate(inode); 1552 ext4_truncate(inode);
1538} 1553}
1539 1554
1555static int ext4_get_block_write(struct inode *inode, sector_t iblock,
1556 struct buffer_head *bh_result, int create);
1540static int ext4_write_begin(struct file *file, struct address_space *mapping, 1557static int ext4_write_begin(struct file *file, struct address_space *mapping,
1541 loff_t pos, unsigned len, unsigned flags, 1558 loff_t pos, unsigned len, unsigned flags,
1542 struct page **pagep, void **fsdata) 1559 struct page **pagep, void **fsdata)
@@ -1578,8 +1595,12 @@ retry:
1578 } 1595 }
1579 *pagep = page; 1596 *pagep = page;
1580 1597
1581 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 1598 if (ext4_should_dioread_nolock(inode))
1582 ext4_get_block); 1599 ret = block_write_begin(file, mapping, pos, len, flags, pagep,
1600 fsdata, ext4_get_block_write);
1601 else
1602 ret = block_write_begin(file, mapping, pos, len, flags, pagep,
1603 fsdata, ext4_get_block);
1583 1604
1584 if (!ret && ext4_should_journal_data(inode)) { 1605 if (!ret && ext4_should_journal_data(inode)) {
1585 ret = walk_page_buffers(handle, page_buffers(page), 1606 ret = walk_page_buffers(handle, page_buffers(page),
@@ -1796,7 +1817,7 @@ static int ext4_journalled_write_end(struct file *file,
1796 new_i_size = pos + copied; 1817 new_i_size = pos + copied;
1797 if (new_i_size > inode->i_size) 1818 if (new_i_size > inode->i_size)
1798 i_size_write(inode, pos+copied); 1819 i_size_write(inode, pos+copied);
1799 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; 1820 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
1800 if (new_i_size > EXT4_I(inode)->i_disksize) { 1821 if (new_i_size > EXT4_I(inode)->i_disksize) {
1801 ext4_update_i_disksize(inode, new_i_size); 1822 ext4_update_i_disksize(inode, new_i_size);
1802 ret2 = ext4_mark_inode_dirty(handle, inode); 1823 ret2 = ext4_mark_inode_dirty(handle, inode);
@@ -1850,6 +1871,7 @@ repeat:
1850 spin_lock(&ei->i_block_reservation_lock); 1871 spin_lock(&ei->i_block_reservation_lock);
1851 md_reserved = ei->i_reserved_meta_blocks; 1872 md_reserved = ei->i_reserved_meta_blocks;
1852 md_needed = ext4_calc_metadata_amount(inode, lblock); 1873 md_needed = ext4_calc_metadata_amount(inode, lblock);
1874 trace_ext4_da_reserve_space(inode, md_needed);
1853 spin_unlock(&ei->i_block_reservation_lock); 1875 spin_unlock(&ei->i_block_reservation_lock);
1854 1876
1855 /* 1877 /*
@@ -2096,6 +2118,8 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
2096 } else if (buffer_mapped(bh)) 2118 } else if (buffer_mapped(bh))
2097 BUG_ON(bh->b_blocknr != pblock); 2119 BUG_ON(bh->b_blocknr != pblock);
2098 2120
2121 if (buffer_uninit(exbh))
2122 set_buffer_uninit(bh);
2099 cur_logical++; 2123 cur_logical++;
2100 pblock++; 2124 pblock++;
2101 } while ((bh = bh->b_this_page) != head); 2125 } while ((bh = bh->b_this_page) != head);
@@ -2138,17 +2162,16 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
2138 break; 2162 break;
2139 for (i = 0; i < nr_pages; i++) { 2163 for (i = 0; i < nr_pages; i++) {
2140 struct page *page = pvec.pages[i]; 2164 struct page *page = pvec.pages[i];
2141 index = page->index; 2165 if (page->index > end)
2142 if (index > end)
2143 break; 2166 break;
2144 index++;
2145
2146 BUG_ON(!PageLocked(page)); 2167 BUG_ON(!PageLocked(page));
2147 BUG_ON(PageWriteback(page)); 2168 BUG_ON(PageWriteback(page));
2148 block_invalidatepage(page, 0); 2169 block_invalidatepage(page, 0);
2149 ClearPageUptodate(page); 2170 ClearPageUptodate(page);
2150 unlock_page(page); 2171 unlock_page(page);
2151 } 2172 }
2173 index = pvec.pages[nr_pages - 1]->index + 1;
2174 pagevec_release(&pvec);
2152 } 2175 }
2153 return; 2176 return;
2154} 2177}
@@ -2225,6 +2248,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2225 */ 2248 */
2226 new.b_state = 0; 2249 new.b_state = 0;
2227 get_blocks_flags = EXT4_GET_BLOCKS_CREATE; 2250 get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
2251 if (ext4_should_dioread_nolock(mpd->inode))
2252 get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
2228 if (mpd->b_state & (1 << BH_Delay)) 2253 if (mpd->b_state & (1 << BH_Delay))
2229 get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; 2254 get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
2230 2255
@@ -2635,11 +2660,14 @@ static int __ext4_journalled_writepage(struct page *page,
2635 ret = err; 2660 ret = err;
2636 2661
2637 walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); 2662 walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one);
2638 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; 2663 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
2639out: 2664out:
2640 return ret; 2665 return ret;
2641} 2666}
2642 2667
2668static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
2669static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
2670
2643/* 2671/*
2644 * Note that we don't need to start a transaction unless we're journaling data 2672 * Note that we don't need to start a transaction unless we're journaling data
2645 * because we should have holes filled from ext4_page_mkwrite(). We even don't 2673 * because we should have holes filled from ext4_page_mkwrite(). We even don't
@@ -2687,7 +2715,7 @@ static int ext4_writepage(struct page *page,
2687 int ret = 0; 2715 int ret = 0;
2688 loff_t size; 2716 loff_t size;
2689 unsigned int len; 2717 unsigned int len;
2690 struct buffer_head *page_bufs; 2718 struct buffer_head *page_bufs = NULL;
2691 struct inode *inode = page->mapping->host; 2719 struct inode *inode = page->mapping->host;
2692 2720
2693 trace_ext4_writepage(inode, page); 2721 trace_ext4_writepage(inode, page);
@@ -2763,7 +2791,11 @@ static int ext4_writepage(struct page *page,
2763 2791
2764 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) 2792 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
2765 ret = nobh_writepage(page, noalloc_get_block_write, wbc); 2793 ret = nobh_writepage(page, noalloc_get_block_write, wbc);
2766 else 2794 else if (page_bufs && buffer_uninit(page_bufs)) {
2795 ext4_set_bh_endio(page_bufs, inode);
2796 ret = block_write_full_page_endio(page, noalloc_get_block_write,
2797 wbc, ext4_end_io_buffer_write);
2798 } else
2767 ret = block_write_full_page(page, noalloc_get_block_write, 2799 ret = block_write_full_page(page, noalloc_get_block_write,
2768 wbc); 2800 wbc);
2769 2801
@@ -3306,7 +3338,8 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
3306 filemap_write_and_wait(mapping); 3338 filemap_write_and_wait(mapping);
3307 } 3339 }
3308 3340
3309 if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { 3341 if (EXT4_JOURNAL(inode) &&
3342 ext4_test_inode_state(inode, EXT4_STATE_JDATA)) {
3310 /* 3343 /*
3311 * This is a REALLY heavyweight approach, but the use of 3344 * This is a REALLY heavyweight approach, but the use of
3312 * bmap on dirty files is expected to be extremely rare: 3345 * bmap on dirty files is expected to be extremely rare:
@@ -3325,7 +3358,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
3325 * everything they get. 3358 * everything they get.
3326 */ 3359 */
3327 3360
3328 EXT4_I(inode)->i_state &= ~EXT4_STATE_JDATA; 3361 ext4_clear_inode_state(inode, EXT4_STATE_JDATA);
3329 journal = EXT4_JOURNAL(inode); 3362 journal = EXT4_JOURNAL(inode);
3330 jbd2_journal_lock_updates(journal); 3363 jbd2_journal_lock_updates(journal);
3331 err = jbd2_journal_flush(journal); 3364 err = jbd2_journal_flush(journal);
@@ -3350,11 +3383,45 @@ ext4_readpages(struct file *file, struct address_space *mapping,
3350 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); 3383 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
3351} 3384}
3352 3385
3386static void ext4_free_io_end(ext4_io_end_t *io)
3387{
3388 BUG_ON(!io);
3389 if (io->page)
3390 put_page(io->page);
3391 iput(io->inode);
3392 kfree(io);
3393}
3394
3395static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
3396{
3397 struct buffer_head *head, *bh;
3398 unsigned int curr_off = 0;
3399
3400 if (!page_has_buffers(page))
3401 return;
3402 head = bh = page_buffers(page);
3403 do {
3404 if (offset <= curr_off && test_clear_buffer_uninit(bh)
3405 && bh->b_private) {
3406 ext4_free_io_end(bh->b_private);
3407 bh->b_private = NULL;
3408 bh->b_end_io = NULL;
3409 }
3410 curr_off = curr_off + bh->b_size;
3411 bh = bh->b_this_page;
3412 } while (bh != head);
3413}
3414
3353static void ext4_invalidatepage(struct page *page, unsigned long offset) 3415static void ext4_invalidatepage(struct page *page, unsigned long offset)
3354{ 3416{
3355 journal_t *journal = EXT4_JOURNAL(page->mapping->host); 3417 journal_t *journal = EXT4_JOURNAL(page->mapping->host);
3356 3418
3357 /* 3419 /*
3420 * free any io_end structure allocated for buffers to be discarded
3421 */
3422 if (ext4_should_dioread_nolock(page->mapping->host))
3423 ext4_invalidatepage_free_endio(page, offset);
3424 /*
3358 * If it's a full truncate we just forget about the pending dirtying 3425 * If it's a full truncate we just forget about the pending dirtying
3359 */ 3426 */
3360 if (offset == 0) 3427 if (offset == 0)
@@ -3425,7 +3492,14 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
3425 } 3492 }
3426 3493
3427retry: 3494retry:
3428 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 3495 if (rw == READ && ext4_should_dioread_nolock(inode))
3496 ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
3497 inode->i_sb->s_bdev, iov,
3498 offset, nr_segs,
3499 ext4_get_block, NULL);
3500 else
3501 ret = blockdev_direct_IO(rw, iocb, inode,
3502 inode->i_sb->s_bdev, iov,
3429 offset, nr_segs, 3503 offset, nr_segs,
3430 ext4_get_block, NULL); 3504 ext4_get_block, NULL);
3431 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3505 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -3441,6 +3515,9 @@ retry:
3441 * but cannot extend i_size. Bail out and pretend 3515 * but cannot extend i_size. Bail out and pretend
3442 * the write failed... */ 3516 * the write failed... */
3443 ret = PTR_ERR(handle); 3517 ret = PTR_ERR(handle);
3518 if (inode->i_nlink)
3519 ext4_orphan_del(NULL, inode);
3520
3444 goto out; 3521 goto out;
3445 } 3522 }
3446 if (inode->i_nlink) 3523 if (inode->i_nlink)
@@ -3468,75 +3545,63 @@ out:
3468 return ret; 3545 return ret;
3469} 3546}
3470 3547
3471static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock, 3548static int ext4_get_block_write(struct inode *inode, sector_t iblock,
3472 struct buffer_head *bh_result, int create) 3549 struct buffer_head *bh_result, int create)
3473{ 3550{
3474 handle_t *handle = NULL; 3551 handle_t *handle = ext4_journal_current_handle();
3475 int ret = 0; 3552 int ret = 0;
3476 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; 3553 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
3477 int dio_credits; 3554 int dio_credits;
3555 int started = 0;
3478 3556
3479 ext4_debug("ext4_get_block_dio_write: inode %lu, create flag %d\n", 3557 ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
3480 inode->i_ino, create); 3558 inode->i_ino, create);
3481 /* 3559 /*
3482 * DIO VFS code passes create = 0 flag for write to 3560 * ext4_get_block in prepare for a DIO write or buffer write.
3483 * the middle of file. It does this to avoid block 3561 * We allocate an uinitialized extent if blocks haven't been allocated.
3484 * allocation for holes, to prevent expose stale data 3562 * The extent will be converted to initialized after IO complete.
3485 * out when there is parallel buffered read (which does
3486 * not hold the i_mutex lock) while direct IO write has
3487 * not completed. DIO request on holes finally falls back
3488 * to buffered IO for this reason.
3489 *
3490 * For ext4 extent based file, since we support fallocate,
3491 * new allocated extent as uninitialized, for holes, we
3492 * could fallocate blocks for holes, thus parallel
3493 * buffered IO read will zero out the page when read on
3494 * a hole while parallel DIO write to the hole has not completed.
3495 *
3496 * when we come here, we know it's a direct IO write to
3497 * to the middle of file (<i_size)
3498 * so it's safe to override the create flag from VFS.
3499 */ 3563 */
3500 create = EXT4_GET_BLOCKS_DIO_CREATE_EXT; 3564 create = EXT4_GET_BLOCKS_IO_CREATE_EXT;
3501 3565
3502 if (max_blocks > DIO_MAX_BLOCKS) 3566 if (!handle) {
3503 max_blocks = DIO_MAX_BLOCKS; 3567 if (max_blocks > DIO_MAX_BLOCKS)
3504 dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); 3568 max_blocks = DIO_MAX_BLOCKS;
3505 handle = ext4_journal_start(inode, dio_credits); 3569 dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
3506 if (IS_ERR(handle)) { 3570 handle = ext4_journal_start(inode, dio_credits);
3507 ret = PTR_ERR(handle); 3571 if (IS_ERR(handle)) {
3508 goto out; 3572 ret = PTR_ERR(handle);
3573 goto out;
3574 }
3575 started = 1;
3509 } 3576 }
3577
3510 ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, 3578 ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
3511 create); 3579 create);
3512 if (ret > 0) { 3580 if (ret > 0) {
3513 bh_result->b_size = (ret << inode->i_blkbits); 3581 bh_result->b_size = (ret << inode->i_blkbits);
3514 ret = 0; 3582 ret = 0;
3515 } 3583 }
3516 ext4_journal_stop(handle); 3584 if (started)
3585 ext4_journal_stop(handle);
3517out: 3586out:
3518 return ret; 3587 return ret;
3519} 3588}
3520 3589
3521static void ext4_free_io_end(ext4_io_end_t *io) 3590static void dump_completed_IO(struct inode * inode)
3522{
3523 BUG_ON(!io);
3524 iput(io->inode);
3525 kfree(io);
3526}
3527static void dump_aio_dio_list(struct inode * inode)
3528{ 3591{
3529#ifdef EXT4_DEBUG 3592#ifdef EXT4_DEBUG
3530 struct list_head *cur, *before, *after; 3593 struct list_head *cur, *before, *after;
3531 ext4_io_end_t *io, *io0, *io1; 3594 ext4_io_end_t *io, *io0, *io1;
3595 unsigned long flags;
3532 3596
3533 if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ 3597 if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
3534 ext4_debug("inode %lu aio dio list is empty\n", inode->i_ino); 3598 ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
3535 return; 3599 return;
3536 } 3600 }
3537 3601
3538 ext4_debug("Dump inode %lu aio_dio_completed_IO list \n", inode->i_ino); 3602 ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
3539 list_for_each_entry(io, &EXT4_I(inode)->i_aio_dio_complete_list, list){ 3603 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
3604 list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
3540 cur = &io->list; 3605 cur = &io->list;
3541 before = cur->prev; 3606 before = cur->prev;
3542 io0 = container_of(before, ext4_io_end_t, list); 3607 io0 = container_of(before, ext4_io_end_t, list);
@@ -3546,32 +3611,31 @@ static void dump_aio_dio_list(struct inode * inode)
3546 ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", 3611 ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
3547 io, inode->i_ino, io0, io1); 3612 io, inode->i_ino, io0, io1);
3548 } 3613 }
3614 spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
3549#endif 3615#endif
3550} 3616}
3551 3617
3552/* 3618/*
3553 * check a range of space and convert unwritten extents to written. 3619 * check a range of space and convert unwritten extents to written.
3554 */ 3620 */
3555static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) 3621static int ext4_end_io_nolock(ext4_io_end_t *io)
3556{ 3622{
3557 struct inode *inode = io->inode; 3623 struct inode *inode = io->inode;
3558 loff_t offset = io->offset; 3624 loff_t offset = io->offset;
3559 size_t size = io->size; 3625 ssize_t size = io->size;
3560 int ret = 0; 3626 int ret = 0;
3561 3627
3562 ext4_debug("end_aio_dio_onlock: io 0x%p from inode %lu,list->next 0x%p," 3628 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
3563 "list->prev 0x%p\n", 3629 "list->prev 0x%p\n",
3564 io, inode->i_ino, io->list.next, io->list.prev); 3630 io, inode->i_ino, io->list.next, io->list.prev);
3565 3631
3566 if (list_empty(&io->list)) 3632 if (list_empty(&io->list))
3567 return ret; 3633 return ret;
3568 3634
3569 if (io->flag != DIO_AIO_UNWRITTEN) 3635 if (io->flag != EXT4_IO_UNWRITTEN)
3570 return ret; 3636 return ret;
3571 3637
3572 if (offset + size <= i_size_read(inode)) 3638 ret = ext4_convert_unwritten_extents(inode, offset, size);
3573 ret = ext4_convert_unwritten_extents(inode, offset, size);
3574
3575 if (ret < 0) { 3639 if (ret < 0) {
3576 printk(KERN_EMERG "%s: failed to convert unwritten" 3640 printk(KERN_EMERG "%s: failed to convert unwritten"
3577 "extents to written extents, error is %d" 3641 "extents to written extents, error is %d"
@@ -3584,50 +3648,64 @@ static int ext4_end_aio_dio_nolock(ext4_io_end_t *io)
3584 io->flag = 0; 3648 io->flag = 0;
3585 return ret; 3649 return ret;
3586} 3650}
3651
3587/* 3652/*
3588 * work on completed aio dio IO, to convert unwritten extents to extents 3653 * work on completed aio dio IO, to convert unwritten extents to extents
3589 */ 3654 */
3590static void ext4_end_aio_dio_work(struct work_struct *work) 3655static void ext4_end_io_work(struct work_struct *work)
3591{ 3656{
3592 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); 3657 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
3593 struct inode *inode = io->inode; 3658 struct inode *inode = io->inode;
3594 int ret = 0; 3659 struct ext4_inode_info *ei = EXT4_I(inode);
3660 unsigned long flags;
3661 int ret;
3595 3662
3596 mutex_lock(&inode->i_mutex); 3663 mutex_lock(&inode->i_mutex);
3597 ret = ext4_end_aio_dio_nolock(io); 3664 ret = ext4_end_io_nolock(io);
3598 if (ret >= 0) { 3665 if (ret < 0) {
3599 if (!list_empty(&io->list)) 3666 mutex_unlock(&inode->i_mutex);
3600 list_del_init(&io->list); 3667 return;
3601 ext4_free_io_end(io);
3602 } 3668 }
3669
3670 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3671 if (!list_empty(&io->list))
3672 list_del_init(&io->list);
3673 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3603 mutex_unlock(&inode->i_mutex); 3674 mutex_unlock(&inode->i_mutex);
3675 ext4_free_io_end(io);
3604} 3676}
3677
3605/* 3678/*
3606 * This function is called from ext4_sync_file(). 3679 * This function is called from ext4_sync_file().
3607 * 3680 *
3608 * When AIO DIO IO is completed, the work to convert unwritten 3681 * When IO is completed, the work to convert unwritten extents to
3609 * extents to written is queued on workqueue but may not get immediately 3682 * written is queued on workqueue but may not get immediately
3610 * scheduled. When fsync is called, we need to ensure the 3683 * scheduled. When fsync is called, we need to ensure the
3611 * conversion is complete before fsync returns. 3684 * conversion is complete before fsync returns.
3612 * The inode keeps track of a list of completed AIO from DIO path 3685 * The inode keeps track of a list of pending/completed IO that
3613 * that might needs to do the conversion. This function walks through 3686 * might needs to do the conversion. This function walks through
3614 * the list and convert the related unwritten extents to written. 3687 * the list and convert the related unwritten extents for completed IO
3688 * to written.
3689 * The function return the number of pending IOs on success.
3615 */ 3690 */
3616int flush_aio_dio_completed_IO(struct inode *inode) 3691int flush_completed_IO(struct inode *inode)
3617{ 3692{
3618 ext4_io_end_t *io; 3693 ext4_io_end_t *io;
3694 struct ext4_inode_info *ei = EXT4_I(inode);
3695 unsigned long flags;
3619 int ret = 0; 3696 int ret = 0;
3620 int ret2 = 0; 3697 int ret2 = 0;
3621 3698
3622 if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)) 3699 if (list_empty(&ei->i_completed_io_list))
3623 return ret; 3700 return ret;
3624 3701
3625 dump_aio_dio_list(inode); 3702 dump_completed_IO(inode);
3626 while (!list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ 3703 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3627 io = list_entry(EXT4_I(inode)->i_aio_dio_complete_list.next, 3704 while (!list_empty(&ei->i_completed_io_list)){
3705 io = list_entry(ei->i_completed_io_list.next,
3628 ext4_io_end_t, list); 3706 ext4_io_end_t, list);
3629 /* 3707 /*
3630 * Calling ext4_end_aio_dio_nolock() to convert completed 3708 * Calling ext4_end_io_nolock() to convert completed
3631 * IO to written. 3709 * IO to written.
3632 * 3710 *
3633 * When ext4_sync_file() is called, run_queue() may already 3711 * When ext4_sync_file() is called, run_queue() may already
@@ -3640,20 +3718,23 @@ int flush_aio_dio_completed_IO(struct inode *inode)
3640 * avoid double converting from both fsync and background work 3718 * avoid double converting from both fsync and background work
3641 * queue work. 3719 * queue work.
3642 */ 3720 */
3643 ret = ext4_end_aio_dio_nolock(io); 3721 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3722 ret = ext4_end_io_nolock(io);
3723 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3644 if (ret < 0) 3724 if (ret < 0)
3645 ret2 = ret; 3725 ret2 = ret;
3646 else 3726 else
3647 list_del_init(&io->list); 3727 list_del_init(&io->list);
3648 } 3728 }
3729 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3649 return (ret2 < 0) ? ret2 : 0; 3730 return (ret2 < 0) ? ret2 : 0;
3650} 3731}
3651 3732
3652static ext4_io_end_t *ext4_init_io_end (struct inode *inode) 3733static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
3653{ 3734{
3654 ext4_io_end_t *io = NULL; 3735 ext4_io_end_t *io = NULL;
3655 3736
3656 io = kmalloc(sizeof(*io), GFP_NOFS); 3737 io = kmalloc(sizeof(*io), flags);
3657 3738
3658 if (io) { 3739 if (io) {
3659 igrab(inode); 3740 igrab(inode);
@@ -3661,8 +3742,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode)
3661 io->flag = 0; 3742 io->flag = 0;
3662 io->offset = 0; 3743 io->offset = 0;
3663 io->size = 0; 3744 io->size = 0;
3664 io->error = 0; 3745 io->page = NULL;
3665 INIT_WORK(&io->work, ext4_end_aio_dio_work); 3746 INIT_WORK(&io->work, ext4_end_io_work);
3666 INIT_LIST_HEAD(&io->list); 3747 INIT_LIST_HEAD(&io->list);
3667 } 3748 }
3668 3749
@@ -3674,6 +3755,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3674{ 3755{
3675 ext4_io_end_t *io_end = iocb->private; 3756 ext4_io_end_t *io_end = iocb->private;
3676 struct workqueue_struct *wq; 3757 struct workqueue_struct *wq;
3758 unsigned long flags;
3759 struct ext4_inode_info *ei;
3677 3760
3678 /* if not async direct IO or dio with 0 bytes write, just return */ 3761 /* if not async direct IO or dio with 0 bytes write, just return */
3679 if (!io_end || !size) 3762 if (!io_end || !size)
@@ -3685,7 +3768,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3685 size); 3768 size);
3686 3769
3687 /* if not aio dio with unwritten extents, just free io and return */ 3770 /* if not aio dio with unwritten extents, just free io and return */
3688 if (io_end->flag != DIO_AIO_UNWRITTEN){ 3771 if (io_end->flag != EXT4_IO_UNWRITTEN){
3689 ext4_free_io_end(io_end); 3772 ext4_free_io_end(io_end);
3690 iocb->private = NULL; 3773 iocb->private = NULL;
3691 return; 3774 return;
@@ -3693,16 +3776,85 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3693 3776
3694 io_end->offset = offset; 3777 io_end->offset = offset;
3695 io_end->size = size; 3778 io_end->size = size;
3779 io_end->flag = EXT4_IO_UNWRITTEN;
3696 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; 3780 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
3697 3781
3698 /* queue the work to convert unwritten extents to written */ 3782 /* queue the work to convert unwritten extents to written */
3699 queue_work(wq, &io_end->work); 3783 queue_work(wq, &io_end->work);
3700 3784
3701 /* Add the io_end to per-inode completed aio dio list*/ 3785 /* Add the io_end to per-inode completed aio dio list*/
3702 list_add_tail(&io_end->list, 3786 ei = EXT4_I(io_end->inode);
3703 &EXT4_I(io_end->inode)->i_aio_dio_complete_list); 3787 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3788 list_add_tail(&io_end->list, &ei->i_completed_io_list);
3789 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3704 iocb->private = NULL; 3790 iocb->private = NULL;
3705} 3791}
3792
3793static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
3794{
3795 ext4_io_end_t *io_end = bh->b_private;
3796 struct workqueue_struct *wq;
3797 struct inode *inode;
3798 unsigned long flags;
3799
3800 if (!test_clear_buffer_uninit(bh) || !io_end)
3801 goto out;
3802
3803 if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) {
3804 printk("sb umounted, discard end_io request for inode %lu\n",
3805 io_end->inode->i_ino);
3806 ext4_free_io_end(io_end);
3807 goto out;
3808 }
3809
3810 io_end->flag = EXT4_IO_UNWRITTEN;
3811 inode = io_end->inode;
3812
3813 /* Add the io_end to per-inode completed io list*/
3814 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
3815 list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
3816 spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
3817
3818 wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
3819 /* queue the work to convert unwritten extents to written */
3820 queue_work(wq, &io_end->work);
3821out:
3822 bh->b_private = NULL;
3823 bh->b_end_io = NULL;
3824 clear_buffer_uninit(bh);
3825 end_buffer_async_write(bh, uptodate);
3826}
3827
3828static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
3829{
3830 ext4_io_end_t *io_end;
3831 struct page *page = bh->b_page;
3832 loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT;
3833 size_t size = bh->b_size;
3834
3835retry:
3836 io_end = ext4_init_io_end(inode, GFP_ATOMIC);
3837 if (!io_end) {
3838 if (printk_ratelimit())
3839 printk(KERN_WARNING "%s: allocation fail\n", __func__);
3840 schedule();
3841 goto retry;
3842 }
3843 io_end->offset = offset;
3844 io_end->size = size;
3845 /*
3846 * We need to hold a reference to the page to make sure it
3847 * doesn't get evicted before ext4_end_io_work() has a chance
3848 * to convert the extent from written to unwritten.
3849 */
3850 io_end->page = page;
3851 get_page(io_end->page);
3852
3853 bh->b_private = io_end;
3854 bh->b_end_io = ext4_end_io_buffer_write;
3855 return 0;
3856}
3857
3706/* 3858/*
3707 * For ext4 extent files, ext4 will do direct-io write to holes, 3859 * For ext4 extent files, ext4 will do direct-io write to holes,
3708 * preallocated extents, and those write extend the file, no need to 3860 * preallocated extents, and those write extend the file, no need to
@@ -3756,7 +3908,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3756 iocb->private = NULL; 3908 iocb->private = NULL;
3757 EXT4_I(inode)->cur_aio_dio = NULL; 3909 EXT4_I(inode)->cur_aio_dio = NULL;
3758 if (!is_sync_kiocb(iocb)) { 3910 if (!is_sync_kiocb(iocb)) {
3759 iocb->private = ext4_init_io_end(inode); 3911 iocb->private = ext4_init_io_end(inode, GFP_NOFS);
3760 if (!iocb->private) 3912 if (!iocb->private)
3761 return -ENOMEM; 3913 return -ENOMEM;
3762 /* 3914 /*
@@ -3772,7 +3924,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3772 ret = blockdev_direct_IO(rw, iocb, inode, 3924 ret = blockdev_direct_IO(rw, iocb, inode,
3773 inode->i_sb->s_bdev, iov, 3925 inode->i_sb->s_bdev, iov,
3774 offset, nr_segs, 3926 offset, nr_segs,
3775 ext4_get_block_dio_write, 3927 ext4_get_block_write,
3776 ext4_end_io_dio); 3928 ext4_end_io_dio);
3777 if (iocb->private) 3929 if (iocb->private)
3778 EXT4_I(inode)->cur_aio_dio = NULL; 3930 EXT4_I(inode)->cur_aio_dio = NULL;
@@ -3793,8 +3945,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3793 if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { 3945 if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
3794 ext4_free_io_end(iocb->private); 3946 ext4_free_io_end(iocb->private);
3795 iocb->private = NULL; 3947 iocb->private = NULL;
3796 } else if (ret > 0 && (EXT4_I(inode)->i_state & 3948 } else if (ret > 0 && ext4_test_inode_state(inode,
3797 EXT4_STATE_DIO_UNWRITTEN)) { 3949 EXT4_STATE_DIO_UNWRITTEN)) {
3798 int err; 3950 int err;
3799 /* 3951 /*
3800 * for non AIO case, since the IO is already 3952 * for non AIO case, since the IO is already
@@ -3804,7 +3956,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3804 offset, ret); 3956 offset, ret);
3805 if (err < 0) 3957 if (err < 0)
3806 ret = err; 3958 ret = err;
3807 EXT4_I(inode)->i_state &= ~EXT4_STATE_DIO_UNWRITTEN; 3959 ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3808 } 3960 }
3809 return ret; 3961 return ret;
3810 } 3962 }
@@ -4135,18 +4287,27 @@ no_top:
4135 * We release `count' blocks on disk, but (last - first) may be greater 4287 * We release `count' blocks on disk, but (last - first) may be greater
4136 * than `count' because there can be holes in there. 4288 * than `count' because there can be holes in there.
4137 */ 4289 */
4138static void ext4_clear_blocks(handle_t *handle, struct inode *inode, 4290static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
4139 struct buffer_head *bh, 4291 struct buffer_head *bh,
4140 ext4_fsblk_t block_to_free, 4292 ext4_fsblk_t block_to_free,
4141 unsigned long count, __le32 *first, 4293 unsigned long count, __le32 *first,
4142 __le32 *last) 4294 __le32 *last)
4143{ 4295{
4144 __le32 *p; 4296 __le32 *p;
4145 int flags = EXT4_FREE_BLOCKS_FORGET; 4297 int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED;
4146 4298
4147 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 4299 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
4148 flags |= EXT4_FREE_BLOCKS_METADATA; 4300 flags |= EXT4_FREE_BLOCKS_METADATA;
4149 4301
4302 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
4303 count)) {
4304 ext4_error(inode->i_sb, "inode #%lu: "
4305 "attempt to clear blocks %llu len %lu, invalid",
4306 inode->i_ino, (unsigned long long) block_to_free,
4307 count);
4308 return 1;
4309 }
4310
4150 if (try_to_extend_transaction(handle, inode)) { 4311 if (try_to_extend_transaction(handle, inode)) {
4151 if (bh) { 4312 if (bh) {
4152 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 4313 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
@@ -4165,6 +4326,7 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
4165 *p = 0; 4326 *p = 0;
4166 4327
4167 ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); 4328 ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
4329 return 0;
4168} 4330}
4169 4331
4170/** 4332/**
@@ -4220,9 +4382,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
4220 } else if (nr == block_to_free + count) { 4382 } else if (nr == block_to_free + count) {
4221 count++; 4383 count++;
4222 } else { 4384 } else {
4223 ext4_clear_blocks(handle, inode, this_bh, 4385 if (ext4_clear_blocks(handle, inode, this_bh,
4224 block_to_free, 4386 block_to_free, count,
4225 count, block_to_free_p, p); 4387 block_to_free_p, p))
4388 break;
4226 block_to_free = nr; 4389 block_to_free = nr;
4227 block_to_free_p = p; 4390 block_to_free_p = p;
4228 count = 1; 4391 count = 1;
@@ -4246,7 +4409,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
4246 if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) 4409 if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
4247 ext4_handle_dirty_metadata(handle, inode, this_bh); 4410 ext4_handle_dirty_metadata(handle, inode, this_bh);
4248 else 4411 else
4249 ext4_error(inode->i_sb, __func__, 4412 ext4_error(inode->i_sb,
4250 "circular indirect block detected, " 4413 "circular indirect block detected, "
4251 "inode=%lu, block=%llu", 4414 "inode=%lu, block=%llu",
4252 inode->i_ino, 4415 inode->i_ino,
@@ -4286,6 +4449,16 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4286 if (!nr) 4449 if (!nr)
4287 continue; /* A hole */ 4450 continue; /* A hole */
4288 4451
4452 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
4453 nr, 1)) {
4454 ext4_error(inode->i_sb,
4455 "indirect mapped block in inode "
4456 "#%lu invalid (level %d, blk #%lu)",
4457 inode->i_ino, depth,
4458 (unsigned long) nr);
4459 break;
4460 }
4461
4289 /* Go read the buffer for the next level down */ 4462 /* Go read the buffer for the next level down */
4290 bh = sb_bread(inode->i_sb, nr); 4463 bh = sb_bread(inode->i_sb, nr);
4291 4464
@@ -4294,7 +4467,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4294 * (should be rare). 4467 * (should be rare).
4295 */ 4468 */
4296 if (!bh) { 4469 if (!bh) {
4297 ext4_error(inode->i_sb, "ext4_free_branches", 4470 ext4_error(inode->i_sb,
4298 "Read failure, inode=%lu, block=%llu", 4471 "Read failure, inode=%lu, block=%llu",
4299 inode->i_ino, nr); 4472 inode->i_ino, nr);
4300 continue; 4473 continue;
@@ -4438,8 +4611,10 @@ void ext4_truncate(struct inode *inode)
4438 if (!ext4_can_truncate(inode)) 4611 if (!ext4_can_truncate(inode))
4439 return; 4612 return;
4440 4613
4614 EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
4615
4441 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) 4616 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
4442 ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; 4617 ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
4443 4618
4444 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 4619 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
4445 ext4_ext_truncate(inode); 4620 ext4_ext_truncate(inode);
@@ -4609,9 +4784,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
4609 4784
4610 bh = sb_getblk(sb, block); 4785 bh = sb_getblk(sb, block);
4611 if (!bh) { 4786 if (!bh) {
4612 ext4_error(sb, "ext4_get_inode_loc", "unable to read " 4787 ext4_error(sb, "unable to read inode block - "
4613 "inode block - inode=%lu, block=%llu", 4788 "inode=%lu, block=%llu", inode->i_ino, block);
4614 inode->i_ino, block);
4615 return -EIO; 4789 return -EIO;
4616 } 4790 }
4617 if (!buffer_uptodate(bh)) { 4791 if (!buffer_uptodate(bh)) {
@@ -4709,9 +4883,8 @@ make_io:
4709 submit_bh(READ_META, bh); 4883 submit_bh(READ_META, bh);
4710 wait_on_buffer(bh); 4884 wait_on_buffer(bh);
4711 if (!buffer_uptodate(bh)) { 4885 if (!buffer_uptodate(bh)) {
4712 ext4_error(sb, __func__, 4886 ext4_error(sb, "unable to read inode block - inode=%lu,"
4713 "unable to read inode block - inode=%lu, " 4887 " block=%llu", inode->i_ino, block);
4714 "block=%llu", inode->i_ino, block);
4715 brelse(bh); 4888 brelse(bh);
4716 return -EIO; 4889 return -EIO;
4717 } 4890 }
@@ -4725,7 +4898,7 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
4725{ 4898{
4726 /* We have all inode data except xattrs in memory here. */ 4899 /* We have all inode data except xattrs in memory here. */
4727 return __ext4_get_inode_loc(inode, iloc, 4900 return __ext4_get_inode_loc(inode, iloc,
4728 !(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)); 4901 !ext4_test_inode_state(inode, EXT4_STATE_XATTR));
4729} 4902}
4730 4903
4731void ext4_set_inode_flags(struct inode *inode) 4904void ext4_set_inode_flags(struct inode *inode)
@@ -4819,7 +4992,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4819 } 4992 }
4820 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); 4993 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
4821 4994
4822 ei->i_state = 0; 4995 ei->i_state_flags = 0;
4823 ei->i_dir_start_lookup = 0; 4996 ei->i_dir_start_lookup = 0;
4824 ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); 4997 ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
4825 /* We now have enough fields to check if the inode was active or not. 4998 /* We now have enough fields to check if the inode was active or not.
@@ -4902,7 +5075,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4902 EXT4_GOOD_OLD_INODE_SIZE + 5075 EXT4_GOOD_OLD_INODE_SIZE +
4903 ei->i_extra_isize; 5076 ei->i_extra_isize;
4904 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) 5077 if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
4905 ei->i_state |= EXT4_STATE_XATTR; 5078 ext4_set_inode_state(inode, EXT4_STATE_XATTR);
4906 } 5079 }
4907 } else 5080 } else
4908 ei->i_extra_isize = 0; 5081 ei->i_extra_isize = 0;
@@ -4922,8 +5095,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4922 ret = 0; 5095 ret = 0;
4923 if (ei->i_file_acl && 5096 if (ei->i_file_acl &&
4924 !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { 5097 !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
4925 ext4_error(sb, __func__, 5098 ext4_error(sb, "bad extended attribute block %llu inode #%lu",
4926 "bad extended attribute block %llu in inode #%lu",
4927 ei->i_file_acl, inode->i_ino); 5099 ei->i_file_acl, inode->i_ino);
4928 ret = -EIO; 5100 ret = -EIO;
4929 goto bad_inode; 5101 goto bad_inode;
@@ -4969,8 +5141,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4969 new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); 5141 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
4970 } else { 5142 } else {
4971 ret = -EIO; 5143 ret = -EIO;
4972 ext4_error(inode->i_sb, __func__, 5144 ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu",
4973 "bogus i_mode (%o) for inode=%lu",
4974 inode->i_mode, inode->i_ino); 5145 inode->i_mode, inode->i_ino);
4975 goto bad_inode; 5146 goto bad_inode;
4976 } 5147 }
@@ -5042,7 +5213,7 @@ static int ext4_do_update_inode(handle_t *handle,
5042 5213
5043 /* For fields not tracked in the in-memory inode, 5214 /* For fields not tracked in the in-memory inode,
5044 * initialise them to zero for new inodes. */ 5215 * initialise them to zero for new inodes. */
5045 if (ei->i_state & EXT4_STATE_NEW) 5216 if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
5046 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); 5217 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
5047 5218
5048 ext4_get_inode_flags(ei); 5219 ext4_get_inode_flags(ei);
@@ -5106,7 +5277,7 @@ static int ext4_do_update_inode(handle_t *handle,
5106 EXT4_FEATURE_RO_COMPAT_LARGE_FILE); 5277 EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
5107 sb->s_dirt = 1; 5278 sb->s_dirt = 1;
5108 ext4_handle_sync(handle); 5279 ext4_handle_sync(handle);
5109 err = ext4_handle_dirty_metadata(handle, inode, 5280 err = ext4_handle_dirty_metadata(handle, NULL,
5110 EXT4_SB(sb)->s_sbh); 5281 EXT4_SB(sb)->s_sbh);
5111 } 5282 }
5112 } 5283 }
@@ -5135,10 +5306,10 @@ static int ext4_do_update_inode(handle_t *handle,
5135 } 5306 }
5136 5307
5137 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 5308 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
5138 rc = ext4_handle_dirty_metadata(handle, inode, bh); 5309 rc = ext4_handle_dirty_metadata(handle, NULL, bh);
5139 if (!err) 5310 if (!err)
5140 err = rc; 5311 err = rc;
5141 ei->i_state &= ~EXT4_STATE_NEW; 5312 ext4_clear_inode_state(inode, EXT4_STATE_NEW);
5142 5313
5143 ext4_update_inode_fsync_trans(handle, inode, 0); 5314 ext4_update_inode_fsync_trans(handle, inode, 0);
5144out_brelse: 5315out_brelse:
@@ -5182,7 +5353,7 @@ out_brelse:
5182 * `stuff()' is running, and the new i_size will be lost. Plus the inode 5353 * `stuff()' is running, and the new i_size will be lost. Plus the inode
5183 * will no longer be on the superblock's dirty inode list. 5354 * will no longer be on the superblock's dirty inode list.
5184 */ 5355 */
5185int ext4_write_inode(struct inode *inode, int wait) 5356int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
5186{ 5357{
5187 int err; 5358 int err;
5188 5359
@@ -5196,7 +5367,7 @@ int ext4_write_inode(struct inode *inode, int wait)
5196 return -EIO; 5367 return -EIO;
5197 } 5368 }
5198 5369
5199 if (!wait) 5370 if (wbc->sync_mode != WB_SYNC_ALL)
5200 return 0; 5371 return 0;
5201 5372
5202 err = ext4_force_commit(inode->i_sb); 5373 err = ext4_force_commit(inode->i_sb);
@@ -5206,13 +5377,11 @@ int ext4_write_inode(struct inode *inode, int wait)
5206 err = ext4_get_inode_loc(inode, &iloc); 5377 err = ext4_get_inode_loc(inode, &iloc);
5207 if (err) 5378 if (err)
5208 return err; 5379 return err;
5209 if (wait) 5380 if (wbc->sync_mode == WB_SYNC_ALL)
5210 sync_dirty_buffer(iloc.bh); 5381 sync_dirty_buffer(iloc.bh);
5211 if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { 5382 if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
5212 ext4_error(inode->i_sb, __func__, 5383 ext4_error(inode->i_sb, "IO error syncing inode, "
5213 "IO error syncing inode, " 5384 "inode=%lu, block=%llu", inode->i_ino,
5214 "inode=%lu, block=%llu",
5215 inode->i_ino,
5216 (unsigned long long)iloc.bh->b_blocknr); 5385 (unsigned long long)iloc.bh->b_blocknr);
5217 err = -EIO; 5386 err = -EIO;
5218 } 5387 }
@@ -5295,7 +5464,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5295 } 5464 }
5296 5465
5297 if (S_ISREG(inode->i_mode) && 5466 if (S_ISREG(inode->i_mode) &&
5298 attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { 5467 attr->ia_valid & ATTR_SIZE &&
5468 (attr->ia_size < inode->i_size ||
5469 (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) {
5299 handle_t *handle; 5470 handle_t *handle;
5300 5471
5301 handle = ext4_journal_start(inode, 3); 5472 handle = ext4_journal_start(inode, 3);
@@ -5326,6 +5497,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5326 goto err_out; 5497 goto err_out;
5327 } 5498 }
5328 } 5499 }
5500 /* ext4_truncate will clear the flag */
5501 if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))
5502 ext4_truncate(inode);
5329 } 5503 }
5330 5504
5331 rc = inode_setattr(inode, attr); 5505 rc = inode_setattr(inode, attr);
@@ -5564,8 +5738,8 @@ static int ext4_expand_extra_isize(struct inode *inode,
5564 entry = IFIRST(header); 5738 entry = IFIRST(header);
5565 5739
5566 /* No extended attributes present */ 5740 /* No extended attributes present */
5567 if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR) || 5741 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
5568 header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { 5742 header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
5569 memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, 5743 memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0,
5570 new_extra_isize); 5744 new_extra_isize);
5571 EXT4_I(inode)->i_extra_isize = new_extra_isize; 5745 EXT4_I(inode)->i_extra_isize = new_extra_isize;
@@ -5609,7 +5783,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
5609 err = ext4_reserve_inode_write(handle, inode, &iloc); 5783 err = ext4_reserve_inode_write(handle, inode, &iloc);
5610 if (ext4_handle_valid(handle) && 5784 if (ext4_handle_valid(handle) &&
5611 EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && 5785 EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
5612 !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) { 5786 !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
5613 /* 5787 /*
5614 * We need extra buffer credits since we may write into EA block 5788 * We need extra buffer credits since we may write into EA block
5615 * with this same handle. If journal_extend fails, then it will 5789 * with this same handle. If journal_extend fails, then it will
@@ -5623,10 +5797,11 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
5623 sbi->s_want_extra_isize, 5797 sbi->s_want_extra_isize,
5624 iloc, handle); 5798 iloc, handle);
5625 if (ret) { 5799 if (ret) {
5626 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; 5800 ext4_set_inode_state(inode,
5801 EXT4_STATE_NO_EXPAND);
5627 if (mnt_count != 5802 if (mnt_count !=
5628 le16_to_cpu(sbi->s_es->s_mnt_count)) { 5803 le16_to_cpu(sbi->s_es->s_mnt_count)) {
5629 ext4_warning(inode->i_sb, __func__, 5804 ext4_warning(inode->i_sb,
5630 "Unable to expand inode %lu. Delete" 5805 "Unable to expand inode %lu. Delete"
5631 " some EAs or run e2fsck.", 5806 " some EAs or run e2fsck.",
5632 inode->i_ino); 5807 inode->i_ino);
@@ -5690,7 +5865,7 @@ static int ext4_pin_inode(handle_t *handle, struct inode *inode)
5690 err = jbd2_journal_get_write_access(handle, iloc.bh); 5865 err = jbd2_journal_get_write_access(handle, iloc.bh);
5691 if (!err) 5866 if (!err)
5692 err = ext4_handle_dirty_metadata(handle, 5867 err = ext4_handle_dirty_metadata(handle,
5693 inode, 5868 NULL,
5694 iloc.bh); 5869 iloc.bh);
5695 brelse(iloc.bh); 5870 brelse(iloc.bh);
5696 } 5871 }