aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- fs/ext4/inode.c 150
1 files changed, 99 insertions, 51 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f9c642b22efa..4abd683b963d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -192,11 +192,24 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
192 * so before we call here everything must be consistently dirtied against 192 * so before we call here everything must be consistently dirtied against
193 * this transaction. 193 * this transaction.
194 */ 194 */
195static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) 195 int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
196 int nblocks)
196{ 197{
198 int ret;
199
200 /*
201 * Drop i_data_sem to avoid deadlock with ext4_get_blocks. At this
202 * moment, get_block can be called only for blocks inside i_size since
203 * page cache has already been dropped and writes are blocked by
204 * i_mutex. So we can safely drop the i_data_sem here.
205 */
197 BUG_ON(EXT4_JOURNAL(inode) == NULL); 206 BUG_ON(EXT4_JOURNAL(inode) == NULL);
198 jbd_debug(2, "restarting handle %p\n", handle); 207 jbd_debug(2, "restarting handle %p\n", handle);
199 return ext4_journal_restart(handle, blocks_for_truncate(inode)); 208 up_write(&EXT4_I(inode)->i_data_sem);
209 ret = ext4_journal_restart(handle, blocks_for_truncate(inode));
210 down_write(&EXT4_I(inode)->i_data_sem);
211
212 return ret;
200} 213}
201 214
202/* 215/*
@@ -341,9 +354,7 @@ static int ext4_block_to_path(struct inode *inode,
341 int n = 0; 354 int n = 0;
342 int final = 0; 355 int final = 0;
343 356
344 if (i_block < 0) { 357 if (i_block < direct_blocks) {
345 ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0");
346 } else if (i_block < direct_blocks) {
347 offsets[n++] = i_block; 358 offsets[n++] = i_block;
348 final = direct_blocks; 359 final = direct_blocks;
349 } else if ((i_block -= direct_blocks) < indirect_blocks) { 360 } else if ((i_block -= direct_blocks) < indirect_blocks) {
@@ -551,15 +562,21 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
551 * 562 *
552 * Normally this function find the preferred place for block allocation, 563 * Normally this function find the preferred place for block allocation,
553 * returns it. 564 * returns it.
565 * Because this is only used for non-extent files, we limit the block nr
566 * to 32 bits.
554 */ 567 */
555static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, 568static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
556 Indirect *partial) 569 Indirect *partial)
557{ 570{
571 ext4_fsblk_t goal;
572
558 /* 573 /*
559 * XXX need to get goal block from mballoc's data structures 574 * XXX need to get goal block from mballoc's data structures
560 */ 575 */
561 576
562 return ext4_find_near(inode, partial); 577 goal = ext4_find_near(inode, partial);
578 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
579 return goal;
563} 580}
564 581
565/** 582/**
@@ -640,6 +657,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
640 if (*err) 657 if (*err)
641 goto failed_out; 658 goto failed_out;
642 659
660 BUG_ON(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS);
661
643 target -= count; 662 target -= count;
644 /* allocate blocks for indirect blocks */ 663 /* allocate blocks for indirect blocks */
645 while (index < indirect_blks && count) { 664 while (index < indirect_blks && count) {
@@ -674,6 +693,7 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
674 ar.flags = EXT4_MB_HINT_DATA; 693 ar.flags = EXT4_MB_HINT_DATA;
675 694
676 current_block = ext4_mb_new_blocks(handle, &ar, err); 695 current_block = ext4_mb_new_blocks(handle, &ar, err);
696 BUG_ON(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS);
677 697
678 if (*err && (target == blks)) { 698 if (*err && (target == blks)) {
679 /* 699 /*
@@ -762,8 +782,9 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
762 BUFFER_TRACE(bh, "call get_create_access"); 782 BUFFER_TRACE(bh, "call get_create_access");
763 err = ext4_journal_get_create_access(handle, bh); 783 err = ext4_journal_get_create_access(handle, bh);
764 if (err) { 784 if (err) {
785 /* Don't brelse(bh) here; it's done in
786 * ext4_journal_forget() below */
765 unlock_buffer(bh); 787 unlock_buffer(bh);
766 brelse(bh);
767 goto failed; 788 goto failed;
768 } 789 }
769 790
@@ -1109,16 +1130,15 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1109 ext4_discard_preallocations(inode); 1130 ext4_discard_preallocations(inode);
1110} 1131}
1111 1132
1112static int check_block_validity(struct inode *inode, sector_t logical, 1133static int check_block_validity(struct inode *inode, const char *msg,
1113 sector_t phys, int len) 1134 sector_t logical, sector_t phys, int len)
1114{ 1135{
1115 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { 1136 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
1116 ext4_error(inode->i_sb, "check_block_validity", 1137 ext4_error(inode->i_sb, msg,
1117 "inode #%lu logical block %llu mapped to %llu " 1138 "inode #%lu logical block %llu mapped to %llu "
1118 "(size %d)", inode->i_ino, 1139 "(size %d)", inode->i_ino,
1119 (unsigned long long) logical, 1140 (unsigned long long) logical,
1120 (unsigned long long) phys, len); 1141 (unsigned long long) phys, len);
1121 WARN_ON(1);
1122 return -EIO; 1142 return -EIO;
1123 } 1143 }
1124 return 0; 1144 return 0;
@@ -1170,8 +1190,8 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
1170 up_read((&EXT4_I(inode)->i_data_sem)); 1190 up_read((&EXT4_I(inode)->i_data_sem));
1171 1191
1172 if (retval > 0 && buffer_mapped(bh)) { 1192 if (retval > 0 && buffer_mapped(bh)) {
1173 int ret = check_block_validity(inode, block, 1193 int ret = check_block_validity(inode, "file system corruption",
1174 bh->b_blocknr, retval); 1194 block, bh->b_blocknr, retval);
1175 if (ret != 0) 1195 if (ret != 0)
1176 return ret; 1196 return ret;
1177 } 1197 }
@@ -1235,8 +1255,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
1235 * i_data's format changing. Force the migrate 1255 * i_data's format changing. Force the migrate
1236 * to fail by clearing migrate flags 1256 * to fail by clearing migrate flags
1237 */ 1257 */
1238 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & 1258 EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
1239 ~EXT4_EXT_MIGRATE;
1240 } 1259 }
1241 } 1260 }
1242 1261
@@ -1252,8 +1271,9 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
1252 1271
1253 up_write((&EXT4_I(inode)->i_data_sem)); 1272 up_write((&EXT4_I(inode)->i_data_sem));
1254 if (retval > 0 && buffer_mapped(bh)) { 1273 if (retval > 0 && buffer_mapped(bh)) {
1255 int ret = check_block_validity(inode, block, 1274 int ret = check_block_validity(inode, "file system "
1256 bh->b_blocknr, retval); 1275 "corruption after allocation",
1276 block, bh->b_blocknr, retval);
1257 if (ret != 0) 1277 if (ret != 0)
1258 return ret; 1278 return ret;
1259 } 1279 }
@@ -1863,18 +1883,6 @@ static void ext4_da_page_release_reservation(struct page *page,
1863 * Delayed allocation stuff 1883 * Delayed allocation stuff
1864 */ 1884 */
1865 1885
1866struct mpage_da_data {
1867 struct inode *inode;
1868 sector_t b_blocknr; /* start block number of extent */
1869 size_t b_size; /* size of extent */
1870 unsigned long b_state; /* state of the extent */
1871 unsigned long first_page, next_page; /* extent of pages */
1872 struct writeback_control *wbc;
1873 int io_done;
1874 int pages_written;
1875 int retval;
1876};
1877
1878/* 1886/*
1879 * mpage_da_submit_io - walks through extent of pages and try to write 1887 * mpage_da_submit_io - walks through extent of pages and try to write
1880 * them with writepage() call back 1888 * them with writepage() call back
@@ -2737,6 +2745,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2737 long pages_skipped; 2745 long pages_skipped;
2738 int range_cyclic, cycled = 1, io_done = 0; 2746 int range_cyclic, cycled = 1, io_done = 0;
2739 int needed_blocks, ret = 0, nr_to_writebump = 0; 2747 int needed_blocks, ret = 0, nr_to_writebump = 0;
2748 loff_t range_start = wbc->range_start;
2740 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2749 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2741 2750
2742 trace_ext4_da_writepages(inode, wbc); 2751 trace_ext4_da_writepages(inode, wbc);
@@ -2850,6 +2859,7 @@ retry:
2850 mpd.io_done = 1; 2859 mpd.io_done = 1;
2851 ret = MPAGE_DA_EXTENT_TAIL; 2860 ret = MPAGE_DA_EXTENT_TAIL;
2852 } 2861 }
2862 trace_ext4_da_write_pages(inode, &mpd);
2853 wbc->nr_to_write -= mpd.pages_written; 2863 wbc->nr_to_write -= mpd.pages_written;
2854 2864
2855 ext4_journal_stop(handle); 2865 ext4_journal_stop(handle);
@@ -2905,6 +2915,7 @@ out_writepages:
2905 if (!no_nrwrite_index_update) 2915 if (!no_nrwrite_index_update)
2906 wbc->no_nrwrite_index_update = 0; 2916 wbc->no_nrwrite_index_update = 0;
2907 wbc->nr_to_write -= nr_to_writebump; 2917 wbc->nr_to_write -= nr_to_writebump;
2918 wbc->range_start = range_start;
2908 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); 2919 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
2909 return ret; 2920 return ret;
2910} 2921}
@@ -3117,6 +3128,8 @@ out:
3117 */ 3128 */
3118int ext4_alloc_da_blocks(struct inode *inode) 3129int ext4_alloc_da_blocks(struct inode *inode)
3119{ 3130{
3131 trace_ext4_alloc_da_blocks(inode);
3132
3120 if (!EXT4_I(inode)->i_reserved_data_blocks && 3133 if (!EXT4_I(inode)->i_reserved_data_blocks &&
3121 !EXT4_I(inode)->i_reserved_meta_blocks) 3134 !EXT4_I(inode)->i_reserved_meta_blocks)
3122 return 0; 3135 return 0;
@@ -3659,7 +3672,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
3659 ext4_handle_dirty_metadata(handle, inode, bh); 3672 ext4_handle_dirty_metadata(handle, inode, bh);
3660 } 3673 }
3661 ext4_mark_inode_dirty(handle, inode); 3674 ext4_mark_inode_dirty(handle, inode);
3662 ext4_journal_test_restart(handle, inode); 3675 ext4_truncate_restart_trans(handle, inode,
3676 blocks_for_truncate(inode));
3663 if (bh) { 3677 if (bh) {
3664 BUFFER_TRACE(bh, "retaking write access"); 3678 BUFFER_TRACE(bh, "retaking write access");
3665 ext4_journal_get_write_access(handle, bh); 3679 ext4_journal_get_write_access(handle, bh);
@@ -3870,7 +3884,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
3870 return; 3884 return;
3871 if (try_to_extend_transaction(handle, inode)) { 3885 if (try_to_extend_transaction(handle, inode)) {
3872 ext4_mark_inode_dirty(handle, inode); 3886 ext4_mark_inode_dirty(handle, inode);
3873 ext4_journal_test_restart(handle, inode); 3887 ext4_truncate_restart_trans(handle, inode,
3888 blocks_for_truncate(inode));
3874 } 3889 }
3875 3890
3876 ext4_free_blocks(handle, inode, nr, 1, 1); 3891 ext4_free_blocks(handle, inode, nr, 1, 1);
@@ -3958,8 +3973,7 @@ void ext4_truncate(struct inode *inode)
3958 if (!ext4_can_truncate(inode)) 3973 if (!ext4_can_truncate(inode))
3959 return; 3974 return;
3960 3975
3961 if (ei->i_disksize && inode->i_size == 0 && 3976 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
3962 !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
3963 ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; 3977 ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
3964 3978
3965 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 3979 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
@@ -4533,7 +4547,8 @@ static int ext4_inode_blocks_set(handle_t *handle,
4533 */ 4547 */
4534static int ext4_do_update_inode(handle_t *handle, 4548static int ext4_do_update_inode(handle_t *handle,
4535 struct inode *inode, 4549 struct inode *inode,
4536 struct ext4_iloc *iloc) 4550 struct ext4_iloc *iloc,
4551 int do_sync)
4537{ 4552{
4538 struct ext4_inode *raw_inode = ext4_raw_inode(iloc); 4553 struct ext4_inode *raw_inode = ext4_raw_inode(iloc);
4539 struct ext4_inode_info *ei = EXT4_I(inode); 4554 struct ext4_inode_info *ei = EXT4_I(inode);
@@ -4581,8 +4596,7 @@ static int ext4_do_update_inode(handle_t *handle,
4581 if (ext4_inode_blocks_set(handle, raw_inode, ei)) 4596 if (ext4_inode_blocks_set(handle, raw_inode, ei))
4582 goto out_brelse; 4597 goto out_brelse;
4583 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); 4598 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
4584 /* clear the migrate flag in the raw_inode */ 4599 raw_inode->i_flags = cpu_to_le32(ei->i_flags);
4585 raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE);
4586 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 4600 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
4587 cpu_to_le32(EXT4_OS_HURD)) 4601 cpu_to_le32(EXT4_OS_HURD))
4588 raw_inode->i_file_acl_high = 4602 raw_inode->i_file_acl_high =
@@ -4635,10 +4649,22 @@ static int ext4_do_update_inode(handle_t *handle,
4635 raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); 4649 raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
4636 } 4650 }
4637 4651
4638 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 4652 /*
4639 rc = ext4_handle_dirty_metadata(handle, inode, bh); 4653 * If we're not using a journal and we were called from
4640 if (!err) 4654 * ext4_write_inode() to sync the inode (making do_sync true),
4641 err = rc; 4655 * we can just use sync_dirty_buffer() directly to do our dirty
4656 * work. Testing s_journal here is a bit redundant but it's
4657 * worth it to avoid potential future trouble.
4658 */
4659 if (EXT4_SB(inode->i_sb)->s_journal == NULL && do_sync) {
4660 BUFFER_TRACE(bh, "call sync_dirty_buffer");
4661 sync_dirty_buffer(bh);
4662 } else {
4663 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
4664 rc = ext4_handle_dirty_metadata(handle, inode, bh);
4665 if (!err)
4666 err = rc;
4667 }
4642 ei->i_state &= ~EXT4_STATE_NEW; 4668 ei->i_state &= ~EXT4_STATE_NEW;
4643 4669
4644out_brelse: 4670out_brelse:
@@ -4684,19 +4710,32 @@ out_brelse:
4684 */ 4710 */
4685int ext4_write_inode(struct inode *inode, int wait) 4711int ext4_write_inode(struct inode *inode, int wait)
4686{ 4712{
4713 int err;
4714
4687 if (current->flags & PF_MEMALLOC) 4715 if (current->flags & PF_MEMALLOC)
4688 return 0; 4716 return 0;
4689 4717
4690 if (ext4_journal_current_handle()) { 4718 if (EXT4_SB(inode->i_sb)->s_journal) {
4691 jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); 4719 if (ext4_journal_current_handle()) {
4692 dump_stack(); 4720 jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
4693 return -EIO; 4721 dump_stack();
4694 } 4722 return -EIO;
4723 }
4695 4724
4696 if (!wait) 4725 if (!wait)
4697 return 0; 4726 return 0;
4727
4728 err = ext4_force_commit(inode->i_sb);
4729 } else {
4730 struct ext4_iloc iloc;
4698 4731
4699 return ext4_force_commit(inode->i_sb); 4732 err = ext4_get_inode_loc(inode, &iloc);
4733 if (err)
4734 return err;
4735 err = ext4_do_update_inode(EXT4_NOJOURNAL_HANDLE,
4736 inode, &iloc, wait);
4737 }
4738 return err;
4700} 4739}
4701 4740
4702/* 4741/*
@@ -4990,7 +5029,7 @@ int ext4_mark_iloc_dirty(handle_t *handle,
4990 get_bh(iloc->bh); 5029 get_bh(iloc->bh);
4991 5030
4992 /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ 5031 /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */
4993 err = ext4_do_update_inode(handle, inode, iloc); 5032 err = ext4_do_update_inode(handle, inode, iloc, 0);
4994 put_bh(iloc->bh); 5033 put_bh(iloc->bh);
4995 return err; 5034 return err;
4996} 5035}
@@ -5281,12 +5320,21 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5281 else 5320 else
5282 len = PAGE_CACHE_SIZE; 5321 len = PAGE_CACHE_SIZE;
5283 5322
5323 lock_page(page);
5324 /*
5325 * Return if we have all the buffers mapped. This avoids
5326 * the need to call write_begin/write_end, which do a
5327 * journal_start/journal_stop that can block and take a
5328 * long time.
5329 */
5284 if (page_has_buffers(page)) { 5330 if (page_has_buffers(page)) {
5285 /* return if we have all the buffers mapped */
5286 if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, 5331 if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
5287 ext4_bh_unmapped)) 5332 ext4_bh_unmapped)) {
5333 unlock_page(page);
5288 goto out_unlock; 5334 goto out_unlock;
5335 }
5289 } 5336 }
5337 unlock_page(page);
5290 /* 5338 /*
5291 * OK, we need to fill the hole... Do write_begin write_end 5339 * OK, we need to fill the hole... Do write_begin write_end
5292 * to do block allocation/reservation.We are not holding 5340 * to do block allocation/reservation.We are not holding