aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--fs/ext4/inode.c152
1 files changed, 100 insertions, 52 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 349dd6b4da47..064746fad581 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -192,11 +192,24 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
192 * so before we call here everything must be consistently dirtied against 192 * so before we call here everything must be consistently dirtied against
193 * this transaction. 193 * this transaction.
194 */ 194 */
195static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) 195 int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
196 int nblocks)
196{ 197{
198 int ret;
199
200 /*
201 * Drop i_data_sem to avoid deadlock with ext4_get_blocks. At this
202 * moment, get_block can be called only for blocks inside i_size since
203 * page cache has been already dropped and writes are blocked by
204 * i_mutex. So we can safely drop the i_data_sem here.
205 */
197 BUG_ON(EXT4_JOURNAL(inode) == NULL); 206 BUG_ON(EXT4_JOURNAL(inode) == NULL);
198 jbd_debug(2, "restarting handle %p\n", handle); 207 jbd_debug(2, "restarting handle %p\n", handle);
199 return ext4_journal_restart(handle, blocks_for_truncate(inode)); 208 up_write(&EXT4_I(inode)->i_data_sem);
209 ret = ext4_journal_restart(handle, blocks_for_truncate(inode));
210 down_write(&EXT4_I(inode)->i_data_sem);
211
212 return ret;
200} 213}
201 214
202/* 215/*
@@ -341,9 +354,7 @@ static int ext4_block_to_path(struct inode *inode,
341 int n = 0; 354 int n = 0;
342 int final = 0; 355 int final = 0;
343 356
344 if (i_block < 0) { 357 if (i_block < direct_blocks) {
345 ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0");
346 } else if (i_block < direct_blocks) {
347 offsets[n++] = i_block; 358 offsets[n++] = i_block;
348 final = direct_blocks; 359 final = direct_blocks;
349 } else if ((i_block -= direct_blocks) < indirect_blocks) { 360 } else if ((i_block -= direct_blocks) < indirect_blocks) {
@@ -551,15 +562,21 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
551 * 562 *
552 * Normally this function finds the preferred place for block allocation, 563 * Normally this function finds the preferred place for block allocation,
553 * returns it. 564 * returns it.
565 * Because this is only used for non-extent files, we limit the block nr
566 * to 32 bits.
554 */ 567 */
555static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, 568static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
556 Indirect *partial) 569 Indirect *partial)
557{ 570{
571 ext4_fsblk_t goal;
572
558 /* 573 /*
559 * XXX need to get goal block from mballoc's data structures 574 * XXX need to get goal block from mballoc's data structures
560 */ 575 */
561 576
562 return ext4_find_near(inode, partial); 577 goal = ext4_find_near(inode, partial);
578 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
579 return goal;
563} 580}
564 581
565/** 582/**
@@ -640,6 +657,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
640 if (*err) 657 if (*err)
641 goto failed_out; 658 goto failed_out;
642 659
660 BUG_ON(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS);
661
643 target -= count; 662 target -= count;
644 /* allocate blocks for indirect blocks */ 663 /* allocate blocks for indirect blocks */
645 while (index < indirect_blks && count) { 664 while (index < indirect_blks && count) {
@@ -674,6 +693,7 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
674 ar.flags = EXT4_MB_HINT_DATA; 693 ar.flags = EXT4_MB_HINT_DATA;
675 694
676 current_block = ext4_mb_new_blocks(handle, &ar, err); 695 current_block = ext4_mb_new_blocks(handle, &ar, err);
696 BUG_ON(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS);
677 697
678 if (*err && (target == blks)) { 698 if (*err && (target == blks)) {
679 /* 699 /*
@@ -762,8 +782,9 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
762 BUFFER_TRACE(bh, "call get_create_access"); 782 BUFFER_TRACE(bh, "call get_create_access");
763 err = ext4_journal_get_create_access(handle, bh); 783 err = ext4_journal_get_create_access(handle, bh);
764 if (err) { 784 if (err) {
785 /* Don't brelse(bh) here; it's done in
786 * ext4_journal_forget() below */
765 unlock_buffer(bh); 787 unlock_buffer(bh);
766 brelse(bh);
767 goto failed; 788 goto failed;
768 } 789 }
769 790
@@ -1109,16 +1130,15 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1109 ext4_discard_preallocations(inode); 1130 ext4_discard_preallocations(inode);
1110} 1131}
1111 1132
1112static int check_block_validity(struct inode *inode, sector_t logical, 1133static int check_block_validity(struct inode *inode, const char *msg,
1113 sector_t phys, int len) 1134 sector_t logical, sector_t phys, int len)
1114{ 1135{
1115 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { 1136 if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
1116 ext4_error(inode->i_sb, "check_block_validity", 1137 ext4_error(inode->i_sb, msg,
1117 "inode #%lu logical block %llu mapped to %llu " 1138 "inode #%lu logical block %llu mapped to %llu "
1118 "(size %d)", inode->i_ino, 1139 "(size %d)", inode->i_ino,
1119 (unsigned long long) logical, 1140 (unsigned long long) logical,
1120 (unsigned long long) phys, len); 1141 (unsigned long long) phys, len);
1121 WARN_ON(1);
1122 return -EIO; 1142 return -EIO;
1123 } 1143 }
1124 return 0; 1144 return 0;
@@ -1170,8 +1190,8 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
1170 up_read((&EXT4_I(inode)->i_data_sem)); 1190 up_read((&EXT4_I(inode)->i_data_sem));
1171 1191
1172 if (retval > 0 && buffer_mapped(bh)) { 1192 if (retval > 0 && buffer_mapped(bh)) {
1173 int ret = check_block_validity(inode, block, 1193 int ret = check_block_validity(inode, "file system corruption",
1174 bh->b_blocknr, retval); 1194 block, bh->b_blocknr, retval);
1175 if (ret != 0) 1195 if (ret != 0)
1176 return ret; 1196 return ret;
1177 } 1197 }
@@ -1235,8 +1255,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
1235 * i_data's format changing. Force the migrate 1255 * i_data's format changing. Force the migrate
1236 * to fail by clearing migrate flags 1256 * to fail by clearing migrate flags
1237 */ 1257 */
1238 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & 1258 EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
1239 ~EXT4_EXT_MIGRATE;
1240 } 1259 }
1241 } 1260 }
1242 1261
@@ -1252,8 +1271,9 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
1252 1271
1253 up_write((&EXT4_I(inode)->i_data_sem)); 1272 up_write((&EXT4_I(inode)->i_data_sem));
1254 if (retval > 0 && buffer_mapped(bh)) { 1273 if (retval > 0 && buffer_mapped(bh)) {
1255 int ret = check_block_validity(inode, block, 1274 int ret = check_block_validity(inode, "file system "
1256 bh->b_blocknr, retval); 1275 "corruption after allocation",
1276 block, bh->b_blocknr, retval);
1257 if (ret != 0) 1277 if (ret != 0)
1258 return ret; 1278 return ret;
1259 } 1279 }
@@ -1863,18 +1883,6 @@ static void ext4_da_page_release_reservation(struct page *page,
1863 * Delayed allocation stuff 1883 * Delayed allocation stuff
1864 */ 1884 */
1865 1885
1866struct mpage_da_data {
1867 struct inode *inode;
1868 sector_t b_blocknr; /* start block number of extent */
1869 size_t b_size; /* size of extent */
1870 unsigned long b_state; /* state of the extent */
1871 unsigned long first_page, next_page; /* extent of pages */
1872 struct writeback_control *wbc;
1873 int io_done;
1874 int pages_written;
1875 int retval;
1876};
1877
1878/* 1886/*
1879 * mpage_da_submit_io - walks through extent of pages and try to write 1887 * mpage_da_submit_io - walks through extent of pages and try to write
1880 * them with writepage() call back 1888 * them with writepage() call back
@@ -2329,7 +2337,7 @@ static int __mpage_da_writepage(struct page *page,
2329 /* 2337 /*
2330 * Rest of the page in the page_vec 2338 * Rest of the page in the page_vec
2331 * redirty them and skip them. We will 2339 * redirty them and skip them. We will
2332 * try to to write them again after 2340 * try to write them again after
2333 * starting a new transaction 2341 * starting a new transaction
2334 */ 2342 */
2335 redirty_page_for_writepage(wbc, page); 2343 redirty_page_for_writepage(wbc, page);
@@ -2737,6 +2745,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2737 long pages_skipped; 2745 long pages_skipped;
2738 int range_cyclic, cycled = 1, io_done = 0; 2746 int range_cyclic, cycled = 1, io_done = 0;
2739 int needed_blocks, ret = 0, nr_to_writebump = 0; 2747 int needed_blocks, ret = 0, nr_to_writebump = 0;
2748 loff_t range_start = wbc->range_start;
2740 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2749 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2741 2750
2742 trace_ext4_da_writepages(inode, wbc); 2751 trace_ext4_da_writepages(inode, wbc);
@@ -2850,6 +2859,7 @@ retry:
2850 mpd.io_done = 1; 2859 mpd.io_done = 1;
2851 ret = MPAGE_DA_EXTENT_TAIL; 2860 ret = MPAGE_DA_EXTENT_TAIL;
2852 } 2861 }
2862 trace_ext4_da_write_pages(inode, &mpd);
2853 wbc->nr_to_write -= mpd.pages_written; 2863 wbc->nr_to_write -= mpd.pages_written;
2854 2864
2855 ext4_journal_stop(handle); 2865 ext4_journal_stop(handle);
@@ -2905,6 +2915,7 @@ out_writepages:
2905 if (!no_nrwrite_index_update) 2915 if (!no_nrwrite_index_update)
2906 wbc->no_nrwrite_index_update = 0; 2916 wbc->no_nrwrite_index_update = 0;
2907 wbc->nr_to_write -= nr_to_writebump; 2917 wbc->nr_to_write -= nr_to_writebump;
2918 wbc->range_start = range_start;
2908 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); 2919 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
2909 return ret; 2920 return ret;
2910} 2921}
@@ -3117,6 +3128,8 @@ out:
3117 */ 3128 */
3118int ext4_alloc_da_blocks(struct inode *inode) 3129int ext4_alloc_da_blocks(struct inode *inode)
3119{ 3130{
3131 trace_ext4_alloc_da_blocks(inode);
3132
3120 if (!EXT4_I(inode)->i_reserved_data_blocks && 3133 if (!EXT4_I(inode)->i_reserved_data_blocks &&
3121 !EXT4_I(inode)->i_reserved_meta_blocks) 3134 !EXT4_I(inode)->i_reserved_meta_blocks)
3122 return 0; 3135 return 0;
@@ -3663,7 +3676,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
3663 ext4_handle_dirty_metadata(handle, inode, bh); 3676 ext4_handle_dirty_metadata(handle, inode, bh);
3664 } 3677 }
3665 ext4_mark_inode_dirty(handle, inode); 3678 ext4_mark_inode_dirty(handle, inode);
3666 ext4_journal_test_restart(handle, inode); 3679 ext4_truncate_restart_trans(handle, inode,
3680 blocks_for_truncate(inode));
3667 if (bh) { 3681 if (bh) {
3668 BUFFER_TRACE(bh, "retaking write access"); 3682 BUFFER_TRACE(bh, "retaking write access");
3669 ext4_journal_get_write_access(handle, bh); 3683 ext4_journal_get_write_access(handle, bh);
@@ -3874,7 +3888,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
3874 return; 3888 return;
3875 if (try_to_extend_transaction(handle, inode)) { 3889 if (try_to_extend_transaction(handle, inode)) {
3876 ext4_mark_inode_dirty(handle, inode); 3890 ext4_mark_inode_dirty(handle, inode);
3877 ext4_journal_test_restart(handle, inode); 3891 ext4_truncate_restart_trans(handle, inode,
3892 blocks_for_truncate(inode));
3878 } 3893 }
3879 3894
3880 ext4_free_blocks(handle, inode, nr, 1, 1); 3895 ext4_free_blocks(handle, inode, nr, 1, 1);
@@ -3962,8 +3977,7 @@ void ext4_truncate(struct inode *inode)
3962 if (!ext4_can_truncate(inode)) 3977 if (!ext4_can_truncate(inode))
3963 return; 3978 return;
3964 3979
3965 if (ei->i_disksize && inode->i_size == 0 && 3980 if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
3966 !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
3967 ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; 3981 ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
3968 3982
3969 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { 3983 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
@@ -4537,7 +4551,8 @@ static int ext4_inode_blocks_set(handle_t *handle,
4537 */ 4551 */
4538static int ext4_do_update_inode(handle_t *handle, 4552static int ext4_do_update_inode(handle_t *handle,
4539 struct inode *inode, 4553 struct inode *inode,
4540 struct ext4_iloc *iloc) 4554 struct ext4_iloc *iloc,
4555 int do_sync)
4541{ 4556{
4542 struct ext4_inode *raw_inode = ext4_raw_inode(iloc); 4557 struct ext4_inode *raw_inode = ext4_raw_inode(iloc);
4543 struct ext4_inode_info *ei = EXT4_I(inode); 4558 struct ext4_inode_info *ei = EXT4_I(inode);
@@ -4585,8 +4600,7 @@ static int ext4_do_update_inode(handle_t *handle,
4585 if (ext4_inode_blocks_set(handle, raw_inode, ei)) 4600 if (ext4_inode_blocks_set(handle, raw_inode, ei))
4586 goto out_brelse; 4601 goto out_brelse;
4587 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); 4602 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
4588 /* clear the migrate flag in the raw_inode */ 4603 raw_inode->i_flags = cpu_to_le32(ei->i_flags);
4589 raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE);
4590 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 4604 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
4591 cpu_to_le32(EXT4_OS_HURD)) 4605 cpu_to_le32(EXT4_OS_HURD))
4592 raw_inode->i_file_acl_high = 4606 raw_inode->i_file_acl_high =
@@ -4639,10 +4653,22 @@ static int ext4_do_update_inode(handle_t *handle,
4639 raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); 4653 raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
4640 } 4654 }
4641 4655
4642 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 4656 /*
4643 rc = ext4_handle_dirty_metadata(handle, inode, bh); 4657 * If we're not using a journal and we were called from
4644 if (!err) 4658 * ext4_write_inode() to sync the inode (making do_sync true),
4645 err = rc; 4659 * we can just use sync_dirty_buffer() directly to do our dirty
4660 * work. Testing s_journal here is a bit redundant but it's
4661 * worth it to avoid potential future trouble.
4662 */
4663 if (EXT4_SB(inode->i_sb)->s_journal == NULL && do_sync) {
4664 BUFFER_TRACE(bh, "call sync_dirty_buffer");
4665 sync_dirty_buffer(bh);
4666 } else {
4667 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
4668 rc = ext4_handle_dirty_metadata(handle, inode, bh);
4669 if (!err)
4670 err = rc;
4671 }
4646 ei->i_state &= ~EXT4_STATE_NEW; 4672 ei->i_state &= ~EXT4_STATE_NEW;
4647 4673
4648out_brelse: 4674out_brelse:
@@ -4688,19 +4714,32 @@ out_brelse:
4688 */ 4714 */
4689int ext4_write_inode(struct inode *inode, int wait) 4715int ext4_write_inode(struct inode *inode, int wait)
4690{ 4716{
4717 int err;
4718
4691 if (current->flags & PF_MEMALLOC) 4719 if (current->flags & PF_MEMALLOC)
4692 return 0; 4720 return 0;
4693 4721
4694 if (ext4_journal_current_handle()) { 4722 if (EXT4_SB(inode->i_sb)->s_journal) {
4695 jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); 4723 if (ext4_journal_current_handle()) {
4696 dump_stack(); 4724 jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
4697 return -EIO; 4725 dump_stack();
4698 } 4726 return -EIO;
4727 }
4699 4728
4700 if (!wait) 4729 if (!wait)
4701 return 0; 4730 return 0;
4731
4732 err = ext4_force_commit(inode->i_sb);
4733 } else {
4734 struct ext4_iloc iloc;
4702 4735
4703 return ext4_force_commit(inode->i_sb); 4736 err = ext4_get_inode_loc(inode, &iloc);
4737 if (err)
4738 return err;
4739 err = ext4_do_update_inode(EXT4_NOJOURNAL_HANDLE,
4740 inode, &iloc, wait);
4741 }
4742 return err;
4704} 4743}
4705 4744
4706/* 4745/*
@@ -4994,7 +5033,7 @@ int ext4_mark_iloc_dirty(handle_t *handle,
4994 get_bh(iloc->bh); 5033 get_bh(iloc->bh);
4995 5034
4996 /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ 5035 /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */
4997 err = ext4_do_update_inode(handle, inode, iloc); 5036 err = ext4_do_update_inode(handle, inode, iloc, 0);
4998 put_bh(iloc->bh); 5037 put_bh(iloc->bh);
4999 return err; 5038 return err;
5000} 5039}
@@ -5285,12 +5324,21 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5285 else 5324 else
5286 len = PAGE_CACHE_SIZE; 5325 len = PAGE_CACHE_SIZE;
5287 5326
5327 lock_page(page);
5328 /*
5329 * return if we have all the buffers mapped. This avoids
5330 * the need to call write_begin/write_end which does a
5331 * journal_start/journal_stop which can block and take
5332 * a long time
5333 */
5288 if (page_has_buffers(page)) { 5334 if (page_has_buffers(page)) {
5289 /* return if we have all the buffers mapped */
5290 if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, 5335 if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
5291 ext4_bh_unmapped)) 5336 ext4_bh_unmapped)) {
5337 unlock_page(page);
5292 goto out_unlock; 5338 goto out_unlock;
5339 }
5293 } 5340 }
5341 unlock_page(page);
5294 /* 5342 /*
5295 * OK, we need to fill the hole... Do write_begin write_end 5343 * OK, we need to fill the hole... Do write_begin write_end
5296 * to do block allocation/reservation. We are not holding 5344 * to do block allocation/reservation. We are not holding