diff options
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 150 |
1 files changed, 99 insertions, 51 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f9c642b22efa..4abd683b963d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -192,11 +192,24 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode) | |||
192 | * so before we call here everything must be consistently dirtied against | 192 | * so before we call here everything must be consistently dirtied against |
193 | * this transaction. | 193 | * this transaction. |
194 | */ | 194 | */ |
195 | static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) | 195 | int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, |
196 | int nblocks) | ||
196 | { | 197 | { |
198 | int ret; | ||
199 | |||
200 | /* | ||
201 | * Drop i_data_sem to avoid deadlock with ext4_get_blocks At this | ||
202 | * moment, get_block can be called only for blocks inside i_size since | ||
203 | * page cache has been already dropped and writes are blocked by | ||
204 | * i_mutex. So we can safely drop the i_data_sem here. | ||
205 | */ | ||
197 | BUG_ON(EXT4_JOURNAL(inode) == NULL); | 206 | BUG_ON(EXT4_JOURNAL(inode) == NULL); |
198 | jbd_debug(2, "restarting handle %p\n", handle); | 207 | jbd_debug(2, "restarting handle %p\n", handle); |
199 | return ext4_journal_restart(handle, blocks_for_truncate(inode)); | 208 | up_write(&EXT4_I(inode)->i_data_sem); |
209 | ret = ext4_journal_restart(handle, blocks_for_truncate(inode)); | ||
210 | down_write(&EXT4_I(inode)->i_data_sem); | ||
211 | |||
212 | return ret; | ||
200 | } | 213 | } |
201 | 214 | ||
202 | /* | 215 | /* |
@@ -341,9 +354,7 @@ static int ext4_block_to_path(struct inode *inode, | |||
341 | int n = 0; | 354 | int n = 0; |
342 | int final = 0; | 355 | int final = 0; |
343 | 356 | ||
344 | if (i_block < 0) { | 357 | if (i_block < direct_blocks) { |
345 | ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0"); | ||
346 | } else if (i_block < direct_blocks) { | ||
347 | offsets[n++] = i_block; | 358 | offsets[n++] = i_block; |
348 | final = direct_blocks; | 359 | final = direct_blocks; |
349 | } else if ((i_block -= direct_blocks) < indirect_blocks) { | 360 | } else if ((i_block -= direct_blocks) < indirect_blocks) { |
@@ -551,15 +562,21 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | |||
551 | * | 562 | * |
552 | * Normally this function find the preferred place for block allocation, | 563 | * Normally this function find the preferred place for block allocation, |
553 | * returns it. | 564 | * returns it. |
565 | * Because this is only used for non-extent files, we limit the block nr | ||
566 | * to 32 bits. | ||
554 | */ | 567 | */ |
555 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | 568 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, |
556 | Indirect *partial) | 569 | Indirect *partial) |
557 | { | 570 | { |
571 | ext4_fsblk_t goal; | ||
572 | |||
558 | /* | 573 | /* |
559 | * XXX need to get goal block from mballoc's data structures | 574 | * XXX need to get goal block from mballoc's data structures |
560 | */ | 575 | */ |
561 | 576 | ||
562 | return ext4_find_near(inode, partial); | 577 | goal = ext4_find_near(inode, partial); |
578 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | ||
579 | return goal; | ||
563 | } | 580 | } |
564 | 581 | ||
565 | /** | 582 | /** |
@@ -640,6 +657,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
640 | if (*err) | 657 | if (*err) |
641 | goto failed_out; | 658 | goto failed_out; |
642 | 659 | ||
660 | BUG_ON(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS); | ||
661 | |||
643 | target -= count; | 662 | target -= count; |
644 | /* allocate blocks for indirect blocks */ | 663 | /* allocate blocks for indirect blocks */ |
645 | while (index < indirect_blks && count) { | 664 | while (index < indirect_blks && count) { |
@@ -674,6 +693,7 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
674 | ar.flags = EXT4_MB_HINT_DATA; | 693 | ar.flags = EXT4_MB_HINT_DATA; |
675 | 694 | ||
676 | current_block = ext4_mb_new_blocks(handle, &ar, err); | 695 | current_block = ext4_mb_new_blocks(handle, &ar, err); |
696 | BUG_ON(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS); | ||
677 | 697 | ||
678 | if (*err && (target == blks)) { | 698 | if (*err && (target == blks)) { |
679 | /* | 699 | /* |
@@ -762,8 +782,9 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
762 | BUFFER_TRACE(bh, "call get_create_access"); | 782 | BUFFER_TRACE(bh, "call get_create_access"); |
763 | err = ext4_journal_get_create_access(handle, bh); | 783 | err = ext4_journal_get_create_access(handle, bh); |
764 | if (err) { | 784 | if (err) { |
785 | /* Don't brelse(bh) here; it's done in | ||
786 | * ext4_journal_forget() below */ | ||
765 | unlock_buffer(bh); | 787 | unlock_buffer(bh); |
766 | brelse(bh); | ||
767 | goto failed; | 788 | goto failed; |
768 | } | 789 | } |
769 | 790 | ||
@@ -1109,16 +1130,15 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1109 | ext4_discard_preallocations(inode); | 1130 | ext4_discard_preallocations(inode); |
1110 | } | 1131 | } |
1111 | 1132 | ||
1112 | static int check_block_validity(struct inode *inode, sector_t logical, | 1133 | static int check_block_validity(struct inode *inode, const char *msg, |
1113 | sector_t phys, int len) | 1134 | sector_t logical, sector_t phys, int len) |
1114 | { | 1135 | { |
1115 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { | 1136 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { |
1116 | ext4_error(inode->i_sb, "check_block_validity", | 1137 | ext4_error(inode->i_sb, msg, |
1117 | "inode #%lu logical block %llu mapped to %llu " | 1138 | "inode #%lu logical block %llu mapped to %llu " |
1118 | "(size %d)", inode->i_ino, | 1139 | "(size %d)", inode->i_ino, |
1119 | (unsigned long long) logical, | 1140 | (unsigned long long) logical, |
1120 | (unsigned long long) phys, len); | 1141 | (unsigned long long) phys, len); |
1121 | WARN_ON(1); | ||
1122 | return -EIO; | 1142 | return -EIO; |
1123 | } | 1143 | } |
1124 | return 0; | 1144 | return 0; |
@@ -1170,8 +1190,8 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1170 | up_read((&EXT4_I(inode)->i_data_sem)); | 1190 | up_read((&EXT4_I(inode)->i_data_sem)); |
1171 | 1191 | ||
1172 | if (retval > 0 && buffer_mapped(bh)) { | 1192 | if (retval > 0 && buffer_mapped(bh)) { |
1173 | int ret = check_block_validity(inode, block, | 1193 | int ret = check_block_validity(inode, "file system corruption", |
1174 | bh->b_blocknr, retval); | 1194 | block, bh->b_blocknr, retval); |
1175 | if (ret != 0) | 1195 | if (ret != 0) |
1176 | return ret; | 1196 | return ret; |
1177 | } | 1197 | } |
@@ -1235,8 +1255,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1235 | * i_data's format changing. Force the migrate | 1255 | * i_data's format changing. Force the migrate |
1236 | * to fail by clearing migrate flags | 1256 | * to fail by clearing migrate flags |
1237 | */ | 1257 | */ |
1238 | EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & | 1258 | EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; |
1239 | ~EXT4_EXT_MIGRATE; | ||
1240 | } | 1259 | } |
1241 | } | 1260 | } |
1242 | 1261 | ||
@@ -1252,8 +1271,9 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1252 | 1271 | ||
1253 | up_write((&EXT4_I(inode)->i_data_sem)); | 1272 | up_write((&EXT4_I(inode)->i_data_sem)); |
1254 | if (retval > 0 && buffer_mapped(bh)) { | 1273 | if (retval > 0 && buffer_mapped(bh)) { |
1255 | int ret = check_block_validity(inode, block, | 1274 | int ret = check_block_validity(inode, "file system " |
1256 | bh->b_blocknr, retval); | 1275 | "corruption after allocation", |
1276 | block, bh->b_blocknr, retval); | ||
1257 | if (ret != 0) | 1277 | if (ret != 0) |
1258 | return ret; | 1278 | return ret; |
1259 | } | 1279 | } |
@@ -1863,18 +1883,6 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1863 | * Delayed allocation stuff | 1883 | * Delayed allocation stuff |
1864 | */ | 1884 | */ |
1865 | 1885 | ||
1866 | struct mpage_da_data { | ||
1867 | struct inode *inode; | ||
1868 | sector_t b_blocknr; /* start block number of extent */ | ||
1869 | size_t b_size; /* size of extent */ | ||
1870 | unsigned long b_state; /* state of the extent */ | ||
1871 | unsigned long first_page, next_page; /* extent of pages */ | ||
1872 | struct writeback_control *wbc; | ||
1873 | int io_done; | ||
1874 | int pages_written; | ||
1875 | int retval; | ||
1876 | }; | ||
1877 | |||
1878 | /* | 1886 | /* |
1879 | * mpage_da_submit_io - walks through extent of pages and try to write | 1887 | * mpage_da_submit_io - walks through extent of pages and try to write |
1880 | * them with writepage() call back | 1888 | * them with writepage() call back |
@@ -2737,6 +2745,7 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2737 | long pages_skipped; | 2745 | long pages_skipped; |
2738 | int range_cyclic, cycled = 1, io_done = 0; | 2746 | int range_cyclic, cycled = 1, io_done = 0; |
2739 | int needed_blocks, ret = 0, nr_to_writebump = 0; | 2747 | int needed_blocks, ret = 0, nr_to_writebump = 0; |
2748 | loff_t range_start = wbc->range_start; | ||
2740 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2749 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2741 | 2750 | ||
2742 | trace_ext4_da_writepages(inode, wbc); | 2751 | trace_ext4_da_writepages(inode, wbc); |
@@ -2850,6 +2859,7 @@ retry: | |||
2850 | mpd.io_done = 1; | 2859 | mpd.io_done = 1; |
2851 | ret = MPAGE_DA_EXTENT_TAIL; | 2860 | ret = MPAGE_DA_EXTENT_TAIL; |
2852 | } | 2861 | } |
2862 | trace_ext4_da_write_pages(inode, &mpd); | ||
2853 | wbc->nr_to_write -= mpd.pages_written; | 2863 | wbc->nr_to_write -= mpd.pages_written; |
2854 | 2864 | ||
2855 | ext4_journal_stop(handle); | 2865 | ext4_journal_stop(handle); |
@@ -2905,6 +2915,7 @@ out_writepages: | |||
2905 | if (!no_nrwrite_index_update) | 2915 | if (!no_nrwrite_index_update) |
2906 | wbc->no_nrwrite_index_update = 0; | 2916 | wbc->no_nrwrite_index_update = 0; |
2907 | wbc->nr_to_write -= nr_to_writebump; | 2917 | wbc->nr_to_write -= nr_to_writebump; |
2918 | wbc->range_start = range_start; | ||
2908 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); | 2919 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); |
2909 | return ret; | 2920 | return ret; |
2910 | } | 2921 | } |
@@ -3117,6 +3128,8 @@ out: | |||
3117 | */ | 3128 | */ |
3118 | int ext4_alloc_da_blocks(struct inode *inode) | 3129 | int ext4_alloc_da_blocks(struct inode *inode) |
3119 | { | 3130 | { |
3131 | trace_ext4_alloc_da_blocks(inode); | ||
3132 | |||
3120 | if (!EXT4_I(inode)->i_reserved_data_blocks && | 3133 | if (!EXT4_I(inode)->i_reserved_data_blocks && |
3121 | !EXT4_I(inode)->i_reserved_meta_blocks) | 3134 | !EXT4_I(inode)->i_reserved_meta_blocks) |
3122 | return 0; | 3135 | return 0; |
@@ -3659,7 +3672,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
3659 | ext4_handle_dirty_metadata(handle, inode, bh); | 3672 | ext4_handle_dirty_metadata(handle, inode, bh); |
3660 | } | 3673 | } |
3661 | ext4_mark_inode_dirty(handle, inode); | 3674 | ext4_mark_inode_dirty(handle, inode); |
3662 | ext4_journal_test_restart(handle, inode); | 3675 | ext4_truncate_restart_trans(handle, inode, |
3676 | blocks_for_truncate(inode)); | ||
3663 | if (bh) { | 3677 | if (bh) { |
3664 | BUFFER_TRACE(bh, "retaking write access"); | 3678 | BUFFER_TRACE(bh, "retaking write access"); |
3665 | ext4_journal_get_write_access(handle, bh); | 3679 | ext4_journal_get_write_access(handle, bh); |
@@ -3870,7 +3884,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
3870 | return; | 3884 | return; |
3871 | if (try_to_extend_transaction(handle, inode)) { | 3885 | if (try_to_extend_transaction(handle, inode)) { |
3872 | ext4_mark_inode_dirty(handle, inode); | 3886 | ext4_mark_inode_dirty(handle, inode); |
3873 | ext4_journal_test_restart(handle, inode); | 3887 | ext4_truncate_restart_trans(handle, inode, |
3888 | blocks_for_truncate(inode)); | ||
3874 | } | 3889 | } |
3875 | 3890 | ||
3876 | ext4_free_blocks(handle, inode, nr, 1, 1); | 3891 | ext4_free_blocks(handle, inode, nr, 1, 1); |
@@ -3958,8 +3973,7 @@ void ext4_truncate(struct inode *inode) | |||
3958 | if (!ext4_can_truncate(inode)) | 3973 | if (!ext4_can_truncate(inode)) |
3959 | return; | 3974 | return; |
3960 | 3975 | ||
3961 | if (ei->i_disksize && inode->i_size == 0 && | 3976 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) |
3962 | !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | ||
3963 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; | 3977 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; |
3964 | 3978 | ||
3965 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 3979 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
@@ -4533,7 +4547,8 @@ static int ext4_inode_blocks_set(handle_t *handle, | |||
4533 | */ | 4547 | */ |
4534 | static int ext4_do_update_inode(handle_t *handle, | 4548 | static int ext4_do_update_inode(handle_t *handle, |
4535 | struct inode *inode, | 4549 | struct inode *inode, |
4536 | struct ext4_iloc *iloc) | 4550 | struct ext4_iloc *iloc, |
4551 | int do_sync) | ||
4537 | { | 4552 | { |
4538 | struct ext4_inode *raw_inode = ext4_raw_inode(iloc); | 4553 | struct ext4_inode *raw_inode = ext4_raw_inode(iloc); |
4539 | struct ext4_inode_info *ei = EXT4_I(inode); | 4554 | struct ext4_inode_info *ei = EXT4_I(inode); |
@@ -4581,8 +4596,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4581 | if (ext4_inode_blocks_set(handle, raw_inode, ei)) | 4596 | if (ext4_inode_blocks_set(handle, raw_inode, ei)) |
4582 | goto out_brelse; | 4597 | goto out_brelse; |
4583 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); | 4598 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); |
4584 | /* clear the migrate flag in the raw_inode */ | 4599 | raw_inode->i_flags = cpu_to_le32(ei->i_flags); |
4585 | raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE); | ||
4586 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | 4600 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != |
4587 | cpu_to_le32(EXT4_OS_HURD)) | 4601 | cpu_to_le32(EXT4_OS_HURD)) |
4588 | raw_inode->i_file_acl_high = | 4602 | raw_inode->i_file_acl_high = |
@@ -4635,10 +4649,22 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4635 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); | 4649 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); |
4636 | } | 4650 | } |
4637 | 4651 | ||
4638 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 4652 | /* |
4639 | rc = ext4_handle_dirty_metadata(handle, inode, bh); | 4653 | * If we're not using a journal and we were called from |
4640 | if (!err) | 4654 | * ext4_write_inode() to sync the inode (making do_sync true), |
4641 | err = rc; | 4655 | * we can just use sync_dirty_buffer() directly to do our dirty |
4656 | * work. Testing s_journal here is a bit redundant but it's | ||
4657 | * worth it to avoid potential future trouble. | ||
4658 | */ | ||
4659 | if (EXT4_SB(inode->i_sb)->s_journal == NULL && do_sync) { | ||
4660 | BUFFER_TRACE(bh, "call sync_dirty_buffer"); | ||
4661 | sync_dirty_buffer(bh); | ||
4662 | } else { | ||
4663 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | ||
4664 | rc = ext4_handle_dirty_metadata(handle, inode, bh); | ||
4665 | if (!err) | ||
4666 | err = rc; | ||
4667 | } | ||
4642 | ei->i_state &= ~EXT4_STATE_NEW; | 4668 | ei->i_state &= ~EXT4_STATE_NEW; |
4643 | 4669 | ||
4644 | out_brelse: | 4670 | out_brelse: |
@@ -4684,19 +4710,32 @@ out_brelse: | |||
4684 | */ | 4710 | */ |
4685 | int ext4_write_inode(struct inode *inode, int wait) | 4711 | int ext4_write_inode(struct inode *inode, int wait) |
4686 | { | 4712 | { |
4713 | int err; | ||
4714 | |||
4687 | if (current->flags & PF_MEMALLOC) | 4715 | if (current->flags & PF_MEMALLOC) |
4688 | return 0; | 4716 | return 0; |
4689 | 4717 | ||
4690 | if (ext4_journal_current_handle()) { | 4718 | if (EXT4_SB(inode->i_sb)->s_journal) { |
4691 | jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); | 4719 | if (ext4_journal_current_handle()) { |
4692 | dump_stack(); | 4720 | jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); |
4693 | return -EIO; | 4721 | dump_stack(); |
4694 | } | 4722 | return -EIO; |
4723 | } | ||
4695 | 4724 | ||
4696 | if (!wait) | 4725 | if (!wait) |
4697 | return 0; | 4726 | return 0; |
4727 | |||
4728 | err = ext4_force_commit(inode->i_sb); | ||
4729 | } else { | ||
4730 | struct ext4_iloc iloc; | ||
4698 | 4731 | ||
4699 | return ext4_force_commit(inode->i_sb); | 4732 | err = ext4_get_inode_loc(inode, &iloc); |
4733 | if (err) | ||
4734 | return err; | ||
4735 | err = ext4_do_update_inode(EXT4_NOJOURNAL_HANDLE, | ||
4736 | inode, &iloc, wait); | ||
4737 | } | ||
4738 | return err; | ||
4700 | } | 4739 | } |
4701 | 4740 | ||
4702 | /* | 4741 | /* |
@@ -4990,7 +5029,7 @@ int ext4_mark_iloc_dirty(handle_t *handle, | |||
4990 | get_bh(iloc->bh); | 5029 | get_bh(iloc->bh); |
4991 | 5030 | ||
4992 | /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ | 5031 | /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ |
4993 | err = ext4_do_update_inode(handle, inode, iloc); | 5032 | err = ext4_do_update_inode(handle, inode, iloc, 0); |
4994 | put_bh(iloc->bh); | 5033 | put_bh(iloc->bh); |
4995 | return err; | 5034 | return err; |
4996 | } | 5035 | } |
@@ -5281,12 +5320,21 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5281 | else | 5320 | else |
5282 | len = PAGE_CACHE_SIZE; | 5321 | len = PAGE_CACHE_SIZE; |
5283 | 5322 | ||
5323 | lock_page(page); | ||
5324 | /* | ||
5325 | * return if we have all the buffers mapped. This avoid | ||
5326 | * the need to call write_begin/write_end which does a | ||
5327 | * journal_start/journal_stop which can block and take | ||
5328 | * long time | ||
5329 | */ | ||
5284 | if (page_has_buffers(page)) { | 5330 | if (page_has_buffers(page)) { |
5285 | /* return if we have all the buffers mapped */ | ||
5286 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | 5331 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, |
5287 | ext4_bh_unmapped)) | 5332 | ext4_bh_unmapped)) { |
5333 | unlock_page(page); | ||
5288 | goto out_unlock; | 5334 | goto out_unlock; |
5335 | } | ||
5289 | } | 5336 | } |
5337 | unlock_page(page); | ||
5290 | /* | 5338 | /* |
5291 | * OK, we need to fill the hole... Do write_begin write_end | 5339 | * OK, we need to fill the hole... Do write_begin write_end |
5292 | * to do block allocation/reservation.We are not holding | 5340 | * to do block allocation/reservation.We are not holding |