aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--fs/ext4/inode.c193
1 files changed, 85 insertions, 108 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4e8e2f15b8bd..5352db1a3086 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -71,58 +71,6 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
71} 71}
72 72
73/* 73/*
74 * The ext4 forget function must perform a revoke if we are freeing data
75 * which has been journaled. Metadata (eg. indirect blocks) must be
76 * revoked in all cases.
77 *
78 * "bh" may be NULL: a metadata block may have been freed from memory
79 * but there may still be a record of it in the journal, and that record
80 * still needs to be revoked.
81 *
82 * If the handle isn't valid we're not journaling, but we still need to
83 * call into ext4_journal_revoke() to put the buffer head.
84 */
85int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
86 struct buffer_head *bh, ext4_fsblk_t blocknr)
87{
88 int err;
89
90 might_sleep();
91
92 BUFFER_TRACE(bh, "enter");
93
94 jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
95 "data mode %x\n",
96 bh, is_metadata, inode->i_mode,
97 test_opt(inode->i_sb, DATA_FLAGS));
98
99 /* Never use the revoke function if we are doing full data
100 * journaling: there is no need to, and a V1 superblock won't
101 * support it. Otherwise, only skip the revoke on un-journaled
102 * data blocks. */
103
104 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
105 (!is_metadata && !ext4_should_journal_data(inode))) {
106 if (bh) {
107 BUFFER_TRACE(bh, "call jbd2_journal_forget");
108 return ext4_journal_forget(handle, bh);
109 }
110 return 0;
111 }
112
113 /*
114 * data!=journal && (is_metadata || should_journal_data(inode))
115 */
116 BUFFER_TRACE(bh, "call ext4_journal_revoke");
117 err = ext4_journal_revoke(handle, blocknr, bh);
118 if (err)
119 ext4_abort(inode->i_sb, __func__,
120 "error %d when attempting revoke", err);
121 BUFFER_TRACE(bh, "exit");
122 return err;
123}
124
125/*
126 * Work out how many blocks we need to proceed with the next chunk of a 74 * Work out how many blocks we need to proceed with the next chunk of a
127 * truncate transaction. 75 * truncate transaction.
128 */ 76 */
@@ -721,7 +669,7 @@ allocated:
721 return ret; 669 return ret;
722failed_out: 670failed_out:
723 for (i = 0; i < index; i++) 671 for (i = 0; i < index; i++)
724 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); 672 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
725 return ret; 673 return ret;
726} 674}
727 675
@@ -817,14 +765,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
817 return err; 765 return err;
818failed: 766failed:
819 /* Allocation failed, free what we already allocated */ 767 /* Allocation failed, free what we already allocated */
768 ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
820 for (i = 1; i <= n ; i++) { 769 for (i = 1; i <= n ; i++) {
821 BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); 770 /*
822 ext4_journal_forget(handle, branch[i].bh); 771 * branch[i].bh is newly allocated, so there is no
772 * need to revoke the block, which is why we don't
773 * need to set EXT4_FREE_BLOCKS_METADATA.
774 */
775 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
776 EXT4_FREE_BLOCKS_FORGET);
823 } 777 }
824 for (i = 0; i < indirect_blks; i++) 778 for (i = n+1; i < indirect_blks; i++)
825 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); 779 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
826 780
827 ext4_free_blocks(handle, inode, new_blocks[i], num, 0); 781 ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
828 782
829 return err; 783 return err;
830} 784}
@@ -903,12 +857,16 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
903 857
904err_out: 858err_out:
905 for (i = 1; i <= num; i++) { 859 for (i = 1; i <= num; i++) {
906 BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget"); 860 /*
907 ext4_journal_forget(handle, where[i].bh); 861 * branch[i].bh is newly allocated, so there is no
908 ext4_free_blocks(handle, inode, 862 * need to revoke the block, which is why we don't
909 le32_to_cpu(where[i-1].key), 1, 0); 863 * need to set EXT4_FREE_BLOCKS_METADATA.
864 */
865 ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
866 EXT4_FREE_BLOCKS_FORGET);
910 } 867 }
911 ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 0); 868 ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
869 blks, 0);
912 870
913 return err; 871 return err;
914} 872}
@@ -1021,10 +979,12 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
1021 if (!err) 979 if (!err)
1022 err = ext4_splice_branch(handle, inode, iblock, 980 err = ext4_splice_branch(handle, inode, iblock,
1023 partial, indirect_blks, count); 981 partial, indirect_blks, count);
1024 else 982 if (err)
1025 goto cleanup; 983 goto cleanup;
1026 984
1027 set_buffer_new(bh_result); 985 set_buffer_new(bh_result);
986
987 ext4_update_inode_fsync_trans(handle, inode, 1);
1028got_it: 988got_it:
1029 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); 989 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
1030 if (count > blocks_to_boundary) 990 if (count > blocks_to_boundary)
@@ -1052,7 +1012,7 @@ qsize_t ext4_get_reserved_space(struct inode *inode)
1052 EXT4_I(inode)->i_reserved_meta_blocks; 1012 EXT4_I(inode)->i_reserved_meta_blocks;
1053 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1013 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1054 1014
1055 return total; 1015 return (total << inode->i_blkbits);
1056} 1016}
1057/* 1017/*
1058 * Calculate the number of metadata blocks need to reserve 1018 * Calculate the number of metadata blocks need to reserve
@@ -1534,6 +1494,16 @@ static int do_journal_get_write_access(handle_t *handle,
1534 return ext4_journal_get_write_access(handle, bh); 1494 return ext4_journal_get_write_access(handle, bh);
1535} 1495}
1536 1496
1497/*
1498 * Truncate blocks that were not used by write. We have to truncate the
1499 * pagecache as well so that corresponding buffers get properly unmapped.
1500 */
1501static void ext4_truncate_failed_write(struct inode *inode)
1502{
1503 truncate_inode_pages(inode->i_mapping, inode->i_size);
1504 ext4_truncate(inode);
1505}
1506
1537static int ext4_write_begin(struct file *file, struct address_space *mapping, 1507static int ext4_write_begin(struct file *file, struct address_space *mapping,
1538 loff_t pos, unsigned len, unsigned flags, 1508 loff_t pos, unsigned len, unsigned flags,
1539 struct page **pagep, void **fsdata) 1509 struct page **pagep, void **fsdata)
@@ -1599,7 +1569,7 @@ retry:
1599 1569
1600 ext4_journal_stop(handle); 1570 ext4_journal_stop(handle);
1601 if (pos + len > inode->i_size) { 1571 if (pos + len > inode->i_size) {
1602 ext4_truncate(inode); 1572 ext4_truncate_failed_write(inode);
1603 /* 1573 /*
1604 * If truncate failed early the inode might 1574 * If truncate failed early the inode might
1605 * still be on the orphan list; we need to 1575 * still be on the orphan list; we need to
@@ -1709,7 +1679,7 @@ static int ext4_ordered_write_end(struct file *file,
1709 ret = ret2; 1679 ret = ret2;
1710 1680
1711 if (pos + len > inode->i_size) { 1681 if (pos + len > inode->i_size) {
1712 ext4_truncate(inode); 1682 ext4_truncate_failed_write(inode);
1713 /* 1683 /*
1714 * If truncate failed early the inode might still be 1684 * If truncate failed early the inode might still be
1715 * on the orphan list; we need to make sure the inode 1685 * on the orphan list; we need to make sure the inode
@@ -1751,7 +1721,7 @@ static int ext4_writeback_write_end(struct file *file,
1751 ret = ret2; 1721 ret = ret2;
1752 1722
1753 if (pos + len > inode->i_size) { 1723 if (pos + len > inode->i_size) {
1754 ext4_truncate(inode); 1724 ext4_truncate_failed_write(inode);
1755 /* 1725 /*
1756 * If truncate failed early the inode might still be 1726 * If truncate failed early the inode might still be
1757 * on the orphan list; we need to make sure the inode 1727 * on the orphan list; we need to make sure the inode
@@ -1814,7 +1784,7 @@ static int ext4_journalled_write_end(struct file *file,
1814 if (!ret) 1784 if (!ret)
1815 ret = ret2; 1785 ret = ret2;
1816 if (pos + len > inode->i_size) { 1786 if (pos + len > inode->i_size) {
1817 ext4_truncate(inode); 1787 ext4_truncate_failed_write(inode);
1818 /* 1788 /*
1819 * If truncate failed early the inode might still be 1789 * If truncate failed early the inode might still be
1820 * on the orphan list; we need to make sure the inode 1790 * on the orphan list; we need to make sure the inode
@@ -2600,7 +2570,6 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
2600} 2570}
2601 2571
2602static int __ext4_journalled_writepage(struct page *page, 2572static int __ext4_journalled_writepage(struct page *page,
2603 struct writeback_control *wbc,
2604 unsigned int len) 2573 unsigned int len)
2605{ 2574{
2606 struct address_space *mapping = page->mapping; 2575 struct address_space *mapping = page->mapping;
@@ -2758,7 +2727,7 @@ static int ext4_writepage(struct page *page,
2758 * doesn't seem much point in redirtying the page here. 2727 * doesn't seem much point in redirtying the page here.
2759 */ 2728 */
2760 ClearPageChecked(page); 2729 ClearPageChecked(page);
2761 return __ext4_journalled_writepage(page, wbc, len); 2730 return __ext4_journalled_writepage(page, len);
2762 } 2731 }
2763 2732
2764 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) 2733 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
@@ -2788,7 +2757,7 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
2788 * number of contiguous block. So we will limit 2757 * number of contiguous block. So we will limit
2789 * number of contiguous block to a sane value 2758 * number of contiguous block to a sane value
2790 */ 2759 */
2791 if (!(inode->i_flags & EXT4_EXTENTS_FL) && 2760 if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
2792 (max_blocks > EXT4_MAX_TRANS_DATA)) 2761 (max_blocks > EXT4_MAX_TRANS_DATA))
2793 max_blocks = EXT4_MAX_TRANS_DATA; 2762 max_blocks = EXT4_MAX_TRANS_DATA;
2794 2763
@@ -3091,7 +3060,7 @@ retry:
3091 * i_size_read because we hold i_mutex. 3060 * i_size_read because we hold i_mutex.
3092 */ 3061 */
3093 if (pos + len > inode->i_size) 3062 if (pos + len > inode->i_size)
3094 ext4_truncate(inode); 3063 ext4_truncate_failed_write(inode);
3095 } 3064 }
3096 3065
3097 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3066 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -4120,6 +4089,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
4120 __le32 *last) 4089 __le32 *last)
4121{ 4090{
4122 __le32 *p; 4091 __le32 *p;
4092 int flags = EXT4_FREE_BLOCKS_FORGET;
4093
4094 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
4095 flags |= EXT4_FREE_BLOCKS_METADATA;
4096
4123 if (try_to_extend_transaction(handle, inode)) { 4097 if (try_to_extend_transaction(handle, inode)) {
4124 if (bh) { 4098 if (bh) {
4125 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 4099 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
@@ -4134,27 +4108,10 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
4134 } 4108 }
4135 } 4109 }
4136 4110
4137 /* 4111 for (p = first; p < last; p++)
4138 * Any buffers which are on the journal will be in memory. We 4112 *p = 0;
4139 * find them on the hash table so jbd2_journal_revoke() will
4140 * run jbd2_journal_forget() on them. We've already detached
4141 * each block from the file, so bforget() in
4142 * jbd2_journal_forget() should be safe.
4143 *
4144 * AKPM: turn on bforget in jbd2_journal_forget()!!!
4145 */
4146 for (p = first; p < last; p++) {
4147 u32 nr = le32_to_cpu(*p);
4148 if (nr) {
4149 struct buffer_head *tbh;
4150
4151 *p = 0;
4152 tbh = sb_find_get_block(inode->i_sb, nr);
4153 ext4_forget(handle, 0, inode, tbh, nr);
4154 }
4155 }
4156 4113
4157 ext4_free_blocks(handle, inode, block_to_free, count, 0); 4114 ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
4158} 4115}
4159 4116
4160/** 4117/**
@@ -4342,7 +4299,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4342 blocks_for_truncate(inode)); 4299 blocks_for_truncate(inode));
4343 } 4300 }
4344 4301
4345 ext4_free_blocks(handle, inode, nr, 1, 1); 4302 ext4_free_blocks(handle, inode, 0, nr, 1,
4303 EXT4_FREE_BLOCKS_METADATA);
4346 4304
4347 if (parent_bh) { 4305 if (parent_bh) {
4348 /* 4306 /*
@@ -4781,8 +4739,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4781 struct ext4_iloc iloc; 4739 struct ext4_iloc iloc;
4782 struct ext4_inode *raw_inode; 4740 struct ext4_inode *raw_inode;
4783 struct ext4_inode_info *ei; 4741 struct ext4_inode_info *ei;
4784 struct buffer_head *bh;
4785 struct inode *inode; 4742 struct inode *inode;
4743 journal_t *journal = EXT4_SB(sb)->s_journal;
4786 long ret; 4744 long ret;
4787 int block; 4745 int block;
4788 4746
@@ -4793,11 +4751,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4793 return inode; 4751 return inode;
4794 4752
4795 ei = EXT4_I(inode); 4753 ei = EXT4_I(inode);
4754 iloc.bh = 0;
4796 4755
4797 ret = __ext4_get_inode_loc(inode, &iloc, 0); 4756 ret = __ext4_get_inode_loc(inode, &iloc, 0);
4798 if (ret < 0) 4757 if (ret < 0)
4799 goto bad_inode; 4758 goto bad_inode;
4800 bh = iloc.bh;
4801 raw_inode = ext4_raw_inode(&iloc); 4759 raw_inode = ext4_raw_inode(&iloc);
4802 inode->i_mode = le16_to_cpu(raw_inode->i_mode); 4760 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
4803 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); 4761 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
@@ -4820,7 +4778,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4820 if (inode->i_mode == 0 || 4778 if (inode->i_mode == 0 ||
4821 !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { 4779 !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
4822 /* this inode is deleted */ 4780 /* this inode is deleted */
4823 brelse(bh);
4824 ret = -ESTALE; 4781 ret = -ESTALE;
4825 goto bad_inode; 4782 goto bad_inode;
4826 } 4783 }
@@ -4848,11 +4805,35 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4848 ei->i_data[block] = raw_inode->i_block[block]; 4805 ei->i_data[block] = raw_inode->i_block[block];
4849 INIT_LIST_HEAD(&ei->i_orphan); 4806 INIT_LIST_HEAD(&ei->i_orphan);
4850 4807
4808 /*
4809 * Set transaction id's of transactions that have to be committed
4810 * to finish f[data]sync. We set them to currently running transaction
4811 * as we cannot be sure that the inode or some of its metadata isn't
4812 * part of the transaction - the inode could have been reclaimed and
4813 * now it is reread from disk.
4814 */
4815 if (journal) {
4816 transaction_t *transaction;
4817 tid_t tid;
4818
4819 spin_lock(&journal->j_state_lock);
4820 if (journal->j_running_transaction)
4821 transaction = journal->j_running_transaction;
4822 else
4823 transaction = journal->j_committing_transaction;
4824 if (transaction)
4825 tid = transaction->t_tid;
4826 else
4827 tid = journal->j_commit_sequence;
4828 spin_unlock(&journal->j_state_lock);
4829 ei->i_sync_tid = tid;
4830 ei->i_datasync_tid = tid;
4831 }
4832
4851 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 4833 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
4852 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); 4834 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
4853 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > 4835 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
4854 EXT4_INODE_SIZE(inode->i_sb)) { 4836 EXT4_INODE_SIZE(inode->i_sb)) {
4855 brelse(bh);
4856 ret = -EIO; 4837 ret = -EIO;
4857 goto bad_inode; 4838 goto bad_inode;
4858 } 4839 }
@@ -4884,10 +4865,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4884 4865
4885 ret = 0; 4866 ret = 0;
4886 if (ei->i_file_acl && 4867 if (ei->i_file_acl &&
4887 ((ei->i_file_acl < 4868 !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
4888 (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
4889 EXT4_SB(sb)->s_gdb_count)) ||
4890 (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
4891 ext4_error(sb, __func__, 4869 ext4_error(sb, __func__,
4892 "bad extended attribute block %llu in inode #%lu", 4870 "bad extended attribute block %llu in inode #%lu",
4893 ei->i_file_acl, inode->i_ino); 4871 ei->i_file_acl, inode->i_ino);
@@ -4905,10 +4883,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4905 /* Validate block references which are part of inode */ 4883 /* Validate block references which are part of inode */
4906 ret = ext4_check_inode_blockref(inode); 4884 ret = ext4_check_inode_blockref(inode);
4907 } 4885 }
4908 if (ret) { 4886 if (ret)
4909 brelse(bh);
4910 goto bad_inode; 4887 goto bad_inode;
4911 }
4912 4888
4913 if (S_ISREG(inode->i_mode)) { 4889 if (S_ISREG(inode->i_mode)) {
4914 inode->i_op = &ext4_file_inode_operations; 4890 inode->i_op = &ext4_file_inode_operations;
@@ -4936,7 +4912,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4936 init_special_inode(inode, inode->i_mode, 4912 init_special_inode(inode, inode->i_mode,
4937 new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); 4913 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
4938 } else { 4914 } else {
4939 brelse(bh);
4940 ret = -EIO; 4915 ret = -EIO;
4941 ext4_error(inode->i_sb, __func__, 4916 ext4_error(inode->i_sb, __func__,
4942 "bogus i_mode (%o) for inode=%lu", 4917 "bogus i_mode (%o) for inode=%lu",
@@ -4949,6 +4924,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4949 return inode; 4924 return inode;
4950 4925
4951bad_inode: 4926bad_inode:
4927 brelse(iloc.bh);
4952 iget_failed(inode); 4928 iget_failed(inode);
4953 return ERR_PTR(ret); 4929 return ERR_PTR(ret);
4954} 4930}
@@ -5108,6 +5084,7 @@ static int ext4_do_update_inode(handle_t *handle,
5108 err = rc; 5084 err = rc;
5109 ei->i_state &= ~EXT4_STATE_NEW; 5085 ei->i_state &= ~EXT4_STATE_NEW;
5110 5086
5087 ext4_update_inode_fsync_trans(handle, inode, 0);
5111out_brelse: 5088out_brelse:
5112 brelse(bh); 5089 brelse(bh);
5113 ext4_std_error(inode->i_sb, err); 5090 ext4_std_error(inode->i_sb, err);
@@ -5227,8 +5204,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5227 5204
5228 /* (user+group)*(old+new) structure, inode write (sb, 5205 /* (user+group)*(old+new) structure, inode write (sb,
5229 * inode block, ? - but truncate inode update has it) */ 5206 * inode block, ? - but truncate inode update has it) */
5230 handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+ 5207 handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
5231 EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3); 5208 EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
5232 if (IS_ERR(handle)) { 5209 if (IS_ERR(handle)) {
5233 error = PTR_ERR(handle); 5210 error = PTR_ERR(handle);
5234 goto err_out; 5211 goto err_out;