diff options
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 759 |
1 files changed, 496 insertions, 263 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ab807963a614..81d605412844 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -38,6 +38,8 @@ | |||
38 | #include <linux/uio.h> | 38 | #include <linux/uio.h> |
39 | #include <linux/bio.h> | 39 | #include <linux/bio.h> |
40 | #include <linux/workqueue.h> | 40 | #include <linux/workqueue.h> |
41 | #include <linux/kernel.h> | ||
42 | #include <linux/slab.h> | ||
41 | 43 | ||
42 | #include "ext4_jbd2.h" | 44 | #include "ext4_jbd2.h" |
43 | #include "xattr.h" | 45 | #include "xattr.h" |
@@ -170,6 +172,9 @@ void ext4_delete_inode(struct inode *inode) | |||
170 | handle_t *handle; | 172 | handle_t *handle; |
171 | int err; | 173 | int err; |
172 | 174 | ||
175 | if (!is_bad_inode(inode)) | ||
176 | dquot_initialize(inode); | ||
177 | |||
173 | if (ext4_should_order_data(inode)) | 178 | if (ext4_should_order_data(inode)) |
174 | ext4_begin_ordered_truncate(inode, 0); | 179 | ext4_begin_ordered_truncate(inode, 0); |
175 | truncate_inode_pages(&inode->i_data, 0); | 180 | truncate_inode_pages(&inode->i_data, 0); |
@@ -194,7 +199,7 @@ void ext4_delete_inode(struct inode *inode) | |||
194 | inode->i_size = 0; | 199 | inode->i_size = 0; |
195 | err = ext4_mark_inode_dirty(handle, inode); | 200 | err = ext4_mark_inode_dirty(handle, inode); |
196 | if (err) { | 201 | if (err) { |
197 | ext4_warning(inode->i_sb, __func__, | 202 | ext4_warning(inode->i_sb, |
198 | "couldn't mark inode dirty (err %d)", err); | 203 | "couldn't mark inode dirty (err %d)", err); |
199 | goto stop_handle; | 204 | goto stop_handle; |
200 | } | 205 | } |
@@ -212,7 +217,7 @@ void ext4_delete_inode(struct inode *inode) | |||
212 | if (err > 0) | 217 | if (err > 0) |
213 | err = ext4_journal_restart(handle, 3); | 218 | err = ext4_journal_restart(handle, 3); |
214 | if (err != 0) { | 219 | if (err != 0) { |
215 | ext4_warning(inode->i_sb, __func__, | 220 | ext4_warning(inode->i_sb, |
216 | "couldn't extend journal (err %d)", err); | 221 | "couldn't extend journal (err %d)", err); |
217 | stop_handle: | 222 | stop_handle: |
218 | ext4_journal_stop(handle); | 223 | ext4_journal_stop(handle); |
@@ -323,8 +328,7 @@ static int ext4_block_to_path(struct inode *inode, | |||
323 | offsets[n++] = i_block & (ptrs - 1); | 328 | offsets[n++] = i_block & (ptrs - 1); |
324 | final = ptrs; | 329 | final = ptrs; |
325 | } else { | 330 | } else { |
326 | ext4_warning(inode->i_sb, "ext4_block_to_path", | 331 | ext4_warning(inode->i_sb, "block %lu > max in inode %lu", |
327 | "block %lu > max in inode %lu", | ||
328 | i_block + direct_blocks + | 332 | i_block + direct_blocks + |
329 | indirect_blocks + double_blocks, inode->i_ino); | 333 | indirect_blocks + double_blocks, inode->i_ino); |
330 | } | 334 | } |
@@ -344,7 +348,7 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, | |||
344 | if (blk && | 348 | if (blk && |
345 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), | 349 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), |
346 | blk, 1))) { | 350 | blk, 1))) { |
347 | ext4_error(inode->i_sb, function, | 351 | __ext4_error(inode->i_sb, function, |
348 | "invalid block reference %u " | 352 | "invalid block reference %u " |
349 | "in inode #%lu", blk, inode->i_ino); | 353 | "in inode #%lu", blk, inode->i_ino); |
350 | return -EIO; | 354 | return -EIO; |
@@ -607,7 +611,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
607 | if (*err) | 611 | if (*err) |
608 | goto failed_out; | 612 | goto failed_out; |
609 | 613 | ||
610 | BUG_ON(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS); | 614 | if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) { |
615 | EXT4_ERROR_INODE(inode, | ||
616 | "current_block %llu + count %lu > %d!", | ||
617 | current_block, count, | ||
618 | EXT4_MAX_BLOCK_FILE_PHYS); | ||
619 | *err = -EIO; | ||
620 | goto failed_out; | ||
621 | } | ||
611 | 622 | ||
612 | target -= count; | 623 | target -= count; |
613 | /* allocate blocks for indirect blocks */ | 624 | /* allocate blocks for indirect blocks */ |
@@ -643,7 +654,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
643 | ar.flags = EXT4_MB_HINT_DATA; | 654 | ar.flags = EXT4_MB_HINT_DATA; |
644 | 655 | ||
645 | current_block = ext4_mb_new_blocks(handle, &ar, err); | 656 | current_block = ext4_mb_new_blocks(handle, &ar, err); |
646 | BUG_ON(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS); | 657 | if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) { |
658 | EXT4_ERROR_INODE(inode, | ||
659 | "current_block %llu + ar.len %d > %d!", | ||
660 | current_block, ar.len, | ||
661 | EXT4_MAX_BLOCK_FILE_PHYS); | ||
662 | *err = -EIO; | ||
663 | goto failed_out; | ||
664 | } | ||
647 | 665 | ||
648 | if (*err && (target == blks)) { | 666 | if (*err && (target == blks)) { |
649 | /* | 667 | /* |
@@ -1009,86 +1027,115 @@ qsize_t *ext4_get_reserved_space(struct inode *inode) | |||
1009 | return &EXT4_I(inode)->i_reserved_quota; | 1027 | return &EXT4_I(inode)->i_reserved_quota; |
1010 | } | 1028 | } |
1011 | #endif | 1029 | #endif |
1030 | |||
1012 | /* | 1031 | /* |
1013 | * Calculate the number of metadata blocks need to reserve | 1032 | * Calculate the number of metadata blocks need to reserve |
1014 | * to allocate @blocks for non extent file based file | 1033 | * to allocate a new block at @lblocks for non extent file based file |
1015 | */ | 1034 | */ |
1016 | static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) | 1035 | static int ext4_indirect_calc_metadata_amount(struct inode *inode, |
1036 | sector_t lblock) | ||
1017 | { | 1037 | { |
1018 | int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); | 1038 | struct ext4_inode_info *ei = EXT4_I(inode); |
1019 | int ind_blks, dind_blks, tind_blks; | 1039 | sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1); |
1020 | 1040 | int blk_bits; | |
1021 | /* number of new indirect blocks needed */ | ||
1022 | ind_blks = (blocks + icap - 1) / icap; | ||
1023 | 1041 | ||
1024 | dind_blks = (ind_blks + icap - 1) / icap; | 1042 | if (lblock < EXT4_NDIR_BLOCKS) |
1043 | return 0; | ||
1025 | 1044 | ||
1026 | tind_blks = 1; | 1045 | lblock -= EXT4_NDIR_BLOCKS; |
1027 | 1046 | ||
1028 | return ind_blks + dind_blks + tind_blks; | 1047 | if (ei->i_da_metadata_calc_len && |
1048 | (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) { | ||
1049 | ei->i_da_metadata_calc_len++; | ||
1050 | return 0; | ||
1051 | } | ||
1052 | ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; | ||
1053 | ei->i_da_metadata_calc_len = 1; | ||
1054 | blk_bits = order_base_2(lblock); | ||
1055 | return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; | ||
1029 | } | 1056 | } |
1030 | 1057 | ||
1031 | /* | 1058 | /* |
1032 | * Calculate the number of metadata blocks need to reserve | 1059 | * Calculate the number of metadata blocks need to reserve |
1033 | * to allocate given number of blocks | 1060 | * to allocate a block located at @lblock |
1034 | */ | 1061 | */ |
1035 | static int ext4_calc_metadata_amount(struct inode *inode, int blocks) | 1062 | static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) |
1036 | { | 1063 | { |
1037 | if (!blocks) | ||
1038 | return 0; | ||
1039 | |||
1040 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | 1064 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) |
1041 | return ext4_ext_calc_metadata_amount(inode, blocks); | 1065 | return ext4_ext_calc_metadata_amount(inode, lblock); |
1042 | 1066 | ||
1043 | return ext4_indirect_calc_metadata_amount(inode, blocks); | 1067 | return ext4_indirect_calc_metadata_amount(inode, lblock); |
1044 | } | 1068 | } |
1045 | 1069 | ||
1046 | static void ext4_da_update_reserve_space(struct inode *inode, int used) | 1070 | /* |
1071 | * Called with i_data_sem down, which is important since we can call | ||
1072 | * ext4_discard_preallocations() from here. | ||
1073 | */ | ||
1074 | void ext4_da_update_reserve_space(struct inode *inode, | ||
1075 | int used, int quota_claim) | ||
1047 | { | 1076 | { |
1048 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1077 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1049 | int total, mdb, mdb_free, mdb_claim = 0; | 1078 | struct ext4_inode_info *ei = EXT4_I(inode); |
1050 | 1079 | int mdb_free = 0, allocated_meta_blocks = 0; | |
1051 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1080 | |
1052 | /* recalculate the number of metablocks still need to be reserved */ | 1081 | spin_lock(&ei->i_block_reservation_lock); |
1053 | total = EXT4_I(inode)->i_reserved_data_blocks - used; | 1082 | trace_ext4_da_update_reserve_space(inode, used); |
1054 | mdb = ext4_calc_metadata_amount(inode, total); | 1083 | if (unlikely(used > ei->i_reserved_data_blocks)) { |
1055 | 1084 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " | |
1056 | /* figure out how many metablocks to release */ | 1085 | "with only %d reserved data blocks\n", |
1057 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | 1086 | __func__, inode->i_ino, used, |
1058 | mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; | 1087 | ei->i_reserved_data_blocks); |
1059 | 1088 | WARN_ON(1); | |
1060 | if (mdb_free) { | 1089 | used = ei->i_reserved_data_blocks; |
1061 | /* Account for allocated meta_blocks */ | 1090 | } |
1062 | mdb_claim = EXT4_I(inode)->i_allocated_meta_blocks; | 1091 | |
1063 | BUG_ON(mdb_free < mdb_claim); | 1092 | /* Update per-inode reservations */ |
1064 | mdb_free -= mdb_claim; | 1093 | ei->i_reserved_data_blocks -= used; |
1065 | 1094 | used += ei->i_allocated_meta_blocks; | |
1066 | /* update fs dirty blocks counter */ | 1095 | ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; |
1096 | allocated_meta_blocks = ei->i_allocated_meta_blocks; | ||
1097 | ei->i_allocated_meta_blocks = 0; | ||
1098 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, used); | ||
1099 | |||
1100 | if (ei->i_reserved_data_blocks == 0) { | ||
1101 | /* | ||
1102 | * We can release all of the reserved metadata blocks | ||
1103 | * only when we have written all of the delayed | ||
1104 | * allocation blocks. | ||
1105 | */ | ||
1106 | mdb_free = ei->i_reserved_meta_blocks; | ||
1107 | ei->i_reserved_meta_blocks = 0; | ||
1108 | ei->i_da_metadata_calc_len = 0; | ||
1067 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); | 1109 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); |
1068 | EXT4_I(inode)->i_allocated_meta_blocks = 0; | ||
1069 | EXT4_I(inode)->i_reserved_meta_blocks = mdb; | ||
1070 | } | 1110 | } |
1071 | |||
1072 | /* update per-inode reservations */ | ||
1073 | BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); | ||
1074 | EXT4_I(inode)->i_reserved_data_blocks -= used; | ||
1075 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, used + mdb_claim); | ||
1076 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1111 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1077 | 1112 | ||
1078 | vfs_dq_claim_block(inode, used + mdb_claim); | 1113 | /* Update quota subsystem */ |
1079 | 1114 | if (quota_claim) { | |
1080 | /* | 1115 | dquot_claim_block(inode, used); |
1081 | * free those over-booking quota for metadata blocks | 1116 | if (mdb_free) |
1082 | */ | 1117 | dquot_release_reservation_block(inode, mdb_free); |
1083 | if (mdb_free) | 1118 | } else { |
1084 | vfs_dq_release_reservation_block(inode, mdb_free); | 1119 | /* |
1120 | * We did fallocate with an offset that is already delayed | ||
1121 | * allocated. So on delayed allocated writeback we should | ||
1122 | * not update the quota for allocated blocks. But then | ||
1123 | * converting an fallocate region to initialized region would | ||
1124 | * have caused a metadata allocation. So claim quota for | ||
1125 | * that | ||
1126 | */ | ||
1127 | if (allocated_meta_blocks) | ||
1128 | dquot_claim_block(inode, allocated_meta_blocks); | ||
1129 | dquot_release_reservation_block(inode, mdb_free + used); | ||
1130 | } | ||
1085 | 1131 | ||
1086 | /* | 1132 | /* |
1087 | * If we have done all the pending block allocations and if | 1133 | * If we have done all the pending block allocations and if |
1088 | * there aren't any writers on the inode, we can discard the | 1134 | * there aren't any writers on the inode, we can discard the |
1089 | * inode's preallocations. | 1135 | * inode's preallocations. |
1090 | */ | 1136 | */ |
1091 | if (!total && (atomic_read(&inode->i_writecount) == 0)) | 1137 | if ((ei->i_reserved_data_blocks == 0) && |
1138 | (atomic_read(&inode->i_writecount) == 0)) | ||
1092 | ext4_discard_preallocations(inode); | 1139 | ext4_discard_preallocations(inode); |
1093 | } | 1140 | } |
1094 | 1141 | ||
@@ -1096,7 +1143,7 @@ static int check_block_validity(struct inode *inode, const char *msg, | |||
1096 | sector_t logical, sector_t phys, int len) | 1143 | sector_t logical, sector_t phys, int len) |
1097 | { | 1144 | { |
1098 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { | 1145 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { |
1099 | ext4_error(inode->i_sb, msg, | 1146 | __ext4_error(inode->i_sb, msg, |
1100 | "inode #%lu logical block %llu mapped to %llu " | 1147 | "inode #%lu logical block %llu mapped to %llu " |
1101 | "(size %d)", inode->i_ino, | 1148 | "(size %d)", inode->i_ino, |
1102 | (unsigned long long) logical, | 1149 | (unsigned long long) logical, |
@@ -1278,20 +1325,22 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1278 | * i_data's format changing. Force the migrate | 1325 | * i_data's format changing. Force the migrate |
1279 | * to fail by clearing migrate flags | 1326 | * to fail by clearing migrate flags |
1280 | */ | 1327 | */ |
1281 | EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; | 1328 | ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); |
1282 | } | 1329 | } |
1283 | } | ||
1284 | 1330 | ||
1331 | /* | ||
1332 | * Update reserved blocks/metadata blocks after successful | ||
1333 | * block allocation which had been deferred till now. We don't | ||
1334 | * support fallocate for non extent files. So we can update | ||
1335 | * reserve space here. | ||
1336 | */ | ||
1337 | if ((retval > 0) && | ||
1338 | (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) | ||
1339 | ext4_da_update_reserve_space(inode, retval, 1); | ||
1340 | } | ||
1285 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | 1341 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) |
1286 | EXT4_I(inode)->i_delalloc_reserved_flag = 0; | 1342 | EXT4_I(inode)->i_delalloc_reserved_flag = 0; |
1287 | 1343 | ||
1288 | /* | ||
1289 | * Update reserved blocks/metadata blocks after successful | ||
1290 | * block allocation which had been deferred till now. | ||
1291 | */ | ||
1292 | if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)) | ||
1293 | ext4_da_update_reserve_space(inode, retval); | ||
1294 | |||
1295 | up_write((&EXT4_I(inode)->i_data_sem)); | 1344 | up_write((&EXT4_I(inode)->i_data_sem)); |
1296 | if (retval > 0 && buffer_mapped(bh)) { | 1345 | if (retval > 0 && buffer_mapped(bh)) { |
1297 | int ret = check_block_validity(inode, "file system " | 1346 | int ret = check_block_validity(inode, "file system " |
@@ -1504,6 +1553,8 @@ static void ext4_truncate_failed_write(struct inode *inode) | |||
1504 | ext4_truncate(inode); | 1553 | ext4_truncate(inode); |
1505 | } | 1554 | } |
1506 | 1555 | ||
1556 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, | ||
1557 | struct buffer_head *bh_result, int create); | ||
1507 | static int ext4_write_begin(struct file *file, struct address_space *mapping, | 1558 | static int ext4_write_begin(struct file *file, struct address_space *mapping, |
1508 | loff_t pos, unsigned len, unsigned flags, | 1559 | loff_t pos, unsigned len, unsigned flags, |
1509 | struct page **pagep, void **fsdata) | 1560 | struct page **pagep, void **fsdata) |
@@ -1545,8 +1596,12 @@ retry: | |||
1545 | } | 1596 | } |
1546 | *pagep = page; | 1597 | *pagep = page; |
1547 | 1598 | ||
1548 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 1599 | if (ext4_should_dioread_nolock(inode)) |
1549 | ext4_get_block); | 1600 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, |
1601 | fsdata, ext4_get_block_write); | ||
1602 | else | ||
1603 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, | ||
1604 | fsdata, ext4_get_block); | ||
1550 | 1605 | ||
1551 | if (!ret && ext4_should_journal_data(inode)) { | 1606 | if (!ret && ext4_should_journal_data(inode)) { |
1552 | ret = walk_page_buffers(handle, page_buffers(page), | 1607 | ret = walk_page_buffers(handle, page_buffers(page), |
@@ -1763,7 +1818,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
1763 | new_i_size = pos + copied; | 1818 | new_i_size = pos + copied; |
1764 | if (new_i_size > inode->i_size) | 1819 | if (new_i_size > inode->i_size) |
1765 | i_size_write(inode, pos+copied); | 1820 | i_size_write(inode, pos+copied); |
1766 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | 1821 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); |
1767 | if (new_i_size > EXT4_I(inode)->i_disksize) { | 1822 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
1768 | ext4_update_i_disksize(inode, new_i_size); | 1823 | ext4_update_i_disksize(inode, new_i_size); |
1769 | ret2 = ext4_mark_inode_dirty(handle, inode); | 1824 | ret2 = ext4_mark_inode_dirty(handle, inode); |
@@ -1797,11 +1852,16 @@ static int ext4_journalled_write_end(struct file *file, | |||
1797 | return ret ? ret : copied; | 1852 | return ret ? ret : copied; |
1798 | } | 1853 | } |
1799 | 1854 | ||
1800 | static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | 1855 | /* |
1856 | * Reserve a single block located at lblock | ||
1857 | */ | ||
1858 | static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) | ||
1801 | { | 1859 | { |
1802 | int retries = 0; | 1860 | int retries = 0; |
1803 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1861 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1804 | unsigned long md_needed, mdblocks, total = 0; | 1862 | struct ext4_inode_info *ei = EXT4_I(inode); |
1863 | unsigned long md_needed, md_reserved; | ||
1864 | int ret; | ||
1805 | 1865 | ||
1806 | /* | 1866 | /* |
1807 | * recalculate the amount of metadata blocks to reserve | 1867 | * recalculate the amount of metadata blocks to reserve |
@@ -1809,35 +1869,33 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | |||
1809 | * worse case is one extent per block | 1869 | * worse case is one extent per block |
1810 | */ | 1870 | */ |
1811 | repeat: | 1871 | repeat: |
1812 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1872 | spin_lock(&ei->i_block_reservation_lock); |
1813 | total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; | 1873 | md_reserved = ei->i_reserved_meta_blocks; |
1814 | mdblocks = ext4_calc_metadata_amount(inode, total); | 1874 | md_needed = ext4_calc_metadata_amount(inode, lblock); |
1815 | BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks); | 1875 | trace_ext4_da_reserve_space(inode, md_needed); |
1816 | 1876 | spin_unlock(&ei->i_block_reservation_lock); | |
1817 | md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; | ||
1818 | total = md_needed + nrblocks; | ||
1819 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
1820 | 1877 | ||
1821 | /* | 1878 | /* |
1822 | * Make quota reservation here to prevent quota overflow | 1879 | * Make quota reservation here to prevent quota overflow |
1823 | * later. Real quota accounting is done at pages writeout | 1880 | * later. Real quota accounting is done at pages writeout |
1824 | * time. | 1881 | * time. |
1825 | */ | 1882 | */ |
1826 | if (vfs_dq_reserve_block(inode, total)) | 1883 | ret = dquot_reserve_block(inode, md_needed + 1); |
1827 | return -EDQUOT; | 1884 | if (ret) |
1885 | return ret; | ||
1828 | 1886 | ||
1829 | if (ext4_claim_free_blocks(sbi, total)) { | 1887 | if (ext4_claim_free_blocks(sbi, md_needed + 1)) { |
1830 | vfs_dq_release_reservation_block(inode, total); | 1888 | dquot_release_reservation_block(inode, md_needed + 1); |
1831 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | 1889 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { |
1832 | yield(); | 1890 | yield(); |
1833 | goto repeat; | 1891 | goto repeat; |
1834 | } | 1892 | } |
1835 | return -ENOSPC; | 1893 | return -ENOSPC; |
1836 | } | 1894 | } |
1837 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1895 | spin_lock(&ei->i_block_reservation_lock); |
1838 | EXT4_I(inode)->i_reserved_data_blocks += nrblocks; | 1896 | ei->i_reserved_data_blocks++; |
1839 | EXT4_I(inode)->i_reserved_meta_blocks += md_needed; | 1897 | ei->i_reserved_meta_blocks += md_needed; |
1840 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1898 | spin_unlock(&ei->i_block_reservation_lock); |
1841 | 1899 | ||
1842 | return 0; /* success */ | 1900 | return 0; /* success */ |
1843 | } | 1901 | } |
@@ -1845,49 +1903,46 @@ repeat: | |||
1845 | static void ext4_da_release_space(struct inode *inode, int to_free) | 1903 | static void ext4_da_release_space(struct inode *inode, int to_free) |
1846 | { | 1904 | { |
1847 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1905 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1848 | int total, mdb, mdb_free, release; | 1906 | struct ext4_inode_info *ei = EXT4_I(inode); |
1849 | 1907 | ||
1850 | if (!to_free) | 1908 | if (!to_free) |
1851 | return; /* Nothing to release, exit */ | 1909 | return; /* Nothing to release, exit */ |
1852 | 1910 | ||
1853 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1911 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
1854 | 1912 | ||
1855 | if (!EXT4_I(inode)->i_reserved_data_blocks) { | 1913 | if (unlikely(to_free > ei->i_reserved_data_blocks)) { |
1856 | /* | 1914 | /* |
1857 | * if there is no reserved blocks, but we try to free some | 1915 | * if there aren't enough reserved blocks, then the |
1858 | * then the counter is messed up somewhere. | 1916 | * counter is messed up somewhere. Since this |
1859 | * but since this function is called from invalidate | 1917 | * function is called from invalidate page, it's |
1860 | * page, it's harmless to return without any action | 1918 | * harmless to return without any action. |
1861 | */ | 1919 | */ |
1862 | printk(KERN_INFO "ext4 delalloc try to release %d reserved " | 1920 | ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: " |
1863 | "blocks for inode %lu, but there is no reserved " | 1921 | "ino %lu, to_free %d with only %d reserved " |
1864 | "data blocks\n", to_free, inode->i_ino); | 1922 | "data blocks\n", inode->i_ino, to_free, |
1865 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1923 | ei->i_reserved_data_blocks); |
1866 | return; | 1924 | WARN_ON(1); |
1925 | to_free = ei->i_reserved_data_blocks; | ||
1867 | } | 1926 | } |
1927 | ei->i_reserved_data_blocks -= to_free; | ||
1868 | 1928 | ||
1869 | /* recalculate the number of metablocks still need to be reserved */ | 1929 | if (ei->i_reserved_data_blocks == 0) { |
1870 | total = EXT4_I(inode)->i_reserved_data_blocks - to_free; | 1930 | /* |
1871 | mdb = ext4_calc_metadata_amount(inode, total); | 1931 | * We can release all of the reserved metadata blocks |
1872 | 1932 | * only when we have written all of the delayed | |
1873 | /* figure out how many metablocks to release */ | 1933 | * allocation blocks. |
1874 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | 1934 | */ |
1875 | mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; | 1935 | to_free += ei->i_reserved_meta_blocks; |
1876 | 1936 | ei->i_reserved_meta_blocks = 0; | |
1877 | release = to_free + mdb_free; | 1937 | ei->i_da_metadata_calc_len = 0; |
1878 | 1938 | } | |
1879 | /* update fs dirty blocks counter for truncate case */ | ||
1880 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, release); | ||
1881 | 1939 | ||
1882 | /* update per-inode reservations */ | 1940 | /* update fs dirty blocks counter */ |
1883 | BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); | 1941 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); |
1884 | EXT4_I(inode)->i_reserved_data_blocks -= to_free; | ||
1885 | 1942 | ||
1886 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | ||
1887 | EXT4_I(inode)->i_reserved_meta_blocks = mdb; | ||
1888 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1943 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1889 | 1944 | ||
1890 | vfs_dq_release_reservation_block(inode, release); | 1945 | dquot_release_reservation_block(inode, to_free); |
1891 | } | 1946 | } |
1892 | 1947 | ||
1893 | static void ext4_da_page_release_reservation(struct page *page, | 1948 | static void ext4_da_page_release_reservation(struct page *page, |
@@ -2064,6 +2119,8 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
2064 | } else if (buffer_mapped(bh)) | 2119 | } else if (buffer_mapped(bh)) |
2065 | BUG_ON(bh->b_blocknr != pblock); | 2120 | BUG_ON(bh->b_blocknr != pblock); |
2066 | 2121 | ||
2122 | if (buffer_uninit(exbh)) | ||
2123 | set_buffer_uninit(bh); | ||
2067 | cur_logical++; | 2124 | cur_logical++; |
2068 | pblock++; | 2125 | pblock++; |
2069 | } while ((bh = bh->b_this_page) != head); | 2126 | } while ((bh = bh->b_this_page) != head); |
@@ -2106,17 +2163,16 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | |||
2106 | break; | 2163 | break; |
2107 | for (i = 0; i < nr_pages; i++) { | 2164 | for (i = 0; i < nr_pages; i++) { |
2108 | struct page *page = pvec.pages[i]; | 2165 | struct page *page = pvec.pages[i]; |
2109 | index = page->index; | 2166 | if (page->index > end) |
2110 | if (index > end) | ||
2111 | break; | 2167 | break; |
2112 | index++; | ||
2113 | |||
2114 | BUG_ON(!PageLocked(page)); | 2168 | BUG_ON(!PageLocked(page)); |
2115 | BUG_ON(PageWriteback(page)); | 2169 | BUG_ON(PageWriteback(page)); |
2116 | block_invalidatepage(page, 0); | 2170 | block_invalidatepage(page, 0); |
2117 | ClearPageUptodate(page); | 2171 | ClearPageUptodate(page); |
2118 | unlock_page(page); | 2172 | unlock_page(page); |
2119 | } | 2173 | } |
2174 | index = pvec.pages[nr_pages - 1]->index + 1; | ||
2175 | pagevec_release(&pvec); | ||
2120 | } | 2176 | } |
2121 | return; | 2177 | return; |
2122 | } | 2178 | } |
@@ -2192,10 +2248,12 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2192 | * variables are updated after the blocks have been allocated. | 2248 | * variables are updated after the blocks have been allocated. |
2193 | */ | 2249 | */ |
2194 | new.b_state = 0; | 2250 | new.b_state = 0; |
2195 | get_blocks_flags = (EXT4_GET_BLOCKS_CREATE | | 2251 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE; |
2196 | EXT4_GET_BLOCKS_DELALLOC_RESERVE); | 2252 | if (ext4_should_dioread_nolock(mpd->inode)) |
2253 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; | ||
2197 | if (mpd->b_state & (1 << BH_Delay)) | 2254 | if (mpd->b_state & (1 << BH_Delay)) |
2198 | get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE; | 2255 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; |
2256 | |||
2199 | blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, | 2257 | blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, |
2200 | &new, get_blocks_flags); | 2258 | &new, get_blocks_flags); |
2201 | if (blks < 0) { | 2259 | if (blks < 0) { |
@@ -2493,7 +2551,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2493 | * XXX: __block_prepare_write() unmaps passed block, | 2551 | * XXX: __block_prepare_write() unmaps passed block, |
2494 | * is it OK? | 2552 | * is it OK? |
2495 | */ | 2553 | */ |
2496 | ret = ext4_da_reserve_space(inode, 1); | 2554 | ret = ext4_da_reserve_space(inode, iblock); |
2497 | if (ret) | 2555 | if (ret) |
2498 | /* not enough space to reserve */ | 2556 | /* not enough space to reserve */ |
2499 | return ret; | 2557 | return ret; |
@@ -2603,11 +2661,14 @@ static int __ext4_journalled_writepage(struct page *page, | |||
2603 | ret = err; | 2661 | ret = err; |
2604 | 2662 | ||
2605 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); | 2663 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); |
2606 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | 2664 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); |
2607 | out: | 2665 | out: |
2608 | return ret; | 2666 | return ret; |
2609 | } | 2667 | } |
2610 | 2668 | ||
2669 | static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); | ||
2670 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | ||
2671 | |||
2611 | /* | 2672 | /* |
2612 | * Note that we don't need to start a transaction unless we're journaling data | 2673 | * Note that we don't need to start a transaction unless we're journaling data |
2613 | * because we should have holes filled from ext4_page_mkwrite(). We even don't | 2674 | * because we should have holes filled from ext4_page_mkwrite(). We even don't |
@@ -2655,7 +2716,7 @@ static int ext4_writepage(struct page *page, | |||
2655 | int ret = 0; | 2716 | int ret = 0; |
2656 | loff_t size; | 2717 | loff_t size; |
2657 | unsigned int len; | 2718 | unsigned int len; |
2658 | struct buffer_head *page_bufs; | 2719 | struct buffer_head *page_bufs = NULL; |
2659 | struct inode *inode = page->mapping->host; | 2720 | struct inode *inode = page->mapping->host; |
2660 | 2721 | ||
2661 | trace_ext4_writepage(inode, page); | 2722 | trace_ext4_writepage(inode, page); |
@@ -2731,7 +2792,11 @@ static int ext4_writepage(struct page *page, | |||
2731 | 2792 | ||
2732 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) | 2793 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) |
2733 | ret = nobh_writepage(page, noalloc_get_block_write, wbc); | 2794 | ret = nobh_writepage(page, noalloc_get_block_write, wbc); |
2734 | else | 2795 | else if (page_bufs && buffer_uninit(page_bufs)) { |
2796 | ext4_set_bh_endio(page_bufs, inode); | ||
2797 | ret = block_write_full_page_endio(page, noalloc_get_block_write, | ||
2798 | wbc, ext4_end_io_buffer_write); | ||
2799 | } else | ||
2735 | ret = block_write_full_page(page, noalloc_get_block_write, | 2800 | ret = block_write_full_page(page, noalloc_get_block_write, |
2736 | wbc); | 2801 | wbc); |
2737 | 2802 | ||
@@ -2967,8 +3032,7 @@ retry: | |||
2967 | out_writepages: | 3032 | out_writepages: |
2968 | if (!no_nrwrite_index_update) | 3033 | if (!no_nrwrite_index_update) |
2969 | wbc->no_nrwrite_index_update = 0; | 3034 | wbc->no_nrwrite_index_update = 0; |
2970 | if (wbc->nr_to_write > nr_to_writebump) | 3035 | wbc->nr_to_write -= nr_to_writebump; |
2971 | wbc->nr_to_write -= nr_to_writebump; | ||
2972 | wbc->range_start = range_start; | 3036 | wbc->range_start = range_start; |
2973 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); | 3037 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); |
2974 | return ret; | 3038 | return ret; |
@@ -2993,11 +3057,18 @@ static int ext4_nonda_switch(struct super_block *sb) | |||
2993 | if (2 * free_blocks < 3 * dirty_blocks || | 3057 | if (2 * free_blocks < 3 * dirty_blocks || |
2994 | free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { | 3058 | free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { |
2995 | /* | 3059 | /* |
2996 | * free block count is less that 150% of dirty blocks | 3060 | * free block count is less than 150% of dirty blocks |
2997 | * or free blocks is less that watermark | 3061 | * or free blocks is less than watermark |
2998 | */ | 3062 | */ |
2999 | return 1; | 3063 | return 1; |
3000 | } | 3064 | } |
3065 | /* | ||
3066 | * Even if we don't switch but are nearing capacity, | ||
3067 | * start pushing delalloc when 1/2 of free blocks are dirty. | ||
3068 | */ | ||
3069 | if (free_blocks < 2 * dirty_blocks) | ||
3070 | writeback_inodes_sb_if_idle(sb); | ||
3071 | |||
3001 | return 0; | 3072 | return 0; |
3002 | } | 3073 | } |
3003 | 3074 | ||
@@ -3005,7 +3076,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
3005 | loff_t pos, unsigned len, unsigned flags, | 3076 | loff_t pos, unsigned len, unsigned flags, |
3006 | struct page **pagep, void **fsdata) | 3077 | struct page **pagep, void **fsdata) |
3007 | { | 3078 | { |
3008 | int ret, retries = 0; | 3079 | int ret, retries = 0, quota_retries = 0; |
3009 | struct page *page; | 3080 | struct page *page; |
3010 | pgoff_t index; | 3081 | pgoff_t index; |
3011 | unsigned from, to; | 3082 | unsigned from, to; |
@@ -3064,6 +3135,22 @@ retry: | |||
3064 | 3135 | ||
3065 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 3136 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
3066 | goto retry; | 3137 | goto retry; |
3138 | |||
3139 | if ((ret == -EDQUOT) && | ||
3140 | EXT4_I(inode)->i_reserved_meta_blocks && | ||
3141 | (quota_retries++ < 3)) { | ||
3142 | /* | ||
3143 | * Since we often over-estimate the number of meta | ||
3144 | * data blocks required, we may sometimes get a | ||
3145 | * spurious out of quota error even though there would | |
3146 | * be enough space once we write the data blocks and | ||
3147 | * find out how many meta data blocks were _really_ | ||
3148 | * required. So try forcing the inode write to see if | ||
3149 | * that helps. | ||
3150 | */ | ||
3151 | write_inode_now(inode, (quota_retries == 3)); | ||
3152 | goto retry; | ||
3153 | } | ||
3067 | out: | 3154 | out: |
3068 | return ret; | 3155 | return ret; |
3069 | } | 3156 | } |
@@ -3252,7 +3339,8 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
3252 | filemap_write_and_wait(mapping); | 3339 | filemap_write_and_wait(mapping); |
3253 | } | 3340 | } |
3254 | 3341 | ||
3255 | if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { | 3342 | if (EXT4_JOURNAL(inode) && |
3343 | ext4_test_inode_state(inode, EXT4_STATE_JDATA)) { | ||
3256 | /* | 3344 | /* |
3257 | * This is a REALLY heavyweight approach, but the use of | 3345 | * This is a REALLY heavyweight approach, but the use of |
3258 | * bmap on dirty files is expected to be extremely rare: | 3346 | * bmap on dirty files is expected to be extremely rare: |
@@ -3271,7 +3359,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
3271 | * everything they get. | 3359 | * everything they get. |
3272 | */ | 3360 | */ |
3273 | 3361 | ||
3274 | EXT4_I(inode)->i_state &= ~EXT4_STATE_JDATA; | 3362 | ext4_clear_inode_state(inode, EXT4_STATE_JDATA); |
3275 | journal = EXT4_JOURNAL(inode); | 3363 | journal = EXT4_JOURNAL(inode); |
3276 | jbd2_journal_lock_updates(journal); | 3364 | jbd2_journal_lock_updates(journal); |
3277 | err = jbd2_journal_flush(journal); | 3365 | err = jbd2_journal_flush(journal); |
@@ -3296,11 +3384,45 @@ ext4_readpages(struct file *file, struct address_space *mapping, | |||
3296 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); | 3384 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); |
3297 | } | 3385 | } |
3298 | 3386 | ||
3387 | static void ext4_free_io_end(ext4_io_end_t *io) | ||
3388 | { | ||
3389 | BUG_ON(!io); | ||
3390 | if (io->page) | ||
3391 | put_page(io->page); | ||
3392 | iput(io->inode); | ||
3393 | kfree(io); | ||
3394 | } | ||
3395 | |||
3396 | static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) | ||
3397 | { | ||
3398 | struct buffer_head *head, *bh; | ||
3399 | unsigned int curr_off = 0; | ||
3400 | |||
3401 | if (!page_has_buffers(page)) | ||
3402 | return; | ||
3403 | head = bh = page_buffers(page); | ||
3404 | do { | ||
3405 | if (offset <= curr_off && test_clear_buffer_uninit(bh) | ||
3406 | && bh->b_private) { | ||
3407 | ext4_free_io_end(bh->b_private); | ||
3408 | bh->b_private = NULL; | ||
3409 | bh->b_end_io = NULL; | ||
3410 | } | ||
3411 | curr_off = curr_off + bh->b_size; | ||
3412 | bh = bh->b_this_page; | ||
3413 | } while (bh != head); | ||
3414 | } | ||
3415 | |||
3299 | static void ext4_invalidatepage(struct page *page, unsigned long offset) | 3416 | static void ext4_invalidatepage(struct page *page, unsigned long offset) |
3300 | { | 3417 | { |
3301 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 3418 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
3302 | 3419 | ||
3303 | /* | 3420 | /* |
3421 | * free any io_end structure allocated for buffers to be discarded | ||
3422 | */ | ||
3423 | if (ext4_should_dioread_nolock(page->mapping->host)) | ||
3424 | ext4_invalidatepage_free_endio(page, offset); | ||
3425 | /* | ||
3304 | * If it's a full truncate we just forget about the pending dirtying | 3426 | * If it's a full truncate we just forget about the pending dirtying |
3305 | */ | 3427 | */ |
3306 | if (offset == 0) | 3428 | if (offset == 0) |
@@ -3371,7 +3493,14 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | |||
3371 | } | 3493 | } |
3372 | 3494 | ||
3373 | retry: | 3495 | retry: |
3374 | ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 3496 | if (rw == READ && ext4_should_dioread_nolock(inode)) |
3497 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | ||
3498 | inode->i_sb->s_bdev, iov, | ||
3499 | offset, nr_segs, | ||
3500 | ext4_get_block, NULL); | ||
3501 | else | ||
3502 | ret = blockdev_direct_IO(rw, iocb, inode, | ||
3503 | inode->i_sb->s_bdev, iov, | ||
3375 | offset, nr_segs, | 3504 | offset, nr_segs, |
3376 | ext4_get_block, NULL); | 3505 | ext4_get_block, NULL); |
3377 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 3506 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -3387,6 +3516,9 @@ retry: | |||
3387 | * but cannot extend i_size. Bail out and pretend | 3516 | * but cannot extend i_size. Bail out and pretend |
3388 | * the write failed... */ | 3517 | * the write failed... */ |
3389 | ret = PTR_ERR(handle); | 3518 | ret = PTR_ERR(handle); |
3519 | if (inode->i_nlink) | ||
3520 | ext4_orphan_del(NULL, inode); | ||
3521 | |||
3390 | goto out; | 3522 | goto out; |
3391 | } | 3523 | } |
3392 | if (inode->i_nlink) | 3524 | if (inode->i_nlink) |
@@ -3414,75 +3546,63 @@ out: | |||
3414 | return ret; | 3546 | return ret; |
3415 | } | 3547 | } |
3416 | 3548 | ||
3417 | static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock, | 3549 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, |
3418 | struct buffer_head *bh_result, int create) | 3550 | struct buffer_head *bh_result, int create) |
3419 | { | 3551 | { |
3420 | handle_t *handle = NULL; | 3552 | handle_t *handle = ext4_journal_current_handle(); |
3421 | int ret = 0; | 3553 | int ret = 0; |
3422 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | 3554 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; |
3423 | int dio_credits; | 3555 | int dio_credits; |
3556 | int started = 0; | ||
3424 | 3557 | ||
3425 | ext4_debug("ext4_get_block_dio_write: inode %lu, create flag %d\n", | 3558 | ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", |
3426 | inode->i_ino, create); | 3559 | inode->i_ino, create); |
3427 | /* | 3560 | /* |
3428 | * DIO VFS code passes create = 0 flag for write to | 3561 | * ext4_get_block in preparation for a DIO write or buffer write. |
3429 | * the middle of file. It does this to avoid block | 3562 | * We allocate an uninitialized extent if blocks haven't been allocated. |
3430 | * allocation for holes, to prevent expose stale data | 3563 | * The extent will be converted to initialized after the IO completes. |
3431 | * out when there is parallel buffered read (which does | ||
3432 | * not hold the i_mutex lock) while direct IO write has | ||
3433 | * not completed. DIO request on holes finally falls back | ||
3434 | * to buffered IO for this reason. | ||
3435 | * | ||
3436 | * For ext4 extent based file, since we support fallocate, | ||
3437 | * new allocated extent as uninitialized, for holes, we | ||
3438 | * could fallocate blocks for holes, thus parallel | ||
3439 | * buffered IO read will zero out the page when read on | ||
3440 | * a hole while parallel DIO write to the hole has not completed. | ||
3441 | * | ||
3442 | * when we come here, we know it's a direct IO write to | ||
3443 | * to the middle of file (<i_size) | ||
3444 | * so it's safe to override the create flag from VFS. | ||
3445 | */ | 3564 | */ |
3446 | create = EXT4_GET_BLOCKS_DIO_CREATE_EXT; | 3565 | create = EXT4_GET_BLOCKS_IO_CREATE_EXT; |
3447 | 3566 | ||
3448 | if (max_blocks > DIO_MAX_BLOCKS) | 3567 | if (!handle) { |
3449 | max_blocks = DIO_MAX_BLOCKS; | 3568 | if (max_blocks > DIO_MAX_BLOCKS) |
3450 | dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); | 3569 | max_blocks = DIO_MAX_BLOCKS; |
3451 | handle = ext4_journal_start(inode, dio_credits); | 3570 | dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); |
3452 | if (IS_ERR(handle)) { | 3571 | handle = ext4_journal_start(inode, dio_credits); |
3453 | ret = PTR_ERR(handle); | 3572 | if (IS_ERR(handle)) { |
3454 | goto out; | 3573 | ret = PTR_ERR(handle); |
3574 | goto out; | ||
3575 | } | ||
3576 | started = 1; | ||
3455 | } | 3577 | } |
3578 | |||
3456 | ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, | 3579 | ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, |
3457 | create); | 3580 | create); |
3458 | if (ret > 0) { | 3581 | if (ret > 0) { |
3459 | bh_result->b_size = (ret << inode->i_blkbits); | 3582 | bh_result->b_size = (ret << inode->i_blkbits); |
3460 | ret = 0; | 3583 | ret = 0; |
3461 | } | 3584 | } |
3462 | ext4_journal_stop(handle); | 3585 | if (started) |
3586 | ext4_journal_stop(handle); | ||
3463 | out: | 3587 | out: |
3464 | return ret; | 3588 | return ret; |
3465 | } | 3589 | } |
3466 | 3590 | ||
3467 | static void ext4_free_io_end(ext4_io_end_t *io) | 3591 | static void dump_completed_IO(struct inode * inode) |
3468 | { | ||
3469 | BUG_ON(!io); | ||
3470 | iput(io->inode); | ||
3471 | kfree(io); | ||
3472 | } | ||
3473 | static void dump_aio_dio_list(struct inode * inode) | ||
3474 | { | 3592 | { |
3475 | #ifdef EXT4_DEBUG | 3593 | #ifdef EXT4_DEBUG |
3476 | struct list_head *cur, *before, *after; | 3594 | struct list_head *cur, *before, *after; |
3477 | ext4_io_end_t *io, *io0, *io1; | 3595 | ext4_io_end_t *io, *io0, *io1; |
3596 | unsigned long flags; | ||
3478 | 3597 | ||
3479 | if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ | 3598 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ |
3480 | ext4_debug("inode %lu aio dio list is empty\n", inode->i_ino); | 3599 | ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); |
3481 | return; | 3600 | return; |
3482 | } | 3601 | } |
3483 | 3602 | ||
3484 | ext4_debug("Dump inode %lu aio_dio_completed_IO list \n", inode->i_ino); | 3603 | ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); |
3485 | list_for_each_entry(io, &EXT4_I(inode)->i_aio_dio_complete_list, list){ | 3604 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); |
3605 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ | ||
3486 | cur = &io->list; | 3606 | cur = &io->list; |
3487 | before = cur->prev; | 3607 | before = cur->prev; |
3488 | io0 = container_of(before, ext4_io_end_t, list); | 3608 | io0 = container_of(before, ext4_io_end_t, list); |
@@ -3492,32 +3612,31 @@ static void dump_aio_dio_list(struct inode * inode) | |||
3492 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", | 3612 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", |
3493 | io, inode->i_ino, io0, io1); | 3613 | io, inode->i_ino, io0, io1); |
3494 | } | 3614 | } |
3615 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3495 | #endif | 3616 | #endif |
3496 | } | 3617 | } |
3497 | 3618 | ||
3498 | /* | 3619 | /* |
3499 | * check a range of space and convert unwritten extents to written. | 3620 | * check a range of space and convert unwritten extents to written. |
3500 | */ | 3621 | */ |
3501 | static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) | 3622 | static int ext4_end_io_nolock(ext4_io_end_t *io) |
3502 | { | 3623 | { |
3503 | struct inode *inode = io->inode; | 3624 | struct inode *inode = io->inode; |
3504 | loff_t offset = io->offset; | 3625 | loff_t offset = io->offset; |
3505 | size_t size = io->size; | 3626 | ssize_t size = io->size; |
3506 | int ret = 0; | 3627 | int ret = 0; |
3507 | 3628 | ||
3508 | ext4_debug("end_aio_dio_onlock: io 0x%p from inode %lu,list->next 0x%p," | 3629 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," |
3509 | "list->prev 0x%p\n", | 3630 | "list->prev 0x%p\n", |
3510 | io, inode->i_ino, io->list.next, io->list.prev); | 3631 | io, inode->i_ino, io->list.next, io->list.prev); |
3511 | 3632 | ||
3512 | if (list_empty(&io->list)) | 3633 | if (list_empty(&io->list)) |
3513 | return ret; | 3634 | return ret; |
3514 | 3635 | ||
3515 | if (io->flag != DIO_AIO_UNWRITTEN) | 3636 | if (io->flag != EXT4_IO_UNWRITTEN) |
3516 | return ret; | 3637 | return ret; |
3517 | 3638 | ||
3518 | if (offset + size <= i_size_read(inode)) | 3639 | ret = ext4_convert_unwritten_extents(inode, offset, size); |
3519 | ret = ext4_convert_unwritten_extents(inode, offset, size); | ||
3520 | |||
3521 | if (ret < 0) { | 3640 | if (ret < 0) { |
3522 | printk(KERN_EMERG "%s: failed to convert unwritten" | 3641 | printk(KERN_EMERG "%s: failed to convert unwritten" |
3523 | "extents to written extents, error is %d" | 3642 | "extents to written extents, error is %d" |
@@ -3530,50 +3649,64 @@ static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) | |||
3530 | io->flag = 0; | 3649 | io->flag = 0; |
3531 | return ret; | 3650 | return ret; |
3532 | } | 3651 | } |
3652 | |||
3533 | /* | 3653 | /* |
3534 | * work on completed aio dio IO, to convert unwritten extents to extents | 3654 | * work on completed aio dio IO, to convert unwritten extents to extents |
3535 | */ | 3655 | */ |
3536 | static void ext4_end_aio_dio_work(struct work_struct *work) | 3656 | static void ext4_end_io_work(struct work_struct *work) |
3537 | { | 3657 | { |
3538 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | 3658 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); |
3539 | struct inode *inode = io->inode; | 3659 | struct inode *inode = io->inode; |
3540 | int ret = 0; | 3660 | struct ext4_inode_info *ei = EXT4_I(inode); |
3661 | unsigned long flags; | ||
3662 | int ret; | ||
3541 | 3663 | ||
3542 | mutex_lock(&inode->i_mutex); | 3664 | mutex_lock(&inode->i_mutex); |
3543 | ret = ext4_end_aio_dio_nolock(io); | 3665 | ret = ext4_end_io_nolock(io); |
3544 | if (ret >= 0) { | 3666 | if (ret < 0) { |
3545 | if (!list_empty(&io->list)) | 3667 | mutex_unlock(&inode->i_mutex); |
3546 | list_del_init(&io->list); | 3668 | return; |
3547 | ext4_free_io_end(io); | ||
3548 | } | 3669 | } |
3670 | |||
3671 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3672 | if (!list_empty(&io->list)) | ||
3673 | list_del_init(&io->list); | ||
3674 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3549 | mutex_unlock(&inode->i_mutex); | 3675 | mutex_unlock(&inode->i_mutex); |
3676 | ext4_free_io_end(io); | ||
3550 | } | 3677 | } |
3678 | |||
3551 | /* | 3679 | /* |
3552 | * This function is called from ext4_sync_file(). | 3680 | * This function is called from ext4_sync_file(). |
3553 | * | 3681 | * |
3554 | * When AIO DIO IO is completed, the work to convert unwritten | 3682 | * When IO is completed, the work to convert unwritten extents to |
3555 | * extents to written is queued on workqueue but may not get immediately | 3683 | * written is queued on workqueue but may not get immediately |
3556 | * scheduled. When fsync is called, we need to ensure the | 3684 | * scheduled. When fsync is called, we need to ensure the |
3557 | * conversion is complete before fsync returns. | 3685 | * conversion is complete before fsync returns. |
3558 | * The inode keeps track of a list of completed AIO from DIO path | 3686 | * The inode keeps track of a list of pending/completed IO that |
3559 | * that might needs to do the conversion. This function walks through | 3687 | * might need to do the conversion. This function walks through |
3560 | * the list and convert the related unwritten extents to written. | 3688 | * the list and converts the related unwritten extents for completed IO |
3689 | * to written. | |
3690 | * The function returns 0 on success, or a negative error code if a conversion failed. | |
3561 | */ | 3691 | */ |
3562 | int flush_aio_dio_completed_IO(struct inode *inode) | 3692 | int flush_completed_IO(struct inode *inode) |
3563 | { | 3693 | { |
3564 | ext4_io_end_t *io; | 3694 | ext4_io_end_t *io; |
3695 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
3696 | unsigned long flags; | ||
3565 | int ret = 0; | 3697 | int ret = 0; |
3566 | int ret2 = 0; | 3698 | int ret2 = 0; |
3567 | 3699 | ||
3568 | if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)) | 3700 | if (list_empty(&ei->i_completed_io_list)) |
3569 | return ret; | 3701 | return ret; |
3570 | 3702 | ||
3571 | dump_aio_dio_list(inode); | 3703 | dump_completed_IO(inode); |
3572 | while (!list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ | 3704 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
3573 | io = list_entry(EXT4_I(inode)->i_aio_dio_complete_list.next, | 3705 | while (!list_empty(&ei->i_completed_io_list)){ |
3706 | io = list_entry(ei->i_completed_io_list.next, | ||
3574 | ext4_io_end_t, list); | 3707 | ext4_io_end_t, list); |
3575 | /* | 3708 | /* |
3576 | * Calling ext4_end_aio_dio_nolock() to convert completed | 3709 | * Calling ext4_end_io_nolock() to convert completed |
3577 | * IO to written. | 3710 | * IO to written. |
3578 | * | 3711 | * |
3579 | * When ext4_sync_file() is called, run_queue() may already | 3712 | * When ext4_sync_file() is called, run_queue() may already |
@@ -3586,20 +3719,23 @@ int flush_aio_dio_completed_IO(struct inode *inode) | |||
3586 | * avoid double converting from both fsync and background work | 3719 | * avoid double converting from both fsync and background work |
3587 | * queue work. | 3720 | * queue work. |
3588 | */ | 3721 | */ |
3589 | ret = ext4_end_aio_dio_nolock(io); | 3722 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
3723 | ret = ext4_end_io_nolock(io); | ||
3724 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3590 | if (ret < 0) | 3725 | if (ret < 0) |
3591 | ret2 = ret; | 3726 | ret2 = ret; |
3592 | else | 3727 | else |
3593 | list_del_init(&io->list); | 3728 | list_del_init(&io->list); |
3594 | } | 3729 | } |
3730 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3595 | return (ret2 < 0) ? ret2 : 0; | 3731 | return (ret2 < 0) ? ret2 : 0; |
3596 | } | 3732 | } |
3597 | 3733 | ||
3598 | static ext4_io_end_t *ext4_init_io_end (struct inode *inode) | 3734 | static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) |
3599 | { | 3735 | { |
3600 | ext4_io_end_t *io = NULL; | 3736 | ext4_io_end_t *io = NULL; |
3601 | 3737 | ||
3602 | io = kmalloc(sizeof(*io), GFP_NOFS); | 3738 | io = kmalloc(sizeof(*io), flags); |
3603 | 3739 | ||
3604 | if (io) { | 3740 | if (io) { |
3605 | igrab(inode); | 3741 | igrab(inode); |
@@ -3607,8 +3743,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode) | |||
3607 | io->flag = 0; | 3743 | io->flag = 0; |
3608 | io->offset = 0; | 3744 | io->offset = 0; |
3609 | io->size = 0; | 3745 | io->size = 0; |
3610 | io->error = 0; | 3746 | io->page = NULL; |
3611 | INIT_WORK(&io->work, ext4_end_aio_dio_work); | 3747 | INIT_WORK(&io->work, ext4_end_io_work); |
3612 | INIT_LIST_HEAD(&io->list); | 3748 | INIT_LIST_HEAD(&io->list); |
3613 | } | 3749 | } |
3614 | 3750 | ||
@@ -3620,6 +3756,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3620 | { | 3756 | { |
3621 | ext4_io_end_t *io_end = iocb->private; | 3757 | ext4_io_end_t *io_end = iocb->private; |
3622 | struct workqueue_struct *wq; | 3758 | struct workqueue_struct *wq; |
3759 | unsigned long flags; | ||
3760 | struct ext4_inode_info *ei; | ||
3623 | 3761 | ||
3624 | /* if not async direct IO or dio with 0 bytes write, just return */ | 3762 | /* if not async direct IO or dio with 0 bytes write, just return */ |
3625 | if (!io_end || !size) | 3763 | if (!io_end || !size) |
@@ -3631,7 +3769,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3631 | size); | 3769 | size); |
3632 | 3770 | ||
3633 | /* if not aio dio with unwritten extents, just free io and return */ | 3771 | /* if not aio dio with unwritten extents, just free io and return */ |
3634 | if (io_end->flag != DIO_AIO_UNWRITTEN){ | 3772 | if (io_end->flag != EXT4_IO_UNWRITTEN){ |
3635 | ext4_free_io_end(io_end); | 3773 | ext4_free_io_end(io_end); |
3636 | iocb->private = NULL; | 3774 | iocb->private = NULL; |
3637 | return; | 3775 | return; |
@@ -3639,16 +3777,85 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3639 | 3777 | ||
3640 | io_end->offset = offset; | 3778 | io_end->offset = offset; |
3641 | io_end->size = size; | 3779 | io_end->size = size; |
3780 | io_end->flag = EXT4_IO_UNWRITTEN; | ||
3642 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | 3781 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; |
3643 | 3782 | ||
3644 | /* queue the work to convert unwritten extents to written */ | 3783 | /* queue the work to convert unwritten extents to written */ |
3645 | queue_work(wq, &io_end->work); | 3784 | queue_work(wq, &io_end->work); |
3646 | 3785 | ||
3647 | /* Add the io_end to per-inode completed aio dio list*/ | 3786 | /* Add the io_end to per-inode completed aio dio list*/ |
3648 | list_add_tail(&io_end->list, | 3787 | ei = EXT4_I(io_end->inode); |
3649 | &EXT4_I(io_end->inode)->i_aio_dio_complete_list); | 3788 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
3789 | list_add_tail(&io_end->list, &ei->i_completed_io_list); | ||
3790 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3650 | iocb->private = NULL; | 3791 | iocb->private = NULL; |
3651 | } | 3792 | } |
3793 | |||
3794 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | ||
3795 | { | ||
3796 | ext4_io_end_t *io_end = bh->b_private; | ||
3797 | struct workqueue_struct *wq; | ||
3798 | struct inode *inode; | ||
3799 | unsigned long flags; | ||
3800 | |||
3801 | if (!test_clear_buffer_uninit(bh) || !io_end) | ||
3802 | goto out; | ||
3803 | |||
3804 | if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) { | ||
3805 | printk("sb umounted, discard end_io request for inode %lu\n", | ||
3806 | io_end->inode->i_ino); | ||
3807 | ext4_free_io_end(io_end); | ||
3808 | goto out; | ||
3809 | } | ||
3810 | |||
3811 | io_end->flag = EXT4_IO_UNWRITTEN; | ||
3812 | inode = io_end->inode; | ||
3813 | |||
3814 | /* Add the io_end to per-inode completed io list*/ | ||
3815 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3816 | list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); | ||
3817 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3818 | |||
3819 | wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq; | ||
3820 | /* queue the work to convert unwritten extents to written */ | ||
3821 | queue_work(wq, &io_end->work); | ||
3822 | out: | ||
3823 | bh->b_private = NULL; | ||
3824 | bh->b_end_io = NULL; | ||
3825 | clear_buffer_uninit(bh); | ||
3826 | end_buffer_async_write(bh, uptodate); | ||
3827 | } | ||
3828 | |||
3829 | static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode) | ||
3830 | { | ||
3831 | ext4_io_end_t *io_end; | ||
3832 | struct page *page = bh->b_page; | ||
3833 | loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT; | ||
3834 | size_t size = bh->b_size; | ||
3835 | |||
3836 | retry: | ||
3837 | io_end = ext4_init_io_end(inode, GFP_ATOMIC); | ||
3838 | if (!io_end) { | ||
3839 | if (printk_ratelimit()) | ||
3840 | printk(KERN_WARNING "%s: allocation fail\n", __func__); | ||
3841 | schedule(); | ||
3842 | goto retry; | ||
3843 | } | ||
3844 | io_end->offset = offset; | ||
3845 | io_end->size = size; | ||
3846 | /* | ||
3847 | * We need to hold a reference to the page to make sure it | ||
3848 | * doesn't get evicted before ext4_end_io_work() has a chance | ||
3849 | * to convert the extent from unwritten to written. | |
3850 | */ | ||
3851 | io_end->page = page; | ||
3852 | get_page(io_end->page); | ||
3853 | |||
3854 | bh->b_private = io_end; | ||
3855 | bh->b_end_io = ext4_end_io_buffer_write; | ||
3856 | return 0; | ||
3857 | } | ||
3858 | |||
3652 | /* | 3859 | /* |
3653 | * For ext4 extent files, ext4 will do direct-io write to holes, | 3860 | * For ext4 extent files, ext4 will do direct-io write to holes, |
3654 | * preallocated extents, and those write extend the file, no need to | 3861 | * preallocated extents, and those write extend the file, no need to |
@@ -3702,7 +3909,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3702 | iocb->private = NULL; | 3909 | iocb->private = NULL; |
3703 | EXT4_I(inode)->cur_aio_dio = NULL; | 3910 | EXT4_I(inode)->cur_aio_dio = NULL; |
3704 | if (!is_sync_kiocb(iocb)) { | 3911 | if (!is_sync_kiocb(iocb)) { |
3705 | iocb->private = ext4_init_io_end(inode); | 3912 | iocb->private = ext4_init_io_end(inode, GFP_NOFS); |
3706 | if (!iocb->private) | 3913 | if (!iocb->private) |
3707 | return -ENOMEM; | 3914 | return -ENOMEM; |
3708 | /* | 3915 | /* |
@@ -3718,7 +3925,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3718 | ret = blockdev_direct_IO(rw, iocb, inode, | 3925 | ret = blockdev_direct_IO(rw, iocb, inode, |
3719 | inode->i_sb->s_bdev, iov, | 3926 | inode->i_sb->s_bdev, iov, |
3720 | offset, nr_segs, | 3927 | offset, nr_segs, |
3721 | ext4_get_block_dio_write, | 3928 | ext4_get_block_write, |
3722 | ext4_end_io_dio); | 3929 | ext4_end_io_dio); |
3723 | if (iocb->private) | 3930 | if (iocb->private) |
3724 | EXT4_I(inode)->cur_aio_dio = NULL; | 3931 | EXT4_I(inode)->cur_aio_dio = NULL; |
@@ -3739,8 +3946,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3739 | if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { | 3946 | if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { |
3740 | ext4_free_io_end(iocb->private); | 3947 | ext4_free_io_end(iocb->private); |
3741 | iocb->private = NULL; | 3948 | iocb->private = NULL; |
3742 | } else if (ret > 0 && (EXT4_I(inode)->i_state & | 3949 | } else if (ret > 0 && ext4_test_inode_state(inode, |
3743 | EXT4_STATE_DIO_UNWRITTEN)) { | 3950 | EXT4_STATE_DIO_UNWRITTEN)) { |
3744 | int err; | 3951 | int err; |
3745 | /* | 3952 | /* |
3746 | * for non AIO case, since the IO is already | 3953 | * for non AIO case, since the IO is already |
@@ -3750,7 +3957,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3750 | offset, ret); | 3957 | offset, ret); |
3751 | if (err < 0) | 3958 | if (err < 0) |
3752 | ret = err; | 3959 | ret = err; |
3753 | EXT4_I(inode)->i_state &= ~EXT4_STATE_DIO_UNWRITTEN; | 3960 | ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); |
3754 | } | 3961 | } |
3755 | return ret; | 3962 | return ret; |
3756 | } | 3963 | } |
@@ -4081,18 +4288,27 @@ no_top: | |||
4081 | * We release `count' blocks on disk, but (last - first) may be greater | 4288 | * We release `count' blocks on disk, but (last - first) may be greater |
4082 | * than `count' because there can be holes in there. | 4289 | * than `count' because there can be holes in there. |
4083 | */ | 4290 | */ |
4084 | static void ext4_clear_blocks(handle_t *handle, struct inode *inode, | 4291 | static int ext4_clear_blocks(handle_t *handle, struct inode *inode, |
4085 | struct buffer_head *bh, | 4292 | struct buffer_head *bh, |
4086 | ext4_fsblk_t block_to_free, | 4293 | ext4_fsblk_t block_to_free, |
4087 | unsigned long count, __le32 *first, | 4294 | unsigned long count, __le32 *first, |
4088 | __le32 *last) | 4295 | __le32 *last) |
4089 | { | 4296 | { |
4090 | __le32 *p; | 4297 | __le32 *p; |
4091 | int flags = EXT4_FREE_BLOCKS_FORGET; | 4298 | int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; |
4092 | 4299 | ||
4093 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | 4300 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) |
4094 | flags |= EXT4_FREE_BLOCKS_METADATA; | 4301 | flags |= EXT4_FREE_BLOCKS_METADATA; |
4095 | 4302 | ||
4303 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, | ||
4304 | count)) { | ||
4305 | ext4_error(inode->i_sb, "inode #%lu: " | ||
4306 | "attempt to clear blocks %llu len %lu, invalid", | ||
4307 | inode->i_ino, (unsigned long long) block_to_free, | ||
4308 | count); | ||
4309 | return 1; | ||
4310 | } | ||
4311 | |||
4096 | if (try_to_extend_transaction(handle, inode)) { | 4312 | if (try_to_extend_transaction(handle, inode)) { |
4097 | if (bh) { | 4313 | if (bh) { |
4098 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 4314 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
@@ -4111,6 +4327,7 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
4111 | *p = 0; | 4327 | *p = 0; |
4112 | 4328 | ||
4113 | ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); | 4329 | ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); |
4330 | return 0; | ||
4114 | } | 4331 | } |
4115 | 4332 | ||
4116 | /** | 4333 | /** |
@@ -4166,9 +4383,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4166 | } else if (nr == block_to_free + count) { | 4383 | } else if (nr == block_to_free + count) { |
4167 | count++; | 4384 | count++; |
4168 | } else { | 4385 | } else { |
4169 | ext4_clear_blocks(handle, inode, this_bh, | 4386 | if (ext4_clear_blocks(handle, inode, this_bh, |
4170 | block_to_free, | 4387 | block_to_free, count, |
4171 | count, block_to_free_p, p); | 4388 | block_to_free_p, p)) |
4389 | break; | ||
4172 | block_to_free = nr; | 4390 | block_to_free = nr; |
4173 | block_to_free_p = p; | 4391 | block_to_free_p = p; |
4174 | count = 1; | 4392 | count = 1; |
@@ -4192,7 +4410,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4192 | if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) | 4410 | if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) |
4193 | ext4_handle_dirty_metadata(handle, inode, this_bh); | 4411 | ext4_handle_dirty_metadata(handle, inode, this_bh); |
4194 | else | 4412 | else |
4195 | ext4_error(inode->i_sb, __func__, | 4413 | ext4_error(inode->i_sb, |
4196 | "circular indirect block detected, " | 4414 | "circular indirect block detected, " |
4197 | "inode=%lu, block=%llu", | 4415 | "inode=%lu, block=%llu", |
4198 | inode->i_ino, | 4416 | inode->i_ino, |
@@ -4232,6 +4450,16 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4232 | if (!nr) | 4450 | if (!nr) |
4233 | continue; /* A hole */ | 4451 | continue; /* A hole */ |
4234 | 4452 | ||
4453 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), | ||
4454 | nr, 1)) { | ||
4455 | ext4_error(inode->i_sb, | ||
4456 | "indirect mapped block in inode " | ||
4457 | "#%lu invalid (level %d, blk #%lu)", | ||
4458 | inode->i_ino, depth, | ||
4459 | (unsigned long) nr); | ||
4460 | break; | ||
4461 | } | ||
4462 | |||
4235 | /* Go read the buffer for the next level down */ | 4463 | /* Go read the buffer for the next level down */ |
4236 | bh = sb_bread(inode->i_sb, nr); | 4464 | bh = sb_bread(inode->i_sb, nr); |
4237 | 4465 | ||
@@ -4240,7 +4468,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4240 | * (should be rare). | 4468 | * (should be rare). |
4241 | */ | 4469 | */ |
4242 | if (!bh) { | 4470 | if (!bh) { |
4243 | ext4_error(inode->i_sb, "ext4_free_branches", | 4471 | ext4_error(inode->i_sb, |
4244 | "Read failure, inode=%lu, block=%llu", | 4472 | "Read failure, inode=%lu, block=%llu", |
4245 | inode->i_ino, nr); | 4473 | inode->i_ino, nr); |
4246 | continue; | 4474 | continue; |
@@ -4384,8 +4612,10 @@ void ext4_truncate(struct inode *inode) | |||
4384 | if (!ext4_can_truncate(inode)) | 4612 | if (!ext4_can_truncate(inode)) |
4385 | return; | 4613 | return; |
4386 | 4614 | ||
4615 | EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; | ||
4616 | |||
4387 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | 4617 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) |
4388 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; | 4618 | ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); |
4389 | 4619 | ||
4390 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 4620 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
4391 | ext4_ext_truncate(inode); | 4621 | ext4_ext_truncate(inode); |
@@ -4555,9 +4785,8 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
4555 | 4785 | ||
4556 | bh = sb_getblk(sb, block); | 4786 | bh = sb_getblk(sb, block); |
4557 | if (!bh) { | 4787 | if (!bh) { |
4558 | ext4_error(sb, "ext4_get_inode_loc", "unable to read " | 4788 | ext4_error(sb, "unable to read inode block - " |
4559 | "inode block - inode=%lu, block=%llu", | 4789 | "inode=%lu, block=%llu", inode->i_ino, block); |
4560 | inode->i_ino, block); | ||
4561 | return -EIO; | 4790 | return -EIO; |
4562 | } | 4791 | } |
4563 | if (!buffer_uptodate(bh)) { | 4792 | if (!buffer_uptodate(bh)) { |
@@ -4655,9 +4884,8 @@ make_io: | |||
4655 | submit_bh(READ_META, bh); | 4884 | submit_bh(READ_META, bh); |
4656 | wait_on_buffer(bh); | 4885 | wait_on_buffer(bh); |
4657 | if (!buffer_uptodate(bh)) { | 4886 | if (!buffer_uptodate(bh)) { |
4658 | ext4_error(sb, __func__, | 4887 | ext4_error(sb, "unable to read inode block - inode=%lu," |
4659 | "unable to read inode block - inode=%lu, " | 4888 | " block=%llu", inode->i_ino, block); |
4660 | "block=%llu", inode->i_ino, block); | ||
4661 | brelse(bh); | 4889 | brelse(bh); |
4662 | return -EIO; | 4890 | return -EIO; |
4663 | } | 4891 | } |
@@ -4671,7 +4899,7 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) | |||
4671 | { | 4899 | { |
4672 | /* We have all inode data except xattrs in memory here. */ | 4900 | /* We have all inode data except xattrs in memory here. */ |
4673 | return __ext4_get_inode_loc(inode, iloc, | 4901 | return __ext4_get_inode_loc(inode, iloc, |
4674 | !(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)); | 4902 | !ext4_test_inode_state(inode, EXT4_STATE_XATTR)); |
4675 | } | 4903 | } |
4676 | 4904 | ||
4677 | void ext4_set_inode_flags(struct inode *inode) | 4905 | void ext4_set_inode_flags(struct inode *inode) |
@@ -4765,7 +4993,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4765 | } | 4993 | } |
4766 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); | 4994 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); |
4767 | 4995 | ||
4768 | ei->i_state = 0; | 4996 | ei->i_state_flags = 0; |
4769 | ei->i_dir_start_lookup = 0; | 4997 | ei->i_dir_start_lookup = 0; |
4770 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); | 4998 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); |
4771 | /* We now have enough fields to check if the inode was active or not. | 4999 | /* We now have enough fields to check if the inode was active or not. |
@@ -4848,7 +5076,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4848 | EXT4_GOOD_OLD_INODE_SIZE + | 5076 | EXT4_GOOD_OLD_INODE_SIZE + |
4849 | ei->i_extra_isize; | 5077 | ei->i_extra_isize; |
4850 | if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) | 5078 | if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) |
4851 | ei->i_state |= EXT4_STATE_XATTR; | 5079 | ext4_set_inode_state(inode, EXT4_STATE_XATTR); |
4852 | } | 5080 | } |
4853 | } else | 5081 | } else |
4854 | ei->i_extra_isize = 0; | 5082 | ei->i_extra_isize = 0; |
@@ -4868,8 +5096,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4868 | ret = 0; | 5096 | ret = 0; |
4869 | if (ei->i_file_acl && | 5097 | if (ei->i_file_acl && |
4870 | !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { | 5098 | !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { |
4871 | ext4_error(sb, __func__, | 5099 | ext4_error(sb, "bad extended attribute block %llu inode #%lu", |
4872 | "bad extended attribute block %llu in inode #%lu", | ||
4873 | ei->i_file_acl, inode->i_ino); | 5100 | ei->i_file_acl, inode->i_ino); |
4874 | ret = -EIO; | 5101 | ret = -EIO; |
4875 | goto bad_inode; | 5102 | goto bad_inode; |
@@ -4915,8 +5142,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4915 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); | 5142 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); |
4916 | } else { | 5143 | } else { |
4917 | ret = -EIO; | 5144 | ret = -EIO; |
4918 | ext4_error(inode->i_sb, __func__, | 5145 | ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu", |
4919 | "bogus i_mode (%o) for inode=%lu", | ||
4920 | inode->i_mode, inode->i_ino); | 5146 | inode->i_mode, inode->i_ino); |
4921 | goto bad_inode; | 5147 | goto bad_inode; |
4922 | } | 5148 | } |
@@ -4988,7 +5214,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4988 | 5214 | ||
4989 | /* For fields not not tracking in the in-memory inode, | 5215 | /* For fields not not tracking in the in-memory inode, |
4990 | * initialise them to zero for new inodes. */ | 5216 | * initialise them to zero for new inodes. */ |
4991 | if (ei->i_state & EXT4_STATE_NEW) | 5217 | if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) |
4992 | memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); | 5218 | memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); |
4993 | 5219 | ||
4994 | ext4_get_inode_flags(ei); | 5220 | ext4_get_inode_flags(ei); |
@@ -5052,7 +5278,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
5052 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE); | 5278 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE); |
5053 | sb->s_dirt = 1; | 5279 | sb->s_dirt = 1; |
5054 | ext4_handle_sync(handle); | 5280 | ext4_handle_sync(handle); |
5055 | err = ext4_handle_dirty_metadata(handle, inode, | 5281 | err = ext4_handle_dirty_metadata(handle, NULL, |
5056 | EXT4_SB(sb)->s_sbh); | 5282 | EXT4_SB(sb)->s_sbh); |
5057 | } | 5283 | } |
5058 | } | 5284 | } |
@@ -5081,10 +5307,10 @@ static int ext4_do_update_inode(handle_t *handle, | |||
5081 | } | 5307 | } |
5082 | 5308 | ||
5083 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 5309 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
5084 | rc = ext4_handle_dirty_metadata(handle, inode, bh); | 5310 | rc = ext4_handle_dirty_metadata(handle, NULL, bh); |
5085 | if (!err) | 5311 | if (!err) |
5086 | err = rc; | 5312 | err = rc; |
5087 | ei->i_state &= ~EXT4_STATE_NEW; | 5313 | ext4_clear_inode_state(inode, EXT4_STATE_NEW); |
5088 | 5314 | ||
5089 | ext4_update_inode_fsync_trans(handle, inode, 0); | 5315 | ext4_update_inode_fsync_trans(handle, inode, 0); |
5090 | out_brelse: | 5316 | out_brelse: |
@@ -5128,7 +5354,7 @@ out_brelse: | |||
5128 | * `stuff()' is running, and the new i_size will be lost. Plus the inode | 5354 | * `stuff()' is running, and the new i_size will be lost. Plus the inode |
5129 | * will no longer be on the superblock's dirty inode list. | 5355 | * will no longer be on the superblock's dirty inode list. |
5130 | */ | 5356 | */ |
5131 | int ext4_write_inode(struct inode *inode, int wait) | 5357 | int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) |
5132 | { | 5358 | { |
5133 | int err; | 5359 | int err; |
5134 | 5360 | ||
@@ -5142,26 +5368,25 @@ int ext4_write_inode(struct inode *inode, int wait) | |||
5142 | return -EIO; | 5368 | return -EIO; |
5143 | } | 5369 | } |
5144 | 5370 | ||
5145 | if (!wait) | 5371 | if (wbc->sync_mode != WB_SYNC_ALL) |
5146 | return 0; | 5372 | return 0; |
5147 | 5373 | ||
5148 | err = ext4_force_commit(inode->i_sb); | 5374 | err = ext4_force_commit(inode->i_sb); |
5149 | } else { | 5375 | } else { |
5150 | struct ext4_iloc iloc; | 5376 | struct ext4_iloc iloc; |
5151 | 5377 | ||
5152 | err = ext4_get_inode_loc(inode, &iloc); | 5378 | err = __ext4_get_inode_loc(inode, &iloc, 0); |
5153 | if (err) | 5379 | if (err) |
5154 | return err; | 5380 | return err; |
5155 | if (wait) | 5381 | if (wbc->sync_mode == WB_SYNC_ALL) |
5156 | sync_dirty_buffer(iloc.bh); | 5382 | sync_dirty_buffer(iloc.bh); |
5157 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { | 5383 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { |
5158 | ext4_error(inode->i_sb, __func__, | 5384 | ext4_error(inode->i_sb, "IO error syncing inode, " |
5159 | "IO error syncing inode, " | 5385 | "inode=%lu, block=%llu", inode->i_ino, |
5160 | "inode=%lu, block=%llu", | ||
5161 | inode->i_ino, | ||
5162 | (unsigned long long)iloc.bh->b_blocknr); | 5386 | (unsigned long long)iloc.bh->b_blocknr); |
5163 | err = -EIO; | 5387 | err = -EIO; |
5164 | } | 5388 | } |
5389 | brelse(iloc.bh); | ||
5165 | } | 5390 | } |
5166 | return err; | 5391 | return err; |
5167 | } | 5392 | } |
@@ -5200,6 +5425,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5200 | if (error) | 5425 | if (error) |
5201 | return error; | 5426 | return error; |
5202 | 5427 | ||
5428 | if (ia_valid & ATTR_SIZE) | ||
5429 | dquot_initialize(inode); | ||
5203 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || | 5430 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || |
5204 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { | 5431 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { |
5205 | handle_t *handle; | 5432 | handle_t *handle; |
@@ -5212,7 +5439,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5212 | error = PTR_ERR(handle); | 5439 | error = PTR_ERR(handle); |
5213 | goto err_out; | 5440 | goto err_out; |
5214 | } | 5441 | } |
5215 | error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; | 5442 | error = dquot_transfer(inode, attr); |
5216 | if (error) { | 5443 | if (error) { |
5217 | ext4_journal_stop(handle); | 5444 | ext4_journal_stop(handle); |
5218 | return error; | 5445 | return error; |
@@ -5239,7 +5466,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5239 | } | 5466 | } |
5240 | 5467 | ||
5241 | if (S_ISREG(inode->i_mode) && | 5468 | if (S_ISREG(inode->i_mode) && |
5242 | attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { | 5469 | attr->ia_valid & ATTR_SIZE && |
5470 | (attr->ia_size < inode->i_size || | ||
5471 | (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) { | ||
5243 | handle_t *handle; | 5472 | handle_t *handle; |
5244 | 5473 | ||
5245 | handle = ext4_journal_start(inode, 3); | 5474 | handle = ext4_journal_start(inode, 3); |
@@ -5270,6 +5499,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5270 | goto err_out; | 5499 | goto err_out; |
5271 | } | 5500 | } |
5272 | } | 5501 | } |
5502 | /* ext4_truncate will clear the flag */ | ||
5503 | if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) | ||
5504 | ext4_truncate(inode); | ||
5273 | } | 5505 | } |
5274 | 5506 | ||
5275 | rc = inode_setattr(inode, attr); | 5507 | rc = inode_setattr(inode, attr); |
@@ -5508,8 +5740,8 @@ static int ext4_expand_extra_isize(struct inode *inode, | |||
5508 | entry = IFIRST(header); | 5740 | entry = IFIRST(header); |
5509 | 5741 | ||
5510 | /* No extended attributes present */ | 5742 | /* No extended attributes present */ |
5511 | if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR) || | 5743 | if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || |
5512 | header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { | 5744 | header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { |
5513 | memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, | 5745 | memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, |
5514 | new_extra_isize); | 5746 | new_extra_isize); |
5515 | EXT4_I(inode)->i_extra_isize = new_extra_isize; | 5747 | EXT4_I(inode)->i_extra_isize = new_extra_isize; |
@@ -5553,7 +5785,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
5553 | err = ext4_reserve_inode_write(handle, inode, &iloc); | 5785 | err = ext4_reserve_inode_write(handle, inode, &iloc); |
5554 | if (ext4_handle_valid(handle) && | 5786 | if (ext4_handle_valid(handle) && |
5555 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && | 5787 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && |
5556 | !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) { | 5788 | !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { |
5557 | /* | 5789 | /* |
5558 | * We need extra buffer credits since we may write into EA block | 5790 | * We need extra buffer credits since we may write into EA block |
5559 | * with this same handle. If journal_extend fails, then it will | 5791 | * with this same handle. If journal_extend fails, then it will |
@@ -5567,10 +5799,11 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
5567 | sbi->s_want_extra_isize, | 5799 | sbi->s_want_extra_isize, |
5568 | iloc, handle); | 5800 | iloc, handle); |
5569 | if (ret) { | 5801 | if (ret) { |
5570 | EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; | 5802 | ext4_set_inode_state(inode, |
5803 | EXT4_STATE_NO_EXPAND); | ||
5571 | if (mnt_count != | 5804 | if (mnt_count != |
5572 | le16_to_cpu(sbi->s_es->s_mnt_count)) { | 5805 | le16_to_cpu(sbi->s_es->s_mnt_count)) { |
5573 | ext4_warning(inode->i_sb, __func__, | 5806 | ext4_warning(inode->i_sb, |
5574 | "Unable to expand inode %lu. Delete" | 5807 | "Unable to expand inode %lu. Delete" |
5575 | " some EAs or run e2fsck.", | 5808 | " some EAs or run e2fsck.", |
5576 | inode->i_ino); | 5809 | inode->i_ino); |
@@ -5592,7 +5825,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
5592 | * i_size has been changed by generic_commit_write() and we thus need | 5825 | * i_size has been changed by generic_commit_write() and we thus need |
5593 | * to include the updated inode in the current transaction. | 5826 | * to include the updated inode in the current transaction. |
5594 | * | 5827 | * |
5595 | * Also, vfs_dq_alloc_block() will always dirty the inode when blocks | 5828 | * Also, dquot_alloc_block() will always dirty the inode when blocks |
5596 | * are allocated to the file. | 5829 | * are allocated to the file. |
5597 | * | 5830 | * |
5598 | * If the inode is marked synchronous, we don't honour that here - doing | 5831 | * If the inode is marked synchronous, we don't honour that here - doing |
@@ -5634,7 +5867,7 @@ static int ext4_pin_inode(handle_t *handle, struct inode *inode) | |||
5634 | err = jbd2_journal_get_write_access(handle, iloc.bh); | 5867 | err = jbd2_journal_get_write_access(handle, iloc.bh); |
5635 | if (!err) | 5868 | if (!err) |
5636 | err = ext4_handle_dirty_metadata(handle, | 5869 | err = ext4_handle_dirty_metadata(handle, |
5637 | inode, | 5870 | NULL, |
5638 | iloc.bh); | 5871 | iloc.bh); |
5639 | brelse(iloc.bh); | 5872 | brelse(iloc.bh); |
5640 | } | 5873 | } |