author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-27 13:26:37 -0400
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-27 13:26:37 -0400
commit | e4ce30f3779c2ddaa7dfaa4042209e5dbacbada5 (patch)
tree | cc64c1dcd16b5dbf71ebc8338b339e6fb04abaee /fs/ext4/inode.c
parent | b899ebeb05da4287ce845976727e3e83dadd25d5 (diff)
parent | 14ece1028b3ed53ffec1b1213ffc6acaf79ad77c (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (40 commits)
ext4: Make fsync sync new parent directories in no-journal mode
ext4: Drop whitespace at end of lines
ext4: Fix compat EXT4_IOC_ADD_GROUP
ext4: Conditionally define compat ioctl numbers
tracing: Convert more ext4 events to DEFINE_EVENT
ext4: Add new tracepoints to track mballoc's buddy bitmap loads
ext4: Add a missing trace hook
ext4: restart ext4_ext_remove_space() after transaction restart
ext4: Clear the EXT4_EOFBLOCKS_FL flag only when warranted
ext4: Avoid crashing on NULL ptr dereference on a filesystem error
ext4: Use bitops to read/modify i_flags in struct ext4_inode_info
ext4: Convert calls of ext4_error() to EXT4_ERROR_INODE()
ext4: Convert callers of ext4_get_blocks() to use ext4_map_blocks()
ext4: Add new abstraction ext4_map_blocks() underneath ext4_get_blocks()
ext4: Use our own write_cache_pages()
ext4: Show journal_checksum option
ext4: Fix for ext4_mb_collect_stats()
ext4: check for a good block group before loading buddy pages
ext4: Prevent creation of files larger than RLIMIT_FSIZE using fallocate
ext4: Remove extraneous newlines in ext4_msg() calls
...
Fixed up trivial conflict in fs/ext4/fsync.c
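
The central piece of plumbing referenced throughout the diff below is the new ext4_map_blocks() interface, which replaces the old buffer_head-based ext4_get_blocks() calling convention. The structure itself lives in fs/ext4/ext4.h and is not part of this inode.c diff, so the following C sketch is only reconstructed from how the callers in the diff use it: the field and flag names appear in the diff, but the exact types, field order, and bit values are assumptions, and example_get_block() is a hypothetical caller modelled on _ext4_get_block().

/* Sketch only -- the real definitions are in fs/ext4/ext4.h. */
struct ext4_map_blocks {
	ext4_fsblk_t m_pblk;	/* first physical block (result) */
	ext4_lblk_t m_lblk;	/* first logical block (request) */
	unsigned int m_len;	/* blocks requested in, blocks mapped out */
	unsigned int m_flags;	/* EXT4_MAP_NEW, _MAPPED, _UNWRITTEN, ... */
};

/*
 * Hypothetical caller, modelled on _ext4_get_block() in the diff below;
 * journal handle setup is omitted for brevity.
 */
static int example_get_block(struct inode *inode, sector_t iblock,
			     struct buffer_head *bh, int flags)
{
	struct ext4_map_blocks map;
	int ret;

	map.m_lblk = iblock;				/* where to start */
	map.m_len = bh->b_size >> inode->i_blkbits;	/* how many blocks */

	ret = ext4_map_blocks(NULL, inode, &map, flags);
	if (ret > 0) {
		/*
		 * Translate the result back into buffer_head state, as the
		 * get_block_t interface still expects.
		 */
		map_bh(bh, inode->i_sb, map.m_pblk);
		bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
		bh->b_size = inode->i_sb->s_blocksize * map.m_len;
		ret = 0;
	}
	return ret;	/* <0 on error; 0 otherwise (a hole is left unmapped) */
}
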
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 723
1 file changed, 371 insertions, 352 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3e0f6af9d08d..19df61c321fd 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -149,7 +149,7 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, | |||
149 | int ret; | 149 | int ret; |
150 | 150 | ||
151 | /* | 151 | /* |
152 | * Drop i_data_sem to avoid deadlock with ext4_get_blocks At this | 152 | * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this |
153 | * moment, get_block can be called only for blocks inside i_size since | 153 | * moment, get_block can be called only for blocks inside i_size since |
154 | * page cache has been already dropped and writes are blocked by | 154 | * page cache has been already dropped and writes are blocked by |
155 | * i_mutex. So we can safely drop the i_data_sem here. | 155 | * i_mutex. So we can safely drop the i_data_sem here. |
@@ -348,9 +348,8 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, | |||
348 | if (blk && | 348 | if (blk && |
349 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), | 349 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), |
350 | blk, 1))) { | 350 | blk, 1))) { |
351 | __ext4_error(inode->i_sb, function, | 351 | ext4_error_inode(function, inode, |
352 | "invalid block reference %u " | 352 | "invalid block reference %u", blk); |
353 | "in inode #%lu", blk, inode->i_ino); | ||
354 | return -EIO; | 353 | return -EIO; |
355 | } | 354 | } |
356 | } | 355 | } |
@@ -785,7 +784,7 @@ failed: | |||
785 | /* Allocation failed, free what we already allocated */ | 784 | /* Allocation failed, free what we already allocated */ |
786 | ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0); | 785 | ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0); |
787 | for (i = 1; i <= n ; i++) { | 786 | for (i = 1; i <= n ; i++) { |
788 | /* | 787 | /* |
789 | * branch[i].bh is newly allocated, so there is no | 788 | * branch[i].bh is newly allocated, so there is no |
790 | * need to revoke the block, which is why we don't | 789 | * need to revoke the block, which is why we don't |
791 | * need to set EXT4_FREE_BLOCKS_METADATA. | 790 | * need to set EXT4_FREE_BLOCKS_METADATA. |
@@ -875,7 +874,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, | |||
875 | 874 | ||
876 | err_out: | 875 | err_out: |
877 | for (i = 1; i <= num; i++) { | 876 | for (i = 1; i <= num; i++) { |
878 | /* | 877 | /* |
879 | * branch[i].bh is newly allocated, so there is no | 878 | * branch[i].bh is newly allocated, so there is no |
880 | * need to revoke the block, which is why we don't | 879 | * need to revoke the block, which is why we don't |
881 | * need to set EXT4_FREE_BLOCKS_METADATA. | 880 | * need to set EXT4_FREE_BLOCKS_METADATA. |
@@ -890,9 +889,9 @@ err_out: | |||
890 | } | 889 | } |
891 | 890 | ||
892 | /* | 891 | /* |
893 | * The ext4_ind_get_blocks() function handles non-extents inodes | 892 | * The ext4_ind_map_blocks() function handles non-extents inodes |
894 | * (i.e., using the traditional indirect/double-indirect i_blocks | 893 | * (i.e., using the traditional indirect/double-indirect i_blocks |
895 | * scheme) for ext4_get_blocks(). | 894 | * scheme) for ext4_map_blocks(). |
896 | * | 895 | * |
897 | * Allocation strategy is simple: if we have to allocate something, we will | 896 | * Allocation strategy is simple: if we have to allocate something, we will |
898 | * have to go the whole way to leaf. So let's do it before attaching anything | 897 | * have to go the whole way to leaf. So let's do it before attaching anything |
@@ -917,9 +916,8 @@ err_out: | |||
917 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system | 916 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system |
918 | * blocks. | 917 | * blocks. |
919 | */ | 918 | */ |
920 | static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | 919 | static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, |
921 | ext4_lblk_t iblock, unsigned int maxblocks, | 920 | struct ext4_map_blocks *map, |
922 | struct buffer_head *bh_result, | ||
923 | int flags) | 921 | int flags) |
924 | { | 922 | { |
925 | int err = -EIO; | 923 | int err = -EIO; |
@@ -933,9 +931,9 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | |||
933 | int count = 0; | 931 | int count = 0; |
934 | ext4_fsblk_t first_block = 0; | 932 | ext4_fsblk_t first_block = 0; |
935 | 933 | ||
936 | J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); | 934 | J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); |
937 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); | 935 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); |
938 | depth = ext4_block_to_path(inode, iblock, offsets, | 936 | depth = ext4_block_to_path(inode, map->m_lblk, offsets, |
939 | &blocks_to_boundary); | 937 | &blocks_to_boundary); |
940 | 938 | ||
941 | if (depth == 0) | 939 | if (depth == 0) |
@@ -946,10 +944,9 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | |||
946 | /* Simplest case - block found, no allocation needed */ | 944 | /* Simplest case - block found, no allocation needed */ |
947 | if (!partial) { | 945 | if (!partial) { |
948 | first_block = le32_to_cpu(chain[depth - 1].key); | 946 | first_block = le32_to_cpu(chain[depth - 1].key); |
949 | clear_buffer_new(bh_result); | ||
950 | count++; | 947 | count++; |
951 | /*map more blocks*/ | 948 | /*map more blocks*/ |
952 | while (count < maxblocks && count <= blocks_to_boundary) { | 949 | while (count < map->m_len && count <= blocks_to_boundary) { |
953 | ext4_fsblk_t blk; | 950 | ext4_fsblk_t blk; |
954 | 951 | ||
955 | blk = le32_to_cpu(*(chain[depth-1].p + count)); | 952 | blk = le32_to_cpu(*(chain[depth-1].p + count)); |
@@ -969,7 +966,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | |||
969 | /* | 966 | /* |
970 | * Okay, we need to do block allocation. | 967 | * Okay, we need to do block allocation. |
971 | */ | 968 | */ |
972 | goal = ext4_find_goal(inode, iblock, partial); | 969 | goal = ext4_find_goal(inode, map->m_lblk, partial); |
973 | 970 | ||
974 | /* the number of blocks need to allocate for [d,t]indirect blocks */ | 971 | /* the number of blocks need to allocate for [d,t]indirect blocks */ |
975 | indirect_blks = (chain + depth) - partial - 1; | 972 | indirect_blks = (chain + depth) - partial - 1; |
@@ -979,11 +976,11 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | |||
979 | * direct blocks to allocate for this branch. | 976 | * direct blocks to allocate for this branch. |
980 | */ | 977 | */ |
981 | count = ext4_blks_to_allocate(partial, indirect_blks, | 978 | count = ext4_blks_to_allocate(partial, indirect_blks, |
982 | maxblocks, blocks_to_boundary); | 979 | map->m_len, blocks_to_boundary); |
983 | /* | 980 | /* |
984 | * Block out ext4_truncate while we alter the tree | 981 | * Block out ext4_truncate while we alter the tree |
985 | */ | 982 | */ |
986 | err = ext4_alloc_branch(handle, inode, iblock, indirect_blks, | 983 | err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, |
987 | &count, goal, | 984 | &count, goal, |
988 | offsets + (partial - chain), partial); | 985 | offsets + (partial - chain), partial); |
989 | 986 | ||
@@ -995,18 +992,20 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | |||
995 | * may need to return -EAGAIN upwards in the worst case. --sct | 992 | * may need to return -EAGAIN upwards in the worst case. --sct |
996 | */ | 993 | */ |
997 | if (!err) | 994 | if (!err) |
998 | err = ext4_splice_branch(handle, inode, iblock, | 995 | err = ext4_splice_branch(handle, inode, map->m_lblk, |
999 | partial, indirect_blks, count); | 996 | partial, indirect_blks, count); |
1000 | if (err) | 997 | if (err) |
1001 | goto cleanup; | 998 | goto cleanup; |
1002 | 999 | ||
1003 | set_buffer_new(bh_result); | 1000 | map->m_flags |= EXT4_MAP_NEW; |
1004 | 1001 | ||
1005 | ext4_update_inode_fsync_trans(handle, inode, 1); | 1002 | ext4_update_inode_fsync_trans(handle, inode, 1); |
1006 | got_it: | 1003 | got_it: |
1007 | map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); | 1004 | map->m_flags |= EXT4_MAP_MAPPED; |
1005 | map->m_pblk = le32_to_cpu(chain[depth-1].key); | ||
1006 | map->m_len = count; | ||
1008 | if (count > blocks_to_boundary) | 1007 | if (count > blocks_to_boundary) |
1009 | set_buffer_boundary(bh_result); | 1008 | map->m_flags |= EXT4_MAP_BOUNDARY; |
1010 | err = count; | 1009 | err = count; |
1011 | /* Clean up and exit */ | 1010 | /* Clean up and exit */ |
1012 | partial = chain + depth - 1; /* the whole chain */ | 1011 | partial = chain + depth - 1; /* the whole chain */ |
@@ -1016,7 +1015,6 @@ cleanup: | |||
1016 | brelse(partial->bh); | 1015 | brelse(partial->bh); |
1017 | partial--; | 1016 | partial--; |
1018 | } | 1017 | } |
1019 | BUFFER_TRACE(bh_result, "returned"); | ||
1020 | out: | 1018 | out: |
1021 | return err; | 1019 | return err; |
1022 | } | 1020 | } |
@@ -1061,7 +1059,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, | |||
1061 | */ | 1059 | */ |
1062 | static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) | 1060 | static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) |
1063 | { | 1061 | { |
1064 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | 1062 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
1065 | return ext4_ext_calc_metadata_amount(inode, lblock); | 1063 | return ext4_ext_calc_metadata_amount(inode, lblock); |
1066 | 1064 | ||
1067 | return ext4_indirect_calc_metadata_amount(inode, lblock); | 1065 | return ext4_indirect_calc_metadata_amount(inode, lblock); |
@@ -1076,7 +1074,6 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
1076 | { | 1074 | { |
1077 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1075 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1078 | struct ext4_inode_info *ei = EXT4_I(inode); | 1076 | struct ext4_inode_info *ei = EXT4_I(inode); |
1079 | int mdb_free = 0, allocated_meta_blocks = 0; | ||
1080 | 1077 | ||
1081 | spin_lock(&ei->i_block_reservation_lock); | 1078 | spin_lock(&ei->i_block_reservation_lock); |
1082 | trace_ext4_da_update_reserve_space(inode, used); | 1079 | trace_ext4_da_update_reserve_space(inode, used); |
@@ -1091,11 +1088,10 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
1091 | 1088 | ||
1092 | /* Update per-inode reservations */ | 1089 | /* Update per-inode reservations */ |
1093 | ei->i_reserved_data_blocks -= used; | 1090 | ei->i_reserved_data_blocks -= used; |
1094 | used += ei->i_allocated_meta_blocks; | ||
1095 | ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; | 1091 | ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; |
1096 | allocated_meta_blocks = ei->i_allocated_meta_blocks; | 1092 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, |
1093 | used + ei->i_allocated_meta_blocks); | ||
1097 | ei->i_allocated_meta_blocks = 0; | 1094 | ei->i_allocated_meta_blocks = 0; |
1098 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, used); | ||
1099 | 1095 | ||
1100 | if (ei->i_reserved_data_blocks == 0) { | 1096 | if (ei->i_reserved_data_blocks == 0) { |
1101 | /* | 1097 | /* |
@@ -1103,30 +1099,23 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
1103 | * only when we have written all of the delayed | 1099 | * only when we have written all of the delayed |
1104 | * allocation blocks. | 1100 | * allocation blocks. |
1105 | */ | 1101 | */ |
1106 | mdb_free = ei->i_reserved_meta_blocks; | 1102 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, |
1103 | ei->i_reserved_meta_blocks); | ||
1107 | ei->i_reserved_meta_blocks = 0; | 1104 | ei->i_reserved_meta_blocks = 0; |
1108 | ei->i_da_metadata_calc_len = 0; | 1105 | ei->i_da_metadata_calc_len = 0; |
1109 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); | ||
1110 | } | 1106 | } |
1111 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1107 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1112 | 1108 | ||
1113 | /* Update quota subsystem */ | 1109 | /* Update quota subsystem for data blocks */ |
1114 | if (quota_claim) { | 1110 | if (quota_claim) |
1115 | dquot_claim_block(inode, used); | 1111 | dquot_claim_block(inode, used); |
1116 | if (mdb_free) | 1112 | else { |
1117 | dquot_release_reservation_block(inode, mdb_free); | ||
1118 | } else { | ||
1119 | /* | 1113 | /* |
1120 | * We did fallocate with an offset that is already delayed | 1114 | * We did fallocate with an offset that is already delayed |
1121 | * allocated. So on delayed allocated writeback we should | 1115 | * allocated. So on delayed allocated writeback we should |
1122 | * not update the quota for allocated blocks. But then | 1116 | * not re-claim the quota for fallocated blocks. |
1123 | * converting an fallocate region to initialized region would | ||
1124 | * have caused a metadata allocation. So claim quota for | ||
1125 | * that | ||
1126 | */ | 1117 | */ |
1127 | if (allocated_meta_blocks) | 1118 | dquot_release_reservation_block(inode, used); |
1128 | dquot_claim_block(inode, allocated_meta_blocks); | ||
1129 | dquot_release_reservation_block(inode, mdb_free + used); | ||
1130 | } | 1119 | } |
1131 | 1120 | ||
1132 | /* | 1121 | /* |
@@ -1139,15 +1128,15 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
1139 | ext4_discard_preallocations(inode); | 1128 | ext4_discard_preallocations(inode); |
1140 | } | 1129 | } |
1141 | 1130 | ||
1142 | static int check_block_validity(struct inode *inode, const char *msg, | 1131 | static int check_block_validity(struct inode *inode, const char *func, |
1143 | sector_t logical, sector_t phys, int len) | 1132 | struct ext4_map_blocks *map) |
1144 | { | 1133 | { |
1145 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { | 1134 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, |
1146 | __ext4_error(inode->i_sb, msg, | 1135 | map->m_len)) { |
1147 | "inode #%lu logical block %llu mapped to %llu " | 1136 | ext4_error_inode(func, inode, |
1148 | "(size %d)", inode->i_ino, | 1137 | "lblock %lu mapped to illegal pblock %llu " |
1149 | (unsigned long long) logical, | 1138 | "(length %d)", (unsigned long) map->m_lblk, |
1150 | (unsigned long long) phys, len); | 1139 | map->m_pblk, map->m_len); |
1151 | return -EIO; | 1140 | return -EIO; |
1152 | } | 1141 | } |
1153 | return 0; | 1142 | return 0; |
@@ -1212,15 +1201,15 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
1212 | } | 1201 | } |
1213 | 1202 | ||
1214 | /* | 1203 | /* |
1215 | * The ext4_get_blocks() function tries to look up the requested blocks, | 1204 | * The ext4_map_blocks() function tries to look up the requested blocks, |
1216 | * and returns if the blocks are already mapped. | 1205 | * and returns if the blocks are already mapped. |
1217 | * | 1206 | * |
1218 | * Otherwise it takes the write lock of the i_data_sem and allocate blocks | 1207 | * Otherwise it takes the write lock of the i_data_sem and allocate blocks |
1219 | * and store the allocated blocks in the result buffer head and mark it | 1208 | * and store the allocated blocks in the result buffer head and mark it |
1220 | * mapped. | 1209 | * mapped. |
1221 | * | 1210 | * |
1222 | * If file type is extents based, it will call ext4_ext_get_blocks(), | 1211 | * If file type is extents based, it will call ext4_ext_map_blocks(), |
1223 | * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping | 1212 | * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping |
1224 | * based files | 1213 | * based files |
1225 | * | 1214 | * |
1226 | * On success, it returns the number of blocks being mapped or allocate. | 1215 | * On success, it returns the number of blocks being mapped or allocate. |
@@ -1233,35 +1222,29 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
1233 | * | 1222 | * |
1234 | * It returns the error in case of allocation failure. | 1223 | * It returns the error in case of allocation failure. |
1235 | */ | 1224 | */ |
1236 | int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | 1225 | int ext4_map_blocks(handle_t *handle, struct inode *inode, |
1237 | unsigned int max_blocks, struct buffer_head *bh, | 1226 | struct ext4_map_blocks *map, int flags) |
1238 | int flags) | ||
1239 | { | 1227 | { |
1240 | int retval; | 1228 | int retval; |
1241 | 1229 | ||
1242 | clear_buffer_mapped(bh); | 1230 | map->m_flags = 0; |
1243 | clear_buffer_unwritten(bh); | 1231 | ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," |
1244 | 1232 | "logical block %lu\n", inode->i_ino, flags, map->m_len, | |
1245 | ext_debug("ext4_get_blocks(): inode %lu, flag %d, max_blocks %u," | 1233 | (unsigned long) map->m_lblk); |
1246 | "logical block %lu\n", inode->i_ino, flags, max_blocks, | ||
1247 | (unsigned long)block); | ||
1248 | /* | 1234 | /* |
1249 | * Try to see if we can get the block without requesting a new | 1235 | * Try to see if we can get the block without requesting a new |
1250 | * file system block. | 1236 | * file system block. |
1251 | */ | 1237 | */ |
1252 | down_read((&EXT4_I(inode)->i_data_sem)); | 1238 | down_read((&EXT4_I(inode)->i_data_sem)); |
1253 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 1239 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
1254 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, | 1240 | retval = ext4_ext_map_blocks(handle, inode, map, 0); |
1255 | bh, 0); | ||
1256 | } else { | 1241 | } else { |
1257 | retval = ext4_ind_get_blocks(handle, inode, block, max_blocks, | 1242 | retval = ext4_ind_map_blocks(handle, inode, map, 0); |
1258 | bh, 0); | ||
1259 | } | 1243 | } |
1260 | up_read((&EXT4_I(inode)->i_data_sem)); | 1244 | up_read((&EXT4_I(inode)->i_data_sem)); |
1261 | 1245 | ||
1262 | if (retval > 0 && buffer_mapped(bh)) { | 1246 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
1263 | int ret = check_block_validity(inode, "file system corruption", | 1247 | int ret = check_block_validity(inode, __func__, map); |
1264 | block, bh->b_blocknr, retval); | ||
1265 | if (ret != 0) | 1248 | if (ret != 0) |
1266 | return ret; | 1249 | return ret; |
1267 | } | 1250 | } |
@@ -1277,7 +1260,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1277 | * ext4_ext_get_block() returns th create = 0 | 1260 | * ext4_ext_get_block() returns th create = 0 |
1278 | * with buffer head unmapped. | 1261 | * with buffer head unmapped. |
1279 | */ | 1262 | */ |
1280 | if (retval > 0 && buffer_mapped(bh)) | 1263 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) |
1281 | return retval; | 1264 | return retval; |
1282 | 1265 | ||
1283 | /* | 1266 | /* |
@@ -1290,7 +1273,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1290 | * of BH_Unwritten and BH_Mapped flags being simultaneously | 1273 | * of BH_Unwritten and BH_Mapped flags being simultaneously |
1291 | * set on the buffer_head. | 1274 | * set on the buffer_head. |
1292 | */ | 1275 | */ |
1293 | clear_buffer_unwritten(bh); | 1276 | map->m_flags &= ~EXT4_MAP_UNWRITTEN; |
1294 | 1277 | ||
1295 | /* | 1278 | /* |
1296 | * New blocks allocate and/or writing to uninitialized extent | 1279 | * New blocks allocate and/or writing to uninitialized extent |
@@ -1312,14 +1295,12 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1312 | * We need to check for EXT4 here because migrate | 1295 | * We need to check for EXT4 here because migrate |
1313 | * could have changed the inode type in between | 1296 | * could have changed the inode type in between |
1314 | */ | 1297 | */ |
1315 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 1298 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
1316 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, | 1299 | retval = ext4_ext_map_blocks(handle, inode, map, flags); |
1317 | bh, flags); | ||
1318 | } else { | 1300 | } else { |
1319 | retval = ext4_ind_get_blocks(handle, inode, block, | 1301 | retval = ext4_ind_map_blocks(handle, inode, map, flags); |
1320 | max_blocks, bh, flags); | ||
1321 | 1302 | ||
1322 | if (retval > 0 && buffer_new(bh)) { | 1303 | if (retval > 0 && map->m_flags & EXT4_MAP_NEW) { |
1323 | /* | 1304 | /* |
1324 | * We allocated new blocks which will result in | 1305 | * We allocated new blocks which will result in |
1325 | * i_data's format changing. Force the migrate | 1306 | * i_data's format changing. Force the migrate |
@@ -1342,10 +1323,10 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1342 | EXT4_I(inode)->i_delalloc_reserved_flag = 0; | 1323 | EXT4_I(inode)->i_delalloc_reserved_flag = 0; |
1343 | 1324 | ||
1344 | up_write((&EXT4_I(inode)->i_data_sem)); | 1325 | up_write((&EXT4_I(inode)->i_data_sem)); |
1345 | if (retval > 0 && buffer_mapped(bh)) { | 1326 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
1346 | int ret = check_block_validity(inode, "file system " | 1327 | int ret = check_block_validity(inode, |
1347 | "corruption after allocation", | 1328 | "ext4_map_blocks_after_alloc", |
1348 | block, bh->b_blocknr, retval); | 1329 | map); |
1349 | if (ret != 0) | 1330 | if (ret != 0) |
1350 | return ret; | 1331 | return ret; |
1351 | } | 1332 | } |
@@ -1355,109 +1336,109 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1355 | /* Maximum number of blocks we map for direct IO at once. */ | 1336 | /* Maximum number of blocks we map for direct IO at once. */ |
1356 | #define DIO_MAX_BLOCKS 4096 | 1337 | #define DIO_MAX_BLOCKS 4096 |
1357 | 1338 | ||
1358 | int ext4_get_block(struct inode *inode, sector_t iblock, | 1339 | static int _ext4_get_block(struct inode *inode, sector_t iblock, |
1359 | struct buffer_head *bh_result, int create) | 1340 | struct buffer_head *bh, int flags) |
1360 | { | 1341 | { |
1361 | handle_t *handle = ext4_journal_current_handle(); | 1342 | handle_t *handle = ext4_journal_current_handle(); |
1343 | struct ext4_map_blocks map; | ||
1362 | int ret = 0, started = 0; | 1344 | int ret = 0, started = 0; |
1363 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | ||
1364 | int dio_credits; | 1345 | int dio_credits; |
1365 | 1346 | ||
1366 | if (create && !handle) { | 1347 | map.m_lblk = iblock; |
1348 | map.m_len = bh->b_size >> inode->i_blkbits; | ||
1349 | |||
1350 | if (flags && !handle) { | ||
1367 | /* Direct IO write... */ | 1351 | /* Direct IO write... */ |
1368 | if (max_blocks > DIO_MAX_BLOCKS) | 1352 | if (map.m_len > DIO_MAX_BLOCKS) |
1369 | max_blocks = DIO_MAX_BLOCKS; | 1353 | map.m_len = DIO_MAX_BLOCKS; |
1370 | dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); | 1354 | dio_credits = ext4_chunk_trans_blocks(inode, map.m_len); |
1371 | handle = ext4_journal_start(inode, dio_credits); | 1355 | handle = ext4_journal_start(inode, dio_credits); |
1372 | if (IS_ERR(handle)) { | 1356 | if (IS_ERR(handle)) { |
1373 | ret = PTR_ERR(handle); | 1357 | ret = PTR_ERR(handle); |
1374 | goto out; | 1358 | return ret; |
1375 | } | 1359 | } |
1376 | started = 1; | 1360 | started = 1; |
1377 | } | 1361 | } |
1378 | 1362 | ||
1379 | ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, | 1363 | ret = ext4_map_blocks(handle, inode, &map, flags); |
1380 | create ? EXT4_GET_BLOCKS_CREATE : 0); | ||
1381 | if (ret > 0) { | 1364 | if (ret > 0) { |
1382 | bh_result->b_size = (ret << inode->i_blkbits); | 1365 | map_bh(bh, inode->i_sb, map.m_pblk); |
1366 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | ||
1367 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; | ||
1383 | ret = 0; | 1368 | ret = 0; |
1384 | } | 1369 | } |
1385 | if (started) | 1370 | if (started) |
1386 | ext4_journal_stop(handle); | 1371 | ext4_journal_stop(handle); |
1387 | out: | ||
1388 | return ret; | 1372 | return ret; |
1389 | } | 1373 | } |
1390 | 1374 | ||
1375 | int ext4_get_block(struct inode *inode, sector_t iblock, | ||
1376 | struct buffer_head *bh, int create) | ||
1377 | { | ||
1378 | return _ext4_get_block(inode, iblock, bh, | ||
1379 | create ? EXT4_GET_BLOCKS_CREATE : 0); | ||
1380 | } | ||
1381 | |||
1391 | /* | 1382 | /* |
1392 | * `handle' can be NULL if create is zero | 1383 | * `handle' can be NULL if create is zero |
1393 | */ | 1384 | */ |
1394 | struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | 1385 | struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, |
1395 | ext4_lblk_t block, int create, int *errp) | 1386 | ext4_lblk_t block, int create, int *errp) |
1396 | { | 1387 | { |
1397 | struct buffer_head dummy; | 1388 | struct ext4_map_blocks map; |
1389 | struct buffer_head *bh; | ||
1398 | int fatal = 0, err; | 1390 | int fatal = 0, err; |
1399 | int flags = 0; | ||
1400 | 1391 | ||
1401 | J_ASSERT(handle != NULL || create == 0); | 1392 | J_ASSERT(handle != NULL || create == 0); |
1402 | 1393 | ||
1403 | dummy.b_state = 0; | 1394 | map.m_lblk = block; |
1404 | dummy.b_blocknr = -1000; | 1395 | map.m_len = 1; |
1405 | buffer_trace_init(&dummy.b_history); | 1396 | err = ext4_map_blocks(handle, inode, &map, |
1406 | if (create) | 1397 | create ? EXT4_GET_BLOCKS_CREATE : 0); |
1407 | flags |= EXT4_GET_BLOCKS_CREATE; | 1398 | |
1408 | err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags); | 1399 | if (err < 0) |
1409 | /* | 1400 | *errp = err; |
1410 | * ext4_get_blocks() returns number of blocks mapped. 0 in | 1401 | if (err <= 0) |
1411 | * case of a HOLE. | 1402 | return NULL; |
1412 | */ | 1403 | *errp = 0; |
1413 | if (err > 0) { | 1404 | |
1414 | if (err > 1) | 1405 | bh = sb_getblk(inode->i_sb, map.m_pblk); |
1415 | WARN_ON(1); | 1406 | if (!bh) { |
1416 | err = 0; | 1407 | *errp = -EIO; |
1408 | return NULL; | ||
1417 | } | 1409 | } |
1418 | *errp = err; | 1410 | if (map.m_flags & EXT4_MAP_NEW) { |
1419 | if (!err && buffer_mapped(&dummy)) { | 1411 | J_ASSERT(create != 0); |
1420 | struct buffer_head *bh; | 1412 | J_ASSERT(handle != NULL); |
1421 | bh = sb_getblk(inode->i_sb, dummy.b_blocknr); | ||
1422 | if (!bh) { | ||
1423 | *errp = -EIO; | ||
1424 | goto err; | ||
1425 | } | ||
1426 | if (buffer_new(&dummy)) { | ||
1427 | J_ASSERT(create != 0); | ||
1428 | J_ASSERT(handle != NULL); | ||
1429 | 1413 | ||
1430 | /* | 1414 | /* |
1431 | * Now that we do not always journal data, we should | 1415 | * Now that we do not always journal data, we should |
1432 | * keep in mind whether this should always journal the | 1416 | * keep in mind whether this should always journal the |
1433 | * new buffer as metadata. For now, regular file | 1417 | * new buffer as metadata. For now, regular file |
1434 | * writes use ext4_get_block instead, so it's not a | 1418 | * writes use ext4_get_block instead, so it's not a |
1435 | * problem. | 1419 | * problem. |
1436 | */ | 1420 | */ |
1437 | lock_buffer(bh); | 1421 | lock_buffer(bh); |
1438 | BUFFER_TRACE(bh, "call get_create_access"); | 1422 | BUFFER_TRACE(bh, "call get_create_access"); |
1439 | fatal = ext4_journal_get_create_access(handle, bh); | 1423 | fatal = ext4_journal_get_create_access(handle, bh); |
1440 | if (!fatal && !buffer_uptodate(bh)) { | 1424 | if (!fatal && !buffer_uptodate(bh)) { |
1441 | memset(bh->b_data, 0, inode->i_sb->s_blocksize); | 1425 | memset(bh->b_data, 0, inode->i_sb->s_blocksize); |
1442 | set_buffer_uptodate(bh); | 1426 | set_buffer_uptodate(bh); |
1443 | } | ||
1444 | unlock_buffer(bh); | ||
1445 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | ||
1446 | err = ext4_handle_dirty_metadata(handle, inode, bh); | ||
1447 | if (!fatal) | ||
1448 | fatal = err; | ||
1449 | } else { | ||
1450 | BUFFER_TRACE(bh, "not a new buffer"); | ||
1451 | } | ||
1452 | if (fatal) { | ||
1453 | *errp = fatal; | ||
1454 | brelse(bh); | ||
1455 | bh = NULL; | ||
1456 | } | 1427 | } |
1457 | return bh; | 1428 | unlock_buffer(bh); |
1429 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | ||
1430 | err = ext4_handle_dirty_metadata(handle, inode, bh); | ||
1431 | if (!fatal) | ||
1432 | fatal = err; | ||
1433 | } else { | ||
1434 | BUFFER_TRACE(bh, "not a new buffer"); | ||
1458 | } | 1435 | } |
1459 | err: | 1436 | if (fatal) { |
1460 | return NULL; | 1437 | *errp = fatal; |
1438 | brelse(bh); | ||
1439 | bh = NULL; | ||
1440 | } | ||
1441 | return bh; | ||
1461 | } | 1442 | } |
1462 | 1443 | ||
1463 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | 1444 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, |
@@ -1860,7 +1841,7 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) | |||
1860 | int retries = 0; | 1841 | int retries = 0; |
1861 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1842 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1862 | struct ext4_inode_info *ei = EXT4_I(inode); | 1843 | struct ext4_inode_info *ei = EXT4_I(inode); |
1863 | unsigned long md_needed, md_reserved; | 1844 | unsigned long md_needed; |
1864 | int ret; | 1845 | int ret; |
1865 | 1846 | ||
1866 | /* | 1847 | /* |
@@ -1870,22 +1851,24 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) | |||
1870 | */ | 1851 | */ |
1871 | repeat: | 1852 | repeat: |
1872 | spin_lock(&ei->i_block_reservation_lock); | 1853 | spin_lock(&ei->i_block_reservation_lock); |
1873 | md_reserved = ei->i_reserved_meta_blocks; | ||
1874 | md_needed = ext4_calc_metadata_amount(inode, lblock); | 1854 | md_needed = ext4_calc_metadata_amount(inode, lblock); |
1875 | trace_ext4_da_reserve_space(inode, md_needed); | 1855 | trace_ext4_da_reserve_space(inode, md_needed); |
1876 | spin_unlock(&ei->i_block_reservation_lock); | 1856 | spin_unlock(&ei->i_block_reservation_lock); |
1877 | 1857 | ||
1878 | /* | 1858 | /* |
1879 | * Make quota reservation here to prevent quota overflow | 1859 | * We will charge metadata quota at writeout time; this saves |
1880 | * later. Real quota accounting is done at pages writeout | 1860 | * us from metadata over-estimation, though we may go over by |
1881 | * time. | 1861 | * a small amount in the end. Here we just reserve for data. |
1882 | */ | 1862 | */ |
1883 | ret = dquot_reserve_block(inode, md_needed + 1); | 1863 | ret = dquot_reserve_block(inode, 1); |
1884 | if (ret) | 1864 | if (ret) |
1885 | return ret; | 1865 | return ret; |
1886 | 1866 | /* | |
1867 | * We do still charge estimated metadata to the sb though; | ||
1868 | * we cannot afford to run out of free blocks. | ||
1869 | */ | ||
1887 | if (ext4_claim_free_blocks(sbi, md_needed + 1)) { | 1870 | if (ext4_claim_free_blocks(sbi, md_needed + 1)) { |
1888 | dquot_release_reservation_block(inode, md_needed + 1); | 1871 | dquot_release_reservation_block(inode, 1); |
1889 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | 1872 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { |
1890 | yield(); | 1873 | yield(); |
1891 | goto repeat; | 1874 | goto repeat; |
@@ -1910,6 +1893,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1910 | 1893 | ||
1911 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1894 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
1912 | 1895 | ||
1896 | trace_ext4_da_release_space(inode, to_free); | ||
1913 | if (unlikely(to_free > ei->i_reserved_data_blocks)) { | 1897 | if (unlikely(to_free > ei->i_reserved_data_blocks)) { |
1914 | /* | 1898 | /* |
1915 | * if there aren't enough reserved blocks, then the | 1899 | * if there aren't enough reserved blocks, then the |
@@ -1932,12 +1916,13 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1932 | * only when we have written all of the delayed | 1916 | * only when we have written all of the delayed |
1933 | * allocation blocks. | 1917 | * allocation blocks. |
1934 | */ | 1918 | */ |
1935 | to_free += ei->i_reserved_meta_blocks; | 1919 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, |
1920 | ei->i_reserved_meta_blocks); | ||
1936 | ei->i_reserved_meta_blocks = 0; | 1921 | ei->i_reserved_meta_blocks = 0; |
1937 | ei->i_da_metadata_calc_len = 0; | 1922 | ei->i_da_metadata_calc_len = 0; |
1938 | } | 1923 | } |
1939 | 1924 | ||
1940 | /* update fs dirty blocks counter */ | 1925 | /* update fs dirty data blocks counter */ |
1941 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); | 1926 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); |
1942 | 1927 | ||
1943 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1928 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
@@ -2042,28 +2027,23 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
2042 | /* | 2027 | /* |
2043 | * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers | 2028 | * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers |
2044 | * | 2029 | * |
2045 | * @mpd->inode - inode to walk through | ||
2046 | * @exbh->b_blocknr - first block on a disk | ||
2047 | * @exbh->b_size - amount of space in bytes | ||
2048 | * @logical - first logical block to start assignment with | ||
2049 | * | ||
2050 | * the function goes through all passed space and put actual disk | 2030 | * the function goes through all passed space and put actual disk |
2051 | * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten | 2031 | * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten |
2052 | */ | 2032 | */ |
2053 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | 2033 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, |
2054 | struct buffer_head *exbh) | 2034 | struct ext4_map_blocks *map) |
2055 | { | 2035 | { |
2056 | struct inode *inode = mpd->inode; | 2036 | struct inode *inode = mpd->inode; |
2057 | struct address_space *mapping = inode->i_mapping; | 2037 | struct address_space *mapping = inode->i_mapping; |
2058 | int blocks = exbh->b_size >> inode->i_blkbits; | 2038 | int blocks = map->m_len; |
2059 | sector_t pblock = exbh->b_blocknr, cur_logical; | 2039 | sector_t pblock = map->m_pblk, cur_logical; |
2060 | struct buffer_head *head, *bh; | 2040 | struct buffer_head *head, *bh; |
2061 | pgoff_t index, end; | 2041 | pgoff_t index, end; |
2062 | struct pagevec pvec; | 2042 | struct pagevec pvec; |
2063 | int nr_pages, i; | 2043 | int nr_pages, i; |
2064 | 2044 | ||
2065 | index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | 2045 | index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); |
2066 | end = (logical + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | 2046 | end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); |
2067 | cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | 2047 | cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); |
2068 | 2048 | ||
2069 | pagevec_init(&pvec, 0); | 2049 | pagevec_init(&pvec, 0); |
@@ -2090,17 +2070,16 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
2090 | 2070 | ||
2091 | /* skip blocks out of the range */ | 2071 | /* skip blocks out of the range */ |
2092 | do { | 2072 | do { |
2093 | if (cur_logical >= logical) | 2073 | if (cur_logical >= map->m_lblk) |
2094 | break; | 2074 | break; |
2095 | cur_logical++; | 2075 | cur_logical++; |
2096 | } while ((bh = bh->b_this_page) != head); | 2076 | } while ((bh = bh->b_this_page) != head); |
2097 | 2077 | ||
2098 | do { | 2078 | do { |
2099 | if (cur_logical >= logical + blocks) | 2079 | if (cur_logical >= map->m_lblk + blocks) |
2100 | break; | 2080 | break; |
2101 | 2081 | ||
2102 | if (buffer_delay(bh) || | 2082 | if (buffer_delay(bh) || buffer_unwritten(bh)) { |
2103 | buffer_unwritten(bh)) { | ||
2104 | 2083 | ||
2105 | BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); | 2084 | BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); |
2106 | 2085 | ||
@@ -2119,7 +2098,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
2119 | } else if (buffer_mapped(bh)) | 2098 | } else if (buffer_mapped(bh)) |
2120 | BUG_ON(bh->b_blocknr != pblock); | 2099 | BUG_ON(bh->b_blocknr != pblock); |
2121 | 2100 | ||
2122 | if (buffer_uninit(exbh)) | 2101 | if (map->m_flags & EXT4_MAP_UNINIT) |
2123 | set_buffer_uninit(bh); | 2102 | set_buffer_uninit(bh); |
2124 | cur_logical++; | 2103 | cur_logical++; |
2125 | pblock++; | 2104 | pblock++; |
@@ -2130,21 +2109,6 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
2130 | } | 2109 | } |
2131 | 2110 | ||
2132 | 2111 | ||
2133 | /* | ||
2134 | * __unmap_underlying_blocks - just a helper function to unmap | ||
2135 | * set of blocks described by @bh | ||
2136 | */ | ||
2137 | static inline void __unmap_underlying_blocks(struct inode *inode, | ||
2138 | struct buffer_head *bh) | ||
2139 | { | ||
2140 | struct block_device *bdev = inode->i_sb->s_bdev; | ||
2141 | int blocks, i; | ||
2142 | |||
2143 | blocks = bh->b_size >> inode->i_blkbits; | ||
2144 | for (i = 0; i < blocks; i++) | ||
2145 | unmap_underlying_metadata(bdev, bh->b_blocknr + i); | ||
2146 | } | ||
2147 | |||
2148 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | 2112 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, |
2149 | sector_t logical, long blk_cnt) | 2113 | sector_t logical, long blk_cnt) |
2150 | { | 2114 | { |
@@ -2206,7 +2170,7 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
2206 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) | 2170 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) |
2207 | { | 2171 | { |
2208 | int err, blks, get_blocks_flags; | 2172 | int err, blks, get_blocks_flags; |
2209 | struct buffer_head new; | 2173 | struct ext4_map_blocks map; |
2210 | sector_t next = mpd->b_blocknr; | 2174 | sector_t next = mpd->b_blocknr; |
2211 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; | 2175 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; |
2212 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; | 2176 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; |
@@ -2247,15 +2211,15 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2247 | * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting | 2211 | * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting |
2248 | * variables are updated after the blocks have been allocated. | 2212 | * variables are updated after the blocks have been allocated. |
2249 | */ | 2213 | */ |
2250 | new.b_state = 0; | 2214 | map.m_lblk = next; |
2215 | map.m_len = max_blocks; | ||
2251 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE; | 2216 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE; |
2252 | if (ext4_should_dioread_nolock(mpd->inode)) | 2217 | if (ext4_should_dioread_nolock(mpd->inode)) |
2253 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; | 2218 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; |
2254 | if (mpd->b_state & (1 << BH_Delay)) | 2219 | if (mpd->b_state & (1 << BH_Delay)) |
2255 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; | 2220 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; |
2256 | 2221 | ||
2257 | blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, | 2222 | blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); |
2258 | &new, get_blocks_flags); | ||
2259 | if (blks < 0) { | 2223 | if (blks < 0) { |
2260 | err = blks; | 2224 | err = blks; |
2261 | /* | 2225 | /* |
@@ -2282,7 +2246,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2282 | ext4_msg(mpd->inode->i_sb, KERN_CRIT, | 2246 | ext4_msg(mpd->inode->i_sb, KERN_CRIT, |
2283 | "delayed block allocation failed for inode %lu at " | 2247 | "delayed block allocation failed for inode %lu at " |
2284 | "logical offset %llu with max blocks %zd with " | 2248 | "logical offset %llu with max blocks %zd with " |
2285 | "error %d\n", mpd->inode->i_ino, | 2249 | "error %d", mpd->inode->i_ino, |
2286 | (unsigned long long) next, | 2250 | (unsigned long long) next, |
2287 | mpd->b_size >> mpd->inode->i_blkbits, err); | 2251 | mpd->b_size >> mpd->inode->i_blkbits, err); |
2288 | printk(KERN_CRIT "This should not happen!! " | 2252 | printk(KERN_CRIT "This should not happen!! " |
@@ -2297,10 +2261,13 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2297 | } | 2261 | } |
2298 | BUG_ON(blks == 0); | 2262 | BUG_ON(blks == 0); |
2299 | 2263 | ||
2300 | new.b_size = (blks << mpd->inode->i_blkbits); | 2264 | if (map.m_flags & EXT4_MAP_NEW) { |
2265 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; | ||
2266 | int i; | ||
2301 | 2267 | ||
2302 | if (buffer_new(&new)) | 2268 | for (i = 0; i < map.m_len; i++) |
2303 | __unmap_underlying_blocks(mpd->inode, &new); | 2269 | unmap_underlying_metadata(bdev, map.m_pblk + i); |
2270 | } | ||
2304 | 2271 | ||
2305 | /* | 2272 | /* |
2306 | * If blocks are delayed marked, we need to | 2273 | * If blocks are delayed marked, we need to |
@@ -2308,7 +2275,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2308 | */ | 2275 | */ |
2309 | if ((mpd->b_state & (1 << BH_Delay)) || | 2276 | if ((mpd->b_state & (1 << BH_Delay)) || |
2310 | (mpd->b_state & (1 << BH_Unwritten))) | 2277 | (mpd->b_state & (1 << BH_Unwritten))) |
2311 | mpage_put_bnr_to_bhs(mpd, next, &new); | 2278 | mpage_put_bnr_to_bhs(mpd, &map); |
2312 | 2279 | ||
2313 | if (ext4_should_order_data(mpd->inode)) { | 2280 | if (ext4_should_order_data(mpd->inode)) { |
2314 | err = ext4_jbd2_file_inode(handle, mpd->inode); | 2281 | err = ext4_jbd2_file_inode(handle, mpd->inode); |
@@ -2349,8 +2316,17 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, | |||
2349 | sector_t next; | 2316 | sector_t next; |
2350 | int nrblocks = mpd->b_size >> mpd->inode->i_blkbits; | 2317 | int nrblocks = mpd->b_size >> mpd->inode->i_blkbits; |
2351 | 2318 | ||
2319 | /* | ||
2320 | * XXX Don't go larger than mballoc is willing to allocate | ||
2321 | * This is a stopgap solution. We eventually need to fold | ||
2322 | * mpage_da_submit_io() into this function and then call | ||
2323 | * ext4_get_blocks() multiple times in a loop | ||
2324 | */ | ||
2325 | if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) | ||
2326 | goto flush_it; | ||
2327 | |||
2352 | /* check if thereserved journal credits might overflow */ | 2328 | /* check if thereserved journal credits might overflow */ |
2353 | if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { | 2329 | if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) { |
2354 | if (nrblocks >= EXT4_MAX_TRANS_DATA) { | 2330 | if (nrblocks >= EXT4_MAX_TRANS_DATA) { |
2355 | /* | 2331 | /* |
2356 | * With non-extent format we are limited by the journal | 2332 | * With non-extent format we are limited by the journal |
@@ -2423,17 +2399,6 @@ static int __mpage_da_writepage(struct page *page, | |||
2423 | struct buffer_head *bh, *head; | 2399 | struct buffer_head *bh, *head; |
2424 | sector_t logical; | 2400 | sector_t logical; |
2425 | 2401 | ||
2426 | if (mpd->io_done) { | ||
2427 | /* | ||
2428 | * Rest of the page in the page_vec | ||
2429 | * redirty then and skip then. We will | ||
2430 | * try to write them again after | ||
2431 | * starting a new transaction | ||
2432 | */ | ||
2433 | redirty_page_for_writepage(wbc, page); | ||
2434 | unlock_page(page); | ||
2435 | return MPAGE_DA_EXTENT_TAIL; | ||
2436 | } | ||
2437 | /* | 2402 | /* |
2438 | * Can we merge this page to current extent? | 2403 | * Can we merge this page to current extent? |
2439 | */ | 2404 | */ |
@@ -2528,8 +2493,9 @@ static int __mpage_da_writepage(struct page *page, | |||
2528 | * initialized properly. | 2493 | * initialized properly. |
2529 | */ | 2494 | */ |
2530 | static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | 2495 | static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, |
2531 | struct buffer_head *bh_result, int create) | 2496 | struct buffer_head *bh, int create) |
2532 | { | 2497 | { |
2498 | struct ext4_map_blocks map; | ||
2533 | int ret = 0; | 2499 | int ret = 0; |
2534 | sector_t invalid_block = ~((sector_t) 0xffff); | 2500 | sector_t invalid_block = ~((sector_t) 0xffff); |
2535 | 2501 | ||
@@ -2537,16 +2503,22 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2537 | invalid_block = ~0; | 2503 | invalid_block = ~0; |
2538 | 2504 | ||
2539 | BUG_ON(create == 0); | 2505 | BUG_ON(create == 0); |
2540 | BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); | 2506 | BUG_ON(bh->b_size != inode->i_sb->s_blocksize); |
2507 | |||
2508 | map.m_lblk = iblock; | ||
2509 | map.m_len = 1; | ||
2541 | 2510 | ||
2542 | /* | 2511 | /* |
2543 | * first, we need to know whether the block is allocated already | 2512 | * first, we need to know whether the block is allocated already |
2544 | * preallocated blocks are unmapped but should treated | 2513 | * preallocated blocks are unmapped but should treated |
2545 | * the same as allocated blocks. | 2514 | * the same as allocated blocks. |
2546 | */ | 2515 | */ |
2547 | ret = ext4_get_blocks(NULL, inode, iblock, 1, bh_result, 0); | 2516 | ret = ext4_map_blocks(NULL, inode, &map, 0); |
2548 | if ((ret == 0) && !buffer_delay(bh_result)) { | 2517 | if (ret < 0) |
2549 | /* the block isn't (pre)allocated yet, let's reserve space */ | 2518 | return ret; |
2519 | if (ret == 0) { | ||
2520 | if (buffer_delay(bh)) | ||
2521 | return 0; /* Not sure this could or should happen */ | ||
2550 | /* | 2522 | /* |
2551 | * XXX: __block_prepare_write() unmaps passed block, | 2523 | * XXX: __block_prepare_write() unmaps passed block, |
2552 | * is it OK? | 2524 | * is it OK? |
@@ -2556,26 +2528,26 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2556 | /* not enough space to reserve */ | 2528 | /* not enough space to reserve */ |
2557 | return ret; | 2529 | return ret; |
2558 | 2530 | ||
2559 | map_bh(bh_result, inode->i_sb, invalid_block); | 2531 | map_bh(bh, inode->i_sb, invalid_block); |
2560 | set_buffer_new(bh_result); | 2532 | set_buffer_new(bh); |
2561 | set_buffer_delay(bh_result); | 2533 | set_buffer_delay(bh); |
2562 | } else if (ret > 0) { | 2534 | return 0; |
2563 | bh_result->b_size = (ret << inode->i_blkbits); | ||
2564 | if (buffer_unwritten(bh_result)) { | ||
2565 | /* A delayed write to unwritten bh should | ||
2566 | * be marked new and mapped. Mapped ensures | ||
2567 | * that we don't do get_block multiple times | ||
2568 | * when we write to the same offset and new | ||
2569 | * ensures that we do proper zero out for | ||
2570 | * partial write. | ||
2571 | */ | ||
2572 | set_buffer_new(bh_result); | ||
2573 | set_buffer_mapped(bh_result); | ||
2574 | } | ||
2575 | ret = 0; | ||
2576 | } | 2535 | } |
2577 | 2536 | ||
2578 | return ret; | 2537 | map_bh(bh, inode->i_sb, map.m_pblk); |
2538 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | ||
2539 | |||
2540 | if (buffer_unwritten(bh)) { | ||
2541 | /* A delayed write to unwritten bh should be marked | ||
2542 | * new and mapped. Mapped ensures that we don't do | ||
2543 | * get_block multiple times when we write to the same | ||
2544 | * offset and new ensures that we do proper zero out | ||
2545 | * for partial write. | ||
2546 | */ | ||
2547 | set_buffer_new(bh); | ||
2548 | set_buffer_mapped(bh); | ||
2549 | } | ||
2550 | return 0; | ||
2579 | } | 2551 | } |
2580 | 2552 | ||
2581 | /* | 2553 | /* |
@@ -2597,21 +2569,8 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2597 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, | 2569 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, |
2598 | struct buffer_head *bh_result, int create) | 2570 | struct buffer_head *bh_result, int create) |
2599 | { | 2571 | { |
2600 | int ret = 0; | ||
2601 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | ||
2602 | |||
2603 | BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); | 2572 | BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); |
2604 | 2573 | return _ext4_get_block(inode, iblock, bh_result, 0); | |
2605 | /* | ||
2606 | * we don't want to do block allocation in writepage | ||
2607 | * so call get_block_wrap with create = 0 | ||
2608 | */ | ||
2609 | ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0); | ||
2610 | if (ret > 0) { | ||
2611 | bh_result->b_size = (ret << inode->i_blkbits); | ||
2612 | ret = 0; | ||
2613 | } | ||
2614 | return ret; | ||
2615 | } | 2574 | } |
2616 | 2575 | ||
2617 | static int bget_one(handle_t *handle, struct buffer_head *bh) | 2576 | static int bget_one(handle_t *handle, struct buffer_head *bh) |
@@ -2821,13 +2780,131 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) | |||
2821 | * number of contiguous block. So we will limit | 2780 | * number of contiguous block. So we will limit |
2822 | * number of contiguous block to a sane value | 2781 | * number of contiguous block to a sane value |
2823 | */ | 2782 | */ |
2824 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) && | 2783 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) && |
2825 | (max_blocks > EXT4_MAX_TRANS_DATA)) | 2784 | (max_blocks > EXT4_MAX_TRANS_DATA)) |
2826 | max_blocks = EXT4_MAX_TRANS_DATA; | 2785 | max_blocks = EXT4_MAX_TRANS_DATA; |
2827 | 2786 | ||
2828 | return ext4_chunk_trans_blocks(inode, max_blocks); | 2787 | return ext4_chunk_trans_blocks(inode, max_blocks); |
2829 | } | 2788 | } |
2830 | 2789 | ||
2790 | /* | ||
2791 | * write_cache_pages_da - walk the list of dirty pages of the given | ||
2792 | * address space and call the callback function (which usually writes | ||
2793 | * the pages). | ||
2794 | * | ||
2795 | * This is a forked version of write_cache_pages(). Differences: | ||
2796 | * Range cyclic is ignored. | ||
2797 | * no_nrwrite_index_update is always presumed true | ||
2798 | */ | ||
2799 | static int write_cache_pages_da(struct address_space *mapping, | ||
2800 | struct writeback_control *wbc, | ||
2801 | struct mpage_da_data *mpd) | ||
2802 | { | ||
2803 | int ret = 0; | ||
2804 | int done = 0; | ||
2805 | struct pagevec pvec; | ||
2806 | int nr_pages; | ||
2807 | pgoff_t index; | ||
2808 | pgoff_t end; /* Inclusive */ | ||
2809 | long nr_to_write = wbc->nr_to_write; | ||
2810 | |||
2811 | pagevec_init(&pvec, 0); | ||
2812 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
2813 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
2814 | |||
2815 | while (!done && (index <= end)) { | ||
2816 | int i; | ||
2817 | |||
2818 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
2819 | PAGECACHE_TAG_DIRTY, | ||
2820 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | ||
2821 | if (nr_pages == 0) | ||
2822 | break; | ||
2823 | |||
2824 | for (i = 0; i < nr_pages; i++) { | ||
2825 | struct page *page = pvec.pages[i]; | ||
2826 | |||
2827 | /* | ||
2828 | * At this point, the page may be truncated or | ||
2829 | * invalidated (changing page->mapping to NULL), or | ||
2830 | * even swizzled back from swapper_space to tmpfs file | ||
2831 | * mapping. However, page->index will not change | ||
2832 | * because we have a reference on the page. | ||
2833 | */ | ||
2834 | if (page->index > end) { | ||
2835 | done = 1; | ||
2836 | break; | ||
2837 | } | ||
2838 | |||
2839 | lock_page(page); | ||
2840 | |||
2841 | /* | ||
2842 | * Page truncated or invalidated. We can freely skip it | ||
2843 | * then, even for data integrity operations: the page | ||
2844 | * has disappeared concurrently, so there could be no | ||
2845 | * real expectation of this data interity operation | ||
2846 | * even if there is now a new, dirty page at the same | ||
2847 | * pagecache address. | ||
2848 | */ | ||
2849 | if (unlikely(page->mapping != mapping)) { | ||
2850 | continue_unlock: | ||
2851 | unlock_page(page); | ||
2852 | continue; | ||
2853 | } | ||
2854 | |||
2855 | if (!PageDirty(page)) { | ||
2856 | /* someone wrote it for us */ | ||
2857 | goto continue_unlock; | ||
2858 | } | ||
2859 | |||
2860 | if (PageWriteback(page)) { | ||
2861 | if (wbc->sync_mode != WB_SYNC_NONE) | ||
2862 | wait_on_page_writeback(page); | ||
2863 | else | ||
2864 | goto continue_unlock; | ||
2865 | } | ||
2866 | |||
2867 | BUG_ON(PageWriteback(page)); | ||
2868 | if (!clear_page_dirty_for_io(page)) | ||
2869 | goto continue_unlock; | ||
2870 | |||
2871 | ret = __mpage_da_writepage(page, wbc, mpd); | ||
2872 | if (unlikely(ret)) { | ||
2873 | if (ret == AOP_WRITEPAGE_ACTIVATE) { | ||
2874 | unlock_page(page); | ||
2875 | ret = 0; | ||
2876 | } else { | ||
2877 | done = 1; | ||
2878 | break; | ||
2879 | } | ||
2880 | } | ||
2881 | |||
2882 | if (nr_to_write > 0) { | ||
2883 | nr_to_write--; | ||
2884 | if (nr_to_write == 0 && | ||
2885 | wbc->sync_mode == WB_SYNC_NONE) { | ||
2886 | /* | ||
2887 | * We stop writing back only if we are | ||
2888 | * not doing integrity sync. In case of | ||
2889 | * integrity sync we have to keep going | ||
2890 | * because someone may be concurrently | ||
2891 | * dirtying pages, and we might have | ||
2892 | * synced a lot of newly appeared dirty | ||
2893 | * pages, but have not synced all of the | ||
2894 | * old dirty pages. | ||
2895 | */ | ||
2896 | done = 1; | ||
2897 | break; | ||
2898 | } | ||
2899 | } | ||
2900 | } | ||
2901 | pagevec_release(&pvec); | ||
2902 | cond_resched(); | ||
2903 | } | ||
2904 | return ret; | ||
2905 | } | ||
2906 | |||
2907 | |||
2831 | static int ext4_da_writepages(struct address_space *mapping, | 2908 | static int ext4_da_writepages(struct address_space *mapping, |
2832 | struct writeback_control *wbc) | 2909 | struct writeback_control *wbc) |
2833 | { | 2910 | { |
@@ -2836,7 +2913,6 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2836 | handle_t *handle = NULL; | 2913 | handle_t *handle = NULL; |
2837 | struct mpage_da_data mpd; | 2914 | struct mpage_da_data mpd; |
2838 | struct inode *inode = mapping->host; | 2915 | struct inode *inode = mapping->host; |
2839 | int no_nrwrite_index_update; | ||
2840 | int pages_written = 0; | 2916 | int pages_written = 0; |
2841 | long pages_skipped; | 2917 | long pages_skipped; |
2842 | unsigned int max_pages; | 2918 | unsigned int max_pages; |
@@ -2916,12 +2992,6 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2916 | mpd.wbc = wbc; | 2992 | mpd.wbc = wbc; |
2917 | mpd.inode = mapping->host; | 2993 | mpd.inode = mapping->host; |
2918 | 2994 | ||
2919 | /* | ||
2920 | * we don't want write_cache_pages to update | ||
2921 | * nr_to_write and writeback_index | ||
2922 | */ | ||
2923 | no_nrwrite_index_update = wbc->no_nrwrite_index_update; | ||
2924 | wbc->no_nrwrite_index_update = 1; | ||
2925 | pages_skipped = wbc->pages_skipped; | 2995 | pages_skipped = wbc->pages_skipped; |
2926 | 2996 | ||
2927 | retry: | 2997 | retry: |
@@ -2941,7 +3011,7 @@ retry: | |||
2941 | if (IS_ERR(handle)) { | 3011 | if (IS_ERR(handle)) { |
2942 | ret = PTR_ERR(handle); | 3012 | ret = PTR_ERR(handle); |
2943 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " | 3013 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " |
2944 | "%ld pages, ino %lu; err %d\n", __func__, | 3014 | "%ld pages, ino %lu; err %d", __func__, |
2945 | wbc->nr_to_write, inode->i_ino, ret); | 3015 | wbc->nr_to_write, inode->i_ino, ret); |
2946 | goto out_writepages; | 3016 | goto out_writepages; |
2947 | } | 3017 | } |
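The trailing "\n" dropped from this ext4_msg() format string (and from the one later in this function) matches the "Remove extraneous newlines in ext4_msg() calls" patch in this merge: ext4_msg() terminates the line itself. A rough sketch of that helper, shown only to motivate the change; the actual definition lives in fs/ext4/super.c and may differ in detail:

	void ext4_msg(struct super_block *sb, const char *prefix,
		      const char *fmt, ...)
	{
		va_list args;

		va_start(args, fmt);
		printk("%sEXT4-fs (%s): ", prefix, sb->s_id);
		vprintk(fmt, args);
		printk("\n");	/* newline added here, so callers omit it */
		va_end(args);
	}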
@@ -2963,8 +3033,7 @@ retry: | |||
2963 | mpd.io_done = 0; | 3033 | mpd.io_done = 0; |
2964 | mpd.pages_written = 0; | 3034 | mpd.pages_written = 0; |
2965 | mpd.retval = 0; | 3035 | mpd.retval = 0; |
2966 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, | 3036 | ret = write_cache_pages_da(mapping, wbc, &mpd); |
2967 | &mpd); | ||
2968 | /* | 3037 | /* |
2969 | * If we have a contiguous extent of pages and we | 3038 | * If we have a contiguous extent of pages and we |
2970 | * haven't done the I/O yet, map the blocks and submit | 3039 | * haven't done the I/O yet, map the blocks and submit |
@@ -3016,7 +3085,7 @@ retry: | |||
3016 | if (pages_skipped != wbc->pages_skipped) | 3085 | if (pages_skipped != wbc->pages_skipped) |
3017 | ext4_msg(inode->i_sb, KERN_CRIT, | 3086 | ext4_msg(inode->i_sb, KERN_CRIT, |
3018 | "This should not happen leaving %s " | 3087 | "This should not happen leaving %s " |
3019 | "with nr_to_write = %ld ret = %d\n", | 3088 | "with nr_to_write = %ld ret = %d", |
3020 | __func__, wbc->nr_to_write, ret); | 3089 | __func__, wbc->nr_to_write, ret); |
3021 | 3090 | ||
3022 | /* Update index */ | 3091 | /* Update index */ |
@@ -3030,8 +3099,6 @@ retry: | |||
3030 | mapping->writeback_index = index; | 3099 | mapping->writeback_index = index; |
3031 | 3100 | ||
3032 | out_writepages: | 3101 | out_writepages: |
3033 | if (!no_nrwrite_index_update) | ||
3034 | wbc->no_nrwrite_index_update = 0; | ||
3035 | wbc->nr_to_write -= nr_to_writebump; | 3102 | wbc->nr_to_write -= nr_to_writebump; |
3036 | wbc->range_start = range_start; | 3103 | wbc->range_start = range_start; |
3037 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); | 3104 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); |
@@ -3076,7 +3143,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
3076 | loff_t pos, unsigned len, unsigned flags, | 3143 | loff_t pos, unsigned len, unsigned flags, |
3077 | struct page **pagep, void **fsdata) | 3144 | struct page **pagep, void **fsdata) |
3078 | { | 3145 | { |
3079 | int ret, retries = 0, quota_retries = 0; | 3146 | int ret, retries = 0; |
3080 | struct page *page; | 3147 | struct page *page; |
3081 | pgoff_t index; | 3148 | pgoff_t index; |
3082 | unsigned from, to; | 3149 | unsigned from, to; |
@@ -3135,22 +3202,6 @@ retry: | |||
3135 | 3202 | ||
3136 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 3203 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
3137 | goto retry; | 3204 | goto retry; |
3138 | |||
3139 | if ((ret == -EDQUOT) && | ||
3140 | EXT4_I(inode)->i_reserved_meta_blocks && | ||
3141 | (quota_retries++ < 3)) { | ||
3142 | /* | ||
3143 | * Since we often over-estimate the number of meta | ||
3144 | * data blocks required, we may sometimes get a | ||
3145 | * spurious out of quota error even though there would | ||
3146 | * be enough space once we write the data blocks and | ||
3147 | * find out how many meta data blocks were _really_ | ||
3148 | * required. So try forcing the inode write to see if | ||
3149 | * that helps. | ||
3150 | */ | ||
3151 | write_inode_now(inode, (quota_retries == 3)); | ||
3152 | goto retry; | ||
3153 | } | ||
3154 | out: | 3205 | out: |
3155 | return ret; | 3206 | return ret; |
3156 | } | 3207 | } |
@@ -3546,46 +3597,18 @@ out: | |||
3546 | return ret; | 3597 | return ret; |
3547 | } | 3598 | } |
3548 | 3599 | ||
3600 | /* | ||
3601 | * ext4_get_block used when preparing for a DIO write or buffer write. | ||
3602 | * We allocate an uninitialized extent if blocks haven't been allocated. | ||
3603 | * The extent will be converted to initialized after the IO is complete. | ||
3604 | */ | ||
3549 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, | 3605 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, |
3550 | struct buffer_head *bh_result, int create) | 3606 | struct buffer_head *bh_result, int create) |
3551 | { | 3607 | { |
3552 | handle_t *handle = ext4_journal_current_handle(); | ||
3553 | int ret = 0; | ||
3554 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | ||
3555 | int dio_credits; | ||
3556 | int started = 0; | ||
3557 | |||
3558 | ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", | 3608 | ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", |
3559 | inode->i_ino, create); | 3609 | inode->i_ino, create); |
3560 | /* | 3610 | return _ext4_get_block(inode, iblock, bh_result, |
3561 | * ext4_get_block in preparation for a DIO write or buffer write. | ||
3562 | * We allocate an uninitialized extent if blocks haven't been allocated. | ||
3563 | * The extent will be converted to initialized after IO completes. | ||
3564 | */ | ||
3565 | create = EXT4_GET_BLOCKS_IO_CREATE_EXT; | ||
3566 | |||
3567 | if (!handle) { | ||
3568 | if (max_blocks > DIO_MAX_BLOCKS) | ||
3569 | max_blocks = DIO_MAX_BLOCKS; | ||
3570 | dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); | ||
3571 | handle = ext4_journal_start(inode, dio_credits); | ||
3572 | if (IS_ERR(handle)) { | ||
3573 | ret = PTR_ERR(handle); | ||
3574 | goto out; | ||
3575 | } | ||
3576 | started = 1; | ||
3577 | } | ||
3578 | |||
3579 | ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, | ||
3580 | create); | ||
3581 | if (ret > 0) { | ||
3582 | bh_result->b_size = (ret << inode->i_blkbits); | ||
3583 | ret = 0; | ||
3584 | } | ||
3585 | if (started) | ||
3586 | ext4_journal_stop(handle); | ||
3587 | out: | ||
3588 | return ret; | ||
3589 | } | 3612 | } |
3590 | 3613 | ||
3591 | static void dump_completed_IO(struct inode * inode) | 3614 | static void dump_completed_IO(struct inode * inode) |
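ext4_get_block_write() above now delegates to the common _ext4_get_block() helper, passing EXT4_GET_BLOCKS_IO_CREATE_EXT so that unwritten extents are allocated for DIO writes and converted once the IO completes. The sketch below reconstructs what that helper plausibly does from the lines removed above; in this series it is actually built on the new ext4_map_blocks() abstraction, so treat the names and details as assumptions rather than the verbatim tree contents:

	/* Hedged reconstruction of _ext4_get_block(), based on the deleted
	 * open-coded version of ext4_get_block_write() above. */
	static int _ext4_get_block(struct inode *inode, sector_t iblock,
				   struct buffer_head *bh_result, int flags)
	{
		handle_t *handle = ext4_journal_current_handle();
		unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
		int dio_credits, started = 0, ret;

		if (flags && !handle) {
			/* No transaction open (the DIO path): start one sized
			 * for the worst-case allocation of max_blocks blocks. */
			if (max_blocks > DIO_MAX_BLOCKS)
				max_blocks = DIO_MAX_BLOCKS;
			dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
			handle = ext4_journal_start(inode, dio_credits);
			if (IS_ERR(handle))
				return PTR_ERR(handle);
			started = 1;
		}

		ret = ext4_get_blocks(handle, inode, iblock, max_blocks,
				      bh_result, flags);
		if (ret > 0) {
			/* Map found/allocated: report its size to the caller. */
			bh_result->b_size = (ret << inode->i_blkbits);
			ret = 0;
		}
		if (started)
			ext4_journal_stop(handle);
		return ret;
	}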
@@ -3973,7 +3996,7 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | |||
3973 | struct file *file = iocb->ki_filp; | 3996 | struct file *file = iocb->ki_filp; |
3974 | struct inode *inode = file->f_mapping->host; | 3997 | struct inode *inode = file->f_mapping->host; |
3975 | 3998 | ||
3976 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | 3999 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
3977 | return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); | 4000 | return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); |
3978 | 4001 | ||
3979 | return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); | 4002 | return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); |
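This is the first of several conversions in this diff from open-coded EXT4_I(inode)->i_flags mask tests to the accessors introduced by the "Use bitops to read/modify i_flags" patch in this merge. A minimal sketch of what those accessors presumably look like, assuming i_flags is an unsigned long and each EXT4_INODE_* constant is a bit number; the exact definitions in ext4.h are not shown in this diff:

	/* Assumed shape of the new flag accessors: atomic bitops on
	 * EXT4_I(inode)->i_flags instead of open-coded mask tests. */
	static inline int ext4_test_inode_flag(struct inode *inode, int bit)
	{
		return test_bit(bit, &EXT4_I(inode)->i_flags);
	}

	static inline void ext4_set_inode_flag(struct inode *inode, int bit)
	{
		set_bit(bit, &EXT4_I(inode)->i_flags);
	}

	static inline void ext4_clear_inode_flag(struct inode *inode, int bit)
	{
		clear_bit(bit, &EXT4_I(inode)->i_flags);
	}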
@@ -4302,10 +4325,9 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
4302 | 4325 | ||
4303 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, | 4326 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, |
4304 | count)) { | 4327 | count)) { |
4305 | ext4_error(inode->i_sb, "inode #%lu: " | 4328 | EXT4_ERROR_INODE(inode, "attempt to clear invalid " |
4306 | "attempt to clear blocks %llu len %lu, invalid", | 4329 | "blocks %llu len %lu", |
4307 | inode->i_ino, (unsigned long long) block_to_free, | 4330 | (unsigned long long) block_to_free, count); |
4308 | count); | ||
4309 | return 1; | 4331 | return 1; |
4310 | } | 4332 | } |
4311 | 4333 | ||
@@ -4410,11 +4432,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4410 | if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) | 4432 | if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) |
4411 | ext4_handle_dirty_metadata(handle, inode, this_bh); | 4433 | ext4_handle_dirty_metadata(handle, inode, this_bh); |
4412 | else | 4434 | else |
4413 | ext4_error(inode->i_sb, | 4435 | EXT4_ERROR_INODE(inode, |
4414 | "circular indirect block detected, " | 4436 | "circular indirect block detected at " |
4415 | "inode=%lu, block=%llu", | 4437 | "block %llu", |
4416 | inode->i_ino, | 4438 | (unsigned long long) this_bh->b_blocknr); |
4417 | (unsigned long long) this_bh->b_blocknr); | ||
4418 | } | 4439 | } |
4419 | } | 4440 | } |
4420 | 4441 | ||
@@ -4452,11 +4473,10 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4452 | 4473 | ||
4453 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), | 4474 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), |
4454 | nr, 1)) { | 4475 | nr, 1)) { |
4455 | ext4_error(inode->i_sb, | 4476 | EXT4_ERROR_INODE(inode, |
4456 | "indirect mapped block in inode " | 4477 | "invalid indirect mapped " |
4457 | "#%lu invalid (level %d, blk #%lu)", | 4478 | "block %lu (level %d)", |
4458 | inode->i_ino, depth, | 4479 | (unsigned long) nr, depth); |
4459 | (unsigned long) nr); | ||
4460 | break; | 4480 | break; |
4461 | } | 4481 | } |
4462 | 4482 | ||
@@ -4468,9 +4488,9 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4468 | * (should be rare). | 4488 | * (should be rare). |
4469 | */ | 4489 | */ |
4470 | if (!bh) { | 4490 | if (!bh) { |
4471 | ext4_error(inode->i_sb, | 4491 | EXT4_ERROR_INODE(inode, |
4472 | "Read failure, inode=%lu, block=%llu", | 4492 | "Read failure block=%llu", |
4473 | inode->i_ino, nr); | 4493 | (unsigned long long) nr); |
4474 | continue; | 4494 | continue; |
4475 | } | 4495 | } |
4476 | 4496 | ||
@@ -4612,12 +4632,12 @@ void ext4_truncate(struct inode *inode) | |||
4612 | if (!ext4_can_truncate(inode)) | 4632 | if (!ext4_can_truncate(inode)) |
4613 | return; | 4633 | return; |
4614 | 4634 | ||
4615 | EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; | 4635 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); |
4616 | 4636 | ||
4617 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | 4637 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) |
4618 | ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); | 4638 | ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); |
4619 | 4639 | ||
4620 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 4640 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
4621 | ext4_ext_truncate(inode); | 4641 | ext4_ext_truncate(inode); |
4622 | return; | 4642 | return; |
4623 | } | 4643 | } |
@@ -4785,8 +4805,8 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
4785 | 4805 | ||
4786 | bh = sb_getblk(sb, block); | 4806 | bh = sb_getblk(sb, block); |
4787 | if (!bh) { | 4807 | if (!bh) { |
4788 | ext4_error(sb, "unable to read inode block - " | 4808 | EXT4_ERROR_INODE(inode, "unable to read inode block - " |
4789 | "inode=%lu, block=%llu", inode->i_ino, block); | 4809 | "block %llu", block); |
4790 | return -EIO; | 4810 | return -EIO; |
4791 | } | 4811 | } |
4792 | if (!buffer_uptodate(bh)) { | 4812 | if (!buffer_uptodate(bh)) { |
@@ -4884,8 +4904,8 @@ make_io: | |||
4884 | submit_bh(READ_META, bh); | 4904 | submit_bh(READ_META, bh); |
4885 | wait_on_buffer(bh); | 4905 | wait_on_buffer(bh); |
4886 | if (!buffer_uptodate(bh)) { | 4906 | if (!buffer_uptodate(bh)) { |
4887 | ext4_error(sb, "unable to read inode block - inode=%lu," | 4907 | EXT4_ERROR_INODE(inode, "unable to read inode " |
4888 | " block=%llu", inode->i_ino, block); | 4908 | "block %llu", block); |
4889 | brelse(bh); | 4909 | brelse(bh); |
4890 | return -EIO; | 4910 | return -EIO; |
4891 | } | 4911 | } |
@@ -5096,8 +5116,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
5096 | ret = 0; | 5116 | ret = 0; |
5097 | if (ei->i_file_acl && | 5117 | if (ei->i_file_acl && |
5098 | !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { | 5118 | !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { |
5099 | ext4_error(sb, "bad extended attribute block %llu inode #%lu", | 5119 | EXT4_ERROR_INODE(inode, "bad extended attribute block %llu", |
5100 | ei->i_file_acl, inode->i_ino); | 5120 | ei->i_file_acl); |
5101 | ret = -EIO; | 5121 | ret = -EIO; |
5102 | goto bad_inode; | 5122 | goto bad_inode; |
5103 | } else if (ei->i_flags & EXT4_EXTENTS_FL) { | 5123 | } else if (ei->i_flags & EXT4_EXTENTS_FL) { |
@@ -5142,8 +5162,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
5142 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); | 5162 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); |
5143 | } else { | 5163 | } else { |
5144 | ret = -EIO; | 5164 | ret = -EIO; |
5145 | ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu", | 5165 | EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode); |
5146 | inode->i_mode, inode->i_ino); | ||
5147 | goto bad_inode; | 5166 | goto bad_inode; |
5148 | } | 5167 | } |
5149 | brelse(iloc.bh); | 5168 | brelse(iloc.bh); |
@@ -5381,9 +5400,9 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
5381 | if (wbc->sync_mode == WB_SYNC_ALL) | 5400 | if (wbc->sync_mode == WB_SYNC_ALL) |
5382 | sync_dirty_buffer(iloc.bh); | 5401 | sync_dirty_buffer(iloc.bh); |
5383 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { | 5402 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { |
5384 | ext4_error(inode->i_sb, "IO error syncing inode, " | 5403 | EXT4_ERROR_INODE(inode, |
5385 | "inode=%lu, block=%llu", inode->i_ino, | 5404 | "IO error syncing inode (block=%llu)", |
5386 | (unsigned long long)iloc.bh->b_blocknr); | 5405 | (unsigned long long) iloc.bh->b_blocknr); |
5387 | err = -EIO; | 5406 | err = -EIO; |
5388 | } | 5407 | } |
5389 | brelse(iloc.bh); | 5408 | brelse(iloc.bh); |
@@ -5455,7 +5474,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5455 | } | 5474 | } |
5456 | 5475 | ||
5457 | if (attr->ia_valid & ATTR_SIZE) { | 5476 | if (attr->ia_valid & ATTR_SIZE) { |
5458 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) { | 5477 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { |
5459 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 5478 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
5460 | 5479 | ||
5461 | if (attr->ia_size > sbi->s_bitmap_maxbytes) { | 5480 | if (attr->ia_size > sbi->s_bitmap_maxbytes) { |
@@ -5468,7 +5487,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5468 | if (S_ISREG(inode->i_mode) && | 5487 | if (S_ISREG(inode->i_mode) && |
5469 | attr->ia_valid & ATTR_SIZE && | 5488 | attr->ia_valid & ATTR_SIZE && |
5470 | (attr->ia_size < inode->i_size || | 5489 | (attr->ia_size < inode->i_size || |
5471 | (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) { | 5490 | (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) { |
5472 | handle_t *handle; | 5491 | handle_t *handle; |
5473 | 5492 | ||
5474 | handle = ext4_journal_start(inode, 3); | 5493 | handle = ext4_journal_start(inode, 3); |
@@ -5500,7 +5519,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5500 | } | 5519 | } |
5501 | } | 5520 | } |
5502 | /* ext4_truncate will clear the flag */ | 5521 | /* ext4_truncate will clear the flag */ |
5503 | if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) | 5522 | if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) |
5504 | ext4_truncate(inode); | 5523 | ext4_truncate(inode); |
5505 | } | 5524 | } |
5506 | 5525 | ||
@@ -5576,7 +5595,7 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, | |||
5576 | 5595 | ||
5577 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 5596 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) |
5578 | { | 5597 | { |
5579 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | 5598 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
5580 | return ext4_indirect_trans_blocks(inode, nrblocks, chunk); | 5599 | return ext4_indirect_trans_blocks(inode, nrblocks, chunk); |
5581 | return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); | 5600 | return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); |
5582 | } | 5601 | } |
@@ -5911,9 +5930,9 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
5911 | */ | 5930 | */ |
5912 | 5931 | ||
5913 | if (val) | 5932 | if (val) |
5914 | EXT4_I(inode)->i_flags |= EXT4_JOURNAL_DATA_FL; | 5933 | ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); |
5915 | else | 5934 | else |
5916 | EXT4_I(inode)->i_flags &= ~EXT4_JOURNAL_DATA_FL; | 5935 | ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); |
5917 | ext4_set_aops(inode); | 5936 | ext4_set_aops(inode); |
5918 | 5937 | ||
5919 | jbd2_journal_unlock_updates(journal); | 5938 | jbd2_journal_unlock_updates(journal); |