diff options
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 593 |
1 files changed, 365 insertions, 228 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2a9ffd528dd1..875db944b22f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -372,20 +372,21 @@ static int ext4_block_to_path(struct inode *inode, | |||
372 | } | 372 | } |
373 | 373 | ||
374 | static int __ext4_check_blockref(const char *function, struct inode *inode, | 374 | static int __ext4_check_blockref(const char *function, struct inode *inode, |
375 | __le32 *p, unsigned int max) { | 375 | __le32 *p, unsigned int max) |
376 | 376 | { | |
377 | unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es); | ||
378 | __le32 *bref = p; | 377 | __le32 *bref = p; |
378 | unsigned int blk; | ||
379 | |||
379 | while (bref < p+max) { | 380 | while (bref < p+max) { |
380 | if (unlikely(le32_to_cpu(*bref) >= maxblocks)) { | 381 | blk = le32_to_cpu(*bref++); |
382 | if (blk && | ||
383 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), | ||
384 | blk, 1))) { | ||
381 | ext4_error(inode->i_sb, function, | 385 | ext4_error(inode->i_sb, function, |
382 | "block reference %u >= max (%u) " | 386 | "invalid block reference %u " |
383 | "in inode #%lu, offset=%d", | 387 | "in inode #%lu", blk, inode->i_ino); |
384 | le32_to_cpu(*bref), maxblocks, | ||
385 | inode->i_ino, (int)(bref-p)); | ||
386 | return -EIO; | 388 | return -EIO; |
387 | } | 389 | } |
388 | bref++; | ||
389 | } | 390 | } |
390 | return 0; | 391 | return 0; |
391 | } | 392 | } |
@@ -892,6 +893,10 @@ err_out: | |||
892 | } | 893 | } |
893 | 894 | ||
894 | /* | 895 | /* |
896 | * The ext4_ind_get_blocks() function handles non-extents inodes | ||
897 | * (i.e., using the traditional indirect/double-indirect i_blocks | ||
898 | * scheme) for ext4_get_blocks(). | ||
899 | * | ||
895 | * Allocation strategy is simple: if we have to allocate something, we will | 900 | * Allocation strategy is simple: if we have to allocate something, we will |
896 | * have to go the whole way to leaf. So let's do it before attaching anything | 901 | * have to go the whole way to leaf. So let's do it before attaching anything |
897 | * to tree, set linkage between the newborn blocks, write them if sync is | 902 | * to tree, set linkage between the newborn blocks, write them if sync is |
@@ -909,15 +914,16 @@ err_out: | |||
909 | * return = 0, if plain lookup failed. | 914 | * return = 0, if plain lookup failed. |
910 | * return < 0, error case. | 915 | * return < 0, error case. |
911 | * | 916 | * |
912 | * | 917 | * The ext4_ind_get_blocks() function should be called with |
913 | * Need to be called with | 918 | * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem |
914 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block | 919 | * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or |
915 | * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) | 920 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system |
921 | * blocks. | ||
916 | */ | 922 | */ |
917 | static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | 923 | static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, |
918 | ext4_lblk_t iblock, unsigned int maxblocks, | 924 | ext4_lblk_t iblock, unsigned int maxblocks, |
919 | struct buffer_head *bh_result, | 925 | struct buffer_head *bh_result, |
920 | int create, int extend_disksize) | 926 | int flags) |
921 | { | 927 | { |
922 | int err = -EIO; | 928 | int err = -EIO; |
923 | ext4_lblk_t offsets[4]; | 929 | ext4_lblk_t offsets[4]; |
@@ -927,14 +933,11 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
927 | int indirect_blks; | 933 | int indirect_blks; |
928 | int blocks_to_boundary = 0; | 934 | int blocks_to_boundary = 0; |
929 | int depth; | 935 | int depth; |
930 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
931 | int count = 0; | 936 | int count = 0; |
932 | ext4_fsblk_t first_block = 0; | 937 | ext4_fsblk_t first_block = 0; |
933 | loff_t disksize; | ||
934 | |||
935 | 938 | ||
936 | J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); | 939 | J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); |
937 | J_ASSERT(handle != NULL || create == 0); | 940 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); |
938 | depth = ext4_block_to_path(inode, iblock, offsets, | 941 | depth = ext4_block_to_path(inode, iblock, offsets, |
939 | &blocks_to_boundary); | 942 | &blocks_to_boundary); |
940 | 943 | ||
@@ -963,7 +966,7 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
963 | } | 966 | } |
964 | 967 | ||
965 | /* Next simple case - plain lookup or failed read of indirect block */ | 968 | /* Next simple case - plain lookup or failed read of indirect block */ |
966 | if (!create || err == -EIO) | 969 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO) |
967 | goto cleanup; | 970 | goto cleanup; |
968 | 971 | ||
969 | /* | 972 | /* |
@@ -997,19 +1000,7 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
997 | if (!err) | 1000 | if (!err) |
998 | err = ext4_splice_branch(handle, inode, iblock, | 1001 | err = ext4_splice_branch(handle, inode, iblock, |
999 | partial, indirect_blks, count); | 1002 | partial, indirect_blks, count); |
1000 | /* | 1003 | else |
1001 | * i_disksize growing is protected by i_data_sem. Don't forget to | ||
1002 | * protect it if you're about to implement concurrent | ||
1003 | * ext4_get_block() -bzzz | ||
1004 | */ | ||
1005 | if (!err && extend_disksize) { | ||
1006 | disksize = ((loff_t) iblock + count) << inode->i_blkbits; | ||
1007 | if (disksize > i_size_read(inode)) | ||
1008 | disksize = i_size_read(inode); | ||
1009 | if (disksize > ei->i_disksize) | ||
1010 | ei->i_disksize = disksize; | ||
1011 | } | ||
1012 | if (err) | ||
1013 | goto cleanup; | 1004 | goto cleanup; |
1014 | 1005 | ||
1015 | set_buffer_new(bh_result); | 1006 | set_buffer_new(bh_result); |
@@ -1120,8 +1111,23 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1120 | ext4_discard_preallocations(inode); | 1111 | ext4_discard_preallocations(inode); |
1121 | } | 1112 | } |
1122 | 1113 | ||
1114 | static int check_block_validity(struct inode *inode, sector_t logical, | ||
1115 | sector_t phys, int len) | ||
1116 | { | ||
1117 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { | ||
1118 | ext4_error(inode->i_sb, "check_block_validity", | ||
1119 | "inode #%lu logical block %llu mapped to %llu " | ||
1120 | "(size %d)", inode->i_ino, | ||
1121 | (unsigned long long) logical, | ||
1122 | (unsigned long long) phys, len); | ||
1123 | WARN_ON(1); | ||
1124 | return -EIO; | ||
1125 | } | ||
1126 | return 0; | ||
1127 | } | ||
1128 | |||
1123 | /* | 1129 | /* |
1124 | * The ext4_get_blocks_wrap() function try to look up the requested blocks, | 1130 | * The ext4_get_blocks() function tries to look up the requested blocks, |
1125 | * and returns if the blocks are already mapped. | 1131 | * and returns if the blocks are already mapped. |
1126 | * | 1132 | * |
1127 | * Otherwise it takes the write lock of the i_data_sem and allocate blocks | 1133 | * Otherwise it takes the write lock of the i_data_sem and allocate blocks |
@@ -1129,7 +1135,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1129 | * mapped. | 1135 | * mapped. |
1130 | * | 1136 | * |
1131 | * If file type is extents based, it will call ext4_ext_get_blocks(), | 1137 | * If file type is extents based, it will call ext4_ext_get_blocks(), |
1132 | * Otherwise, call with ext4_get_blocks_handle() to handle indirect mapping | 1138 | * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping |
1133 | * based files | 1139 | * based files |
1134 | * | 1140 | * |
1135 | * On success, it returns the number of blocks being mapped or allocate. | 1141 | * On success, it returns the number of blocks being mapped or allocate. |
@@ -1142,9 +1148,9 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1142 | * | 1148 | * |
1143 | * It returns the error in case of allocation failure. | 1149 | * It returns the error in case of allocation failure. |
1144 | */ | 1150 | */ |
1145 | int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | 1151 | int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, |
1146 | unsigned int max_blocks, struct buffer_head *bh, | 1152 | unsigned int max_blocks, struct buffer_head *bh, |
1147 | int create, int extend_disksize, int flag) | 1153 | int flags) |
1148 | { | 1154 | { |
1149 | int retval; | 1155 | int retval; |
1150 | 1156 | ||
@@ -1152,21 +1158,28 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
1152 | clear_buffer_unwritten(bh); | 1158 | clear_buffer_unwritten(bh); |
1153 | 1159 | ||
1154 | /* | 1160 | /* |
1155 | * Try to see if we can get the block without requesting | 1161 | * Try to see if we can get the block without requesting a new |
1156 | * for new file system block. | 1162 | * file system block. |
1157 | */ | 1163 | */ |
1158 | down_read((&EXT4_I(inode)->i_data_sem)); | 1164 | down_read((&EXT4_I(inode)->i_data_sem)); |
1159 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 1165 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
1160 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, | 1166 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, |
1161 | bh, 0, 0); | 1167 | bh, 0); |
1162 | } else { | 1168 | } else { |
1163 | retval = ext4_get_blocks_handle(handle, | 1169 | retval = ext4_ind_get_blocks(handle, inode, block, max_blocks, |
1164 | inode, block, max_blocks, bh, 0, 0); | 1170 | bh, 0); |
1165 | } | 1171 | } |
1166 | up_read((&EXT4_I(inode)->i_data_sem)); | 1172 | up_read((&EXT4_I(inode)->i_data_sem)); |
1167 | 1173 | ||
1174 | if (retval > 0 && buffer_mapped(bh)) { | ||
1175 | int ret = check_block_validity(inode, block, | ||
1176 | bh->b_blocknr, retval); | ||
1177 | if (ret != 0) | ||
1178 | return ret; | ||
1179 | } | ||
1180 | |||
1168 | /* If it is only a block(s) look up */ | 1181 | /* If it is only a block(s) look up */ |
1169 | if (!create) | 1182 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) |
1170 | return retval; | 1183 | return retval; |
1171 | 1184 | ||
1172 | /* | 1185 | /* |
@@ -1205,7 +1218,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
1205 | * let the underlying get_block() function know to | 1218 | * let the underlying get_block() function know to |
1206 | * avoid double accounting | 1219 | * avoid double accounting |
1207 | */ | 1220 | */ |
1208 | if (flag) | 1221 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) |
1209 | EXT4_I(inode)->i_delalloc_reserved_flag = 1; | 1222 | EXT4_I(inode)->i_delalloc_reserved_flag = 1; |
1210 | /* | 1223 | /* |
1211 | * We need to check for EXT4 here because migrate | 1224 | * We need to check for EXT4 here because migrate |
@@ -1213,10 +1226,10 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
1213 | */ | 1226 | */ |
1214 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 1227 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
1215 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, | 1228 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, |
1216 | bh, create, extend_disksize); | 1229 | bh, flags); |
1217 | } else { | 1230 | } else { |
1218 | retval = ext4_get_blocks_handle(handle, inode, block, | 1231 | retval = ext4_ind_get_blocks(handle, inode, block, |
1219 | max_blocks, bh, create, extend_disksize); | 1232 | max_blocks, bh, flags); |
1220 | 1233 | ||
1221 | if (retval > 0 && buffer_new(bh)) { | 1234 | if (retval > 0 && buffer_new(bh)) { |
1222 | /* | 1235 | /* |
@@ -1229,18 +1242,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
1229 | } | 1242 | } |
1230 | } | 1243 | } |
1231 | 1244 | ||
1232 | if (flag) { | 1245 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) |
1233 | EXT4_I(inode)->i_delalloc_reserved_flag = 0; | 1246 | EXT4_I(inode)->i_delalloc_reserved_flag = 0; |
1234 | /* | 1247 | |
1235 | * Update reserved blocks/metadata blocks | 1248 | /* |
1236 | * after successful block allocation | 1249 | * Update reserved blocks/metadata blocks after successful |
1237 | * which were deferred till now | 1250 | * block allocation which had been deferred till now. |
1238 | */ | 1251 | */ |
1239 | if ((retval > 0) && buffer_delay(bh)) | 1252 | if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)) |
1240 | ext4_da_update_reserve_space(inode, retval); | 1253 | ext4_da_update_reserve_space(inode, retval); |
1241 | } | ||
1242 | 1254 | ||
1243 | up_write((&EXT4_I(inode)->i_data_sem)); | 1255 | up_write((&EXT4_I(inode)->i_data_sem)); |
1256 | if (retval > 0 && buffer_mapped(bh)) { | ||
1257 | int ret = check_block_validity(inode, block, | ||
1258 | bh->b_blocknr, retval); | ||
1259 | if (ret != 0) | ||
1260 | return ret; | ||
1261 | } | ||
1244 | return retval; | 1262 | return retval; |
1245 | } | 1263 | } |
1246 | 1264 | ||
@@ -1268,8 +1286,8 @@ int ext4_get_block(struct inode *inode, sector_t iblock, | |||
1268 | started = 1; | 1286 | started = 1; |
1269 | } | 1287 | } |
1270 | 1288 | ||
1271 | ret = ext4_get_blocks_wrap(handle, inode, iblock, | 1289 | ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, |
1272 | max_blocks, bh_result, create, 0, 0); | 1290 | create ? EXT4_GET_BLOCKS_CREATE : 0); |
1273 | if (ret > 0) { | 1291 | if (ret > 0) { |
1274 | bh_result->b_size = (ret << inode->i_blkbits); | 1292 | bh_result->b_size = (ret << inode->i_blkbits); |
1275 | ret = 0; | 1293 | ret = 0; |
@@ -1288,17 +1306,19 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | |||
1288 | { | 1306 | { |
1289 | struct buffer_head dummy; | 1307 | struct buffer_head dummy; |
1290 | int fatal = 0, err; | 1308 | int fatal = 0, err; |
1309 | int flags = 0; | ||
1291 | 1310 | ||
1292 | J_ASSERT(handle != NULL || create == 0); | 1311 | J_ASSERT(handle != NULL || create == 0); |
1293 | 1312 | ||
1294 | dummy.b_state = 0; | 1313 | dummy.b_state = 0; |
1295 | dummy.b_blocknr = -1000; | 1314 | dummy.b_blocknr = -1000; |
1296 | buffer_trace_init(&dummy.b_history); | 1315 | buffer_trace_init(&dummy.b_history); |
1297 | err = ext4_get_blocks_wrap(handle, inode, block, 1, | 1316 | if (create) |
1298 | &dummy, create, 1, 0); | 1317 | flags |= EXT4_GET_BLOCKS_CREATE; |
1318 | err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags); | ||
1299 | /* | 1319 | /* |
1300 | * ext4_get_blocks_handle() returns number of blocks | 1320 | * ext4_get_blocks() returns number of blocks mapped. 0 in |
1301 | * mapped. 0 in case of a HOLE. | 1321 | * case of a HOLE. |
1302 | */ | 1322 | */ |
1303 | if (err > 0) { | 1323 | if (err > 0) { |
1304 | if (err > 1) | 1324 | if (err > 1) |
@@ -1439,7 +1459,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, | |||
1439 | struct page **pagep, void **fsdata) | 1459 | struct page **pagep, void **fsdata) |
1440 | { | 1460 | { |
1441 | struct inode *inode = mapping->host; | 1461 | struct inode *inode = mapping->host; |
1442 | int ret, needed_blocks = ext4_writepage_trans_blocks(inode); | 1462 | int ret, needed_blocks; |
1443 | handle_t *handle; | 1463 | handle_t *handle; |
1444 | int retries = 0; | 1464 | int retries = 0; |
1445 | struct page *page; | 1465 | struct page *page; |
@@ -1450,6 +1470,11 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, | |||
1450 | "dev %s ino %lu pos %llu len %u flags %u", | 1470 | "dev %s ino %lu pos %llu len %u flags %u", |
1451 | inode->i_sb->s_id, inode->i_ino, | 1471 | inode->i_sb->s_id, inode->i_ino, |
1452 | (unsigned long long) pos, len, flags); | 1472 | (unsigned long long) pos, len, flags); |
1473 | /* | ||
1474 | * Reserve one block more for addition to orphan list in case | ||
1475 | * we allocate blocks but write fails for some reason | ||
1476 | */ | ||
1477 | needed_blocks = ext4_writepage_trans_blocks(inode) + 1; | ||
1453 | index = pos >> PAGE_CACHE_SHIFT; | 1478 | index = pos >> PAGE_CACHE_SHIFT; |
1454 | from = pos & (PAGE_CACHE_SIZE - 1); | 1479 | from = pos & (PAGE_CACHE_SIZE - 1); |
1455 | to = from + len; | 1480 | to = from + len; |
@@ -1483,15 +1508,30 @@ retry: | |||
1483 | 1508 | ||
1484 | if (ret) { | 1509 | if (ret) { |
1485 | unlock_page(page); | 1510 | unlock_page(page); |
1486 | ext4_journal_stop(handle); | ||
1487 | page_cache_release(page); | 1511 | page_cache_release(page); |
1488 | /* | 1512 | /* |
1489 | * block_write_begin may have instantiated a few blocks | 1513 | * block_write_begin may have instantiated a few blocks |
1490 | * outside i_size. Trim these off again. Don't need | 1514 | * outside i_size. Trim these off again. Don't need |
1491 | * i_size_read because we hold i_mutex. | 1515 | * i_size_read because we hold i_mutex. |
1516 | * | ||
1517 | * Add inode to orphan list in case we crash before | ||
1518 | * truncate finishes | ||
1492 | */ | 1519 | */ |
1493 | if (pos + len > inode->i_size) | 1520 | if (pos + len > inode->i_size) |
1521 | ext4_orphan_add(handle, inode); | ||
1522 | |||
1523 | ext4_journal_stop(handle); | ||
1524 | if (pos + len > inode->i_size) { | ||
1494 | vmtruncate(inode, inode->i_size); | 1525 | vmtruncate(inode, inode->i_size); |
1526 | /* | ||
1527 | * If vmtruncate failed early the inode might | ||
1528 | * still be on the orphan list; we need to | ||
1529 | * make sure the inode is removed from the | ||
1530 | * orphan list in that case. | ||
1531 | */ | ||
1532 | if (inode->i_nlink) | ||
1533 | ext4_orphan_del(NULL, inode); | ||
1534 | } | ||
1495 | } | 1535 | } |
1496 | 1536 | ||
1497 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 1537 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -1509,6 +1549,52 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh) | |||
1509 | return ext4_handle_dirty_metadata(handle, NULL, bh); | 1549 | return ext4_handle_dirty_metadata(handle, NULL, bh); |
1510 | } | 1550 | } |
1511 | 1551 | ||
1552 | static int ext4_generic_write_end(struct file *file, | ||
1553 | struct address_space *mapping, | ||
1554 | loff_t pos, unsigned len, unsigned copied, | ||
1555 | struct page *page, void *fsdata) | ||
1556 | { | ||
1557 | int i_size_changed = 0; | ||
1558 | struct inode *inode = mapping->host; | ||
1559 | handle_t *handle = ext4_journal_current_handle(); | ||
1560 | |||
1561 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); | ||
1562 | |||
1563 | /* | ||
1564 | * No need to use i_size_read() here, the i_size | ||
1565 | * cannot change under us because we hold i_mutex. | ||
1566 | * | ||
1567 | * But it's important to update i_size while still holding page lock: | ||
1568 | * page writeout could otherwise come in and zero beyond i_size. | ||
1569 | */ | ||
1570 | if (pos + copied > inode->i_size) { | ||
1571 | i_size_write(inode, pos + copied); | ||
1572 | i_size_changed = 1; | ||
1573 | } | ||
1574 | |||
1575 | if (pos + copied > EXT4_I(inode)->i_disksize) { | ||
1576 | /* We need to mark inode dirty even if | ||
1577 | * the new size (pos + copied) is less than inode->i_size | ||
1578 | * but greater than i_disksize (hint: delalloc). | ||
1579 | */ | ||
1580 | ext4_update_i_disksize(inode, (pos + copied)); | ||
1581 | i_size_changed = 1; | ||
1582 | } | ||
1583 | unlock_page(page); | ||
1584 | page_cache_release(page); | ||
1585 | |||
1586 | /* | ||
1587 | * Don't mark the inode dirty under page lock. First, it unnecessarily | ||
1588 | * makes the holding time of page lock longer. Second, it forces lock | ||
1589 | * ordering of page lock and transaction start for journaling | ||
1590 | * filesystems. | ||
1591 | */ | ||
1592 | if (i_size_changed) | ||
1593 | ext4_mark_inode_dirty(handle, inode); | ||
1594 | |||
1595 | return copied; | ||
1596 | } | ||
1597 | |||
1512 | /* | 1598 | /* |
1513 | * We need to pick up the new inode size which generic_commit_write gave us | 1599 | * We need to pick up the new inode size which generic_commit_write gave us |
1514 | * `file' can be NULL - eg, when called from page_symlink(). | 1600 | * `file' can be NULL - eg, when called from page_symlink(). |
@@ -1532,21 +1618,15 @@ static int ext4_ordered_write_end(struct file *file, | |||
1532 | ret = ext4_jbd2_file_inode(handle, inode); | 1618 | ret = ext4_jbd2_file_inode(handle, inode); |
1533 | 1619 | ||
1534 | if (ret == 0) { | 1620 | if (ret == 0) { |
1535 | loff_t new_i_size; | 1621 | ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, |
1536 | |||
1537 | new_i_size = pos + copied; | ||
1538 | if (new_i_size > EXT4_I(inode)->i_disksize) { | ||
1539 | ext4_update_i_disksize(inode, new_i_size); | ||
1540 | /* We need to mark inode dirty even if | ||
1541 | * new_i_size is less that inode->i_size | ||
1542 | * bu greater than i_disksize.(hint delalloc) | ||
1543 | */ | ||
1544 | ext4_mark_inode_dirty(handle, inode); | ||
1545 | } | ||
1546 | |||
1547 | ret2 = generic_write_end(file, mapping, pos, len, copied, | ||
1548 | page, fsdata); | 1622 | page, fsdata); |
1549 | copied = ret2; | 1623 | copied = ret2; |
1624 | if (pos + len > inode->i_size) | ||
1625 | /* If we allocated more blocks than we copied | ||
1626 | * data into, blocks will remain allocated outside | ||
1627 | * inode->i_size, so they must be truncated. | ||
1628 | */ | ||
1629 | ext4_orphan_add(handle, inode); | ||
1550 | if (ret2 < 0) | 1630 | if (ret2 < 0) |
1551 | ret = ret2; | 1631 | ret = ret2; |
1552 | } | 1632 | } |
@@ -1554,6 +1634,18 @@ static int ext4_ordered_write_end(struct file *file, | |||
1554 | if (!ret) | 1634 | if (!ret) |
1555 | ret = ret2; | 1635 | ret = ret2; |
1556 | 1636 | ||
1637 | if (pos + len > inode->i_size) { | ||
1638 | vmtruncate(inode, inode->i_size); | ||
1639 | /* | ||
1640 | * If vmtruncate failed early the inode might still be | ||
1641 | * on the orphan list; we need to make sure the inode | ||
1642 | * is removed from the orphan list in that case. | ||
1643 | */ | ||
1644 | if (inode->i_nlink) | ||
1645 | ext4_orphan_del(NULL, inode); | ||
1646 | } | ||
1647 | |||
1648 | |||
1557 | return ret ? ret : copied; | 1649 | return ret ? ret : copied; |
1558 | } | 1650 | } |
1559 | 1651 | ||
@@ -1565,25 +1657,21 @@ static int ext4_writeback_write_end(struct file *file, | |||
1565 | handle_t *handle = ext4_journal_current_handle(); | 1657 | handle_t *handle = ext4_journal_current_handle(); |
1566 | struct inode *inode = mapping->host; | 1658 | struct inode *inode = mapping->host; |
1567 | int ret = 0, ret2; | 1659 | int ret = 0, ret2; |
1568 | loff_t new_i_size; | ||
1569 | 1660 | ||
1570 | trace_mark(ext4_writeback_write_end, | 1661 | trace_mark(ext4_writeback_write_end, |
1571 | "dev %s ino %lu pos %llu len %u copied %u", | 1662 | "dev %s ino %lu pos %llu len %u copied %u", |
1572 | inode->i_sb->s_id, inode->i_ino, | 1663 | inode->i_sb->s_id, inode->i_ino, |
1573 | (unsigned long long) pos, len, copied); | 1664 | (unsigned long long) pos, len, copied); |
1574 | new_i_size = pos + copied; | 1665 | ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, |
1575 | if (new_i_size > EXT4_I(inode)->i_disksize) { | ||
1576 | ext4_update_i_disksize(inode, new_i_size); | ||
1577 | /* We need to mark inode dirty even if | ||
1578 | * new_i_size is less that inode->i_size | ||
1579 | * bu greater than i_disksize.(hint delalloc) | ||
1580 | */ | ||
1581 | ext4_mark_inode_dirty(handle, inode); | ||
1582 | } | ||
1583 | |||
1584 | ret2 = generic_write_end(file, mapping, pos, len, copied, | ||
1585 | page, fsdata); | 1666 | page, fsdata); |
1586 | copied = ret2; | 1667 | copied = ret2; |
1668 | if (pos + len > inode->i_size) | ||
1669 | /* If we allocated more blocks than we copied | ||
1670 | * data into, blocks will remain allocated outside | ||
1671 | * inode->i_size, so they must be truncated. | ||
1672 | */ | ||
1673 | ext4_orphan_add(handle, inode); | ||
1674 | |||
1587 | if (ret2 < 0) | 1675 | if (ret2 < 0) |
1588 | ret = ret2; | 1676 | ret = ret2; |
1589 | 1677 | ||
@@ -1591,6 +1679,17 @@ static int ext4_writeback_write_end(struct file *file, | |||
1591 | if (!ret) | 1679 | if (!ret) |
1592 | ret = ret2; | 1680 | ret = ret2; |
1593 | 1681 | ||
1682 | if (pos + len > inode->i_size) { | ||
1683 | vmtruncate(inode, inode->i_size); | ||
1684 | /* | ||
1685 | * If vmtruncate failed early the inode might still be | ||
1686 | * on the orphan list; we need to make sure the inode | ||
1687 | * is removed from the orphan list in that case. | ||
1688 | */ | ||
1689 | if (inode->i_nlink) | ||
1690 | ext4_orphan_del(NULL, inode); | ||
1691 | } | ||
1692 | |||
1594 | return ret ? ret : copied; | 1693 | return ret ? ret : copied; |
1595 | } | 1694 | } |
1596 | 1695 | ||
@@ -1635,10 +1734,27 @@ static int ext4_journalled_write_end(struct file *file, | |||
1635 | } | 1734 | } |
1636 | 1735 | ||
1637 | unlock_page(page); | 1736 | unlock_page(page); |
1737 | page_cache_release(page); | ||
1738 | if (pos + len > inode->i_size) | ||
1739 | /* If we allocated more blocks than we copied | ||
1740 | * data into, blocks will remain allocated outside | ||
1741 | * inode->i_size, so they must be truncated. | ||
1742 | */ | ||
1743 | ext4_orphan_add(handle, inode); | ||
1744 | |||
1638 | ret2 = ext4_journal_stop(handle); | 1745 | ret2 = ext4_journal_stop(handle); |
1639 | if (!ret) | 1746 | if (!ret) |
1640 | ret = ret2; | 1747 | ret = ret2; |
1641 | page_cache_release(page); | 1748 | if (pos + len > inode->i_size) { |
1749 | vmtruncate(inode, inode->i_size); | ||
1750 | /* | ||
1751 | * If vmtruncate failed early the inode might still be | ||
1752 | * on the orphan list; we need to make sure the inode | ||
1753 | * is removed from the orphan list in that case. | ||
1754 | */ | ||
1755 | if (inode->i_nlink) | ||
1756 | ext4_orphan_del(NULL, inode); | ||
1757 | } | ||
1642 | 1758 | ||
1643 | return ret ? ret : copied; | 1759 | return ret ? ret : copied; |
1644 | } | 1760 | } |
@@ -1852,7 +1968,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
1852 | * @logical - first logical block to start assignment with | 1968 | * @logical - first logical block to start assignment with |
1853 | * | 1969 | * |
1854 | * the function goes through all passed space and put actual disk | 1970 | * the function goes through all passed space and put actual disk |
1855 | * block numbers into buffer heads, dropping BH_Delay | 1971 | * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten |
1856 | */ | 1972 | */ |
1857 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | 1973 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, |
1858 | struct buffer_head *exbh) | 1974 | struct buffer_head *exbh) |
@@ -1902,16 +2018,24 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
1902 | do { | 2018 | do { |
1903 | if (cur_logical >= logical + blocks) | 2019 | if (cur_logical >= logical + blocks) |
1904 | break; | 2020 | break; |
1905 | if (buffer_delay(bh)) { | 2021 | |
1906 | bh->b_blocknr = pblock; | 2022 | if (buffer_delay(bh) || |
1907 | clear_buffer_delay(bh); | 2023 | buffer_unwritten(bh)) { |
1908 | bh->b_bdev = inode->i_sb->s_bdev; | 2024 | |
1909 | } else if (buffer_unwritten(bh)) { | 2025 | BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); |
1910 | bh->b_blocknr = pblock; | 2026 | |
1911 | clear_buffer_unwritten(bh); | 2027 | if (buffer_delay(bh)) { |
1912 | set_buffer_mapped(bh); | 2028 | clear_buffer_delay(bh); |
1913 | set_buffer_new(bh); | 2029 | bh->b_blocknr = pblock; |
1914 | bh->b_bdev = inode->i_sb->s_bdev; | 2030 | } else { |
2031 | /* | ||
2032 | * An unwritten buffer should already | ||
2033 | * have its blocknr assigned; verify that. | ||
2034 | */ | ||
2035 | clear_buffer_unwritten(bh); | ||
2036 | BUG_ON(bh->b_blocknr != pblock); | ||
2037 | } | ||
2038 | |||
1915 | } else if (buffer_mapped(bh)) | 2039 | } else if (buffer_mapped(bh)) |
1916 | BUG_ON(bh->b_blocknr != pblock); | 2040 | BUG_ON(bh->b_blocknr != pblock); |
1917 | 2041 | ||
@@ -1990,51 +2114,6 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
1990 | return; | 2114 | return; |
1991 | } | 2115 | } |
1992 | 2116 | ||
1993 | #define EXT4_DELALLOC_RSVED 1 | ||
1994 | static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | ||
1995 | struct buffer_head *bh_result, int create) | ||
1996 | { | ||
1997 | int ret; | ||
1998 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | ||
1999 | loff_t disksize = EXT4_I(inode)->i_disksize; | ||
2000 | handle_t *handle = NULL; | ||
2001 | |||
2002 | handle = ext4_journal_current_handle(); | ||
2003 | BUG_ON(!handle); | ||
2004 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, | ||
2005 | bh_result, create, 0, EXT4_DELALLOC_RSVED); | ||
2006 | if (ret <= 0) | ||
2007 | return ret; | ||
2008 | |||
2009 | bh_result->b_size = (ret << inode->i_blkbits); | ||
2010 | |||
2011 | if (ext4_should_order_data(inode)) { | ||
2012 | int retval; | ||
2013 | retval = ext4_jbd2_file_inode(handle, inode); | ||
2014 | if (retval) | ||
2015 | /* | ||
2016 | * Failed to add inode for ordered mode. Don't | ||
2017 | * update file size | ||
2018 | */ | ||
2019 | return retval; | ||
2020 | } | ||
2021 | |||
2022 | /* | ||
2023 | * Update on-disk size along with block allocation we don't | ||
2024 | * use 'extend_disksize' as size may change within already | ||
2025 | * allocated block -bzzz | ||
2026 | */ | ||
2027 | disksize = ((loff_t) iblock + ret) << inode->i_blkbits; | ||
2028 | if (disksize > i_size_read(inode)) | ||
2029 | disksize = i_size_read(inode); | ||
2030 | if (disksize > EXT4_I(inode)->i_disksize) { | ||
2031 | ext4_update_i_disksize(inode, disksize); | ||
2032 | ret = ext4_mark_inode_dirty(handle, inode); | ||
2033 | return ret; | ||
2034 | } | ||
2035 | return 0; | ||
2036 | } | ||
2037 | |||
2038 | /* | 2117 | /* |
2039 | * mpage_da_map_blocks - go through given space | 2118 | * mpage_da_map_blocks - go through given space |
2040 | * | 2119 | * |
@@ -2045,29 +2124,57 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | |||
2045 | */ | 2124 | */ |
2046 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) | 2125 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) |
2047 | { | 2126 | { |
2048 | int err = 0; | 2127 | int err, blks, get_blocks_flags; |
2049 | struct buffer_head new; | 2128 | struct buffer_head new; |
2050 | sector_t next; | 2129 | sector_t next = mpd->b_blocknr; |
2130 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; | ||
2131 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; | ||
2132 | handle_t *handle = NULL; | ||
2051 | 2133 | ||
2052 | /* | 2134 | /* |
2053 | * We consider only non-mapped and non-allocated blocks | 2135 | * We consider only non-mapped and non-allocated blocks |
2054 | */ | 2136 | */ |
2055 | if ((mpd->b_state & (1 << BH_Mapped)) && | 2137 | if ((mpd->b_state & (1 << BH_Mapped)) && |
2056 | !(mpd->b_state & (1 << BH_Delay))) | 2138 | !(mpd->b_state & (1 << BH_Delay)) && |
2139 | !(mpd->b_state & (1 << BH_Unwritten))) | ||
2057 | return 0; | 2140 | return 0; |
2058 | new.b_state = mpd->b_state; | 2141 | |
2059 | new.b_blocknr = 0; | ||
2060 | new.b_size = mpd->b_size; | ||
2061 | next = mpd->b_blocknr; | ||
2062 | /* | 2142 | /* |
2063 | * If we didn't accumulate anything | 2143 | * If we didn't accumulate anything to write simply return |
2064 | * to write simply return | ||
2065 | */ | 2144 | */ |
2066 | if (!new.b_size) | 2145 | if (!mpd->b_size) |
2067 | return 0; | 2146 | return 0; |
2068 | 2147 | ||
2069 | err = ext4_da_get_block_write(mpd->inode, next, &new, 1); | 2148 | handle = ext4_journal_current_handle(); |
2070 | if (err) { | 2149 | BUG_ON(!handle); |
2150 | |||
2151 | /* | ||
2152 | * Call ext4_get_blocks() to allocate any delayed allocation | ||
2153 | * blocks, or to convert an uninitialized extent to be | ||
2154 | * initialized (in the case where we have written into | ||
2155 | * one or more preallocated blocks). | ||
2156 | * | ||
2157 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to | ||
2158 | * indicate that we are on the delayed allocation path. This | ||
2159 | * affects functions in many different parts of the allocation | ||
2160 | * call path. This flag exists primarily because we don't | ||
2161 | * want to change *many* call functions, so ext4_get_blocks() | ||
2162 | * will set the magic i_delalloc_reserved_flag once the | ||
2163 | * inode's allocation semaphore is taken. | ||
2164 | * | ||
2165 | * If the blocks in question were delalloc blocks, set | ||
2166 | * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting | ||
2167 | * variables are updated after the blocks have been allocated. | ||
2168 | */ | ||
2169 | new.b_state = 0; | ||
2170 | get_blocks_flags = (EXT4_GET_BLOCKS_CREATE | | ||
2171 | EXT4_GET_BLOCKS_DELALLOC_RESERVE); | ||
2172 | if (mpd->b_state & (1 << BH_Delay)) | ||
2173 | get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE; | ||
2174 | blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, | ||
2175 | &new, get_blocks_flags); | ||
2176 | if (blks < 0) { | ||
2177 | err = blks; | ||
2071 | /* | 2178 | /* |
2072 | * If get block returns with error we simply | 2179 | * If get block returns with error we simply |
2073 | * return. Later writepage will redirty the page and | 2180 | * return. Later writepage will redirty the page and |
@@ -2100,12 +2207,14 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2100 | if (err == -ENOSPC) { | 2207 | if (err == -ENOSPC) { |
2101 | ext4_print_free_blocks(mpd->inode); | 2208 | ext4_print_free_blocks(mpd->inode); |
2102 | } | 2209 | } |
2103 | /* invlaidate all the pages */ | 2210 | /* invalidate all the pages */ |
2104 | ext4_da_block_invalidatepages(mpd, next, | 2211 | ext4_da_block_invalidatepages(mpd, next, |
2105 | mpd->b_size >> mpd->inode->i_blkbits); | 2212 | mpd->b_size >> mpd->inode->i_blkbits); |
2106 | return err; | 2213 | return err; |
2107 | } | 2214 | } |
2108 | BUG_ON(new.b_size == 0); | 2215 | BUG_ON(blks == 0); |
2216 | |||
2217 | new.b_size = (blks << mpd->inode->i_blkbits); | ||
2109 | 2218 | ||
2110 | if (buffer_new(&new)) | 2219 | if (buffer_new(&new)) |
2111 | __unmap_underlying_blocks(mpd->inode, &new); | 2220 | __unmap_underlying_blocks(mpd->inode, &new); |
@@ -2118,6 +2227,23 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2118 | (mpd->b_state & (1 << BH_Unwritten))) | 2227 | (mpd->b_state & (1 << BH_Unwritten))) |
2119 | mpage_put_bnr_to_bhs(mpd, next, &new); | 2228 | mpage_put_bnr_to_bhs(mpd, next, &new); |
2120 | 2229 | ||
2230 | if (ext4_should_order_data(mpd->inode)) { | ||
2231 | err = ext4_jbd2_file_inode(handle, mpd->inode); | ||
2232 | if (err) | ||
2233 | return err; | ||
2234 | } | ||
2235 | |||
2236 | /* | ||
2237 | * Update on-disk size along with block allocation. | ||
2238 | */ | ||
2239 | disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits; | ||
2240 | if (disksize > i_size_read(mpd->inode)) | ||
2241 | disksize = i_size_read(mpd->inode); | ||
2242 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { | ||
2243 | ext4_update_i_disksize(mpd->inode, disksize); | ||
2244 | return ext4_mark_inode_dirty(handle, mpd->inode); | ||
2245 | } | ||
2246 | |||
2121 | return 0; | 2247 | return 0; |
2122 | } | 2248 | } |
2123 | 2249 | ||
@@ -2192,6 +2318,17 @@ flush_it: | |||
2192 | return; | 2318 | return; |
2193 | } | 2319 | } |
2194 | 2320 | ||
2321 | static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) | ||
2322 | { | ||
2323 | /* | ||
2324 | * unmapped buffer is possible for holes. | ||
2325 | * delay buffer is possible with delayed allocation. | ||
2326 | * We also need to consider unwritten buffer as unmapped. | ||
2327 | */ | ||
2328 | return (!buffer_mapped(bh) || buffer_delay(bh) || | ||
2329 | buffer_unwritten(bh)) && buffer_dirty(bh); | ||
2330 | } | ||
2331 | |||
2195 | /* | 2332 | /* |
2196 | * __mpage_da_writepage - finds extent of pages and blocks | 2333 | * __mpage_da_writepage - finds extent of pages and blocks |
2197 | * | 2334 | * |
@@ -2276,8 +2413,7 @@ static int __mpage_da_writepage(struct page *page, | |||
2276 | * Otherwise we won't make progress | 2413 | * Otherwise we won't make progress |
2277 | * with the page in ext4_da_writepage | 2414 | * with the page in ext4_da_writepage |
2278 | */ | 2415 | */ |
2279 | if (buffer_dirty(bh) && | 2416 | if (ext4_bh_unmapped_or_delay(NULL, bh)) { |
2280 | (!buffer_mapped(bh) || buffer_delay(bh))) { | ||
2281 | mpage_add_bh_to_extent(mpd, logical, | 2417 | mpage_add_bh_to_extent(mpd, logical, |
2282 | bh->b_size, | 2418 | bh->b_size, |
2283 | bh->b_state); | 2419 | bh->b_state); |
@@ -2303,8 +2439,16 @@ static int __mpage_da_writepage(struct page *page, | |||
2303 | } | 2439 | } |
2304 | 2440 | ||
2305 | /* | 2441 | /* |
2306 | * this is a special callback for ->write_begin() only | 2442 | * This is a special get_blocks_t callback which is used by |
2307 | * it's intention is to return mapped block or reserve space | 2443 | * ext4_da_write_begin(). It will either return mapped block or |
2444 | * reserve space for a single block. | ||
2445 | * | ||
2446 | * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set. | ||
2447 | * We also have b_blocknr = -1 and b_bdev initialized properly | ||
2448 | * | ||
2449 | * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set. | ||
2450 | * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev | ||
2451 | * initialized properly. | ||
2308 | */ | 2452 | */ |
2309 | static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | 2453 | static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, |
2310 | struct buffer_head *bh_result, int create) | 2454 | struct buffer_head *bh_result, int create) |
@@ -2323,7 +2467,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2323 | * preallocated blocks are unmapped but should treated | 2467 | * preallocated blocks are unmapped but should treated |
2324 | * the same as allocated blocks. | 2468 | * the same as allocated blocks. |
2325 | */ | 2469 | */ |
2326 | ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0, 0); | 2470 | ret = ext4_get_blocks(NULL, inode, iblock, 1, bh_result, 0); |
2327 | if ((ret == 0) && !buffer_delay(bh_result)) { | 2471 | if ((ret == 0) && !buffer_delay(bh_result)) { |
2328 | /* the block isn't (pre)allocated yet, let's reserve space */ | 2472 | /* the block isn't (pre)allocated yet, let's reserve space */ |
2329 | /* | 2473 | /* |
@@ -2340,40 +2484,53 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2340 | set_buffer_delay(bh_result); | 2484 | set_buffer_delay(bh_result); |
2341 | } else if (ret > 0) { | 2485 | } else if (ret > 0) { |
2342 | bh_result->b_size = (ret << inode->i_blkbits); | 2486 | bh_result->b_size = (ret << inode->i_blkbits); |
2343 | /* | 2487 | if (buffer_unwritten(bh_result)) { |
2344 | * With sub-block writes into unwritten extents | 2488 | /* A delayed write to unwritten bh should |
2345 | * we also need to mark the buffer as new so that | 2489 | * be marked new and mapped. Mapped ensures |
2346 | * the unwritten parts of the buffer gets correctly zeroed. | 2490 | * that we don't do get_block multiple times |
2347 | */ | 2491 | * when we write to the same offset and new |
2348 | if (buffer_unwritten(bh_result)) | 2492 | * ensures that we do proper zero out for |
2493 | * partial write. | ||
2494 | */ | ||
2349 | set_buffer_new(bh_result); | 2495 | set_buffer_new(bh_result); |
2496 | set_buffer_mapped(bh_result); | ||
2497 | } | ||
2350 | ret = 0; | 2498 | ret = 0; |
2351 | } | 2499 | } |
2352 | 2500 | ||
2353 | return ret; | 2501 | return ret; |
2354 | } | 2502 | } |
2355 | 2503 | ||
2356 | static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) | 2504 | /* |
2357 | { | 2505 | * This function is used as a standard get_block_t callback function |
2358 | /* | 2506 | * when there is no desire to allocate any blocks. It is used as a |
2359 | * unmapped buffer is possible for holes. | 2507 | * callback function for block_prepare_write(), nobh_writepage(), and |
2360 | * delay buffer is possible with delayed allocation | 2508 | * block_write_full_page(). These functions should only try to map a |
2361 | */ | 2509 | * single block at a time. |
2362 | return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh)); | 2510 | * |
2363 | } | 2511 | * Since this function doesn't do block allocations even if the caller |
2364 | 2512 | * requests it by passing in create=1, it is critically important that | |
2365 | static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock, | 2513 | * any caller checks to make sure that any buffer heads returned |
2514 | * by this function are either all already mapped or marked for | ||
2515 | * delayed allocation before calling nobh_writepage() or | ||
2516 | * block_write_full_page(). Otherwise, b_blocknr could be left | ||
2518 | * uninitialized, and the page write functions will be taken by | ||
2518 | * surprise. | ||
2519 | */ | ||
2520 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, | ||
2366 | struct buffer_head *bh_result, int create) | 2521 | struct buffer_head *bh_result, int create) |
2367 | { | 2522 | { |
2368 | int ret = 0; | 2523 | int ret = 0; |
2369 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | 2524 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; |
2370 | 2525 | ||
2526 | BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); | ||
2527 | |||
2371 | /* | 2528 | /* |
2372 | * we don't want to do block allocation in writepage | 2529 | * we don't want to do block allocation in writepage |
2373 | * so call get_block_wrap with create = 0 | 2530 | * so call get_block_wrap with create = 0 |
2374 | */ | 2531 | */ |
2375 | ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks, | 2532 | ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0); |
2376 | bh_result, 0, 0, 0); | 2533 | BUG_ON(create && ret == 0); |
2377 | if (ret > 0) { | 2534 | if (ret > 0) { |
2378 | bh_result->b_size = (ret << inode->i_blkbits); | 2535 | bh_result->b_size = (ret << inode->i_blkbits); |
2379 | ret = 0; | 2536 | ret = 0; |
@@ -2382,10 +2539,11 @@ static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock, | |||
2382 | } | 2539 | } |
2383 | 2540 | ||
2384 | /* | 2541 | /* |
2385 | * get called vi ext4_da_writepages after taking page lock (have journal handle) | 2542 | * This function can get called via... |
2386 | * get called via journal_submit_inode_data_buffers (no journal handle) | 2543 | * - ext4_da_writepages after taking page lock (have journal handle) |
2387 | * get called via shrink_page_list via pdflush (no journal handle) | 2544 | * - journal_submit_inode_data_buffers (no journal handle) |
2388 | * or grab_page_cache when doing write_begin (have journal handle) | 2545 | * - shrink_page_list via pdflush (no journal handle) |
2546 | * - grab_page_cache when doing write_begin (have journal handle) | ||
2389 | */ | 2547 | */ |
2390 | static int ext4_da_writepage(struct page *page, | 2548 | static int ext4_da_writepage(struct page *page, |
2391 | struct writeback_control *wbc) | 2549 | struct writeback_control *wbc) |
@@ -2436,7 +2594,7 @@ static int ext4_da_writepage(struct page *page, | |||
2436 | * do block allocation here. | 2594 | * do block allocation here. |
2437 | */ | 2595 | */ |
2438 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, | 2596 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, |
2439 | ext4_normal_get_block_write); | 2597 | noalloc_get_block_write); |
2440 | if (!ret) { | 2598 | if (!ret) { |
2441 | page_bufs = page_buffers(page); | 2599 | page_bufs = page_buffers(page); |
2442 | /* check whether all are mapped and non delay */ | 2600 | /* check whether all are mapped and non delay */ |
@@ -2461,11 +2619,10 @@ static int ext4_da_writepage(struct page *page, | |||
2461 | } | 2619 | } |
2462 | 2620 | ||
2463 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) | 2621 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) |
2464 | ret = nobh_writepage(page, ext4_normal_get_block_write, wbc); | 2622 | ret = nobh_writepage(page, noalloc_get_block_write, wbc); |
2465 | else | 2623 | else |
2466 | ret = block_write_full_page(page, | 2624 | ret = block_write_full_page(page, noalloc_get_block_write, |
2467 | ext4_normal_get_block_write, | 2625 | wbc); |
2468 | wbc); | ||
2469 | 2626 | ||
2470 | return ret; | 2627 | return ret; |
2471 | } | 2628 | } |
@@ -2777,7 +2934,7 @@ retry: | |||
2777 | *pagep = page; | 2934 | *pagep = page; |
2778 | 2935 | ||
2779 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 2936 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, |
2780 | ext4_da_get_block_prep); | 2937 | ext4_da_get_block_prep); |
2781 | if (ret < 0) { | 2938 | if (ret < 0) { |
2782 | unlock_page(page); | 2939 | unlock_page(page); |
2783 | ext4_journal_stop(handle); | 2940 | ext4_journal_stop(handle); |
@@ -2815,7 +2972,7 @@ static int ext4_da_should_update_i_disksize(struct page *page, | |||
2815 | for (i = 0; i < idx; i++) | 2972 | for (i = 0; i < idx; i++) |
2816 | bh = bh->b_this_page; | 2973 | bh = bh->b_this_page; |
2817 | 2974 | ||
2818 | if (!buffer_mapped(bh) || (buffer_delay(bh))) | 2975 | if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh)) |
2819 | return 0; | 2976 | return 0; |
2820 | return 1; | 2977 | return 1; |
2821 | } | 2978 | } |
@@ -3085,12 +3242,10 @@ static int __ext4_normal_writepage(struct page *page, | |||
3085 | struct inode *inode = page->mapping->host; | 3242 | struct inode *inode = page->mapping->host; |
3086 | 3243 | ||
3087 | if (test_opt(inode->i_sb, NOBH)) | 3244 | if (test_opt(inode->i_sb, NOBH)) |
3088 | return nobh_writepage(page, | 3245 | return nobh_writepage(page, noalloc_get_block_write, wbc); |
3089 | ext4_normal_get_block_write, wbc); | ||
3090 | else | 3246 | else |
3091 | return block_write_full_page(page, | 3247 | return block_write_full_page(page, noalloc_get_block_write, |
3092 | ext4_normal_get_block_write, | 3248 | wbc); |
3093 | wbc); | ||
3094 | } | 3249 | } |
3095 | 3250 | ||
3096 | static int ext4_normal_writepage(struct page *page, | 3251 | static int ext4_normal_writepage(struct page *page, |
@@ -3142,7 +3297,7 @@ static int __ext4_journalled_writepage(struct page *page, | |||
3142 | int err; | 3297 | int err; |
3143 | 3298 | ||
3144 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, | 3299 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, |
3145 | ext4_normal_get_block_write); | 3300 | noalloc_get_block_write); |
3146 | if (ret != 0) | 3301 | if (ret != 0) |
3147 | goto out_unlock; | 3302 | goto out_unlock; |
3148 | 3303 | ||
@@ -3227,9 +3382,8 @@ static int ext4_journalled_writepage(struct page *page, | |||
3227 | * really know unless we go poke around in the buffer_heads. | 3382 | * really know unless we go poke around in the buffer_heads. |
3228 | * But block_write_full_page will do the right thing. | 3383 | * But block_write_full_page will do the right thing. |
3229 | */ | 3384 | */ |
3230 | return block_write_full_page(page, | 3385 | return block_write_full_page(page, noalloc_get_block_write, |
3231 | ext4_normal_get_block_write, | 3386 | wbc); |
3232 | wbc); | ||
3233 | } | 3387 | } |
3234 | no_write: | 3388 | no_write: |
3235 | redirty_page_for_writepage(wbc, page); | 3389 | redirty_page_for_writepage(wbc, page); |
@@ -3973,7 +4127,8 @@ void ext4_truncate(struct inode *inode) | |||
3973 | if (!ext4_can_truncate(inode)) | 4127 | if (!ext4_can_truncate(inode)) |
3974 | return; | 4128 | return; |
3975 | 4129 | ||
3976 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | 4130 | if (ei->i_disksize && inode->i_size == 0 && |
4131 | !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | ||
3977 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; | 4132 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; |
3978 | 4133 | ||
3979 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 4134 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
@@ -4715,25 +4870,6 @@ int ext4_write_inode(struct inode *inode, int wait) | |||
4715 | return ext4_force_commit(inode->i_sb); | 4870 | return ext4_force_commit(inode->i_sb); |
4716 | } | 4871 | } |
4717 | 4872 | ||
4718 | int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh) | ||
4719 | { | ||
4720 | int err = 0; | ||
4721 | |||
4722 | mark_buffer_dirty(bh); | ||
4723 | if (inode && inode_needs_sync(inode)) { | ||
4724 | sync_dirty_buffer(bh); | ||
4725 | if (buffer_req(bh) && !buffer_uptodate(bh)) { | ||
4726 | ext4_error(inode->i_sb, __func__, | ||
4727 | "IO error syncing inode, " | ||
4728 | "inode=%lu, block=%llu", | ||
4729 | inode->i_ino, | ||
4730 | (unsigned long long)bh->b_blocknr); | ||
4731 | err = -EIO; | ||
4732 | } | ||
4733 | } | ||
4734 | return err; | ||
4735 | } | ||
4736 | |||
4737 | /* | 4873 | /* |
4738 | * ext4_setattr() | 4874 | * ext4_setattr() |
4739 | * | 4875 | * |
@@ -4930,7 +5066,8 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
4930 | */ | 5066 | */ |
4931 | int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 5067 | int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) |
4932 | { | 5068 | { |
4933 | int groups, gdpblocks; | 5069 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); |
5070 | int gdpblocks; | ||
4934 | int idxblocks; | 5071 | int idxblocks; |
4935 | int ret = 0; | 5072 | int ret = 0; |
4936 | 5073 | ||
@@ -4957,8 +5094,8 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
4957 | groups += nrblocks; | 5094 | groups += nrblocks; |
4958 | 5095 | ||
4959 | gdpblocks = groups; | 5096 | gdpblocks = groups; |
4960 | if (groups > EXT4_SB(inode->i_sb)->s_groups_count) | 5097 | if (groups > ngroups) |
4961 | groups = EXT4_SB(inode->i_sb)->s_groups_count; | 5098 | groups = ngroups; |
4962 | if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) | 5099 | if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) |
4963 | gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; | 5100 | gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; |
4964 | 5101 | ||
@@ -4998,7 +5135,7 @@ int ext4_writepage_trans_blocks(struct inode *inode) | |||
4998 | * Calculate the journal credits for a chunk of data modification. | 5135 | * Calculate the journal credits for a chunk of data modification. |
4999 | * | 5136 | * |
5000 | * This is called from DIO, fallocate or whoever calling | 5137 | * This is called from DIO, fallocate or whoever calling |
5001 | * ext4_get_blocks_wrap() to map/allocate a chunk of contigous disk blocks. | 5138 | * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks. |
5002 | * | 5139 | * |
5003 | * journal buffers for data blocks are not included here, as DIO | 5140 | * journal buffers for data blocks are not included here, as DIO |
5004 | * and fallocate do no need to journal data buffers. | 5141 | * and fallocate do no need to journal data buffers. |