Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--	fs/ext4/inode.c	| 1655
1 file changed, 1392 insertions(+), 263 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8d9707746413..9843b046c235 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -32,12 +32,23 @@
 #include <linux/string.h>
 #include <linux/buffer_head.h>
 #include <linux/writeback.h>
+#include <linux/pagevec.h>
 #include <linux/mpage.h>
 #include <linux/uio.h>
 #include <linux/bio.h>
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
+#include "ext4_extents.h"
+
+static inline int ext4_begin_ordered_truncate(struct inode *inode,
+					      loff_t new_size)
+{
+	return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode,
+						   new_size);
+}
+
+static void ext4_invalidatepage(struct page *page, unsigned long offset);
 
 /*
  * Test whether an inode is a fast symlink.
@@ -181,6 +192,8 @@ void ext4_delete_inode (struct inode * inode)
 {
 	handle_t *handle;
 
+	if (ext4_should_order_data(inode))
+		ext4_begin_ordered_truncate(inode, 0);
 	truncate_inode_pages(&inode->i_data, 0);
 
 	if (is_bad_inode(inode))
@@ -508,11 +521,12 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
  * direct blocks
  */
 static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
-			ext4_fsblk_t goal, int indirect_blks, int blks,
-			ext4_fsblk_t new_blocks[4], int *err)
+			ext4_lblk_t iblock, ext4_fsblk_t goal,
+			int indirect_blks, int blks,
+			ext4_fsblk_t new_blocks[4], int *err)
 {
 	int target, i;
-	unsigned long count = 0;
+	unsigned long count = 0, blk_allocated = 0;
 	int index = 0;
 	ext4_fsblk_t current_block = 0;
 	int ret = 0;
@@ -525,12 +539,13 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
 	 * the first direct block of this branch. That's the
 	 * minimum number of blocks need to allocate(required)
 	 */
-	target = blks + indirect_blks;
-
-	while (1) {
+	/* first we try to allocate the indirect blocks */
+	target = indirect_blks;
+	while (target > 0) {
 		count = target;
 		/* allocating blocks for indirect blocks and direct blocks */
-		current_block = ext4_new_blocks(handle,inode,goal,&count,err);
+		current_block = ext4_new_meta_blocks(handle, inode,
+							goal, &count, err);
 		if (*err)
 			goto failed_out;
 
@@ -540,16 +555,48 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
 			new_blocks[index++] = current_block++;
 			count--;
 		}
-
-		if (count > 0)
+		if (count > 0) {
+			/*
+			 * save the new block number
+			 * for the first direct block
+			 */
+			new_blocks[index] = current_block;
+			printk(KERN_INFO "%s returned more blocks than "
+						"requested\n", __func__);
+			WARN_ON(1);
 			break;
+		}
 	}
 
-	/* save the new block number for the first direct block */
-	new_blocks[index] = current_block;
-
+	target = blks - count;
+	blk_allocated = count;
+	if (!target)
+		goto allocated;
+	/* Now allocate data blocks */
+	count = target;
+	/* allocating blocks for data blocks */
+	current_block = ext4_new_blocks(handle, inode, iblock,
+						goal, &count, err);
+	if (*err && (target == blks)) {
+		/*
+		 * if the allocation failed and we didn't allocate
+		 * any blocks before
+		 */
+		goto failed_out;
+	}
+	if (!*err) {
+		if (target == blks) {
+			/*
+			 * save the new block number
+			 * for the first direct block
+			 */
+			new_blocks[index] = current_block;
+		}
+		blk_allocated += count;
+	}
+allocated:
 	/* total number of blocks allocated for direct blocks */
-	ret = count;
+	ret = blk_allocated;
 	*err = 0;
 	return ret;
 failed_out:
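
The rework above splits one allocation into two phases: the indirect (metadata) blocks come from ext4_new_meta_blocks(), then the data blocks from ext4_new_blocks(), which now takes the file-relative block number (iblock) as a goal hint. A standalone sketch of the caller-visible contract (toy code, not part of the patch; model_alloc_blocks and its fixed block numbers are invented for illustration):

#include <stdio.h>

/*
 * Model of the result layout: new_blocks[0..n-1] hold the n indirect
 * blocks, new_blocks[n] holds the first data block, and the return
 * value is the number of data blocks actually obtained.
 */
static int model_alloc_blocks(int indirect_blks, int blks,
			      unsigned long new_blocks[4])
{
	unsigned long next = 1000;	/* pretend free space starts here */
	int index = 0, i;

	for (i = 0; i < indirect_blks; i++)	/* phase 1: metadata */
		new_blocks[index++] = next++;
	new_blocks[index] = next;		/* phase 2: first data block */
	return blks;				/* data blocks allocated */
}

int main(void)
{
	unsigned long blk[4];
	int got = model_alloc_blocks(2, 5, blk);

	printf("meta: %lu %lu, first data: %lu, data count: %d\n",
	       blk[0], blk[1], blk[2], got);
	return 0;
}
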
@@ -584,8 +631,9 @@ failed_out:
  * as described above and return 0.
  */
 static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
-			int indirect_blks, int *blks, ext4_fsblk_t goal,
-			ext4_lblk_t *offsets, Indirect *branch)
+			ext4_lblk_t iblock, int indirect_blks,
+			int *blks, ext4_fsblk_t goal,
+			ext4_lblk_t *offsets, Indirect *branch)
 {
 	int blocksize = inode->i_sb->s_blocksize;
 	int i, n = 0;
@@ -595,7 +643,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 	ext4_fsblk_t new_blocks[4];
 	ext4_fsblk_t current_block;
 
-	num = ext4_alloc_blocks(handle, inode, goal, indirect_blks,
+	num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks,
 				*blks, new_blocks, &err);
 	if (err)
 		return err;
@@ -799,6 +847,7 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	int count = 0;
 	ext4_fsblk_t first_block = 0;
+	loff_t disksize;
 
 
 	J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
@@ -855,8 +904,9 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
 	/*
 	 * Block out ext4_truncate while we alter the tree
 	 */
-	err = ext4_alloc_branch(handle, inode, indirect_blks, &count, goal,
-				offsets + (partial - chain), partial);
+	err = ext4_alloc_branch(handle, inode, iblock, indirect_blks,
+					&count, goal,
+					offsets + (partial - chain), partial);
 
 	/*
 	 * The ext4_splice_branch call will free and forget any buffers
@@ -873,8 +923,13 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
 	 * protect it if you're about to implement concurrent
 	 * ext4_get_block() -bzzz
 	 */
-	if (!err && extend_disksize && inode->i_size > ei->i_disksize)
-		ei->i_disksize = inode->i_size;
+	if (!err && extend_disksize) {
+		disksize = ((loff_t) iblock + count) << inode->i_blkbits;
+		if (disksize > i_size_read(inode))
+			disksize = i_size_read(inode);
+		if (disksize > ei->i_disksize)
+			ei->i_disksize = disksize;
+	}
 	if (err)
 		goto cleanup;
 
@@ -934,7 +989,7 @@ out:
  */
 int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
 			unsigned long max_blocks, struct buffer_head *bh,
-			int create, int extend_disksize)
+			int create, int extend_disksize, int flag)
 {
 	int retval;
 
@@ -975,6 +1030,15 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
 	 * with create == 1 flag.
 	 */
 	down_write((&EXT4_I(inode)->i_data_sem));
+
+	/*
+	 * If the caller is from the delayed allocation writeout path,
+	 * we have already reserved fs blocks for allocation; let the
+	 * underlying get_block() function know, to avoid double
+	 * accounting
+	 */
+	if (flag)
+		EXT4_I(inode)->i_delalloc_reserved_flag = 1;
 	/*
 	 * We need to check for EXT4 here because migrate
 	 * could have changed the inode type in between
@@ -996,6 +1060,18 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
 					~EXT4_EXT_MIGRATE;
 		}
 	}
+
+	if (flag) {
+		EXT4_I(inode)->i_delalloc_reserved_flag = 0;
+		/*
+		 * Update the reserved blocks/metadata blocks
+		 * accounting that was deferred till now, after
+		 * successful block allocation
+		 */
+		if ((retval > 0) && buffer_delay(bh))
+			ext4_da_release_space(inode, retval, 0);
+	}
+
 	up_write((&EXT4_I(inode)->i_data_sem));
 	return retval;
 }
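
The flag protocol above pairs with the reservation functions added later in this patch: blocks reserved at write time are released here once real allocation succeeds, so nothing is counted twice. A toy model of that pairing (illustrative only; the toy_* names are invented, the flow mirrors ext4_da_reserve_space()/ext4_da_release_space()):

#include <assert.h>

struct toy_inode {
	int reserved_data_blocks;
	int delalloc_reserved_flag;
};

static void toy_reserve(struct toy_inode *ti, int n)	/* write_begin path */
{
	ti->reserved_data_blocks += n;
}

static void toy_allocate(struct toy_inode *ti, int n)	/* writeout path */
{
	ti->delalloc_reserved_flag = 1;	/* tell get_block: already reserved */
	/* ... real block allocation would happen here ... */
	ti->delalloc_reserved_flag = 0;
	ti->reserved_data_blocks -= n;	/* ext4_da_release_space(inode, n, 0) */
}

int main(void)
{
	struct toy_inode ti = { 0, 0 };

	toy_reserve(&ti, 4);	/* four delayed blocks written into cache */
	toy_allocate(&ti, 4);	/* flushed: reservation consumed, not doubled */
	assert(ti.reserved_data_blocks == 0);
	return 0;
}
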
@@ -1021,7 +1097,7 @@ static int ext4_get_block(struct inode *inode, sector_t iblock,
 	}
 
 	ret = ext4_get_blocks_wrap(handle, inode, iblock,
-				max_blocks, bh_result, create, 0);
+				max_blocks, bh_result, create, 0, 0);
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 		ret = 0;
@@ -1047,7 +1123,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
 	dummy.b_blocknr = -1000;
 	buffer_trace_init(&dummy.b_history);
 	err = ext4_get_blocks_wrap(handle, inode, block, 1,
-					&dummy, create, 1);
+					&dummy, create, 1, 0);
 	/*
 	 * ext4_get_blocks_handle() returns number of blocks
 	 * mapped. 0 in case of a HOLE.
@@ -1203,19 +1279,20 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
 	to = from + len;
 
 retry:
-	page = __grab_cache_page(mapping, index);
-	if (!page)
-		return -ENOMEM;
-	*pagep = page;
-
 	handle = ext4_journal_start(inode, needed_blocks);
 	if (IS_ERR(handle)) {
-		unlock_page(page);
-		page_cache_release(page);
 		ret = PTR_ERR(handle);
 		goto out;
 	}
 
+	page = __grab_cache_page(mapping, index);
+	if (!page) {
+		ext4_journal_stop(handle);
+		ret = -ENOMEM;
+		goto out;
+	}
+	*pagep = page;
+
 	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				ext4_get_block);
 
@@ -1225,8 +1302,8 @@ retry:
 	}
 
 	if (ret) {
-		ext4_journal_stop(handle);
 		unlock_page(page);
+		ext4_journal_stop(handle);
 		page_cache_release(page);
 	}
 
@@ -1236,15 +1313,6 @@ out:
 	return ret;
 }
 
-int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
-{
-	int err = jbd2_journal_dirty_data(handle, bh);
-	if (err)
-		ext4_journal_abort_handle(__func__, __func__,
-						bh, handle, err);
-	return err;
-}
-
 /* For write_end() in data=journal mode */
 static int write_end_fn(handle_t *handle, struct buffer_head *bh)
 {
@@ -1255,29 +1323,6 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
 }
 
 /*
- * Generic write_end handler for ordered and writeback ext4 journal modes.
- * We can't use generic_write_end, because that unlocks the page and we need to
- * unlock the page after ext4_journal_stop, but ext4_journal_stop must run
- * after block_write_end.
- */
-static int ext4_generic_write_end(struct file *file,
-				struct address_space *mapping,
-				loff_t pos, unsigned len, unsigned copied,
-				struct page *page, void *fsdata)
-{
-	struct inode *inode = file->f_mapping->host;
-
-	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
-
-	if (pos+copied > inode->i_size) {
-		i_size_write(inode, pos+copied);
-		mark_inode_dirty(inode);
-	}
-
-	return copied;
-}
-
-/*
  * We need to pick up the new inode size which generic_commit_write gave us
  * `file' can be NULL - eg, when called from page_symlink().
  *
@@ -1290,15 +1335,14 @@ static int ext4_ordered_write_end(struct file *file,
 				struct page *page, void *fsdata)
 {
 	handle_t *handle = ext4_journal_current_handle();
-	struct inode *inode = file->f_mapping->host;
+	struct inode *inode = mapping->host;
 	unsigned from, to;
 	int ret = 0, ret2;
 
 	from = pos & (PAGE_CACHE_SIZE - 1);
 	to = from + len;
 
-	ret = walk_page_buffers(handle, page_buffers(page),
-		from, to, NULL, ext4_journal_dirty_data);
+	ret = ext4_jbd2_file_inode(handle, inode);
 
 	if (ret == 0) {
 		/*
@@ -1311,7 +1355,7 @@ static int ext4_ordered_write_end(struct file *file,
 		new_i_size = pos + copied;
 		if (new_i_size > EXT4_I(inode)->i_disksize)
 			EXT4_I(inode)->i_disksize = new_i_size;
-		ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
+		ret2 = generic_write_end(file, mapping, pos, len, copied,
 							page, fsdata);
 		copied = ret2;
 		if (ret2 < 0)
@@ -1320,8 +1364,6 @@ static int ext4_ordered_write_end(struct file *file,
 	ret2 = ext4_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
-	unlock_page(page);
-	page_cache_release(page);
 
 	return ret ? ret : copied;
 }
@@ -1332,7 +1374,7 @@ static int ext4_writeback_write_end(struct file *file,
 				struct page *page, void *fsdata)
 {
 	handle_t *handle = ext4_journal_current_handle();
-	struct inode *inode = file->f_mapping->host;
+	struct inode *inode = mapping->host;
 	int ret = 0, ret2;
 	loff_t new_i_size;
 
@@ -1340,7 +1382,7 @@ static int ext4_writeback_write_end(struct file *file,
 	if (new_i_size > EXT4_I(inode)->i_disksize)
 		EXT4_I(inode)->i_disksize = new_i_size;
 
-	ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
+	ret2 = generic_write_end(file, mapping, pos, len, copied,
 							page, fsdata);
 	copied = ret2;
 	if (ret2 < 0)
@@ -1349,8 +1391,6 @@ static int ext4_writeback_write_end(struct file *file,
 	ret2 = ext4_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
-	unlock_page(page);
-	page_cache_release(page);
 
 	return ret ? ret : copied;
 }
@@ -1389,14 +1429,965 @@ static int ext4_journalled_write_end(struct file *file,
 		ret = ret2;
 	}
 
+	unlock_page(page);
 	ret2 = ext4_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
-	unlock_page(page);
 	page_cache_release(page);
 
 	return ret ? ret : copied;
 }
+/*
+ * Calculate the number of metadata blocks needed to reserve
+ * to allocate @blocks for a non-extent-based file
+ */
+static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
+{
+	int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+	int ind_blks, dind_blks, tind_blks;
+
+	/* number of new indirect blocks needed */
+	ind_blks = (blocks + icap - 1) / icap;
+
+	dind_blks = (ind_blks + icap - 1) / icap;
+
+	tind_blks = 1;
+
+	return ind_blks + dind_blks + tind_blks;
+}
+
+/*
+ * Calculate the number of metadata blocks needed to reserve
+ * to allocate the given number of blocks
+ */
+static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
+{
+	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+		return ext4_ext_calc_metadata_amount(inode, blocks);
+
+	return ext4_indirect_calc_metadata_amount(inode, blocks);
+}
+
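
As a worked example of the arithmetic above (standalone sketch; the function below reimplements the same ceiling divisions outside the kernel, with icap passed in instead of read from the superblock):

#include <stdio.h>

/*
 * With 4KB blocks an indirect block holds icap = 4096/4 = 1024 pointers,
 * so reserving for 2048 data blocks needs 2 indirect + 1 double-indirect
 * + 1 triple-indirect = 4 metadata blocks.
 */
static int indirect_calc_metadata_amount(int icap, int blocks)
{
	int ind_blks = (blocks + icap - 1) / icap;	/* ceil(blocks/icap) */
	int dind_blks = (ind_blks + icap - 1) / icap;
	int tind_blks = 1;				/* pessimistic */

	return ind_blks + dind_blks + tind_blks;
}

int main(void)
{
	printf("%d\n", indirect_calc_metadata_amount(1024, 2048));	/* 4 */
	return 0;
}
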
+static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	unsigned long md_needed, mdblocks, total = 0;
+
+	/*
+	 * recalculate the amount of metadata blocks to reserve
+	 * in order to allocate nrblocks;
+	 * worst case is one extent per block
+	 */
+	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+	total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
+	mdblocks = ext4_calc_metadata_amount(inode, total);
+	BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
+
+	md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
+	total = md_needed + nrblocks;
+
+	if (ext4_has_free_blocks(sbi, total) < total) {
+		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+		return -ENOSPC;
+	}
+
+	/* reduce fs free blocks counter */
+	percpu_counter_sub(&sbi->s_freeblocks_counter, total);
+
+	EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
+	EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
+
+	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+	return 0;	/* success */
+}
+
+void ext4_da_release_space(struct inode *inode, int used, int to_free)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	int total, mdb, mdb_free, release;
+
+	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+	/* recalculate the number of metablocks that still need to be reserved */
+	total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free;
+	mdb = ext4_calc_metadata_amount(inode, total);
+
+	/* figure out how many metablocks to release */
+	BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+	mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
+
+	/* Account for allocated meta_blocks */
+	mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+
+	release = to_free + mdb_free;
+
+	/* update fs free blocks counter for truncate case */
+	percpu_counter_add(&sbi->s_freeblocks_counter, release);
+
+	/* update per-inode reservations */
+	BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks);
+	EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free);
+
+	BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+	EXT4_I(inode)->i_reserved_meta_blocks = mdb;
+	EXT4_I(inode)->i_allocated_meta_blocks = 0;
+	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+}
+
+static void ext4_da_page_release_reservation(struct page *page,
+						unsigned long offset)
+{
+	int to_release = 0;
+	struct buffer_head *head, *bh;
+	unsigned int curr_off = 0;
+
+	head = page_buffers(page);
+	bh = head;
+	do {
+		unsigned int next_off = curr_off + bh->b_size;
+
+		if ((offset <= curr_off) && (buffer_delay(bh))) {
+			to_release++;
+			clear_buffer_delay(bh);
+		}
+		curr_off = next_off;
+	} while ((bh = bh->b_this_page) != head);
+	ext4_da_release_space(page->mapping->host, 0, to_release);
+}
+
+/*
+ * Delayed allocation stuff
+ */
+
+struct mpage_da_data {
+	struct inode *inode;
+	struct buffer_head lbh;			/* extent of blocks */
+	unsigned long first_page, next_page;	/* extent of pages */
+	get_block_t *get_block;
+	struct writeback_control *wbc;
+};
+
+/*
+ * mpage_da_submit_io - walks through extent of pages and tries to write
+ * them with __mpage_writepage()
+ *
+ * @mpd->inode: inode
+ * @mpd->first_page: first page of the extent
+ * @mpd->next_page: page after the last page of the extent
+ * @mpd->get_block: the filesystem's block mapper function
+ *
+ * By the time mpage_da_submit_io() is called we expect all blocks
+ * to be allocated. This may be wrong if allocation failed.
+ *
+ * As pages are already locked by write_cache_pages(), we can't use it
+ */
+static int mpage_da_submit_io(struct mpage_da_data *mpd)
+{
+	struct address_space *mapping = mpd->inode->i_mapping;
+	struct mpage_data mpd_pp = {
+		.bio = NULL,
+		.last_block_in_bio = 0,
+		.get_block = mpd->get_block,
+		.use_writepage = 1,
+	};
+	int ret = 0, err, nr_pages, i;
+	unsigned long index, end;
+	struct pagevec pvec;
+
+	BUG_ON(mpd->next_page <= mpd->first_page);
+
+	pagevec_init(&pvec, 0);
+	index = mpd->first_page;
+	end = mpd->next_page - 1;
+
+	while (index <= end) {
+		/* XXX: optimize tail */
+		nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+		if (nr_pages == 0)
+			break;
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+
+			index = page->index;
+			if (index > end)
+				break;
+			index++;
+
+			err = __mpage_writepage(page, mpd->wbc, &mpd_pp);
+
+			/*
+			 * In error case, we have to continue because
+			 * remaining pages are still locked
+			 * XXX: unlock and re-dirty them?
+			 */
+			if (ret == 0)
+				ret = err;
+		}
+		pagevec_release(&pvec);
+	}
+	if (mpd_pp.bio)
+		mpage_bio_submit(WRITE, mpd_pp.bio);
+
+	return ret;
+}
+
+/*
+ * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
+ *
+ * @mpd->inode - inode to walk through
+ * @exbh->b_blocknr - first block on a disk
+ * @exbh->b_size - amount of space in bytes
+ * @logical - first logical block to start assignment with
+ *
+ * the function goes through all passed space and puts actual disk
+ * block numbers into buffer heads, dropping BH_Delay
+ */
+static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
+				 struct buffer_head *exbh)
+{
+	struct inode *inode = mpd->inode;
+	struct address_space *mapping = inode->i_mapping;
+	int blocks = exbh->b_size >> inode->i_blkbits;
+	sector_t pblock = exbh->b_blocknr, cur_logical;
+	struct buffer_head *head, *bh;
+	unsigned long index, end;
+	struct pagevec pvec;
+	int nr_pages, i;
+
+	index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	end = (logical + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+	pagevec_init(&pvec, 0);
+
+	while (index <= end) {
+		/* XXX: optimize tail */
+		nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+		if (nr_pages == 0)
+			break;
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+
+			index = page->index;
+			if (index > end)
+				break;
+			index++;
+
+			BUG_ON(!PageLocked(page));
+			BUG_ON(PageWriteback(page));
+			BUG_ON(!page_has_buffers(page));
+
+			bh = page_buffers(page);
+			head = bh;
+
+			/* skip blocks out of the range */
+			do {
+				if (cur_logical >= logical)
+					break;
+				cur_logical++;
+			} while ((bh = bh->b_this_page) != head);
+
+			do {
+				if (cur_logical >= logical + blocks)
+					break;
+				if (buffer_delay(bh)) {
+					bh->b_blocknr = pblock;
+					clear_buffer_delay(bh);
+				} else if (buffer_mapped(bh))
+					BUG_ON(bh->b_blocknr != pblock);
+
+				cur_logical++;
+				pblock++;
+			} while ((bh = bh->b_this_page) != head);
+		}
+		pagevec_release(&pvec);
+	}
+}
+
+
+/*
+ * __unmap_underlying_blocks - just a helper function to unmap
+ * a set of blocks described by @bh
+ */
+static inline void __unmap_underlying_blocks(struct inode *inode,
+					     struct buffer_head *bh)
+{
+	struct block_device *bdev = inode->i_sb->s_bdev;
+	int blocks, i;
+
+	blocks = bh->b_size >> inode->i_blkbits;
+	for (i = 0; i < blocks; i++)
+		unmap_underlying_metadata(bdev, bh->b_blocknr + i);
+}
+
+/*
+ * mpage_da_map_blocks - go through given space
+ *
+ * @mpd->lbh - bh describing space
+ * @mpd->get_block - the filesystem's block mapper function
+ *
+ * The function skips space we know is already mapped to disk blocks.
+ *
+ * The function ignores errors ->get_block() returns, thus real
+ * error handling is postponed to __mpage_writepage()
+ */
+static void mpage_da_map_blocks(struct mpage_da_data *mpd)
+{
+	struct buffer_head *lbh = &mpd->lbh;
+	int err = 0, remain = lbh->b_size;
+	sector_t next = lbh->b_blocknr;
+	struct buffer_head new;
+
+	/*
+	 * We consider only non-mapped and non-allocated blocks
+	 */
+	if (buffer_mapped(lbh) && !buffer_delay(lbh))
+		return;
+
+	while (remain) {
+		new.b_state = lbh->b_state;
+		new.b_blocknr = 0;
+		new.b_size = remain;
+		err = mpd->get_block(mpd->inode, next, &new, 1);
+		if (err) {
+			/*
+			 * Rather than implement our own error handling
+			 * here, we just leave remaining blocks
+			 * unallocated and try again with ->writepage()
+			 */
+			break;
+		}
+		BUG_ON(new.b_size == 0);
+
+		if (buffer_new(&new))
+			__unmap_underlying_blocks(mpd->inode, &new);
+
+		/*
+		 * If blocks are delayed marked, we need to
+		 * put actual blocknr and drop delayed bit
+		 */
+		if (buffer_delay(lbh))
+			mpage_put_bnr_to_bhs(mpd, next, &new);
+
+		/* go for the remaining blocks */
+		next += new.b_size >> mpd->inode->i_blkbits;
+		remain -= new.b_size;
+	}
+}
+
+#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay))
+
+/*
+ * mpage_add_bh_to_extent - try to add one more block to extent of blocks
+ *
+ * @mpd->lbh - extent of blocks
+ * @logical - logical number of the block in the file
+ * @bh - bh of the block (used to access block's state)
+ *
+ * the function is used to collect contiguous blocks in the same state
+ */
+static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
+				   sector_t logical, struct buffer_head *bh)
+{
+	struct buffer_head *lbh = &mpd->lbh;
+	sector_t next;
+
+	next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits);
+
+	/*
+	 * First block in the extent
+	 */
+	if (lbh->b_size == 0) {
+		lbh->b_blocknr = logical;
+		lbh->b_size = bh->b_size;
+		lbh->b_state = bh->b_state & BH_FLAGS;
+		return;
+	}
+
+	/*
+	 * Can we merge the block into our big extent?
+	 */
+	if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) {
+		lbh->b_size += bh->b_size;
+		return;
+	}
+
+	/*
+	 * We couldn't merge the block into our extent, so we
+	 * need to flush the current extent and start a new one
+	 */
+	mpage_da_map_blocks(mpd);
+
+	/*
+	 * Now start a new extent
+	 */
+	lbh->b_size = bh->b_size;
+	lbh->b_state = bh->b_state & BH_FLAGS;
+	lbh->b_blocknr = logical;
+}
+
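
The merge test above, reduced to its core as a standalone sketch (toy_extent and toy_try_merge are invented names; state stands for the bh state masked by BH_FLAGS): a new block joins the pending extent only if it is the next logical block and carries the same Uptodate/Mapped/Delay bits.

#include <stdbool.h>
#include <stdio.h>

struct toy_extent {
	unsigned long start;	/* first logical block (lbh->b_blocknr) */
	unsigned long blocks;	/* length (lbh->b_size in blocks) */
	unsigned int state;	/* bh state bits masked by BH_FLAGS */
};

static bool toy_try_merge(struct toy_extent *ex, unsigned long logical,
			  unsigned int state)
{
	if (ex->blocks == 0) {			/* first block: start extent */
		ex->start = logical;
		ex->blocks = 1;
		ex->state = state;
		return true;
	}
	if (logical == ex->start + ex->blocks && state == ex->state) {
		ex->blocks++;			/* contiguous, same state */
		return true;
	}
	return false;	/* caller flushes the extent and starts a new one */
}

int main(void)
{
	struct toy_extent ex = { 0, 0, 0 };

	toy_try_merge(&ex, 10, 0x1);
	printf("%d\n", toy_try_merge(&ex, 11, 0x1));	/* 1: merged */
	printf("%d\n", toy_try_merge(&ex, 13, 0x1));	/* 0: gap, flush */
	return 0;
}
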
+/*
+ * __mpage_da_writepage - finds extent of pages and blocks
+ *
+ * @page: page to consider
+ * @wbc: not used, we just follow rules
+ * @data: context
+ *
+ * The function finds extents of pages and scans them for all blocks.
+ */
+static int __mpage_da_writepage(struct page *page,
+				struct writeback_control *wbc, void *data)
+{
+	struct mpage_da_data *mpd = data;
+	struct inode *inode = mpd->inode;
+	struct buffer_head *bh, *head, fake;
+	sector_t logical;
+
+	/*
+	 * Can we merge this page into the current extent?
+	 */
+	if (mpd->next_page != page->index) {
+		/*
+		 * Nope, we can't. So, we map non-allocated blocks
+		 * and start IO on them using __mpage_writepage()
+		 */
+		if (mpd->next_page != mpd->first_page) {
+			mpage_da_map_blocks(mpd);
+			mpage_da_submit_io(mpd);
+		}
+
+		/*
+		 * Start next extent of pages ...
+		 */
+		mpd->first_page = page->index;
+
+		/*
+		 * ... and blocks
+		 */
+		mpd->lbh.b_size = 0;
+		mpd->lbh.b_state = 0;
+		mpd->lbh.b_blocknr = 0;
+	}
+
+	mpd->next_page = page->index + 1;
+	logical = (sector_t) page->index <<
+		  (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+	if (!page_has_buffers(page)) {
+		/*
+		 * There are no buffer heads attached yet (mmap?);
+		 * we treat the page as full of dirty blocks
+		 */
+		bh = &fake;
+		bh->b_size = PAGE_CACHE_SIZE;
+		bh->b_state = 0;
+		set_buffer_dirty(bh);
+		set_buffer_uptodate(bh);
+		mpage_add_bh_to_extent(mpd, logical, bh);
+	} else {
+		/*
+		 * Page with regular buffer heads, just add all dirty ones
+		 */
+		head = page_buffers(page);
+		bh = head;
+		do {
+			BUG_ON(buffer_locked(bh));
+			if (buffer_dirty(bh))
+				mpage_add_bh_to_extent(mpd, logical, bh);
+			logical++;
+		} while ((bh = bh->b_this_page) != head);
+	}
+
+	return 0;
+}
+
+/*
+ * mpage_da_writepages - walk the list of dirty pages of the given
+ * address space, allocate non-allocated blocks, map newly-allocated
+ * blocks to existing bhs and issue IO on them
+ *
+ * @mapping: address space structure to write
+ * @wbc: subtract the number of written pages from *@wbc->nr_to_write
+ * @get_block: the filesystem's block mapper function.
+ *
+ * This is a library function, which implements the writepages()
+ * address_space_operation.
+ *
+ * In order to avoid duplication of logic that deals with partial pages,
+ * multiple bios per page, etc, we find non-allocated blocks, allocate
+ * them with minimal calls to ->get_block() and re-use __mpage_writepage()
+ *
+ * It's important that we call __mpage_writepage() only once for each
+ * involved page, otherwise we'd have to implement more complicated logic
+ * to deal with pages w/o PG_lock or w/ PG_writeback and so on.
+ *
+ * See comments to mpage_writepages()
+ */
+static int mpage_da_writepages(struct address_space *mapping,
+			       struct writeback_control *wbc,
+			       get_block_t get_block)
+{
+	struct mpage_da_data mpd;
+	int ret;
+
+	if (!get_block)
+		return generic_writepages(mapping, wbc);
+
+	mpd.wbc = wbc;
+	mpd.inode = mapping->host;
+	mpd.lbh.b_size = 0;
+	mpd.lbh.b_state = 0;
+	mpd.lbh.b_blocknr = 0;
+	mpd.first_page = 0;
+	mpd.next_page = 0;
+	mpd.get_block = get_block;
+
+	ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
+
+	/*
+	 * Handle last extent of pages
+	 */
+	if (mpd.next_page != mpd.first_page) {
+		mpage_da_map_blocks(&mpd);
+		mpage_da_submit_io(&mpd);
+	}
+
+	return ret;
+}
+
+/*
+ * this is a special callback for ->write_begin() only;
+ * its intention is to return a mapped block or reserve space
+ */
+static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
+				  struct buffer_head *bh_result, int create)
+{
+	int ret = 0;
+
+	BUG_ON(create == 0);
+	BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
+
+	/*
+	 * first, we need to know whether the block is allocated already;
+	 * preallocated blocks are unmapped but should be treated
+	 * the same as allocated blocks.
+	 */
+	ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0, 0);
+	if ((ret == 0) && !buffer_delay(bh_result)) {
+		/* the block isn't (pre)allocated yet, let's reserve space */
+		/*
+		 * XXX: __block_prepare_write() unmaps passed block,
+		 * is it OK?
+		 */
+		ret = ext4_da_reserve_space(inode, 1);
+		if (ret)
+			/* not enough space to reserve */
+			return ret;
+
+		map_bh(bh_result, inode->i_sb, 0);
+		set_buffer_new(bh_result);
+		set_buffer_delay(bh_result);
+	} else if (ret > 0) {
+		bh_result->b_size = (ret << inode->i_blkbits);
+		ret = 0;
+	}
+
+	return ret;
+}
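
The branch structure of ext4_da_get_block_prep() summarized as a standalone decision table (sketch only; the enum and strings are invented, the three cases mirror the code above): a hole gets a one-block reservation and is marked new+delayed, an already-delayed block keeps its earlier reservation, and a mapped block is passed through.

#include <stdio.h>

enum lookup { HOLE, MAPPED, DELAYED };

static const char *prep_action(enum lookup state)
{
	switch (state) {
	case HOLE:	/* reserve space, map_bh(bh, sb, 0), set new+delay */
		return "reserve + mark delayed";
	case DELAYED:	/* already reserved by an earlier write */
		return "leave as is";
	case MAPPED:	/* block exists on disk, b_size set from ret */
	default:
		return "use existing mapping";
	}
}

int main(void)
{
	printf("%s\n", prep_action(HOLE));
	printf("%s\n", prep_action(DELAYED));
	printf("%s\n", prep_action(MAPPED));
	return 0;
}
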
+#define EXT4_DELALLOC_RSVED 1
+static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
+				   struct buffer_head *bh_result, int create)
+{
+	int ret;
+	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
+	loff_t disksize = EXT4_I(inode)->i_disksize;
+	handle_t *handle = NULL;
+
+	handle = ext4_journal_current_handle();
+	if (!handle) {
+		ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
+					   bh_result, 0, 0, 0);
+		BUG_ON(!ret);
+	} else {
+		ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
+					   bh_result, create, 0, EXT4_DELALLOC_RSVED);
+	}
+
+	if (ret > 0) {
+		bh_result->b_size = (ret << inode->i_blkbits);
+
+		/*
+		 * Update on-disk size along with block allocation;
+		 * we don't use 'extend_disksize' as size may change
+		 * within already allocated blocks -bzzz
+		 */
+		disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
+		if (disksize > i_size_read(inode))
+			disksize = i_size_read(inode);
+		if (disksize > EXT4_I(inode)->i_disksize) {
+			/*
+			 * XXX: replace with spinlock if seen contended -bzzz
+			 */
+			down_write(&EXT4_I(inode)->i_data_sem);
+			if (disksize > EXT4_I(inode)->i_disksize)
+				EXT4_I(inode)->i_disksize = disksize;
+			up_write(&EXT4_I(inode)->i_data_sem);
+
+			if (EXT4_I(inode)->i_disksize == disksize) {
+				ret = ext4_mark_inode_dirty(handle, inode);
+				return ret;
+			}
+		}
+		ret = 0;
+	}
+	return ret;
+}
+
+static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
+{
+	/*
+	 * an unmapped buffer is possible for holes;
+	 * a delayed buffer is possible with delayed allocation
+	 */
+	return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh));
+}
+
+static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
+				       struct buffer_head *bh_result, int create)
+{
+	int ret = 0;
+	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
+
+	/*
+	 * we don't want to do block allocation in writepage,
+	 * so call get_block_wrap with create = 0
+	 */
+	ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks,
+				   bh_result, 0, 0, 0);
+	if (ret > 0) {
+		bh_result->b_size = (ret << inode->i_blkbits);
+		ret = 0;
+	}
+	return ret;
+}
+
+/*
+ * gets called via ext4_da_writepages after taking page lock (have journal handle)
+ * gets called via journal_submit_inode_data_buffers (no journal handle)
+ * gets called via shrink_page_list via pdflush (no journal handle)
+ * or grab_page_cache when doing write_begin (have journal handle)
+ */
+static int ext4_da_writepage(struct page *page,
+			     struct writeback_control *wbc)
+{
+	int ret = 0;
+	loff_t size;
+	unsigned long len;
+	struct buffer_head *page_bufs;
+	struct inode *inode = page->mapping->host;
+
+	size = i_size_read(inode);
+	if (page->index == size >> PAGE_CACHE_SHIFT)
+		len = size & ~PAGE_CACHE_MASK;
+	else
+		len = PAGE_CACHE_SIZE;
+
+	if (page_has_buffers(page)) {
+		page_bufs = page_buffers(page);
+		if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+					ext4_bh_unmapped_or_delay)) {
+			/*
+			 * We don't want to do block allocation,
+			 * so redirty the page and return.
+			 * We may reach here when we do a journal commit
+			 * via journal_submit_inode_data_buffers.
+			 * If we don't have a mapping block we just ignore
+			 * them. We can also reach here via shrink_page_list
+			 */
+			redirty_page_for_writepage(wbc, page);
+			unlock_page(page);
+			return 0;
+		}
+	} else {
+		/*
+		 * The test for page_has_buffers() is subtle:
+		 * We know the page is dirty but it lost buffers. That means
+		 * that at some moment in time after write_begin()/write_end()
+		 * has been called all buffers have been clean and thus they
+		 * must have been written at least once. So they are all
+		 * mapped and we can happily proceed with mapping them
+		 * and writing the page.
+		 *
+		 * Try to initialize the buffer_heads and check whether
+		 * all are mapped and non delay. We don't want to
+		 * do block allocation here.
+		 */
+		ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
+					  ext4_normal_get_block_write);
+		if (!ret) {
+			page_bufs = page_buffers(page);
+			/* check whether all are mapped and non delay */
+			if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+						ext4_bh_unmapped_or_delay)) {
+				redirty_page_for_writepage(wbc, page);
+				unlock_page(page);
+				return 0;
+			}
+		} else {
+			/*
+			 * We can't do block allocation here,
+			 * so just redirty the page and unlock
+			 * and return
+			 */
+			redirty_page_for_writepage(wbc, page);
+			unlock_page(page);
+			return 0;
+		}
+	}
+
+	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
+		ret = nobh_writepage(page, ext4_normal_get_block_write, wbc);
+	else
+		ret = block_write_full_page(page,
+					    ext4_normal_get_block_write,
+					    wbc);
+
+	return ret;
+}
+
+/*
+ * For now just follow the DIO way to estimate the max credits
+ * needed to write out EXT4_MAX_WRITEBACK_PAGES.
+ * todo: need to calculate the max credits needed for
+ * extent-based files; currently the DIO credits are based on the
+ * indirect-blocks mapping way.
+ *
+ * Probably should have a generic way to calculate credits
+ * for DIO, writepages, and truncate
+ */
+#define EXT4_MAX_WRITEBACK_PAGES	DIO_MAX_BLOCKS
+#define EXT4_MAX_WRITEBACK_CREDITS	DIO_CREDITS
+
+static int ext4_da_writepages(struct address_space *mapping,
+			      struct writeback_control *wbc)
+{
+	struct inode *inode = mapping->host;
+	handle_t *handle = NULL;
+	int needed_blocks;
+	int ret = 0;
+	long to_write;
+	loff_t range_start = 0;
+
+	/*
+	 * No pages to write? This is mainly a kludge to avoid starting
+	 * a transaction for special inodes like the journal inode on last
+	 * iput() because that could violate lock ordering on umount
+	 */
+	if (!mapping->nrpages)
+		return 0;
+
+	/*
+	 * Estimate the worst case needed credits to write out
+	 * EXT4_MAX_BUF_BLOCKS pages
+	 */
+	needed_blocks = EXT4_MAX_WRITEBACK_CREDITS;
+
+	to_write = wbc->nr_to_write;
+	if (!wbc->range_cyclic) {
+		/*
+		 * If range_cyclic is not set force range_cont
+		 * and save the old writeback_index
+		 */
+		wbc->range_cont = 1;
+		range_start = wbc->range_start;
+	}
+
+	while (!ret && to_write) {
+		/* start a new transaction */
+		handle = ext4_journal_start(inode, needed_blocks);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			goto out_writepages;
+		}
+		if (ext4_should_order_data(inode)) {
+			/*
+			 * With ordered mode we need to add
+			 * the inode to the journal handle
+			 * when we do block allocation.
+			 */
+			ret = ext4_jbd2_file_inode(handle, inode);
+			if (ret) {
+				ext4_journal_stop(handle);
+				goto out_writepages;
+			}
+
+		}
+		/*
+		 * set the max dirty pages that can be written at a time
+		 * to fit into the reserved transaction credits
+		 */
+		if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES)
+			wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES;
+
+		to_write -= wbc->nr_to_write;
+		ret = mpage_da_writepages(mapping, wbc,
+					  ext4_da_get_block_write);
+		ext4_journal_stop(handle);
+		if (wbc->nr_to_write) {
+			/*
+			 * There is no more writeout needed,
+			 * or we requested a nonblocking writeout
+			 * and we found the device congested
+			 */
+			to_write += wbc->nr_to_write;
+			break;
+		}
+		wbc->nr_to_write = to_write;
+	}
+
+out_writepages:
+	wbc->nr_to_write = to_write;
+	if (range_start)
+		wbc->range_start = range_start;
+	return ret;
+}
+
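
The nr_to_write bookkeeping in the loop above, extracted into a standalone sketch (MAX_PAGES stands in for EXT4_MAX_WRITEBACK_PAGES and flush_pass models one mpage_da_writepages() pass; both are invented for illustration): each pass may write at most MAX_PAGES pages under one transaction, and the loop stops early when a pass does not exhaust its quota.

#include <stdio.h>

#define MAX_PAGES 64	/* stand-in for EXT4_MAX_WRITEBACK_PAGES */

static long flush_pass(long quota, long dirty)	/* returns leftover quota */
{
	long written = dirty < quota ? dirty : quota;
	return quota - written;
}

int main(void)
{
	long to_write = 150, dirty = 100;

	while (to_write) {
		long quota = to_write > MAX_PAGES ? MAX_PAGES : to_write;
		long leftover = flush_pass(quota, dirty);

		dirty -= (quota - leftover);
		to_write -= (quota - leftover);
		if (leftover)		/* quota not exhausted: stop */
			break;
	}
	printf("unwritten budget: %ld, dirty left: %ld\n", to_write, dirty);
	return 0;
}
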
+static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
+			       loff_t pos, unsigned len, unsigned flags,
+			       struct page **pagep, void **fsdata)
+{
+	int ret, retries = 0;
+	struct page *page;
+	pgoff_t index;
+	unsigned from, to;
+	struct inode *inode = mapping->host;
+	handle_t *handle;
+
+	index = pos >> PAGE_CACHE_SHIFT;
+	from = pos & (PAGE_CACHE_SIZE - 1);
+	to = from + len;
+
+retry:
+	/*
+	 * With delayed allocation, we don't log the i_disksize update
+	 * if there is delayed block allocation. But we still need
+	 * to journal the i_disksize update if we write to the end
+	 * of a file which has an already mapped buffer.
+	 */
+	handle = ext4_journal_start(inode, 1);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out;
+	}
+
+	page = __grab_cache_page(mapping, index);
+	if (!page) {
+		ext4_journal_stop(handle);
+		ret = -ENOMEM;
+		goto out;
+	}
+	*pagep = page;
+
+	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+				ext4_da_get_block_prep);
+	if (ret < 0) {
+		unlock_page(page);
+		ext4_journal_stop(handle);
+		page_cache_release(page);
+	}
+
+	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+		goto retry;
+out:
+	return ret;
+}
+
+/*
+ * Check if we should update i_disksize
+ * when writing to the end of file but not requiring block allocation
+ */
+static int ext4_da_should_update_i_disksize(struct page *page,
+					    unsigned long offset)
+{
+	struct buffer_head *bh;
+	struct inode *inode = page->mapping->host;
+	unsigned int idx;
+	int i;
+
+	bh = page_buffers(page);
+	idx = offset >> inode->i_blkbits;
+
+	for (i = 0; i < idx; i++)
+		bh = bh->b_this_page;
+
+	if (!buffer_mapped(bh) || (buffer_delay(bh)))
+		return 0;
+	return 1;
+}
+
+static int ext4_da_write_end(struct file *file,
+			     struct address_space *mapping,
+			     loff_t pos, unsigned len, unsigned copied,
+			     struct page *page, void *fsdata)
+{
+	struct inode *inode = mapping->host;
+	int ret = 0, ret2;
+	handle_t *handle = ext4_journal_current_handle();
+	loff_t new_i_size;
+	unsigned long start, end;
+
+	start = pos & (PAGE_CACHE_SIZE - 1);
+	end = start + copied - 1;
+
+	/*
+	 * generic_write_end() will run mark_inode_dirty() if i_size
+	 * changes. So let's piggyback the i_disksize mark_inode_dirty
+	 * into that.
+	 */
+
+	new_i_size = pos + copied;
+	if (new_i_size > EXT4_I(inode)->i_disksize) {
+		if (ext4_da_should_update_i_disksize(page, end)) {
+			down_write(&EXT4_I(inode)->i_data_sem);
+			if (new_i_size > EXT4_I(inode)->i_disksize) {
+				/*
+				 * Updating i_disksize when extending file
+				 * without needing block allocation
+				 */
+				if (ext4_should_order_data(inode))
+					ret = ext4_jbd2_file_inode(handle,
+								   inode);
+
+				EXT4_I(inode)->i_disksize = new_i_size;
+			}
+			up_write(&EXT4_I(inode)->i_data_sem);
+		}
+	}
+	ret2 = generic_write_end(file, mapping, pos, len, copied,
+				 page, fsdata);
+	copied = ret2;
+	if (ret2 < 0)
+		ret = ret2;
+	ret2 = ext4_journal_stop(handle);
+	if (!ret)
+		ret = ret2;
+
+	return ret ? ret : copied;
+}
+
+static void ext4_da_invalidatepage(struct page *page, unsigned long offset)
+{
+	/*
+	 * Drop reserved blocks
+	 */
+	BUG_ON(!PageLocked(page));
+	if (!page_has_buffers(page))
+		goto out;
+
+	ext4_da_page_release_reservation(page, offset);
+
+out:
+	ext4_invalidatepage(page, offset);
+
+	return;
+}
+
 
 /*
  * bmap() is special. It gets used by applications such as lilo and by
@@ -1418,6 +2409,16 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
 	journal_t *journal;
 	int err;
 
+	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
+			test_opt(inode->i_sb, DELALLOC)) {
+		/*
+		 * With delalloc we want to sync the file
+		 * so that we can make sure we allocate
+		 * blocks for the file
+		 */
+		filemap_write_and_wait(mapping);
+	}
+
 	if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
 		/*
 		 * This is a REALLY heavyweight approach, but the use of
@@ -1462,21 +2463,17 @@ static int bput_one(handle_t *handle, struct buffer_head *bh) | |||
1462 | return 0; | 2463 | return 0; |
1463 | } | 2464 | } |
1464 | 2465 | ||
1465 | static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) | ||
1466 | { | ||
1467 | if (buffer_mapped(bh)) | ||
1468 | return ext4_journal_dirty_data(handle, bh); | ||
1469 | return 0; | ||
1470 | } | ||
1471 | |||
1472 | /* | 2466 | /* |
1473 | * Note that we always start a transaction even if we're not journalling | 2467 | * Note that we don't need to start a transaction unless we're journaling data |
1474 | * data. This is to preserve ordering: any hole instantiation within | 2468 | * because we should have holes filled from ext4_page_mkwrite(). We even don't |
1475 | * __block_write_full_page -> ext4_get_block() should be journalled | 2469 | * need to file the inode to the transaction's list in ordered mode because if |
1476 | * along with the data so we don't crash and then get metadata which | 2470 | * we are writing back data added by write(), the inode is already there and if |
1477 | * refers to old data. | 2471 | * we are writing back data modified via mmap(), noone guarantees in which |
2472 | * transaction the data will hit the disk. In case we are journaling data, we | ||
2473 | * cannot start a transaction directly because transaction start ranks above the page | ||
2474 | * lock so we have to do some magic. | ||
1478 | * | 2475 | * |
1479 | * In all journalling modes block_write_full_page() will start the I/O. | 2476 | * In all journaling modes block_write_full_page() will start the I/O. |
1480 | * | 2477 | * |
1481 | * Problem: | 2478 | * Problem: |
1482 | * | 2479 | * |
@@ -1518,105 +2515,103 @@ static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) | |||
1518 | * disastrous. Any write() or metadata operation will sync the fs for | 2515 | * disastrous. Any write() or metadata operation will sync the fs for |
1519 | * us. | 2516 | * us. |
1520 | * | 2517 | * |
1521 | * AKPM2: if all the page's buffers are mapped to disk and !data=journal, | ||
1522 | * we don't need to open a transaction here. | ||
1523 | */ | 2518 | */ |
1524 | static int ext4_ordered_writepage(struct page *page, | 2519 | static int __ext4_normal_writepage(struct page *page, |
1525 | struct writeback_control *wbc) | 2520 | struct writeback_control *wbc) |
1526 | { | 2521 | { |
1527 | struct inode *inode = page->mapping->host; | 2522 | struct inode *inode = page->mapping->host; |
1528 | struct buffer_head *page_bufs; | ||
1529 | handle_t *handle = NULL; | ||
1530 | int ret = 0; | ||
1531 | int err; | ||
1532 | |||
1533 | J_ASSERT(PageLocked(page)); | ||
1534 | 2523 | ||
1535 | /* | 2524 | if (test_opt(inode->i_sb, NOBH)) |
1536 | * We give up here if we're reentered, because it might be for a | 2525 | return nobh_writepage(page, |
1537 | * different filesystem. | 2526 | ext4_normal_get_block_write, wbc); |
1538 | */ | 2527 | else |
1539 | if (ext4_journal_current_handle()) | 2528 | return block_write_full_page(page, |
1540 | goto out_fail; | 2529 | ext4_normal_get_block_write, |
1541 | 2530 | wbc); | |
1542 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); | 2531 | } |
1543 | 2532 | ||
1544 | if (IS_ERR(handle)) { | 2533 | static int ext4_normal_writepage(struct page *page, |
1545 | ret = PTR_ERR(handle); | 2534 | struct writeback_control *wbc) |
1546 | goto out_fail; | 2535 | { |
1547 | } | 2536 | struct inode *inode = page->mapping->host; |
2537 | loff_t size = i_size_read(inode); | ||
2538 | loff_t len; | ||
1548 | 2539 | ||
1549 | if (!page_has_buffers(page)) { | 2540 | J_ASSERT(PageLocked(page)); |
1550 | create_empty_buffers(page, inode->i_sb->s_blocksize, | 2541 | if (page->index == size >> PAGE_CACHE_SHIFT) |
1551 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | 2542 | len = size & ~PAGE_CACHE_MASK; |
2543 | else | ||
2544 | len = PAGE_CACHE_SIZE; | ||
2545 | |||
2546 | if (page_has_buffers(page)) { | ||
2547 | /* If the page has buffers, they should all be mapped | ||
2548 | * and allocated. If there are no buffers attached | ||
2549 | * to the page, we know the page is dirty but it has lost | ||
2550 | * its buffers. That means that at some moment in time | ||
2551 | * after write_begin() / write_end() was called, all the | ||
2552 | * buffers were clean, and thus they must have been | ||
2553 | * written at least once. So they are all mapped and we can | ||
2554 | * happily proceed with mapping them and writing the page. | ||
2555 | */ | ||
2556 | BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | ||
2557 | ext4_bh_unmapped_or_delay)); | ||
1552 | } | 2558 | } |
1553 | page_bufs = page_buffers(page); | ||
1554 | walk_page_buffers(handle, page_bufs, 0, | ||
1555 | PAGE_CACHE_SIZE, NULL, bget_one); | ||
1556 | |||
1557 | ret = block_write_full_page(page, ext4_get_block, wbc); | ||
1558 | |||
1559 | /* | ||
1560 | * The page can become unlocked at any point now, and | ||
1561 | * truncate can then come in and change things. So we | ||
1562 | * can't touch *page from now on. But *page_bufs is | ||
1563 | * safe due to elevated refcount. | ||
1564 | */ | ||
1565 | 2559 | ||
1566 | /* | 2560 | if (!ext4_journal_current_handle()) |
1567 | * And attach them to the current transaction. But only if | 2561 | return __ext4_normal_writepage(page, wbc); |
1568 | * block_write_full_page() succeeded. Otherwise they are unmapped, | ||
1569 | * and generally junk. | ||
1570 | */ | ||
1571 | if (ret == 0) { | ||
1572 | err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, | ||
1573 | NULL, jbd2_journal_dirty_data_fn); | ||
1574 | if (!ret) | ||
1575 | ret = err; | ||
1576 | } | ||
1577 | walk_page_buffers(handle, page_bufs, 0, | ||
1578 | PAGE_CACHE_SIZE, NULL, bput_one); | ||
1579 | err = ext4_journal_stop(handle); | ||
1580 | if (!ret) | ||
1581 | ret = err; | ||
1582 | return ret; | ||
1583 | 2562 | ||
1584 | out_fail: | ||
1585 | redirty_page_for_writepage(wbc, page); | 2563 | redirty_page_for_writepage(wbc, page); |
1586 | unlock_page(page); | 2564 | unlock_page(page); |
1587 | return ret; | 2565 | return 0; |
1588 | } | 2566 | } |
1589 | 2567 | ||
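The BUG_ON above relies on ext4_bh_unmapped_or_delay(), defined elsewhere in this patch. The predicate is small enough to show as a sketch consistent with its uses here: a buffer fails the check if it has no disk mapping or still carries a delalloc reservation.

	static int ext4_bh_unmapped_or_delay(handle_t *handle,
					     struct buffer_head *bh)
	{
		/* a dirty page reaching writepage in this mode must have
		 * all of its buffers mapped and none still delayed */
		return !buffer_mapped(bh) || buffer_delay(bh);
	}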
1590 | static int ext4_writeback_writepage(struct page *page, | 2568 | static int __ext4_journalled_writepage(struct page *page, |
1591 | struct writeback_control *wbc) | 2569 | struct writeback_control *wbc) |
1592 | { | 2570 | { |
1593 | struct inode *inode = page->mapping->host; | 2571 | struct address_space *mapping = page->mapping; |
2572 | struct inode *inode = mapping->host; | ||
2573 | struct buffer_head *page_bufs; | ||
1594 | handle_t *handle = NULL; | 2574 | handle_t *handle = NULL; |
1595 | int ret = 0; | 2575 | int ret = 0; |
1596 | int err; | 2576 | int err; |
1597 | 2577 | ||
1598 | if (ext4_journal_current_handle()) | 2578 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, |
1599 | goto out_fail; | 2579 | ext4_normal_get_block_write); |
2580 | if (ret != 0) | ||
2581 | goto out_unlock; | ||
2582 | |||
2583 | page_bufs = page_buffers(page); | ||
2584 | walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL, | ||
2585 | bget_one); | ||
2586 | /* As soon as we unlock the page, it can go away, but we have | ||
2587 | * references to the buffers, so we are safe */ | ||
2588 | unlock_page(page); | ||
1600 | 2589 | ||
1601 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); | 2590 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); |
1602 | if (IS_ERR(handle)) { | 2591 | if (IS_ERR(handle)) { |
1603 | ret = PTR_ERR(handle); | 2592 | ret = PTR_ERR(handle); |
1604 | goto out_fail; | 2593 | goto out; |
1605 | } | 2594 | } |
1606 | 2595 | ||
1607 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) | 2596 | ret = walk_page_buffers(handle, page_bufs, 0, |
1608 | ret = nobh_writepage(page, ext4_get_block, wbc); | 2597 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); |
1609 | else | ||
1610 | ret = block_write_full_page(page, ext4_get_block, wbc); | ||
1611 | 2598 | ||
2599 | err = walk_page_buffers(handle, page_bufs, 0, | ||
2600 | PAGE_CACHE_SIZE, NULL, write_end_fn); | ||
2601 | if (ret == 0) | ||
2602 | ret = err; | ||
1612 | err = ext4_journal_stop(handle); | 2603 | err = ext4_journal_stop(handle); |
1613 | if (!ret) | 2604 | if (!ret) |
1614 | ret = err; | 2605 | ret = err; |
1615 | return ret; | ||
1616 | 2606 | ||
1617 | out_fail: | 2607 | walk_page_buffers(handle, page_bufs, 0, |
1618 | redirty_page_for_writepage(wbc, page); | 2608 | PAGE_CACHE_SIZE, NULL, bput_one); |
2609 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | ||
2610 | goto out; | ||
2611 | |||
2612 | out_unlock: | ||
1619 | unlock_page(page); | 2613 | unlock_page(page); |
2614 | out: | ||
1620 | return ret; | 2615 | return ret; |
1621 | } | 2616 | } |
1622 | 2617 | ||
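__ext4_journalled_writepage() leans on walk_page_buffers(), a helper near the top of this file that this patch leaves alone. Its prototype, shown here as a reminder of the contract: it applies fn to every buffer covering [from, to) in the page and stops at the first nonzero return, which is why do_journal_get_write_access and write_end_fn can be passed straight in.

	static int walk_page_buffers(handle_t *handle, struct buffer_head *head,
				     unsigned from, unsigned to, int *partial,
				     int (*fn)(handle_t *handle,
					       struct buffer_head *bh));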
@@ -1624,59 +2619,53 @@ static int ext4_journalled_writepage(struct page *page, | |||
1624 | struct writeback_control *wbc) | 2619 | struct writeback_control *wbc) |
1625 | { | 2620 | { |
1626 | struct inode *inode = page->mapping->host; | 2621 | struct inode *inode = page->mapping->host; |
1627 | handle_t *handle = NULL; | 2622 | loff_t size = i_size_read(inode); |
1628 | int ret = 0; | 2623 | loff_t len; |
1629 | int err; | ||
1630 | 2624 | ||
1631 | if (ext4_journal_current_handle()) | 2625 | J_ASSERT(PageLocked(page)); |
1632 | goto no_write; | 2626 | if (page->index == size >> PAGE_CACHE_SHIFT) |
2627 | len = size & ~PAGE_CACHE_MASK; | ||
2628 | else | ||
2629 | len = PAGE_CACHE_SIZE; | ||
2630 | |||
2631 | if (page_has_buffers(page)) { | ||
2632 | /* If the page has buffers, they should all be mapped | ||
2633 | * and allocated. If there are no buffers attached | ||
2634 | * to the page, we know the page is dirty but it has lost | ||
2635 | * its buffers. That means that at some moment in time | ||
2636 | * after write_begin() / write_end() was called, all the | ||
2637 | * buffers were clean, and thus they must have been | ||
2638 | * written at least once. So they are all mapped and we can | ||
2639 | * happily proceed with mapping them and writing the page. | ||
2640 | */ | ||
2641 | BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | ||
2642 | ext4_bh_unmapped_or_delay)); | ||
2643 | } | ||
1633 | 2644 | ||
1634 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); | 2645 | if (ext4_journal_current_handle()) |
1635 | if (IS_ERR(handle)) { | ||
1636 | ret = PTR_ERR(handle); | ||
1637 | goto no_write; | 2646 | goto no_write; |
1638 | } | ||
1639 | 2647 | ||
1640 | if (!page_has_buffers(page) || PageChecked(page)) { | 2648 | if (PageChecked(page)) { |
1641 | /* | 2649 | /* |
1642 | * It's mmapped pagecache. Add buffers and journal it. There | 2650 | * It's mmapped pagecache. Add buffers and journal it. There |
1643 | * doesn't seem much point in redirtying the page here. | 2651 | * doesn't seem much point in redirtying the page here. |
1644 | */ | 2652 | */ |
1645 | ClearPageChecked(page); | 2653 | ClearPageChecked(page); |
1646 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, | 2654 | return __ext4_journalled_writepage(page, wbc); |
1647 | ext4_get_block); | ||
1648 | if (ret != 0) { | ||
1649 | ext4_journal_stop(handle); | ||
1650 | goto out_unlock; | ||
1651 | } | ||
1652 | ret = walk_page_buffers(handle, page_buffers(page), 0, | ||
1653 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); | ||
1654 | |||
1655 | err = walk_page_buffers(handle, page_buffers(page), 0, | ||
1656 | PAGE_CACHE_SIZE, NULL, write_end_fn); | ||
1657 | if (ret == 0) | ||
1658 | ret = err; | ||
1659 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | ||
1660 | unlock_page(page); | ||
1661 | } else { | 2655 | } else { |
1662 | /* | 2656 | /* |
1663 | * It may be a page full of checkpoint-mode buffers. We don't | 2657 | * It may be a page full of checkpoint-mode buffers. We don't |
1664 | * really know unless we go poke around in the buffer_heads. | 2658 | * really know unless we go poke around in the buffer_heads. |
1665 | * But block_write_full_page will do the right thing. | 2659 | * But block_write_full_page will do the right thing. |
1666 | */ | 2660 | */ |
1667 | ret = block_write_full_page(page, ext4_get_block, wbc); | 2661 | return block_write_full_page(page, |
2662 | ext4_normal_get_block_write, | ||
2663 | wbc); | ||
1668 | } | 2664 | } |
1669 | err = ext4_journal_stop(handle); | ||
1670 | if (!ret) | ||
1671 | ret = err; | ||
1672 | out: | ||
1673 | return ret; | ||
1674 | |||
1675 | no_write: | 2665 | no_write: |
1676 | redirty_page_for_writepage(wbc, page); | 2666 | redirty_page_for_writepage(wbc, page); |
1677 | out_unlock: | ||
1678 | unlock_page(page); | 2667 | unlock_page(page); |
1679 | goto out; | 2668 | return 0; |
1680 | } | 2669 | } |
1681 | 2670 | ||
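The PageChecked() test above is how data=journal mode tells mmap-dirtied pages apart: the journalled set_page_dirty hook, installed in ext4_journalled_aops below, tags every page dirtied without a running transaction. For orientation, a sketch matching the mainline hook:

	static int ext4_journalled_set_page_dirty(struct page *page)
	{
		SetPageChecked(page);	/* dirtied behind the journal's back */
		return __set_page_dirty_nobuffers(page);
	}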
1682 | static int ext4_readpage(struct file *file, struct page *page) | 2671 | static int ext4_readpage(struct file *file, struct page *page) |
@@ -1817,50 +2806,75 @@ static int ext4_journalled_set_page_dirty(struct page *page) | |||
1817 | } | 2806 | } |
1818 | 2807 | ||
1819 | static const struct address_space_operations ext4_ordered_aops = { | 2808 | static const struct address_space_operations ext4_ordered_aops = { |
1820 | .readpage = ext4_readpage, | 2809 | .readpage = ext4_readpage, |
1821 | .readpages = ext4_readpages, | 2810 | .readpages = ext4_readpages, |
1822 | .writepage = ext4_ordered_writepage, | 2811 | .writepage = ext4_normal_writepage, |
1823 | .sync_page = block_sync_page, | 2812 | .sync_page = block_sync_page, |
1824 | .write_begin = ext4_write_begin, | 2813 | .write_begin = ext4_write_begin, |
1825 | .write_end = ext4_ordered_write_end, | 2814 | .write_end = ext4_ordered_write_end, |
1826 | .bmap = ext4_bmap, | 2815 | .bmap = ext4_bmap, |
1827 | .invalidatepage = ext4_invalidatepage, | 2816 | .invalidatepage = ext4_invalidatepage, |
1828 | .releasepage = ext4_releasepage, | 2817 | .releasepage = ext4_releasepage, |
1829 | .direct_IO = ext4_direct_IO, | 2818 | .direct_IO = ext4_direct_IO, |
1830 | .migratepage = buffer_migrate_page, | 2819 | .migratepage = buffer_migrate_page, |
2820 | .is_partially_uptodate = block_is_partially_uptodate, | ||
1831 | }; | 2821 | }; |
1832 | 2822 | ||
1833 | static const struct address_space_operations ext4_writeback_aops = { | 2823 | static const struct address_space_operations ext4_writeback_aops = { |
1834 | .readpage = ext4_readpage, | 2824 | .readpage = ext4_readpage, |
1835 | .readpages = ext4_readpages, | 2825 | .readpages = ext4_readpages, |
1836 | .writepage = ext4_writeback_writepage, | 2826 | .writepage = ext4_normal_writepage, |
1837 | .sync_page = block_sync_page, | 2827 | .sync_page = block_sync_page, |
1838 | .write_begin = ext4_write_begin, | 2828 | .write_begin = ext4_write_begin, |
1839 | .write_end = ext4_writeback_write_end, | 2829 | .write_end = ext4_writeback_write_end, |
1840 | .bmap = ext4_bmap, | 2830 | .bmap = ext4_bmap, |
1841 | .invalidatepage = ext4_invalidatepage, | 2831 | .invalidatepage = ext4_invalidatepage, |
1842 | .releasepage = ext4_releasepage, | 2832 | .releasepage = ext4_releasepage, |
1843 | .direct_IO = ext4_direct_IO, | 2833 | .direct_IO = ext4_direct_IO, |
1844 | .migratepage = buffer_migrate_page, | 2834 | .migratepage = buffer_migrate_page, |
2835 | .is_partially_uptodate = block_is_partially_uptodate, | ||
1845 | }; | 2836 | }; |
1846 | 2837 | ||
1847 | static const struct address_space_operations ext4_journalled_aops = { | 2838 | static const struct address_space_operations ext4_journalled_aops = { |
1848 | .readpage = ext4_readpage, | 2839 | .readpage = ext4_readpage, |
1849 | .readpages = ext4_readpages, | 2840 | .readpages = ext4_readpages, |
1850 | .writepage = ext4_journalled_writepage, | 2841 | .writepage = ext4_journalled_writepage, |
1851 | .sync_page = block_sync_page, | 2842 | .sync_page = block_sync_page, |
1852 | .write_begin = ext4_write_begin, | 2843 | .write_begin = ext4_write_begin, |
1853 | .write_end = ext4_journalled_write_end, | 2844 | .write_end = ext4_journalled_write_end, |
1854 | .set_page_dirty = ext4_journalled_set_page_dirty, | 2845 | .set_page_dirty = ext4_journalled_set_page_dirty, |
1855 | .bmap = ext4_bmap, | 2846 | .bmap = ext4_bmap, |
1856 | .invalidatepage = ext4_invalidatepage, | 2847 | .invalidatepage = ext4_invalidatepage, |
1857 | .releasepage = ext4_releasepage, | 2848 | .releasepage = ext4_releasepage, |
2849 | .is_partially_uptodate = block_is_partially_uptodate, | ||
2850 | }; | ||
2851 | |||
2852 | static const struct address_space_operations ext4_da_aops = { | ||
2853 | .readpage = ext4_readpage, | ||
2854 | .readpages = ext4_readpages, | ||
2855 | .writepage = ext4_da_writepage, | ||
2856 | .writepages = ext4_da_writepages, | ||
2857 | .sync_page = block_sync_page, | ||
2858 | .write_begin = ext4_da_write_begin, | ||
2859 | .write_end = ext4_da_write_end, | ||
2860 | .bmap = ext4_bmap, | ||
2861 | .invalidatepage = ext4_da_invalidatepage, | ||
2862 | .releasepage = ext4_releasepage, | ||
2863 | .direct_IO = ext4_direct_IO, | ||
2864 | .migratepage = buffer_migrate_page, | ||
2865 | .is_partially_uptodate = block_is_partially_uptodate, | ||
1858 | }; | 2866 | }; |
1859 | 2867 | ||
1860 | void ext4_set_aops(struct inode *inode) | 2868 | void ext4_set_aops(struct inode *inode) |
1861 | { | 2869 | { |
1862 | if (ext4_should_order_data(inode)) | 2870 | if (ext4_should_order_data(inode) && |
2871 | test_opt(inode->i_sb, DELALLOC)) | ||
2872 | inode->i_mapping->a_ops = &ext4_da_aops; | ||
2873 | else if (ext4_should_order_data(inode)) | ||
1863 | inode->i_mapping->a_ops = &ext4_ordered_aops; | 2874 | inode->i_mapping->a_ops = &ext4_ordered_aops; |
2875 | else if (ext4_should_writeback_data(inode) && | ||
2876 | test_opt(inode->i_sb, DELALLOC)) | ||
2877 | inode->i_mapping->a_ops = &ext4_da_aops; | ||
1864 | else if (ext4_should_writeback_data(inode)) | 2878 | else if (ext4_should_writeback_data(inode)) |
1865 | inode->i_mapping->a_ops = &ext4_writeback_aops; | 2879 | inode->i_mapping->a_ops = &ext4_writeback_aops; |
1866 | else | 2880 | else |
@@ -1873,7 +2887,7 @@ void ext4_set_aops(struct inode *inode) | |||
1873 | * This is required during truncate. We need to physically zero the tail end | 2887 | * This is required during truncate. We need to physically zero the tail end |
1874 | * of that block so it doesn't yield old data if the file is later grown. | 2888 | * of that block so it doesn't yield old data if the file is later grown. |
1875 | */ | 2889 | */ |
1876 | int ext4_block_truncate_page(handle_t *handle, struct page *page, | 2890 | int ext4_block_truncate_page(handle_t *handle, |
1877 | struct address_space *mapping, loff_t from) | 2891 | struct address_space *mapping, loff_t from) |
1878 | { | 2892 | { |
1879 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | 2893 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; |
@@ -1882,8 +2896,13 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, | |||
1882 | ext4_lblk_t iblock; | 2896 | ext4_lblk_t iblock; |
1883 | struct inode *inode = mapping->host; | 2897 | struct inode *inode = mapping->host; |
1884 | struct buffer_head *bh; | 2898 | struct buffer_head *bh; |
2899 | struct page *page; | ||
1885 | int err = 0; | 2900 | int err = 0; |
1886 | 2901 | ||
2902 | page = grab_cache_page(mapping, from >> PAGE_CACHE_SHIFT); | ||
2903 | if (!page) | ||
2904 | return -EINVAL; | ||
2905 | |||
1887 | blocksize = inode->i_sb->s_blocksize; | 2906 | blocksize = inode->i_sb->s_blocksize; |
1888 | length = blocksize - (offset & (blocksize - 1)); | 2907 | length = blocksize - (offset & (blocksize - 1)); |
1889 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | 2908 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); |
@@ -1956,7 +2975,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, | |||
1956 | err = ext4_journal_dirty_metadata(handle, bh); | 2975 | err = ext4_journal_dirty_metadata(handle, bh); |
1957 | } else { | 2976 | } else { |
1958 | if (ext4_should_order_data(inode)) | 2977 | if (ext4_should_order_data(inode)) |
1959 | err = ext4_journal_dirty_data(handle, bh); | 2978 | err = ext4_jbd2_file_inode(handle, inode); |
1960 | mark_buffer_dirty(bh); | 2979 | mark_buffer_dirty(bh); |
1961 | } | 2980 | } |
1962 | 2981 | ||
@@ -2179,7 +3198,21 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
2179 | 3198 | ||
2180 | if (this_bh) { | 3199 | if (this_bh) { |
2181 | BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata"); | 3200 | BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata"); |
2182 | ext4_journal_dirty_metadata(handle, this_bh); | 3201 | |
3202 | /* | ||
3203 | * The buffer head should have an attached journal head at this | ||
3204 | * point. However, if the data is corrupted and an indirect | ||
3205 | * block pointed to itself, it would have been detached when | ||
3206 | * the block was cleared. Check for this instead of OOPSing. | ||
3207 | */ | ||
3208 | if (bh2jh(this_bh)) | ||
3209 | ext4_journal_dirty_metadata(handle, this_bh); | ||
3210 | else | ||
3211 | ext4_error(inode->i_sb, __func__, | ||
3212 | "circular indirect block detected, " | ||
3213 | "inode=%lu, block=%llu", | ||
3214 | inode->i_ino, | ||
3215 | (unsigned long long) this_bh->b_blocknr); | ||
2183 | } | 3216 | } |
2184 | } | 3217 | } |
2185 | 3218 | ||
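The bh2jh() check above is what turns the would-be OOPS into a filesystem error: dirtying metadata requires an attached journal head, and jbd2's accessor is a plain read of bh->b_private (as in mainline jbd2; shown for context):

	static inline struct journal_head *bh2jh(struct buffer_head *bh)
	{
		return bh->b_private;	/* NULL once the journal head is detached */
	}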
@@ -2305,6 +3338,19 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
2305 | } | 3338 | } |
2306 | } | 3339 | } |
2307 | 3340 | ||
3341 | int ext4_can_truncate(struct inode *inode) | ||
3342 | { | ||
3343 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
3344 | return 0; | ||
3345 | if (S_ISREG(inode->i_mode)) | ||
3346 | return 1; | ||
3347 | if (S_ISDIR(inode->i_mode)) | ||
3348 | return 1; | ||
3349 | if (S_ISLNK(inode->i_mode)) | ||
3350 | return !ext4_inode_is_fast_symlink(inode); | ||
3351 | return 0; | ||
3352 | } | ||
3353 | |||
2308 | /* | 3354 | /* |
2309 | * ext4_truncate() | 3355 | * ext4_truncate() |
2310 | * | 3356 | * |
@@ -2347,51 +3393,25 @@ void ext4_truncate(struct inode *inode) | |||
2347 | int n; | 3393 | int n; |
2348 | ext4_lblk_t last_block; | 3394 | ext4_lblk_t last_block; |
2349 | unsigned blocksize = inode->i_sb->s_blocksize; | 3395 | unsigned blocksize = inode->i_sb->s_blocksize; |
2350 | struct page *page; | ||
2351 | 3396 | ||
2352 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 3397 | if (!ext4_can_truncate(inode)) |
2353 | S_ISLNK(inode->i_mode))) | ||
2354 | return; | ||
2355 | if (ext4_inode_is_fast_symlink(inode)) | ||
2356 | return; | ||
2357 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
2358 | return; | 3398 | return; |
2359 | 3399 | ||
2360 | /* | ||
2361 | * We have to lock the EOF page here, because lock_page() nests | ||
2362 | * outside jbd2_journal_start(). | ||
2363 | */ | ||
2364 | if ((inode->i_size & (blocksize - 1)) == 0) { | ||
2365 | /* Block boundary? Nothing to do */ | ||
2366 | page = NULL; | ||
2367 | } else { | ||
2368 | page = grab_cache_page(mapping, | ||
2369 | inode->i_size >> PAGE_CACHE_SHIFT); | ||
2370 | if (!page) | ||
2371 | return; | ||
2372 | } | ||
2373 | |||
2374 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 3400 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
2375 | ext4_ext_truncate(inode, page); | 3401 | ext4_ext_truncate(inode); |
2376 | return; | 3402 | return; |
2377 | } | 3403 | } |
2378 | 3404 | ||
2379 | handle = start_transaction(inode); | 3405 | handle = start_transaction(inode); |
2380 | if (IS_ERR(handle)) { | 3406 | if (IS_ERR(handle)) |
2381 | if (page) { | ||
2382 | clear_highpage(page); | ||
2383 | flush_dcache_page(page); | ||
2384 | unlock_page(page); | ||
2385 | page_cache_release(page); | ||
2386 | } | ||
2387 | return; /* AKPM: return what? */ | 3407 | return; /* AKPM: return what? */ |
2388 | } | ||
2389 | 3408 | ||
2390 | last_block = (inode->i_size + blocksize-1) | 3409 | last_block = (inode->i_size + blocksize-1) |
2391 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | 3410 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); |
2392 | 3411 | ||
2393 | if (page) | 3412 | if (inode->i_size & (blocksize - 1)) |
2394 | ext4_block_truncate_page(handle, page, mapping, inode->i_size); | 3413 | if (ext4_block_truncate_page(handle, mapping, inode->i_size)) |
3414 | goto out_stop; | ||
2395 | 3415 | ||
2396 | n = ext4_block_to_path(inode, last_block, offsets, NULL); | 3416 | n = ext4_block_to_path(inode, last_block, offsets, NULL); |
2397 | if (n == 0) | 3417 | if (n == 0) |
@@ -2410,6 +3430,11 @@ void ext4_truncate(struct inode *inode) | |||
2410 | goto out_stop; | 3430 | goto out_stop; |
2411 | 3431 | ||
2412 | /* | 3432 | /* |
3433 | * From here we block out all ext4_get_block() callers who want to | ||
3434 | * modify the block allocation tree. | ||
3435 | */ | ||
3436 | down_write(&ei->i_data_sem); | ||
3437 | /* | ||
2413 | * The orphan list entry will now protect us from any crash which | 3438 | * The orphan list entry will now protect us from any crash which |
2414 | * occurs before the truncate completes, so it is now safe to propagate | 3439 | * occurs before the truncate completes, so it is now safe to propagate |
2415 | * the new, shorter inode size (held for now in i_size) into the | 3440 | * the new, shorter inode size (held for now in i_size) into the |
@@ -2418,12 +3443,6 @@ void ext4_truncate(struct inode *inode) | |||
2418 | */ | 3443 | */ |
2419 | ei->i_disksize = inode->i_size; | 3444 | ei->i_disksize = inode->i_size; |
2420 | 3445 | ||
2421 | /* | ||
2422 | * From here we block out all ext4_get_block() callers who want to | ||
2423 | * modify the block allocation tree. | ||
2424 | */ | ||
2425 | down_write(&ei->i_data_sem); | ||
2426 | |||
2427 | if (n == 1) { /* direct blocks */ | 3446 | if (n == 1) { /* direct blocks */ |
2428 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], | 3447 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], |
2429 | i_data + EXT4_NDIR_BLOCKS); | 3448 | i_data + EXT4_NDIR_BLOCKS); |
@@ -3107,7 +4126,14 @@ int ext4_write_inode(struct inode *inode, int wait) | |||
3107 | * be freed, so we have a strong guarantee that no future commit will | 4126 | * be freed, so we have a strong guarantee that no future commit will |
3108 | * leave these blocks visible to the user.) | 4127 | * leave these blocks visible to the user.) |
3109 | * | 4128 | * |
3110 | * Called with inode->sem down. | 4129 | * Another thing we have to ensure is that if we are in ordered mode |
4130 | * and the inode is still attached to the committing transaction, we must | ||
4131 | * start writeout of all the dirty pages which are being truncated. | ||
4132 | * This way we are sure that all the data written in the previous | ||
4133 | * transaction is already on disk (truncate waits for pages under | ||
4134 | * writeback). | ||
4134 | * writeback). | ||
4135 | * | ||
4136 | * Called with inode->i_mutex down. | ||
3111 | */ | 4137 | */ |
3112 | int ext4_setattr(struct dentry *dentry, struct iattr *attr) | 4138 | int ext4_setattr(struct dentry *dentry, struct iattr *attr) |
3113 | { | 4139 | { |
@@ -3173,6 +4199,22 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
3173 | if (!error) | 4199 | if (!error) |
3174 | error = rc; | 4200 | error = rc; |
3175 | ext4_journal_stop(handle); | 4201 | ext4_journal_stop(handle); |
4202 | |||
4203 | if (ext4_should_order_data(inode)) { | ||
4204 | error = ext4_begin_ordered_truncate(inode, | ||
4205 | attr->ia_size); | ||
4206 | if (error) { | ||
4207 | /* Do as much error cleanup as possible */ | ||
4208 | handle = ext4_journal_start(inode, 3); | ||
4209 | if (IS_ERR(handle)) { | ||
4210 | ext4_orphan_del(NULL, inode); | ||
4211 | goto err_out; | ||
4212 | } | ||
4213 | ext4_orphan_del(handle, inode); | ||
4214 | ext4_journal_stop(handle); | ||
4215 | goto err_out; | ||
4216 | } | ||
4217 | } | ||
3176 | } | 4218 | } |
3177 | 4219 | ||
3178 | rc = inode_setattr(inode, attr); | 4220 | rc = inode_setattr(inode, attr); |
@@ -3193,6 +4235,32 @@ err_out: | |||
3193 | return error; | 4235 | return error; |
3194 | } | 4236 | } |
3195 | 4237 | ||
4238 | int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
4239 | struct kstat *stat) | ||
4240 | { | ||
4241 | struct inode *inode; | ||
4242 | unsigned long delalloc_blocks; | ||
4243 | |||
4244 | inode = dentry->d_inode; | ||
4245 | generic_fillattr(inode, stat); | ||
4246 | |||
4247 | /* | ||
4248 | * We can't update i_blocks if the block allocation is delayed; | ||
4249 | * otherwise, in the case of a system crash before the real block | ||
4250 | * allocation is done, we would have i_blocks inconsistent with | ||
4251 | * the on-disk file blocks. | ||
4252 | * We always keep i_blocks updated together with the real | ||
4253 | * allocation. But so as not to confuse the user, stat | ||
4254 | * will return the blocks that include the delayed allocation | ||
4255 | * blocks for this file. | ||
4256 | */ | ||
4257 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | ||
4258 | delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; | ||
4259 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
4260 | |||
4261 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; | ||
4262 | return 0; | ||
4263 | } | ||
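The shift arithmetic above converts reserved filesystem blocks into the 512-byte sectors that st_blocks is measured in. From userspace the effect is visible right after a buffered write, before any writeback happens. A small illustration (the path is hypothetical):

	#include <stdio.h>
	#include <sys/stat.h>

	int main(void)
	{
		struct stat st;

		/* assume the file was just written with delayed allocation
		 * and has not yet been flushed to disk */
		if (stat("/mnt/ext4/file", &st) != 0)	/* hypothetical path */
			return 1;
		/* st_blocks counts 512-byte units and, with this change,
		 * already includes blocks that are only reserved */
		printf("st_blocks = %lld\n", (long long)st.st_blocks);
		return 0;
	}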
3196 | 4264 | ||
3197 | /* | 4265 | /* |
3198 | * How many blocks doth make a writepage()? | 4266 | * How many blocks doth make a writepage()? |
@@ -3506,3 +4574,64 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
3506 | 4574 | ||
3507 | return err; | 4575 | return err; |
3508 | } | 4576 | } |
4577 | |||
4578 | static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh) | ||
4579 | { | ||
4580 | return !buffer_mapped(bh); | ||
4581 | } | ||
4582 | |||
4583 | int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) | ||
4584 | { | ||
4585 | loff_t size; | ||
4586 | unsigned long len; | ||
4587 | int ret = -EINVAL; | ||
4588 | struct file *file = vma->vm_file; | ||
4589 | struct inode *inode = file->f_path.dentry->d_inode; | ||
4590 | struct address_space *mapping = inode->i_mapping; | ||
4591 | |||
4592 | /* | ||
4593 | * Get i_alloc_sem to stop truncates messing with the inode. We cannot | ||
4594 | * get i_mutex because we are already holding mmap_sem. | ||
4595 | */ | ||
4596 | down_read(&inode->i_alloc_sem); | ||
4597 | size = i_size_read(inode); | ||
4598 | if (page->mapping != mapping || size <= page_offset(page) | ||
4599 | || !PageUptodate(page)) { | ||
4600 | /* page got truncated from under us? */ | ||
4601 | goto out_unlock; | ||
4602 | } | ||
4603 | ret = 0; | ||
4604 | if (PageMappedToDisk(page)) | ||
4605 | goto out_unlock; | ||
4606 | |||
4607 | if (page->index == size >> PAGE_CACHE_SHIFT) | ||
4608 | len = size & ~PAGE_CACHE_MASK; | ||
4609 | else | ||
4610 | len = PAGE_CACHE_SIZE; | ||
4611 | |||
4612 | if (page_has_buffers(page)) { | ||
4613 | /* return if we have all the buffers mapped */ | ||
4614 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | ||
4615 | ext4_bh_unmapped)) | ||
4616 | goto out_unlock; | ||
4617 | } | ||
4618 | /* | ||
4619 | * OK, we need to fill the hole... Do write_begin/write_end | ||
4620 | * to do block allocation/reservation. We are not holding | ||
4621 | * inode->i_mutex here. That allows parallel write_begin and | ||
4622 | * write_end calls. lock_page prevents this from happening | ||
4623 | * on the same page, though. | ||
4624 | */ | ||
4625 | ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), | ||
4626 | len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | ||
4627 | if (ret < 0) | ||
4628 | goto out_unlock; | ||
4629 | ret = mapping->a_ops->write_end(file, mapping, page_offset(page), | ||
4630 | len, len, page, NULL); | ||
4631 | if (ret < 0) | ||
4632 | goto out_unlock; | ||
4633 | ret = 0; | ||
4634 | out_unlock: | ||
4635 | up_read(&inode->i_alloc_sem); | ||
4636 | return ret; | ||
4637 | } | ||
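ext4_page_mkwrite() only takes effect once it is installed as the page_mkwrite handler for writable mappings; that wiring lives in the companion change to fs/ext4/file.c, not in this file's diff. Roughly, as a sketch reconstructed from the mainline counterpart:

	static struct vm_operations_struct ext4_file_vm_ops = {
		.fault		= filemap_fault,
		.page_mkwrite	= ext4_page_mkwrite,
	};

	static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
	{
		/* writable mappings get the page_mkwrite callback so holes
		 * are filled before a page can be dirtied through mmap */
		file_accessed(file);
		vma->vm_ops = &ext4_file_vm_ops;
		return 0;
	}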