Diffstat (limited to 'fs/ext4/inode.c')
 -rw-r--r--  fs/ext4/inode.c | 1591
 1 file changed, 1358 insertions(+), 233 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8d9707746413..8ca2763df091 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -32,12 +32,23 @@
 #include <linux/string.h>
 #include <linux/buffer_head.h>
 #include <linux/writeback.h>
+#include <linux/pagevec.h>
 #include <linux/mpage.h>
 #include <linux/uio.h>
 #include <linux/bio.h>
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
+#include "ext4_extents.h"
+
+static inline int ext4_begin_ordered_truncate(struct inode *inode,
+                                              loff_t new_size)
+{
+        return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode,
+                                                   new_size);
+}
+
+static void ext4_invalidatepage(struct page *page, unsigned long offset);
 
 /*
  * Test whether an inode is a fast symlink.
@@ -181,6 +192,8 @@ void ext4_delete_inode (struct inode * inode)
 {
         handle_t *handle;
 
+        if (ext4_should_order_data(inode))
+                ext4_begin_ordered_truncate(inode, 0);
         truncate_inode_pages(&inode->i_data, 0);
 
         if (is_bad_inode(inode))
@@ -508,11 +521,12 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
  * direct blocks
  */
 static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
-                        ext4_fsblk_t goal, int indirect_blks, int blks,
-                        ext4_fsblk_t new_blocks[4], int *err)
+                        ext4_lblk_t iblock, ext4_fsblk_t goal,
+                        int indirect_blks, int blks,
+                        ext4_fsblk_t new_blocks[4], int *err)
 {
         int target, i;
-        unsigned long count = 0;
+        unsigned long count = 0, blk_allocated = 0;
         int index = 0;
         ext4_fsblk_t current_block = 0;
         int ret = 0;
@@ -525,12 +539,13 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
          * the first direct block of this branch. That's the
          * minimum number of blocks need to allocate(required)
          */
-        target = blks + indirect_blks;
-
-        while (1) {
+        /* first we try to allocate the indirect blocks */
+        target = indirect_blks;
+        while (target > 0) {
                 count = target;
                 /* allocating blocks for indirect blocks and direct blocks */
-                current_block = ext4_new_blocks(handle,inode,goal,&count,err);
+                current_block = ext4_new_meta_blocks(handle, inode,
+                                                        goal, &count, err);
                 if (*err)
                         goto failed_out;
 
@@ -540,16 +555,48 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
                         new_blocks[index++] = current_block++;
                         count--;
                 }
-
-                if (count > 0)
+                if (count > 0) {
+                        /*
+                         * save the new block number
+                         * for the first direct block
+                         */
+                        new_blocks[index] = current_block;
+                        printk(KERN_INFO "%s returned more blocks than "
+                                                "requested\n", __func__);
+                        WARN_ON(1);
                         break;
+                }
         }
 
-        /* save the new block number for the first direct block */
-        new_blocks[index] = current_block;
-
+        target = blks - count;
+        blk_allocated = count;
+        if (!target)
+                goto allocated;
+        /* Now allocate data blocks */
+        count = target;
+        /* allocating blocks for data blocks */
+        current_block = ext4_new_blocks(handle, inode, iblock,
+                                                goal, &count, err);
+        if (*err && (target == blks)) {
+                /*
+                 * if the allocation failed and we didn't allocate
+                 * any blocks before
+                 */
+                goto failed_out;
+        }
+        if (!*err) {
+                if (target == blks) {
+                        /*
+                         * save the new block number
+                         * for the first direct block
+                         */
+                        new_blocks[index] = current_block;
+                }
+                blk_allocated += count;
+        }
+allocated:
         /* total number of blocks allocated for direct blocks */
-        ret = count;
+        ret = blk_allocated;
         *err = 0;
         return ret;
 failed_out:
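
The hunk above splits what used to be one allocation loop into two phases: the indirect (metadata) blocks are requested first, one ext4_new_meta_blocks() call per pass, and the data blocks follow as a single ext4_new_blocks() request that may be satisfied only partially. A standalone userspace sketch of that control flow, not part of the patch: stub_alloc() is a made-up stand-in for both kernel allocators, and the spill-over/WARN_ON path is omitted.

#include <stdio.h>

static unsigned long next_free = 1000;  /* fake free-space cursor */

/* Stub allocator: like the real ones, it may return fewer blocks
 * than requested; *count is updated to what was actually handed out. */
static unsigned long stub_alloc(unsigned long *count)
{
        unsigned long first = next_free;

        if (*count > 2)
                *count = 2;     /* pretend free space is fragmented */
        next_free += *count;
        return first;
}

int main(void)
{
        unsigned long new_blocks[4];
        unsigned long count, blk_allocated = 0;
        int indirect_blks = 3, blks = 5;
        int index = 0, target;

        /* phase 1: indirect blocks, looping until all are allocated */
        target = indirect_blks;
        while (target > 0) {
                count = target;
                unsigned long cur = stub_alloc(&count);
                target -= count;
                while (index < indirect_blks && count) {
                        new_blocks[index++] = cur++;
                        count--;
                }
        }

        /* phase 2: data blocks, a single request; partial success is fine,
         * only the first direct block number needs to be remembered */
        count = blks;
        new_blocks[index] = stub_alloc(&count);
        blk_allocated += count;

        printf("indirect: %lu..%lu, %lu data blocks from %lu\n",
               new_blocks[0], new_blocks[indirect_blks - 1],
               blk_allocated, new_blocks[index]);
        return 0;
}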
@@ -584,8 +631,9 @@ failed_out:
  * as described above and return 0.
  */
 static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
-                        int indirect_blks, int *blks, ext4_fsblk_t goal,
-                        ext4_lblk_t *offsets, Indirect *branch)
+                        ext4_lblk_t iblock, int indirect_blks,
+                        int *blks, ext4_fsblk_t goal,
+                        ext4_lblk_t *offsets, Indirect *branch)
 {
         int blocksize = inode->i_sb->s_blocksize;
         int i, n = 0;
@@ -595,7 +643,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
         ext4_fsblk_t new_blocks[4];
         ext4_fsblk_t current_block;
 
-        num = ext4_alloc_blocks(handle, inode, goal, indirect_blks,
+        num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks,
                                 *blks, new_blocks, &err);
         if (err)
                 return err;
@@ -799,6 +847,7 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
         struct ext4_inode_info *ei = EXT4_I(inode);
         int count = 0;
         ext4_fsblk_t first_block = 0;
+        loff_t disksize;
 
 
         J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
@@ -855,8 +904,9 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
         /*
          * Block out ext4_truncate while we alter the tree
          */
-        err = ext4_alloc_branch(handle, inode, indirect_blks, &count, goal,
-                                offsets + (partial - chain), partial);
+        err = ext4_alloc_branch(handle, inode, iblock, indirect_blks,
+                                        &count, goal,
+                                        offsets + (partial - chain), partial);
 
         /*
          * The ext4_splice_branch call will free and forget any buffers
@@ -873,8 +923,13 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
          * protect it if you're about to implement concurrent
          * ext4_get_block() -bzzz
          */
-        if (!err && extend_disksize && inode->i_size > ei->i_disksize)
-                ei->i_disksize = inode->i_size;
+        if (!err && extend_disksize) {
+                disksize = ((loff_t) iblock + count) << inode->i_blkbits;
+                if (disksize > i_size_read(inode))
+                        disksize = i_size_read(inode);
+                if (disksize > ei->i_disksize)
+                        ei->i_disksize = disksize;
+        }
         if (err)
                 goto cleanup;
 
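
The updated i_disksize is now derived from the last block actually mapped rather than from i_size, then clamped so it never runs past the logical file size. A quick standalone check of that arithmetic, not from the patch: with 4K blocks (i_blkbits = 12), count = 3 blocks mapped at logical block iblock = 10 gives (10 + 3) << 12 = 53248 bytes.

#include <stdio.h>

/* Same computation as the hunk above, with the kernel types and
 * i_size_read() replaced by plain parameters. */
static long long new_disksize(unsigned long iblock, unsigned long count,
                              unsigned blkbits, long long i_size)
{
        long long disksize = ((long long)(iblock + count)) << blkbits;

        if (disksize > i_size)  /* never extend past the logical size */
                disksize = i_size;
        return disksize;
}

int main(void)
{
        printf("%lld\n", new_disksize(10, 3, 12, 60000)); /* 53248 */
        printf("%lld\n", new_disksize(10, 3, 12, 50000)); /* clamped: 50000 */
        return 0;
}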
@@ -934,7 +989,7 @@ out:
  */
 int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
                         unsigned long max_blocks, struct buffer_head *bh,
-                        int create, int extend_disksize)
+                        int create, int extend_disksize, int flag)
 {
         int retval;
 
@@ -975,6 +1030,15 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
          * with create == 1 flag.
          */
         down_write((&EXT4_I(inode)->i_data_sem));
+
+        /*
+         * if the caller is from the delayed allocation writeout path
+         * we have already reserved fs blocks for allocation;
+         * let the underlying get_block() function know, to
+         * avoid double accounting
+         */
+        if (flag)
+                EXT4_I(inode)->i_delalloc_reserved_flag = 1;
         /*
          * We need to check for EXT4 here because migrate
          * could have changed the inode type in between
@@ -996,6 +1060,18 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
                                         ~EXT4_EXT_MIGRATE;
                 }
         }
+
+        if (flag) {
+                EXT4_I(inode)->i_delalloc_reserved_flag = 0;
+                /*
+                 * Update reserved blocks/metadata blocks
+                 * after successful block allocation
+                 * which were deferred till now
+                 */
+                if ((retval > 0) && buffer_delay(bh))
+                        ext4_da_release_space(inode, retval, 0);
+        }
+
         up_write((&EXT4_I(inode)->i_data_sem));
         return retval;
 }
@@ -1021,7 +1097,7 @@ static int ext4_get_block(struct inode *inode, sector_t iblock,
         }
 
         ret = ext4_get_blocks_wrap(handle, inode, iblock,
-                                        max_blocks, bh_result, create, 0);
+                                max_blocks, bh_result, create, 0, 0);
         if (ret > 0) {
                 bh_result->b_size = (ret << inode->i_blkbits);
                 ret = 0;
@@ -1047,7 +1123,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
         dummy.b_blocknr = -1000;
         buffer_trace_init(&dummy.b_history);
         err = ext4_get_blocks_wrap(handle, inode, block, 1,
-                                        &dummy, create, 1);
+                                        &dummy, create, 1, 0);
         /*
          * ext4_get_blocks_handle() returns number of blocks
          * mapped. 0 in case of a HOLE.
@@ -1203,19 +1279,20 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
         to = from + len;
 
 retry:
-        page = __grab_cache_page(mapping, index);
-        if (!page)
-                return -ENOMEM;
-        *pagep = page;
-
         handle = ext4_journal_start(inode, needed_blocks);
         if (IS_ERR(handle)) {
-                unlock_page(page);
-                page_cache_release(page);
                 ret = PTR_ERR(handle);
                 goto out;
         }
 
+        page = __grab_cache_page(mapping, index);
+        if (!page) {
+                ext4_journal_stop(handle);
+                ret = -ENOMEM;
+                goto out;
+        }
+        *pagep = page;
+
         ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
                                 ext4_get_block);
 
@@ -1225,8 +1302,8 @@ retry:
         }
 
         if (ret) {
-                ext4_journal_stop(handle);
                 unlock_page(page);
+                ext4_journal_stop(handle);
                 page_cache_release(page);
         }
 
@@ -1236,15 +1313,6 @@ out:
         return ret;
 }
 
-int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
-{
-        int err = jbd2_journal_dirty_data(handle, bh);
-        if (err)
-                ext4_journal_abort_handle(__func__, __func__,
-                                                bh, handle, err);
-        return err;
-}
-
 /* For write_end() in data=journal mode */
 static int write_end_fn(handle_t *handle, struct buffer_head *bh)
 {
@@ -1255,29 +1323,6 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
 }
 
 /*
- * Generic write_end handler for ordered and writeback ext4 journal modes.
- * We can't use generic_write_end, because that unlocks the page and we need to
- * unlock the page after ext4_journal_stop, but ext4_journal_stop must run
- * after block_write_end.
- */
-static int ext4_generic_write_end(struct file *file,
-                                struct address_space *mapping,
-                                loff_t pos, unsigned len, unsigned copied,
-                                struct page *page, void *fsdata)
-{
-        struct inode *inode = file->f_mapping->host;
-
-        copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
-
-        if (pos+copied > inode->i_size) {
-                i_size_write(inode, pos+copied);
-                mark_inode_dirty(inode);
-        }
-
-        return copied;
-}
-
-/*
  * We need to pick up the new inode size which generic_commit_write gave us
  * `file' can be NULL - eg, when called from page_symlink().
  *
@@ -1290,15 +1335,14 @@ static int ext4_ordered_write_end(struct file *file,
                         struct page *page, void *fsdata)
 {
         handle_t *handle = ext4_journal_current_handle();
-        struct inode *inode = file->f_mapping->host;
+        struct inode *inode = mapping->host;
         unsigned from, to;
         int ret = 0, ret2;
 
         from = pos & (PAGE_CACHE_SIZE - 1);
         to = from + len;
 
-        ret = walk_page_buffers(handle, page_buffers(page),
-                                from, to, NULL, ext4_journal_dirty_data);
+        ret = ext4_jbd2_file_inode(handle, inode);
 
         if (ret == 0) {
                 /*
@@ -1311,7 +1355,7 @@ static int ext4_ordered_write_end(struct file *file,
                 new_i_size = pos + copied;
                 if (new_i_size > EXT4_I(inode)->i_disksize)
                         EXT4_I(inode)->i_disksize = new_i_size;
-                ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
+                ret2 = generic_write_end(file, mapping, pos, len, copied,
                                                         page, fsdata);
                 copied = ret2;
                 if (ret2 < 0)
@@ -1320,8 +1364,6 @@ static int ext4_ordered_write_end(struct file *file,
         ret2 = ext4_journal_stop(handle);
         if (!ret)
                 ret = ret2;
-        unlock_page(page);
-        page_cache_release(page);
 
         return ret ? ret : copied;
 }
@@ -1332,7 +1374,7 @@ static int ext4_writeback_write_end(struct file *file,
                 struct page *page, void *fsdata)
 {
         handle_t *handle = ext4_journal_current_handle();
-        struct inode *inode = file->f_mapping->host;
+        struct inode *inode = mapping->host;
         int ret = 0, ret2;
         loff_t new_i_size;
 
@@ -1340,7 +1382,7 @@ static int ext4_writeback_write_end(struct file *file,
         if (new_i_size > EXT4_I(inode)->i_disksize)
                 EXT4_I(inode)->i_disksize = new_i_size;
 
-        ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
+        ret2 = generic_write_end(file, mapping, pos, len, copied,
                                                         page, fsdata);
         copied = ret2;
         if (ret2 < 0)
@@ -1349,8 +1391,6 @@ static int ext4_writeback_write_end(struct file *file,
         ret2 = ext4_journal_stop(handle);
         if (!ret)
                 ret = ret2;
-        unlock_page(page);
-        page_cache_release(page);
 
         return ret ? ret : copied;
 }
@@ -1389,14 +1429,965 @@ static int ext4_journalled_write_end(struct file *file,
                 ret = ret2;
         }
 
+        unlock_page(page);
         ret2 = ext4_journal_stop(handle);
         if (!ret)
                 ret = ret2;
-        unlock_page(page);
         page_cache_release(page);
 
         return ret ? ret : copied;
 }
+/*
+ * Calculate the number of metadata blocks that need to be reserved
+ * to allocate @blocks data blocks for a non-extent-based file
+ */
+static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
+{
+        int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+        int ind_blks, dind_blks, tind_blks;
+
+        /* number of new indirect blocks needed */
+        ind_blks = (blocks + icap - 1) / icap;
+
+        dind_blks = (ind_blks + icap - 1) / icap;
+
+        tind_blks = 1;
+
+        return ind_blks + dind_blks + tind_blks;
+}
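
Worked numbers for the function above, not part of the patch: with 4K blocks and 4-byte block addresses, icap = EXT4_ADDR_PER_BLOCK = 1024, so reserving for 3000 data blocks budgets ceil(3000/1024) = 3 indirect blocks, ceil(3/1024) = 1 double-indirect block, plus the one triple-indirect block the function always assumes, i.e. 5 metadata blocks. The same arithmetic, standalone, with icap passed in explicitly:

#include <stdio.h>

/* The patch's worst-case metadata estimate, with EXT4_ADDR_PER_BLOCK(sb)
 * replaced by an explicit icap parameter (block size / 4). */
static int indirect_metadata_amount(int icap, int blocks)
{
        int ind_blks = (blocks + icap - 1) / icap;   /* ceil(blocks/icap) */
        int dind_blks = (ind_blks + icap - 1) / icap;
        int tind_blks = 1;                           /* always budget one */

        return ind_blks + dind_blks + tind_blks;
}

int main(void)
{
        printf("%d\n", indirect_metadata_amount(1024, 3000)); /* 3+1+1 = 5 */
        return 0;
}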
+
+/*
+ * Calculate the number of metadata blocks that need to be reserved
+ * to allocate the given number of blocks
+ */
+static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
+{
+        if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+                return ext4_ext_calc_metadata_amount(inode, blocks);
+
+        return ext4_indirect_calc_metadata_amount(inode, blocks);
+}
+
+static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
+{
+        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+        unsigned long md_needed, mdblocks, total = 0;
+
+        /*
+         * recalculate the amount of metadata blocks to reserve
+         * in order to allocate nrblocks
+         * worst case is one extent per block
+         */
+        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+        total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
+        mdblocks = ext4_calc_metadata_amount(inode, total);
+        BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
+
+        md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
+        total = md_needed + nrblocks;
+
+        if (ext4_has_free_blocks(sbi, total) < total) {
+                spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+                return -ENOSPC;
+        }
+
+        /* reduce fs free blocks counter */
+        percpu_counter_sub(&sbi->s_freeblocks_counter, total);
+
+        EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
+        EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
+
+        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+        return 0;       /* success */
+}
+
+void ext4_da_release_space(struct inode *inode, int used, int to_free)
+{
+        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+        int total, mdb, mdb_free, release;
+
+        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+        /* recalculate the number of metablocks still need to be reserved */
+        total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free;
+        mdb = ext4_calc_metadata_amount(inode, total);
+
+        /* figure out how many metablocks to release */
+        BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+        mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
+
+        /* Account for allocated meta_blocks */
+        mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+
+        release = to_free + mdb_free;
+
+        /* update fs free blocks counter for truncate case */
+        percpu_counter_add(&sbi->s_freeblocks_counter, release);
+
+        /* update per-inode reservations */
+        BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks);
+        EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free);
+
+        BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+        EXT4_I(inode)->i_reserved_meta_blocks = mdb;
+        EXT4_I(inode)->i_allocated_meta_blocks = 0;
+        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+}
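
Taken together, ext4_da_reserve_space() and ext4_da_release_space() keep one invariant: the superblock free-block counter is debited up front for the data blocks plus the recomputed worst-case metadata, and whatever the allocator turns out not to need flows back on release. A userspace sketch of that bookkeeping, not from the patch; the spinlock, the per-cpu counter and the i_allocated_meta_blocks adjustment are left out, and calc_metadata() is a crude made-up stand-in for ext4_calc_metadata_amount():

#include <stdio.h>

static long free_blocks = 100000;       /* plays s_freeblocks_counter */
static int reserved_data, reserved_meta;

static int calc_metadata(int blocks)    /* crude worst-case model */
{
        return blocks ? blocks / 1024 + 2 : 0;
}

static int reserve(int nrblocks)
{
        int mdblocks = calc_metadata(reserved_data + nrblocks);
        int md_needed = mdblocks - reserved_meta;
        int total = md_needed + nrblocks;

        if (free_blocks < total)
                return -1;              /* -ENOSPC in the kernel */
        free_blocks -= total;
        reserved_data += nrblocks;
        reserved_meta = mdblocks;
        return 0;
}

static void release(int used, int to_free)
{
        int mdb = calc_metadata(reserved_data - used - to_free);
        int mdb_free = reserved_meta - mdb;

        free_blocks += to_free + mdb_free;      /* return what's unused */
        reserved_data -= used + to_free;
        reserved_meta = mdb;
}

int main(void)
{
        reserve(10);    /* ten delayed-allocated blocks: debits 10 + meta */
        release(10, 0); /* writeout allocated all ten: meta flows back */
        printf("free=%ld data=%d meta=%d\n",
               free_blocks, reserved_data, reserved_meta);
        return 0;
}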
+
+static void ext4_da_page_release_reservation(struct page *page,
+                                                unsigned long offset)
+{
+        int to_release = 0;
+        struct buffer_head *head, *bh;
+        unsigned int curr_off = 0;
+
+        head = page_buffers(page);
+        bh = head;
+        do {
+                unsigned int next_off = curr_off + bh->b_size;
+
+                if ((offset <= curr_off) && (buffer_delay(bh))) {
+                        to_release++;
+                        clear_buffer_delay(bh);
+                }
+                curr_off = next_off;
+        } while ((bh = bh->b_this_page) != head);
+        ext4_da_release_space(page->mapping->host, 0, to_release);
+}
+
+/*
+ * Delayed allocation stuff
+ */
+
+struct mpage_da_data {
+        struct inode *inode;
+        struct buffer_head lbh;                 /* extent of blocks */
+        unsigned long first_page, next_page;    /* extent of pages */
+        get_block_t *get_block;
+        struct writeback_control *wbc;
+};
+
+/*
+ * mpage_da_submit_io - walks through the extent of pages and tries to write
+ * them with __mpage_writepage()
+ *
+ * @mpd->inode: inode
+ * @mpd->first_page: first page of the extent
+ * @mpd->next_page: page after the last page of the extent
+ * @mpd->get_block: the filesystem's block mapper function
+ *
+ * By the time mpage_da_submit_io() is called we expect all blocks
+ * to be allocated. This may be wrong if allocation failed.
+ *
+ * As pages are already locked by write_cache_pages(), we can't use it
+ */
+static int mpage_da_submit_io(struct mpage_da_data *mpd)
+{
+        struct address_space *mapping = mpd->inode->i_mapping;
+        struct mpage_data mpd_pp = {
+                .bio = NULL,
+                .last_block_in_bio = 0,
+                .get_block = mpd->get_block,
+                .use_writepage = 1,
+        };
+        int ret = 0, err, nr_pages, i;
+        unsigned long index, end;
+        struct pagevec pvec;
+
+        BUG_ON(mpd->next_page <= mpd->first_page);
+
+        pagevec_init(&pvec, 0);
+        index = mpd->first_page;
+        end = mpd->next_page - 1;
+
+        while (index <= end) {
+                /* XXX: optimize tail */
+                nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+                if (nr_pages == 0)
+                        break;
+                for (i = 0; i < nr_pages; i++) {
+                        struct page *page = pvec.pages[i];
+
+                        index = page->index;
+                        if (index > end)
+                                break;
+                        index++;
+
+                        err = __mpage_writepage(page, mpd->wbc, &mpd_pp);
+
+                        /*
+                         * In the error case, we have to continue because
+                         * remaining pages are still locked
+                         * XXX: unlock and re-dirty them?
+                         */
+                        if (ret == 0)
+                                ret = err;
+                }
+                pagevec_release(&pvec);
+        }
+        if (mpd_pp.bio)
+                mpage_bio_submit(WRITE, mpd_pp.bio);
+
+        return ret;
+}
+
+/*
+ * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
+ *
+ * @mpd->inode - inode to walk through
+ * @exbh->b_blocknr - first block on a disk
+ * @exbh->b_size - amount of space in bytes
+ * @logical - first logical block to start assignment with
+ *
+ * the function goes through all passed space and puts actual disk
+ * block numbers into buffer heads, dropping BH_Delay
+ */
+static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
+                                 struct buffer_head *exbh)
+{
+        struct inode *inode = mpd->inode;
+        struct address_space *mapping = inode->i_mapping;
+        int blocks = exbh->b_size >> inode->i_blkbits;
+        sector_t pblock = exbh->b_blocknr, cur_logical;
+        struct buffer_head *head, *bh;
+        unsigned long index, end;
+        struct pagevec pvec;
+        int nr_pages, i;
+
+        index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+        end = (logical + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+        cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+        pagevec_init(&pvec, 0);
+
+        while (index <= end) {
+                /* XXX: optimize tail */
+                nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+                if (nr_pages == 0)
+                        break;
+                for (i = 0; i < nr_pages; i++) {
+                        struct page *page = pvec.pages[i];
+
+                        index = page->index;
+                        if (index > end)
+                                break;
+                        index++;
+
+                        BUG_ON(!PageLocked(page));
+                        BUG_ON(PageWriteback(page));
+                        BUG_ON(!page_has_buffers(page));
+
+                        bh = page_buffers(page);
+                        head = bh;
+
+                        /* skip blocks out of the range */
+                        do {
+                                if (cur_logical >= logical)
+                                        break;
+                                cur_logical++;
+                        } while ((bh = bh->b_this_page) != head);
+
+                        do {
+                                if (cur_logical >= logical + blocks)
+                                        break;
+                                if (buffer_delay(bh)) {
+                                        bh->b_blocknr = pblock;
+                                        clear_buffer_delay(bh);
+                                } else if (buffer_mapped(bh))
+                                        BUG_ON(bh->b_blocknr != pblock);
+
+                                cur_logical++;
+                                pblock++;
+                        } while ((bh = bh->b_this_page) != head);
+                }
+                pagevec_release(&pvec);
+        }
+}
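
The index arithmetic at the top of the function converts logical block numbers into page-cache indexes. A quick standalone check, not from the patch: with 4K pages and 1K filesystem blocks, PAGE_CACHE_SHIFT - i_blkbits = 12 - 10 = 2, so blocks 10..16 live on pages 2..4, and the walk starts from the first block of the first page (block 8) and skips forward to 'logical'. PAGE_SHIFT_DEMO is a made-up constant standing in for PAGE_CACHE_SHIFT:

#include <stdio.h>

#define PAGE_SHIFT_DEMO 12      /* 4K pages; plays PAGE_CACHE_SHIFT */

int main(void)
{
        unsigned blkbits = 10;                  /* 1K filesystem blocks */
        unsigned long logical = 10, blocks = 7;
        unsigned shift = PAGE_SHIFT_DEMO - blkbits;

        unsigned long index = logical >> shift;
        unsigned long end = (logical + blocks - 1) >> shift;
        unsigned long cur_logical = index << shift;

        /* prints "pages 2..4, walk starts at block 8" */
        printf("pages %lu..%lu, walk starts at block %lu\n",
               index, end, cur_logical);
        return 0;
}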
+
+
+/*
+ * __unmap_underlying_blocks - just a helper function to unmap
+ * a set of blocks described by @bh
+ */
+static inline void __unmap_underlying_blocks(struct inode *inode,
+                                             struct buffer_head *bh)
+{
+        struct block_device *bdev = inode->i_sb->s_bdev;
+        int blocks, i;
+
+        blocks = bh->b_size >> inode->i_blkbits;
+        for (i = 0; i < blocks; i++)
+                unmap_underlying_metadata(bdev, bh->b_blocknr + i);
+}
+
+/*
+ * mpage_da_map_blocks - go through given space
+ *
+ * @mpd->lbh - bh describing space
+ * @mpd->get_block - the filesystem's block mapper function
+ *
+ * The function skips space we know is already mapped to disk blocks.
+ *
+ * The function ignores errors ->get_block() returns, thus real
+ * error handling is postponed to __mpage_writepage()
+ */
+static void mpage_da_map_blocks(struct mpage_da_data *mpd)
+{
+        struct buffer_head *lbh = &mpd->lbh;
+        int err = 0, remain = lbh->b_size;
+        sector_t next = lbh->b_blocknr;
+        struct buffer_head new;
+
+        /*
+         * We consider only non-mapped and non-allocated blocks
+         */
+        if (buffer_mapped(lbh) && !buffer_delay(lbh))
+                return;
+
+        while (remain) {
+                new.b_state = lbh->b_state;
+                new.b_blocknr = 0;
+                new.b_size = remain;
+                err = mpd->get_block(mpd->inode, next, &new, 1);
+                if (err) {
+                        /*
+                         * Rather than implement our own error handling
+                         * here, we just leave the remaining blocks
+                         * unallocated and try again with ->writepage()
+                         */
+                        break;
+                }
+                BUG_ON(new.b_size == 0);
+
+                if (buffer_new(&new))
+                        __unmap_underlying_blocks(mpd->inode, &new);
+
+                /*
+                 * If blocks are marked delayed, we need to
+                 * put the actual blocknr and drop the delayed bit
+                 */
+                if (buffer_delay(lbh))
+                        mpage_put_bnr_to_bhs(mpd, next, &new);
+
+                /* go for the remaining blocks */
+                next += new.b_size >> mpd->inode->i_blkbits;
+                remain -= new.b_size;
+        }
+}
+
+#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay))
+
+/*
+ * mpage_add_bh_to_extent - tries to add one more block to the extent of blocks
+ *
+ * @mpd->lbh - extent of blocks
+ * @logical - logical number of the block in the file
+ * @bh - bh of the block (used to access block's state)
+ *
+ * the function is used to collect contiguous blocks in the same state
+ */
+static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
+                                   sector_t logical, struct buffer_head *bh)
+{
+        struct buffer_head *lbh = &mpd->lbh;
+        sector_t next;
+
+        next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits);
+
+        /*
+         * First block in the extent
+         */
+        if (lbh->b_size == 0) {
+                lbh->b_blocknr = logical;
+                lbh->b_size = bh->b_size;
+                lbh->b_state = bh->b_state & BH_FLAGS;
+                return;
+        }
+
+        /*
+         * Can we merge the block into our big extent?
+         */
+        if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) {
+                lbh->b_size += bh->b_size;
+                return;
+        }
+
+        /*
+         * We couldn't merge the block into our extent, so we
+         * need to flush the current extent and start a new one
+         */
+        mpage_da_map_blocks(mpd);
+
+        /*
+         * Now start a new extent
+         */
+        lbh->b_size = bh->b_size;
+        lbh->b_state = bh->b_state & BH_FLAGS;
+        lbh->b_blocknr = logical;
+}
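
mpage_add_bh_to_extent() is a run-length accumulator: a block joins the pending extent only if it is logically contiguous and carries the same BH_FLAGS state; anything else flushes the extent and starts a new one. The same pattern in miniature, not from the patch, with a made-up flush() playing the role of mpage_da_map_blocks():

#include <stdio.h>

struct extent { long start; long len; int state; };

static void flush(struct extent *e)     /* plays mpage_da_map_blocks() */
{
        if (e->len)
                printf("map %ld..%ld (state %d)\n",
                       e->start, e->start + e->len - 1, e->state);
}

static void add_block(struct extent *e, long logical, int state)
{
        if (e->len == 0) {                      /* first block of extent */
                e->start = logical;
                e->len = 1;
                e->state = state;
                return;
        }
        if (logical == e->start + e->len && state == e->state) {
                e->len++;                       /* contiguous, same state */
                return;
        }
        flush(e);                               /* can't merge: flush ... */
        e->start = logical;                     /* ... and start over */
        e->len = 1;
        e->state = state;
}

int main(void)
{
        struct extent e = {0, 0, 0};
        long logical[] = {10, 11, 12, 20, 21};  /* a gap between 12 and 20 */
        int i;

        for (i = 0; i < 5; i++)
                add_block(&e, logical[i], 1);
        flush(&e);      /* handle the last extent: prints 10..12, then 20..21 */
        return 0;
}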
+
+/*
+ * __mpage_da_writepage - finds extent of pages and blocks
+ *
+ * @page: page to consider
+ * @wbc: not used, we just follow rules
+ * @data: context
+ *
+ * The function finds extents of pages and scans them for all blocks.
+ */
+static int __mpage_da_writepage(struct page *page,
+                                struct writeback_control *wbc, void *data)
+{
+        struct mpage_da_data *mpd = data;
+        struct inode *inode = mpd->inode;
+        struct buffer_head *bh, *head, fake;
+        sector_t logical;
+
+        /*
+         * Can we merge this page to current extent?
+         */
+        if (mpd->next_page != page->index) {
+                /*
+                 * Nope, we can't. So, we map non-allocated blocks
+                 * and start IO on them using __mpage_writepage()
+                 */
+                if (mpd->next_page != mpd->first_page) {
+                        mpage_da_map_blocks(mpd);
+                        mpage_da_submit_io(mpd);
+                }
+
+                /*
+                 * Start next extent of pages ...
+                 */
+                mpd->first_page = page->index;
+
+                /*
+                 * ... and blocks
+                 */
+                mpd->lbh.b_size = 0;
+                mpd->lbh.b_state = 0;
+                mpd->lbh.b_blocknr = 0;
+        }
+
+        mpd->next_page = page->index + 1;
+        logical = (sector_t) page->index <<
+                  (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+        if (!page_has_buffers(page)) {
+                /*
+                 * There are no attached buffer heads yet (mmap?)
+                 * we treat the page as full of dirty blocks
+                 */
+                bh = &fake;
+                bh->b_size = PAGE_CACHE_SIZE;
+                bh->b_state = 0;
+                set_buffer_dirty(bh);
+                set_buffer_uptodate(bh);
+                mpage_add_bh_to_extent(mpd, logical, bh);
+        } else {
+                /*
+                 * Page with regular buffer heads, just add all dirty ones
+                 */
+                head = page_buffers(page);
+                bh = head;
+                do {
+                        BUG_ON(buffer_locked(bh));
+                        if (buffer_dirty(bh))
+                                mpage_add_bh_to_extent(mpd, logical, bh);
+                        logical++;
+                } while ((bh = bh->b_this_page) != head);
+        }
+
+        return 0;
+}
+
+/*
+ * mpage_da_writepages - walk the list of dirty pages of the given
+ * address space, allocate non-allocated blocks, map newly-allocated
+ * blocks to existing bhs and issue IO on them
+ *
+ * @mapping: address space structure to write
+ * @wbc: subtract the number of written pages from *@wbc->nr_to_write
+ * @get_block: the filesystem's block mapper function.
+ *
+ * This is a library function, which implements the writepages()
+ * address_space_operation.
+ *
+ * In order to avoid duplication of logic that deals with partial pages,
+ * multiple bios per page, etc, we find non-allocated blocks, allocate
+ * them with minimal calls to ->get_block() and re-use __mpage_writepage()
+ *
+ * It's important that we call __mpage_writepage() only once for each
+ * involved page, otherwise we'd have to implement more complicated logic
+ * to deal with pages w/o PG_lock or w/ PG_writeback and so on.
+ *
+ * See comments to mpage_writepages()
+ */
+static int mpage_da_writepages(struct address_space *mapping,
+                               struct writeback_control *wbc,
+                               get_block_t get_block)
+{
+        struct mpage_da_data mpd;
+        int ret;
+
+        if (!get_block)
+                return generic_writepages(mapping, wbc);
+
+        mpd.wbc = wbc;
+        mpd.inode = mapping->host;
+        mpd.lbh.b_size = 0;
+        mpd.lbh.b_state = 0;
+        mpd.lbh.b_blocknr = 0;
+        mpd.first_page = 0;
+        mpd.next_page = 0;
+        mpd.get_block = get_block;
+
+        ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
+
+        /*
+         * Handle last extent of pages
+         */
+        if (mpd.next_page != mpd.first_page) {
+                mpage_da_map_blocks(&mpd);
+                mpage_da_submit_io(&mpd);
+        }
+
+        return ret;
+}
+
+/*
+ * this is a special callback for ->write_begin() only
+ * its intention is to return a mapped block or reserve space
+ */
+static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
+                                  struct buffer_head *bh_result, int create)
+{
+        int ret = 0;
+
+        BUG_ON(create == 0);
+        BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
+
+        /*
+         * first, we need to know whether the block is allocated already
+         * preallocated blocks are unmapped but should be treated
+         * the same as allocated blocks.
+         */
+        ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0, 0);
+        if ((ret == 0) && !buffer_delay(bh_result)) {
+                /* the block isn't (pre)allocated yet, let's reserve space */
+                /*
+                 * XXX: __block_prepare_write() unmaps passed block,
+                 * is it OK?
+                 */
+                ret = ext4_da_reserve_space(inode, 1);
+                if (ret)
+                        /* not enough space to reserve */
+                        return ret;
+
+                map_bh(bh_result, inode->i_sb, 0);
+                set_buffer_new(bh_result);
+                set_buffer_delay(bh_result);
+        } else if (ret > 0) {
+                bh_result->b_size = (ret << inode->i_blkbits);
+                ret = 0;
+        }
+
+        return ret;
+}
+#define EXT4_DELALLOC_RSVED     1
+static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
+                                   struct buffer_head *bh_result, int create)
+{
+        int ret;
+        unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
+        loff_t disksize = EXT4_I(inode)->i_disksize;
+        handle_t *handle = NULL;
+
+        handle = ext4_journal_current_handle();
+        if (!handle) {
+                ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
+                                           bh_result, 0, 0, 0);
+                BUG_ON(!ret);
+        } else {
+                ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
+                                           bh_result, create, 0, EXT4_DELALLOC_RSVED);
+        }
+
+        if (ret > 0) {
+                bh_result->b_size = (ret << inode->i_blkbits);
+
+                /*
+                 * Update on-disk size along with block allocation
+                 * we don't use 'extend_disksize' as size may change
+                 * within already allocated block -bzzz
+                 */
+                disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
+                if (disksize > i_size_read(inode))
+                        disksize = i_size_read(inode);
+                if (disksize > EXT4_I(inode)->i_disksize) {
+                        /*
+                         * XXX: replace with spinlock if seen contended -bzzz
+                         */
+                        down_write(&EXT4_I(inode)->i_data_sem);
+                        if (disksize > EXT4_I(inode)->i_disksize)
+                                EXT4_I(inode)->i_disksize = disksize;
+                        up_write(&EXT4_I(inode)->i_data_sem);
+
+                        if (EXT4_I(inode)->i_disksize == disksize) {
+                                ret = ext4_mark_inode_dirty(handle, inode);
+                                return ret;
+                        }
+                }
+                ret = 0;
+        }
+        return ret;
+}
+
+static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
+{
+        /*
+         * an unmapped buffer is possible for holes.
+         * a delay buffer is possible with delayed allocation
+         */
+        return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh));
+}
+
+static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
+                                       struct buffer_head *bh_result, int create)
+{
+        int ret = 0;
+        unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
+
+        /*
+         * we don't want to do block allocation in writepage
+         * so call get_block_wrap with create = 0
+         */
+        ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks,
+                                   bh_result, 0, 0, 0);
+        if (ret > 0) {
+                bh_result->b_size = (ret << inode->i_blkbits);
+                ret = 0;
+        }
+        return ret;
+}
+
+/*
+ * gets called via ext4_da_writepages after taking page lock (have journal handle)
+ * gets called via journal_submit_inode_data_buffers (no journal handle)
+ * gets called via shrink_page_list via pdflush (no journal handle)
+ * or grab_page_cache when doing write_begin (have journal handle)
+ */
+static int ext4_da_writepage(struct page *page,
+                             struct writeback_control *wbc)
+{
+        int ret = 0;
+        loff_t size;
+        unsigned long len;
+        struct buffer_head *page_bufs;
+        struct inode *inode = page->mapping->host;
+
+        size = i_size_read(inode);
+        if (page->index == size >> PAGE_CACHE_SHIFT)
+                len = size & ~PAGE_CACHE_MASK;
+        else
+                len = PAGE_CACHE_SIZE;
+
+        if (page_has_buffers(page)) {
+                page_bufs = page_buffers(page);
+                if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+                                        ext4_bh_unmapped_or_delay)) {
+                        /*
+                         * We don't want to do block allocation
+                         * So redirty the page and return
+                         * We may reach here when we do a journal commit
+                         * via journal_submit_inode_data_buffers.
+                         * If we don't have a mapping block we just ignore
+                         * them. We can also reach here via shrink_page_list
+                         */
+                        redirty_page_for_writepage(wbc, page);
+                        unlock_page(page);
+                        return 0;
+                }
+        } else {
+                /*
+                 * The test for page_has_buffers() is subtle:
+                 * We know the page is dirty but it lost buffers. That means
+                 * that at some moment in time after write_begin()/write_end()
+                 * has been called all buffers have been clean and thus they
+                 * must have been written at least once. So they are all
+                 * mapped and we can happily proceed with mapping them
+                 * and writing the page.
+                 *
+                 * Try to initialize the buffer_heads and check whether
+                 * all are mapped and non delay. We don't want to
+                 * do block allocation here.
+                 */
+                ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
+                                          ext4_normal_get_block_write);
+                if (!ret) {
+                        page_bufs = page_buffers(page);
+                        /* check whether all are mapped and non delay */
+                        if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+                                                ext4_bh_unmapped_or_delay)) {
+                                redirty_page_for_writepage(wbc, page);
+                                unlock_page(page);
+                                return 0;
+                        }
+                } else {
+                        /*
+                         * We can't do block allocation here
+                         * so just redirty the page and unlock
+                         * and return
+                         */
+                        redirty_page_for_writepage(wbc, page);
+                        unlock_page(page);
+                        return 0;
+                }
+        }
+
+        if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
+                ret = nobh_writepage(page, ext4_normal_get_block_write, wbc);
+        else
+                ret = block_write_full_page(page,
+                                            ext4_normal_get_block_write,
+                                            wbc);
+
+        return ret;
+}
+
+/*
+ * For now just follow the DIO way to estimate the max credits
+ * needed to write out EXT4_MAX_WRITEBACK_PAGES.
+ * todo: need to calculate the max credits needed for
+ * extent based files; currently the DIO credits are based on the
+ * indirect-blocks mapping way.
+ *
+ * Probably should have a generic way to calculate credits
+ * for DIO, writepages, and truncate
+ */
+#define EXT4_MAX_WRITEBACK_PAGES        DIO_MAX_BLOCKS
+#define EXT4_MAX_WRITEBACK_CREDITS      DIO_CREDITS
| 2170 | static int ext4_da_writepages(struct address_space *mapping, | ||
| 2171 | struct writeback_control *wbc) | ||
| 2172 | { | ||
| 2173 | struct inode *inode = mapping->host; | ||
| 2174 | handle_t *handle = NULL; | ||
| 2175 | int needed_blocks; | ||
| 2176 | int ret = 0; | ||
| 2177 | long to_write; | ||
| 2178 | loff_t range_start = 0; | ||
| 2179 | |||
| 2180 | /* | ||
| 2181 | * No pages to write? This is mainly a kludge to avoid starting | ||
| 2182 | * a transaction for special inodes like journal inode on last iput() | ||
| 2183 | * because that could violate lock ordering on umount | ||
| 2184 | */ | ||
| 2185 | if (!mapping->nrpages) | ||
| 2186 | return 0; | ||
| 2187 | |||
| 2188 | /* | ||
| 2189 | * Estimate the worse case needed credits to write out | ||
| 2190 | * EXT4_MAX_BUF_BLOCKS pages | ||
| 2191 | */ | ||
| 2192 | needed_blocks = EXT4_MAX_WRITEBACK_CREDITS; | ||
| 2193 | |||
| 2194 | to_write = wbc->nr_to_write; | ||
| 2195 | if (!wbc->range_cyclic) { | ||
| 2196 | /* | ||
| 2197 | * If range_cyclic is not set force range_cont | ||
| 2198 | * and save the old writeback_index | ||
| 2199 | */ | ||
| 2200 | wbc->range_cont = 1; | ||
| 2201 | range_start = wbc->range_start; | ||
| 2202 | } | ||
| 2203 | |||
| 2204 | while (!ret && to_write) { | ||
| 2205 | /* start a new transaction*/ | ||
| 2206 | handle = ext4_journal_start(inode, needed_blocks); | ||
| 2207 | if (IS_ERR(handle)) { | ||
| 2208 | ret = PTR_ERR(handle); | ||
| 2209 | goto out_writepages; | ||
| 2210 | } | ||
| 2211 | if (ext4_should_order_data(inode)) { | ||
| 2212 | /* | ||
| 2213 | * With ordered mode we need to add | ||
| 2214 | * the inode to the journal handle | ||
| 2215 | * when we do block allocation. | ||
| 2216 | */ | ||
| 2217 | ret = ext4_jbd2_file_inode(handle, inode); | ||
| 2218 | if (ret) { | ||
| 2219 | ext4_journal_stop(handle); | ||
| 2220 | goto out_writepages; | ||
| 2221 | } | ||
| 2222 | |||
| 2223 | } | ||
| 2224 | /* | ||
| 2225 | * set the max dirty pages could be write at a time | ||
| 2226 | * to fit into the reserved transaction credits | ||
| 2227 | */ | ||
| 2228 | if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES) | ||
| 2229 | wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES; | ||
| 2230 | |||
| 2231 | to_write -= wbc->nr_to_write; | ||
| 2232 | ret = mpage_da_writepages(mapping, wbc, | ||
| 2233 | ext4_da_get_block_write); | ||
| 2234 | ext4_journal_stop(handle); | ||
| 2235 | if (wbc->nr_to_write) { | ||
| 2236 | /* | ||
| 2237 | * There is no more writeout needed | ||
| 2238 | * or we requested for a noblocking writeout | ||
| 2239 | * and we found the device congested | ||
| 2240 | */ | ||
| 2241 | to_write += wbc->nr_to_write; | ||
| 2242 | break; | ||
| 2243 | } | ||
| 2244 | wbc->nr_to_write = to_write; | ||
| 2245 | } | ||
| 2246 | |||
| 2247 | out_writepages: | ||
| 2248 | wbc->nr_to_write = to_write; | ||
| 2249 | if (range_start) | ||
| 2250 | wbc->range_start = range_start; | ||
| 2251 | return ret; | ||
| 2252 | } | ||
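
The loop above uses wbc->nr_to_write as both budget and residue: each transaction is capped at EXT4_MAX_WRITEBACK_PAGES, and a nonzero nr_to_write after a pass means nothing was left to write (or the device was congested), so the leftover flows back into the caller's budget. A standalone sketch of just that bookkeeping, not from the patch; write_pass() is a made-up stand-in for one journalled mpage_da_writepages() call:

#include <stdio.h>

#define MAX_PER_TXN 64          /* plays EXT4_MAX_WRITEBACK_PAGES */

static long dirty_pages = 150;

/* One pass: writes up to *nr_to_write pages, decrementing it, the
 * way write_cache_pages() treats wbc->nr_to_write. */
static void write_pass(long *nr_to_write)
{
        while (*nr_to_write > 0 && dirty_pages > 0) {
                dirty_pages--;
                (*nr_to_write)--;
        }
}

int main(void)
{
        long to_write = 1000;   /* caller's overall budget */
        long nr_to_write;

        while (to_write > 0) {
                /* a new transaction would start here in the real code */
                nr_to_write = to_write > MAX_PER_TXN ? MAX_PER_TXN : to_write;
                to_write -= nr_to_write;
                write_pass(&nr_to_write);
                /* ... and stop here */
                if (nr_to_write) {      /* leftover budget: nothing to write */
                        to_write += nr_to_write;
                        break;
                }
        }
        /* 150 dirty pages consumed over three passes; prints 850 */
        printf("remaining budget: %ld\n", to_write);
        return 0;
}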
+
+static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
+                               loff_t pos, unsigned len, unsigned flags,
+                               struct page **pagep, void **fsdata)
+{
+        int ret, retries = 0;
+        struct page *page;
+        pgoff_t index;
+        unsigned from, to;
+        struct inode *inode = mapping->host;
+        handle_t *handle;
+
+        index = pos >> PAGE_CACHE_SHIFT;
+        from = pos & (PAGE_CACHE_SIZE - 1);
+        to = from + len;
+
+retry:
+        /*
+         * With delayed allocation, we don't log the i_disksize update
+         * if there is delayed block allocation. But we still need
+         * to journal the i_disksize update if we write to the end
+         * of a file which has an already mapped buffer.
+         */
+        handle = ext4_journal_start(inode, 1);
+        if (IS_ERR(handle)) {
+                ret = PTR_ERR(handle);
+                goto out;
+        }
+
+        page = __grab_cache_page(mapping, index);
+        if (!page)
+                return -ENOMEM;
+        *pagep = page;
+
+        ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+                                ext4_da_get_block_prep);
+        if (ret < 0) {
+                unlock_page(page);
+                ext4_journal_stop(handle);
+                page_cache_release(page);
+        }
+
+        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+                goto retry;
+out:
+        return ret;
+}
+
+/*
+ * Check if we should update i_disksize
+ * when the write goes to the end of file but doesn't require block allocation
+ */
+static int ext4_da_should_update_i_disksize(struct page *page,
+                                            unsigned long offset)
+{
+        struct buffer_head *bh;
+        struct inode *inode = page->mapping->host;
+        unsigned int idx;
+        int i;
+
+        bh = page_buffers(page);
+        idx = offset >> inode->i_blkbits;
+
+        for (i = 0; i < idx; i++)
+                bh = bh->b_this_page;
+
+        if (!buffer_mapped(bh) || (buffer_delay(bh)))
+                return 0;
+        return 1;
+}
+
+static int ext4_da_write_end(struct file *file,
+                             struct address_space *mapping,
+                             loff_t pos, unsigned len, unsigned copied,
+                             struct page *page, void *fsdata)
+{
+        struct inode *inode = mapping->host;
+        int ret = 0, ret2;
+        handle_t *handle = ext4_journal_current_handle();
+        loff_t new_i_size;
+        unsigned long start, end;
+
+        start = pos & (PAGE_CACHE_SIZE - 1);
+        end = start + copied - 1;
+
+        /*
+         * generic_write_end() will run mark_inode_dirty() if i_size
+         * changes. So let's piggyback the i_disksize mark_inode_dirty
+         * into that.
+         */
+
+        new_i_size = pos + copied;
+        if (new_i_size > EXT4_I(inode)->i_disksize) {
+                if (ext4_da_should_update_i_disksize(page, end)) {
+                        down_write(&EXT4_I(inode)->i_data_sem);
+                        if (new_i_size > EXT4_I(inode)->i_disksize) {
+                                /*
+                                 * Updating i_disksize when extending file
+                                 * without needing block allocation
+                                 */
+                                if (ext4_should_order_data(inode))
+                                        ret = ext4_jbd2_file_inode(handle,
+                                                                   inode);
+
+                                EXT4_I(inode)->i_disksize = new_i_size;
+                        }
+                        up_write(&EXT4_I(inode)->i_data_sem);
+                }
+        }
+        ret2 = generic_write_end(file, mapping, pos, len, copied,
+                                 page, fsdata);
+        copied = ret2;
+        if (ret2 < 0)
+                ret = ret2;
+        ret2 = ext4_journal_stop(handle);
+        if (!ret)
+                ret = ret2;
+
+        return ret ? ret : copied;
+}
+
+static void ext4_da_invalidatepage(struct page *page, unsigned long offset)
+{
+        /*
+         * Drop reserved blocks
+         */
+        BUG_ON(!PageLocked(page));
+        if (!page_has_buffers(page))
+                goto out;
+
+        ext4_da_page_release_reservation(page, offset);
+
+out:
+        ext4_invalidatepage(page, offset);
+
+        return;
+}
+
| 1400 | 2391 | ||
| 1401 | /* | 2392 | /* |
| 1402 | * bmap() is special. It gets used by applications such as lilo and by | 2393 | * bmap() is special. It gets used by applications such as lilo and by |
| @@ -1418,6 +2409,16 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
| 1418 | journal_t *journal; | 2409 | journal_t *journal; |
| 1419 | int err; | 2410 | int err; |
| 1420 | 2411 | ||
| 2412 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && | ||
| 2413 | test_opt(inode->i_sb, DELALLOC)) { | ||
| 2414 | /* | ||
| 2415 | * With delalloc we want to sync the file | ||
| 2416 | * so that we can make sure we allocate | ||
| 2417 | * blocks for the file | ||
| 2418 | */ | ||
| 2419 | filemap_write_and_wait(mapping); | ||
| 2420 | } | ||
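The userspace consequence of the hunk above: bmap() reports physical block numbers, which only exist once delayed allocation has been resolved, so ext4 flushes the file first. This is what keeps tools like lilo working on delalloc files. A sketch using the FIBMAP ioctl, assuming a hypothetical ext4 file at the path below:

#include <fcntl.h>
#include <linux/fs.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/ext4/testfile", O_RDONLY);
	int block = 0;			/* in: logical block, out: physical block */

	if (fd < 0)
		return 1;
	if (ioctl(fd, FIBMAP, &block) == 0)	/* requires CAP_SYS_RAWIO */
		printf("logical 0 -> physical %d\n", block);
	close(fd);
	return 0;
}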
| 2421 | |||
| 1421 | if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { | 2422 | if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { |
| 1422 | /* | 2423 | /* |
| 1423 | * This is a REALLY heavyweight approach, but the use of | 2424 | * This is a REALLY heavyweight approach, but the use of |
| @@ -1462,21 +2463,17 @@ static int bput_one(handle_t *handle, struct buffer_head *bh) | |||
| 1462 | return 0; | 2463 | return 0; |
| 1463 | } | 2464 | } |
| 1464 | 2465 | ||
| 1465 | static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) | ||
| 1466 | { | ||
| 1467 | if (buffer_mapped(bh)) | ||
| 1468 | return ext4_journal_dirty_data(handle, bh); | ||
| 1469 | return 0; | ||
| 1470 | } | ||
| 1471 | |||
| 1472 | /* | 2466 | /* |
| 1473 | * Note that we always start a transaction even if we're not journalling | 2467 | * Note that we don't need to start a transaction unless we're journaling data |
| 1474 | * data. This is to preserve ordering: any hole instantiation within | 2468 | * because we should have holes filled from ext4_page_mkwrite(). We don't even |
| 1475 | * __block_write_full_page -> ext4_get_block() should be journalled | 2469 | * need to file the inode to the transaction's list in ordered mode because if |
| 1476 | * along with the data so we don't crash and then get metadata which | 2470 | * we are writing back data added by write(), the inode is already there and if |
| 1477 | * refers to old data. | 2471 | * we are writing back data modified via mmap(), no one guarantees in which |
| 2472 | * transaction the data will hit the disk. In case we are journaling data, we | ||
| 2473 | * cannot start a transaction directly because transaction start ranks above the | ||
| 2474 | * page lock, so we have to do some magic. | ||
| 1478 | * | 2475 | * |
| 1479 | * In all journalling modes block_write_full_page() will start the I/O. | 2476 | * In all journaling modes block_write_full_page() will start the I/O. |
| 1480 | * | 2477 | * |
| 1481 | * Problem: | 2478 | * Problem: |
| 1482 | * | 2479 | * |
| @@ -1518,105 +2515,103 @@ static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) | |||
| 1518 | * disastrous. Any write() or metadata operation will sync the fs for | 2515 | * disastrous. Any write() or metadata operation will sync the fs for |
| 1519 | * us. | 2516 | * us. |
| 1520 | * | 2517 | * |
| 1521 | * AKPM2: if all the page's buffers are mapped to disk and !data=journal, | ||
| 1522 | * we don't need to open a transaction here. | ||
| 1523 | */ | 2518 | */ |
| 1524 | static int ext4_ordered_writepage(struct page *page, | 2519 | static int __ext4_normal_writepage(struct page *page, |
| 1525 | struct writeback_control *wbc) | 2520 | struct writeback_control *wbc) |
| 1526 | { | 2521 | { |
| 1527 | struct inode *inode = page->mapping->host; | 2522 | struct inode *inode = page->mapping->host; |
| 1528 | struct buffer_head *page_bufs; | ||
| 1529 | handle_t *handle = NULL; | ||
| 1530 | int ret = 0; | ||
| 1531 | int err; | ||
| 1532 | |||
| 1533 | J_ASSERT(PageLocked(page)); | ||
| 1534 | |||
| 1535 | /* | ||
| 1536 | * We give up here if we're reentered, because it might be for a | ||
| 1537 | * different filesystem. | ||
| 1538 | */ | ||
| 1539 | if (ext4_journal_current_handle()) | ||
| 1540 | goto out_fail; | ||
| 1541 | 2523 | ||
| 1542 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); | 2524 | if (test_opt(inode->i_sb, NOBH)) |
| 2525 | return nobh_writepage(page, | ||
| 2526 | ext4_normal_get_block_write, wbc); | ||
| 2527 | else | ||
| 2528 | return block_write_full_page(page, | ||
| 2529 | ext4_normal_get_block_write, | ||
| 2530 | wbc); | ||
| 2531 | } | ||
| 1543 | 2532 | ||
| 1544 | if (IS_ERR(handle)) { | 2533 | static int ext4_normal_writepage(struct page *page, |
| 1545 | ret = PTR_ERR(handle); | 2534 | struct writeback_control *wbc) |
| 1546 | goto out_fail; | 2535 | { |
| 1547 | } | 2536 | struct inode *inode = page->mapping->host; |
| 2537 | loff_t size = i_size_read(inode); | ||
| 2538 | loff_t len; | ||
| 1548 | 2539 | ||
| 1549 | if (!page_has_buffers(page)) { | 2540 | J_ASSERT(PageLocked(page)); |
| 1550 | create_empty_buffers(page, inode->i_sb->s_blocksize, | 2541 | if (page->index == size >> PAGE_CACHE_SHIFT) |
| 1551 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | 2542 | len = size & ~PAGE_CACHE_MASK; |
| 2543 | else | ||
| 2544 | len = PAGE_CACHE_SIZE; | ||
| 2545 | |||
| 2546 | if (page_has_buffers(page)) { | ||
| 2547 | /* if the page has buffers, they should all be mapped | ||
| 2548 | * and allocated. If there are no buffers attached | ||
| 2549 | * to the page, we know the page is dirty but it lost | ||
| 2550 | * its buffers. That means that at some moment in time | ||
| 2551 | * after write_begin() / write_end() was called, | ||
| 2552 | * all the buffers were clean and thus they must have been | ||
| 2553 | * written at least once. So they are all mapped and we can | ||
| 2554 | * happily proceed with mapping them and writing the page. | ||
| 2555 | */ | ||
| 2556 | BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | ||
| 2557 | ext4_bh_unmapped_or_delay)); | ||
| 1552 | } | 2558 | } |
| 1553 | page_bufs = page_buffers(page); | ||
| 1554 | walk_page_buffers(handle, page_bufs, 0, | ||
| 1555 | PAGE_CACHE_SIZE, NULL, bget_one); | ||
| 1556 | |||
| 1557 | ret = block_write_full_page(page, ext4_get_block, wbc); | ||
| 1558 | 2559 | ||
| 1559 | /* | 2560 | if (!ext4_journal_current_handle()) |
| 1560 | * The page can become unlocked at any point now, and | 2561 | return __ext4_normal_writepage(page, wbc); |
| 1561 | * truncate can then come in and change things. So we | ||
| 1562 | * can't touch *page from now on. But *page_bufs is | ||
| 1563 | * safe due to elevated refcount. | ||
| 1564 | */ | ||
| 1565 | 2562 | ||
| 1566 | /* | ||
| 1567 | * And attach them to the current transaction. But only if | ||
| 1568 | * block_write_full_page() succeeded. Otherwise they are unmapped, | ||
| 1569 | * and generally junk. | ||
| 1570 | */ | ||
| 1571 | if (ret == 0) { | ||
| 1572 | err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, | ||
| 1573 | NULL, jbd2_journal_dirty_data_fn); | ||
| 1574 | if (!ret) | ||
| 1575 | ret = err; | ||
| 1576 | } | ||
| 1577 | walk_page_buffers(handle, page_bufs, 0, | ||
| 1578 | PAGE_CACHE_SIZE, NULL, bput_one); | ||
| 1579 | err = ext4_journal_stop(handle); | ||
| 1580 | if (!ret) | ||
| 1581 | ret = err; | ||
| 1582 | return ret; | ||
| 1583 | |||
| 1584 | out_fail: | ||
| 1585 | redirty_page_for_writepage(wbc, page); | 2563 | redirty_page_for_writepage(wbc, page); |
| 1586 | unlock_page(page); | 2564 | unlock_page(page); |
| 1587 | return ret; | 2565 | return 0; |
| 1588 | } | 2566 | } |
| 1589 | 2567 | ||
| 1590 | static int ext4_writeback_writepage(struct page *page, | 2568 | static int __ext4_journalled_writepage(struct page *page, |
| 1591 | struct writeback_control *wbc) | 2569 | struct writeback_control *wbc) |
| 1592 | { | 2570 | { |
| 1593 | struct inode *inode = page->mapping->host; | 2571 | struct address_space *mapping = page->mapping; |
| 2572 | struct inode *inode = mapping->host; | ||
| 2573 | struct buffer_head *page_bufs; | ||
| 1594 | handle_t *handle = NULL; | 2574 | handle_t *handle = NULL; |
| 1595 | int ret = 0; | 2575 | int ret = 0; |
| 1596 | int err; | 2576 | int err; |
| 1597 | 2577 | ||
| 1598 | if (ext4_journal_current_handle()) | 2578 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, |
| 1599 | goto out_fail; | 2579 | ext4_normal_get_block_write); |
| 2580 | if (ret != 0) | ||
| 2581 | goto out_unlock; | ||
| 2582 | |||
| 2583 | page_bufs = page_buffers(page); | ||
| 2584 | walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL, | ||
| 2585 | bget_one); | ||
| 2586 | /* As soon as we unlock the page, it can go away, but we have | ||
| 2587 | * references to buffers so we are safe */ | ||
| 2588 | unlock_page(page); | ||
| 1600 | 2589 | ||
| 1601 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); | 2590 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); |
| 1602 | if (IS_ERR(handle)) { | 2591 | if (IS_ERR(handle)) { |
| 1603 | ret = PTR_ERR(handle); | 2592 | ret = PTR_ERR(handle); |
| 1604 | goto out_fail; | 2593 | goto out; |
| 1605 | } | 2594 | } |
| 1606 | 2595 | ||
| 1607 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) | 2596 | ret = walk_page_buffers(handle, page_bufs, 0, |
| 1608 | ret = nobh_writepage(page, ext4_get_block, wbc); | 2597 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); |
| 1609 | else | ||
| 1610 | ret = block_write_full_page(page, ext4_get_block, wbc); | ||
| 1611 | 2598 | ||
| 2599 | err = walk_page_buffers(handle, page_bufs, 0, | ||
| 2600 | PAGE_CACHE_SIZE, NULL, write_end_fn); | ||
| 2601 | if (ret == 0) | ||
| 2602 | ret = err; | ||
| 1612 | err = ext4_journal_stop(handle); | 2603 | err = ext4_journal_stop(handle); |
| 1613 | if (!ret) | 2604 | if (!ret) |
| 1614 | ret = err; | 2605 | ret = err; |
| 1615 | return ret; | ||
| 1616 | 2606 | ||
| 1617 | out_fail: | 2607 | walk_page_buffers(handle, page_bufs, 0, |
| 1618 | redirty_page_for_writepage(wbc, page); | 2608 | PAGE_CACHE_SIZE, NULL, bput_one); |
| 2609 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | ||
| 2610 | goto out; | ||
| 2611 | |||
| 2612 | out_unlock: | ||
| 1619 | unlock_page(page); | 2613 | unlock_page(page); |
| 2614 | out: | ||
| 1620 | return ret; | 2615 | return ret; |
| 1621 | } | 2616 | } |
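__ext4_journalled_writepage() above is the "magic" the earlier comment refers to: since transaction start ranks above the page lock, the buffers are pinned (bget_one), the page is unlocked, and only then is the transaction started; the pins (dropped later via bput_one) keep the buffers alive once the page can be reclaimed. A sketch of that pin/unlock/work/unpin shape using C11 atomics; the types are hypothetical stand-ins, not kernel structures:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct pinned {
	atomic_int refcount;	/* like a buffer_head's b_count */
};

static void get_ref(struct pinned *p) { atomic_fetch_add(&p->refcount, 1); }
static void put_ref(struct pinned *p) { atomic_fetch_sub(&p->refcount, 1); }

static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;

/* Called with page_lock held, as ->writepage is called with the page locked. */
static void journal_buffers(struct pinned *bufs)
{
	get_ref(bufs);				/* bget_one() */
	pthread_mutex_unlock(&page_lock);	/* page may now go away... */
	/* ...start the transaction and journal the buffers: safe, we hold a ref */
	put_ref(bufs);				/* bput_one() */
}

int main(void)
{
	struct pinned bufs = { .refcount = 0 };

	pthread_mutex_lock(&page_lock);
	journal_buffers(&bufs);
	printf("refcount back to %d\n", atomic_load(&bufs.refcount));
	return 0;
}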
| 1622 | 2617 | ||
| @@ -1624,59 +2619,53 @@ static int ext4_journalled_writepage(struct page *page, | |||
| 1624 | struct writeback_control *wbc) | 2619 | struct writeback_control *wbc) |
| 1625 | { | 2620 | { |
| 1626 | struct inode *inode = page->mapping->host; | 2621 | struct inode *inode = page->mapping->host; |
| 1627 | handle_t *handle = NULL; | 2622 | loff_t size = i_size_read(inode); |
| 1628 | int ret = 0; | 2623 | loff_t len; |
| 1629 | int err; | ||
| 1630 | 2624 | ||
| 1631 | if (ext4_journal_current_handle()) | 2625 | J_ASSERT(PageLocked(page)); |
| 1632 | goto no_write; | 2626 | if (page->index == size >> PAGE_CACHE_SHIFT) |
| 2627 | len = size & ~PAGE_CACHE_MASK; | ||
| 2628 | else | ||
| 2629 | len = PAGE_CACHE_SIZE; | ||
| 2630 | |||
| 2631 | if (page_has_buffers(page)) { | ||
| 2632 | /* if the page has buffers, they should all be mapped | ||
| 2633 | * and allocated. If there are no buffers attached | ||
| 2634 | * to the page, we know the page is dirty but it lost | ||
| 2635 | * its buffers. That means that at some moment in time | ||
| 2636 | * after write_begin() / write_end() was called, | ||
| 2637 | * all the buffers were clean and thus they must have been | ||
| 2638 | * written at least once. So they are all mapped and we can | ||
| 2639 | * happily proceed with mapping them and writing the page. | ||
| 2640 | */ | ||
| 2641 | BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | ||
| 2642 | ext4_bh_unmapped_or_delay)); | ||
| 2643 | } | ||
| 1633 | 2644 | ||
| 1634 | handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); | 2645 | if (ext4_journal_current_handle()) |
| 1635 | if (IS_ERR(handle)) { | ||
| 1636 | ret = PTR_ERR(handle); | ||
| 1637 | goto no_write; | 2646 | goto no_write; |
| 1638 | } | ||
| 1639 | 2647 | ||
| 1640 | if (!page_has_buffers(page) || PageChecked(page)) { | 2648 | if (PageChecked(page)) { |
| 1641 | /* | 2649 | /* |
| 1642 | * It's mmapped pagecache. Add buffers and journal it. There | 2650 | * It's mmapped pagecache. Add buffers and journal it. There |
| 1643 | * doesn't seem much point in redirtying the page here. | 2651 | * doesn't seem much point in redirtying the page here. |
| 1644 | */ | 2652 | */ |
| 1645 | ClearPageChecked(page); | 2653 | ClearPageChecked(page); |
| 1646 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, | 2654 | return __ext4_journalled_writepage(page, wbc); |
| 1647 | ext4_get_block); | ||
| 1648 | if (ret != 0) { | ||
| 1649 | ext4_journal_stop(handle); | ||
| 1650 | goto out_unlock; | ||
| 1651 | } | ||
| 1652 | ret = walk_page_buffers(handle, page_buffers(page), 0, | ||
| 1653 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); | ||
| 1654 | |||
| 1655 | err = walk_page_buffers(handle, page_buffers(page), 0, | ||
| 1656 | PAGE_CACHE_SIZE, NULL, write_end_fn); | ||
| 1657 | if (ret == 0) | ||
| 1658 | ret = err; | ||
| 1659 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | ||
| 1660 | unlock_page(page); | ||
| 1661 | } else { | 2655 | } else { |
| 1662 | /* | 2656 | /* |
| 1663 | * It may be a page full of checkpoint-mode buffers. We don't | 2657 | * It may be a page full of checkpoint-mode buffers. We don't |
| 1664 | * really know unless we go poke around in the buffer_heads. | 2658 | * really know unless we go poke around in the buffer_heads. |
| 1665 | * But block_write_full_page will do the right thing. | 2659 | * But block_write_full_page will do the right thing. |
| 1666 | */ | 2660 | */ |
| 1667 | ret = block_write_full_page(page, ext4_get_block, wbc); | 2661 | return block_write_full_page(page, |
| 2662 | ext4_normal_get_block_write, | ||
| 2663 | wbc); | ||
| 1668 | } | 2664 | } |
| 1669 | err = ext4_journal_stop(handle); | ||
| 1670 | if (!ret) | ||
| 1671 | ret = err; | ||
| 1672 | out: | ||
| 1673 | return ret; | ||
| 1674 | |||
| 1675 | no_write: | 2665 | no_write: |
| 1676 | redirty_page_for_writepage(wbc, page); | 2666 | redirty_page_for_writepage(wbc, page); |
| 1677 | out_unlock: | ||
| 1678 | unlock_page(page); | 2667 | unlock_page(page); |
| 1679 | goto out; | 2668 | return 0; |
| 1680 | } | 2669 | } |
| 1681 | 2670 | ||
| 1682 | static int ext4_readpage(struct file *file, struct page *page) | 2671 | static int ext4_readpage(struct file *file, struct page *page) |
| @@ -1819,7 +2808,7 @@ static int ext4_journalled_set_page_dirty(struct page *page) | |||
| 1819 | static const struct address_space_operations ext4_ordered_aops = { | 2808 | static const struct address_space_operations ext4_ordered_aops = { |
| 1820 | .readpage = ext4_readpage, | 2809 | .readpage = ext4_readpage, |
| 1821 | .readpages = ext4_readpages, | 2810 | .readpages = ext4_readpages, |
| 1822 | .writepage = ext4_ordered_writepage, | 2811 | .writepage = ext4_normal_writepage, |
| 1823 | .sync_page = block_sync_page, | 2812 | .sync_page = block_sync_page, |
| 1824 | .write_begin = ext4_write_begin, | 2813 | .write_begin = ext4_write_begin, |
| 1825 | .write_end = ext4_ordered_write_end, | 2814 | .write_end = ext4_ordered_write_end, |
| @@ -1833,7 +2822,7 @@ static const struct address_space_operations ext4_ordered_aops = { | |||
| 1833 | static const struct address_space_operations ext4_writeback_aops = { | 2822 | static const struct address_space_operations ext4_writeback_aops = { |
| 1834 | .readpage = ext4_readpage, | 2823 | .readpage = ext4_readpage, |
| 1835 | .readpages = ext4_readpages, | 2824 | .readpages = ext4_readpages, |
| 1836 | .writepage = ext4_writeback_writepage, | 2825 | .writepage = ext4_normal_writepage, |
| 1837 | .sync_page = block_sync_page, | 2826 | .sync_page = block_sync_page, |
| 1838 | .write_begin = ext4_write_begin, | 2827 | .write_begin = ext4_write_begin, |
| 1839 | .write_end = ext4_writeback_write_end, | 2828 | .write_end = ext4_writeback_write_end, |
| @@ -1857,10 +2846,31 @@ static const struct address_space_operations ext4_journalled_aops = { | |||
| 1857 | .releasepage = ext4_releasepage, | 2846 | .releasepage = ext4_releasepage, |
| 1858 | }; | 2847 | }; |
| 1859 | 2848 | ||
| 2849 | static const struct address_space_operations ext4_da_aops = { | ||
| 2850 | .readpage = ext4_readpage, | ||
| 2851 | .readpages = ext4_readpages, | ||
| 2852 | .writepage = ext4_da_writepage, | ||
| 2853 | .writepages = ext4_da_writepages, | ||
| 2854 | .sync_page = block_sync_page, | ||
| 2855 | .write_begin = ext4_da_write_begin, | ||
| 2856 | .write_end = ext4_da_write_end, | ||
| 2857 | .bmap = ext4_bmap, | ||
| 2858 | .invalidatepage = ext4_da_invalidatepage, | ||
| 2859 | .releasepage = ext4_releasepage, | ||
| 2860 | .direct_IO = ext4_direct_IO, | ||
| 2861 | .migratepage = buffer_migrate_page, | ||
| 2862 | }; | ||
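The address_space_operations tables above are plain C "vtables": const structs of function pointers, selected once per inode by ext4_set_aops() below according to the journaling mode and the delalloc mount option. A minimal self-contained analogue with hypothetical ops:

#include <stdio.h>

struct aops {
	int (*writepage)(const char *what);
};

static int normal_writepage(const char *what)
{
	return printf("normal writepage: %s\n", what) < 0 ? -1 : 0;
}

static int da_writepage(const char *what)
{
	return printf("delalloc writepage: %s\n", what) < 0 ? -1 : 0;
}

static const struct aops normal_aops = { .writepage = normal_writepage };
static const struct aops da_aops     = { .writepage = da_writepage };

int main(void)
{
	int delalloc = 1;	/* would come from mount options */
	const struct aops *a = delalloc ? &da_aops : &normal_aops;

	return a->writepage("page 0");
}

Keeping the tables const and swapping a single pointer means the per-page hot paths pay one indirect call and no mode checks.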
| 2863 | |||
| 1860 | void ext4_set_aops(struct inode *inode) | 2864 | void ext4_set_aops(struct inode *inode) |
| 1861 | { | 2865 | { |
| 1862 | if (ext4_should_order_data(inode)) | 2866 | if (ext4_should_order_data(inode) && |
| 2867 | test_opt(inode->i_sb, DELALLOC)) | ||
| 2868 | inode->i_mapping->a_ops = &ext4_da_aops; | ||
| 2869 | else if (ext4_should_order_data(inode)) | ||
| 1863 | inode->i_mapping->a_ops = &ext4_ordered_aops; | 2870 | inode->i_mapping->a_ops = &ext4_ordered_aops; |
| 2871 | else if (ext4_should_writeback_data(inode) && | ||
| 2872 | test_opt(inode->i_sb, DELALLOC)) | ||
| 2873 | inode->i_mapping->a_ops = &ext4_da_aops; | ||
| 1864 | else if (ext4_should_writeback_data(inode)) | 2874 | else if (ext4_should_writeback_data(inode)) |
| 1865 | inode->i_mapping->a_ops = &ext4_writeback_aops; | 2875 | inode->i_mapping->a_ops = &ext4_writeback_aops; |
| 1866 | else | 2876 | else |
| @@ -1873,7 +2883,7 @@ void ext4_set_aops(struct inode *inode) | |||
| 1873 | * This is required during truncate. We need to physically zero the tail end | 2883 | * This is required during truncate. We need to physically zero the tail end |
| 1874 | * of that block so it doesn't yield old data if the file is later grown. | 2884 | * of that block so it doesn't yield old data if the file is later grown. |
| 1875 | */ | 2885 | */ |
| 1876 | int ext4_block_truncate_page(handle_t *handle, struct page *page, | 2886 | int ext4_block_truncate_page(handle_t *handle, |
| 1877 | struct address_space *mapping, loff_t from) | 2887 | struct address_space *mapping, loff_t from) |
| 1878 | { | 2888 | { |
| 1879 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | 2889 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; |
| @@ -1882,8 +2892,13 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, | |||
| 1882 | ext4_lblk_t iblock; | 2892 | ext4_lblk_t iblock; |
| 1883 | struct inode *inode = mapping->host; | 2893 | struct inode *inode = mapping->host; |
| 1884 | struct buffer_head *bh; | 2894 | struct buffer_head *bh; |
| 2895 | struct page *page; | ||
| 1885 | int err = 0; | 2896 | int err = 0; |
| 1886 | 2897 | ||
| 2898 | page = grab_cache_page(mapping, from >> PAGE_CACHE_SHIFT); | ||
| 2899 | if (!page) | ||
| 2900 | return -EINVAL; | ||
| 2901 | |||
| 1887 | blocksize = inode->i_sb->s_blocksize; | 2902 | blocksize = inode->i_sb->s_blocksize; |
| 1888 | length = blocksize - (offset & (blocksize - 1)); | 2903 | length = blocksize - (offset & (blocksize - 1)); |
| 1889 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | 2904 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); |
| @@ -1956,7 +2971,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, | |||
| 1956 | err = ext4_journal_dirty_metadata(handle, bh); | 2971 | err = ext4_journal_dirty_metadata(handle, bh); |
| 1957 | } else { | 2972 | } else { |
| 1958 | if (ext4_should_order_data(inode)) | 2973 | if (ext4_should_order_data(inode)) |
| 1959 | err = ext4_journal_dirty_data(handle, bh); | 2974 | err = ext4_jbd2_file_inode(handle, inode); |
| 1960 | mark_buffer_dirty(bh); | 2975 | mark_buffer_dirty(bh); |
| 1961 | } | 2976 | } |
| 1962 | 2977 | ||
| @@ -2179,7 +3194,21 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
| 2179 | 3194 | ||
| 2180 | if (this_bh) { | 3195 | if (this_bh) { |
| 2181 | BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata"); | 3196 | BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata"); |
| 2182 | ext4_journal_dirty_metadata(handle, this_bh); | 3197 | |
| 3198 | /* | ||
| 3199 | * The buffer head should have an attached journal head at this | ||
| 3200 | * point. However, if the data is corrupted and an indirect | ||
| 3201 | * block pointed to itself, it would have been detached when | ||
| 3202 | * the block was cleared. Check for this instead of OOPSing. | ||
| 3203 | */ | ||
| 3204 | if (bh2jh(this_bh)) | ||
| 3205 | ext4_journal_dirty_metadata(handle, this_bh); | ||
| 3206 | else | ||
| 3207 | ext4_error(inode->i_sb, __func__, | ||
| 3208 | "circular indirect block detected, " | ||
| 3209 | "inode=%lu, block=%llu", | ||
| 3210 | inode->i_ino, | ||
| 3211 | (unsigned long long) this_bh->b_blocknr); | ||
| 2183 | } | 3212 | } |
| 2184 | } | 3213 | } |
| 2185 | 3214 | ||
| @@ -2305,6 +3334,19 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
| 2305 | } | 3334 | } |
| 2306 | } | 3335 | } |
| 2307 | 3336 | ||
| 3337 | int ext4_can_truncate(struct inode *inode) | ||
| 3338 | { | ||
| 3339 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
| 3340 | return 0; | ||
| 3341 | if (S_ISREG(inode->i_mode)) | ||
| 3342 | return 1; | ||
| 3343 | if (S_ISDIR(inode->i_mode)) | ||
| 3344 | return 1; | ||
| 3345 | if (S_ISLNK(inode->i_mode)) | ||
| 3346 | return !ext4_inode_is_fast_symlink(inode); | ||
| 3347 | return 0; | ||
| 3348 | } | ||
| 3349 | |||
| 2308 | /* | 3350 | /* |
| 2309 | * ext4_truncate() | 3351 | * ext4_truncate() |
| 2310 | * | 3352 | * |
| @@ -2347,51 +3389,25 @@ void ext4_truncate(struct inode *inode) | |||
| 2347 | int n; | 3389 | int n; |
| 2348 | ext4_lblk_t last_block; | 3390 | ext4_lblk_t last_block; |
| 2349 | unsigned blocksize = inode->i_sb->s_blocksize; | 3391 | unsigned blocksize = inode->i_sb->s_blocksize; |
| 2350 | struct page *page; | ||
| 2351 | 3392 | ||
| 2352 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 3393 | if (!ext4_can_truncate(inode)) |
| 2353 | S_ISLNK(inode->i_mode))) | ||
| 2354 | return; | ||
| 2355 | if (ext4_inode_is_fast_symlink(inode)) | ||
| 2356 | return; | ||
| 2357 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
| 2358 | return; | 3394 | return; |
| 2359 | 3395 | ||
| 2360 | /* | ||
| 2361 | * We have to lock the EOF page here, because lock_page() nests | ||
| 2362 | * outside jbd2_journal_start(). | ||
| 2363 | */ | ||
| 2364 | if ((inode->i_size & (blocksize - 1)) == 0) { | ||
| 2365 | /* Block boundary? Nothing to do */ | ||
| 2366 | page = NULL; | ||
| 2367 | } else { | ||
| 2368 | page = grab_cache_page(mapping, | ||
| 2369 | inode->i_size >> PAGE_CACHE_SHIFT); | ||
| 2370 | if (!page) | ||
| 2371 | return; | ||
| 2372 | } | ||
| 2373 | |||
| 2374 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 3396 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
| 2375 | ext4_ext_truncate(inode, page); | 3397 | ext4_ext_truncate(inode); |
| 2376 | return; | 3398 | return; |
| 2377 | } | 3399 | } |
| 2378 | 3400 | ||
| 2379 | handle = start_transaction(inode); | 3401 | handle = start_transaction(inode); |
| 2380 | if (IS_ERR(handle)) { | 3402 | if (IS_ERR(handle)) |
| 2381 | if (page) { | ||
| 2382 | clear_highpage(page); | ||
| 2383 | flush_dcache_page(page); | ||
| 2384 | unlock_page(page); | ||
| 2385 | page_cache_release(page); | ||
| 2386 | } | ||
| 2387 | return; /* AKPM: return what? */ | 3403 | return; /* AKPM: return what? */ |
| 2388 | } | ||
| 2389 | 3404 | ||
| 2390 | last_block = (inode->i_size + blocksize-1) | 3405 | last_block = (inode->i_size + blocksize-1) |
| 2391 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | 3406 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); |
| 2392 | 3407 | ||
| 2393 | if (page) | 3408 | if (inode->i_size & (blocksize - 1)) |
| 2394 | ext4_block_truncate_page(handle, page, mapping, inode->i_size); | 3409 | if (ext4_block_truncate_page(handle, mapping, inode->i_size)) |
| 3410 | goto out_stop; | ||
| 2395 | 3411 | ||
| 2396 | n = ext4_block_to_path(inode, last_block, offsets, NULL); | 3412 | n = ext4_block_to_path(inode, last_block, offsets, NULL); |
| 2397 | if (n == 0) | 3413 | if (n == 0) |
| @@ -2410,6 +3426,11 @@ void ext4_truncate(struct inode *inode) | |||
| 2410 | goto out_stop; | 3426 | goto out_stop; |
| 2411 | 3427 | ||
| 2412 | /* | 3428 | /* |
| 3429 | * From here we block out all ext4_get_block() callers who want to | ||
| 3430 | * modify the block allocation tree. | ||
| 3431 | */ | ||
| 3432 | down_write(&ei->i_data_sem); | ||
| 3433 | /* | ||
| 2413 | * The orphan list entry will now protect us from any crash which | 3434 | * The orphan list entry will now protect us from any crash which |
| 2414 | * occurs before the truncate completes, so it is now safe to propagate | 3435 | * occurs before the truncate completes, so it is now safe to propagate |
| 2415 | * the new, shorter inode size (held for now in i_size) into the | 3436 | * the new, shorter inode size (held for now in i_size) into the |
| @@ -2418,12 +3439,6 @@ void ext4_truncate(struct inode *inode) | |||
| 2418 | */ | 3439 | */ |
| 2419 | ei->i_disksize = inode->i_size; | 3440 | ei->i_disksize = inode->i_size; |
| 2420 | 3441 | ||
| 2421 | /* | ||
| 2422 | * From here we block out all ext4_get_block() callers who want to | ||
| 2423 | * modify the block allocation tree. | ||
| 2424 | */ | ||
| 2425 | down_write(&ei->i_data_sem); | ||
| 2426 | |||
| 2427 | if (n == 1) { /* direct blocks */ | 3442 | if (n == 1) { /* direct blocks */ |
| 2428 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], | 3443 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], |
| 2429 | i_data + EXT4_NDIR_BLOCKS); | 3444 | i_data + EXT4_NDIR_BLOCKS); |
| @@ -3107,7 +4122,14 @@ int ext4_write_inode(struct inode *inode, int wait) | |||
| 3107 | * be freed, so we have a strong guarantee that no future commit will | 4122 | * be freed, so we have a strong guarantee that no future commit will |
| 3108 | * leave these blocks visible to the user.) | 4123 | * leave these blocks visible to the user.) |
| 3109 | * | 4124 | * |
| 3110 | * Called with inode->sem down. | 4125 | * Another thing we have to ensure is that if we are in ordered mode |
| 4126 | * and the inode is still attached to the committing transaction, we must | ||
| 4127 | * start writeout of all the dirty pages which are being truncated. | ||
| 4128 | * This way we are sure that all the data written in the previous | ||
| 4129 | * transaction are already on disk (truncate waits for pages under | ||
| 4130 | * writeback). | ||
| 4131 | * | ||
| 4132 | * Called with inode->i_mutex down. | ||
| 3111 | */ | 4133 | */ |
| 3112 | int ext4_setattr(struct dentry *dentry, struct iattr *attr) | 4134 | int ext4_setattr(struct dentry *dentry, struct iattr *attr) |
| 3113 | { | 4135 | { |
| @@ -3173,6 +4195,22 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 3173 | if (!error) | 4195 | if (!error) |
| 3174 | error = rc; | 4196 | error = rc; |
| 3175 | ext4_journal_stop(handle); | 4197 | ext4_journal_stop(handle); |
| 4198 | |||
| 4199 | if (ext4_should_order_data(inode)) { | ||
| 4200 | error = ext4_begin_ordered_truncate(inode, | ||
| 4201 | attr->ia_size); | ||
| 4202 | if (error) { | ||
| 4203 | /* Do as much error cleanup as possible */ | ||
| 4204 | handle = ext4_journal_start(inode, 3); | ||
| 4205 | if (IS_ERR(handle)) { | ||
| 4206 | ext4_orphan_del(NULL, inode); | ||
| 4207 | goto err_out; | ||
| 4208 | } | ||
| 4209 | ext4_orphan_del(handle, inode); | ||
| 4210 | ext4_journal_stop(handle); | ||
| 4211 | goto err_out; | ||
| 4212 | } | ||
| 4213 | } | ||
| 3176 | } | 4214 | } |
| 3177 | 4215 | ||
| 3178 | rc = inode_setattr(inode, attr); | 4216 | rc = inode_setattr(inode, attr); |
| @@ -3193,6 +4231,32 @@ err_out: | |||
| 3193 | return error; | 4231 | return error; |
| 3194 | } | 4232 | } |
| 3195 | 4233 | ||
| 4234 | int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | ||
| 4235 | struct kstat *stat) | ||
| 4236 | { | ||
| 4237 | struct inode *inode; | ||
| 4238 | unsigned long delalloc_blocks; | ||
| 4239 | |||
| 4240 | inode = dentry->d_inode; | ||
| 4241 | generic_fillattr(inode, stat); | ||
| 4242 | |||
| 4243 | /* | ||
| 4244 | * We can't update i_blocks if the block allocation is delayed; | ||
| 4245 | * otherwise, in the case of a system crash before the real block | ||
| 4246 | * allocation is done, we will have i_blocks inconsistent with | ||
| 4247 | * the on-disk file blocks. | ||
| 4248 | * We always keep i_blocks updated together with the real | ||
| 4249 | * allocation. But so as not to confuse userspace, stat | ||
| 4250 | * will return the blocks that include the delayed allocation | ||
| 4251 | * blocks for this file. | ||
| 4252 | */ | ||
| 4253 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 4254 | delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; | ||
| 4255 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 4256 | |||
| 4257 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; | ||
| 4258 | return 0; | ||
| 4259 | } | ||
| 3196 | 4260 | ||
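What ext4_getattr() above means for userspace: st_blocks includes blocks that are only reserved by delayed allocation, so a freshly written, not-yet-flushed file still reports sensible usage to tools like du. A sketch, assuming a hypothetical ext4 file at the path below; st_blocks is in 512-byte units, matching the >> 9 in the hunk:

#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
	struct stat st;

	if (stat("/mnt/ext4/testfile", &st) != 0)
		return 1;
	printf("size=%lld blocks=%lld (512-byte units)\n",
	       (long long)st.st_size, (long long)st.st_blocks);
	return 0;
}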
| 3197 | /* | 4261 | /* |
| 3198 | * How many blocks doth make a writepage()? | 4262 | * How many blocks doth make a writepage()? |
| @@ -3506,3 +4570,64 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
| 3506 | 4570 | ||
| 3507 | return err; | 4571 | return err; |
| 3508 | } | 4572 | } |
| 4573 | |||
| 4574 | static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh) | ||
| 4575 | { | ||
| 4576 | return !buffer_mapped(bh); | ||
| 4577 | } | ||
| 4578 | |||
| 4579 | int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) | ||
| 4580 | { | ||
| 4581 | loff_t size; | ||
| 4582 | unsigned long len; | ||
| 4583 | int ret = -EINVAL; | ||
| 4584 | struct file *file = vma->vm_file; | ||
| 4585 | struct inode *inode = file->f_path.dentry->d_inode; | ||
| 4586 | struct address_space *mapping = inode->i_mapping; | ||
| 4587 | |||
| 4588 | /* | ||
| 4589 | * Get i_alloc_sem to stop truncates messing with the inode. We cannot | ||
| 4590 | * get i_mutex because we are already holding mmap_sem. | ||
| 4591 | */ | ||
| 4592 | down_read(&inode->i_alloc_sem); | ||
| 4593 | size = i_size_read(inode); | ||
| 4594 | if (page->mapping != mapping || size <= page_offset(page) | ||
| 4595 | || !PageUptodate(page)) { | ||
| 4596 | /* page got truncated from under us? */ | ||
| 4597 | goto out_unlock; | ||
| 4598 | } | ||
| 4599 | ret = 0; | ||
| 4600 | if (PageMappedToDisk(page)) | ||
| 4601 | goto out_unlock; | ||
| 4602 | |||
| 4603 | if (page->index == size >> PAGE_CACHE_SHIFT) | ||
| 4604 | len = size & ~PAGE_CACHE_MASK; | ||
| 4605 | else | ||
| 4606 | len = PAGE_CACHE_SIZE; | ||
| 4607 | |||
| 4608 | if (page_has_buffers(page)) { | ||
| 4609 | /* return if we have all the buffers mapped */ | ||
| 4610 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | ||
| 4611 | ext4_bh_unmapped)) | ||
| 4612 | goto out_unlock; | ||
| 4613 | } | ||
| 4614 | /* | ||
| 4615 | * OK, we need to fill the hole... Do write_begin/write_end | ||
| 4616 | * to do the block allocation/reservation. We are not holding | ||
| 4617 | * inode->i_mutex here. That allows parallel write_begin, | ||
| 4618 | * write_end calls. lock_page prevents this from happening | ||
| 4619 | * on the same page, though. | ||
| 4620 | */ | ||
| 4621 | ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), | ||
| 4622 | len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | ||
| 4623 | if (ret < 0) | ||
| 4624 | goto out_unlock; | ||
| 4625 | ret = mapping->a_ops->write_end(file, mapping, page_offset(page), | ||
| 4626 | len, len, page, NULL); | ||
| 4627 | if (ret < 0) | ||
| 4628 | goto out_unlock; | ||
| 4629 | ret = 0; | ||
| 4630 | out_unlock: | ||
| 4631 | up_read(&inode->i_alloc_sem); | ||
| 4632 | return ret; | ||
| 4633 | } | ||
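What drives ext4_page_mkwrite() above: the first store through a shared writable mapping of a hole takes a write fault, and the ->page_mkwrite callback lets the filesystem allocate or reserve blocks before the page is dirtied. A userspace sketch that exercises that path, with a hypothetical path and size:

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/ext4/testfile", O_RDWR);
	char *p;

	if (fd < 0)
		return 1;
	if (ftruncate(fd, 4096) != 0)	/* creates a hole: no blocks yet */
		return 1;
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	p[0] = 'x';	/* write fault -> ->page_mkwrite fills the hole */
	munmap(p, 4096);
	close(fd);
	return 0;
}

Filling holes here is what lets the writepage paths earlier in this patch assume that every dirty page already has mapped, allocated buffers.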
