diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-07-17 12:54:05 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:04 -0400 |
commit | dbe674a99c8af088faa4c95eddaeb271a3140ab6 (patch) | |
tree | c6b5a08e93d0b09df873fd5c304156622d03368a | |
parent | 247e743cbe6e655768c3679f84821e03c1577902 (diff) |
Btrfs: Update on disk i_size only after pending ordered extents are done
This changes the ordered data code to update the on-disk i_size only after
the extent is on disk. An on-disk i_size is maintained in the in-memory btrfs
inode structures, and it is updated as ordered extents finish.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- | fs/btrfs/btrfs_inode.h | 9 | ||||
-rw-r--r-- | fs/btrfs/file.c | 2 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 28 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 89 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.h | 2 |
5 files changed, 119 insertions, 11 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 8d03687510e0..81c0444f37b6 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -40,11 +40,20 @@ struct btrfs_inode { | |||
40 | */ | 40 | */ |
41 | u64 last_trans; | 41 | u64 last_trans; |
42 | u64 delalloc_bytes; | 42 | u64 delalloc_bytes; |
43 | u64 disk_i_size; | ||
43 | u32 flags; | 44 | u32 flags; |
44 | }; | 45 | }; |
46 | |||
45 | static inline struct btrfs_inode *BTRFS_I(struct inode *inode) | 47 | static inline struct btrfs_inode *BTRFS_I(struct inode *inode) |
46 | { | 48 | { |
47 | return container_of(inode, struct btrfs_inode, vfs_inode); | 49 | return container_of(inode, struct btrfs_inode, vfs_inode); |
48 | } | 50 | } |
49 | 51 | ||
52 | static inline void btrfs_i_size_write(struct inode *inode, u64 size) | ||
53 | { | ||
54 | inode->i_size = size; | ||
55 | BTRFS_I(inode)->disk_i_size = size; | ||
56 | } | ||
57 | |||
58 | |||
50 | #endif | 59 | #endif |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 20928639d173..3e4e5c227c0c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -338,7 +338,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
338 | btrfs_update_inode(trans, root, inode); | 338 | btrfs_update_inode(trans, root, inode); |
339 | } | 339 | } |
340 | failed: | 340 | failed: |
341 | err = btrfs_end_transaction(trans, root); | 341 | err = btrfs_end_transaction_throttle(trans, root); |
342 | out_unlock: | 342 | out_unlock: |
343 | unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); | 343 | unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); |
344 | return err; | 344 | return err; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 47a008c19308..baf46017d0d3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -542,6 +542,7 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
542 | add_pending_csums(trans, inode, ordered_extent->file_offset, | 542 | add_pending_csums(trans, inode, ordered_extent->file_offset, |
543 | &ordered_extent->list); | 543 | &ordered_extent->list); |
544 | 544 | ||
545 | btrfs_ordered_update_i_size(inode, ordered_extent); | ||
545 | btrfs_remove_ordered_extent(inode, ordered_extent); | 546 | btrfs_remove_ordered_extent(inode, ordered_extent); |
546 | /* once for us */ | 547 | /* once for us */ |
547 | btrfs_put_ordered_extent(ordered_extent); | 548 | btrfs_put_ordered_extent(ordered_extent); |
@@ -792,7 +793,7 @@ void btrfs_read_locked_inode(struct inode *inode) | |||
792 | inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); | 793 | inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); |
793 | inode->i_uid = btrfs_inode_uid(leaf, inode_item); | 794 | inode->i_uid = btrfs_inode_uid(leaf, inode_item); |
794 | inode->i_gid = btrfs_inode_gid(leaf, inode_item); | 795 | inode->i_gid = btrfs_inode_gid(leaf, inode_item); |
795 | inode->i_size = btrfs_inode_size(leaf, inode_item); | 796 | btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); |
796 | 797 | ||
797 | tspec = btrfs_inode_atime(inode_item); | 798 | tspec = btrfs_inode_atime(inode_item); |
798 | inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec); | 799 | inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec); |
@@ -860,7 +861,7 @@ static void fill_inode_item(struct extent_buffer *leaf, | |||
860 | { | 861 | { |
861 | btrfs_set_inode_uid(leaf, item, inode->i_uid); | 862 | btrfs_set_inode_uid(leaf, item, inode->i_uid); |
862 | btrfs_set_inode_gid(leaf, item, inode->i_gid); | 863 | btrfs_set_inode_gid(leaf, item, inode->i_gid); |
863 | btrfs_set_inode_size(leaf, item, inode->i_size); | 864 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); |
864 | btrfs_set_inode_mode(leaf, item, inode->i_mode); | 865 | btrfs_set_inode_mode(leaf, item, inode->i_mode); |
865 | btrfs_set_inode_nlink(leaf, item, inode->i_nlink); | 866 | btrfs_set_inode_nlink(leaf, item, inode->i_nlink); |
866 | 867 | ||
@@ -982,7 +983,7 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, | |||
982 | err: | 983 | err: |
983 | btrfs_free_path(path); | 984 | btrfs_free_path(path); |
984 | if (!ret) { | 985 | if (!ret) { |
985 | dir->i_size -= name_len * 2; | 986 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); |
986 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 987 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
987 | btrfs_update_inode(trans, root, dir); | 988 | btrfs_update_inode(trans, root, dir); |
988 | #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) | 989 | #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) |
@@ -1044,7 +1045,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1044 | /* now the directory is empty */ | 1045 | /* now the directory is empty */ |
1045 | err = btrfs_unlink_trans(trans, root, dir, dentry); | 1046 | err = btrfs_unlink_trans(trans, root, dir, dentry); |
1046 | if (!err) { | 1047 | if (!err) { |
1047 | inode->i_size = 0; | 1048 | btrfs_i_size_write(inode, 0); |
1048 | } | 1049 | } |
1049 | 1050 | ||
1050 | nr = trans->blocks_used; | 1051 | nr = trans->blocks_used; |
@@ -1089,7 +1090,6 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, | |||
1089 | int extent_type = -1; | 1090 | int extent_type = -1; |
1090 | u64 mask = root->sectorsize - 1; | 1091 | u64 mask = root->sectorsize - 1; |
1091 | 1092 | ||
1092 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | ||
1093 | btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1); | 1093 | btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1); |
1094 | path = btrfs_alloc_path(); | 1094 | path = btrfs_alloc_path(); |
1095 | path->reada = -1; | 1095 | path->reada = -1; |
@@ -1427,7 +1427,7 @@ void btrfs_delete_inode(struct inode *inode) | |||
1427 | goto no_delete; | 1427 | goto no_delete; |
1428 | } | 1428 | } |
1429 | 1429 | ||
1430 | inode->i_size = 0; | 1430 | btrfs_i_size_write(inode, 0); |
1431 | trans = btrfs_start_transaction(root, 1); | 1431 | trans = btrfs_start_transaction(root, 1); |
1432 | 1432 | ||
1433 | btrfs_set_trans_block_group(trans, inode); | 1433 | btrfs_set_trans_block_group(trans, inode); |
@@ -1561,6 +1561,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) | |||
1561 | inode->i_ino = args->ino; | 1561 | inode->i_ino = args->ino; |
1562 | BTRFS_I(inode)->root = args->root; | 1562 | BTRFS_I(inode)->root = args->root; |
1563 | BTRFS_I(inode)->delalloc_bytes = 0; | 1563 | BTRFS_I(inode)->delalloc_bytes = 0; |
1564 | BTRFS_I(inode)->disk_i_size = 0; | ||
1564 | extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); | 1565 | extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); |
1565 | extent_io_tree_init(&BTRFS_I(inode)->io_tree, | 1566 | extent_io_tree_init(&BTRFS_I(inode)->io_tree, |
1566 | inode->i_mapping, GFP_NOFS); | 1567 | inode->i_mapping, GFP_NOFS); |
@@ -1869,6 +1870,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
1869 | inode->i_mapping, GFP_NOFS); | 1870 | inode->i_mapping, GFP_NOFS); |
1870 | mutex_init(&BTRFS_I(inode)->csum_mutex); | 1871 | mutex_init(&BTRFS_I(inode)->csum_mutex); |
1871 | BTRFS_I(inode)->delalloc_bytes = 0; | 1872 | BTRFS_I(inode)->delalloc_bytes = 0; |
1873 | BTRFS_I(inode)->disk_i_size = 0; | ||
1872 | BTRFS_I(inode)->root = root; | 1874 | BTRFS_I(inode)->root = root; |
1873 | 1875 | ||
1874 | if (mode & S_IFDIR) | 1876 | if (mode & S_IFDIR) |
@@ -1964,7 +1966,8 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, | |||
1964 | dentry->d_parent->d_inode->i_ino); | 1966 | dentry->d_parent->d_inode->i_ino); |
1965 | } | 1967 | } |
1966 | parent_inode = dentry->d_parent->d_inode; | 1968 | parent_inode = dentry->d_parent->d_inode; |
1967 | parent_inode->i_size += dentry->d_name.len * 2; | 1969 | btrfs_i_size_write(parent_inode, parent_inode->i_size + |
1970 | dentry->d_name.len * 2); | ||
1968 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; | 1971 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; |
1969 | ret = btrfs_update_inode(trans, root, | 1972 | ret = btrfs_update_inode(trans, root, |
1970 | dentry->d_parent->d_inode); | 1973 | dentry->d_parent->d_inode); |
@@ -2092,6 +2095,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
2092 | inode->i_mapping, GFP_NOFS); | 2095 | inode->i_mapping, GFP_NOFS); |
2093 | mutex_init(&BTRFS_I(inode)->csum_mutex); | 2096 | mutex_init(&BTRFS_I(inode)->csum_mutex); |
2094 | BTRFS_I(inode)->delalloc_bytes = 0; | 2097 | BTRFS_I(inode)->delalloc_bytes = 0; |
2098 | BTRFS_I(inode)->disk_i_size = 0; | ||
2095 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 2099 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
2096 | } | 2100 | } |
2097 | dir->i_sb->s_dirt = 1; | 2101 | dir->i_sb->s_dirt = 1; |
@@ -2199,7 +2203,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
2199 | inode->i_fop = &btrfs_dir_file_operations; | 2203 | inode->i_fop = &btrfs_dir_file_operations; |
2200 | btrfs_set_trans_block_group(trans, inode); | 2204 | btrfs_set_trans_block_group(trans, inode); |
2201 | 2205 | ||
2202 | inode->i_size = 0; | 2206 | btrfs_i_size_write(inode, 0); |
2203 | err = btrfs_update_inode(trans, root, inode); | 2207 | err = btrfs_update_inode(trans, root, inode); |
2204 | if (err) | 2208 | if (err) |
2205 | goto out_fail; | 2209 | goto out_fail; |
@@ -2756,6 +2760,7 @@ static void btrfs_truncate(struct inode *inode) | |||
2756 | int ret; | 2760 | int ret; |
2757 | struct btrfs_trans_handle *trans; | 2761 | struct btrfs_trans_handle *trans; |
2758 | unsigned long nr; | 2762 | unsigned long nr; |
2763 | u64 mask = root->sectorsize - 1; | ||
2759 | 2764 | ||
2760 | if (!S_ISREG(inode->i_mode)) | 2765 | if (!S_ISREG(inode->i_mode)) |
2761 | return; | 2766 | return; |
@@ -2766,6 +2771,8 @@ static void btrfs_truncate(struct inode *inode) | |||
2766 | 2771 | ||
2767 | trans = btrfs_start_transaction(root, 1); | 2772 | trans = btrfs_start_transaction(root, 1); |
2768 | btrfs_set_trans_block_group(trans, inode); | 2773 | btrfs_set_trans_block_group(trans, inode); |
2774 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | ||
2775 | btrfs_i_size_write(inode, inode->i_size); | ||
2769 | 2776 | ||
2770 | /* FIXME, add redo link to tree so we don't leak on crash */ | 2777 | /* FIXME, add redo link to tree so we don't leak on crash */ |
2771 | ret = btrfs_truncate_in_trans(trans, root, inode, | 2778 | ret = btrfs_truncate_in_trans(trans, root, inode, |
@@ -2821,7 +2828,7 @@ int btrfs_create_subvol_root(struct btrfs_root *new_root, | |||
2821 | ret = btrfs_insert_inode_ref(trans, new_root, "..", 2, new_dirid, | 2828 | ret = btrfs_insert_inode_ref(trans, new_root, "..", 2, new_dirid, |
2822 | new_dirid); | 2829 | new_dirid); |
2823 | inode->i_nlink = 1; | 2830 | inode->i_nlink = 1; |
2824 | inode->i_size = 0; | 2831 | btrfs_i_size_write(inode, 0); |
2825 | 2832 | ||
2826 | return btrfs_update_inode(trans, new_root, inode); | 2833 | return btrfs_update_inode(trans, new_root, inode); |
2827 | } | 2834 | } |
@@ -3069,6 +3076,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
3069 | inode->i_mapping, GFP_NOFS); | 3076 | inode->i_mapping, GFP_NOFS); |
3070 | mutex_init(&BTRFS_I(inode)->csum_mutex); | 3077 | mutex_init(&BTRFS_I(inode)->csum_mutex); |
3071 | BTRFS_I(inode)->delalloc_bytes = 0; | 3078 | BTRFS_I(inode)->delalloc_bytes = 0; |
3079 | BTRFS_I(inode)->disk_i_size = 0; | ||
3072 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 3080 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
3073 | } | 3081 | } |
3074 | dir->i_sb->s_dirt = 1; | 3082 | dir->i_sb->s_dirt = 1; |
@@ -3103,7 +3111,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
3103 | inode->i_op = &btrfs_symlink_inode_operations; | 3111 | inode->i_op = &btrfs_symlink_inode_operations; |
3104 | inode->i_mapping->a_ops = &btrfs_symlink_aops; | 3112 | inode->i_mapping->a_ops = &btrfs_symlink_aops; |
3105 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | 3113 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; |
3106 | inode->i_size = name_len - 1; | 3114 | btrfs_i_size_write(inode, name_len - 1); |
3107 | err = btrfs_update_inode(trans, root, inode); | 3115 | err = btrfs_update_inode(trans, root, inode); |
3108 | if (err) | 3116 | if (err) |
3109 | drop_inode = 1; | 3117 | drop_inode = 1; |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 6513270f054c..d86a953ae51d 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -406,3 +406,92 @@ out: | |||
406 | mutex_unlock(&tree->mutex); | 406 | mutex_unlock(&tree->mutex); |
407 | return entry; | 407 | return entry; |
408 | } | 408 | } |
409 | |||
410 | int btrfs_ordered_update_i_size(struct inode *inode, | ||
411 | struct btrfs_ordered_extent *ordered) | ||
412 | { | ||
413 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; | ||
414 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
415 | u64 disk_i_size; | ||
416 | u64 new_i_size; | ||
417 | u64 i_size_test; | ||
418 | struct rb_node *node; | ||
419 | struct btrfs_ordered_extent *test; | ||
420 | |||
421 | mutex_lock(&tree->mutex); | ||
422 | disk_i_size = BTRFS_I(inode)->disk_i_size; | ||
423 | |||
424 | /* | ||
425 | * if the disk i_size is already at the inode->i_size, or | ||
426 | * this ordered extent is inside the disk i_size, we're done | ||
427 | */ | ||
428 | if (disk_i_size >= inode->i_size || | ||
429 | ordered->file_offset + ordered->len <= disk_i_size) { | ||
430 | goto out; | ||
431 | } | ||
432 | |||
433 | /* | ||
434 | * we can't update the disk_i_size if there are delalloc bytes | |
435 | * between disk_i_size and this ordered extent | ||
436 | */ | ||
437 | if (test_range_bit(io_tree, disk_i_size, | ||
438 | ordered->file_offset + ordered->len - 1, | ||
439 | EXTENT_DELALLOC, 0)) { | ||
440 | goto out; | ||
441 | } | ||
442 | /* | ||
443 | * walk backward from this ordered extent to disk_i_size. | ||
444 | * if we find an ordered extent then we can't update disk i_size | ||
445 | * yet | ||
446 | */ | ||
447 | while(1) { | ||
448 | node = rb_prev(&ordered->rb_node); | ||
449 | if (!node) | ||
450 | break; | ||
451 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
452 | if (test->file_offset + test->len <= disk_i_size) | ||
453 | break; | ||
454 | if (test->file_offset >= inode->i_size) | ||
455 | break; | ||
456 | if (test->file_offset >= disk_i_size) | ||
457 | goto out; | ||
458 | } | ||
459 | new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode)); | ||
460 | |||
461 | /* | ||
462 | * at this point, we know we can safely update i_size to at least | ||
463 | * the offset from this ordered extent. But, we need to | ||
464 | * walk forward and see if ios from higher up in the file have | ||
465 | * finished. | ||
466 | */ | ||
467 | node = rb_next(&ordered->rb_node); | ||
468 | i_size_test = 0; | ||
469 | if (node) { | ||
470 | /* | ||
471 | * do we have an area where IO might have finished | ||
472 | * between our ordered extent and the next one. | ||
473 | */ | ||
474 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
475 | if (test->file_offset > entry_end(ordered)) { | ||
476 | i_size_test = test->file_offset - 1; | ||
477 | } | ||
478 | } else { | ||
479 | i_size_test = i_size_read(inode); | ||
480 | } | ||
481 | |||
482 | /* | ||
483 | * i_size_test is the end of a region after this ordered | ||
484 | * extent where there are no ordered extents. As long as there | ||
485 | * are no delalloc bytes in this area, it is safe to update | ||
486 | * disk_i_size to the end of the region. | ||
487 | */ | ||
488 | if (i_size_test > entry_end(ordered) && | ||
489 | !test_range_bit(io_tree, entry_end(ordered), i_size_test, | ||
490 | EXTENT_DELALLOC, 0)) { | ||
491 | new_i_size = min_t(u64, i_size_test, i_size_read(inode)); | ||
492 | } | ||
493 | BTRFS_I(inode)->disk_i_size = new_i_size; | ||
494 | out: | ||
495 | mutex_unlock(&tree->mutex); | ||
496 | return 0; | ||
497 | } | ||
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 33292c5fe90c..40e9126ad954 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -89,4 +89,6 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); | |||
89 | int btrfs_add_ordered_pending(struct inode *inode, | 89 | int btrfs_add_ordered_pending(struct inode *inode, |
90 | struct btrfs_ordered_extent *ordered, | 90 | struct btrfs_ordered_extent *ordered, |
91 | u64 start, u64 len); | 91 | u64 start, u64 len); |
92 | int btrfs_ordered_update_i_size(struct inode *inode, | ||
93 | struct btrfs_ordered_extent *ordered); | ||
92 | #endif | 94 | #endif |