diff options
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 81 |
1 files changed, 74 insertions, 7 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bffd79faffb5..1cff528d5b51 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -2907,11 +2907,21 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
2907 | if (err) | 2907 | if (err) |
2908 | return err; | 2908 | return err; |
2909 | 2909 | ||
2910 | if (S_ISREG(inode->i_mode) && | 2910 | if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { |
2911 | attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { | 2911 | if (attr->ia_size > inode->i_size) { |
2912 | err = btrfs_cont_expand(inode, attr->ia_size); | 2912 | err = btrfs_cont_expand(inode, attr->ia_size); |
2913 | if (err) | 2913 | if (err) |
2914 | return err; | 2914 | return err; |
2915 | } else if (inode->i_size > 0 && | ||
2916 | attr->ia_size == 0) { | ||
2917 | |||
2918 | /* we're truncating a file that used to have good | ||
2919 | * data down to zero. Make sure it gets into | ||
2920 | * the ordered flush list so that any new writes | ||
2921 | * get down to disk quickly. | ||
2922 | */ | ||
2923 | BTRFS_I(inode)->ordered_data_close = 1; | ||
2924 | } | ||
2915 | } | 2925 | } |
2916 | 2926 | ||
2917 | err = inode_setattr(inode, attr); | 2927 | err = inode_setattr(inode, attr); |
@@ -3050,6 +3060,7 @@ static noinline void init_btrfs_i(struct inode *inode) | |||
3050 | extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, | 3060 | extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, |
3051 | inode->i_mapping, GFP_NOFS); | 3061 | inode->i_mapping, GFP_NOFS); |
3052 | INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); | 3062 | INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); |
3063 | INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); | ||
3053 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); | 3064 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); |
3054 | mutex_init(&BTRFS_I(inode)->extent_mutex); | 3065 | mutex_init(&BTRFS_I(inode)->extent_mutex); |
3055 | mutex_init(&BTRFS_I(inode)->log_mutex); | 3066 | mutex_init(&BTRFS_I(inode)->log_mutex); |
@@ -4419,6 +4430,8 @@ again: | |||
4419 | } | 4430 | } |
4420 | ClearPageChecked(page); | 4431 | ClearPageChecked(page); |
4421 | set_page_dirty(page); | 4432 | set_page_dirty(page); |
4433 | |||
4434 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; | ||
4422 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 4435 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); |
4423 | 4436 | ||
4424 | out_unlock: | 4437 | out_unlock: |
@@ -4444,6 +4457,27 @@ static void btrfs_truncate(struct inode *inode) | |||
4444 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | 4457 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); |
4445 | 4458 | ||
4446 | trans = btrfs_start_transaction(root, 1); | 4459 | trans = btrfs_start_transaction(root, 1); |
4460 | |||
4461 | /* | ||
4462 | * setattr is responsible for setting the ordered_data_close flag, | ||
4463 | * but that is only tested during the last file release. That | ||
4464 | * could happen well after the next commit, leaving a great big | ||
4465 | * window where new writes may get lost if someone chooses to write | ||
4466 | * to this file after truncating to zero. | ||
4467 | * | ||
4468 | * The inode doesn't have any dirty data here, and so if we commit | ||
4469 | * this is a noop. If someone immediately starts writing to the inode | ||
4470 | * it is very likely we'll catch some of their writes in this | ||
4471 | * transaction, and the commit will find this file on the ordered | ||
4472 | * data list with good things to send down. | ||
4473 | * | ||
4474 | * This is a best-effort solution; there is still a window where | ||
4475 | * using truncate to replace the contents of the file will | ||
4476 | * end up with a zero length file after a crash. | ||
4477 | */ | ||
4478 | if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) | ||
4479 | btrfs_add_ordered_operation(trans, root, inode); | ||
4480 | |||
4447 | btrfs_set_trans_block_group(trans, inode); | 4481 | btrfs_set_trans_block_group(trans, inode); |
4448 | btrfs_i_size_write(inode, inode->i_size); | 4482 | btrfs_i_size_write(inode, inode->i_size); |
4449 | 4483 | ||
@@ -4520,12 +4554,15 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
4520 | ei->i_acl = BTRFS_ACL_NOT_CACHED; | 4554 | ei->i_acl = BTRFS_ACL_NOT_CACHED; |
4521 | ei->i_default_acl = BTRFS_ACL_NOT_CACHED; | 4555 | ei->i_default_acl = BTRFS_ACL_NOT_CACHED; |
4522 | INIT_LIST_HEAD(&ei->i_orphan); | 4556 | INIT_LIST_HEAD(&ei->i_orphan); |
4557 | INIT_LIST_HEAD(&ei->ordered_operations); | ||
4523 | return &ei->vfs_inode; | 4558 | return &ei->vfs_inode; |
4524 | } | 4559 | } |
4525 | 4560 | ||
4526 | void btrfs_destroy_inode(struct inode *inode) | 4561 | void btrfs_destroy_inode(struct inode *inode) |
4527 | { | 4562 | { |
4528 | struct btrfs_ordered_extent *ordered; | 4563 | struct btrfs_ordered_extent *ordered; |
4564 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
4565 | |||
4529 | WARN_ON(!list_empty(&inode->i_dentry)); | 4566 | WARN_ON(!list_empty(&inode->i_dentry)); |
4530 | WARN_ON(inode->i_data.nrpages); | 4567 | WARN_ON(inode->i_data.nrpages); |
4531 | 4568 | ||
@@ -4536,13 +4573,24 @@ void btrfs_destroy_inode(struct inode *inode) | |||
4536 | BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED) | 4573 | BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED) |
4537 | posix_acl_release(BTRFS_I(inode)->i_default_acl); | 4574 | posix_acl_release(BTRFS_I(inode)->i_default_acl); |
4538 | 4575 | ||
4539 | spin_lock(&BTRFS_I(inode)->root->list_lock); | 4576 | /* |
4577 | * Make sure we're properly removed from the ordered operation | ||
4578 | * lists. | ||
4579 | */ | ||
4580 | smp_mb(); | ||
4581 | if (!list_empty(&BTRFS_I(inode)->ordered_operations)) { | ||
4582 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
4583 | list_del_init(&BTRFS_I(inode)->ordered_operations); | ||
4584 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
4585 | } | ||
4586 | |||
4587 | spin_lock(&root->list_lock); | ||
4540 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 4588 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { |
4541 | printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" | 4589 | printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" |
4542 | " list\n", inode->i_ino); | 4590 | " list\n", inode->i_ino); |
4543 | dump_stack(); | 4591 | dump_stack(); |
4544 | } | 4592 | } |
4545 | spin_unlock(&BTRFS_I(inode)->root->list_lock); | 4593 | spin_unlock(&root->list_lock); |
4546 | 4594 | ||
4547 | while (1) { | 4595 | while (1) { |
4548 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); | 4596 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); |
@@ -4667,9 +4715,28 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4667 | if (ret) | 4715 | if (ret) |
4668 | goto out_unlock; | 4716 | goto out_unlock; |
4669 | 4717 | ||
4718 | /* | ||
4719 | * we're using rename to replace one file with another, | ||
4720 | * and the replacement file is large. Start IO on it now so | ||
4721 | * we don't add too much work to the end of the transaction. | ||
4722 | */ | ||
4723 | if (new_inode && old_inode && S_ISREG(old_inode->i_mode) && | ||
4724 | new_inode->i_size && | ||
4725 | old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) | ||
4726 | filemap_flush(old_inode->i_mapping); | ||
4727 | |||
4670 | trans = btrfs_start_transaction(root, 1); | 4728 | trans = btrfs_start_transaction(root, 1); |
4671 | 4729 | ||
4672 | /* | 4730 | /* |
4731 | * make sure the inode gets flushed if it is replacing | ||
4732 | * something. | ||
4733 | */ | ||
4734 | if (new_inode && new_inode->i_size && | ||
4735 | old_inode && S_ISREG(old_inode->i_mode)) { | ||
4736 | btrfs_add_ordered_operation(trans, root, old_inode); | ||
4737 | } | ||
4738 | |||
4739 | /* | ||
4673 | * this is an ugly little race, but the rename is required to make | 4740 | * this is an ugly little race, but the rename is required to make |
4674 | * sure that if we crash, the inode is either at the old name | 4741 | * sure that if we crash, the inode is either at the old name |
4675 | * or the new one. pinning the log transaction lets us make sure | 4742 | * or the new one. pinning the log transaction lets us make sure |