aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/btrfs_inode.h18
-rw-r--r--fs/btrfs/ctree.h35
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/file.c26
-rw-r--r--fs/btrfs/inode.c81
-rw-r--r--fs/btrfs/ordered-data.c118
-rw-r--r--fs/btrfs/ordered-data.h4
-rw-r--r--fs/btrfs/transaction.c11
8 files changed, 288 insertions, 7 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 3af4cfb5654c..b30986f00b9d 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -66,6 +66,12 @@ struct btrfs_inode {
66 */ 66 */
67 struct list_head delalloc_inodes; 67 struct list_head delalloc_inodes;
68 68
69 /*
70 * list for tracking inodes that must be sent to disk before a
71 * rename or truncate commit
72 */
73 struct list_head ordered_operations;
74
69 /* the space_info for where this inode's data allocations are done */ 75 /* the space_info for where this inode's data allocations are done */
70 struct btrfs_space_info *space_info; 76 struct btrfs_space_info *space_info;
71 77
@@ -122,6 +128,18 @@ struct btrfs_inode {
122 */ 128 */
123 u64 last_unlink_trans; 129 u64 last_unlink_trans;
124 130
131 /*
132 * ordered_data_close is set by truncate when a file that used
133 * to have good data has been truncated to zero. When it is set
134 * the btrfs file release call will add this inode to the
135 * ordered operations list so that we make sure to flush out any
136 * new data the application may have written before commit.
137 *
 138	 * yes, it's silly to have a single bitflag, but we might grow more
139 * of these.
140 */
141 unsigned ordered_data_close:1;
142
125 struct inode vfs_inode; 143 struct inode vfs_inode;
126}; 144};
127 145
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2737facbd341..f48905ee5240 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -45,6 +45,13 @@ struct btrfs_ordered_sum;
45 45
46#define BTRFS_MAX_LEVEL 8 46#define BTRFS_MAX_LEVEL 8
47 47
48/*
49 * files bigger than this get some pre-flushing when they are added
50 * to the ordered operations list. That way we limit the total
51 * work done by the commit
52 */
53#define BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT (8 * 1024 * 1024)
54
48/* holds pointers to all of the tree roots */ 55/* holds pointers to all of the tree roots */
49#define BTRFS_ROOT_TREE_OBJECTID 1ULL 56#define BTRFS_ROOT_TREE_OBJECTID 1ULL
50 57
@@ -727,6 +734,15 @@ struct btrfs_fs_info {
727 struct mutex volume_mutex; 734 struct mutex volume_mutex;
728 struct mutex tree_reloc_mutex; 735 struct mutex tree_reloc_mutex;
729 736
737 /*
738 * this protects the ordered operations list only while we are
739 * processing all of the entries on it. This way we make
740 * sure the commit code doesn't find the list temporarily empty
741 * because another function happens to be doing non-waiting preflush
742 * before jumping into the main commit.
743 */
744 struct mutex ordered_operations_mutex;
745
730 struct list_head trans_list; 746 struct list_head trans_list;
731 struct list_head hashers; 747 struct list_head hashers;
732 struct list_head dead_roots; 748 struct list_head dead_roots;
@@ -741,10 +757,29 @@ struct btrfs_fs_info {
741 * ordered extents 757 * ordered extents
742 */ 758 */
743 spinlock_t ordered_extent_lock; 759 spinlock_t ordered_extent_lock;
760
761 /*
762 * all of the data=ordered extents pending writeback
763 * these can span multiple transactions and basically include
764 * every dirty data page that isn't from nodatacow
765 */
744 struct list_head ordered_extents; 766 struct list_head ordered_extents;
767
768 /*
769 * all of the inodes that have delalloc bytes. It is possible for
770 * this list to be empty even when there is still dirty data=ordered
771 * extents waiting to finish IO.
772 */
745 struct list_head delalloc_inodes; 773 struct list_head delalloc_inodes;
746 774
747 /* 775 /*
776 * special rename and truncate targets that must be on disk before
777 * we're allowed to commit. This is basically the ext3 style
778 * data=ordered list.
779 */
780 struct list_head ordered_operations;
781
782 /*
748 * there is a pool of worker threads for checksumming during writes 783 * there is a pool of worker threads for checksumming during writes
749 * and a pool for checksumming after reads. This is because readers 784 * and a pool for checksumming after reads. This is because readers
750 * can run with FS locks held, and the writers may be waiting for 785 * can run with FS locks held, and the writers may be waiting for
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9244cd7313d4..1747dfd18654 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1572,6 +1572,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1572 INIT_LIST_HEAD(&fs_info->dead_roots); 1572 INIT_LIST_HEAD(&fs_info->dead_roots);
1573 INIT_LIST_HEAD(&fs_info->hashers); 1573 INIT_LIST_HEAD(&fs_info->hashers);
1574 INIT_LIST_HEAD(&fs_info->delalloc_inodes); 1574 INIT_LIST_HEAD(&fs_info->delalloc_inodes);
1575 INIT_LIST_HEAD(&fs_info->ordered_operations);
1575 spin_lock_init(&fs_info->delalloc_lock); 1576 spin_lock_init(&fs_info->delalloc_lock);
1576 spin_lock_init(&fs_info->new_trans_lock); 1577 spin_lock_init(&fs_info->new_trans_lock);
1577 spin_lock_init(&fs_info->ref_cache_lock); 1578 spin_lock_init(&fs_info->ref_cache_lock);
@@ -1643,6 +1644,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1643 insert_inode_hash(fs_info->btree_inode); 1644 insert_inode_hash(fs_info->btree_inode);
1644 1645
1645 mutex_init(&fs_info->trans_mutex); 1646 mutex_init(&fs_info->trans_mutex);
1647 mutex_init(&fs_info->ordered_operations_mutex);
1646 mutex_init(&fs_info->tree_log_mutex); 1648 mutex_init(&fs_info->tree_log_mutex);
1647 mutex_init(&fs_info->drop_mutex); 1649 mutex_init(&fs_info->drop_mutex);
1648 mutex_init(&fs_info->pinned_mutex); 1650 mutex_init(&fs_info->pinned_mutex);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 32d10a617613..9c9fb46ccd08 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1161,6 +1161,20 @@ out_nolock:
1161 page_cache_release(pinned[1]); 1161 page_cache_release(pinned[1]);
1162 *ppos = pos; 1162 *ppos = pos;
1163 1163
1164 /*
1165 * we want to make sure fsync finds this change
1166 * but we haven't joined a transaction running right now.
1167 *
1168 * Later on, someone is sure to update the inode and get the
1169 * real transid recorded.
1170 *
1171 * We set last_trans now to the fs_info generation + 1,
1172 * this will either be one more than the running transaction
1173 * or the generation used for the next transaction if there isn't
1174 * one running right now.
1175 */
1176 BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
1177
1164 if (num_written > 0 && will_write) { 1178 if (num_written > 0 && will_write) {
1165 struct btrfs_trans_handle *trans; 1179 struct btrfs_trans_handle *trans;
1166 1180
@@ -1194,6 +1208,18 @@ out_nolock:
1194 1208
1195int btrfs_release_file(struct inode *inode, struct file *filp) 1209int btrfs_release_file(struct inode *inode, struct file *filp)
1196{ 1210{
1211 /*
 1212	 * ordered_data_close is set by setattr when we are about to truncate
1213 * a file from a non-zero size to a zero size. This tries to
1214 * flush down new bytes that may have been written if the
1215 * application were using truncate to replace a file in place.
1216 */
1217 if (BTRFS_I(inode)->ordered_data_close) {
1218 BTRFS_I(inode)->ordered_data_close = 0;
1219 btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode);
1220 if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
1221 filemap_flush(inode->i_mapping);
1222 }
1197 if (filp->private_data) 1223 if (filp->private_data)
1198 btrfs_ioctl_trans_end(filp); 1224 btrfs_ioctl_trans_end(filp);
1199 return 0; 1225 return 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index bffd79faffb5..1cff528d5b51 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2907,11 +2907,21 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
2907 if (err) 2907 if (err)
2908 return err; 2908 return err;
2909 2909
2910 if (S_ISREG(inode->i_mode) && 2910 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
2911 attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { 2911 if (attr->ia_size > inode->i_size) {
2912 err = btrfs_cont_expand(inode, attr->ia_size); 2912 err = btrfs_cont_expand(inode, attr->ia_size);
2913 if (err) 2913 if (err)
2914 return err; 2914 return err;
2915 } else if (inode->i_size > 0 &&
2916 attr->ia_size == 0) {
2917
2918 /* we're truncating a file that used to have good
2919 * data down to zero. Make sure it gets into
2920 * the ordered flush list so that any new writes
2921 * get down to disk quickly.
2922 */
2923 BTRFS_I(inode)->ordered_data_close = 1;
2924 }
2915 } 2925 }
2916 2926
2917 err = inode_setattr(inode, attr); 2927 err = inode_setattr(inode, attr);
@@ -3050,6 +3060,7 @@ static noinline void init_btrfs_i(struct inode *inode)
3050 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, 3060 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
3051 inode->i_mapping, GFP_NOFS); 3061 inode->i_mapping, GFP_NOFS);
3052 INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); 3062 INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes);
3063 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
3053 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); 3064 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
3054 mutex_init(&BTRFS_I(inode)->extent_mutex); 3065 mutex_init(&BTRFS_I(inode)->extent_mutex);
3055 mutex_init(&BTRFS_I(inode)->log_mutex); 3066 mutex_init(&BTRFS_I(inode)->log_mutex);
@@ -4419,6 +4430,8 @@ again:
4419 } 4430 }
4420 ClearPageChecked(page); 4431 ClearPageChecked(page);
4421 set_page_dirty(page); 4432 set_page_dirty(page);
4433
4434 BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
4422 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 4435 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
4423 4436
4424out_unlock: 4437out_unlock:
@@ -4444,6 +4457,27 @@ static void btrfs_truncate(struct inode *inode)
4444 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 4457 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
4445 4458
4446 trans = btrfs_start_transaction(root, 1); 4459 trans = btrfs_start_transaction(root, 1);
4460
4461 /*
4462 * setattr is responsible for setting the ordered_data_close flag,
4463 * but that is only tested during the last file release. That
4464 * could happen well after the next commit, leaving a great big
4465 * window where new writes may get lost if someone chooses to write
4466 * to this file after truncating to zero
4467 *
4468 * The inode doesn't have any dirty data here, and so if we commit
4469 * this is a noop. If someone immediately starts writing to the inode
4470 * it is very likely we'll catch some of their writes in this
4471 * transaction, and the commit will find this file on the ordered
4472 * data list with good things to send down.
4473 *
4474 * This is a best effort solution, there is still a window where
4475 * using truncate to replace the contents of the file will
4476 * end up with a zero length file after a crash.
4477 */
4478 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
4479 btrfs_add_ordered_operation(trans, root, inode);
4480
4447 btrfs_set_trans_block_group(trans, inode); 4481 btrfs_set_trans_block_group(trans, inode);
4448 btrfs_i_size_write(inode, inode->i_size); 4482 btrfs_i_size_write(inode, inode->i_size);
4449 4483
@@ -4520,12 +4554,15 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
4520 ei->i_acl = BTRFS_ACL_NOT_CACHED; 4554 ei->i_acl = BTRFS_ACL_NOT_CACHED;
4521 ei->i_default_acl = BTRFS_ACL_NOT_CACHED; 4555 ei->i_default_acl = BTRFS_ACL_NOT_CACHED;
4522 INIT_LIST_HEAD(&ei->i_orphan); 4556 INIT_LIST_HEAD(&ei->i_orphan);
4557 INIT_LIST_HEAD(&ei->ordered_operations);
4523 return &ei->vfs_inode; 4558 return &ei->vfs_inode;
4524} 4559}
4525 4560
4526void btrfs_destroy_inode(struct inode *inode) 4561void btrfs_destroy_inode(struct inode *inode)
4527{ 4562{
4528 struct btrfs_ordered_extent *ordered; 4563 struct btrfs_ordered_extent *ordered;
4564 struct btrfs_root *root = BTRFS_I(inode)->root;
4565
4529 WARN_ON(!list_empty(&inode->i_dentry)); 4566 WARN_ON(!list_empty(&inode->i_dentry));
4530 WARN_ON(inode->i_data.nrpages); 4567 WARN_ON(inode->i_data.nrpages);
4531 4568
@@ -4536,13 +4573,24 @@ void btrfs_destroy_inode(struct inode *inode)
4536 BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED) 4573 BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED)
4537 posix_acl_release(BTRFS_I(inode)->i_default_acl); 4574 posix_acl_release(BTRFS_I(inode)->i_default_acl);
4538 4575
4539 spin_lock(&BTRFS_I(inode)->root->list_lock); 4576 /*
4577 * Make sure we're properly removed from the ordered operation
4578 * lists.
4579 */
4580 smp_mb();
4581 if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
4582 spin_lock(&root->fs_info->ordered_extent_lock);
4583 list_del_init(&BTRFS_I(inode)->ordered_operations);
4584 spin_unlock(&root->fs_info->ordered_extent_lock);
4585 }
4586
4587 spin_lock(&root->list_lock);
4540 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 4588 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
4541 printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" 4589 printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan"
4542 " list\n", inode->i_ino); 4590 " list\n", inode->i_ino);
4543 dump_stack(); 4591 dump_stack();
4544 } 4592 }
4545 spin_unlock(&BTRFS_I(inode)->root->list_lock); 4593 spin_unlock(&root->list_lock);
4546 4594
4547 while (1) { 4595 while (1) {
4548 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); 4596 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
@@ -4667,9 +4715,28 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4667 if (ret) 4715 if (ret)
4668 goto out_unlock; 4716 goto out_unlock;
4669 4717
4718 /*
4719 * we're using rename to replace one file with another.
4720 * and the replacement file is large. Start IO on it now so
4721 * we don't add too much work to the end of the transaction
4722 */
4723 if (new_inode && old_inode && S_ISREG(old_inode->i_mode) &&
4724 new_inode->i_size &&
4725 old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
4726 filemap_flush(old_inode->i_mapping);
4727
4670 trans = btrfs_start_transaction(root, 1); 4728 trans = btrfs_start_transaction(root, 1);
4671 4729
4672 /* 4730 /*
4731 * make sure the inode gets flushed if it is replacing
4732 * something.
4733 */
4734 if (new_inode && new_inode->i_size &&
4735 old_inode && S_ISREG(old_inode->i_mode)) {
4736 btrfs_add_ordered_operation(trans, root, old_inode);
4737 }
4738
4739 /*
4673 * this is an ugly little race, but the rename is required to make 4740 * this is an ugly little race, but the rename is required to make
4674 * sure that if we crash, the inode is either at the old name 4741 * sure that if we crash, the inode is either at the old name
4675 * or the new one. pinning the log transaction lets us make sure 4742 * or the new one. pinning the log transaction lets us make sure
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 77c2411a5f0f..53c87b197d70 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -310,6 +310,16 @@ int btrfs_remove_ordered_extent(struct inode *inode,
310 310
311 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 311 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
312 list_del_init(&entry->root_extent_list); 312 list_del_init(&entry->root_extent_list);
313
314 /*
315 * we have no more ordered extents for this inode and
316 * no dirty pages. We can safely remove it from the
317 * list of ordered extents
318 */
319 if (RB_EMPTY_ROOT(&tree->tree) &&
320 !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
321 list_del_init(&BTRFS_I(inode)->ordered_operations);
322 }
313 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 323 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
314 324
315 mutex_unlock(&tree->mutex); 325 mutex_unlock(&tree->mutex);
@@ -370,6 +380,68 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
370} 380}
371 381
372/* 382/*
383 * this is used during transaction commit to write all the inodes
384 * added to the ordered operation list. These files must be fully on
385 * disk before the transaction commits.
386 *
387 * we have two modes here, one is to just start the IO via filemap_flush
388 * and the other is to wait for all the io. When we wait, we have an
389 * extra check to make sure the ordered operation list really is empty
390 * before we return
391 */
392int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
393{
394 struct btrfs_inode *btrfs_inode;
395 struct inode *inode;
396 struct list_head splice;
397
398 INIT_LIST_HEAD(&splice);
399
400 mutex_lock(&root->fs_info->ordered_operations_mutex);
401 spin_lock(&root->fs_info->ordered_extent_lock);
402again:
403 list_splice_init(&root->fs_info->ordered_operations, &splice);
404
405 while (!list_empty(&splice)) {
406 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
407 ordered_operations);
408
409 inode = &btrfs_inode->vfs_inode;
410
411 list_del_init(&btrfs_inode->ordered_operations);
412
413 /*
414 * the inode may be getting freed (in sys_unlink path).
415 */
416 inode = igrab(inode);
417
418 if (!wait && inode) {
419 list_add_tail(&BTRFS_I(inode)->ordered_operations,
420 &root->fs_info->ordered_operations);
421 }
422 spin_unlock(&root->fs_info->ordered_extent_lock);
423
424 if (inode) {
425 if (wait)
426 btrfs_wait_ordered_range(inode, 0, (u64)-1);
427 else
428 filemap_flush(inode->i_mapping);
429 iput(inode);
430 }
431
432 cond_resched();
433 spin_lock(&root->fs_info->ordered_extent_lock);
434 }
435 if (wait && !list_empty(&root->fs_info->ordered_operations))
436 goto again;
437
438 spin_unlock(&root->fs_info->ordered_extent_lock);
439 mutex_unlock(&root->fs_info->ordered_operations_mutex);
440
441 return 0;
442}
443
444/*
373 * Used to start IO or wait for a given ordered extent to finish. 445 * Used to start IO or wait for a given ordered extent to finish.
374 * 446 *
375 * If wait is one, this effectively waits on page writeback for all the pages 447 * If wait is one, this effectively waits on page writeback for all the pages
@@ -726,3 +798,49 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
726 798
727 return ret; 799 return ret;
728} 800}
801
802/*
803 * add a given inode to the list of inodes that must be fully on
804 * disk before a transaction commit finishes.
805 *
806 * This basically gives us the ext3 style data=ordered mode, and it is mostly
807 * used to make sure renamed files are fully on disk.
808 *
809 * It is a noop if the inode is already fully on disk.
810 *
811 * If trans is not null, we'll do a friendly check for a transaction that
812 * is already flushing things and force the IO down ourselves.
813 */
814int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
815 struct btrfs_root *root,
816 struct inode *inode)
817{
818 u64 last_mod;
819
820 last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);
821
822 /*
823 * if this file hasn't been changed since the last transaction
824 * commit, we can safely return without doing anything
825 */
826 if (last_mod < root->fs_info->last_trans_committed)
827 return 0;
828
829 /*
830 * the transaction is already committing. Just start the IO and
831 * don't bother with all of this list nonsense
832 */
833 if (trans && root->fs_info->running_transaction->blocked) {
834 btrfs_wait_ordered_range(inode, 0, (u64)-1);
835 return 0;
836 }
837
838 spin_lock(&root->fs_info->ordered_extent_lock);
839 if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
840 list_add_tail(&BTRFS_I(inode)->ordered_operations,
841 &root->fs_info->ordered_operations);
842 }
843 spin_unlock(&root->fs_info->ordered_extent_lock);
844
845 return 0;
846}
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index ab66d5e8d6d6..3d31c8827b01 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -155,4 +155,8 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
155int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, 155int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
156 loff_t end, int sync_mode); 156 loff_t end, int sync_mode);
157int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); 157int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
158int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
159int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
160 struct btrfs_root *root,
161 struct inode *inode);
158#endif 162#endif
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 9c8f158dd2db..664782c6a2df 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -975,6 +975,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
975 int should_grow = 0; 975 int should_grow = 0;
976 unsigned long now = get_seconds(); 976 unsigned long now = get_seconds();
977 977
978 btrfs_run_ordered_operations(root, 0);
979
978 /* make a pass through all the delayed refs we have so far 980 /* make a pass through all the delayed refs we have so far
979 * any runnings procs may add more while we are here 981 * any runnings procs may add more while we are here
980 */ 982 */
@@ -1056,6 +1058,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1056 BUG_ON(ret); 1058 BUG_ON(ret);
1057 } 1059 }
1058 1060
1061 /*
 1062	 * rename doesn't use btrfs_join_transaction, so, once we
1063 * set the transaction to blocked above, we aren't going
1064 * to get any new ordered operations. We can safely run
 1065	 * it here and know for sure that nothing new will be added
1066 * to the list
1067 */
1068 btrfs_run_ordered_operations(root, 1);
1069
1059 smp_mb(); 1070 smp_mb();
1060 if (cur_trans->num_writers > 1 || should_grow) 1071 if (cur_trans->num_writers > 1 || should_grow)
1061 schedule_timeout(timeout); 1072 schedule_timeout(timeout);