aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ordered-data.c
diff options
context:
space:
mode:
authorJosef Bacik <jbacik@fb.com>2015-09-24 16:17:39 -0400
committerChris Mason <clm@fb.com>2015-10-21 21:51:40 -0400
commit161c3549b45aeef05451b6822d8aaaf39c7bedce (patch)
treef14c534323f7b8d6bf181c9610df66313358360b /fs/btrfs/ordered-data.c
parenta408365c62762c30419018587cffd2b89836434e (diff)
Btrfs: change how we wait for pending ordered extents
We have a mechanism to make sure we don't lose updates for ordered extents that were logged in the transaction that is currently running. We add the ordered extent to a transaction list and then the transaction waits on all the ordered extents in that list. However, on substantially large file systems this list can be extremely large, and can give us soft lockups, since the ordered extents don't remove themselves from the list when they do complete. To fix this we simply add a counter to the transaction that is incremented any time we have a logged extent that needs to be completed in the current transaction. Then when the ordered extent finally completes it decrements the per-transaction counter and wakes up the transaction if we are the last ones. This will eliminate the softlockup. Thanks, Signed-off-by: Josef Bacik <jbacik@fb.com> Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs/ordered-data.c')
-rw-r--r--fs/btrfs/ordered-data.c64
1 files changed, 50 insertions, 14 deletions
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 071005f008c1..8c27292ea9ea 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -490,15 +490,16 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
490 490
491 spin_lock_irq(&log->log_extents_lock[index]); 491 spin_lock_irq(&log->log_extents_lock[index]);
492 while (!list_empty(&log->logged_list[index])) { 492 while (!list_empty(&log->logged_list[index])) {
493 struct inode *inode;
493 ordered = list_first_entry(&log->logged_list[index], 494 ordered = list_first_entry(&log->logged_list[index],
494 struct btrfs_ordered_extent, 495 struct btrfs_ordered_extent,
495 log_list); 496 log_list);
496 list_del_init(&ordered->log_list); 497 list_del_init(&ordered->log_list);
498 inode = ordered->inode;
497 spin_unlock_irq(&log->log_extents_lock[index]); 499 spin_unlock_irq(&log->log_extents_lock[index]);
498 500
499 if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) && 501 if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
500 !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) { 502 !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
501 struct inode *inode = ordered->inode;
502 u64 start = ordered->file_offset; 503 u64 start = ordered->file_offset;
503 u64 end = ordered->file_offset + ordered->len - 1; 504 u64 end = ordered->file_offset + ordered->len - 1;
504 505
@@ -509,20 +510,25 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
509 &ordered->flags)); 510 &ordered->flags));
510 511
511 /* 512 /*
512 * If our ordered extent completed it means it updated the 513 * In order to keep us from losing our ordered extent
513 * fs/subvol and csum trees already, so no need to make the 514 * information when committing the transaction we have to make
514 * current transaction's commit wait for it, as we end up 515 * sure that any logged extents are completed when we go to
515 * holding memory unnecessarily and delaying the inode's iput 516 * commit the transaction. To do this we simply increase the
516 * until the transaction commit (we schedule an iput for the 517 * current transactions pending_ordered counter and decrement it
517 * inode when the ordered extent's refcount drops to 0), which 518 * when the ordered extent completes.
518 * prevents it from being evictable until the transaction
519 * commits.
520 */ 519 */
521 if (test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) 520 if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
522 btrfs_put_ordered_extent(ordered); 521 struct btrfs_ordered_inode_tree *tree;
523 else 522
524 list_add_tail(&ordered->trans_list, &trans->ordered); 523 tree = &BTRFS_I(inode)->ordered_tree;
525 524 spin_lock_irq(&tree->lock);
525 if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
526 set_bit(BTRFS_ORDERED_PENDING, &ordered->flags);
527 atomic_inc(&trans->transaction->pending_ordered);
528 }
529 spin_unlock_irq(&tree->lock);
530 }
531 btrfs_put_ordered_extent(ordered);
526 spin_lock_irq(&log->log_extents_lock[index]); 532 spin_lock_irq(&log->log_extents_lock[index]);
527 } 533 }
528 spin_unlock_irq(&log->log_extents_lock[index]); 534 spin_unlock_irq(&log->log_extents_lock[index]);
@@ -584,6 +590,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
584 struct btrfs_ordered_inode_tree *tree; 590 struct btrfs_ordered_inode_tree *tree;
585 struct btrfs_root *root = BTRFS_I(inode)->root; 591 struct btrfs_root *root = BTRFS_I(inode)->root;
586 struct rb_node *node; 592 struct rb_node *node;
593 bool dec_pending_ordered = false;
587 594
588 tree = &BTRFS_I(inode)->ordered_tree; 595 tree = &BTRFS_I(inode)->ordered_tree;
589 spin_lock_irq(&tree->lock); 596 spin_lock_irq(&tree->lock);
@@ -593,8 +600,37 @@ void btrfs_remove_ordered_extent(struct inode *inode,
593 if (tree->last == node) 600 if (tree->last == node)
594 tree->last = NULL; 601 tree->last = NULL;
595 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); 602 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
603 if (test_and_clear_bit(BTRFS_ORDERED_PENDING, &entry->flags))
604 dec_pending_ordered = true;
596 spin_unlock_irq(&tree->lock); 605 spin_unlock_irq(&tree->lock);
597 606
607 /*
608 * The current running transaction is waiting on us, we need to let it
609 * know that we're complete and wake it up.
610 */
611 if (dec_pending_ordered) {
612 struct btrfs_transaction *trans;
613
614 /*
615 * The checks for trans are just a formality, it should be set,
616 * but if it isn't we don't want to deref/assert under the spin
617 * lock, so be nice and check if trans is set, but ASSERT() so
618 * if it isn't set a developer will notice.
619 */
620 spin_lock(&root->fs_info->trans_lock);
621 trans = root->fs_info->running_transaction;
622 if (trans)
623 atomic_inc(&trans->use_count);
624 spin_unlock(&root->fs_info->trans_lock);
625
626 ASSERT(trans);
627 if (trans) {
628 if (atomic_dec_and_test(&trans->pending_ordered))
629 wake_up(&trans->pending_wait);
630 btrfs_put_transaction(trans);
631 }
632 }
633
598 spin_lock(&root->ordered_extent_lock); 634 spin_lock(&root->ordered_extent_lock);
599 list_del_init(&entry->root_extent_list); 635 list_del_init(&entry->root_extent_list);
600 root->nr_ordered_extents--; 636 root->nr_ordered_extents--;