aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
author Chris Mason <chris.mason@oracle.com> 2009-03-13 10:10:06 -0400
committer Chris Mason <chris.mason@oracle.com> 2009-03-24 16:14:25 -0400
commit 56bec294dea971335d4466b30f2d959f28f6e36d (patch)
tree fc0b5bbf4bb6ab35582a4c7f58f5ac88f71c38bf /fs/btrfs/disk-io.c
parent 9fa8cfe706f9c20067c042a064999d5825a35330 (diff)
Btrfs: do extent allocation and reference count updates in the background
The extent allocation tree maintains a reference count and full back reference information for every extent allocated in the filesystem. For subvolume and snapshot trees, every time a block goes through COW, the new copy of the block adds a reference on every block it points to.

If a btree node points to 150 leaves, then the COW code needs to go and add backrefs on 150 different extents, which might be spread all over the extent allocation tree.

These updates currently happen during btrfs_cow_block, and most COWs happen during btrfs_search_slot. btrfs_search_slot has locks held on both the parent and the node we are COWing, and so we really want to avoid IO during the COW if we can.

This commit adds an rbtree of pending reference count updates and extent allocations. The tree is ordered by byte number of the extent and byte number of the parent for the back reference. The tree allows us to:

1) Modify back references in something close to disk order, reducing seeks
2) Significantly reduce the number of modifications made as block pointers are balanced around
3) Do all of the extent insertion and back reference modifications outside of the performance-critical btrfs_search_slot code.

#3 has the added benefit of greatly reducing the btrfs stack footprint. The extent allocation tree modifications are done without the deep (and somewhat recursive) call chains used in the past.

These delayed back reference updates must be done before the transaction commits, and so the rbtree is tied to the transaction. Throttling is implemented to help keep the queue of backrefs at a reasonable size.

Since there was a similar mechanism in place for the extent tree extents, that is removed and replaced by the delayed reference tree.

Yan Zheng <yan.zheng@oracle.com> helped review and fix up this code.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- fs/btrfs/disk-io.c 29
1 files changed, 18 insertions, 11 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3e18175248e0..4f43e227a297 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1458,6 +1458,7 @@ static int transaction_kthread(void *arg)
1458 struct btrfs_root *root = arg; 1458 struct btrfs_root *root = arg;
1459 struct btrfs_trans_handle *trans; 1459 struct btrfs_trans_handle *trans;
1460 struct btrfs_transaction *cur; 1460 struct btrfs_transaction *cur;
1461 struct btrfs_fs_info *info = root->fs_info;
1461 unsigned long now; 1462 unsigned long now;
1462 unsigned long delay; 1463 unsigned long delay;
1463 int ret; 1464 int ret;
@@ -1471,12 +1472,6 @@ static int transaction_kthread(void *arg)
1471 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); 1472 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1472 mutex_lock(&root->fs_info->transaction_kthread_mutex); 1473 mutex_lock(&root->fs_info->transaction_kthread_mutex);
1473 1474
1474 if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) {
1475 printk(KERN_INFO "btrfs: total reference cache "
1476 "size %llu\n",
1477 root->fs_info->total_ref_cache_size);
1478 }
1479
1480 mutex_lock(&root->fs_info->trans_mutex); 1475 mutex_lock(&root->fs_info->trans_mutex);
1481 cur = root->fs_info->running_transaction; 1476 cur = root->fs_info->running_transaction;
1482 if (!cur) { 1477 if (!cur) {
@@ -1486,13 +1481,30 @@ static int transaction_kthread(void *arg)
1486 1481
1487 now = get_seconds(); 1482 now = get_seconds();
1488 if (now < cur->start_time || now - cur->start_time < 30) { 1483 if (now < cur->start_time || now - cur->start_time < 30) {
1484 unsigned long num_delayed;
1485 num_delayed = cur->delayed_refs.num_entries;
1489 mutex_unlock(&root->fs_info->trans_mutex); 1486 mutex_unlock(&root->fs_info->trans_mutex);
1490 delay = HZ * 5; 1487 delay = HZ * 5;
1488
1489 /*
1490 * we may have been woken up early to start
1491 * processing the delayed extent ref updates
1492 * If so, run some of them and then loop around again
1493 * to see if we need to force a commit
1494 */
1495 if (num_delayed > 64) {
1496 mutex_unlock(&info->transaction_kthread_mutex);
1497 trans = btrfs_start_transaction(root, 1);
1498 btrfs_run_delayed_refs(trans, root, 256);
1499 btrfs_end_transaction(trans, root);
1500 continue;
1501 }
1491 goto sleep; 1502 goto sleep;
1492 } 1503 }
1493 mutex_unlock(&root->fs_info->trans_mutex); 1504 mutex_unlock(&root->fs_info->trans_mutex);
1494 trans = btrfs_start_transaction(root, 1); 1505 trans = btrfs_start_transaction(root, 1);
1495 ret = btrfs_commit_transaction(trans, root); 1506 ret = btrfs_commit_transaction(trans, root);
1507
1496sleep: 1508sleep:
1497 wake_up_process(root->fs_info->cleaner_kthread); 1509 wake_up_process(root->fs_info->cleaner_kthread);
1498 mutex_unlock(&root->fs_info->transaction_kthread_mutex); 1510 mutex_unlock(&root->fs_info->transaction_kthread_mutex);
@@ -1611,10 +1623,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1611 1623
1612 extent_io_tree_init(&fs_info->pinned_extents, 1624 extent_io_tree_init(&fs_info->pinned_extents,
1613 fs_info->btree_inode->i_mapping, GFP_NOFS); 1625 fs_info->btree_inode->i_mapping, GFP_NOFS);
1614 extent_io_tree_init(&fs_info->pending_del,
1615 fs_info->btree_inode->i_mapping, GFP_NOFS);
1616 extent_io_tree_init(&fs_info->extent_ins,
1617 fs_info->btree_inode->i_mapping, GFP_NOFS);
1618 fs_info->do_barriers = 1; 1626 fs_info->do_barriers = 1;
1619 1627
1620 INIT_LIST_HEAD(&fs_info->dead_reloc_roots); 1628 INIT_LIST_HEAD(&fs_info->dead_reloc_roots);
@@ -1629,7 +1637,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1629 mutex_init(&fs_info->trans_mutex); 1637 mutex_init(&fs_info->trans_mutex);
1630 mutex_init(&fs_info->tree_log_mutex); 1638 mutex_init(&fs_info->tree_log_mutex);
1631 mutex_init(&fs_info->drop_mutex); 1639 mutex_init(&fs_info->drop_mutex);
1632 mutex_init(&fs_info->extent_ins_mutex);
1633 mutex_init(&fs_info->pinned_mutex); 1640 mutex_init(&fs_info->pinned_mutex);
1634 mutex_init(&fs_info->chunk_mutex); 1641 mutex_init(&fs_info->chunk_mutex);
1635 mutex_init(&fs_info->transaction_kthread_mutex); 1642 mutex_init(&fs_info->transaction_kthread_mutex);