aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent_io.c
diff options
context:
space:
mode:
author Chris Mason <chris.mason@oracle.com> 2009-02-04 09:25:08 -0500
committer Chris Mason <chris.mason@oracle.com> 2009-02-04 09:25:08 -0500
commit b4ce94de9b4d64e8ab3cf155d13653c666e22b9b (patch)
tree ebc44a9554a50b495b091cb0979d79fd29e50fe7 /fs/btrfs/extent_io.c
parent c487685d7c18a8481900755aa5c56a7a74193101 (diff)
Btrfs: Change btree locking to use explicit blocking points
Most of the btrfs metadata operations can be protected by a spinlock, but some operations still need to schedule.

So far, btrfs has been using a mutex along with a trylock loop. Most of the time it is able to avoid going for the full mutex, so the trylock loop is a big performance gain.

This commit is step one for getting rid of the blocking locks entirely. btrfs_tree_lock takes a spinlock, and the code explicitly switches to a blocking lock when it starts an operation that can schedule.

We'll be able to get rid of the blocking locks in smaller pieces over time. Tracing allows us to find the most common cause of blocking, so we can start with the hot spots first.

The basic idea is:

btrfs_tree_lock() returns with the spin lock held.

btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in the extent buffer flags, and then drops the spin lock. The buffer is still considered locked by all of the btrfs code.

If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops the spin lock and waits on a wait queue for the blocking bit to go away.

Much of the code that needs to set the blocking bit finishes without actually blocking a good percentage of the time. So, an adaptive spin is still used against the blocking bit to avoid very high context switch rates.

btrfs_clear_lock_blocking() clears the blocking bit and returns with the spinlock held again.

btrfs_tree_unlock() can be called on either blocking or spinning locks; it does the right thing based on the blocking bit.

ctree.c has a helper function to set/clear all the locked buffers in a path as blocking.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r-- fs/btrfs/extent_io.c | 18
1 files changed, 9 insertions, 9 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2ea7f052722c..dd5df53e045a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2990,7 +2990,9 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
2990 eb = kmem_cache_zalloc(extent_buffer_cache, mask); 2990 eb = kmem_cache_zalloc(extent_buffer_cache, mask);
2991 eb->start = start; 2991 eb->start = start;
2992 eb->len = len; 2992 eb->len = len;
2993 mutex_init(&eb->mutex); 2993 spin_lock_init(&eb->lock);
2994 init_waitqueue_head(&eb->lock_wq);
2995
2994#if LEAK_DEBUG 2996#if LEAK_DEBUG
2995 spin_lock_irqsave(&leak_lock, flags); 2997 spin_lock_irqsave(&leak_lock, flags);
2996 list_add(&eb->leak_list, &buffers); 2998 list_add(&eb->leak_list, &buffers);
@@ -3071,8 +3073,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3071 unlock_page(p); 3073 unlock_page(p);
3072 } 3074 }
3073 if (uptodate) 3075 if (uptodate)
3074 eb->flags |= EXTENT_UPTODATE; 3076 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
3075 eb->flags |= EXTENT_BUFFER_FILLED;
3076 3077
3077 spin_lock(&tree->buffer_lock); 3078 spin_lock(&tree->buffer_lock);
3078 exists = buffer_tree_insert(tree, start, &eb->rb_node); 3079 exists = buffer_tree_insert(tree, start, &eb->rb_node);
@@ -3226,7 +3227,7 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
3226 unsigned long num_pages; 3227 unsigned long num_pages;
3227 3228
3228 num_pages = num_extent_pages(eb->start, eb->len); 3229 num_pages = num_extent_pages(eb->start, eb->len);
3229 eb->flags &= ~EXTENT_UPTODATE; 3230 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
3230 3231
3231 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3232 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3232 GFP_NOFS); 3233 GFP_NOFS);
@@ -3297,7 +3298,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree,
3297 struct page *page; 3298 struct page *page;
3298 int pg_uptodate = 1; 3299 int pg_uptodate = 1;
3299 3300
3300 if (eb->flags & EXTENT_UPTODATE) 3301 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
3301 return 1; 3302 return 1;
3302 3303
3303 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, 3304 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
@@ -3333,7 +3334,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
3333 struct bio *bio = NULL; 3334 struct bio *bio = NULL;
3334 unsigned long bio_flags = 0; 3335 unsigned long bio_flags = 0;
3335 3336
3336 if (eb->flags & EXTENT_UPTODATE) 3337 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
3337 return 0; 3338 return 0;
3338 3339
3339 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, 3340 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
@@ -3364,7 +3365,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
3364 } 3365 }
3365 if (all_uptodate) { 3366 if (all_uptodate) {
3366 if (start_i == 0) 3367 if (start_i == 0)
3367 eb->flags |= EXTENT_UPTODATE; 3368 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
3368 goto unlock_exit; 3369 goto unlock_exit;
3369 } 3370 }
3370 3371
@@ -3400,7 +3401,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
3400 } 3401 }
3401 3402
3402 if (!ret) 3403 if (!ret)
3403 eb->flags |= EXTENT_UPTODATE; 3404 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
3404 return ret; 3405 return ret;
3405 3406
3406unlock_exit: 3407unlock_exit:
@@ -3497,7 +3498,6 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
3497 unmap_extent_buffer(eb, eb->map_token, km); 3498 unmap_extent_buffer(eb, eb->map_token, km);
3498 eb->map_token = NULL; 3499 eb->map_token = NULL;
3499 save = 1; 3500 save = 1;
3500 WARN_ON(!mutex_is_locked(&eb->mutex));
3501 } 3501 }
3502 err = map_private_extent_buffer(eb, start, min_len, token, map, 3502 err = map_private_extent_buffer(eb, start, min_len, token, map,
3503 map_start, map_len, km); 3503 map_start, map_len, km);