about summary refs log tree commit diff stats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2011-07-16 15:23:14 -0400
committerChris Mason <chris.mason@oracle.com>2011-07-27 12:46:46 -0400
commitbd681513fa6f2ff29aa391f01e413a2d1c59fd77 (patch)
treebb10ec6ef876b4d7a553cbe54976ec49a0d10b21 /fs/btrfs/extent-tree.c
parent81317fdeddcef259b6ecf7b5c0d04caa167c6b54 (diff)
Btrfs: switch the btrfs tree locks to reader/writer
The btrfs metadata btree is the source of significant lock contention, especially in the root node. This commit changes our locking to use a reader/writer lock. The lock is built on top of rw spinlocks, and it extends the lock tracking to remember if we have a read lock or a write lock when we go to blocking. Atomics count the number of blocking readers or writers at any given time. It removes all of the adaptive spinning from the old code and uses only the spinning/blocking hints inside of btrfs to decide when it should continue spinning. In read heavy workloads this is dramatically faster. In write heavy workloads we're still faster because of less contention on the root node lock. We suffer slightly in dbench because we schedule more often during write locks, but all other benchmarks so far are improved. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- fs/btrfs/extent-tree.c 20
1 file changed, 10 insertions, 10 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 7021dde74d81..2a782c2fcb62 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5912,7 +5912,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
5912 return 1; 5912 return 1;
5913 5913
5914 if (path->locks[level] && !wc->keep_locks) { 5914 if (path->locks[level] && !wc->keep_locks) {
5915 btrfs_tree_unlock(eb); 5915 btrfs_tree_unlock_rw(eb, path->locks[level]);
5916 path->locks[level] = 0; 5916 path->locks[level] = 0;
5917 } 5917 }
5918 return 0; 5918 return 0;
@@ -5936,7 +5936,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
5936 * keep the tree lock 5936 * keep the tree lock
5937 */ 5937 */
5938 if (path->locks[level] && level > 0) { 5938 if (path->locks[level] && level > 0) {
5939 btrfs_tree_unlock(eb); 5939 btrfs_tree_unlock_rw(eb, path->locks[level]);
5940 path->locks[level] = 0; 5940 path->locks[level] = 0;
5941 } 5941 }
5942 return 0; 5942 return 0;
@@ -6049,7 +6049,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
6049 BUG_ON(level != btrfs_header_level(next)); 6049 BUG_ON(level != btrfs_header_level(next));
6050 path->nodes[level] = next; 6050 path->nodes[level] = next;
6051 path->slots[level] = 0; 6051 path->slots[level] = 0;
6052 path->locks[level] = 1; 6052 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
6053 wc->level = level; 6053 wc->level = level;
6054 if (wc->level == 1) 6054 if (wc->level == 1)
6055 wc->reada_slot = 0; 6055 wc->reada_slot = 0;
@@ -6120,7 +6120,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
6120 BUG_ON(level == 0); 6120 BUG_ON(level == 0);
6121 btrfs_tree_lock(eb); 6121 btrfs_tree_lock(eb);
6122 btrfs_set_lock_blocking(eb); 6122 btrfs_set_lock_blocking(eb);
6123 path->locks[level] = 1; 6123 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
6124 6124
6125 ret = btrfs_lookup_extent_info(trans, root, 6125 ret = btrfs_lookup_extent_info(trans, root,
6126 eb->start, eb->len, 6126 eb->start, eb->len,
@@ -6129,8 +6129,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
6129 BUG_ON(ret); 6129 BUG_ON(ret);
6130 BUG_ON(wc->refs[level] == 0); 6130 BUG_ON(wc->refs[level] == 0);
6131 if (wc->refs[level] == 1) { 6131 if (wc->refs[level] == 1) {
6132 btrfs_tree_unlock(eb); 6132 btrfs_tree_unlock_rw(eb, path->locks[level]);
6133 path->locks[level] = 0;
6134 return 1; 6133 return 1;
6135 } 6134 }
6136 } 6135 }
@@ -6152,7 +6151,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
6152 btrfs_header_generation(eb) == trans->transid) { 6151 btrfs_header_generation(eb) == trans->transid) {
6153 btrfs_tree_lock(eb); 6152 btrfs_tree_lock(eb);
6154 btrfs_set_lock_blocking(eb); 6153 btrfs_set_lock_blocking(eb);
6155 path->locks[level] = 1; 6154 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
6156 } 6155 }
6157 clean_tree_block(trans, root, eb); 6156 clean_tree_block(trans, root, eb);
6158 } 6157 }
@@ -6231,7 +6230,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
6231 return 0; 6230 return 0;
6232 6231
6233 if (path->locks[level]) { 6232 if (path->locks[level]) {
6234 btrfs_tree_unlock(path->nodes[level]); 6233 btrfs_tree_unlock_rw(path->nodes[level],
6234 path->locks[level]);
6235 path->locks[level] = 0; 6235 path->locks[level] = 0;
6236 } 6236 }
6237 free_extent_buffer(path->nodes[level]); 6237 free_extent_buffer(path->nodes[level]);
@@ -6283,7 +6283,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6283 path->nodes[level] = btrfs_lock_root_node(root); 6283 path->nodes[level] = btrfs_lock_root_node(root);
6284 btrfs_set_lock_blocking(path->nodes[level]); 6284 btrfs_set_lock_blocking(path->nodes[level]);
6285 path->slots[level] = 0; 6285 path->slots[level] = 0;
6286 path->locks[level] = 1; 6286 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
6287 memset(&wc->update_progress, 0, 6287 memset(&wc->update_progress, 0,
6288 sizeof(wc->update_progress)); 6288 sizeof(wc->update_progress));
6289 } else { 6289 } else {
@@ -6451,7 +6451,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
6451 level = btrfs_header_level(node); 6451 level = btrfs_header_level(node);
6452 path->nodes[level] = node; 6452 path->nodes[level] = node;
6453 path->slots[level] = 0; 6453 path->slots[level] = 0;
6454 path->locks[level] = 1; 6454 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
6455 6455
6456 wc->refs[parent_level] = 1; 6456 wc->refs[parent_level] = 1;
6457 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; 6457 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;