aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2008-06-25 16:01:30 -0400
committerChris Mason <chris.mason@oracle.com>2008-09-25 11:04:03 -0400
commit051e1b9f748ae673b7325d3fc049bb838606cffa (patch)
tree82d9d98a56ed776df8eee270dc9e11d7f0eb9d56
parenta213501153fd66e2359e091b1612841305ba6551 (diff)
Drop locks in btrfs_search_slot when reading a tree block.
One lock per btree block can make for significant congestion if everyone has to wait for IO at the high levels of the btree. This drops locks held by a path when doing reads during a tree search. Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--fs/btrfs/ctree.c74
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/btrfs/disk-io.c1
-rw-r--r--fs/btrfs/extent-tree.c1
4 files changed, 38 insertions, 39 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index dff4da082d06..1b756fae2799 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -63,7 +63,6 @@ void btrfs_free_path(struct btrfs_path *p)
63void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) 63void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
64{ 64{
65 int i; 65 int i;
66 int skip = p->skip_locking;
67 int keep = p->keep_locks; 66 int keep = p->keep_locks;
68 67
69 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 68 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
@@ -76,7 +75,6 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
76 free_extent_buffer(p->nodes[i]); 75 free_extent_buffer(p->nodes[i]);
77 } 76 }
78 memset(p, 0, sizeof(*p)); 77 memset(p, 0, sizeof(*p));
79 p->skip_locking = skip;
80 p->keep_locks = keep; 78 p->keep_locks = keep;
81} 79}
82 80
@@ -1137,7 +1135,6 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
1137 return; 1135 return;
1138 1136
1139 node = path->nodes[level]; 1137 node = path->nodes[level];
1140 WARN_ON(!path->skip_locking && !btrfs_tree_locked(node));
1141 1138
1142 search = btrfs_node_blockptr(node, slot); 1139 search = btrfs_node_blockptr(node, slot);
1143 blocksize = btrfs_level_size(root, level - 1); 1140 blocksize = btrfs_level_size(root, level - 1);
@@ -1192,6 +1189,7 @@ static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock)
1192{ 1189{
1193 int i; 1190 int i;
1194 int skip_level = level; 1191 int skip_level = level;
1192 int no_skips = 0;
1195 struct extent_buffer *t; 1193 struct extent_buffer *t;
1196 1194
1197 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 1195 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
@@ -1199,27 +1197,24 @@ static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock)
1199 break; 1197 break;
1200 if (!path->locks[i]) 1198 if (!path->locks[i])
1201 break; 1199 break;
1202 if (path->slots[i] == 0) { 1200 if (!no_skips && path->slots[i] == 0) {
1203 skip_level = i + 1; 1201 skip_level = i + 1;
1204 continue; 1202 continue;
1205 } 1203 }
1206 if (path->keep_locks) { 1204 if (!no_skips && path->keep_locks) {
1207 u32 nritems; 1205 u32 nritems;
1208 t = path->nodes[i]; 1206 t = path->nodes[i];
1209 nritems = btrfs_header_nritems(t); 1207 nritems = btrfs_header_nritems(t);
1210 if (nritems < 2 || path->slots[i] >= nritems - 2) { 1208 if (nritems < 1 || path->slots[i] >= nritems - 1) {
1211if (path->keep_locks) {
1212//printk("path %p skip level now %d\n", path, skip_level);
1213}
1214 skip_level = i + 1; 1209 skip_level = i + 1;
1215 continue; 1210 continue;
1216 } 1211 }
1217 } 1212 }
1213 if (skip_level < i && i >= lowest_unlock)
1214 no_skips = 1;
1215
1218 t = path->nodes[i]; 1216 t = path->nodes[i];
1219 if (i >= lowest_unlock && i > skip_level && path->locks[i]) { 1217 if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
1220if (path->keep_locks) {
1221//printk("path %p unlocking level %d slot %d nritems %d skip_level %d\n", path, i, path->slots[i], btrfs_header_nritems(t), skip_level);
1222}
1223 btrfs_tree_unlock(t); 1218 btrfs_tree_unlock(t);
1224 path->locks[i] = 0; 1219 path->locks[i] = 0;
1225 } 1220 }
@@ -1244,6 +1239,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1244 ins_len, int cow) 1239 ins_len, int cow)
1245{ 1240{
1246 struct extent_buffer *b; 1241 struct extent_buffer *b;
1242 struct extent_buffer *tmp;
1247 int slot; 1243 int slot;
1248 int ret; 1244 int ret;
1249 int level; 1245 int level;
@@ -1263,10 +1259,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1263 if (ins_len < 0) 1259 if (ins_len < 0)
1264 lowest_unlock = 2; 1260 lowest_unlock = 2;
1265again: 1261again:
1266 if (!p->skip_locking) 1262 b = btrfs_lock_root_node(root);
1267 b = btrfs_lock_root_node(root);
1268 else
1269 b = btrfs_root_node(root);
1270 1263
1271 while (b) { 1264 while (b) {
1272 level = btrfs_header_level(b); 1265 level = btrfs_header_level(b);
@@ -1286,8 +1279,7 @@ again:
1286 WARN_ON(1); 1279 WARN_ON(1);
1287 level = btrfs_header_level(b); 1280 level = btrfs_header_level(b);
1288 p->nodes[level] = b; 1281 p->nodes[level] = b;
1289 if (!p->skip_locking) 1282 p->locks[level] = 1;
1290 p->locks[level] = 1;
1291 ret = check_block(root, p, level); 1283 ret = check_block(root, p, level);
1292 if (ret) 1284 if (ret)
1293 return -1; 1285 return -1;
@@ -1328,10 +1320,29 @@ again:
1328 reada_for_search(root, p, level, slot, 1320 reada_for_search(root, p, level, slot,
1329 key->objectid); 1321 key->objectid);
1330 1322
1331 b = read_node_slot(root, b, slot); 1323 tmp = btrfs_find_tree_block(root,
1332 if (!p->skip_locking) 1324 btrfs_node_blockptr(b, slot),
1333 btrfs_tree_lock(b); 1325 btrfs_level_size(root, level - 1));
1334 unlock_up(p, level + 1, lowest_unlock); 1326 if (tmp && btrfs_buffer_uptodate(tmp,
1327 btrfs_node_ptr_generation(b, slot))) {
1328 b = tmp;
1329 } else {
1330 /*
1331 * reduce lock contention at high levels
1332 * of the btree by dropping locks before
1333 * we read.
1334 */
1335 if (level > 1) {
1336 btrfs_release_path(NULL, p);
1337 if (tmp)
1338 free_extent_buffer(tmp);
1339 goto again;
1340 } else {
1341 b = read_node_slot(root, b, slot);
1342 }
1343 }
1344 btrfs_tree_lock(b);
1345 unlock_up(p, level, lowest_unlock);
1335 } else { 1346 } else {
1336 p->slots[level] = slot; 1347 p->slots[level] = slot;
1337 if (ins_len > 0 && btrfs_leaf_free_space(root, b) < 1348 if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
@@ -3007,17 +3018,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
3007 reada_for_search(root, path, level, slot, 0); 3018 reada_for_search(root, path, level, slot, 0);
3008 3019
3009 next = read_node_slot(root, c, slot); 3020 next = read_node_slot(root, c, slot);
3010 if (!path->skip_locking) { 3021 WARN_ON(!btrfs_tree_locked(c));
3011 if (!btrfs_tree_locked(c)) { 3022 btrfs_tree_lock(next);
3012 int i;
3013 WARN_ON(1);
3014printk("path %p no lock on level %d\n", path, level);
3015for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
3016printk("path %p level %d slot %d nritems %d\n", path, i, path->slots[i], btrfs_header_nritems(path->nodes[i]));
3017}
3018 }
3019 btrfs_tree_lock(next);
3020 }
3021 break; 3023 break;
3022 } 3024 }
3023 path->slots[level] = slot; 3025 path->slots[level] = slot;
@@ -3035,10 +3037,8 @@ printk("path %p level %d slot %d nritems %d\n", path, i, path->slots[i], btrfs_h
3035 if (level == 1 && path->locks[1] && path->reada) 3037 if (level == 1 && path->locks[1] && path->reada)
3036 reada_for_search(root, path, level, slot, 0); 3038 reada_for_search(root, path, level, slot, 0);
3037 next = read_node_slot(root, next, 0); 3039 next = read_node_slot(root, next, 0);
3038 if (!path->skip_locking) { 3040 WARN_ON(!btrfs_tree_locked(path->nodes[level]));
3039 WARN_ON(!btrfs_tree_locked(path->nodes[level])); 3041 btrfs_tree_lock(next);
3040 btrfs_tree_lock(next);
3041 }
3042 } 3042 }
3043done: 3043done:
3044 unlock_up(path, 0, 1); 3044 unlock_up(path, 0, 1);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 692b8ea42de1..9ea12d42741c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -336,7 +336,6 @@ struct btrfs_path {
336 /* keep some upper locks as we walk down */ 336 /* keep some upper locks as we walk down */
337 int keep_locks; 337 int keep_locks;
338 int lowest_level; 338 int lowest_level;
339 int skip_locking;
340}; 339};
341 340
342/* 341/*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f638803549e0..ffc363d2fb24 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1684,6 +1684,7 @@ void btrfs_throttle(struct btrfs_root *root)
1684#else 1684#else
1685 blk_congestion_wait(WRITE, HZ/20); 1685 blk_congestion_wait(WRITE, HZ/20);
1686#endif 1686#endif
1687
1687 } 1688 }
1688} 1689}
1689 1690
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 890b9e9d8e27..0905653dd3fc 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -88,7 +88,6 @@ static int cache_block_group(struct btrfs_root *root,
88 return -ENOMEM; 88 return -ENOMEM;
89 89
90 path->reada = 2; 90 path->reada = 2;
91 path->skip_locking = 1;
92 first_free = block_group->key.objectid; 91 first_free = block_group->key.objectid;
93 key.objectid = block_group->key.objectid; 92 key.objectid = block_group->key.objectid;
94 key.offset = 0; 93 key.offset = 0;