diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-06-25 16:01:30 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:03 -0400 |
commit | 051e1b9f748ae673b7325d3fc049bb838606cffa (patch) | |
tree | 82d9d98a56ed776df8eee270dc9e11d7f0eb9d56 | |
parent | a213501153fd66e2359e091b1612841305ba6551 (diff) |
Drop locks in btrfs_search_slot when reading a tree block.
One lock per btree block can make for significant congestion if everyone
has to wait for IO at the high levels of the btree. This drops
locks held by a path when doing reads during a tree search.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- | fs/btrfs/ctree.c | 74 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 1 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 1 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 1 |
4 files changed, 38 insertions, 39 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index dff4da082d06..1b756fae2799 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -63,7 +63,6 @@ void btrfs_free_path(struct btrfs_path *p) | |||
63 | void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) | 63 | void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) |
64 | { | 64 | { |
65 | int i; | 65 | int i; |
66 | int skip = p->skip_locking; | ||
67 | int keep = p->keep_locks; | 66 | int keep = p->keep_locks; |
68 | 67 | ||
69 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) { | 68 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) { |
@@ -76,7 +75,6 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) | |||
76 | free_extent_buffer(p->nodes[i]); | 75 | free_extent_buffer(p->nodes[i]); |
77 | } | 76 | } |
78 | memset(p, 0, sizeof(*p)); | 77 | memset(p, 0, sizeof(*p)); |
79 | p->skip_locking = skip; | ||
80 | p->keep_locks = keep; | 78 | p->keep_locks = keep; |
81 | } | 79 | } |
82 | 80 | ||
@@ -1137,7 +1135,6 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, | |||
1137 | return; | 1135 | return; |
1138 | 1136 | ||
1139 | node = path->nodes[level]; | 1137 | node = path->nodes[level]; |
1140 | WARN_ON(!path->skip_locking && !btrfs_tree_locked(node)); | ||
1141 | 1138 | ||
1142 | search = btrfs_node_blockptr(node, slot); | 1139 | search = btrfs_node_blockptr(node, slot); |
1143 | blocksize = btrfs_level_size(root, level - 1); | 1140 | blocksize = btrfs_level_size(root, level - 1); |
@@ -1192,6 +1189,7 @@ static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock) | |||
1192 | { | 1189 | { |
1193 | int i; | 1190 | int i; |
1194 | int skip_level = level; | 1191 | int skip_level = level; |
1192 | int no_skips = 0; | ||
1195 | struct extent_buffer *t; | 1193 | struct extent_buffer *t; |
1196 | 1194 | ||
1197 | for (i = level; i < BTRFS_MAX_LEVEL; i++) { | 1195 | for (i = level; i < BTRFS_MAX_LEVEL; i++) { |
@@ -1199,27 +1197,24 @@ static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock) | |||
1199 | break; | 1197 | break; |
1200 | if (!path->locks[i]) | 1198 | if (!path->locks[i]) |
1201 | break; | 1199 | break; |
1202 | if (path->slots[i] == 0) { | 1200 | if (!no_skips && path->slots[i] == 0) { |
1203 | skip_level = i + 1; | 1201 | skip_level = i + 1; |
1204 | continue; | 1202 | continue; |
1205 | } | 1203 | } |
1206 | if (path->keep_locks) { | 1204 | if (!no_skips && path->keep_locks) { |
1207 | u32 nritems; | 1205 | u32 nritems; |
1208 | t = path->nodes[i]; | 1206 | t = path->nodes[i]; |
1209 | nritems = btrfs_header_nritems(t); | 1207 | nritems = btrfs_header_nritems(t); |
1210 | if (nritems < 2 || path->slots[i] >= nritems - 2) { | 1208 | if (nritems < 1 || path->slots[i] >= nritems - 1) { |
1211 | if (path->keep_locks) { | ||
1212 | //printk("path %p skip level now %d\n", path, skip_level); | ||
1213 | } | ||
1214 | skip_level = i + 1; | 1209 | skip_level = i + 1; |
1215 | continue; | 1210 | continue; |
1216 | } | 1211 | } |
1217 | } | 1212 | } |
1213 | if (skip_level < i && i >= lowest_unlock) | ||
1214 | no_skips = 1; | ||
1215 | |||
1218 | t = path->nodes[i]; | 1216 | t = path->nodes[i]; |
1219 | if (i >= lowest_unlock && i > skip_level && path->locks[i]) { | 1217 | if (i >= lowest_unlock && i > skip_level && path->locks[i]) { |
1220 | if (path->keep_locks) { | ||
1221 | //printk("path %p unlocking level %d slot %d nritems %d skip_level %d\n", path, i, path->slots[i], btrfs_header_nritems(t), skip_level); | ||
1222 | } | ||
1223 | btrfs_tree_unlock(t); | 1218 | btrfs_tree_unlock(t); |
1224 | path->locks[i] = 0; | 1219 | path->locks[i] = 0; |
1225 | } | 1220 | } |
@@ -1244,6 +1239,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
1244 | ins_len, int cow) | 1239 | ins_len, int cow) |
1245 | { | 1240 | { |
1246 | struct extent_buffer *b; | 1241 | struct extent_buffer *b; |
1242 | struct extent_buffer *tmp; | ||
1247 | int slot; | 1243 | int slot; |
1248 | int ret; | 1244 | int ret; |
1249 | int level; | 1245 | int level; |
@@ -1263,10 +1259,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
1263 | if (ins_len < 0) | 1259 | if (ins_len < 0) |
1264 | lowest_unlock = 2; | 1260 | lowest_unlock = 2; |
1265 | again: | 1261 | again: |
1266 | if (!p->skip_locking) | 1262 | b = btrfs_lock_root_node(root); |
1267 | b = btrfs_lock_root_node(root); | ||
1268 | else | ||
1269 | b = btrfs_root_node(root); | ||
1270 | 1263 | ||
1271 | while (b) { | 1264 | while (b) { |
1272 | level = btrfs_header_level(b); | 1265 | level = btrfs_header_level(b); |
@@ -1286,8 +1279,7 @@ again: | |||
1286 | WARN_ON(1); | 1279 | WARN_ON(1); |
1287 | level = btrfs_header_level(b); | 1280 | level = btrfs_header_level(b); |
1288 | p->nodes[level] = b; | 1281 | p->nodes[level] = b; |
1289 | if (!p->skip_locking) | 1282 | p->locks[level] = 1; |
1290 | p->locks[level] = 1; | ||
1291 | ret = check_block(root, p, level); | 1283 | ret = check_block(root, p, level); |
1292 | if (ret) | 1284 | if (ret) |
1293 | return -1; | 1285 | return -1; |
@@ -1328,10 +1320,29 @@ again: | |||
1328 | reada_for_search(root, p, level, slot, | 1320 | reada_for_search(root, p, level, slot, |
1329 | key->objectid); | 1321 | key->objectid); |
1330 | 1322 | ||
1331 | b = read_node_slot(root, b, slot); | 1323 | tmp = btrfs_find_tree_block(root, |
1332 | if (!p->skip_locking) | 1324 | btrfs_node_blockptr(b, slot), |
1333 | btrfs_tree_lock(b); | 1325 | btrfs_level_size(root, level - 1)); |
1334 | unlock_up(p, level + 1, lowest_unlock); | 1326 | if (tmp && btrfs_buffer_uptodate(tmp, |
1327 | btrfs_node_ptr_generation(b, slot))) { | ||
1328 | b = tmp; | ||
1329 | } else { | ||
1330 | /* | ||
1331 | * reduce lock contention at high levels | ||
1332 | * of the btree by dropping locks before | ||
1333 | * we read. | ||
1334 | */ | ||
1335 | if (level > 1) { | ||
1336 | btrfs_release_path(NULL, p); | ||
1337 | if (tmp) | ||
1338 | free_extent_buffer(tmp); | ||
1339 | goto again; | ||
1340 | } else { | ||
1341 | b = read_node_slot(root, b, slot); | ||
1342 | } | ||
1343 | } | ||
1344 | btrfs_tree_lock(b); | ||
1345 | unlock_up(p, level, lowest_unlock); | ||
1335 | } else { | 1346 | } else { |
1336 | p->slots[level] = slot; | 1347 | p->slots[level] = slot; |
1337 | if (ins_len > 0 && btrfs_leaf_free_space(root, b) < | 1348 | if (ins_len > 0 && btrfs_leaf_free_space(root, b) < |
@@ -3007,17 +3018,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
3007 | reada_for_search(root, path, level, slot, 0); | 3018 | reada_for_search(root, path, level, slot, 0); |
3008 | 3019 | ||
3009 | next = read_node_slot(root, c, slot); | 3020 | next = read_node_slot(root, c, slot); |
3010 | if (!path->skip_locking) { | 3021 | WARN_ON(!btrfs_tree_locked(c)); |
3011 | if (!btrfs_tree_locked(c)) { | 3022 | btrfs_tree_lock(next); |
3012 | int i; | ||
3013 | WARN_ON(1); | ||
3014 | printk("path %p no lock on level %d\n", path, level); | ||
3015 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) { | ||
3016 | printk("path %p level %d slot %d nritems %d\n", path, i, path->slots[i], btrfs_header_nritems(path->nodes[i])); | ||
3017 | } | ||
3018 | } | ||
3019 | btrfs_tree_lock(next); | ||
3020 | } | ||
3021 | break; | 3023 | break; |
3022 | } | 3024 | } |
3023 | path->slots[level] = slot; | 3025 | path->slots[level] = slot; |
@@ -3035,10 +3037,8 @@ printk("path %p level %d slot %d nritems %d\n", path, i, path->slots[i], btrfs_h | |||
3035 | if (level == 1 && path->locks[1] && path->reada) | 3037 | if (level == 1 && path->locks[1] && path->reada) |
3036 | reada_for_search(root, path, level, slot, 0); | 3038 | reada_for_search(root, path, level, slot, 0); |
3037 | next = read_node_slot(root, next, 0); | 3039 | next = read_node_slot(root, next, 0); |
3038 | if (!path->skip_locking) { | 3040 | WARN_ON(!btrfs_tree_locked(path->nodes[level])); |
3039 | WARN_ON(!btrfs_tree_locked(path->nodes[level])); | 3041 | btrfs_tree_lock(next); |
3040 | btrfs_tree_lock(next); | ||
3041 | } | ||
3042 | } | 3042 | } |
3043 | done: | 3043 | done: |
3044 | unlock_up(path, 0, 1); | 3044 | unlock_up(path, 0, 1); |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 692b8ea42de1..9ea12d42741c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -336,7 +336,6 @@ struct btrfs_path { | |||
336 | /* keep some upper locks as we walk down */ | 336 | /* keep some upper locks as we walk down */ |
337 | int keep_locks; | 337 | int keep_locks; |
338 | int lowest_level; | 338 | int lowest_level; |
339 | int skip_locking; | ||
340 | }; | 339 | }; |
341 | 340 | ||
342 | /* | 341 | /* |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f638803549e0..ffc363d2fb24 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -1684,6 +1684,7 @@ void btrfs_throttle(struct btrfs_root *root) | |||
1684 | #else | 1684 | #else |
1685 | blk_congestion_wait(WRITE, HZ/20); | 1685 | blk_congestion_wait(WRITE, HZ/20); |
1686 | #endif | 1686 | #endif |
1687 | |||
1687 | } | 1688 | } |
1688 | } | 1689 | } |
1689 | 1690 | ||
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 890b9e9d8e27..0905653dd3fc 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -88,7 +88,6 @@ static int cache_block_group(struct btrfs_root *root, | |||
88 | return -ENOMEM; | 88 | return -ENOMEM; |
89 | 89 | ||
90 | path->reada = 2; | 90 | path->reada = 2; |
91 | path->skip_locking = 1; | ||
92 | first_free = block_group->key.objectid; | 91 | first_free = block_group->key.objectid; |
93 | key.objectid = block_group->key.objectid; | 92 | key.objectid = block_group->key.objectid; |
94 | key.offset = 0; | 93 | key.offset = 0; |