diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-08-01 15:11:20 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:06 -0400 |
commit | 65b51a009e29e64c0951f21ea17fdc66bbb0fbd7 (patch) | |
tree | 800926527fad4c12ca64083816f33be3d716ec13 /fs/btrfs/ctree.c | |
parent | 18e35e0ab337ec99c7e03e9ae917745a352c0bb1 (diff) |
btrfs_search_slot: reduce lock contention by cowing in two stages
A btree block cow has two parts, the first is to allocate a destination
block and the second is to copy the old bock over.
The first part needs locks in the extent allocation tree, and may need to
do IO. This changeset splits that into a separate function that can be
called without any tree locks held.
btrfs_search_slot is changed to drop its path and start over if it has
to COW a contended block. This often means that many writers will
pre-alloc a new destination for a the same contended block, but they
cache their prealloc for later use on lower levels in the tree.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r-- | fs/btrfs/ctree.c | 147 |
1 files changed, 120 insertions, 27 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c4792062dd53..7114faafa9d4 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -181,7 +181,8 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
181 | struct extent_buffer *buf, | 181 | struct extent_buffer *buf, |
182 | struct extent_buffer *parent, int parent_slot, | 182 | struct extent_buffer *parent, int parent_slot, |
183 | struct extent_buffer **cow_ret, | 183 | struct extent_buffer **cow_ret, |
184 | u64 search_start, u64 empty_size) | 184 | u64 search_start, u64 empty_size, |
185 | u64 prealloc_dest) | ||
185 | { | 186 | { |
186 | u64 root_gen; | 187 | u64 root_gen; |
187 | struct extent_buffer *cow; | 188 | struct extent_buffer *cow; |
@@ -216,10 +217,27 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
216 | } else { | 217 | } else { |
217 | first_key.objectid = 0; | 218 | first_key.objectid = 0; |
218 | } | 219 | } |
219 | cow = btrfs_alloc_free_block(trans, root, buf->len, | 220 | if (prealloc_dest) { |
220 | root->root_key.objectid, | 221 | struct btrfs_key ins; |
221 | root_gen, first_key.objectid, level, | 222 | |
222 | search_start, empty_size); | 223 | ins.objectid = prealloc_dest; |
224 | ins.offset = buf->len; | ||
225 | ins.type = BTRFS_EXTENT_ITEM_KEY; | ||
226 | |||
227 | ret = btrfs_alloc_reserved_extent(trans, root, | ||
228 | root->root_key.objectid, | ||
229 | root_gen, level, | ||
230 | first_key.objectid, | ||
231 | &ins); | ||
232 | BUG_ON(ret); | ||
233 | cow = btrfs_init_new_buffer(trans, root, prealloc_dest, | ||
234 | buf->len); | ||
235 | } else { | ||
236 | cow = btrfs_alloc_free_block(trans, root, buf->len, | ||
237 | root->root_key.objectid, | ||
238 | root_gen, first_key.objectid, | ||
239 | level, search_start, empty_size); | ||
240 | } | ||
223 | if (IS_ERR(cow)) | 241 | if (IS_ERR(cow)) |
224 | return PTR_ERR(cow); | 242 | return PTR_ERR(cow); |
225 | 243 | ||
@@ -279,7 +297,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
279 | int btrfs_cow_block(struct btrfs_trans_handle *trans, | 297 | int btrfs_cow_block(struct btrfs_trans_handle *trans, |
280 | struct btrfs_root *root, struct extent_buffer *buf, | 298 | struct btrfs_root *root, struct extent_buffer *buf, |
281 | struct extent_buffer *parent, int parent_slot, | 299 | struct extent_buffer *parent, int parent_slot, |
282 | struct extent_buffer **cow_ret) | 300 | struct extent_buffer **cow_ret, u64 prealloc_dest) |
283 | { | 301 | { |
284 | u64 search_start; | 302 | u64 search_start; |
285 | u64 header_trans; | 303 | u64 header_trans; |
@@ -302,12 +320,14 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
302 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | 320 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { |
303 | *cow_ret = buf; | 321 | *cow_ret = buf; |
304 | spin_unlock(&root->fs_info->hash_lock); | 322 | spin_unlock(&root->fs_info->hash_lock); |
323 | WARN_ON(prealloc_dest); | ||
305 | return 0; | 324 | return 0; |
306 | } | 325 | } |
307 | spin_unlock(&root->fs_info->hash_lock); | 326 | spin_unlock(&root->fs_info->hash_lock); |
308 | search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); | 327 | search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); |
309 | ret = __btrfs_cow_block(trans, root, buf, parent, | 328 | ret = __btrfs_cow_block(trans, root, buf, parent, |
310 | parent_slot, cow_ret, search_start, 0); | 329 | parent_slot, cow_ret, search_start, 0, |
330 | prealloc_dest); | ||
311 | return ret; | 331 | return ret; |
312 | } | 332 | } |
313 | 333 | ||
@@ -451,7 +471,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
451 | err = __btrfs_cow_block(trans, root, cur, parent, i, | 471 | err = __btrfs_cow_block(trans, root, cur, parent, i, |
452 | &cur, search_start, | 472 | &cur, search_start, |
453 | min(16 * blocksize, | 473 | min(16 * blocksize, |
454 | (end_slot - i) * blocksize)); | 474 | (end_slot - i) * blocksize), 0); |
455 | if (err) { | 475 | if (err) { |
456 | btrfs_tree_unlock(cur); | 476 | btrfs_tree_unlock(cur); |
457 | free_extent_buffer(cur); | 477 | free_extent_buffer(cur); |
@@ -803,7 +823,7 @@ static int balance_level(struct btrfs_trans_handle *trans, | |||
803 | child = read_node_slot(root, mid, 0); | 823 | child = read_node_slot(root, mid, 0); |
804 | btrfs_tree_lock(child); | 824 | btrfs_tree_lock(child); |
805 | BUG_ON(!child); | 825 | BUG_ON(!child); |
806 | ret = btrfs_cow_block(trans, root, child, mid, 0, &child); | 826 | ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); |
807 | BUG_ON(ret); | 827 | BUG_ON(ret); |
808 | 828 | ||
809 | spin_lock(&root->node_lock); | 829 | spin_lock(&root->node_lock); |
@@ -836,7 +856,7 @@ static int balance_level(struct btrfs_trans_handle *trans, | |||
836 | if (left) { | 856 | if (left) { |
837 | btrfs_tree_lock(left); | 857 | btrfs_tree_lock(left); |
838 | wret = btrfs_cow_block(trans, root, left, | 858 | wret = btrfs_cow_block(trans, root, left, |
839 | parent, pslot - 1, &left); | 859 | parent, pslot - 1, &left, 0); |
840 | if (wret) { | 860 | if (wret) { |
841 | ret = wret; | 861 | ret = wret; |
842 | goto enospc; | 862 | goto enospc; |
@@ -846,7 +866,7 @@ static int balance_level(struct btrfs_trans_handle *trans, | |||
846 | if (right) { | 866 | if (right) { |
847 | btrfs_tree_lock(right); | 867 | btrfs_tree_lock(right); |
848 | wret = btrfs_cow_block(trans, root, right, | 868 | wret = btrfs_cow_block(trans, root, right, |
849 | parent, pslot + 1, &right); | 869 | parent, pslot + 1, &right, 0); |
850 | if (wret) { | 870 | if (wret) { |
851 | ret = wret; | 871 | ret = wret; |
852 | goto enospc; | 872 | goto enospc; |
@@ -1021,7 +1041,7 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1021 | wret = 1; | 1041 | wret = 1; |
1022 | } else { | 1042 | } else { |
1023 | ret = btrfs_cow_block(trans, root, left, parent, | 1043 | ret = btrfs_cow_block(trans, root, left, parent, |
1024 | pslot - 1, &left); | 1044 | pslot - 1, &left, 0); |
1025 | if (ret) | 1045 | if (ret) |
1026 | wret = 1; | 1046 | wret = 1; |
1027 | else { | 1047 | else { |
@@ -1069,7 +1089,7 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1069 | } else { | 1089 | } else { |
1070 | ret = btrfs_cow_block(trans, root, right, | 1090 | ret = btrfs_cow_block(trans, root, right, |
1071 | parent, pslot + 1, | 1091 | parent, pslot + 1, |
1072 | &right); | 1092 | &right, 0); |
1073 | if (ret) | 1093 | if (ret) |
1074 | wret = 1; | 1094 | wret = 1; |
1075 | else { | 1095 | else { |
@@ -1245,6 +1265,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
1245 | u8 lowest_level = 0; | 1265 | u8 lowest_level = 0; |
1246 | u64 blocknr; | 1266 | u64 blocknr; |
1247 | u64 gen; | 1267 | u64 gen; |
1268 | struct btrfs_key prealloc_block; | ||
1248 | 1269 | ||
1249 | lowest_level = p->lowest_level; | 1270 | lowest_level = p->lowest_level; |
1250 | WARN_ON(lowest_level && ins_len); | 1271 | WARN_ON(lowest_level && ins_len); |
@@ -1253,6 +1274,9 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
1253 | !mutex_is_locked(&root->fs_info->alloc_mutex)); | 1274 | !mutex_is_locked(&root->fs_info->alloc_mutex)); |
1254 | if (ins_len < 0) | 1275 | if (ins_len < 0) |
1255 | lowest_unlock = 2; | 1276 | lowest_unlock = 2; |
1277 | |||
1278 | prealloc_block.objectid = 0; | ||
1279 | |||
1256 | again: | 1280 | again: |
1257 | if (p->skip_locking) | 1281 | if (p->skip_locking) |
1258 | b = btrfs_root_node(root); | 1282 | b = btrfs_root_node(root); |
@@ -1261,27 +1285,82 @@ again: | |||
1261 | 1285 | ||
1262 | while (b) { | 1286 | while (b) { |
1263 | level = btrfs_header_level(b); | 1287 | level = btrfs_header_level(b); |
1288 | |||
1289 | /* | ||
1290 | * setup the path here so we can release it under lock | ||
1291 | * contention with the cow code | ||
1292 | */ | ||
1293 | p->nodes[level] = b; | ||
1294 | if (!p->skip_locking) | ||
1295 | p->locks[level] = 1; | ||
1296 | |||
1264 | if (cow) { | 1297 | if (cow) { |
1265 | int wret; | 1298 | int wret; |
1299 | |||
1300 | /* is a cow on this block not required */ | ||
1301 | spin_lock(&root->fs_info->hash_lock); | ||
1302 | if (btrfs_header_generation(b) == trans->transid && | ||
1303 | !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { | ||
1304 | spin_unlock(&root->fs_info->hash_lock); | ||
1305 | goto cow_done; | ||
1306 | } | ||
1307 | spin_unlock(&root->fs_info->hash_lock); | ||
1308 | |||
1309 | /* ok, we have to cow, is our old prealloc the right | ||
1310 | * size? | ||
1311 | */ | ||
1312 | if (prealloc_block.objectid && | ||
1313 | prealloc_block.offset != b->len) { | ||
1314 | btrfs_free_reserved_extent(root, | ||
1315 | prealloc_block.objectid, | ||
1316 | prealloc_block.offset); | ||
1317 | prealloc_block.objectid = 0; | ||
1318 | } | ||
1319 | |||
1320 | /* | ||
1321 | * for higher level blocks, try not to allocate blocks | ||
1322 | * with the block and the parent locks held. | ||
1323 | */ | ||
1324 | if (level > 1 && !prealloc_block.objectid && | ||
1325 | btrfs_path_lock_waiting(p, level)) { | ||
1326 | u32 size = b->len; | ||
1327 | u64 hint = b->start; | ||
1328 | |||
1329 | btrfs_release_path(root, p); | ||
1330 | ret = btrfs_reserve_extent(trans, root, | ||
1331 | size, size, 0, | ||
1332 | hint, (u64)-1, | ||
1333 | &prealloc_block, 0); | ||
1334 | BUG_ON(ret); | ||
1335 | goto again; | ||
1336 | } | ||
1337 | |||
1266 | wret = btrfs_cow_block(trans, root, b, | 1338 | wret = btrfs_cow_block(trans, root, b, |
1267 | p->nodes[level + 1], | 1339 | p->nodes[level + 1], |
1268 | p->slots[level + 1], | 1340 | p->slots[level + 1], |
1269 | &b); | 1341 | &b, prealloc_block.objectid); |
1342 | prealloc_block.objectid = 0; | ||
1270 | if (wret) { | 1343 | if (wret) { |
1271 | free_extent_buffer(b); | 1344 | free_extent_buffer(b); |
1272 | return wret; | 1345 | ret = wret; |
1346 | goto done; | ||
1273 | } | 1347 | } |
1274 | } | 1348 | } |
1349 | cow_done: | ||
1275 | BUG_ON(!cow && ins_len); | 1350 | BUG_ON(!cow && ins_len); |
1276 | if (level != btrfs_header_level(b)) | 1351 | if (level != btrfs_header_level(b)) |
1277 | WARN_ON(1); | 1352 | WARN_ON(1); |
1278 | level = btrfs_header_level(b); | 1353 | level = btrfs_header_level(b); |
1354 | |||
1279 | p->nodes[level] = b; | 1355 | p->nodes[level] = b; |
1280 | if (!p->skip_locking) | 1356 | if (!p->skip_locking) |
1281 | p->locks[level] = 1; | 1357 | p->locks[level] = 1; |
1358 | |||
1282 | ret = check_block(root, p, level); | 1359 | ret = check_block(root, p, level); |
1283 | if (ret) | 1360 | if (ret) { |
1284 | return -1; | 1361 | ret = -1; |
1362 | goto done; | ||
1363 | } | ||
1285 | 1364 | ||
1286 | ret = bin_search(b, key, level, &slot); | 1365 | ret = bin_search(b, key, level, &slot); |
1287 | if (level != 0) { | 1366 | if (level != 0) { |
@@ -1292,15 +1371,19 @@ again: | |||
1292 | BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { | 1371 | BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { |
1293 | int sret = split_node(trans, root, p, level); | 1372 | int sret = split_node(trans, root, p, level); |
1294 | BUG_ON(sret > 0); | 1373 | BUG_ON(sret > 0); |
1295 | if (sret) | 1374 | if (sret) { |
1296 | return sret; | 1375 | ret = sret; |
1376 | goto done; | ||
1377 | } | ||
1297 | b = p->nodes[level]; | 1378 | b = p->nodes[level]; |
1298 | slot = p->slots[level]; | 1379 | slot = p->slots[level]; |
1299 | } else if (ins_len < 0) { | 1380 | } else if (ins_len < 0) { |
1300 | int sret = balance_level(trans, root, p, | 1381 | int sret = balance_level(trans, root, p, |
1301 | level); | 1382 | level); |
1302 | if (sret) | 1383 | if (sret) { |
1303 | return sret; | 1384 | ret = sret; |
1385 | goto done; | ||
1386 | } | ||
1304 | b = p->nodes[level]; | 1387 | b = p->nodes[level]; |
1305 | if (!b) { | 1388 | if (!b) { |
1306 | btrfs_release_path(NULL, p); | 1389 | btrfs_release_path(NULL, p); |
@@ -1362,14 +1445,24 @@ again: | |||
1362 | int sret = split_leaf(trans, root, key, | 1445 | int sret = split_leaf(trans, root, key, |
1363 | p, ins_len, ret == 0); | 1446 | p, ins_len, ret == 0); |
1364 | BUG_ON(sret > 0); | 1447 | BUG_ON(sret > 0); |
1365 | if (sret) | 1448 | if (sret) { |
1366 | return sret; | 1449 | ret = sret; |
1450 | goto done; | ||
1451 | } | ||
1367 | } | 1452 | } |
1368 | unlock_up(p, level, lowest_unlock); | 1453 | unlock_up(p, level, lowest_unlock); |
1369 | return ret; | 1454 | goto done; |
1370 | } | 1455 | } |
1371 | } | 1456 | } |
1372 | return 1; | 1457 | ret = 1; |
1458 | done: | ||
1459 | if (prealloc_block.objectid) { | ||
1460 | btrfs_free_reserved_extent(root, | ||
1461 | prealloc_block.objectid, | ||
1462 | prealloc_block.offset); | ||
1463 | } | ||
1464 | |||
1465 | return ret; | ||
1373 | } | 1466 | } |
1374 | 1467 | ||
1375 | /* | 1468 | /* |
@@ -1840,7 +1933,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root | |||
1840 | 1933 | ||
1841 | /* cow and double check */ | 1934 | /* cow and double check */ |
1842 | ret = btrfs_cow_block(trans, root, right, upper, | 1935 | ret = btrfs_cow_block(trans, root, right, upper, |
1843 | slot + 1, &right); | 1936 | slot + 1, &right, 0); |
1844 | if (ret) | 1937 | if (ret) |
1845 | goto out_unlock; | 1938 | goto out_unlock; |
1846 | 1939 | ||
@@ -2021,7 +2114,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2021 | 2114 | ||
2022 | /* cow and double check */ | 2115 | /* cow and double check */ |
2023 | ret = btrfs_cow_block(trans, root, left, | 2116 | ret = btrfs_cow_block(trans, root, left, |
2024 | path->nodes[1], slot - 1, &left); | 2117 | path->nodes[1], slot - 1, &left, 0); |
2025 | if (ret) { | 2118 | if (ret) { |
2026 | /* we hit -ENOSPC, but it isn't fatal here */ | 2119 | /* we hit -ENOSPC, but it isn't fatal here */ |
2027 | ret = 1; | 2120 | ret = 1; |