diff options
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r-- | fs/btrfs/ctree.c | 325 |
1 files changed, 276 insertions, 49 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9e46c0776816..37f31b5529aa 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -38,22 +38,64 @@ static int balance_node_right(struct btrfs_trans_handle *trans, | |||
38 | static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 38 | static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
39 | struct btrfs_path *path, int level, int slot); | 39 | struct btrfs_path *path, int level, int slot); |
40 | 40 | ||
41 | inline void btrfs_init_path(struct btrfs_path *p) | ||
42 | { | ||
43 | memset(p, 0, sizeof(*p)); | ||
44 | } | ||
45 | |||
46 | struct btrfs_path *btrfs_alloc_path(void) | 41 | struct btrfs_path *btrfs_alloc_path(void) |
47 | { | 42 | { |
48 | struct btrfs_path *path; | 43 | struct btrfs_path *path; |
49 | path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS); | 44 | path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); |
50 | if (path) { | 45 | if (path) |
51 | btrfs_init_path(path); | ||
52 | path->reada = 1; | 46 | path->reada = 1; |
53 | } | ||
54 | return path; | 47 | return path; |
55 | } | 48 | } |
56 | 49 | ||
50 | /* | ||
51 | * set all locked nodes in the path to blocking locks. This should | ||
52 | * be done before scheduling | ||
53 | */ | ||
54 | noinline void btrfs_set_path_blocking(struct btrfs_path *p) | ||
55 | { | ||
56 | int i; | ||
57 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) { | ||
58 | if (p->nodes[i] && p->locks[i]) | ||
59 | btrfs_set_lock_blocking(p->nodes[i]); | ||
60 | } | ||
61 | } | ||
62 | |||
63 | /* | ||
64 | * reset all the locked nodes in the patch to spinning locks. | ||
65 | * | ||
66 | * held is used to keep lockdep happy, when lockdep is enabled | ||
67 | * we set held to a blocking lock before we go around and | ||
68 | * retake all the spinlocks in the path. You can safely use NULL | ||
69 | * for held | ||
70 | */ | ||
71 | noinline void btrfs_clear_path_blocking(struct btrfs_path *p, | ||
72 | struct extent_buffer *held) | ||
73 | { | ||
74 | int i; | ||
75 | |||
76 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
77 | /* lockdep really cares that we take all of these spinlocks | ||
78 | * in the right order. If any of the locks in the path are not | ||
79 | * currently blocking, it is going to complain. So, make really | ||
80 | * really sure by forcing the path to blocking before we clear | ||
81 | * the path blocking. | ||
82 | */ | ||
83 | if (held) | ||
84 | btrfs_set_lock_blocking(held); | ||
85 | btrfs_set_path_blocking(p); | ||
86 | #endif | ||
87 | |||
88 | for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { | ||
89 | if (p->nodes[i] && p->locks[i]) | ||
90 | btrfs_clear_lock_blocking(p->nodes[i]); | ||
91 | } | ||
92 | |||
93 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
94 | if (held) | ||
95 | btrfs_clear_lock_blocking(held); | ||
96 | #endif | ||
97 | } | ||
98 | |||
57 | /* this also releases the path */ | 99 | /* this also releases the path */ |
58 | void btrfs_free_path(struct btrfs_path *p) | 100 | void btrfs_free_path(struct btrfs_path *p) |
59 | { | 101 | { |
@@ -235,7 +277,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
235 | if (*cow_ret == buf) | 277 | if (*cow_ret == buf) |
236 | unlock_orig = 1; | 278 | unlock_orig = 1; |
237 | 279 | ||
238 | WARN_ON(!btrfs_tree_locked(buf)); | 280 | btrfs_assert_tree_locked(buf); |
239 | 281 | ||
240 | if (parent) | 282 | if (parent) |
241 | parent_start = parent->start; | 283 | parent_start = parent->start; |
@@ -261,7 +303,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
261 | trans->transid, level, &ins); | 303 | trans->transid, level, &ins); |
262 | BUG_ON(ret); | 304 | BUG_ON(ret); |
263 | cow = btrfs_init_new_buffer(trans, root, prealloc_dest, | 305 | cow = btrfs_init_new_buffer(trans, root, prealloc_dest, |
264 | buf->len); | 306 | buf->len, level); |
265 | } else { | 307 | } else { |
266 | cow = btrfs_alloc_free_block(trans, root, buf->len, | 308 | cow = btrfs_alloc_free_block(trans, root, buf->len, |
267 | parent_start, | 309 | parent_start, |
@@ -272,6 +314,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
272 | if (IS_ERR(cow)) | 314 | if (IS_ERR(cow)) |
273 | return PTR_ERR(cow); | 315 | return PTR_ERR(cow); |
274 | 316 | ||
317 | /* cow is set to blocking by btrfs_init_new_buffer */ | ||
318 | |||
275 | copy_extent_buffer(cow, buf, 0, 0, cow->len); | 319 | copy_extent_buffer(cow, buf, 0, 0, cow->len); |
276 | btrfs_set_header_bytenr(cow, cow->start); | 320 | btrfs_set_header_bytenr(cow, cow->start); |
277 | btrfs_set_header_generation(cow, trans->transid); | 321 | btrfs_set_header_generation(cow, trans->transid); |
@@ -388,17 +432,20 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
388 | WARN_ON(1); | 432 | WARN_ON(1); |
389 | } | 433 | } |
390 | 434 | ||
391 | spin_lock(&root->fs_info->hash_lock); | ||
392 | if (btrfs_header_generation(buf) == trans->transid && | 435 | if (btrfs_header_generation(buf) == trans->transid && |
393 | btrfs_header_owner(buf) == root->root_key.objectid && | 436 | btrfs_header_owner(buf) == root->root_key.objectid && |
394 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | 437 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { |
395 | *cow_ret = buf; | 438 | *cow_ret = buf; |
396 | spin_unlock(&root->fs_info->hash_lock); | ||
397 | WARN_ON(prealloc_dest); | 439 | WARN_ON(prealloc_dest); |
398 | return 0; | 440 | return 0; |
399 | } | 441 | } |
400 | spin_unlock(&root->fs_info->hash_lock); | 442 | |
401 | search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); | 443 | search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); |
444 | |||
445 | if (parent) | ||
446 | btrfs_set_lock_blocking(parent); | ||
447 | btrfs_set_lock_blocking(buf); | ||
448 | |||
402 | ret = __btrfs_cow_block(trans, root, buf, parent, | 449 | ret = __btrfs_cow_block(trans, root, buf, parent, |
403 | parent_slot, cow_ret, search_start, 0, | 450 | parent_slot, cow_ret, search_start, 0, |
404 | prealloc_dest); | 451 | prealloc_dest); |
@@ -504,6 +551,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
504 | if (parent_nritems == 1) | 551 | if (parent_nritems == 1) |
505 | return 0; | 552 | return 0; |
506 | 553 | ||
554 | btrfs_set_lock_blocking(parent); | ||
555 | |||
507 | for (i = start_slot; i < end_slot; i++) { | 556 | for (i = start_slot; i < end_slot; i++) { |
508 | int close = 1; | 557 | int close = 1; |
509 | 558 | ||
@@ -564,6 +613,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
564 | search_start = last_block; | 613 | search_start = last_block; |
565 | 614 | ||
566 | btrfs_tree_lock(cur); | 615 | btrfs_tree_lock(cur); |
616 | btrfs_set_lock_blocking(cur); | ||
567 | err = __btrfs_cow_block(trans, root, cur, parent, i, | 617 | err = __btrfs_cow_block(trans, root, cur, parent, i, |
568 | &cur, search_start, | 618 | &cur, search_start, |
569 | min(16 * blocksize, | 619 | min(16 * blocksize, |
@@ -862,6 +912,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
862 | return 0; | 912 | return 0; |
863 | 913 | ||
864 | mid = path->nodes[level]; | 914 | mid = path->nodes[level]; |
915 | |||
865 | WARN_ON(!path->locks[level]); | 916 | WARN_ON(!path->locks[level]); |
866 | WARN_ON(btrfs_header_generation(mid) != trans->transid); | 917 | WARN_ON(btrfs_header_generation(mid) != trans->transid); |
867 | 918 | ||
@@ -883,8 +934,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
883 | 934 | ||
884 | /* promote the child to a root */ | 935 | /* promote the child to a root */ |
885 | child = read_node_slot(root, mid, 0); | 936 | child = read_node_slot(root, mid, 0); |
886 | btrfs_tree_lock(child); | ||
887 | BUG_ON(!child); | 937 | BUG_ON(!child); |
938 | btrfs_tree_lock(child); | ||
939 | btrfs_set_lock_blocking(child); | ||
888 | ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); | 940 | ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); |
889 | BUG_ON(ret); | 941 | BUG_ON(ret); |
890 | 942 | ||
@@ -900,6 +952,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
900 | 952 | ||
901 | add_root_to_dirty_list(root); | 953 | add_root_to_dirty_list(root); |
902 | btrfs_tree_unlock(child); | 954 | btrfs_tree_unlock(child); |
955 | |||
903 | path->locks[level] = 0; | 956 | path->locks[level] = 0; |
904 | path->nodes[level] = NULL; | 957 | path->nodes[level] = NULL; |
905 | clean_tree_block(trans, root, mid); | 958 | clean_tree_block(trans, root, mid); |
@@ -924,6 +977,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
924 | left = read_node_slot(root, parent, pslot - 1); | 977 | left = read_node_slot(root, parent, pslot - 1); |
925 | if (left) { | 978 | if (left) { |
926 | btrfs_tree_lock(left); | 979 | btrfs_tree_lock(left); |
980 | btrfs_set_lock_blocking(left); | ||
927 | wret = btrfs_cow_block(trans, root, left, | 981 | wret = btrfs_cow_block(trans, root, left, |
928 | parent, pslot - 1, &left, 0); | 982 | parent, pslot - 1, &left, 0); |
929 | if (wret) { | 983 | if (wret) { |
@@ -934,6 +988,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
934 | right = read_node_slot(root, parent, pslot + 1); | 988 | right = read_node_slot(root, parent, pslot + 1); |
935 | if (right) { | 989 | if (right) { |
936 | btrfs_tree_lock(right); | 990 | btrfs_tree_lock(right); |
991 | btrfs_set_lock_blocking(right); | ||
937 | wret = btrfs_cow_block(trans, root, right, | 992 | wret = btrfs_cow_block(trans, root, right, |
938 | parent, pslot + 1, &right, 0); | 993 | parent, pslot + 1, &right, 0); |
939 | if (wret) { | 994 | if (wret) { |
@@ -1109,6 +1164,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1109 | u32 left_nr; | 1164 | u32 left_nr; |
1110 | 1165 | ||
1111 | btrfs_tree_lock(left); | 1166 | btrfs_tree_lock(left); |
1167 | btrfs_set_lock_blocking(left); | ||
1168 | |||
1112 | left_nr = btrfs_header_nritems(left); | 1169 | left_nr = btrfs_header_nritems(left); |
1113 | if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { | 1170 | if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { |
1114 | wret = 1; | 1171 | wret = 1; |
@@ -1155,7 +1212,10 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1155 | */ | 1212 | */ |
1156 | if (right) { | 1213 | if (right) { |
1157 | u32 right_nr; | 1214 | u32 right_nr; |
1215 | |||
1158 | btrfs_tree_lock(right); | 1216 | btrfs_tree_lock(right); |
1217 | btrfs_set_lock_blocking(right); | ||
1218 | |||
1159 | right_nr = btrfs_header_nritems(right); | 1219 | right_nr = btrfs_header_nritems(right); |
1160 | if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { | 1220 | if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { |
1161 | wret = 1; | 1221 | wret = 1; |
@@ -1210,8 +1270,7 @@ static noinline void reada_for_search(struct btrfs_root *root, | |||
1210 | struct btrfs_disk_key disk_key; | 1270 | struct btrfs_disk_key disk_key; |
1211 | u32 nritems; | 1271 | u32 nritems; |
1212 | u64 search; | 1272 | u64 search; |
1213 | u64 lowest_read; | 1273 | u64 target; |
1214 | u64 highest_read; | ||
1215 | u64 nread = 0; | 1274 | u64 nread = 0; |
1216 | int direction = path->reada; | 1275 | int direction = path->reada; |
1217 | struct extent_buffer *eb; | 1276 | struct extent_buffer *eb; |
@@ -1235,8 +1294,7 @@ static noinline void reada_for_search(struct btrfs_root *root, | |||
1235 | return; | 1294 | return; |
1236 | } | 1295 | } |
1237 | 1296 | ||
1238 | highest_read = search; | 1297 | target = search; |
1239 | lowest_read = search; | ||
1240 | 1298 | ||
1241 | nritems = btrfs_header_nritems(node); | 1299 | nritems = btrfs_header_nritems(node); |
1242 | nr = slot; | 1300 | nr = slot; |
@@ -1256,27 +1314,80 @@ static noinline void reada_for_search(struct btrfs_root *root, | |||
1256 | break; | 1314 | break; |
1257 | } | 1315 | } |
1258 | search = btrfs_node_blockptr(node, nr); | 1316 | search = btrfs_node_blockptr(node, nr); |
1259 | if ((search >= lowest_read && search <= highest_read) || | 1317 | if ((search <= target && target - search <= 65536) || |
1260 | (search < lowest_read && lowest_read - search <= 16384) || | 1318 | (search > target && search - target <= 65536)) { |
1261 | (search > highest_read && search - highest_read <= 16384)) { | ||
1262 | readahead_tree_block(root, search, blocksize, | 1319 | readahead_tree_block(root, search, blocksize, |
1263 | btrfs_node_ptr_generation(node, nr)); | 1320 | btrfs_node_ptr_generation(node, nr)); |
1264 | nread += blocksize; | 1321 | nread += blocksize; |
1265 | } | 1322 | } |
1266 | nscan++; | 1323 | nscan++; |
1267 | if (path->reada < 2 && (nread > (64 * 1024) || nscan > 32)) | 1324 | if ((nread > 65536 || nscan > 32)) |
1268 | break; | 1325 | break; |
1326 | } | ||
1327 | } | ||
1269 | 1328 | ||
1270 | if (nread > (256 * 1024) || nscan > 128) | 1329 | /* |
1271 | break; | 1330 | * returns -EAGAIN if it had to drop the path, or zero if everything was in |
1331 | * cache | ||
1332 | */ | ||
1333 | static noinline int reada_for_balance(struct btrfs_root *root, | ||
1334 | struct btrfs_path *path, int level) | ||
1335 | { | ||
1336 | int slot; | ||
1337 | int nritems; | ||
1338 | struct extent_buffer *parent; | ||
1339 | struct extent_buffer *eb; | ||
1340 | u64 gen; | ||
1341 | u64 block1 = 0; | ||
1342 | u64 block2 = 0; | ||
1343 | int ret = 0; | ||
1344 | int blocksize; | ||
1345 | |||
1346 | parent = path->nodes[level - 1]; | ||
1347 | if (!parent) | ||
1348 | return 0; | ||
1272 | 1349 | ||
1273 | if (search < lowest_read) | 1350 | nritems = btrfs_header_nritems(parent); |
1274 | lowest_read = search; | 1351 | slot = path->slots[level]; |
1275 | if (search > highest_read) | 1352 | blocksize = btrfs_level_size(root, level); |
1276 | highest_read = search; | 1353 | |
1354 | if (slot > 0) { | ||
1355 | block1 = btrfs_node_blockptr(parent, slot - 1); | ||
1356 | gen = btrfs_node_ptr_generation(parent, slot - 1); | ||
1357 | eb = btrfs_find_tree_block(root, block1, blocksize); | ||
1358 | if (eb && btrfs_buffer_uptodate(eb, gen)) | ||
1359 | block1 = 0; | ||
1360 | free_extent_buffer(eb); | ||
1361 | } | ||
1362 | if (slot < nritems) { | ||
1363 | block2 = btrfs_node_blockptr(parent, slot + 1); | ||
1364 | gen = btrfs_node_ptr_generation(parent, slot + 1); | ||
1365 | eb = btrfs_find_tree_block(root, block2, blocksize); | ||
1366 | if (eb && btrfs_buffer_uptodate(eb, gen)) | ||
1367 | block2 = 0; | ||
1368 | free_extent_buffer(eb); | ||
1277 | } | 1369 | } |
1370 | if (block1 || block2) { | ||
1371 | ret = -EAGAIN; | ||
1372 | btrfs_release_path(root, path); | ||
1373 | if (block1) | ||
1374 | readahead_tree_block(root, block1, blocksize, 0); | ||
1375 | if (block2) | ||
1376 | readahead_tree_block(root, block2, blocksize, 0); | ||
1377 | |||
1378 | if (block1) { | ||
1379 | eb = read_tree_block(root, block1, blocksize, 0); | ||
1380 | free_extent_buffer(eb); | ||
1381 | } | ||
1382 | if (block1) { | ||
1383 | eb = read_tree_block(root, block2, blocksize, 0); | ||
1384 | free_extent_buffer(eb); | ||
1385 | } | ||
1386 | } | ||
1387 | return ret; | ||
1278 | } | 1388 | } |
1279 | 1389 | ||
1390 | |||
1280 | /* | 1391 | /* |
1281 | * when we walk down the tree, it is usually safe to unlock the higher layers | 1392 | * when we walk down the tree, it is usually safe to unlock the higher layers |
1282 | * in the tree. The exceptions are when our path goes through slot 0, because | 1393 | * in the tree. The exceptions are when our path goes through slot 0, because |
@@ -1328,6 +1439,32 @@ static noinline void unlock_up(struct btrfs_path *path, int level, | |||
1328 | } | 1439 | } |
1329 | 1440 | ||
1330 | /* | 1441 | /* |
1442 | * This releases any locks held in the path starting at level and | ||
1443 | * going all the way up to the root. | ||
1444 | * | ||
1445 | * btrfs_search_slot will keep the lock held on higher nodes in a few | ||
1446 | * corner cases, such as COW of the block at slot zero in the node. This | ||
1447 | * ignores those rules, and it should only be called when there are no | ||
1448 | * more updates to be done higher up in the tree. | ||
1449 | */ | ||
1450 | noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) | ||
1451 | { | ||
1452 | int i; | ||
1453 | |||
1454 | if (path->keep_locks || path->lowest_level) | ||
1455 | return; | ||
1456 | |||
1457 | for (i = level; i < BTRFS_MAX_LEVEL; i++) { | ||
1458 | if (!path->nodes[i]) | ||
1459 | continue; | ||
1460 | if (!path->locks[i]) | ||
1461 | continue; | ||
1462 | btrfs_tree_unlock(path->nodes[i]); | ||
1463 | path->locks[i] = 0; | ||
1464 | } | ||
1465 | } | ||
1466 | |||
1467 | /* | ||
1331 | * look for key in the tree. path is filled in with nodes along the way | 1468 | * look for key in the tree. path is filled in with nodes along the way |
1332 | * if key is found, we return zero and you can find the item in the leaf | 1469 | * if key is found, we return zero and you can find the item in the leaf |
1333 | * level of the path (level 0) | 1470 | * level of the path (level 0) |
@@ -1387,32 +1524,30 @@ again: | |||
1387 | int wret; | 1524 | int wret; |
1388 | 1525 | ||
1389 | /* is a cow on this block not required */ | 1526 | /* is a cow on this block not required */ |
1390 | spin_lock(&root->fs_info->hash_lock); | ||
1391 | if (btrfs_header_generation(b) == trans->transid && | 1527 | if (btrfs_header_generation(b) == trans->transid && |
1392 | btrfs_header_owner(b) == root->root_key.objectid && | 1528 | btrfs_header_owner(b) == root->root_key.objectid && |
1393 | !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { | 1529 | !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { |
1394 | spin_unlock(&root->fs_info->hash_lock); | ||
1395 | goto cow_done; | 1530 | goto cow_done; |
1396 | } | 1531 | } |
1397 | spin_unlock(&root->fs_info->hash_lock); | ||
1398 | 1532 | ||
1399 | /* ok, we have to cow, is our old prealloc the right | 1533 | /* ok, we have to cow, is our old prealloc the right |
1400 | * size? | 1534 | * size? |
1401 | */ | 1535 | */ |
1402 | if (prealloc_block.objectid && | 1536 | if (prealloc_block.objectid && |
1403 | prealloc_block.offset != b->len) { | 1537 | prealloc_block.offset != b->len) { |
1538 | btrfs_release_path(root, p); | ||
1404 | btrfs_free_reserved_extent(root, | 1539 | btrfs_free_reserved_extent(root, |
1405 | prealloc_block.objectid, | 1540 | prealloc_block.objectid, |
1406 | prealloc_block.offset); | 1541 | prealloc_block.offset); |
1407 | prealloc_block.objectid = 0; | 1542 | prealloc_block.objectid = 0; |
1543 | goto again; | ||
1408 | } | 1544 | } |
1409 | 1545 | ||
1410 | /* | 1546 | /* |
1411 | * for higher level blocks, try not to allocate blocks | 1547 | * for higher level blocks, try not to allocate blocks |
1412 | * with the block and the parent locks held. | 1548 | * with the block and the parent locks held. |
1413 | */ | 1549 | */ |
1414 | if (level > 1 && !prealloc_block.objectid && | 1550 | if (level > 0 && !prealloc_block.objectid) { |
1415 | btrfs_path_lock_waiting(p, level)) { | ||
1416 | u32 size = b->len; | 1551 | u32 size = b->len; |
1417 | u64 hint = b->start; | 1552 | u64 hint = b->start; |
1418 | 1553 | ||
@@ -1425,6 +1560,8 @@ again: | |||
1425 | goto again; | 1560 | goto again; |
1426 | } | 1561 | } |
1427 | 1562 | ||
1563 | btrfs_set_path_blocking(p); | ||
1564 | |||
1428 | wret = btrfs_cow_block(trans, root, b, | 1565 | wret = btrfs_cow_block(trans, root, b, |
1429 | p->nodes[level + 1], | 1566 | p->nodes[level + 1], |
1430 | p->slots[level + 1], | 1567 | p->slots[level + 1], |
@@ -1446,6 +1583,22 @@ cow_done: | |||
1446 | if (!p->skip_locking) | 1583 | if (!p->skip_locking) |
1447 | p->locks[level] = 1; | 1584 | p->locks[level] = 1; |
1448 | 1585 | ||
1586 | btrfs_clear_path_blocking(p, NULL); | ||
1587 | |||
1588 | /* | ||
1589 | * we have a lock on b and as long as we aren't changing | ||
1590 | * the tree, there is no way to for the items in b to change. | ||
1591 | * It is safe to drop the lock on our parent before we | ||
1592 | * go through the expensive btree search on b. | ||
1593 | * | ||
1594 | * If cow is true, then we might be changing slot zero, | ||
1595 | * which may require changing the parent. So, we can't | ||
1596 | * drop the lock until after we know which slot we're | ||
1597 | * operating on. | ||
1598 | */ | ||
1599 | if (!cow) | ||
1600 | btrfs_unlock_up_safe(p, level + 1); | ||
1601 | |||
1449 | ret = check_block(root, p, level); | 1602 | ret = check_block(root, p, level); |
1450 | if (ret) { | 1603 | if (ret) { |
1451 | ret = -1; | 1604 | ret = -1; |
@@ -1453,6 +1606,7 @@ cow_done: | |||
1453 | } | 1606 | } |
1454 | 1607 | ||
1455 | ret = bin_search(b, key, level, &slot); | 1608 | ret = bin_search(b, key, level, &slot); |
1609 | |||
1456 | if (level != 0) { | 1610 | if (level != 0) { |
1457 | if (ret && slot > 0) | 1611 | if (ret && slot > 0) |
1458 | slot -= 1; | 1612 | slot -= 1; |
@@ -1460,7 +1614,16 @@ cow_done: | |||
1460 | if ((p->search_for_split || ins_len > 0) && | 1614 | if ((p->search_for_split || ins_len > 0) && |
1461 | btrfs_header_nritems(b) >= | 1615 | btrfs_header_nritems(b) >= |
1462 | BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { | 1616 | BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { |
1463 | int sret = split_node(trans, root, p, level); | 1617 | int sret; |
1618 | |||
1619 | sret = reada_for_balance(root, p, level); | ||
1620 | if (sret) | ||
1621 | goto again; | ||
1622 | |||
1623 | btrfs_set_path_blocking(p); | ||
1624 | sret = split_node(trans, root, p, level); | ||
1625 | btrfs_clear_path_blocking(p, NULL); | ||
1626 | |||
1464 | BUG_ON(sret > 0); | 1627 | BUG_ON(sret > 0); |
1465 | if (sret) { | 1628 | if (sret) { |
1466 | ret = sret; | 1629 | ret = sret; |
@@ -1468,9 +1631,19 @@ cow_done: | |||
1468 | } | 1631 | } |
1469 | b = p->nodes[level]; | 1632 | b = p->nodes[level]; |
1470 | slot = p->slots[level]; | 1633 | slot = p->slots[level]; |
1471 | } else if (ins_len < 0) { | 1634 | } else if (ins_len < 0 && |
1472 | int sret = balance_level(trans, root, p, | 1635 | btrfs_header_nritems(b) < |
1473 | level); | 1636 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) { |
1637 | int sret; | ||
1638 | |||
1639 | sret = reada_for_balance(root, p, level); | ||
1640 | if (sret) | ||
1641 | goto again; | ||
1642 | |||
1643 | btrfs_set_path_blocking(p); | ||
1644 | sret = balance_level(trans, root, p, level); | ||
1645 | btrfs_clear_path_blocking(p, NULL); | ||
1646 | |||
1474 | if (sret) { | 1647 | if (sret) { |
1475 | ret = sret; | 1648 | ret = sret; |
1476 | goto done; | 1649 | goto done; |
@@ -1504,7 +1677,7 @@ cow_done: | |||
1504 | * of the btree by dropping locks before | 1677 | * of the btree by dropping locks before |
1505 | * we read. | 1678 | * we read. |
1506 | */ | 1679 | */ |
1507 | if (level > 1) { | 1680 | if (level > 0) { |
1508 | btrfs_release_path(NULL, p); | 1681 | btrfs_release_path(NULL, p); |
1509 | if (tmp) | 1682 | if (tmp) |
1510 | free_extent_buffer(tmp); | 1683 | free_extent_buffer(tmp); |
@@ -1519,6 +1692,7 @@ cow_done: | |||
1519 | free_extent_buffer(tmp); | 1692 | free_extent_buffer(tmp); |
1520 | goto again; | 1693 | goto again; |
1521 | } else { | 1694 | } else { |
1695 | btrfs_set_path_blocking(p); | ||
1522 | if (tmp) | 1696 | if (tmp) |
1523 | free_extent_buffer(tmp); | 1697 | free_extent_buffer(tmp); |
1524 | if (should_reada) | 1698 | if (should_reada) |
@@ -1528,14 +1702,29 @@ cow_done: | |||
1528 | b = read_node_slot(root, b, slot); | 1702 | b = read_node_slot(root, b, slot); |
1529 | } | 1703 | } |
1530 | } | 1704 | } |
1531 | if (!p->skip_locking) | 1705 | if (!p->skip_locking) { |
1532 | btrfs_tree_lock(b); | 1706 | int lret; |
1707 | |||
1708 | btrfs_clear_path_blocking(p, NULL); | ||
1709 | lret = btrfs_try_spin_lock(b); | ||
1710 | |||
1711 | if (!lret) { | ||
1712 | btrfs_set_path_blocking(p); | ||
1713 | btrfs_tree_lock(b); | ||
1714 | btrfs_clear_path_blocking(p, b); | ||
1715 | } | ||
1716 | } | ||
1533 | } else { | 1717 | } else { |
1534 | p->slots[level] = slot; | 1718 | p->slots[level] = slot; |
1535 | if (ins_len > 0 && | 1719 | if (ins_len > 0 && |
1536 | btrfs_leaf_free_space(root, b) < ins_len) { | 1720 | btrfs_leaf_free_space(root, b) < ins_len) { |
1537 | int sret = split_leaf(trans, root, key, | 1721 | int sret; |
1722 | |||
1723 | btrfs_set_path_blocking(p); | ||
1724 | sret = split_leaf(trans, root, key, | ||
1538 | p, ins_len, ret == 0); | 1725 | p, ins_len, ret == 0); |
1726 | btrfs_clear_path_blocking(p, NULL); | ||
1727 | |||
1539 | BUG_ON(sret > 0); | 1728 | BUG_ON(sret > 0); |
1540 | if (sret) { | 1729 | if (sret) { |
1541 | ret = sret; | 1730 | ret = sret; |
@@ -1549,12 +1738,16 @@ cow_done: | |||
1549 | } | 1738 | } |
1550 | ret = 1; | 1739 | ret = 1; |
1551 | done: | 1740 | done: |
1741 | /* | ||
1742 | * we don't really know what they plan on doing with the path | ||
1743 | * from here on, so for now just mark it as blocking | ||
1744 | */ | ||
1745 | btrfs_set_path_blocking(p); | ||
1552 | if (prealloc_block.objectid) { | 1746 | if (prealloc_block.objectid) { |
1553 | btrfs_free_reserved_extent(root, | 1747 | btrfs_free_reserved_extent(root, |
1554 | prealloc_block.objectid, | 1748 | prealloc_block.objectid, |
1555 | prealloc_block.offset); | 1749 | prealloc_block.offset); |
1556 | } | 1750 | } |
1557 | |||
1558 | return ret; | 1751 | return ret; |
1559 | } | 1752 | } |
1560 | 1753 | ||
@@ -1578,6 +1771,8 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, | |||
1578 | ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0); | 1771 | ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0); |
1579 | BUG_ON(ret); | 1772 | BUG_ON(ret); |
1580 | 1773 | ||
1774 | btrfs_set_lock_blocking(eb); | ||
1775 | |||
1581 | parent = eb; | 1776 | parent = eb; |
1582 | while (1) { | 1777 | while (1) { |
1583 | level = btrfs_header_level(parent); | 1778 | level = btrfs_header_level(parent); |
@@ -1602,6 +1797,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, | |||
1602 | eb = read_tree_block(root, bytenr, blocksize, | 1797 | eb = read_tree_block(root, bytenr, blocksize, |
1603 | generation); | 1798 | generation); |
1604 | btrfs_tree_lock(eb); | 1799 | btrfs_tree_lock(eb); |
1800 | btrfs_set_lock_blocking(eb); | ||
1605 | } | 1801 | } |
1606 | 1802 | ||
1607 | /* | 1803 | /* |
@@ -1626,6 +1822,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, | |||
1626 | eb = read_tree_block(root, bytenr, blocksize, | 1822 | eb = read_tree_block(root, bytenr, blocksize, |
1627 | generation); | 1823 | generation); |
1628 | btrfs_tree_lock(eb); | 1824 | btrfs_tree_lock(eb); |
1825 | btrfs_set_lock_blocking(eb); | ||
1629 | } | 1826 | } |
1630 | 1827 | ||
1631 | ret = btrfs_cow_block(trans, root, eb, parent, slot, | 1828 | ret = btrfs_cow_block(trans, root, eb, parent, slot, |
@@ -2168,10 +2365,12 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2168 | if (slot >= btrfs_header_nritems(upper) - 1) | 2365 | if (slot >= btrfs_header_nritems(upper) - 1) |
2169 | return 1; | 2366 | return 1; |
2170 | 2367 | ||
2171 | WARN_ON(!btrfs_tree_locked(path->nodes[1])); | 2368 | btrfs_assert_tree_locked(path->nodes[1]); |
2172 | 2369 | ||
2173 | right = read_node_slot(root, upper, slot + 1); | 2370 | right = read_node_slot(root, upper, slot + 1); |
2174 | btrfs_tree_lock(right); | 2371 | btrfs_tree_lock(right); |
2372 | btrfs_set_lock_blocking(right); | ||
2373 | |||
2175 | free_space = btrfs_leaf_free_space(root, right); | 2374 | free_space = btrfs_leaf_free_space(root, right); |
2176 | if (free_space < data_size) | 2375 | if (free_space < data_size) |
2177 | goto out_unlock; | 2376 | goto out_unlock; |
@@ -2363,10 +2562,12 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2363 | if (right_nritems == 0) | 2562 | if (right_nritems == 0) |
2364 | return 1; | 2563 | return 1; |
2365 | 2564 | ||
2366 | WARN_ON(!btrfs_tree_locked(path->nodes[1])); | 2565 | btrfs_assert_tree_locked(path->nodes[1]); |
2367 | 2566 | ||
2368 | left = read_node_slot(root, path->nodes[1], slot - 1); | 2567 | left = read_node_slot(root, path->nodes[1], slot - 1); |
2369 | btrfs_tree_lock(left); | 2568 | btrfs_tree_lock(left); |
2569 | btrfs_set_lock_blocking(left); | ||
2570 | |||
2370 | free_space = btrfs_leaf_free_space(root, left); | 2571 | free_space = btrfs_leaf_free_space(root, left); |
2371 | if (free_space < data_size) { | 2572 | if (free_space < data_size) { |
2372 | ret = 1; | 2573 | ret = 1; |
@@ -2825,6 +3026,12 @@ int btrfs_split_item(struct btrfs_trans_handle *trans, | |||
2825 | path->keep_locks = 0; | 3026 | path->keep_locks = 0; |
2826 | BUG_ON(ret); | 3027 | BUG_ON(ret); |
2827 | 3028 | ||
3029 | /* | ||
3030 | * make sure any changes to the path from split_leaf leave it | ||
3031 | * in a blocking state | ||
3032 | */ | ||
3033 | btrfs_set_path_blocking(path); | ||
3034 | |||
2828 | leaf = path->nodes[0]; | 3035 | leaf = path->nodes[0]; |
2829 | BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); | 3036 | BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); |
2830 | 3037 | ||
@@ -3354,6 +3561,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | |||
3354 | BUG(); | 3561 | BUG(); |
3355 | } | 3562 | } |
3356 | out: | 3563 | out: |
3564 | btrfs_unlock_up_safe(path, 1); | ||
3357 | return ret; | 3565 | return ret; |
3358 | } | 3566 | } |
3359 | 3567 | ||
@@ -3441,15 +3649,22 @@ noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
3441 | { | 3649 | { |
3442 | int ret; | 3650 | int ret; |
3443 | u64 root_gen = btrfs_header_generation(path->nodes[1]); | 3651 | u64 root_gen = btrfs_header_generation(path->nodes[1]); |
3652 | u64 parent_start = path->nodes[1]->start; | ||
3653 | u64 parent_owner = btrfs_header_owner(path->nodes[1]); | ||
3444 | 3654 | ||
3445 | ret = del_ptr(trans, root, path, 1, path->slots[1]); | 3655 | ret = del_ptr(trans, root, path, 1, path->slots[1]); |
3446 | if (ret) | 3656 | if (ret) |
3447 | return ret; | 3657 | return ret; |
3448 | 3658 | ||
3659 | /* | ||
3660 | * btrfs_free_extent is expensive, we want to make sure we | ||
3661 | * aren't holding any locks when we call it | ||
3662 | */ | ||
3663 | btrfs_unlock_up_safe(path, 0); | ||
3664 | |||
3449 | ret = btrfs_free_extent(trans, root, bytenr, | 3665 | ret = btrfs_free_extent(trans, root, bytenr, |
3450 | btrfs_level_size(root, 0), | 3666 | btrfs_level_size(root, 0), |
3451 | path->nodes[1]->start, | 3667 | parent_start, parent_owner, |
3452 | btrfs_header_owner(path->nodes[1]), | ||
3453 | root_gen, 0, 1); | 3668 | root_gen, 0, 1); |
3454 | return ret; | 3669 | return ret; |
3455 | } | 3670 | } |
@@ -3721,6 +3936,7 @@ find_next_key: | |||
3721 | */ | 3936 | */ |
3722 | if (slot >= nritems) { | 3937 | if (slot >= nritems) { |
3723 | path->slots[level] = slot; | 3938 | path->slots[level] = slot; |
3939 | btrfs_set_path_blocking(path); | ||
3724 | sret = btrfs_find_next_key(root, path, min_key, level, | 3940 | sret = btrfs_find_next_key(root, path, min_key, level, |
3725 | cache_only, min_trans); | 3941 | cache_only, min_trans); |
3726 | if (sret == 0) { | 3942 | if (sret == 0) { |
@@ -3738,16 +3954,20 @@ find_next_key: | |||
3738 | unlock_up(path, level, 1); | 3954 | unlock_up(path, level, 1); |
3739 | goto out; | 3955 | goto out; |
3740 | } | 3956 | } |
3957 | btrfs_set_path_blocking(path); | ||
3741 | cur = read_node_slot(root, cur, slot); | 3958 | cur = read_node_slot(root, cur, slot); |
3742 | 3959 | ||
3743 | btrfs_tree_lock(cur); | 3960 | btrfs_tree_lock(cur); |
3961 | |||
3744 | path->locks[level - 1] = 1; | 3962 | path->locks[level - 1] = 1; |
3745 | path->nodes[level - 1] = cur; | 3963 | path->nodes[level - 1] = cur; |
3746 | unlock_up(path, level, 1); | 3964 | unlock_up(path, level, 1); |
3965 | btrfs_clear_path_blocking(path, NULL); | ||
3747 | } | 3966 | } |
3748 | out: | 3967 | out: |
3749 | if (ret == 0) | 3968 | if (ret == 0) |
3750 | memcpy(min_key, &found_key, sizeof(found_key)); | 3969 | memcpy(min_key, &found_key, sizeof(found_key)); |
3970 | btrfs_set_path_blocking(path); | ||
3751 | return ret; | 3971 | return ret; |
3752 | } | 3972 | } |
3753 | 3973 | ||
@@ -3843,6 +4063,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
3843 | if (ret < 0) | 4063 | if (ret < 0) |
3844 | return ret; | 4064 | return ret; |
3845 | 4065 | ||
4066 | btrfs_set_path_blocking(path); | ||
3846 | nritems = btrfs_header_nritems(path->nodes[0]); | 4067 | nritems = btrfs_header_nritems(path->nodes[0]); |
3847 | /* | 4068 | /* |
3848 | * by releasing the path above we dropped all our locks. A balance | 4069 | * by releasing the path above we dropped all our locks. A balance |
@@ -3873,14 +4094,16 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
3873 | free_extent_buffer(next); | 4094 | free_extent_buffer(next); |
3874 | } | 4095 | } |
3875 | 4096 | ||
4097 | /* the path was set to blocking above */ | ||
3876 | if (level == 1 && (path->locks[1] || path->skip_locking) && | 4098 | if (level == 1 && (path->locks[1] || path->skip_locking) && |
3877 | path->reada) | 4099 | path->reada) |
3878 | reada_for_search(root, path, level, slot, 0); | 4100 | reada_for_search(root, path, level, slot, 0); |
3879 | 4101 | ||
3880 | next = read_node_slot(root, c, slot); | 4102 | next = read_node_slot(root, c, slot); |
3881 | if (!path->skip_locking) { | 4103 | if (!path->skip_locking) { |
3882 | WARN_ON(!btrfs_tree_locked(c)); | 4104 | btrfs_assert_tree_locked(c); |
3883 | btrfs_tree_lock(next); | 4105 | btrfs_tree_lock(next); |
4106 | btrfs_set_lock_blocking(next); | ||
3884 | } | 4107 | } |
3885 | break; | 4108 | break; |
3886 | } | 4109 | } |
@@ -3897,12 +4120,15 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
3897 | path->locks[level] = 1; | 4120 | path->locks[level] = 1; |
3898 | if (!level) | 4121 | if (!level) |
3899 | break; | 4122 | break; |
4123 | |||
4124 | btrfs_set_path_blocking(path); | ||
3900 | if (level == 1 && path->locks[1] && path->reada) | 4125 | if (level == 1 && path->locks[1] && path->reada) |
3901 | reada_for_search(root, path, level, slot, 0); | 4126 | reada_for_search(root, path, level, slot, 0); |
3902 | next = read_node_slot(root, next, 0); | 4127 | next = read_node_slot(root, next, 0); |
3903 | if (!path->skip_locking) { | 4128 | if (!path->skip_locking) { |
3904 | WARN_ON(!btrfs_tree_locked(path->nodes[level])); | 4129 | btrfs_assert_tree_locked(path->nodes[level]); |
3905 | btrfs_tree_lock(next); | 4130 | btrfs_tree_lock(next); |
4131 | btrfs_set_lock_blocking(next); | ||
3906 | } | 4132 | } |
3907 | } | 4133 | } |
3908 | done: | 4134 | done: |
@@ -3927,6 +4153,7 @@ int btrfs_previous_item(struct btrfs_root *root, | |||
3927 | 4153 | ||
3928 | while (1) { | 4154 | while (1) { |
3929 | if (path->slots[0] == 0) { | 4155 | if (path->slots[0] == 0) { |
4156 | btrfs_set_path_blocking(path); | ||
3930 | ret = btrfs_prev_leaf(root, path); | 4157 | ret = btrfs_prev_leaf(root, path); |
3931 | if (ret != 0) | 4158 | if (ret != 0) |
3932 | return ret; | 4159 | return ret; |