-rw-r--r-- | fs/btrfs/ctree.c       | 234
-rw-r--r-- | fs/btrfs/ctree.h       |   4
-rw-r--r-- | fs/btrfs/disk-io.c     |  10
-rw-r--r-- | fs/btrfs/extent-tree.c |   5
-rw-r--r-- | fs/btrfs/extent_io.c   |  18
-rw-r--r-- | fs/btrfs/extent_io.h   |  16
-rw-r--r-- | fs/btrfs/inode.c       |   3
-rw-r--r-- | fs/btrfs/locking.c     | 208
-rw-r--r-- | fs/btrfs/locking.h     |   6
-rw-r--r-- | fs/btrfs/tree-defrag.c |   1
-rw-r--r-- | fs/btrfs/tree-log.c    |   4
11 files changed, 470 insertions, 39 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 3b6e35aafc9e..3af777357acb 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -54,6 +54,31 @@ struct btrfs_path *btrfs_alloc_path(void) | |||
54 | return path; | 54 | return path; |
55 | } | 55 | } |
56 | 56 | ||
57 | /* | ||
58 | * set all locked nodes in the path to blocking locks. This should | ||
59 | * be done before scheduling | ||
60 | */ | ||
61 | noinline void btrfs_set_path_blocking(struct btrfs_path *p) | ||
62 | { | ||
63 | int i; | ||
64 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) { | ||
65 | if (p->nodes[i] && p->locks[i]) | ||
66 | btrfs_set_lock_blocking(p->nodes[i]); | ||
67 | } | ||
68 | } | ||
69 | |||
70 | /* | ||
71 | * reset all the locked nodes in the path to spinning locks. | ||
72 | */ | ||
73 | noinline void btrfs_clear_path_blocking(struct btrfs_path *p) | ||
74 | { | ||
75 | int i; | ||
76 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) { | ||
77 | if (p->nodes[i] && p->locks[i]) | ||
78 | btrfs_clear_lock_blocking(p->nodes[i]); | ||
79 | } | ||
80 | } | ||
81 | |||
57 | /* this also releases the path */ | 82 | /* this also releases the path */ |
58 | void btrfs_free_path(struct btrfs_path *p) | 83 | void btrfs_free_path(struct btrfs_path *p) |
59 | { | 84 | { |
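
A minimal sketch of how the two helpers above are meant to bracket schedulable work; do_expensive_cow() is a hypothetical stand-in, not a function from this patch:

/*
 * Sketch only: bracket anything that may call schedule() with the new
 * path-blocking helpers.  do_expensive_cow() is hypothetical.
 */
static int cow_with_blocking(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, struct btrfs_path *p)
{
	int ret;

	/* the locks in p are spinning here; make them blocking first */
	btrfs_set_path_blocking(p);

	ret = do_expensive_cow(trans, root, p);	/* may sleep */

	/* back to cheap spinning locks for the btree search */
	btrfs_clear_path_blocking(p);
	return ret;
}
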
@@ -272,6 +297,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
272 | if (IS_ERR(cow)) | 297 | if (IS_ERR(cow)) |
273 | return PTR_ERR(cow); | 298 | return PTR_ERR(cow); |
274 | 299 | ||
300 | /* cow is set to blocking by btrfs_init_new_buffer */ | ||
301 | |||
275 | copy_extent_buffer(cow, buf, 0, 0, cow->len); | 302 | copy_extent_buffer(cow, buf, 0, 0, cow->len); |
276 | btrfs_set_header_bytenr(cow, cow->start); | 303 | btrfs_set_header_bytenr(cow, cow->start); |
277 | btrfs_set_header_generation(cow, trans->transid); | 304 | btrfs_set_header_generation(cow, trans->transid); |
@@ -397,6 +424,11 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
397 | } | 424 | } |
398 | 425 | ||
399 | search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); | 426 | search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); |
427 | |||
428 | if (parent) | ||
429 | btrfs_set_lock_blocking(parent); | ||
430 | btrfs_set_lock_blocking(buf); | ||
431 | |||
400 | ret = __btrfs_cow_block(trans, root, buf, parent, | 432 | ret = __btrfs_cow_block(trans, root, buf, parent, |
401 | parent_slot, cow_ret, search_start, 0, | 433 | parent_slot, cow_ret, search_start, 0, |
402 | prealloc_dest); | 434 | prealloc_dest); |
@@ -502,6 +534,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
502 | if (parent_nritems == 1) | 534 | if (parent_nritems == 1) |
503 | return 0; | 535 | return 0; |
504 | 536 | ||
537 | btrfs_set_lock_blocking(parent); | ||
538 | |||
505 | for (i = start_slot; i < end_slot; i++) { | 539 | for (i = start_slot; i < end_slot; i++) { |
506 | int close = 1; | 540 | int close = 1; |
507 | 541 | ||
@@ -562,6 +596,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
562 | search_start = last_block; | 596 | search_start = last_block; |
563 | 597 | ||
564 | btrfs_tree_lock(cur); | 598 | btrfs_tree_lock(cur); |
599 | btrfs_set_lock_blocking(cur); | ||
565 | err = __btrfs_cow_block(trans, root, cur, parent, i, | 600 | err = __btrfs_cow_block(trans, root, cur, parent, i, |
566 | &cur, search_start, | 601 | &cur, search_start, |
567 | min(16 * blocksize, | 602 | min(16 * blocksize, |
@@ -860,6 +895,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
860 | return 0; | 895 | return 0; |
861 | 896 | ||
862 | mid = path->nodes[level]; | 897 | mid = path->nodes[level]; |
898 | |||
863 | WARN_ON(!path->locks[level]); | 899 | WARN_ON(!path->locks[level]); |
864 | WARN_ON(btrfs_header_generation(mid) != trans->transid); | 900 | WARN_ON(btrfs_header_generation(mid) != trans->transid); |
865 | 901 | ||
@@ -882,6 +918,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
882 | /* promote the child to a root */ | 918 | /* promote the child to a root */ |
883 | child = read_node_slot(root, mid, 0); | 919 | child = read_node_slot(root, mid, 0); |
884 | btrfs_tree_lock(child); | 920 | btrfs_tree_lock(child); |
921 | btrfs_set_lock_blocking(child); | ||
885 | BUG_ON(!child); | 922 | BUG_ON(!child); |
886 | ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); | 923 | ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); |
887 | BUG_ON(ret); | 924 | BUG_ON(ret); |
@@ -898,6 +935,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
898 | 935 | ||
899 | add_root_to_dirty_list(root); | 936 | add_root_to_dirty_list(root); |
900 | btrfs_tree_unlock(child); | 937 | btrfs_tree_unlock(child); |
938 | |||
901 | path->locks[level] = 0; | 939 | path->locks[level] = 0; |
902 | path->nodes[level] = NULL; | 940 | path->nodes[level] = NULL; |
903 | clean_tree_block(trans, root, mid); | 941 | clean_tree_block(trans, root, mid); |
@@ -922,6 +960,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
922 | left = read_node_slot(root, parent, pslot - 1); | 960 | left = read_node_slot(root, parent, pslot - 1); |
923 | if (left) { | 961 | if (left) { |
924 | btrfs_tree_lock(left); | 962 | btrfs_tree_lock(left); |
963 | btrfs_set_lock_blocking(left); | ||
925 | wret = btrfs_cow_block(trans, root, left, | 964 | wret = btrfs_cow_block(trans, root, left, |
926 | parent, pslot - 1, &left, 0); | 965 | parent, pslot - 1, &left, 0); |
927 | if (wret) { | 966 | if (wret) { |
@@ -932,6 +971,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
932 | right = read_node_slot(root, parent, pslot + 1); | 971 | right = read_node_slot(root, parent, pslot + 1); |
933 | if (right) { | 972 | if (right) { |
934 | btrfs_tree_lock(right); | 973 | btrfs_tree_lock(right); |
974 | btrfs_set_lock_blocking(right); | ||
935 | wret = btrfs_cow_block(trans, root, right, | 975 | wret = btrfs_cow_block(trans, root, right, |
936 | parent, pslot + 1, &right, 0); | 976 | parent, pslot + 1, &right, 0); |
937 | if (wret) { | 977 | if (wret) { |
@@ -1107,6 +1147,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1107 | u32 left_nr; | 1147 | u32 left_nr; |
1108 | 1148 | ||
1109 | btrfs_tree_lock(left); | 1149 | btrfs_tree_lock(left); |
1150 | btrfs_set_lock_blocking(left); | ||
1151 | |||
1110 | left_nr = btrfs_header_nritems(left); | 1152 | left_nr = btrfs_header_nritems(left); |
1111 | if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { | 1153 | if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { |
1112 | wret = 1; | 1154 | wret = 1; |
@@ -1153,7 +1195,10 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1153 | */ | 1195 | */ |
1154 | if (right) { | 1196 | if (right) { |
1155 | u32 right_nr; | 1197 | u32 right_nr; |
1198 | |||
1156 | btrfs_tree_lock(right); | 1199 | btrfs_tree_lock(right); |
1200 | btrfs_set_lock_blocking(right); | ||
1201 | |||
1157 | right_nr = btrfs_header_nritems(right); | 1202 | right_nr = btrfs_header_nritems(right); |
1158 | if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { | 1203 | if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { |
1159 | wret = 1; | 1204 | wret = 1; |
@@ -1265,6 +1310,68 @@ static noinline void reada_for_search(struct btrfs_root *root, | |||
1265 | } | 1310 | } |
1266 | 1311 | ||
1267 | /* | 1312 | /* |
1313 | * returns -EAGAIN if it had to drop the path, or zero if everything was in | ||
1314 | * cache | ||
1315 | */ | ||
1316 | static noinline int reada_for_balance(struct btrfs_root *root, | ||
1317 | struct btrfs_path *path, int level) | ||
1318 | { | ||
1319 | int slot; | ||
1320 | int nritems; | ||
1321 | struct extent_buffer *parent; | ||
1322 | struct extent_buffer *eb; | ||
1323 | u64 gen; | ||
1324 | u64 block1 = 0; | ||
1325 | u64 block2 = 0; | ||
1326 | int ret = 0; | ||
1327 | int blocksize; | ||
1328 | |||
1329 | parent = path->nodes[level + 1]; | ||
1330 | if (!parent) | ||
1331 | return 0; | ||
1332 | |||
1333 | nritems = btrfs_header_nritems(parent); | ||
1334 | slot = path->slots[level + 1]; | ||
1335 | blocksize = btrfs_level_size(root, level); | ||
1336 | |||
1337 | if (slot > 0) { | ||
1338 | block1 = btrfs_node_blockptr(parent, slot - 1); | ||
1339 | gen = btrfs_node_ptr_generation(parent, slot - 1); | ||
1340 | eb = btrfs_find_tree_block(root, block1, blocksize); | ||
1341 | if (eb && btrfs_buffer_uptodate(eb, gen)) | ||
1342 | block1 = 0; | ||
1343 | free_extent_buffer(eb); | ||
1344 | } | ||
1345 | if (slot + 1 < nritems) { | ||
1346 | block2 = btrfs_node_blockptr(parent, slot + 1); | ||
1347 | gen = btrfs_node_ptr_generation(parent, slot + 1); | ||
1348 | eb = btrfs_find_tree_block(root, block2, blocksize); | ||
1349 | if (eb && btrfs_buffer_uptodate(eb, gen)) | ||
1350 | block2 = 0; | ||
1351 | free_extent_buffer(eb); | ||
1352 | } | ||
1353 | if (block1 || block2) { | ||
1354 | ret = -EAGAIN; | ||
1355 | btrfs_release_path(root, path); | ||
1356 | if (block1) | ||
1357 | readahead_tree_block(root, block1, blocksize, 0); | ||
1358 | if (block2) | ||
1359 | readahead_tree_block(root, block2, blocksize, 0); | ||
1360 | |||
1361 | if (block1) { | ||
1362 | eb = read_tree_block(root, block1, blocksize, 0); | ||
1363 | free_extent_buffer(eb); | ||
1364 | } | ||
1365 | if (block2) { | ||
1366 | eb = read_tree_block(root, block2, blocksize, 0); | ||
1367 | free_extent_buffer(eb); | ||
1368 | } | ||
1369 | } | ||
1370 | return ret; | ||
1371 | } | ||
1372 | |||
1373 | |||
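
Because reada_for_balance() drops the path before reading the sibling blocks, callers must restart the search on -EAGAIN. A hedged sketch of that discipline, where redo_search() stands in for the goto again retry used in btrfs_search_slot() below:

static int balance_step(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct btrfs_path *p, int level)
{
	int sret;

	sret = reada_for_balance(root, p, level);
	if (sret)	/* -EAGAIN: the path was released, search again */
		return redo_search(trans, root, p);

	/* balance_level() can block, so flip the path to blocking first */
	btrfs_set_path_blocking(p);
	sret = balance_level(trans, root, p, level);
	btrfs_clear_path_blocking(p);
	return sret;
}
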
1374 | /* | ||
1268 | * when we walk down the tree, it is usually safe to unlock the higher layers | 1375 | * when we walk down the tree, it is usually safe to unlock the higher layers |
1269 | * in the tree. The exceptions are when our path goes through slot 0, because | 1376 | * in the tree. The exceptions are when our path goes through slot 0, because |
1270 | * operations on the tree might require changing key pointers higher up in the | 1377 | * operations on the tree might require changing key pointers higher up in the |
@@ -1315,6 +1422,32 @@ static noinline void unlock_up(struct btrfs_path *path, int level, | |||
1315 | } | 1422 | } |
1316 | 1423 | ||
1317 | /* | 1424 | /* |
1425 | * This releases any locks held in the path starting at level and | ||
1426 | * going all the way up to the root. | ||
1427 | * | ||
1428 | * btrfs_search_slot will keep the lock held on higher nodes in a few | ||
1429 | * corner cases, such as COW of the block at slot zero in the node. This | ||
1430 | * ignores those rules, and it should only be called when there are no | ||
1431 | * more updates to be done higher up in the tree. | ||
1432 | */ | ||
1433 | noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) | ||
1434 | { | ||
1435 | int i; | ||
1436 | |||
1437 | if (path->keep_locks || path->lowest_level) | ||
1438 | return; | ||
1439 | |||
1440 | for (i = level; i < BTRFS_MAX_LEVEL; i++) { | ||
1441 | if (!path->nodes[i]) | ||
1442 | break; | ||
1443 | if (!path->locks[i]) | ||
1444 | break; | ||
1445 | btrfs_tree_unlock(path->nodes[i]); | ||
1446 | path->locks[i] = 0; | ||
1447 | } | ||
1448 | } | ||
1449 | |||
1450 | /* | ||
1318 | * look for key in the tree. path is filled in with nodes along the way | 1451 | * look for key in the tree. path is filled in with nodes along the way |
1319 | * if key is found, we return zero and you can find the item in the leaf | 1452 | * if key is found, we return zero and you can find the item in the leaf |
1320 | * level of the path (level 0) | 1453 | * level of the path (level 0) |
@@ -1385,6 +1518,7 @@ again: | |||
1385 | */ | 1518 | */ |
1386 | if (prealloc_block.objectid && | 1519 | if (prealloc_block.objectid && |
1387 | prealloc_block.offset != b->len) { | 1520 | prealloc_block.offset != b->len) { |
1521 | btrfs_set_path_blocking(p); | ||
1388 | btrfs_free_reserved_extent(root, | 1522 | btrfs_free_reserved_extent(root, |
1389 | prealloc_block.objectid, | 1523 | prealloc_block.objectid, |
1390 | prealloc_block.offset); | 1524 | prealloc_block.offset); |
@@ -1409,6 +1543,8 @@ again: | |||
1409 | goto again; | 1543 | goto again; |
1410 | } | 1544 | } |
1411 | 1545 | ||
1546 | btrfs_set_path_blocking(p); | ||
1547 | |||
1412 | wret = btrfs_cow_block(trans, root, b, | 1548 | wret = btrfs_cow_block(trans, root, b, |
1413 | p->nodes[level + 1], | 1549 | p->nodes[level + 1], |
1414 | p->slots[level + 1], | 1550 | p->slots[level + 1], |
@@ -1430,6 +1566,22 @@ cow_done: | |||
1430 | if (!p->skip_locking) | 1566 | if (!p->skip_locking) |
1431 | p->locks[level] = 1; | 1567 | p->locks[level] = 1; |
1432 | 1568 | ||
1569 | btrfs_clear_path_blocking(p); | ||
1570 | |||
1571 | /* | ||
1572 | * we have a lock on b and as long as we aren't changing | ||
1573 | * the tree, there is no way for the items in b to change. | ||
1574 | * It is safe to drop the lock on our parent before we | ||
1575 | * go through the expensive btree search on b. | ||
1576 | * | ||
1577 | * If cow is true, then we might be changing slot zero, | ||
1578 | * which may require changing the parent. So, we can't | ||
1579 | * drop the lock until after we know which slot we're | ||
1580 | * operating on. | ||
1581 | */ | ||
1582 | if (!cow) | ||
1583 | btrfs_unlock_up_safe(p, level + 1); | ||
1584 | |||
1433 | ret = check_block(root, p, level); | 1585 | ret = check_block(root, p, level); |
1434 | if (ret) { | 1586 | if (ret) { |
1435 | ret = -1; | 1587 | ret = -1; |
@@ -1437,6 +1589,7 @@ cow_done: | |||
1437 | } | 1589 | } |
1438 | 1590 | ||
1439 | ret = bin_search(b, key, level, &slot); | 1591 | ret = bin_search(b, key, level, &slot); |
1592 | |||
1440 | if (level != 0) { | 1593 | if (level != 0) { |
1441 | if (ret && slot > 0) | 1594 | if (ret && slot > 0) |
1442 | slot -= 1; | 1595 | slot -= 1; |
@@ -1444,7 +1597,16 @@ cow_done: | |||
1444 | if ((p->search_for_split || ins_len > 0) && | 1597 | if ((p->search_for_split || ins_len > 0) && |
1445 | btrfs_header_nritems(b) >= | 1598 | btrfs_header_nritems(b) >= |
1446 | BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { | 1599 | BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { |
1447 | int sret = split_node(trans, root, p, level); | 1600 | int sret; |
1601 | |||
1602 | sret = reada_for_balance(root, p, level); | ||
1603 | if (sret) | ||
1604 | goto again; | ||
1605 | |||
1606 | btrfs_set_path_blocking(p); | ||
1607 | sret = split_node(trans, root, p, level); | ||
1608 | btrfs_clear_path_blocking(p); | ||
1609 | |||
1448 | BUG_ON(sret > 0); | 1610 | BUG_ON(sret > 0); |
1449 | if (sret) { | 1611 | if (sret) { |
1450 | ret = sret; | 1612 | ret = sret; |
@@ -1453,8 +1615,16 @@ cow_done: | |||
1453 | b = p->nodes[level]; | 1615 | b = p->nodes[level]; |
1454 | slot = p->slots[level]; | 1616 | slot = p->slots[level]; |
1455 | } else if (ins_len < 0) { | 1617 | } else if (ins_len < 0) { |
1456 | int sret = balance_level(trans, root, p, | 1618 | int sret; |
1457 | level); | 1619 | |
1620 | sret = reada_for_balance(root, p, level); | ||
1621 | if (sret) | ||
1622 | goto again; | ||
1623 | |||
1624 | btrfs_set_path_blocking(p); | ||
1625 | sret = balance_level(trans, root, p, level); | ||
1626 | btrfs_clear_path_blocking(p); | ||
1627 | |||
1458 | if (sret) { | 1628 | if (sret) { |
1459 | ret = sret; | 1629 | ret = sret; |
1460 | goto done; | 1630 | goto done; |
@@ -1488,7 +1658,7 @@ cow_done: | |||
1488 | * of the btree by dropping locks before | 1658 | * of the btree by dropping locks before |
1489 | * we read. | 1659 | * we read. |
1490 | */ | 1660 | */ |
1491 | if (level > 1) { | 1661 | if (level > 0) { |
1492 | btrfs_release_path(NULL, p); | 1662 | btrfs_release_path(NULL, p); |
1493 | if (tmp) | 1663 | if (tmp) |
1494 | free_extent_buffer(tmp); | 1664 | free_extent_buffer(tmp); |
@@ -1503,6 +1673,7 @@ cow_done: | |||
1503 | free_extent_buffer(tmp); | 1673 | free_extent_buffer(tmp); |
1504 | goto again; | 1674 | goto again; |
1505 | } else { | 1675 | } else { |
1676 | btrfs_set_path_blocking(p); | ||
1506 | if (tmp) | 1677 | if (tmp) |
1507 | free_extent_buffer(tmp); | 1678 | free_extent_buffer(tmp); |
1508 | if (should_reada) | 1679 | if (should_reada) |
@@ -1512,14 +1683,29 @@ cow_done: | |||
1512 | b = read_node_slot(root, b, slot); | 1683 | b = read_node_slot(root, b, slot); |
1513 | } | 1684 | } |
1514 | } | 1685 | } |
1515 | if (!p->skip_locking) | 1686 | if (!p->skip_locking) { |
1516 | btrfs_tree_lock(b); | 1687 | int lret; |
1688 | |||
1689 | btrfs_clear_path_blocking(p); | ||
1690 | lret = btrfs_try_spin_lock(b); | ||
1691 | |||
1692 | if (!lret) { | ||
1693 | btrfs_set_path_blocking(p); | ||
1694 | btrfs_tree_lock(b); | ||
1695 | btrfs_clear_path_blocking(p); | ||
1696 | } | ||
1697 | } | ||
1517 | } else { | 1698 | } else { |
1518 | p->slots[level] = slot; | 1699 | p->slots[level] = slot; |
1519 | if (ins_len > 0 && | 1700 | if (ins_len > 0 && |
1520 | btrfs_leaf_free_space(root, b) < ins_len) { | 1701 | btrfs_leaf_free_space(root, b) < ins_len) { |
1521 | int sret = split_leaf(trans, root, key, | 1702 | int sret; |
1703 | |||
1704 | btrfs_set_path_blocking(p); | ||
1705 | sret = split_leaf(trans, root, key, | ||
1522 | p, ins_len, ret == 0); | 1706 | p, ins_len, ret == 0); |
1707 | btrfs_clear_path_blocking(p); | ||
1708 | |||
1523 | BUG_ON(sret > 0); | 1709 | BUG_ON(sret > 0); |
1524 | if (sret) { | 1710 | if (sret) { |
1525 | ret = sret; | 1711 | ret = sret; |
@@ -1533,12 +1719,16 @@ cow_done: | |||
1533 | } | 1719 | } |
1534 | ret = 1; | 1720 | ret = 1; |
1535 | done: | 1721 | done: |
1722 | /* | ||
1723 | * we don't really know what they plan on doing with the path | ||
1724 | * from here on, so for now just mark it as blocking | ||
1725 | */ | ||
1726 | btrfs_set_path_blocking(p); | ||
1536 | if (prealloc_block.objectid) { | 1727 | if (prealloc_block.objectid) { |
1537 | btrfs_free_reserved_extent(root, | 1728 | btrfs_free_reserved_extent(root, |
1538 | prealloc_block.objectid, | 1729 | prealloc_block.objectid, |
1539 | prealloc_block.offset); | 1730 | prealloc_block.offset); |
1540 | } | 1731 | } |
1541 | |||
1542 | return ret; | 1732 | return ret; |
1543 | } | 1733 | } |
1544 | 1734 | ||
@@ -1562,6 +1752,8 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, | |||
1562 | ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0); | 1752 | ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0); |
1563 | BUG_ON(ret); | 1753 | BUG_ON(ret); |
1564 | 1754 | ||
1755 | btrfs_set_lock_blocking(eb); | ||
1756 | |||
1565 | parent = eb; | 1757 | parent = eb; |
1566 | while (1) { | 1758 | while (1) { |
1567 | level = btrfs_header_level(parent); | 1759 | level = btrfs_header_level(parent); |
@@ -1586,6 +1778,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, | |||
1586 | eb = read_tree_block(root, bytenr, blocksize, | 1778 | eb = read_tree_block(root, bytenr, blocksize, |
1587 | generation); | 1779 | generation); |
1588 | btrfs_tree_lock(eb); | 1780 | btrfs_tree_lock(eb); |
1781 | btrfs_set_lock_blocking(eb); | ||
1589 | } | 1782 | } |
1590 | 1783 | ||
1591 | /* | 1784 | /* |
@@ -1610,6 +1803,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, | |||
1610 | eb = read_tree_block(root, bytenr, blocksize, | 1803 | eb = read_tree_block(root, bytenr, blocksize, |
1611 | generation); | 1804 | generation); |
1612 | btrfs_tree_lock(eb); | 1805 | btrfs_tree_lock(eb); |
1806 | btrfs_set_lock_blocking(eb); | ||
1613 | } | 1807 | } |
1614 | 1808 | ||
1615 | ret = btrfs_cow_block(trans, root, eb, parent, slot, | 1809 | ret = btrfs_cow_block(trans, root, eb, parent, slot, |
@@ -2156,6 +2350,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2156 | 2350 | ||
2157 | right = read_node_slot(root, upper, slot + 1); | 2351 | right = read_node_slot(root, upper, slot + 1); |
2158 | btrfs_tree_lock(right); | 2352 | btrfs_tree_lock(right); |
2353 | btrfs_set_lock_blocking(right); | ||
2354 | |||
2159 | free_space = btrfs_leaf_free_space(root, right); | 2355 | free_space = btrfs_leaf_free_space(root, right); |
2160 | if (free_space < data_size) | 2356 | if (free_space < data_size) |
2161 | goto out_unlock; | 2357 | goto out_unlock; |
@@ -2351,6 +2547,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2351 | 2547 | ||
2352 | left = read_node_slot(root, path->nodes[1], slot - 1); | 2548 | left = read_node_slot(root, path->nodes[1], slot - 1); |
2353 | btrfs_tree_lock(left); | 2549 | btrfs_tree_lock(left); |
2550 | btrfs_set_lock_blocking(left); | ||
2551 | |||
2354 | free_space = btrfs_leaf_free_space(root, left); | 2552 | free_space = btrfs_leaf_free_space(root, left); |
2355 | if (free_space < data_size) { | 2553 | if (free_space < data_size) { |
2356 | ret = 1; | 2554 | ret = 1; |
@@ -2809,6 +3007,12 @@ int btrfs_split_item(struct btrfs_trans_handle *trans, | |||
2809 | path->keep_locks = 0; | 3007 | path->keep_locks = 0; |
2810 | BUG_ON(ret); | 3008 | BUG_ON(ret); |
2811 | 3009 | ||
3010 | /* | ||
3011 | * make sure any changes to the path from split_leaf leave it | ||
3012 | * in a blocking state | ||
3013 | */ | ||
3014 | btrfs_set_path_blocking(path); | ||
3015 | |||
2812 | leaf = path->nodes[0]; | 3016 | leaf = path->nodes[0]; |
2813 | BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); | 3017 | BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); |
2814 | 3018 | ||
@@ -3338,6 +3542,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | |||
3338 | BUG(); | 3542 | BUG(); |
3339 | } | 3543 | } |
3340 | out: | 3544 | out: |
3545 | btrfs_unlock_up_safe(path, 1); | ||
3341 | return ret; | 3546 | return ret; |
3342 | } | 3547 | } |
3343 | 3548 | ||
@@ -3705,12 +3910,14 @@ find_next_key: | |||
3705 | */ | 3910 | */ |
3706 | if (slot >= nritems) { | 3911 | if (slot >= nritems) { |
3707 | path->slots[level] = slot; | 3912 | path->slots[level] = slot; |
3913 | btrfs_set_path_blocking(path); | ||
3708 | sret = btrfs_find_next_key(root, path, min_key, level, | 3914 | sret = btrfs_find_next_key(root, path, min_key, level, |
3709 | cache_only, min_trans); | 3915 | cache_only, min_trans); |
3710 | if (sret == 0) { | 3916 | if (sret == 0) { |
3711 | btrfs_release_path(root, path); | 3917 | btrfs_release_path(root, path); |
3712 | goto again; | 3918 | goto again; |
3713 | } else { | 3919 | } else { |
3920 | btrfs_clear_path_blocking(path); | ||
3714 | goto out; | 3921 | goto out; |
3715 | } | 3922 | } |
3716 | } | 3923 | } |
@@ -3722,16 +3929,20 @@ find_next_key: | |||
3722 | unlock_up(path, level, 1); | 3929 | unlock_up(path, level, 1); |
3723 | goto out; | 3930 | goto out; |
3724 | } | 3931 | } |
3932 | btrfs_set_path_blocking(path); | ||
3725 | cur = read_node_slot(root, cur, slot); | 3933 | cur = read_node_slot(root, cur, slot); |
3726 | 3934 | ||
3727 | btrfs_tree_lock(cur); | 3935 | btrfs_tree_lock(cur); |
3936 | |||
3728 | path->locks[level - 1] = 1; | 3937 | path->locks[level - 1] = 1; |
3729 | path->nodes[level - 1] = cur; | 3938 | path->nodes[level - 1] = cur; |
3730 | unlock_up(path, level, 1); | 3939 | unlock_up(path, level, 1); |
3940 | btrfs_clear_path_blocking(path); | ||
3731 | } | 3941 | } |
3732 | out: | 3942 | out: |
3733 | if (ret == 0) | 3943 | if (ret == 0) |
3734 | memcpy(min_key, &found_key, sizeof(found_key)); | 3944 | memcpy(min_key, &found_key, sizeof(found_key)); |
3945 | btrfs_set_path_blocking(path); | ||
3735 | return ret; | 3946 | return ret; |
3736 | } | 3947 | } |
3737 | 3948 | ||
@@ -3827,6 +4038,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
3827 | if (ret < 0) | 4038 | if (ret < 0) |
3828 | return ret; | 4039 | return ret; |
3829 | 4040 | ||
4041 | btrfs_set_path_blocking(path); | ||
3830 | nritems = btrfs_header_nritems(path->nodes[0]); | 4042 | nritems = btrfs_header_nritems(path->nodes[0]); |
3831 | /* | 4043 | /* |
3832 | * by releasing the path above we dropped all our locks. A balance | 4044 | * by releasing the path above we dropped all our locks. A balance |
@@ -3857,6 +4069,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
3857 | free_extent_buffer(next); | 4069 | free_extent_buffer(next); |
3858 | } | 4070 | } |
3859 | 4071 | ||
4072 | /* the path was set to blocking above */ | ||
3860 | if (level == 1 && (path->locks[1] || path->skip_locking) && | 4073 | if (level == 1 && (path->locks[1] || path->skip_locking) && |
3861 | path->reada) | 4074 | path->reada) |
3862 | reada_for_search(root, path, level, slot, 0); | 4075 | reada_for_search(root, path, level, slot, 0); |
@@ -3865,6 +4078,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
3865 | if (!path->skip_locking) { | 4078 | if (!path->skip_locking) { |
3866 | WARN_ON(!btrfs_tree_locked(c)); | 4079 | WARN_ON(!btrfs_tree_locked(c)); |
3867 | btrfs_tree_lock(next); | 4080 | btrfs_tree_lock(next); |
4081 | btrfs_set_lock_blocking(next); | ||
3868 | } | 4082 | } |
3869 | break; | 4083 | break; |
3870 | } | 4084 | } |
@@ -3881,12 +4095,15 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
3881 | path->locks[level] = 1; | 4095 | path->locks[level] = 1; |
3882 | if (!level) | 4096 | if (!level) |
3883 | break; | 4097 | break; |
4098 | |||
4099 | btrfs_set_path_blocking(path); | ||
3884 | if (level == 1 && path->locks[1] && path->reada) | 4100 | if (level == 1 && path->locks[1] && path->reada) |
3885 | reada_for_search(root, path, level, slot, 0); | 4101 | reada_for_search(root, path, level, slot, 0); |
3886 | next = read_node_slot(root, next, 0); | 4102 | next = read_node_slot(root, next, 0); |
3887 | if (!path->skip_locking) { | 4103 | if (!path->skip_locking) { |
3888 | WARN_ON(!btrfs_tree_locked(path->nodes[level])); | 4104 | WARN_ON(!btrfs_tree_locked(path->nodes[level])); |
3889 | btrfs_tree_lock(next); | 4105 | btrfs_tree_lock(next); |
4106 | btrfs_set_lock_blocking(next); | ||
3890 | } | 4107 | } |
3891 | } | 4108 | } |
3892 | done: | 4109 | done: |
@@ -3911,6 +4128,7 @@ int btrfs_previous_item(struct btrfs_root *root, | |||
3911 | 4128 | ||
3912 | while (1) { | 4129 | while (1) { |
3913 | if (path->slots[0] == 0) { | 4130 | if (path->slots[0] == 0) { |
4131 | btrfs_set_path_blocking(path); | ||
3914 | ret = btrfs_prev_leaf(root, path); | 4132 | ret = btrfs_prev_leaf(root, path); |
3915 | if (ret != 0) | 4133 | if (ret != 0) |
3916 | return ret; | 4134 | return ret; |
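
One pattern worth isolating from the btrfs_search_slot() hunks above: when descending to a child node, the cheap spinning lock is tried first, and the full blocking transition happens only on contention. A sketch under that reading, not verbatim from the patch:

static void lock_next_level(struct btrfs_path *p, struct extent_buffer *b)
{
	/* fast path: take the spinlock without ever sleeping */
	if (btrfs_try_spin_lock(b))
		return;

	/* contended: mark the path blocking so btrfs_tree_lock() may sleep */
	btrfs_set_path_blocking(p);
	btrfs_tree_lock(b);
	btrfs_clear_path_blocking(p);
}
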
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f2b8d26b0472..531db112c8bd 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1835,6 +1835,10 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); | |||
1835 | struct btrfs_path *btrfs_alloc_path(void); | 1835 | struct btrfs_path *btrfs_alloc_path(void); |
1836 | void btrfs_free_path(struct btrfs_path *p); | 1836 | void btrfs_free_path(struct btrfs_path *p); |
1837 | void btrfs_init_path(struct btrfs_path *p); | 1837 | void btrfs_init_path(struct btrfs_path *p); |
1838 | void btrfs_set_path_blocking(struct btrfs_path *p); | ||
1839 | void btrfs_clear_path_blocking(struct btrfs_path *p); | ||
1840 | void btrfs_unlock_up_safe(struct btrfs_path *p, int level); | ||
1841 | |||
1838 | int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 1842 | int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
1839 | struct btrfs_path *path, int slot, int nr); | 1843 | struct btrfs_path *path, int slot, int nr); |
1840 | int btrfs_del_leaf(struct btrfs_trans_handle *trans, | 1844 | int btrfs_del_leaf(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 549271607c17..5aebddd71193 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -799,7 +799,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | |||
799 | ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); | 799 | ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); |
800 | 800 | ||
801 | if (ret == 0) | 801 | if (ret == 0) |
802 | buf->flags |= EXTENT_UPTODATE; | 802 | set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); |
803 | else | 803 | else |
804 | WARN_ON(1); | 804 | WARN_ON(1); |
805 | return buf; | 805 | return buf; |
@@ -813,6 +813,10 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
813 | if (btrfs_header_generation(buf) == | 813 | if (btrfs_header_generation(buf) == |
814 | root->fs_info->running_transaction->transid) { | 814 | root->fs_info->running_transaction->transid) { |
815 | WARN_ON(!btrfs_tree_locked(buf)); | 815 | WARN_ON(!btrfs_tree_locked(buf)); |
816 | |||
817 | /* ugh, clear_extent_buffer_dirty can be expensive */ | ||
818 | btrfs_set_lock_blocking(buf); | ||
819 | |||
816 | clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, | 820 | clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, |
817 | buf); | 821 | buf); |
818 | } | 822 | } |
@@ -2311,6 +2315,8 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
2311 | u64 transid = btrfs_header_generation(buf); | 2315 | u64 transid = btrfs_header_generation(buf); |
2312 | struct inode *btree_inode = root->fs_info->btree_inode; | 2316 | struct inode *btree_inode = root->fs_info->btree_inode; |
2313 | 2317 | ||
2318 | btrfs_set_lock_blocking(buf); | ||
2319 | |||
2314 | WARN_ON(!btrfs_tree_locked(buf)); | 2320 | WARN_ON(!btrfs_tree_locked(buf)); |
2315 | if (transid != root->fs_info->generation) { | 2321 | if (transid != root->fs_info->generation) { |
2316 | printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " | 2322 | printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " |
@@ -2353,7 +2359,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | |||
2353 | int ret; | 2359 | int ret; |
2354 | ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); | 2360 | ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); |
2355 | if (ret == 0) | 2361 | if (ret == 0) |
2356 | buf->flags |= EXTENT_UPTODATE; | 2362 | set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); |
2357 | return ret; | 2363 | return ret; |
2358 | } | 2364 | } |
2359 | 2365 | ||
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 7a22f2e6ec47..ed1e25d72483 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3407,7 +3407,10 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | |||
3407 | btrfs_set_header_generation(buf, trans->transid); | 3407 | btrfs_set_header_generation(buf, trans->transid); |
3408 | btrfs_tree_lock(buf); | 3408 | btrfs_tree_lock(buf); |
3409 | clean_tree_block(trans, root, buf); | 3409 | clean_tree_block(trans, root, buf); |
3410 | |||
3411 | btrfs_set_lock_blocking(buf); | ||
3410 | btrfs_set_buffer_uptodate(buf); | 3412 | btrfs_set_buffer_uptodate(buf); |
3413 | |||
3411 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { | 3414 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { |
3412 | set_extent_dirty(&root->dirty_log_pages, buf->start, | 3415 | set_extent_dirty(&root->dirty_log_pages, buf->start, |
3413 | buf->start + buf->len - 1, GFP_NOFS); | 3416 | buf->start + buf->len - 1, GFP_NOFS); |
@@ -3416,6 +3419,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | |||
3416 | buf->start + buf->len - 1, GFP_NOFS); | 3419 | buf->start + buf->len - 1, GFP_NOFS); |
3417 | } | 3420 | } |
3418 | trans->blocks_used++; | 3421 | trans->blocks_used++; |
3422 | /* this returns a buffer locked for blocking */ | ||
3419 | return buf; | 3423 | return buf; |
3420 | } | 3424 | } |
3421 | 3425 | ||
@@ -3752,6 +3756,7 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans, | |||
3752 | 3756 | ||
3753 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); | 3757 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); |
3754 | btrfs_tree_lock(next); | 3758 | btrfs_tree_lock(next); |
3759 | btrfs_set_lock_blocking(next); | ||
3755 | 3760 | ||
3756 | ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, | 3761 | ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, |
3757 | &refs); | 3762 | &refs); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2ea7f052722c..dd5df53e045a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2990,7 +2990,9 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
2990 | eb = kmem_cache_zalloc(extent_buffer_cache, mask); | 2990 | eb = kmem_cache_zalloc(extent_buffer_cache, mask); |
2991 | eb->start = start; | 2991 | eb->start = start; |
2992 | eb->len = len; | 2992 | eb->len = len; |
2993 | mutex_init(&eb->mutex); | 2993 | spin_lock_init(&eb->lock); |
2994 | init_waitqueue_head(&eb->lock_wq); | ||
2995 | |||
2994 | #if LEAK_DEBUG | 2996 | #if LEAK_DEBUG |
2995 | spin_lock_irqsave(&leak_lock, flags); | 2997 | spin_lock_irqsave(&leak_lock, flags); |
2996 | list_add(&eb->leak_list, &buffers); | 2998 | list_add(&eb->leak_list, &buffers); |
@@ -3071,8 +3073,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
3071 | unlock_page(p); | 3073 | unlock_page(p); |
3072 | } | 3074 | } |
3073 | if (uptodate) | 3075 | if (uptodate) |
3074 | eb->flags |= EXTENT_UPTODATE; | 3076 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
3075 | eb->flags |= EXTENT_BUFFER_FILLED; | ||
3076 | 3077 | ||
3077 | spin_lock(&tree->buffer_lock); | 3078 | spin_lock(&tree->buffer_lock); |
3078 | exists = buffer_tree_insert(tree, start, &eb->rb_node); | 3079 | exists = buffer_tree_insert(tree, start, &eb->rb_node); |
@@ -3226,7 +3227,7 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3226 | unsigned long num_pages; | 3227 | unsigned long num_pages; |
3227 | 3228 | ||
3228 | num_pages = num_extent_pages(eb->start, eb->len); | 3229 | num_pages = num_extent_pages(eb->start, eb->len); |
3229 | eb->flags &= ~EXTENT_UPTODATE; | 3230 | clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
3230 | 3231 | ||
3231 | clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, | 3232 | clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, |
3232 | GFP_NOFS); | 3233 | GFP_NOFS); |
@@ -3297,7 +3298,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3297 | struct page *page; | 3298 | struct page *page; |
3298 | int pg_uptodate = 1; | 3299 | int pg_uptodate = 1; |
3299 | 3300 | ||
3300 | if (eb->flags & EXTENT_UPTODATE) | 3301 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) |
3301 | return 1; | 3302 | return 1; |
3302 | 3303 | ||
3303 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3304 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
@@ -3333,7 +3334,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
3333 | struct bio *bio = NULL; | 3334 | struct bio *bio = NULL; |
3334 | unsigned long bio_flags = 0; | 3335 | unsigned long bio_flags = 0; |
3335 | 3336 | ||
3336 | if (eb->flags & EXTENT_UPTODATE) | 3337 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) |
3337 | return 0; | 3338 | return 0; |
3338 | 3339 | ||
3339 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3340 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
@@ -3364,7 +3365,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
3364 | } | 3365 | } |
3365 | if (all_uptodate) { | 3366 | if (all_uptodate) { |
3366 | if (start_i == 0) | 3367 | if (start_i == 0) |
3367 | eb->flags |= EXTENT_UPTODATE; | 3368 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
3368 | goto unlock_exit; | 3369 | goto unlock_exit; |
3369 | } | 3370 | } |
3370 | 3371 | ||
@@ -3400,7 +3401,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
3400 | } | 3401 | } |
3401 | 3402 | ||
3402 | if (!ret) | 3403 | if (!ret) |
3403 | eb->flags |= EXTENT_UPTODATE; | 3404 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
3404 | return ret; | 3405 | return ret; |
3405 | 3406 | ||
3406 | unlock_exit: | 3407 | unlock_exit: |
@@ -3497,7 +3498,6 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long start, | |||
3497 | unmap_extent_buffer(eb, eb->map_token, km); | 3498 | unmap_extent_buffer(eb, eb->map_token, km); |
3498 | eb->map_token = NULL; | 3499 | eb->map_token = NULL; |
3499 | save = 1; | 3500 | save = 1; |
3500 | WARN_ON(!mutex_is_locked(&eb->mutex)); | ||
3501 | } | 3501 | } |
3502 | err = map_private_extent_buffer(eb, start, min_len, token, map, | 3502 | err = map_private_extent_buffer(eb, start, min_len, token, map, |
3503 | map_start, map_len, km); | 3503 | map_start, map_len, km); |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index e80c6d96b318..1f9df88afbf6 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -22,6 +22,10 @@ | |||
22 | /* flags for bio submission */ | 22 | /* flags for bio submission */ |
23 | #define EXTENT_BIO_COMPRESSED 1 | 23 | #define EXTENT_BIO_COMPRESSED 1 |
24 | 24 | ||
25 | /* these are bit numbers for test/set bit */ | ||
26 | #define EXTENT_BUFFER_UPTODATE 0 | ||
27 | #define EXTENT_BUFFER_BLOCKING 1 | ||
28 | |||
25 | /* | 29 | /* |
26 | * page->private values. Every page that is controlled by the extent | 30 | * page->private values. Every page that is controlled by the extent |
27 | * map has page->private set to one. | 31 | * map has page->private set to one. |
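
These constants are bit numbers rather than masks, so every former eb->flags & EXTENT_UPTODATE test becomes an atomic bitop on the new bflags word, roughly as in this sketch:

/* before: plain int flags, only safe under the old per-buffer mutex */
if (eb->flags & EXTENT_UPTODATE)
	return 1;

/* after: lock-free atomic bitops on the unsigned long bflags */
if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
	return 1;
set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
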
@@ -95,11 +99,19 @@ struct extent_buffer { | |||
95 | unsigned long map_start; | 99 | unsigned long map_start; |
96 | unsigned long map_len; | 100 | unsigned long map_len; |
97 | struct page *first_page; | 101 | struct page *first_page; |
102 | unsigned long bflags; | ||
98 | atomic_t refs; | 103 | atomic_t refs; |
99 | int flags; | ||
100 | struct list_head leak_list; | 104 | struct list_head leak_list; |
101 | struct rb_node rb_node; | 105 | struct rb_node rb_node; |
102 | struct mutex mutex; | 106 | |
107 | /* the spinlock is used to protect most operations */ | ||
108 | spinlock_t lock; | ||
109 | |||
110 | /* | ||
111 | * when we keep the lock held while blocking, waiters go onto | ||
112 | * the wq | ||
113 | */ | ||
114 | wait_queue_head_t lock_wq; | ||
103 | }; | 115 | }; |
104 | 116 | ||
105 | struct extent_map_tree; | 117 | struct extent_map_tree; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4a79e1c5ebd0..ebd7d6c37df8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -50,6 +50,7 @@ | |||
50 | #include "tree-log.h" | 50 | #include "tree-log.h" |
51 | #include "ref-cache.h" | 51 | #include "ref-cache.h" |
52 | #include "compression.h" | 52 | #include "compression.h" |
53 | #include "locking.h" | ||
53 | 54 | ||
54 | struct btrfs_iget_args { | 55 | struct btrfs_iget_args { |
55 | u64 ino; | 56 | u64 ino; |
@@ -2021,6 +2022,7 @@ void btrfs_read_locked_inode(struct inode *inode) | |||
2021 | BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); | 2022 | BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); |
2022 | 2023 | ||
2023 | alloc_group_block = btrfs_inode_block_group(leaf, inode_item); | 2024 | alloc_group_block = btrfs_inode_block_group(leaf, inode_item); |
2025 | |||
2024 | BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, | 2026 | BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, |
2025 | alloc_group_block, 0); | 2027 | alloc_group_block, 0); |
2026 | btrfs_free_path(path); | 2028 | btrfs_free_path(path); |
@@ -2117,6 +2119,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
2117 | goto failed; | 2119 | goto failed; |
2118 | } | 2120 | } |
2119 | 2121 | ||
2122 | btrfs_unlock_up_safe(path, 1); | ||
2120 | leaf = path->nodes[0]; | 2123 | leaf = path->nodes[0]; |
2121 | inode_item = btrfs_item_ptr(leaf, path->slots[0], | 2124 | inode_item = btrfs_item_ptr(leaf, path->slots[0], |
2122 | struct btrfs_inode_item); | 2125 | struct btrfs_inode_item); |
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 39bae7761db6..68fd9ccf1805 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -26,45 +26,215 @@ | |||
26 | #include "locking.h" | 26 | #include "locking.h" |
27 | 27 | ||
28 | /* | 28 | /* |
29 | * locks the per buffer mutex in an extent buffer. This uses adaptive locks | 29 | * btrfs_header_level() isn't free, so don't call it when lockdep isn't |
30 | * and the spin is not tuned very extensively. The spinning does make a big | 30 | * on |
31 | * difference in almost every workload, but spinning for the right amount of | ||
32 | * time needs some help. | ||
33 | * | ||
34 | * In general, we want to spin as long as the lock holder is doing btree | ||
35 | * searches, and we should give up if they are in more expensive code. | ||
36 | */ | 31 | */ |
32 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
33 | static inline void spin_nested(struct extent_buffer *eb) | ||
34 | { | ||
35 | spin_lock_nested(&eb->lock, BTRFS_MAX_LEVEL - btrfs_header_level(eb)); | ||
36 | } | ||
37 | #else | ||
38 | static inline void spin_nested(struct extent_buffer *eb) | ||
39 | { | ||
40 | spin_lock(&eb->lock); | ||
41 | } | ||
42 | #endif | ||
37 | 43 | ||
38 | int btrfs_tree_lock(struct extent_buffer *eb) | 44 | /* |
45 | * Setting a lock to blocking will drop the spinlock and set the | ||
46 | * flag that forces other procs who want the lock to wait. After | ||
47 | * this you can safely schedule with the lock held. | ||
48 | */ | ||
49 | void btrfs_set_lock_blocking(struct extent_buffer *eb) | ||
39 | { | 50 | { |
40 | int i; | 51 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { |
52 | set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); | ||
53 | spin_unlock(&eb->lock); | ||
54 | } | ||
55 | /* exit with the spin lock released and the bit set */ | ||
56 | } | ||
41 | 57 | ||
42 | if (mutex_trylock(&eb->mutex)) | 58 | /* |
43 | return 0; | 59 | * clearing the blocking flag will take the spinlock again. |
60 | * After this you can't safely schedule | ||
61 | */ | ||
62 | void btrfs_clear_lock_blocking(struct extent_buffer *eb) | ||
63 | { | ||
64 | if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { | ||
65 | spin_nested(eb); | ||
66 | clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); | ||
67 | smp_mb__after_clear_bit(); | ||
68 | } | ||
69 | /* exit with the spin lock held */ | ||
70 | } | ||
71 | |||
72 | /* | ||
73 | * unfortunately, many of the places that currently set a lock to blocking | ||
74 | * don't end up blocking for very long, and often they don't block | ||
75 | * at all. For a dbench 50 run, if we don't spin on the blocking bit | ||
76 | * at all, the context switch rate can jump up to 400,000/sec or more. | ||
77 | * | ||
78 | * So, we're still stuck with this crummy spin on the blocking bit, | ||
79 | * at least until the most common causes of the short blocks | ||
80 | * can be dealt with. | ||
81 | */ | ||
82 | static int btrfs_spin_on_block(struct extent_buffer *eb) | ||
83 | { | ||
84 | int i; | ||
44 | for (i = 0; i < 512; i++) { | 85 | for (i = 0; i < 512; i++) { |
45 | cpu_relax(); | 86 | cpu_relax(); |
46 | if (mutex_trylock(&eb->mutex)) | 87 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) |
88 | return 1; | ||
89 | if (need_resched()) | ||
90 | break; | ||
91 | } | ||
92 | return 0; | ||
93 | } | ||
94 | |||
95 | /* | ||
96 | * This is somewhat different from trylock. It will take the | ||
97 | * spinlock but if it finds the lock is set to blocking, it will | ||
98 | * return without the lock held. | ||
99 | * | ||
100 | * returns 1 if it was able to take the lock and zero otherwise | ||
101 | * | ||
102 | * After this call, scheduling is not safe without first calling | ||
103 | * btrfs_set_lock_blocking() | ||
104 | */ | ||
105 | int btrfs_try_spin_lock(struct extent_buffer *eb) | ||
106 | { | ||
107 | int i; | ||
108 | |||
109 | spin_nested(eb); | ||
110 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | ||
111 | return 1; | ||
112 | spin_unlock(&eb->lock); | ||
113 | |||
114 | /* spin for a bit on the BLOCKING flag */ | ||
115 | for (i = 0; i < 2; i++) { | ||
116 | if (!btrfs_spin_on_block(eb)) | ||
117 | break; | ||
118 | |||
119 | spin_nested(eb); | ||
120 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | ||
121 | return 1; | ||
122 | spin_unlock(&eb->lock); | ||
123 | } | ||
124 | return 0; | ||
125 | } | ||
126 | |||
127 | /* | ||
128 | * the autoremove wake function will return 0 if it tried to wake up | ||
129 | * a process that was already awake, which means that process won't | ||
130 | * count as an exclusive wakeup. The waitq code will continue waking | ||
131 | * procs until it finds one that was actually sleeping. | ||
132 | * | ||
133 | * For btrfs, this isn't quite what we want. We want a single proc | ||
134 | * to be notified that the lock is ready for taking. If that proc | ||
135 | * already happens to be awake, great, it will loop around and try for | ||
136 | * the lock. | ||
137 | * | ||
138 | * So, btrfs_wake_function always returns 1, even when the proc that we | ||
139 | * tried to wake up was already awake. | ||
140 | */ | ||
141 | static int btrfs_wake_function(wait_queue_t *wait, unsigned mode, | ||
142 | int sync, void *key) | ||
143 | { | ||
144 | autoremove_wake_function(wait, mode, sync, key); | ||
145 | return 1; | ||
146 | } | ||
147 | |||
148 | /* | ||
149 | * returns with the extent buffer spinlocked. | ||
150 | * | ||
151 | * This will spin and/or wait as required to take the lock, and then | ||
152 | * return with the spinlock held. | ||
153 | * | ||
154 | * After this call, scheduling is not safe without first calling | ||
155 | * btrfs_set_lock_blocking() | ||
156 | */ | ||
157 | int btrfs_tree_lock(struct extent_buffer *eb) | ||
158 | { | ||
159 | DEFINE_WAIT(wait); | ||
160 | wait.func = btrfs_wake_function; | ||
161 | |||
162 | while (1) { | ||
163 | spin_nested(eb); | ||
164 | |||
165 | /* nobody is blocking, exit with the spinlock held */ | ||
166 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | ||
47 | return 0; | 167 | return 0; |
168 | |||
169 | /* | ||
170 | * we have the spinlock, but the real owner is blocking. | ||
171 | * wait for them | ||
172 | */ | ||
173 | spin_unlock(&eb->lock); | ||
174 | |||
175 | /* | ||
176 | * spin for a bit, and if the blocking flag goes away, | ||
177 | * loop around | ||
178 | */ | ||
179 | if (btrfs_spin_on_block(eb)) | ||
180 | continue; | ||
181 | |||
182 | prepare_to_wait_exclusive(&eb->lock_wq, &wait, | ||
183 | TASK_UNINTERRUPTIBLE); | ||
184 | |||
185 | if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | ||
186 | schedule(); | ||
187 | |||
188 | finish_wait(&eb->lock_wq, &wait); | ||
48 | } | 189 | } |
49 | cpu_relax(); | ||
50 | mutex_lock_nested(&eb->mutex, BTRFS_MAX_LEVEL - btrfs_header_level(eb)); | ||
51 | return 0; | 190 | return 0; |
52 | } | 191 | } |
53 | 192 | ||
193 | /* | ||
194 | * Very quick trylock; this does not spin or schedule. It returns | ||
195 | * 1 with the spinlock held if it was able to take the lock, or it | ||
196 | * returns zero if it was unable to take the lock. | ||
197 | * | ||
198 | * After this call, scheduling is not safe without first calling | ||
199 | * btrfs_set_lock_blocking() | ||
200 | */ | ||
54 | int btrfs_try_tree_lock(struct extent_buffer *eb) | 201 | int btrfs_try_tree_lock(struct extent_buffer *eb) |
55 | { | 202 | { |
56 | return mutex_trylock(&eb->mutex); | 203 | if (spin_trylock(&eb->lock)) { |
204 | if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { | ||
205 | /* | ||
206 | * we've got the spinlock, but the real owner is | ||
207 | * blocking. Drop the spinlock and return failure | ||
208 | */ | ||
209 | spin_unlock(&eb->lock); | ||
210 | return 0; | ||
211 | } | ||
212 | return 1; | ||
213 | } | ||
214 | /* someone else has the spinlock; give up */ | ||
215 | return 0; | ||
57 | } | 216 | } |
58 | 217 | ||
59 | int btrfs_tree_unlock(struct extent_buffer *eb) | 218 | int btrfs_tree_unlock(struct extent_buffer *eb) |
60 | { | 219 | { |
61 | mutex_unlock(&eb->mutex); | 220 | /* |
221 | * if we were a blocking owner, we don't have the spinlock held | ||
222 | * just clear the bit and look for waiters | ||
223 | */ | ||
224 | if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | ||
225 | smp_mb__after_clear_bit(); | ||
226 | else | ||
227 | spin_unlock(&eb->lock); | ||
228 | |||
229 | if (waitqueue_active(&eb->lock_wq)) | ||
230 | wake_up(&eb->lock_wq); | ||
62 | return 0; | 231 | return 0; |
63 | } | 232 | } |
64 | 233 | ||
65 | int btrfs_tree_locked(struct extent_buffer *eb) | 234 | int btrfs_tree_locked(struct extent_buffer *eb) |
66 | { | 235 | { |
67 | return mutex_is_locked(&eb->mutex); | 236 | return test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags) || |
237 | spin_is_locked(&eb->lock); | ||
68 | } | 238 | } |
69 | 239 | ||
70 | /* | 240 | /* |
@@ -75,12 +245,14 @@ int btrfs_path_lock_waiting(struct btrfs_path *path, int level) | |||
75 | { | 245 | { |
76 | int i; | 246 | int i; |
77 | struct extent_buffer *eb; | 247 | struct extent_buffer *eb; |
248 | |||
78 | for (i = level; i <= level + 1 && i < BTRFS_MAX_LEVEL; i++) { | 249 | for (i = level; i <= level + 1 && i < BTRFS_MAX_LEVEL; i++) { |
79 | eb = path->nodes[i]; | 250 | eb = path->nodes[i]; |
80 | if (!eb) | 251 | if (!eb) |
81 | break; | 252 | break; |
82 | smp_mb(); | 253 | smp_mb(); |
83 | if (!list_empty(&eb->mutex.wait_list)) | 254 | if (spin_is_contended(&eb->lock) || |
255 | waitqueue_active(&eb->lock_wq)) | ||
84 | return 1; | 256 | return 1; |
85 | } | 257 | } |
86 | return 0; | 258 | return 0; |
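
Taken together, the new primitives give tree-block users the lifecycle sketched below; only the btrfs_* calls are from this patch, the rest is illustrative:

static void modify_block(struct extent_buffer *eb)
{
	btrfs_tree_lock(eb);		/* returns with eb->lock held, spinning */

	/* short, non-sleeping updates happen under the spinlock */

	btrfs_set_lock_blocking(eb);	/* drops eb->lock, sets the BLOCKING bit */

	/* now safe to schedule(), wait on IO, take mutexes, ... */

	btrfs_clear_lock_blocking(eb);	/* retakes eb->lock, clears the bit */

	btrfs_tree_unlock(eb);		/* releases and wakes one lock_wq waiter */
}
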
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index bc1faef12519..d92e707f5870 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -22,6 +22,12 @@ | |||
22 | int btrfs_tree_lock(struct extent_buffer *eb); | 22 | int btrfs_tree_lock(struct extent_buffer *eb); |
23 | int btrfs_tree_unlock(struct extent_buffer *eb); | 23 | int btrfs_tree_unlock(struct extent_buffer *eb); |
24 | int btrfs_tree_locked(struct extent_buffer *eb); | 24 | int btrfs_tree_locked(struct extent_buffer *eb); |
25 | |||
25 | int btrfs_try_tree_lock(struct extent_buffer *eb); | 26 | int btrfs_try_tree_lock(struct extent_buffer *eb); |
27 | int btrfs_try_spin_lock(struct extent_buffer *eb); | ||
28 | |||
26 | int btrfs_path_lock_waiting(struct btrfs_path *path, int level); | 29 | int btrfs_path_lock_waiting(struct btrfs_path *path, int level); |
30 | |||
31 | void btrfs_set_lock_blocking(struct extent_buffer *eb); | ||
32 | void btrfs_clear_lock_blocking(struct extent_buffer *eb); | ||
27 | #endif | 33 | #endif |
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index 3e8358c36165..98d25fa4570e 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -74,6 +74,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
74 | u32 nritems; | 74 | u32 nritems; |
75 | 75 | ||
76 | root_node = btrfs_lock_root_node(root); | 76 | root_node = btrfs_lock_root_node(root); |
77 | btrfs_set_lock_blocking(root_node); | ||
77 | nritems = btrfs_header_nritems(root_node); | 78 | nritems = btrfs_header_nritems(root_node); |
78 | root->defrag_max.objectid = 0; | 79 | root->defrag_max.objectid = 0; |
79 | /* from above we know this is not a leaf */ | 80 | /* from above we know this is not a leaf */ |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 4f26f3ed0c87..20794290256b 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1615,6 +1615,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1615 | 1615 | ||
1616 | btrfs_tree_lock(next); | 1616 | btrfs_tree_lock(next); |
1617 | clean_tree_block(trans, root, next); | 1617 | clean_tree_block(trans, root, next); |
1618 | btrfs_set_lock_blocking(next); | ||
1618 | btrfs_wait_tree_block_writeback(next); | 1619 | btrfs_wait_tree_block_writeback(next); |
1619 | btrfs_tree_unlock(next); | 1620 | btrfs_tree_unlock(next); |
1620 | 1621 | ||
@@ -1661,6 +1662,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1661 | next = path->nodes[*level]; | 1662 | next = path->nodes[*level]; |
1662 | btrfs_tree_lock(next); | 1663 | btrfs_tree_lock(next); |
1663 | clean_tree_block(trans, root, next); | 1664 | clean_tree_block(trans, root, next); |
1665 | btrfs_set_lock_blocking(next); | ||
1664 | btrfs_wait_tree_block_writeback(next); | 1666 | btrfs_wait_tree_block_writeback(next); |
1665 | btrfs_tree_unlock(next); | 1667 | btrfs_tree_unlock(next); |
1666 | 1668 | ||
@@ -1718,6 +1720,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, | |||
1718 | 1720 | ||
1719 | btrfs_tree_lock(next); | 1721 | btrfs_tree_lock(next); |
1720 | clean_tree_block(trans, root, next); | 1722 | clean_tree_block(trans, root, next); |
1723 | btrfs_set_lock_blocking(next); | ||
1721 | btrfs_wait_tree_block_writeback(next); | 1724 | btrfs_wait_tree_block_writeback(next); |
1722 | btrfs_tree_unlock(next); | 1725 | btrfs_tree_unlock(next); |
1723 | 1726 | ||
@@ -1790,6 +1793,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, | |||
1790 | 1793 | ||
1791 | btrfs_tree_lock(next); | 1794 | btrfs_tree_lock(next); |
1792 | clean_tree_block(trans, log, next); | 1795 | clean_tree_block(trans, log, next); |
1796 | btrfs_set_lock_blocking(next); | ||
1793 | btrfs_wait_tree_block_writeback(next); | 1797 | btrfs_wait_tree_block_writeback(next); |
1794 | btrfs_tree_unlock(next); | 1798 | btrfs_tree_unlock(next); |
1795 | 1799 | ||