author     Chris Mason <chris.mason@oracle.com>  2011-07-16 15:23:14 -0400
committer  Chris Mason <chris.mason@oracle.com>  2011-07-27 12:46:46 -0400
commit     bd681513fa6f2ff29aa391f01e413a2d1c59fd77
tree       bb10ec6ef876b4d7a553cbe54976ec49a0d10b21 /fs/btrfs/ctree.c
parent     81317fdeddcef259b6ecf7b5c0d04caa167c6b54
Btrfs: switch the btrfs tree locks to reader/writer
The btrfs metadata btree is the source of significant lock contention,
especially in the root node.  This commit changes our locking to use a
reader/writer lock.

The lock is built on top of rw spinlocks, and it extends the lock tracking
to remember if we have a read lock or a write lock when we go to blocking.
Atomics count the number of blocking readers or writers at any given time.

It removes all of the adaptive spinning from the old code and uses only the
spinning/blocking hints inside of btrfs to decide when it should continue
spinning.

In read heavy workloads this is dramatically faster.  In write heavy
workloads we're still faster because of less contention on the root node
lock.

We suffer slightly in dbench because we schedule more often during write
locks, but all other benchmarks so far are improved.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r--  fs/btrfs/ctree.c  268
1 file changed, 209 insertions(+), 59 deletions(-)
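The locking primitives this file now calls (btrfs_tree_read_lock(), btrfs_tree_unlock_rw(), btrfs_set_lock_blocking_rw(), and the BTRFS_READ_LOCK/BTRFS_WRITE_LOCK state values) are implemented in fs/btrfs/locking.c and locking.h, which sit outside this ctree.c-limited view. As rough orientation only, here is a minimal, hypothetical sketch of the scheme the commit message describes (an rw spinlock for the spinning side, atomic counts of blocking readers and writers, and wait queues for the sleeping side). The demo_* names are invented for illustration and are not the btrfs implementation:

/*
 * Illustrative sketch only; not the code this commit adds to
 * fs/btrfs/locking.c.  It models the idea from the commit message:
 * an rw spinlock for the fast (spinning) side, plus atomic counts of
 * blocking readers/writers and wait queues for the sleeping side.
 * All demo_* names are hypothetical.
 */
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/wait.h>

struct demo_tree_lock {
	rwlock_t lock;			/* spinning readers/writers */
	atomic_t blocking_readers;	/* readers that went blocking */
	atomic_t blocking_writers;	/* writers that went blocking */
	wait_queue_head_t reader_wq;	/* readers wait out blocking writers */
	wait_queue_head_t writer_wq;	/* writers wait out blocking readers */
};

static void demo_tree_lock_init(struct demo_tree_lock *tl)
{
	rwlock_init(&tl->lock);
	atomic_set(&tl->blocking_readers, 0);
	atomic_set(&tl->blocking_writers, 0);
	init_waitqueue_head(&tl->reader_wq);
	init_waitqueue_head(&tl->writer_wq);
}

/* take the spinning read lock, waiting out any blocking writer first */
static void demo_tree_read_lock(struct demo_tree_lock *tl)
{
	for (;;) {
		wait_event(tl->reader_wq,
			   atomic_read(&tl->blocking_writers) == 0);
		read_lock(&tl->lock);
		if (atomic_read(&tl->blocking_writers) == 0)
			return;
		/* a writer went blocking while we spun; back off and retry */
		read_unlock(&tl->lock);
	}
}

/* convert a held spinning read lock into a blocking one so we may sleep */
static void demo_set_read_blocking(struct demo_tree_lock *tl)
{
	atomic_inc(&tl->blocking_readers);
	read_unlock(&tl->lock);
}

/* drop a blocking read lock; the last reader wakes any waiting writer */
static void demo_read_unlock_blocking(struct demo_tree_lock *tl)
{
	if (atomic_dec_and_test(&tl->blocking_readers))
		wake_up(&tl->writer_wq);
}

In the ctree.c hunks below, path->locks[level] stops being a plain 0/1 flag and instead records which lock state is held (BTRFS_READ_LOCK, BTRFS_WRITE_LOCK, or their *_BLOCKING variants), which is why the unlock and blocking/clear helpers now take the lock state as an argument.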
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d2431284b913..0ad48e782d37 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -54,8 +54,13 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
 {
 	int i;
 	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
-		if (p->nodes[i] && p->locks[i])
-			btrfs_set_lock_blocking(p->nodes[i]);
+		if (!p->nodes[i] || !p->locks[i])
+			continue;
+		btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
+		if (p->locks[i] == BTRFS_READ_LOCK)
+			p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
+		else if (p->locks[i] == BTRFS_WRITE_LOCK)
+			p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
 	}
 }
 
@@ -68,7 +73,7 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
  * for held
  */
 noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
-					struct extent_buffer *held)
+					struct extent_buffer *held, int held_rw)
 {
 	int i;
 
@@ -79,19 +84,29 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
 	 * really sure by forcing the path to blocking before we clear
 	 * the path blocking.
 	 */
-	if (held)
-		btrfs_set_lock_blocking(held);
+	if (held) {
+		btrfs_set_lock_blocking_rw(held, held_rw);
+		if (held_rw == BTRFS_WRITE_LOCK)
+			held_rw = BTRFS_WRITE_LOCK_BLOCKING;
+		else if (held_rw == BTRFS_READ_LOCK)
+			held_rw = BTRFS_READ_LOCK_BLOCKING;
+	}
 	btrfs_set_path_blocking(p);
 #endif
 
 	for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
-		if (p->nodes[i] && p->locks[i])
-			btrfs_clear_lock_blocking(p->nodes[i]);
+		if (p->nodes[i] && p->locks[i]) {
+			btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]);
+			if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING)
+				p->locks[i] = BTRFS_WRITE_LOCK;
+			else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING)
+				p->locks[i] = BTRFS_READ_LOCK;
+		}
 	}
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	if (held)
-		btrfs_clear_lock_blocking(held);
+		btrfs_clear_lock_blocking_rw(held, held_rw);
 #endif
 }
 
@@ -119,7 +134,7 @@ noinline void btrfs_release_path(struct btrfs_path *p)
 		if (!p->nodes[i])
 			continue;
 		if (p->locks[i]) {
-			btrfs_tree_unlock(p->nodes[i]);
+			btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
 			p->locks[i] = 0;
 		}
 		free_extent_buffer(p->nodes[i]);
@@ -167,6 +182,25 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
 	return eb;
 }
 
+/* loop around taking references on and locking the root node of the
+ * tree until you end up with a lock on the root.  A locked buffer
+ * is returned, with a reference held.
+ */
+struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
+{
+	struct extent_buffer *eb;
+
+	while (1) {
+		eb = btrfs_root_node(root);
+		btrfs_tree_read_lock(eb);
+		if (eb == root->node)
+			break;
+		btrfs_tree_read_unlock(eb);
+		free_extent_buffer(eb);
+	}
+	return eb;
+}
+
 /* cowonly root (everything not a reference counted cow subvolume), just get
  * put onto a simple dirty list.  transaction.c walks this to make sure they
  * get properly updated on disk.
@@ -862,7 +896,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
 	mid = path->nodes[level];
 
-	WARN_ON(!path->locks[level]);
+	WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
+		path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
 	WARN_ON(btrfs_header_generation(mid) != trans->transid);
 
 	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
@@ -1360,7 +1395,7 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
 
 		t = path->nodes[i];
 		if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
-			btrfs_tree_unlock(t);
+			btrfs_tree_unlock_rw(t, path->locks[i]);
 			path->locks[i] = 0;
 		}
 	}
@@ -1387,7 +1422,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
 			continue;
 		if (!path->locks[i])
 			continue;
-		btrfs_tree_unlock(path->nodes[i]);
+		btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
 		path->locks[i] = 0;
 	}
 }
@@ -1436,6 +1471,8 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 		 * we can trust our generation number
 		 */
 		free_extent_buffer(tmp);
+		btrfs_set_path_blocking(p);
+
 		tmp = read_tree_block(root, blocknr, blocksize, gen);
 		if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
 			*eb_ret = tmp;
@@ -1491,20 +1528,27 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 static int
 setup_nodes_for_search(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root, struct btrfs_path *p,
-		       struct extent_buffer *b, int level, int ins_len)
+		       struct extent_buffer *b, int level, int ins_len,
+		       int *write_lock_level)
 {
 	int ret;
 	if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
 	    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
 		int sret;
 
+		if (*write_lock_level < level + 1) {
+			*write_lock_level = level + 1;
+			btrfs_release_path(p);
+			goto again;
+		}
+
 		sret = reada_for_balance(root, p, level);
 		if (sret)
 			goto again;
 
 		btrfs_set_path_blocking(p);
 		sret = split_node(trans, root, p, level);
-		btrfs_clear_path_blocking(p, NULL);
+		btrfs_clear_path_blocking(p, NULL, 0);
 
 		BUG_ON(sret > 0);
 		if (sret) {
@@ -1516,13 +1560,19 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
 		   BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
 		int sret;
 
+		if (*write_lock_level < level + 1) {
+			*write_lock_level = level + 1;
+			btrfs_release_path(p);
+			goto again;
+		}
+
 		sret = reada_for_balance(root, p, level);
 		if (sret)
 			goto again;
 
 		btrfs_set_path_blocking(p);
 		sret = balance_level(trans, root, p, level);
-		btrfs_clear_path_blocking(p, NULL);
+		btrfs_clear_path_blocking(p, NULL, 0);
 
 		if (sret) {
 			ret = sret;
@@ -1566,27 +1616,78 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	int err;
 	int level;
 	int lowest_unlock = 1;
+	int root_lock;
+	/* everything at write_lock_level or lower must be write locked */
+	int write_lock_level = 0;
 	u8 lowest_level = 0;
 
 	lowest_level = p->lowest_level;
 	WARN_ON(lowest_level && ins_len > 0);
 	WARN_ON(p->nodes[0] != NULL);
 
-	if (ins_len < 0)
+	if (ins_len < 0) {
 		lowest_unlock = 2;
 
+		/* when we are removing items, we might have to go up to level
+		 * two as we update tree pointers  Make sure we keep write
+		 * for those levels as well
+		 */
+		write_lock_level = 2;
+	} else if (ins_len > 0) {
+		/*
+		 * for inserting items, make sure we have a write lock on
+		 * level 1 so we can update keys
+		 */
+		write_lock_level = 1;
+	}
+
+	if (!cow)
+		write_lock_level = -1;
+
+	if (cow && (p->keep_locks || p->lowest_level))
+		write_lock_level = BTRFS_MAX_LEVEL;
+
 again:
+	/*
+	 * we try very hard to do read locks on the root
+	 */
+	root_lock = BTRFS_READ_LOCK;
+	level = 0;
 	if (p->search_commit_root) {
+		/*
+		 * the commit roots are read only
+		 * so we always do read locks
+		 */
 		b = root->commit_root;
 		extent_buffer_get(b);
+		level = btrfs_header_level(b);
 		if (!p->skip_locking)
-			btrfs_tree_lock(b);
+			btrfs_tree_read_lock(b);
 	} else {
-		if (p->skip_locking)
-			b = btrfs_root_node(root);
-		else
-			b = btrfs_lock_root_node(root);
+		if (p->skip_locking) {
+			b = btrfs_root_node(root);
+			level = btrfs_header_level(b);
+		} else {
+			/* we don't know the level of the root node
+			 * until we actually have it read locked
+			 */
+			b = btrfs_read_lock_root_node(root);
+			level = btrfs_header_level(b);
+			if (level <= write_lock_level) {
+				/* whoops, must trade for write lock */
+				btrfs_tree_read_unlock(b);
+				free_extent_buffer(b);
+				b = btrfs_lock_root_node(root);
+				root_lock = BTRFS_WRITE_LOCK;
+
+				/* the level might have changed, check again */
+				level = btrfs_header_level(b);
+			}
+		}
 	}
+	p->nodes[level] = b;
+	if (!p->skip_locking)
+		p->locks[level] = root_lock;
 
 	while (b) {
 		level = btrfs_header_level(b);
@@ -1595,10 +1696,6 @@ again:
 		 * setup the path here so we can release it under lock
 		 * contention with the cow code
 		 */
-		p->nodes[level] = b;
-		if (!p->skip_locking)
-			p->locks[level] = 1;
-
 		if (cow) {
 			/*
 			 * if we don't really need to cow this block
@@ -1610,6 +1707,16 @@ again:
 
 			btrfs_set_path_blocking(p);
 
+			/*
+			 * must have write locks on this node and the
+			 * parent
+			 */
+			if (level + 1 > write_lock_level) {
+				write_lock_level = level + 1;
+				btrfs_release_path(p);
+				goto again;
+			}
+
 			err = btrfs_cow_block(trans, root, b,
 					      p->nodes[level + 1],
 					      p->slots[level + 1], &b);
@@ -1622,10 +1729,7 @@ cow_done:
 		BUG_ON(!cow && ins_len);
 
 		p->nodes[level] = b;
-		if (!p->skip_locking)
-			p->locks[level] = 1;
-
-		btrfs_clear_path_blocking(p, NULL);
+		btrfs_clear_path_blocking(p, NULL, 0);
 
 		/*
 		 * we have a lock on b and as long as we aren't changing
@@ -1651,7 +1755,7 @@ cow_done:
 			}
 			p->slots[level] = slot;
 			err = setup_nodes_for_search(trans, root, p, b, level,
-						     ins_len);
+						     ins_len, &write_lock_level);
 			if (err == -EAGAIN)
 				goto again;
 			if (err) {
@@ -1661,6 +1765,19 @@ cow_done:
 			b = p->nodes[level];
 			slot = p->slots[level];
 
+			/*
+			 * slot 0 is special, if we change the key
+			 * we have to update the parent pointer
+			 * which means we must have a write lock
+			 * on the parent
+			 */
+			if (slot == 0 && cow &&
+			    write_lock_level < level + 1) {
+				write_lock_level = level + 1;
+				btrfs_release_path(p);
+				goto again;
+			}
+
 			unlock_up(p, level, lowest_unlock);
 
 			if (level == lowest_level) {
@@ -1679,23 +1796,42 @@ cow_done:
 			}
 
 			if (!p->skip_locking) {
-				btrfs_clear_path_blocking(p, NULL);
-				err = btrfs_try_spin_lock(b);
-
-				if (!err) {
-					btrfs_set_path_blocking(p);
-					btrfs_tree_lock(b);
-					btrfs_clear_path_blocking(p, b);
+				level = btrfs_header_level(b);
+				if (level <= write_lock_level) {
+					err = btrfs_try_tree_write_lock(b);
+					if (!err) {
+						btrfs_set_path_blocking(p);
+						btrfs_tree_lock(b);
+						btrfs_clear_path_blocking(p, b,
+								  BTRFS_WRITE_LOCK);
+					}
+					p->locks[level] = BTRFS_WRITE_LOCK;
+				} else {
+					err = btrfs_try_tree_read_lock(b);
+					if (!err) {
+						btrfs_set_path_blocking(p);
+						btrfs_tree_read_lock(b);
+						btrfs_clear_path_blocking(p, b,
+								  BTRFS_READ_LOCK);
+					}
+					p->locks[level] = BTRFS_READ_LOCK;
 				}
+				p->nodes[level] = b;
 			}
 		} else {
 			p->slots[level] = slot;
 			if (ins_len > 0 &&
 			    btrfs_leaf_free_space(root, b) < ins_len) {
+				if (write_lock_level < 1) {
+					write_lock_level = 1;
+					btrfs_release_path(p);
+					goto again;
+				}
+
 				btrfs_set_path_blocking(p);
 				err = split_leaf(trans, root, key,
 						 p, ins_len, ret == 0);
-				btrfs_clear_path_blocking(p, NULL);
+				btrfs_clear_path_blocking(p, NULL, 0);
 
 				BUG_ON(err > 0);
 				if (err) {
@@ -1976,7 +2112,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 	add_root_to_dirty_list(root);
 	extent_buffer_get(c);
 	path->nodes[level] = c;
-	path->locks[level] = 1;
+	path->locks[level] = BTRFS_WRITE_LOCK;
 	path->slots[level] = 0;
 	return 0;
 }
@@ -3819,11 +3955,11 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
 
 	WARN_ON(!path->keep_locks);
 again:
-	cur = btrfs_lock_root_node(root);
+	cur = btrfs_read_lock_root_node(root);
 	level = btrfs_header_level(cur);
 	WARN_ON(path->nodes[level]);
 	path->nodes[level] = cur;
-	path->locks[level] = 1;
+	path->locks[level] = BTRFS_READ_LOCK;
 
 	if (btrfs_header_generation(cur) < min_trans) {
 		ret = 1;
@@ -3913,12 +4049,12 @@ find_next_key:
 		cur = read_node_slot(root, cur, slot);
 		BUG_ON(!cur);
 
-		btrfs_tree_lock(cur);
+		btrfs_tree_read_lock(cur);
 
-		path->locks[level - 1] = 1;
+		path->locks[level - 1] = BTRFS_READ_LOCK;
 		path->nodes[level - 1] = cur;
 		unlock_up(path, level, 1);
-		btrfs_clear_path_blocking(path, NULL);
+		btrfs_clear_path_blocking(path, NULL, 0);
 	}
 out:
 	if (ret == 0)
@@ -4034,6 +4170,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 	int ret;
 	int old_spinning = path->leave_spinning;
 	int force_blocking = 0;
+	int next_rw_lock = 0;
 
 	nritems = btrfs_header_nritems(path->nodes[0]);
 	if (nritems == 0)
@@ -4051,6 +4188,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 again:
 	level = 1;
 	next = NULL;
+	next_rw_lock = 0;
 	btrfs_release_path(path);
 
 	path->keep_locks = 1;
@@ -4096,11 +4234,12 @@ again:
 		}
 
 		if (next) {
-			btrfs_tree_unlock(next);
+			btrfs_tree_unlock_rw(next, next_rw_lock);
 			free_extent_buffer(next);
 		}
 
 		next = c;
+		next_rw_lock = path->locks[level];
 		ret = read_block_for_search(NULL, root, path, &next, level,
 					    slot, &key);
 		if (ret == -EAGAIN)
@@ -4112,15 +4251,22 @@ again:
 		}
 
 		if (!path->skip_locking) {
-			ret = btrfs_try_spin_lock(next);
+			ret = btrfs_try_tree_read_lock(next);
 			if (!ret) {
 				btrfs_set_path_blocking(path);
-				btrfs_tree_lock(next);
-				if (!force_blocking)
-					btrfs_clear_path_blocking(path, next);
+				btrfs_tree_read_lock(next);
+				if (!force_blocking) {
+					btrfs_clear_path_blocking(path, next,
+							  BTRFS_READ_LOCK);
+				}
+			}
+			if (force_blocking) {
+				btrfs_set_lock_blocking_rw(next,
+						   BTRFS_READ_LOCK);
+				next_rw_lock = BTRFS_READ_LOCK_BLOCKING;
+			} else {
+				next_rw_lock = BTRFS_READ_LOCK;
 			}
-			if (force_blocking)
-				btrfs_set_lock_blocking(next);
 		}
 		break;
 	}
@@ -4129,14 +4275,13 @@ again:
 		level--;
 		c = path->nodes[level];
 		if (path->locks[level])
-			btrfs_tree_unlock(c);
+			btrfs_tree_unlock_rw(c, path->locks[level]);
 
 		free_extent_buffer(c);
 		path->nodes[level] = next;
 		path->slots[level] = 0;
 		if (!path->skip_locking)
-			path->locks[level] = 1;
-
+			path->locks[level] = next_rw_lock;
 		if (!level)
 			break;
 
@@ -4151,16 +4296,21 @@ again:
 		}
 
 		if (!path->skip_locking) {
-			btrfs_assert_tree_locked(path->nodes[level]);
-			ret = btrfs_try_spin_lock(next);
+			ret = btrfs_try_tree_read_lock(next);
 			if (!ret) {
 				btrfs_set_path_blocking(path);
-				btrfs_tree_lock(next);
+				btrfs_tree_read_lock(next);
 				if (!force_blocking)
-					btrfs_clear_path_blocking(path, next);
+					btrfs_clear_path_blocking(path, next,
+							  BTRFS_READ_LOCK);
+			}
+			if (force_blocking) {
+				btrfs_set_lock_blocking_rw(next,
+						   BTRFS_READ_LOCK);
+				next_rw_lock = BTRFS_READ_LOCK_BLOCKING;
+			} else {
+				next_rw_lock = BTRFS_READ_LOCK;
 			}
-			if (force_blocking)
-				btrfs_set_lock_blocking(next);
 		}
 	}
 	ret = 0;