Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r--  fs/btrfs/ctree.c  312
1 files changed, 197 insertions, 115 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index dbb724124633..e5b2533b691a 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1244,9 +1244,9 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
  * readahead one full node of leaves, finding things that are close
  * to the block in 'slot', and triggering ra on them.
  */
-static noinline void reada_for_search(struct btrfs_root *root,
-				      struct btrfs_path *path,
-				      int level, int slot, u64 objectid)
+static void reada_for_search(struct btrfs_root *root,
+			     struct btrfs_path *path,
+			     int level, int slot, u64 objectid)
 {
 	struct extent_buffer *node;
 	struct btrfs_disk_key disk_key;
@@ -1447,6 +1447,117 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
 }
 
 /*
+ * helper function for btrfs_search_slot. The goal is to find a block
+ * in cache without setting the path to blocking. If we find the block
+ * we return zero and the path is unchanged.
+ *
+ * If we can't find the block, we set the path blocking and do some
+ * reada. -EAGAIN is returned and the search must be repeated.
+ */
+static int
+read_block_for_search(struct btrfs_trans_handle *trans,
+		      struct btrfs_root *root, struct btrfs_path *p,
+		      struct extent_buffer **eb_ret, int level, int slot,
+		      struct btrfs_key *key)
+{
+	u64 blocknr;
+	u64 gen;
+	u32 blocksize;
+	struct extent_buffer *b = *eb_ret;
+	struct extent_buffer *tmp;
+
+	blocknr = btrfs_node_blockptr(b, slot);
+	gen = btrfs_node_ptr_generation(b, slot);
+	blocksize = btrfs_level_size(root, level - 1);
+
+	tmp = btrfs_find_tree_block(root, blocknr, blocksize);
+	if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+		*eb_ret = tmp;
+		return 0;
+	}
+
+	/*
+	 * reduce lock contention at high levels
+	 * of the btree by dropping locks before
+	 * we read.
+	 */
+	btrfs_release_path(NULL, p);
+	if (tmp)
+		free_extent_buffer(tmp);
+	if (p->reada)
+		reada_for_search(root, p, level, slot, key->objectid);
+
+	tmp = read_tree_block(root, blocknr, blocksize, gen);
+	if (tmp)
+		free_extent_buffer(tmp);
+	return -EAGAIN;
+}
+
+/*
+ * helper function for btrfs_search_slot. This does all of the checks
+ * for node-level blocks and does any balancing required based on
+ * the ins_len.
+ *
+ * If no extra work was required, zero is returned. If we had to
+ * drop the path, -EAGAIN is returned and btrfs_search_slot must
+ * start over
+ */
+static int
+setup_nodes_for_search(struct btrfs_trans_handle *trans,
+		       struct btrfs_root *root, struct btrfs_path *p,
+		       struct extent_buffer *b, int level, int ins_len)
+{
+	int ret;
+	if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
+	    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
+		int sret;
+
+		sret = reada_for_balance(root, p, level);
+		if (sret)
+			goto again;
+
+		btrfs_set_path_blocking(p);
+		sret = split_node(trans, root, p, level);
+		btrfs_clear_path_blocking(p, NULL);
+
+		BUG_ON(sret > 0);
+		if (sret) {
+			ret = sret;
+			goto done;
+		}
+		b = p->nodes[level];
+	} else if (ins_len < 0 && btrfs_header_nritems(b) <
+		   BTRFS_NODEPTRS_PER_BLOCK(root) / 4) {
+		int sret;
+
+		sret = reada_for_balance(root, p, level);
+		if (sret)
+			goto again;
+
+		btrfs_set_path_blocking(p);
+		sret = balance_level(trans, root, p, level);
+		btrfs_clear_path_blocking(p, NULL);
+
+		if (sret) {
+			ret = sret;
+			goto done;
+		}
+		b = p->nodes[level];
+		if (!b) {
+			btrfs_release_path(NULL, p);
+			goto again;
+		}
+		BUG_ON(btrfs_header_nritems(b) == 1);
+	}
+	return 0;
+
+again:
+	ret = -EAGAIN;
+done:
+	return ret;
+}
+
+/*
  * look for key in the tree. path is filled in with nodes along the way
  * if key is found, we return zero and you can find the item in the leaf
  * level of the path (level 0)
@@ -1464,16 +1575,11 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 		      ins_len, int cow)
 {
 	struct extent_buffer *b;
-	struct extent_buffer *tmp;
 	int slot;
 	int ret;
 	int level;
-	int should_reada = p->reada;
 	int lowest_unlock = 1;
-	int blocksize;
 	u8 lowest_level = 0;
-	u64 blocknr;
-	u64 gen;
 
 	lowest_level = p->lowest_level;
 	WARN_ON(lowest_level && ins_len > 0);
@@ -1502,7 +1608,11 @@ again:
 	if (cow) {
 		int wret;
 
-		/* is a cow on this block not required */
+		/*
+		 * if we don't really need to cow this block
+		 * then we don't want to set the path blocking,
+		 * so we test it here
+		 */
 		if (btrfs_header_generation(b) == trans->transid &&
 		    btrfs_header_owner(b) == root->root_key.objectid &&
 		    !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
@@ -1557,51 +1667,15 @@ cow_done:
 		if (ret && slot > 0)
 			slot -= 1;
 		p->slots[level] = slot;
-		if ((p->search_for_split || ins_len > 0) &&
-		    btrfs_header_nritems(b) >=
-		    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
-			int sret;
-
-			sret = reada_for_balance(root, p, level);
-			if (sret)
-				goto again;
-
-			btrfs_set_path_blocking(p);
-			sret = split_node(trans, root, p, level);
-			btrfs_clear_path_blocking(p, NULL);
-
-			BUG_ON(sret > 0);
-			if (sret) {
-				ret = sret;
-				goto done;
-			}
-			b = p->nodes[level];
-			slot = p->slots[level];
-		} else if (ins_len < 0 &&
-			   btrfs_header_nritems(b) <
-			   BTRFS_NODEPTRS_PER_BLOCK(root) / 4) {
-			int sret;
-
-			sret = reada_for_balance(root, p, level);
-			if (sret)
-				goto again;
-
-			btrfs_set_path_blocking(p);
-			sret = balance_level(trans, root, p, level);
-			btrfs_clear_path_blocking(p, NULL);
+		ret = setup_nodes_for_search(trans, root, p, b, level,
+					     ins_len);
+		if (ret == -EAGAIN)
+			goto again;
+		else if (ret)
+			goto done;
+		b = p->nodes[level];
+		slot = p->slots[level];
 
-			if (sret) {
-				ret = sret;
-				goto done;
-			}
-			b = p->nodes[level];
-			if (!b) {
-				btrfs_release_path(NULL, p);
-				goto again;
-			}
-			slot = p->slots[level];
-			BUG_ON(btrfs_header_nritems(b) == 1);
-		}
 		unlock_up(p, level, lowest_unlock);
 
 		/* this is only true while dropping a snapshot */
@@ -1610,44 +1684,11 @@ cow_done:
 			goto done;
 		}
 
-		blocknr = btrfs_node_blockptr(b, slot);
-		gen = btrfs_node_ptr_generation(b, slot);
-		blocksize = btrfs_level_size(root, level - 1);
+		ret = read_block_for_search(trans, root, p,
+					    &b, level, slot, key);
+		if (ret == -EAGAIN)
+			goto again;
 
-		tmp = btrfs_find_tree_block(root, blocknr, blocksize);
-		if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
-			b = tmp;
-		} else {
-			/*
-			 * reduce lock contention at high levels
-			 * of the btree by dropping locks before
-			 * we read.
-			 */
-			if (level > 0) {
-				btrfs_release_path(NULL, p);
-				if (tmp)
-					free_extent_buffer(tmp);
-				if (should_reada)
-					reada_for_search(root, p,
-							 level, slot,
-							 key->objectid);
-
-				tmp = read_tree_block(root, blocknr,
-						      blocksize, gen);
-				if (tmp)
-					free_extent_buffer(tmp);
-				goto again;
-			} else {
-				btrfs_set_path_blocking(p);
-				if (tmp)
-					free_extent_buffer(tmp);
-				if (should_reada)
-					reada_for_search(root, p,
-							 level, slot,
-							 key->objectid);
-				b = read_node_slot(root, b, slot);
-			}
-		}
 		if (!p->skip_locking) {
 			int lret;
 
@@ -2116,8 +2157,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
 	BUG_ON(!path->nodes[level]);
 	lower = path->nodes[level];
 	nritems = btrfs_header_nritems(lower);
-	if (slot > nritems)
-		BUG();
+	BUG_ON(slot > nritems);
 	if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
 		BUG();
 	if (slot != nritems) {
@@ -4086,28 +4126,44 @@ next:
 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 {
 	int slot;
-	int level = 1;
+	int level;
 	struct extent_buffer *c;
-	struct extent_buffer *next = NULL;
+	struct extent_buffer *next;
 	struct btrfs_key key;
 	u32 nritems;
 	int ret;
+	int old_spinning = path->leave_spinning;
+	int force_blocking = 0;
 
 	nritems = btrfs_header_nritems(path->nodes[0]);
 	if (nritems == 0)
 		return 1;
 
-	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
+	/*
+	 * we take the blocks in an order that upsets lockdep. Using
+	 * blocking mode is the only way around it.
+	 */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	force_blocking = 1;
+#endif
 
+	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
+again:
+	level = 1;
+	next = NULL;
 	btrfs_release_path(root, path);
+
 	path->keep_locks = 1;
+
+	if (!force_blocking)
+		path->leave_spinning = 1;
+
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	path->keep_locks = 0;
 
 	if (ret < 0)
 		return ret;
 
-	btrfs_set_path_blocking(path);
 	nritems = btrfs_header_nritems(path->nodes[0]);
 	/*
 	 * by releasing the path above we dropped all our locks.  A balance
@@ -4117,19 +4173,24 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 	 */
 	if (nritems > 0 && path->slots[0] < nritems - 1) {
 		path->slots[0]++;
+		ret = 0;
 		goto done;
 	}
 
 	while (level < BTRFS_MAX_LEVEL) {
-		if (!path->nodes[level])
-			return 1;
+		if (!path->nodes[level]) {
+			ret = 1;
+			goto done;
+		}
 
 		slot = path->slots[level] + 1;
 		c = path->nodes[level];
 		if (slot >= btrfs_header_nritems(c)) {
 			level++;
-			if (level == BTRFS_MAX_LEVEL)
-				return 1;
+			if (level == BTRFS_MAX_LEVEL) {
+				ret = 1;
+				goto done;
+			}
 			continue;
 		}
 
@@ -4138,16 +4199,22 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 			free_extent_buffer(next);
 		}
 
-		/* the path was set to blocking above */
-		if (level == 1 && (path->locks[1] || path->skip_locking) &&
-		    path->reada)
-			reada_for_search(root, path, level, slot, 0);
+		next = c;
+		ret = read_block_for_search(NULL, root, path, &next, level,
+					    slot, &key);
+		if (ret == -EAGAIN)
+			goto again;
 
-		next = read_node_slot(root, c, slot);
 		if (!path->skip_locking) {
-			btrfs_assert_tree_locked(c);
-			btrfs_tree_lock(next);
-			btrfs_set_lock_blocking(next);
+			ret = btrfs_try_spin_lock(next);
+			if (!ret) {
+				btrfs_set_path_blocking(path);
+				btrfs_tree_lock(next);
+				if (!force_blocking)
+					btrfs_clear_path_blocking(path, next);
+			}
+			if (force_blocking)
+				btrfs_set_lock_blocking(next);
 		}
 		break;
 	}
@@ -4157,27 +4224,42 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 		c = path->nodes[level];
 		if (path->locks[level])
 			btrfs_tree_unlock(c);
+
 		free_extent_buffer(c);
 		path->nodes[level] = next;
 		path->slots[level] = 0;
 		if (!path->skip_locking)
 			path->locks[level] = 1;
+
 		if (!level)
 			break;
 
-		btrfs_set_path_blocking(path);
-		if (level == 1 && path->locks[1] && path->reada)
-			reada_for_search(root, path, level, slot, 0);
-		next = read_node_slot(root, next, 0);
+		ret = read_block_for_search(NULL, root, path, &next, level,
+					    0, &key);
+		if (ret == -EAGAIN)
+			goto again;
+
 		if (!path->skip_locking) {
 			btrfs_assert_tree_locked(path->nodes[level]);
-			btrfs_tree_lock(next);
-			btrfs_set_lock_blocking(next);
+			ret = btrfs_try_spin_lock(next);
+			if (!ret) {
+				btrfs_set_path_blocking(path);
+				btrfs_tree_lock(next);
+				if (!force_blocking)
+					btrfs_clear_path_blocking(path, next);
+			}
+			if (force_blocking)
+				btrfs_set_lock_blocking(next);
 		}
 	}
+	ret = 0;
 done:
 	unlock_up(path, 0, 1);
-	return 0;
+	path->leave_spinning = old_spinning;
+	if (!old_spinning)
+		btrfs_set_path_blocking(path);
+
+	return ret;
 }
 
 /*