diff options
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r-- | fs/btrfs/ctree.c | 312 |
1 files changed, 197 insertions, 115 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index dbb724124633..e5b2533b691a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -1244,9 +1244,9 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1244 | * readahead one full node of leaves, finding things that are close | 1244 | * readahead one full node of leaves, finding things that are close |
1245 | * to the block in 'slot', and triggering ra on them. | 1245 | * to the block in 'slot', and triggering ra on them. |
1246 | */ | 1246 | */ |
1247 | static noinline void reada_for_search(struct btrfs_root *root, | 1247 | static void reada_for_search(struct btrfs_root *root, |
1248 | struct btrfs_path *path, | 1248 | struct btrfs_path *path, |
1249 | int level, int slot, u64 objectid) | 1249 | int level, int slot, u64 objectid) |
1250 | { | 1250 | { |
1251 | struct extent_buffer *node; | 1251 | struct extent_buffer *node; |
1252 | struct btrfs_disk_key disk_key; | 1252 | struct btrfs_disk_key disk_key; |
@@ -1447,6 +1447,117 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) | |||
1447 | } | 1447 | } |
1448 | 1448 | ||
1449 | /* | 1449 | /* |
1450 | * helper function for btrfs_search_slot. The goal is to find a block | ||
1451 | * in cache without setting the path to blocking. If we find the block | ||
1452 | * we return zero and the path is unchanged. | ||
1453 | * | ||
1454 | * If we can't find the block, we set the path blocking and do some | ||
1455 | * reada. -EAGAIN is returned and the search must be repeated. | ||
1456 | */ | ||
1457 | static int | ||
1458 | read_block_for_search(struct btrfs_trans_handle *trans, | ||
1459 | struct btrfs_root *root, struct btrfs_path *p, | ||
1460 | struct extent_buffer **eb_ret, int level, int slot, | ||
1461 | struct btrfs_key *key) | ||
1462 | { | ||
1463 | u64 blocknr; | ||
1464 | u64 gen; | ||
1465 | u32 blocksize; | ||
1466 | struct extent_buffer *b = *eb_ret; | ||
1467 | struct extent_buffer *tmp; | ||
1468 | |||
1469 | blocknr = btrfs_node_blockptr(b, slot); | ||
1470 | gen = btrfs_node_ptr_generation(b, slot); | ||
1471 | blocksize = btrfs_level_size(root, level - 1); | ||
1472 | |||
1473 | tmp = btrfs_find_tree_block(root, blocknr, blocksize); | ||
1474 | if (tmp && btrfs_buffer_uptodate(tmp, gen)) { | ||
1475 | *eb_ret = tmp; | ||
1476 | return 0; | ||
1477 | } | ||
1478 | |||
1479 | /* | ||
1480 | * reduce lock contention at high levels | ||
1481 | * of the btree by dropping locks before | ||
1482 | * we read. | ||
1483 | */ | ||
1484 | btrfs_release_path(NULL, p); | ||
1485 | if (tmp) | ||
1486 | free_extent_buffer(tmp); | ||
1487 | if (p->reada) | ||
1488 | reada_for_search(root, p, level, slot, key->objectid); | ||
1489 | |||
1490 | tmp = read_tree_block(root, blocknr, blocksize, gen); | ||
1491 | if (tmp) | ||
1492 | free_extent_buffer(tmp); | ||
1493 | return -EAGAIN; | ||
1494 | } | ||
1495 | |||
1496 | /* | ||
1497 | * helper function for btrfs_search_slot. This does all of the checks | ||
1498 | * for node-level blocks and does any balancing required based on | ||
1499 | * the ins_len. | ||
1500 | * | ||
1501 | * If no extra work was required, zero is returned. If we had to | ||
1502 | * drop the path, -EAGAIN is returned and btrfs_search_slot must | ||
1503 | * start over | ||
1504 | */ | ||
1505 | static int | ||
1506 | setup_nodes_for_search(struct btrfs_trans_handle *trans, | ||
1507 | struct btrfs_root *root, struct btrfs_path *p, | ||
1508 | struct extent_buffer *b, int level, int ins_len) | ||
1509 | { | ||
1510 | int ret; | ||
1511 | if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >= | ||
1512 | BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { | ||
1513 | int sret; | ||
1514 | |||
1515 | sret = reada_for_balance(root, p, level); | ||
1516 | if (sret) | ||
1517 | goto again; | ||
1518 | |||
1519 | btrfs_set_path_blocking(p); | ||
1520 | sret = split_node(trans, root, p, level); | ||
1521 | btrfs_clear_path_blocking(p, NULL); | ||
1522 | |||
1523 | BUG_ON(sret > 0); | ||
1524 | if (sret) { | ||
1525 | ret = sret; | ||
1526 | goto done; | ||
1527 | } | ||
1528 | b = p->nodes[level]; | ||
1529 | } else if (ins_len < 0 && btrfs_header_nritems(b) < | ||
1530 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) { | ||
1531 | int sret; | ||
1532 | |||
1533 | sret = reada_for_balance(root, p, level); | ||
1534 | if (sret) | ||
1535 | goto again; | ||
1536 | |||
1537 | btrfs_set_path_blocking(p); | ||
1538 | sret = balance_level(trans, root, p, level); | ||
1539 | btrfs_clear_path_blocking(p, NULL); | ||
1540 | |||
1541 | if (sret) { | ||
1542 | ret = sret; | ||
1543 | goto done; | ||
1544 | } | ||
1545 | b = p->nodes[level]; | ||
1546 | if (!b) { | ||
1547 | btrfs_release_path(NULL, p); | ||
1548 | goto again; | ||
1549 | } | ||
1550 | BUG_ON(btrfs_header_nritems(b) == 1); | ||
1551 | } | ||
1552 | return 0; | ||
1553 | |||
1554 | again: | ||
1555 | ret = -EAGAIN; | ||
1556 | done: | ||
1557 | return ret; | ||
1558 | } | ||
1559 | |||
1560 | /* | ||
1450 | * look for key in the tree. path is filled in with nodes along the way | 1561 | * look for key in the tree. path is filled in with nodes along the way |
1451 | * if key is found, we return zero and you can find the item in the leaf | 1562 | * if key is found, we return zero and you can find the item in the leaf |
1452 | * level of the path (level 0) | 1563 | * level of the path (level 0) |
@@ -1464,16 +1575,11 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
1464 | ins_len, int cow) | 1575 | ins_len, int cow) |
1465 | { | 1576 | { |
1466 | struct extent_buffer *b; | 1577 | struct extent_buffer *b; |
1467 | struct extent_buffer *tmp; | ||
1468 | int slot; | 1578 | int slot; |
1469 | int ret; | 1579 | int ret; |
1470 | int level; | 1580 | int level; |
1471 | int should_reada = p->reada; | ||
1472 | int lowest_unlock = 1; | 1581 | int lowest_unlock = 1; |
1473 | int blocksize; | ||
1474 | u8 lowest_level = 0; | 1582 | u8 lowest_level = 0; |
1475 | u64 blocknr; | ||
1476 | u64 gen; | ||
1477 | 1583 | ||
1478 | lowest_level = p->lowest_level; | 1584 | lowest_level = p->lowest_level; |
1479 | WARN_ON(lowest_level && ins_len > 0); | 1585 | WARN_ON(lowest_level && ins_len > 0); |
@@ -1502,7 +1608,11 @@ again: | |||
1502 | if (cow) { | 1608 | if (cow) { |
1503 | int wret; | 1609 | int wret; |
1504 | 1610 | ||
1505 | /* is a cow on this block not required */ | 1611 | /* |
1612 | * if we don't really need to cow this block | ||
1613 | * then we don't want to set the path blocking, | ||
1614 | * so we test it here | ||
1615 | */ | ||
1506 | if (btrfs_header_generation(b) == trans->transid && | 1616 | if (btrfs_header_generation(b) == trans->transid && |
1507 | btrfs_header_owner(b) == root->root_key.objectid && | 1617 | btrfs_header_owner(b) == root->root_key.objectid && |
1508 | !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { | 1618 | !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { |
@@ -1557,51 +1667,15 @@ cow_done: | |||
1557 | if (ret && slot > 0) | 1667 | if (ret && slot > 0) |
1558 | slot -= 1; | 1668 | slot -= 1; |
1559 | p->slots[level] = slot; | 1669 | p->slots[level] = slot; |
1560 | if ((p->search_for_split || ins_len > 0) && | 1670 | ret = setup_nodes_for_search(trans, root, p, b, level, |
1561 | btrfs_header_nritems(b) >= | 1671 | ins_len); |
1562 | BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { | 1672 | if (ret == -EAGAIN) |
1563 | int sret; | 1673 | goto again; |
1564 | 1674 | else if (ret) | |
1565 | sret = reada_for_balance(root, p, level); | 1675 | goto done; |
1566 | if (sret) | 1676 | b = p->nodes[level]; |
1567 | goto again; | 1677 | slot = p->slots[level]; |
1568 | |||
1569 | btrfs_set_path_blocking(p); | ||
1570 | sret = split_node(trans, root, p, level); | ||
1571 | btrfs_clear_path_blocking(p, NULL); | ||
1572 | |||
1573 | BUG_ON(sret > 0); | ||
1574 | if (sret) { | ||
1575 | ret = sret; | ||
1576 | goto done; | ||
1577 | } | ||
1578 | b = p->nodes[level]; | ||
1579 | slot = p->slots[level]; | ||
1580 | } else if (ins_len < 0 && | ||
1581 | btrfs_header_nritems(b) < | ||
1582 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) { | ||
1583 | int sret; | ||
1584 | |||
1585 | sret = reada_for_balance(root, p, level); | ||
1586 | if (sret) | ||
1587 | goto again; | ||
1588 | |||
1589 | btrfs_set_path_blocking(p); | ||
1590 | sret = balance_level(trans, root, p, level); | ||
1591 | btrfs_clear_path_blocking(p, NULL); | ||
1592 | 1678 | ||
1593 | if (sret) { | ||
1594 | ret = sret; | ||
1595 | goto done; | ||
1596 | } | ||
1597 | b = p->nodes[level]; | ||
1598 | if (!b) { | ||
1599 | btrfs_release_path(NULL, p); | ||
1600 | goto again; | ||
1601 | } | ||
1602 | slot = p->slots[level]; | ||
1603 | BUG_ON(btrfs_header_nritems(b) == 1); | ||
1604 | } | ||
1605 | unlock_up(p, level, lowest_unlock); | 1679 | unlock_up(p, level, lowest_unlock); |
1606 | 1680 | ||
1607 | /* this is only true while dropping a snapshot */ | 1681 | /* this is only true while dropping a snapshot */ |
@@ -1610,44 +1684,11 @@ cow_done: | |||
1610 | goto done; | 1684 | goto done; |
1611 | } | 1685 | } |
1612 | 1686 | ||
1613 | blocknr = btrfs_node_blockptr(b, slot); | 1687 | ret = read_block_for_search(trans, root, p, |
1614 | gen = btrfs_node_ptr_generation(b, slot); | 1688 | &b, level, slot, key); |
1615 | blocksize = btrfs_level_size(root, level - 1); | 1689 | if (ret == -EAGAIN) |
1690 | goto again; | ||
1616 | 1691 | ||
1617 | tmp = btrfs_find_tree_block(root, blocknr, blocksize); | ||
1618 | if (tmp && btrfs_buffer_uptodate(tmp, gen)) { | ||
1619 | b = tmp; | ||
1620 | } else { | ||
1621 | /* | ||
1622 | * reduce lock contention at high levels | ||
1623 | * of the btree by dropping locks before | ||
1624 | * we read. | ||
1625 | */ | ||
1626 | if (level > 0) { | ||
1627 | btrfs_release_path(NULL, p); | ||
1628 | if (tmp) | ||
1629 | free_extent_buffer(tmp); | ||
1630 | if (should_reada) | ||
1631 | reada_for_search(root, p, | ||
1632 | level, slot, | ||
1633 | key->objectid); | ||
1634 | |||
1635 | tmp = read_tree_block(root, blocknr, | ||
1636 | blocksize, gen); | ||
1637 | if (tmp) | ||
1638 | free_extent_buffer(tmp); | ||
1639 | goto again; | ||
1640 | } else { | ||
1641 | btrfs_set_path_blocking(p); | ||
1642 | if (tmp) | ||
1643 | free_extent_buffer(tmp); | ||
1644 | if (should_reada) | ||
1645 | reada_for_search(root, p, | ||
1646 | level, slot, | ||
1647 | key->objectid); | ||
1648 | b = read_node_slot(root, b, slot); | ||
1649 | } | ||
1650 | } | ||
1651 | if (!p->skip_locking) { | 1692 | if (!p->skip_locking) { |
1652 | int lret; | 1693 | int lret; |
1653 | 1694 | ||
@@ -2116,8 +2157,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2116 | BUG_ON(!path->nodes[level]); | 2157 | BUG_ON(!path->nodes[level]); |
2117 | lower = path->nodes[level]; | 2158 | lower = path->nodes[level]; |
2118 | nritems = btrfs_header_nritems(lower); | 2159 | nritems = btrfs_header_nritems(lower); |
2119 | if (slot > nritems) | 2160 | BUG_ON(slot > nritems); |
2120 | BUG(); | ||
2121 | if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) | 2161 | if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) |
2122 | BUG(); | 2162 | BUG(); |
2123 | if (slot != nritems) { | 2163 | if (slot != nritems) { |
@@ -4086,28 +4126,44 @@ next: | |||
4086 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | 4126 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) |
4087 | { | 4127 | { |
4088 | int slot; | 4128 | int slot; |
4089 | int level = 1; | 4129 | int level; |
4090 | struct extent_buffer *c; | 4130 | struct extent_buffer *c; |
4091 | struct extent_buffer *next = NULL; | 4131 | struct extent_buffer *next; |
4092 | struct btrfs_key key; | 4132 | struct btrfs_key key; |
4093 | u32 nritems; | 4133 | u32 nritems; |
4094 | int ret; | 4134 | int ret; |
4135 | int old_spinning = path->leave_spinning; | ||
4136 | int force_blocking = 0; | ||
4095 | 4137 | ||
4096 | nritems = btrfs_header_nritems(path->nodes[0]); | 4138 | nritems = btrfs_header_nritems(path->nodes[0]); |
4097 | if (nritems == 0) | 4139 | if (nritems == 0) |
4098 | return 1; | 4140 | return 1; |
4099 | 4141 | ||
4100 | btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); | 4142 | /* |
4143 | * we take the blocks in an order that upsets lockdep. Using | ||
4144 | * blocking mode is the only way around it. | ||
4145 | */ | ||
4146 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
4147 | force_blocking = 1; | ||
4148 | #endif | ||
4101 | 4149 | ||
4150 | btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); | ||
4151 | again: | ||
4152 | level = 1; | ||
4153 | next = NULL; | ||
4102 | btrfs_release_path(root, path); | 4154 | btrfs_release_path(root, path); |
4155 | |||
4103 | path->keep_locks = 1; | 4156 | path->keep_locks = 1; |
4157 | |||
4158 | if (!force_blocking) | ||
4159 | path->leave_spinning = 1; | ||
4160 | |||
4104 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 4161 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
4105 | path->keep_locks = 0; | 4162 | path->keep_locks = 0; |
4106 | 4163 | ||
4107 | if (ret < 0) | 4164 | if (ret < 0) |
4108 | return ret; | 4165 | return ret; |
4109 | 4166 | ||
4110 | btrfs_set_path_blocking(path); | ||
4111 | nritems = btrfs_header_nritems(path->nodes[0]); | 4167 | nritems = btrfs_header_nritems(path->nodes[0]); |
4112 | /* | 4168 | /* |
4113 | * by releasing the path above we dropped all our locks. A balance | 4169 | * by releasing the path above we dropped all our locks. A balance |
@@ -4117,19 +4173,24 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
4117 | */ | 4173 | */ |
4118 | if (nritems > 0 && path->slots[0] < nritems - 1) { | 4174 | if (nritems > 0 && path->slots[0] < nritems - 1) { |
4119 | path->slots[0]++; | 4175 | path->slots[0]++; |
4176 | ret = 0; | ||
4120 | goto done; | 4177 | goto done; |
4121 | } | 4178 | } |
4122 | 4179 | ||
4123 | while (level < BTRFS_MAX_LEVEL) { | 4180 | while (level < BTRFS_MAX_LEVEL) { |
4124 | if (!path->nodes[level]) | 4181 | if (!path->nodes[level]) { |
4125 | return 1; | 4182 | ret = 1; |
4183 | goto done; | ||
4184 | } | ||
4126 | 4185 | ||
4127 | slot = path->slots[level] + 1; | 4186 | slot = path->slots[level] + 1; |
4128 | c = path->nodes[level]; | 4187 | c = path->nodes[level]; |
4129 | if (slot >= btrfs_header_nritems(c)) { | 4188 | if (slot >= btrfs_header_nritems(c)) { |
4130 | level++; | 4189 | level++; |
4131 | if (level == BTRFS_MAX_LEVEL) | 4190 | if (level == BTRFS_MAX_LEVEL) { |
4132 | return 1; | 4191 | ret = 1; |
4192 | goto done; | ||
4193 | } | ||
4133 | continue; | 4194 | continue; |
4134 | } | 4195 | } |
4135 | 4196 | ||
@@ -4138,16 +4199,22 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
4138 | free_extent_buffer(next); | 4199 | free_extent_buffer(next); |
4139 | } | 4200 | } |
4140 | 4201 | ||
4141 | /* the path was set to blocking above */ | 4202 | next = c; |
4142 | if (level == 1 && (path->locks[1] || path->skip_locking) && | 4203 | ret = read_block_for_search(NULL, root, path, &next, level, |
4143 | path->reada) | 4204 | slot, &key); |
4144 | reada_for_search(root, path, level, slot, 0); | 4205 | if (ret == -EAGAIN) |
4206 | goto again; | ||
4145 | 4207 | ||
4146 | next = read_node_slot(root, c, slot); | ||
4147 | if (!path->skip_locking) { | 4208 | if (!path->skip_locking) { |
4148 | btrfs_assert_tree_locked(c); | 4209 | ret = btrfs_try_spin_lock(next); |
4149 | btrfs_tree_lock(next); | 4210 | if (!ret) { |
4150 | btrfs_set_lock_blocking(next); | 4211 | btrfs_set_path_blocking(path); |
4212 | btrfs_tree_lock(next); | ||
4213 | if (!force_blocking) | ||
4214 | btrfs_clear_path_blocking(path, next); | ||
4215 | } | ||
4216 | if (force_blocking) | ||
4217 | btrfs_set_lock_blocking(next); | ||
4151 | } | 4218 | } |
4152 | break; | 4219 | break; |
4153 | } | 4220 | } |
@@ -4157,27 +4224,42 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
4157 | c = path->nodes[level]; | 4224 | c = path->nodes[level]; |
4158 | if (path->locks[level]) | 4225 | if (path->locks[level]) |
4159 | btrfs_tree_unlock(c); | 4226 | btrfs_tree_unlock(c); |
4227 | |||
4160 | free_extent_buffer(c); | 4228 | free_extent_buffer(c); |
4161 | path->nodes[level] = next; | 4229 | path->nodes[level] = next; |
4162 | path->slots[level] = 0; | 4230 | path->slots[level] = 0; |
4163 | if (!path->skip_locking) | 4231 | if (!path->skip_locking) |
4164 | path->locks[level] = 1; | 4232 | path->locks[level] = 1; |
4233 | |||
4165 | if (!level) | 4234 | if (!level) |
4166 | break; | 4235 | break; |
4167 | 4236 | ||
4168 | btrfs_set_path_blocking(path); | 4237 | ret = read_block_for_search(NULL, root, path, &next, level, |
4169 | if (level == 1 && path->locks[1] && path->reada) | 4238 | 0, &key); |
4170 | reada_for_search(root, path, level, slot, 0); | 4239 | if (ret == -EAGAIN) |
4171 | next = read_node_slot(root, next, 0); | 4240 | goto again; |
4241 | |||
4172 | if (!path->skip_locking) { | 4242 | if (!path->skip_locking) { |
4173 | btrfs_assert_tree_locked(path->nodes[level]); | 4243 | btrfs_assert_tree_locked(path->nodes[level]); |
4174 | btrfs_tree_lock(next); | 4244 | ret = btrfs_try_spin_lock(next); |
4175 | btrfs_set_lock_blocking(next); | 4245 | if (!ret) { |
4246 | btrfs_set_path_blocking(path); | ||
4247 | btrfs_tree_lock(next); | ||
4248 | if (!force_blocking) | ||
4249 | btrfs_clear_path_blocking(path, next); | ||
4250 | } | ||
4251 | if (force_blocking) | ||
4252 | btrfs_set_lock_blocking(next); | ||
4176 | } | 4253 | } |
4177 | } | 4254 | } |
4255 | ret = 0; | ||
4178 | done: | 4256 | done: |
4179 | unlock_up(path, 0, 1); | 4257 | unlock_up(path, 0, 1); |
4180 | return 0; | 4258 | path->leave_spinning = old_spinning; |
4259 | if (!old_spinning) | ||
4260 | btrfs_set_path_blocking(path); | ||
4261 | |||
4262 | return ret; | ||
4181 | } | 4263 | } |
4182 | 4264 | ||
4183 | /* | 4265 | /* |