aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ctree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r--fs/btrfs/ctree.c308
1 files changed, 221 insertions, 87 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c4bc570a396e..b5baff0dccfe 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/slab.h>
20#include "ctree.h" 21#include "ctree.h"
21#include "disk-io.h" 22#include "disk-io.h"
22#include "transaction.h" 23#include "transaction.h"
@@ -104,6 +105,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
104/* this also releases the path */ 105/* this also releases the path */
105void btrfs_free_path(struct btrfs_path *p) 106void btrfs_free_path(struct btrfs_path *p)
106{ 107{
108 if (!p)
109 return;
107 btrfs_release_path(NULL, p); 110 btrfs_release_path(NULL, p);
108 kmem_cache_free(btrfs_path_cachep, p); 111 kmem_cache_free(btrfs_path_cachep, p);
109} 112}
@@ -199,7 +202,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
199 struct extent_buffer **cow_ret, u64 new_root_objectid) 202 struct extent_buffer **cow_ret, u64 new_root_objectid)
200{ 203{
201 struct extent_buffer *cow; 204 struct extent_buffer *cow;
202 u32 nritems;
203 int ret = 0; 205 int ret = 0;
204 int level; 206 int level;
205 struct btrfs_disk_key disk_key; 207 struct btrfs_disk_key disk_key;
@@ -209,7 +211,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
209 WARN_ON(root->ref_cows && trans->transid != root->last_trans); 211 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
210 212
211 level = btrfs_header_level(buf); 213 level = btrfs_header_level(buf);
212 nritems = btrfs_header_nritems(buf);
213 if (level == 0) 214 if (level == 0)
214 btrfs_item_key(buf, &disk_key, 0); 215 btrfs_item_key(buf, &disk_key, 0);
215 else 216 else
@@ -279,7 +280,8 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
279static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, 280static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
280 struct btrfs_root *root, 281 struct btrfs_root *root,
281 struct extent_buffer *buf, 282 struct extent_buffer *buf,
282 struct extent_buffer *cow) 283 struct extent_buffer *cow,
284 int *last_ref)
283{ 285{
284 u64 refs; 286 u64 refs;
285 u64 owner; 287 u64 owner;
@@ -365,6 +367,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
365 BUG_ON(ret); 367 BUG_ON(ret);
366 } 368 }
367 clean_tree_block(trans, root, buf); 369 clean_tree_block(trans, root, buf);
370 *last_ref = 1;
368 } 371 }
369 return 0; 372 return 0;
370} 373}
@@ -391,6 +394,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
391 struct btrfs_disk_key disk_key; 394 struct btrfs_disk_key disk_key;
392 struct extent_buffer *cow; 395 struct extent_buffer *cow;
393 int level; 396 int level;
397 int last_ref = 0;
394 int unlock_orig = 0; 398 int unlock_orig = 0;
395 u64 parent_start; 399 u64 parent_start;
396 400
@@ -441,7 +445,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
441 (unsigned long)btrfs_header_fsid(cow), 445 (unsigned long)btrfs_header_fsid(cow),
442 BTRFS_FSID_SIZE); 446 BTRFS_FSID_SIZE);
443 447
444 update_ref_for_cow(trans, root, buf, cow); 448 update_ref_for_cow(trans, root, buf, cow, &last_ref);
449
450 if (root->ref_cows)
451 btrfs_reloc_cow_block(trans, root, buf, cow);
445 452
446 if (buf == root->node) { 453 if (buf == root->node) {
447 WARN_ON(parent && parent != buf); 454 WARN_ON(parent && parent != buf);
@@ -456,8 +463,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
456 extent_buffer_get(cow); 463 extent_buffer_get(cow);
457 spin_unlock(&root->node_lock); 464 spin_unlock(&root->node_lock);
458 465
459 btrfs_free_tree_block(trans, root, buf->start, buf->len, 466 btrfs_free_tree_block(trans, root, buf, parent_start,
460 parent_start, root->root_key.objectid, level); 467 last_ref);
461 free_extent_buffer(buf); 468 free_extent_buffer(buf);
462 add_root_to_dirty_list(root); 469 add_root_to_dirty_list(root);
463 } else { 470 } else {
@@ -472,8 +479,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
472 btrfs_set_node_ptr_generation(parent, parent_slot, 479 btrfs_set_node_ptr_generation(parent, parent_slot,
473 trans->transid); 480 trans->transid);
474 btrfs_mark_buffer_dirty(parent); 481 btrfs_mark_buffer_dirty(parent);
475 btrfs_free_tree_block(trans, root, buf->start, buf->len, 482 btrfs_free_tree_block(trans, root, buf, parent_start,
476 parent_start, root->root_key.objectid, level); 483 last_ref);
477 } 484 }
478 if (unlock_orig) 485 if (unlock_orig)
479 btrfs_tree_unlock(buf); 486 btrfs_tree_unlock(buf);
@@ -948,6 +955,22 @@ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
948 return bin_search(eb, key, level, slot); 955 return bin_search(eb, key, level, slot);
949} 956}
950 957
958static void root_add_used(struct btrfs_root *root, u32 size)
959{
960 spin_lock(&root->accounting_lock);
961 btrfs_set_root_used(&root->root_item,
962 btrfs_root_used(&root->root_item) + size);
963 spin_unlock(&root->accounting_lock);
964}
965
966static void root_sub_used(struct btrfs_root *root, u32 size)
967{
968 spin_lock(&root->accounting_lock);
969 btrfs_set_root_used(&root->root_item,
970 btrfs_root_used(&root->root_item) - size);
971 spin_unlock(&root->accounting_lock);
972}
973
951/* given a node and slot number, this reads the blocks it points to. The 974/* given a node and slot number, this reads the blocks it points to. The
952 * extent buffer is returned with a reference taken (but unlocked). 975 * extent buffer is returned with a reference taken (but unlocked).
953 * NULL is returned on error. 976 * NULL is returned on error.
@@ -985,7 +1008,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
985 int wret; 1008 int wret;
986 int pslot; 1009 int pslot;
987 int orig_slot = path->slots[level]; 1010 int orig_slot = path->slots[level];
988 int err_on_enospc = 0;
989 u64 orig_ptr; 1011 u64 orig_ptr;
990 1012
991 if (level == 0) 1013 if (level == 0)
@@ -1018,7 +1040,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1018 btrfs_tree_lock(child); 1040 btrfs_tree_lock(child);
1019 btrfs_set_lock_blocking(child); 1041 btrfs_set_lock_blocking(child);
1020 ret = btrfs_cow_block(trans, root, child, mid, 0, &child); 1042 ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
1021 BUG_ON(ret); 1043 if (ret) {
1044 btrfs_tree_unlock(child);
1045 free_extent_buffer(child);
1046 goto enospc;
1047 }
1022 1048
1023 spin_lock(&root->node_lock); 1049 spin_lock(&root->node_lock);
1024 root->node = child; 1050 root->node = child;
@@ -1033,18 +1059,18 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1033 btrfs_tree_unlock(mid); 1059 btrfs_tree_unlock(mid);
1034 /* once for the path */ 1060 /* once for the path */
1035 free_extent_buffer(mid); 1061 free_extent_buffer(mid);
1036 ret = btrfs_free_tree_block(trans, root, mid->start, mid->len, 1062
1037 0, root->root_key.objectid, level); 1063 root_sub_used(root, mid->len);
1064 btrfs_free_tree_block(trans, root, mid, 0, 1);
1038 /* once for the root ptr */ 1065 /* once for the root ptr */
1039 free_extent_buffer(mid); 1066 free_extent_buffer(mid);
1040 return ret; 1067 return 0;
1041 } 1068 }
1042 if (btrfs_header_nritems(mid) > 1069 if (btrfs_header_nritems(mid) >
1043 BTRFS_NODEPTRS_PER_BLOCK(root) / 4) 1070 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
1044 return 0; 1071 return 0;
1045 1072
1046 if (btrfs_header_nritems(mid) < 2) 1073 btrfs_header_nritems(mid);
1047 err_on_enospc = 1;
1048 1074
1049 left = read_node_slot(root, parent, pslot - 1); 1075 left = read_node_slot(root, parent, pslot - 1);
1050 if (left) { 1076 if (left) {
@@ -1075,8 +1101,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1075 wret = push_node_left(trans, root, left, mid, 1); 1101 wret = push_node_left(trans, root, left, mid, 1);
1076 if (wret < 0) 1102 if (wret < 0)
1077 ret = wret; 1103 ret = wret;
1078 if (btrfs_header_nritems(mid) < 2) 1104 btrfs_header_nritems(mid);
1079 err_on_enospc = 1;
1080 } 1105 }
1081 1106
1082 /* 1107 /*
@@ -1087,23 +1112,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1087 if (wret < 0 && wret != -ENOSPC) 1112 if (wret < 0 && wret != -ENOSPC)
1088 ret = wret; 1113 ret = wret;
1089 if (btrfs_header_nritems(right) == 0) { 1114 if (btrfs_header_nritems(right) == 0) {
1090 u64 bytenr = right->start;
1091 u32 blocksize = right->len;
1092
1093 clean_tree_block(trans, root, right); 1115 clean_tree_block(trans, root, right);
1094 btrfs_tree_unlock(right); 1116 btrfs_tree_unlock(right);
1095 free_extent_buffer(right);
1096 right = NULL;
1097 wret = del_ptr(trans, root, path, level + 1, pslot + 1117 wret = del_ptr(trans, root, path, level + 1, pslot +
1098 1); 1118 1);
1099 if (wret) 1119 if (wret)
1100 ret = wret; 1120 ret = wret;
1101 wret = btrfs_free_tree_block(trans, root, 1121 root_sub_used(root, right->len);
1102 bytenr, blocksize, 0, 1122 btrfs_free_tree_block(trans, root, right, 0, 1);
1103 root->root_key.objectid, 1123 free_extent_buffer(right);
1104 level); 1124 right = NULL;
1105 if (wret)
1106 ret = wret;
1107 } else { 1125 } else {
1108 struct btrfs_disk_key right_key; 1126 struct btrfs_disk_key right_key;
1109 btrfs_node_key(right, &right_key, 0); 1127 btrfs_node_key(right, &right_key, 0);
@@ -1135,21 +1153,15 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1135 BUG_ON(wret == 1); 1153 BUG_ON(wret == 1);
1136 } 1154 }
1137 if (btrfs_header_nritems(mid) == 0) { 1155 if (btrfs_header_nritems(mid) == 0) {
1138 /* we've managed to empty the middle node, drop it */
1139 u64 bytenr = mid->start;
1140 u32 blocksize = mid->len;
1141
1142 clean_tree_block(trans, root, mid); 1156 clean_tree_block(trans, root, mid);
1143 btrfs_tree_unlock(mid); 1157 btrfs_tree_unlock(mid);
1144 free_extent_buffer(mid);
1145 mid = NULL;
1146 wret = del_ptr(trans, root, path, level + 1, pslot); 1158 wret = del_ptr(trans, root, path, level + 1, pslot);
1147 if (wret) 1159 if (wret)
1148 ret = wret; 1160 ret = wret;
1149 wret = btrfs_free_tree_block(trans, root, bytenr, blocksize, 1161 root_sub_used(root, mid->len);
1150 0, root->root_key.objectid, level); 1162 btrfs_free_tree_block(trans, root, mid, 0, 1);
1151 if (wret) 1163 free_extent_buffer(mid);
1152 ret = wret; 1164 mid = NULL;
1153 } else { 1165 } else {
1154 /* update the parent key to reflect our changes */ 1166 /* update the parent key to reflect our changes */
1155 struct btrfs_disk_key mid_key; 1167 struct btrfs_disk_key mid_key;
@@ -1209,14 +1221,12 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
1209 int wret; 1221 int wret;
1210 int pslot; 1222 int pslot;
1211 int orig_slot = path->slots[level]; 1223 int orig_slot = path->slots[level];
1212 u64 orig_ptr;
1213 1224
1214 if (level == 0) 1225 if (level == 0)
1215 return 1; 1226 return 1;
1216 1227
1217 mid = path->nodes[level]; 1228 mid = path->nodes[level];
1218 WARN_ON(btrfs_header_generation(mid) != trans->transid); 1229 WARN_ON(btrfs_header_generation(mid) != trans->transid);
1219 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
1220 1230
1221 if (level < BTRFS_MAX_LEVEL - 1) 1231 if (level < BTRFS_MAX_LEVEL - 1)
1222 parent = path->nodes[level + 1]; 1232 parent = path->nodes[level + 1];
@@ -1562,13 +1572,33 @@ read_block_for_search(struct btrfs_trans_handle *trans,
1562 blocksize = btrfs_level_size(root, level - 1); 1572 blocksize = btrfs_level_size(root, level - 1);
1563 1573
1564 tmp = btrfs_find_tree_block(root, blocknr, blocksize); 1574 tmp = btrfs_find_tree_block(root, blocknr, blocksize);
1565 if (tmp && btrfs_buffer_uptodate(tmp, gen)) { 1575 if (tmp) {
1566 /* 1576 if (btrfs_buffer_uptodate(tmp, 0)) {
1567 * we found an up to date block without sleeping, return 1577 if (btrfs_buffer_uptodate(tmp, gen)) {
1568 * right away 1578 /*
1569 */ 1579 * we found an up to date block without
1570 *eb_ret = tmp; 1580 * sleeping, return
1571 return 0; 1581 * right away
1582 */
1583 *eb_ret = tmp;
1584 return 0;
1585 }
1586 /* the pages were up to date, but we failed
1587 * the generation number check. Do a full
1588 * read for the generation number that is correct.
1589 * We must do this without dropping locks so
1590 * we can trust our generation number
1591 */
1592 free_extent_buffer(tmp);
1593 tmp = read_tree_block(root, blocknr, blocksize, gen);
1594 if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
1595 *eb_ret = tmp;
1596 return 0;
1597 }
1598 free_extent_buffer(tmp);
1599 btrfs_release_path(NULL, p);
1600 return -EIO;
1601 }
1572 } 1602 }
1573 1603
1574 /* 1604 /*
@@ -1581,15 +1611,14 @@ read_block_for_search(struct btrfs_trans_handle *trans,
1581 btrfs_unlock_up_safe(p, level + 1); 1611 btrfs_unlock_up_safe(p, level + 1);
1582 btrfs_set_path_blocking(p); 1612 btrfs_set_path_blocking(p);
1583 1613
1584 if (tmp) 1614 free_extent_buffer(tmp);
1585 free_extent_buffer(tmp);
1586 if (p->reada) 1615 if (p->reada)
1587 reada_for_search(root, p, level, slot, key->objectid); 1616 reada_for_search(root, p, level, slot, key->objectid);
1588 1617
1589 btrfs_release_path(NULL, p); 1618 btrfs_release_path(NULL, p);
1590 1619
1591 ret = -EAGAIN; 1620 ret = -EAGAIN;
1592 tmp = read_tree_block(root, blocknr, blocksize, gen); 1621 tmp = read_tree_block(root, blocknr, blocksize, 0);
1593 if (tmp) { 1622 if (tmp) {
1594 /* 1623 /*
1595 * If the read above didn't mark this buffer up to date, 1624 * If the read above didn't mark this buffer up to date,
@@ -1739,7 +1768,6 @@ again:
1739 p->nodes[level + 1], 1768 p->nodes[level + 1],
1740 p->slots[level + 1], &b); 1769 p->slots[level + 1], &b);
1741 if (err) { 1770 if (err) {
1742 free_extent_buffer(b);
1743 ret = err; 1771 ret = err;
1744 goto done; 1772 goto done;
1745 } 1773 }
@@ -2075,6 +2103,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2075 if (IS_ERR(c)) 2103 if (IS_ERR(c))
2076 return PTR_ERR(c); 2104 return PTR_ERR(c);
2077 2105
2106 root_add_used(root, root->nodesize);
2107
2078 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); 2108 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
2079 btrfs_set_header_nritems(c, 1); 2109 btrfs_set_header_nritems(c, 1);
2080 btrfs_set_header_level(c, level); 2110 btrfs_set_header_level(c, level);
@@ -2133,6 +2163,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
2133 int nritems; 2163 int nritems;
2134 2164
2135 BUG_ON(!path->nodes[level]); 2165 BUG_ON(!path->nodes[level]);
2166 btrfs_assert_tree_locked(path->nodes[level]);
2136 lower = path->nodes[level]; 2167 lower = path->nodes[level];
2137 nritems = btrfs_header_nritems(lower); 2168 nritems = btrfs_header_nritems(lower);
2138 BUG_ON(slot > nritems); 2169 BUG_ON(slot > nritems);
@@ -2201,6 +2232,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
2201 if (IS_ERR(split)) 2232 if (IS_ERR(split))
2202 return PTR_ERR(split); 2233 return PTR_ERR(split);
2203 2234
2235 root_add_used(root, root->nodesize);
2236
2204 memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); 2237 memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
2205 btrfs_set_header_level(split, btrfs_header_level(c)); 2238 btrfs_set_header_level(split, btrfs_header_level(c));
2206 btrfs_set_header_bytenr(split, split->start); 2239 btrfs_set_header_bytenr(split, split->start);
@@ -2285,12 +2318,17 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
2285 return ret; 2318 return ret;
2286} 2319}
2287 2320
2321/*
2322 * min slot controls the lowest index we're willing to push to the
2323 * right. We'll push up to and including min_slot, but no lower
2324 */
2288static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, 2325static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
2289 struct btrfs_root *root, 2326 struct btrfs_root *root,
2290 struct btrfs_path *path, 2327 struct btrfs_path *path,
2291 int data_size, int empty, 2328 int data_size, int empty,
2292 struct extent_buffer *right, 2329 struct extent_buffer *right,
2293 int free_space, u32 left_nritems) 2330 int free_space, u32 left_nritems,
2331 u32 min_slot)
2294{ 2332{
2295 struct extent_buffer *left = path->nodes[0]; 2333 struct extent_buffer *left = path->nodes[0];
2296 struct extent_buffer *upper = path->nodes[1]; 2334 struct extent_buffer *upper = path->nodes[1];
@@ -2308,7 +2346,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
2308 if (empty) 2346 if (empty)
2309 nr = 0; 2347 nr = 0;
2310 else 2348 else
2311 nr = 1; 2349 nr = max_t(u32, 1, min_slot);
2312 2350
2313 if (path->slots[0] >= left_nritems) 2351 if (path->slots[0] >= left_nritems)
2314 push_space += data_size; 2352 push_space += data_size;
@@ -2414,6 +2452,9 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
2414 2452
2415 if (left_nritems) 2453 if (left_nritems)
2416 btrfs_mark_buffer_dirty(left); 2454 btrfs_mark_buffer_dirty(left);
2455 else
2456 clean_tree_block(trans, root, left);
2457
2417 btrfs_mark_buffer_dirty(right); 2458 btrfs_mark_buffer_dirty(right);
2418 2459
2419 btrfs_item_key(right, &disk_key, 0); 2460 btrfs_item_key(right, &disk_key, 0);
@@ -2447,10 +2488,14 @@ out_unlock:
2447 * 2488 *
2448 * returns 1 if the push failed because the other node didn't have enough 2489 * returns 1 if the push failed because the other node didn't have enough
2449 * room, 0 if everything worked out and < 0 if there were major errors. 2490 * room, 0 if everything worked out and < 0 if there were major errors.
2491 *
2492 * this will push starting from min_slot to the end of the leaf. It won't
2493 * push any slot lower than min_slot
2450 */ 2494 */
2451static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root 2495static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
2452 *root, struct btrfs_path *path, int data_size, 2496 *root, struct btrfs_path *path,
2453 int empty) 2497 int min_data_size, int data_size,
2498 int empty, u32 min_slot)
2454{ 2499{
2455 struct extent_buffer *left = path->nodes[0]; 2500 struct extent_buffer *left = path->nodes[0];
2456 struct extent_buffer *right; 2501 struct extent_buffer *right;
@@ -2471,6 +2516,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
2471 btrfs_assert_tree_locked(path->nodes[1]); 2516 btrfs_assert_tree_locked(path->nodes[1]);
2472 2517
2473 right = read_node_slot(root, upper, slot + 1); 2518 right = read_node_slot(root, upper, slot + 1);
2519 if (right == NULL)
2520 return 1;
2521
2474 btrfs_tree_lock(right); 2522 btrfs_tree_lock(right);
2475 btrfs_set_lock_blocking(right); 2523 btrfs_set_lock_blocking(right);
2476 2524
@@ -2492,8 +2540,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
2492 if (left_nritems == 0) 2540 if (left_nritems == 0)
2493 goto out_unlock; 2541 goto out_unlock;
2494 2542
2495 return __push_leaf_right(trans, root, path, data_size, empty, 2543 return __push_leaf_right(trans, root, path, min_data_size, empty,
2496 right, free_space, left_nritems); 2544 right, free_space, left_nritems, min_slot);
2497out_unlock: 2545out_unlock:
2498 btrfs_tree_unlock(right); 2546 btrfs_tree_unlock(right);
2499 free_extent_buffer(right); 2547 free_extent_buffer(right);
@@ -2503,16 +2551,20 @@ out_unlock:
2503/* 2551/*
2504 * push some data in the path leaf to the left, trying to free up at 2552 * push some data in the path leaf to the left, trying to free up at
2505 * least data_size bytes. returns zero if the push worked, nonzero otherwise 2553 * least data_size bytes. returns zero if the push worked, nonzero otherwise
2554 *
2555 * max_slot can put a limit on how far into the leaf we'll push items. The
2556 * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
2557 * items
2506 */ 2558 */
2507static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, 2559static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
2508 struct btrfs_root *root, 2560 struct btrfs_root *root,
2509 struct btrfs_path *path, int data_size, 2561 struct btrfs_path *path, int data_size,
2510 int empty, struct extent_buffer *left, 2562 int empty, struct extent_buffer *left,
2511 int free_space, int right_nritems) 2563 int free_space, u32 right_nritems,
2564 u32 max_slot)
2512{ 2565{
2513 struct btrfs_disk_key disk_key; 2566 struct btrfs_disk_key disk_key;
2514 struct extent_buffer *right = path->nodes[0]; 2567 struct extent_buffer *right = path->nodes[0];
2515 int slot;
2516 int i; 2568 int i;
2517 int push_space = 0; 2569 int push_space = 0;
2518 int push_items = 0; 2570 int push_items = 0;
@@ -2524,12 +2576,10 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
2524 u32 this_item_size; 2576 u32 this_item_size;
2525 u32 old_left_item_size; 2577 u32 old_left_item_size;
2526 2578
2527 slot = path->slots[1];
2528
2529 if (empty) 2579 if (empty)
2530 nr = right_nritems; 2580 nr = min(right_nritems, max_slot);
2531 else 2581 else
2532 nr = right_nritems - 1; 2582 nr = min(right_nritems - 1, max_slot);
2533 2583
2534 for (i = 0; i < nr; i++) { 2584 for (i = 0; i < nr; i++) {
2535 item = btrfs_item_nr(right, i); 2585 item = btrfs_item_nr(right, i);
@@ -2659,6 +2709,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
2659 btrfs_mark_buffer_dirty(left); 2709 btrfs_mark_buffer_dirty(left);
2660 if (right_nritems) 2710 if (right_nritems)
2661 btrfs_mark_buffer_dirty(right); 2711 btrfs_mark_buffer_dirty(right);
2712 else
2713 clean_tree_block(trans, root, right);
2662 2714
2663 btrfs_item_key(right, &disk_key, 0); 2715 btrfs_item_key(right, &disk_key, 0);
2664 wret = fixup_low_keys(trans, root, path, &disk_key, 1); 2716 wret = fixup_low_keys(trans, root, path, &disk_key, 1);
@@ -2668,8 +2720,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
2668 /* then fixup the leaf pointer in the path */ 2720 /* then fixup the leaf pointer in the path */
2669 if (path->slots[0] < push_items) { 2721 if (path->slots[0] < push_items) {
2670 path->slots[0] += old_left_nritems; 2722 path->slots[0] += old_left_nritems;
2671 if (btrfs_header_nritems(path->nodes[0]) == 0)
2672 clean_tree_block(trans, root, path->nodes[0]);
2673 btrfs_tree_unlock(path->nodes[0]); 2723 btrfs_tree_unlock(path->nodes[0]);
2674 free_extent_buffer(path->nodes[0]); 2724 free_extent_buffer(path->nodes[0]);
2675 path->nodes[0] = left; 2725 path->nodes[0] = left;
@@ -2690,10 +2740,14 @@ out:
2690/* 2740/*
2691 * push some data in the path leaf to the left, trying to free up at 2741 * push some data in the path leaf to the left, trying to free up at
2692 * least data_size bytes. returns zero if the push worked, nonzero otherwise 2742 * least data_size bytes. returns zero if the push worked, nonzero otherwise
2743 *
2744 * max_slot can put a limit on how far into the leaf we'll push items. The
2745 * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the
2746 * items
2693 */ 2747 */
2694static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root 2748static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
2695 *root, struct btrfs_path *path, int data_size, 2749 *root, struct btrfs_path *path, int min_data_size,
2696 int empty) 2750 int data_size, int empty, u32 max_slot)
2697{ 2751{
2698 struct extent_buffer *right = path->nodes[0]; 2752 struct extent_buffer *right = path->nodes[0];
2699 struct extent_buffer *left; 2753 struct extent_buffer *left;
@@ -2715,6 +2769,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
2715 btrfs_assert_tree_locked(path->nodes[1]); 2769 btrfs_assert_tree_locked(path->nodes[1]);
2716 2770
2717 left = read_node_slot(root, path->nodes[1], slot - 1); 2771 left = read_node_slot(root, path->nodes[1], slot - 1);
2772 if (left == NULL)
2773 return 1;
2774
2718 btrfs_tree_lock(left); 2775 btrfs_tree_lock(left);
2719 btrfs_set_lock_blocking(left); 2776 btrfs_set_lock_blocking(left);
2720 2777
@@ -2739,8 +2796,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
2739 goto out; 2796 goto out;
2740 } 2797 }
2741 2798
2742 return __push_leaf_left(trans, root, path, data_size, 2799 return __push_leaf_left(trans, root, path, min_data_size,
2743 empty, left, free_space, right_nritems); 2800 empty, left, free_space, right_nritems,
2801 max_slot);
2744out: 2802out:
2745 btrfs_tree_unlock(left); 2803 btrfs_tree_unlock(left);
2746 free_extent_buffer(left); 2804 free_extent_buffer(left);
@@ -2833,6 +2891,64 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
2833} 2891}
2834 2892
2835/* 2893/*
2894 * double splits happen when we need to insert a big item in the middle
2895 * of a leaf. A double split can leave us with 3 mostly empty leaves:
2896 * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
2897 * A B C
2898 *
2899 * We avoid this by trying to push the items on either side of our target
2900 * into the adjacent leaves. If all goes well we can avoid the double split
2901 * completely.
2902 */
2903static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
2904 struct btrfs_root *root,
2905 struct btrfs_path *path,
2906 int data_size)
2907{
2908 int ret;
2909 int progress = 0;
2910 int slot;
2911 u32 nritems;
2912
2913 slot = path->slots[0];
2914
2915 /*
2916 * try to push all the items after our slot into the
2917 * right leaf
2918 */
2919 ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
2920 if (ret < 0)
2921 return ret;
2922
2923 if (ret == 0)
2924 progress++;
2925
2926 nritems = btrfs_header_nritems(path->nodes[0]);
2927 /*
2928 * our goal is to get our slot at the start or end of a leaf. If
2929 * we've done so we're done
2930 */
2931 if (path->slots[0] == 0 || path->slots[0] == nritems)
2932 return 0;
2933
2934 if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
2935 return 0;
2936
2937 /* try to push all the items before our slot into the next leaf */
2938 slot = path->slots[0];
2939 ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
2940 if (ret < 0)
2941 return ret;
2942
2943 if (ret == 0)
2944 progress++;
2945
2946 if (progress)
2947 return 0;
2948 return 1;
2949}
2950
2951/*
2836 * split the path's leaf in two, making sure there is at least data_size 2952 * split the path's leaf in two, making sure there is at least data_size
2837 * available for the resulting leaf level of the path. 2953 * available for the resulting leaf level of the path.
2838 * 2954 *
@@ -2854,6 +2970,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
2854 int wret; 2970 int wret;
2855 int split; 2971 int split;
2856 int num_doubles = 0; 2972 int num_doubles = 0;
2973 int tried_avoid_double = 0;
2857 2974
2858 l = path->nodes[0]; 2975 l = path->nodes[0];
2859 slot = path->slots[0]; 2976 slot = path->slots[0];
@@ -2862,12 +2979,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
2862 return -EOVERFLOW; 2979 return -EOVERFLOW;
2863 2980
2864 /* first try to make some room by pushing left and right */ 2981 /* first try to make some room by pushing left and right */
2865 if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { 2982 if (data_size) {
2866 wret = push_leaf_right(trans, root, path, data_size, 0); 2983 wret = push_leaf_right(trans, root, path, data_size,
2984 data_size, 0, 0);
2867 if (wret < 0) 2985 if (wret < 0)
2868 return wret; 2986 return wret;
2869 if (wret) { 2987 if (wret) {
2870 wret = push_leaf_left(trans, root, path, data_size, 0); 2988 wret = push_leaf_left(trans, root, path, data_size,
2989 data_size, 0, (u32)-1);
2871 if (wret < 0) 2990 if (wret < 0)
2872 return wret; 2991 return wret;
2873 } 2992 }
@@ -2901,6 +3020,8 @@ again:
2901 if (mid != nritems && 3020 if (mid != nritems &&
2902 leaf_space_used(l, mid, nritems - mid) + 3021 leaf_space_used(l, mid, nritems - mid) +
2903 data_size > BTRFS_LEAF_DATA_SIZE(root)) { 3022 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
3023 if (data_size && !tried_avoid_double)
3024 goto push_for_double;
2904 split = 2; 3025 split = 2;
2905 } 3026 }
2906 } 3027 }
@@ -2917,6 +3038,8 @@ again:
2917 if (mid != nritems && 3038 if (mid != nritems &&
2918 leaf_space_used(l, mid, nritems - mid) + 3039 leaf_space_used(l, mid, nritems - mid) +
2919 data_size > BTRFS_LEAF_DATA_SIZE(root)) { 3040 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
3041 if (data_size && !tried_avoid_double)
3042 goto push_for_double;
2920 split = 2 ; 3043 split = 2 ;
2921 } 3044 }
2922 } 3045 }
@@ -2931,10 +3054,10 @@ again:
2931 right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, 3054 right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
2932 root->root_key.objectid, 3055 root->root_key.objectid,
2933 &disk_key, 0, l->start, 0); 3056 &disk_key, 0, l->start, 0);
2934 if (IS_ERR(right)) { 3057 if (IS_ERR(right))
2935 BUG_ON(1);
2936 return PTR_ERR(right); 3058 return PTR_ERR(right);
2937 } 3059
3060 root_add_used(root, root->leafsize);
2938 3061
2939 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); 3062 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2940 btrfs_set_header_bytenr(right, right->start); 3063 btrfs_set_header_bytenr(right, right->start);
@@ -2997,6 +3120,13 @@ again:
2997 } 3120 }
2998 3121
2999 return ret; 3122 return ret;
3123
3124push_for_double:
3125 push_for_double_split(trans, root, path, data_size);
3126 tried_avoid_double = 1;
3127 if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
3128 return 0;
3129 goto again;
3000} 3130}
3001 3131
3002static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, 3132static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
@@ -3040,6 +3170,10 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
3040 if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0])) 3170 if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
3041 goto err; 3171 goto err;
3042 3172
3173 /* the leaf has changed, it now has room. return now */
3174 if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len)
3175 goto err;
3176
3043 if (key.type == BTRFS_EXTENT_DATA_KEY) { 3177 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3044 fi = btrfs_item_ptr(leaf, path->slots[0], 3178 fi = btrfs_item_ptr(leaf, path->slots[0],
3045 struct btrfs_file_extent_item); 3179 struct btrfs_file_extent_item);
@@ -3049,7 +3183,8 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
3049 3183
3050 btrfs_set_path_blocking(path); 3184 btrfs_set_path_blocking(path);
3051 ret = split_leaf(trans, root, &key, path, ins_len, 1); 3185 ret = split_leaf(trans, root, &key, path, ins_len, 1);
3052 BUG_ON(ret); 3186 if (ret)
3187 goto err;
3053 3188
3054 path->keep_locks = 0; 3189 path->keep_locks = 0;
3055 btrfs_unlock_up_safe(path, 1); 3190 btrfs_unlock_up_safe(path, 1);
@@ -3212,7 +3347,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
3212{ 3347{
3213 int ret = 0; 3348 int ret = 0;
3214 int slot; 3349 int slot;
3215 int slot_orig;
3216 struct extent_buffer *leaf; 3350 struct extent_buffer *leaf;
3217 struct btrfs_item *item; 3351 struct btrfs_item *item;
3218 u32 nritems; 3352 u32 nritems;
@@ -3222,7 +3356,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
3222 unsigned int size_diff; 3356 unsigned int size_diff;
3223 int i; 3357 int i;
3224 3358
3225 slot_orig = path->slots[0];
3226 leaf = path->nodes[0]; 3359 leaf = path->nodes[0];
3227 slot = path->slots[0]; 3360 slot = path->slots[0];
3228 3361
@@ -3327,7 +3460,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
3327{ 3460{
3328 int ret = 0; 3461 int ret = 0;
3329 int slot; 3462 int slot;
3330 int slot_orig;
3331 struct extent_buffer *leaf; 3463 struct extent_buffer *leaf;
3332 struct btrfs_item *item; 3464 struct btrfs_item *item;
3333 u32 nritems; 3465 u32 nritems;
@@ -3336,7 +3468,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
3336 unsigned int old_size; 3468 unsigned int old_size;
3337 int i; 3469 int i;
3338 3470
3339 slot_orig = path->slots[0];
3340 leaf = path->nodes[0]; 3471 leaf = path->nodes[0];
3341 3472
3342 nritems = btrfs_header_nritems(leaf); 3473 nritems = btrfs_header_nritems(leaf);
@@ -3669,7 +3800,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3669 struct btrfs_key *cpu_key, u32 *data_size, 3800 struct btrfs_key *cpu_key, u32 *data_size,
3670 int nr) 3801 int nr)
3671{ 3802{
3672 struct extent_buffer *leaf;
3673 int ret = 0; 3803 int ret = 0;
3674 int slot; 3804 int slot;
3675 int i; 3805 int i;
@@ -3686,7 +3816,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3686 if (ret < 0) 3816 if (ret < 0)
3687 goto out; 3817 goto out;
3688 3818
3689 leaf = path->nodes[0];
3690 slot = path->slots[0]; 3819 slot = path->slots[0];
3691 BUG_ON(slot < 0); 3820 BUG_ON(slot < 0);
3692 3821
@@ -3791,9 +3920,10 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3791 */ 3920 */
3792 btrfs_unlock_up_safe(path, 0); 3921 btrfs_unlock_up_safe(path, 0);
3793 3922
3794 ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len, 3923 root_sub_used(root, leaf->len);
3795 0, root->root_key.objectid, 0); 3924
3796 return ret; 3925 btrfs_free_tree_block(trans, root, leaf, 0, 1);
3926 return 0;
3797} 3927}
3798/* 3928/*
3799 * delete the item at the leaf level in path. If that empties 3929 * delete the item at the leaf level in path. If that empties
@@ -3860,6 +3990,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3860 if (leaf == root->node) { 3990 if (leaf == root->node) {
3861 btrfs_set_header_level(leaf, 0); 3991 btrfs_set_header_level(leaf, 0);
3862 } else { 3992 } else {
3993 btrfs_set_path_blocking(path);
3994 clean_tree_block(trans, root, leaf);
3863 ret = btrfs_del_leaf(trans, root, path, leaf); 3995 ret = btrfs_del_leaf(trans, root, path, leaf);
3864 BUG_ON(ret); 3996 BUG_ON(ret);
3865 } 3997 }
@@ -3885,13 +4017,15 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3885 extent_buffer_get(leaf); 4017 extent_buffer_get(leaf);
3886 4018
3887 btrfs_set_path_blocking(path); 4019 btrfs_set_path_blocking(path);
3888 wret = push_leaf_left(trans, root, path, 1, 1); 4020 wret = push_leaf_left(trans, root, path, 1, 1,
4021 1, (u32)-1);
3889 if (wret < 0 && wret != -ENOSPC) 4022 if (wret < 0 && wret != -ENOSPC)
3890 ret = wret; 4023 ret = wret;
3891 4024
3892 if (path->nodes[0] == leaf && 4025 if (path->nodes[0] == leaf &&
3893 btrfs_header_nritems(leaf)) { 4026 btrfs_header_nritems(leaf)) {
3894 wret = push_leaf_right(trans, root, path, 1, 1); 4027 wret = push_leaf_right(trans, root, path, 1,
4028 1, 1, 0);
3895 if (wret < 0 && wret != -ENOSPC) 4029 if (wret < 0 && wret != -ENOSPC)
3896 ret = wret; 4030 ret = wret;
3897 } 4031 }