aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ctree.c
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2008-06-25 16:01:30 -0400
committerChris Mason <chris.mason@oracle.com>2008-09-25 11:04:03 -0400
commit925baeddc5b0764a53f2214a1253251bab0e0324 (patch)
tree0e069bf9cc1c4ecd17c812fd1fb81bf807909ee6 /fs/btrfs/ctree.c
parent1cc127b5d1b71453091859301de4a7dd6ee96fa8 (diff)
Btrfs: Start btree concurrency work.
The allocation trees and the chunk trees are serialized via their own dedicated mutexes. This means allocation location is still not very fine grained. The main FS btree is protected by locks on each block in the btree. Locks are taken top / down, and as processing finishes on a given level of the tree, the lock is released after locking the lower level. The end result of a search is now a path where only the lowest level is locked. Releasing or freeing the path drops any locks held. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r--fs/btrfs/ctree.c386
1 files changed, 288 insertions, 98 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 1d404bde3cfa..75625c68fdd8 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -21,6 +21,7 @@
21#include "disk-io.h" 21#include "disk-io.h"
22#include "transaction.h" 22#include "transaction.h"
23#include "print-tree.h" 23#include "print-tree.h"
24#include "locking.h"
24 25
25static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root 26static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
26 *root, struct btrfs_path *path, int level); 27 *root, struct btrfs_path *path, int level);
@@ -64,12 +65,47 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
64 int i; 65 int i;
65 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 66 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
66 if (!p->nodes[i]) 67 if (!p->nodes[i])
67 break; 68 continue;
69 if (p->locks[i]) {
70 btrfs_tree_unlock(p->nodes[i]);
71 p->locks[i] = 0;
72 }
68 free_extent_buffer(p->nodes[i]); 73 free_extent_buffer(p->nodes[i]);
69 } 74 }
70 memset(p, 0, sizeof(*p)); 75 memset(p, 0, sizeof(*p));
71} 76}
72 77
78struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
79{
80 struct extent_buffer *eb;
81 spin_lock(&root->node_lock);
82 eb = root->node;
83 extent_buffer_get(eb);
84 spin_unlock(&root->node_lock);
85 return eb;
86}
87
88struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
89{
90 struct extent_buffer *eb;
91
92 while(1) {
93 eb = btrfs_root_node(root);
94 btrfs_tree_lock(eb);
95
96 spin_lock(&root->node_lock);
97 if (eb == root->node) {
98 spin_unlock(&root->node_lock);
99 break;
100 }
101 spin_unlock(&root->node_lock);
102
103 btrfs_tree_unlock(eb);
104 free_extent_buffer(eb);
105 }
106 return eb;
107}
108
73static void add_root_to_dirty_list(struct btrfs_root *root) 109static void add_root_to_dirty_list(struct btrfs_root *root)
74{ 110{
75 if (root->track_dirty && list_empty(&root->dirty_list)) { 111 if (root->track_dirty && list_empty(&root->dirty_list)) {
@@ -111,7 +147,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
111 } else { 147 } else {
112 first_key.objectid = 0; 148 first_key.objectid = 0;
113 } 149 }
114 cow = __btrfs_alloc_free_block(trans, new_root, buf->len, 150 cow = btrfs_alloc_free_block(trans, new_root, buf->len,
115 new_root_objectid, 151 new_root_objectid,
116 trans->transid, first_key.objectid, 152 trans->transid, first_key.objectid,
117 level, buf->start, 0); 153 level, buf->start, 0);
@@ -151,8 +187,14 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
151 int ret = 0; 187 int ret = 0;
152 int different_trans = 0; 188 int different_trans = 0;
153 int level; 189 int level;
190 int unlock_orig = 0;
154 struct btrfs_key first_key; 191 struct btrfs_key first_key;
155 192
193 if (*cow_ret == buf)
194 unlock_orig = 1;
195
196 WARN_ON(!btrfs_tree_locked(buf));
197
156 if (root->ref_cows) { 198 if (root->ref_cows) {
157 root_gen = trans->transid; 199 root_gen = trans->transid;
158 } else { 200 } else {
@@ -172,7 +214,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
172 } else { 214 } else {
173 first_key.objectid = 0; 215 first_key.objectid = 0;
174 } 216 }
175 cow = __btrfs_alloc_free_block(trans, root, buf->len, 217 cow = btrfs_alloc_free_block(trans, root, buf->len,
176 root->root_key.objectid, 218 root->root_key.objectid,
177 root_gen, first_key.objectid, level, 219 root_gen, first_key.objectid, level,
178 search_start, empty_size); 220 search_start, empty_size);
@@ -196,9 +238,14 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
196 } 238 }
197 239
198 if (buf == root->node) { 240 if (buf == root->node) {
241 WARN_ON(parent && parent != buf);
199 root_gen = btrfs_header_generation(buf); 242 root_gen = btrfs_header_generation(buf);
243
244 spin_lock(&root->node_lock);
200 root->node = cow; 245 root->node = cow;
201 extent_buffer_get(cow); 246 extent_buffer_get(cow);
247 spin_unlock(&root->node_lock);
248
202 if (buf != root->commit_root) { 249 if (buf != root->commit_root) {
203 btrfs_free_extent(trans, root, buf->start, 250 btrfs_free_extent(trans, root, buf->start,
204 buf->len, root->root_key.objectid, 251 buf->len, root->root_key.objectid,
@@ -219,6 +266,8 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
219 btrfs_header_owner(parent), root_gen, 266 btrfs_header_owner(parent), root_gen,
220 0, 0, 1); 267 0, 0, 1);
221 } 268 }
269 if (unlock_orig)
270 btrfs_tree_unlock(buf);
222 free_extent_buffer(buf); 271 free_extent_buffer(buf);
223 btrfs_mark_buffer_dirty(cow); 272 btrfs_mark_buffer_dirty(cow);
224 *cow_ret = cow; 273 *cow_ret = cow;
@@ -316,6 +365,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
316 int progress_passed = 0; 365 int progress_passed = 0;
317 struct btrfs_disk_key disk_key; 366 struct btrfs_disk_key disk_key;
318 367
368 /* FIXME this code needs locking */
369 return 0;
370
319 parent_level = btrfs_header_level(parent); 371 parent_level = btrfs_header_level(parent);
320 if (cache_only && parent_level != 1) 372 if (cache_only && parent_level != 1)
321 return 0; 373 return 0;
@@ -729,6 +781,7 @@ static int balance_level(struct btrfs_trans_handle *trans,
729 return 0; 781 return 0;
730 782
731 mid = path->nodes[level]; 783 mid = path->nodes[level];
784 WARN_ON(!path->locks[level]);
732 WARN_ON(btrfs_header_generation(mid) != trans->transid); 785 WARN_ON(btrfs_header_generation(mid) != trans->transid);
733 786
734 orig_ptr = btrfs_node_blockptr(mid, orig_slot); 787 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
@@ -749,14 +802,21 @@ static int balance_level(struct btrfs_trans_handle *trans,
749 802
750 /* promote the child to a root */ 803 /* promote the child to a root */
751 child = read_node_slot(root, mid, 0); 804 child = read_node_slot(root, mid, 0);
805 btrfs_tree_lock(child);
752 BUG_ON(!child); 806 BUG_ON(!child);
753 ret = btrfs_cow_block(trans, root, child, mid, 0, &child); 807 ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
754 BUG_ON(ret); 808 BUG_ON(ret);
755 809
810 spin_lock(&root->node_lock);
756 root->node = child; 811 root->node = child;
812 spin_unlock(&root->node_lock);
813
757 add_root_to_dirty_list(root); 814 add_root_to_dirty_list(root);
815 btrfs_tree_unlock(child);
816 path->locks[level] = 0;
758 path->nodes[level] = NULL; 817 path->nodes[level] = NULL;
759 clean_tree_block(trans, root, mid); 818 clean_tree_block(trans, root, mid);
819 btrfs_tree_unlock(mid);
760 /* once for the path */ 820 /* once for the path */
761 free_extent_buffer(mid); 821 free_extent_buffer(mid);
762 ret = btrfs_free_extent(trans, root, mid->start, mid->len, 822 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
@@ -775,6 +835,7 @@ static int balance_level(struct btrfs_trans_handle *trans,
775 835
776 left = read_node_slot(root, parent, pslot - 1); 836 left = read_node_slot(root, parent, pslot - 1);
777 if (left) { 837 if (left) {
838 btrfs_tree_lock(left);
778 wret = btrfs_cow_block(trans, root, left, 839 wret = btrfs_cow_block(trans, root, left,
779 parent, pslot - 1, &left); 840 parent, pslot - 1, &left);
780 if (wret) { 841 if (wret) {
@@ -784,6 +845,7 @@ static int balance_level(struct btrfs_trans_handle *trans,
784 } 845 }
785 right = read_node_slot(root, parent, pslot + 1); 846 right = read_node_slot(root, parent, pslot + 1);
786 if (right) { 847 if (right) {
848 btrfs_tree_lock(right);
787 wret = btrfs_cow_block(trans, root, right, 849 wret = btrfs_cow_block(trans, root, right,
788 parent, pslot + 1, &right); 850 parent, pslot + 1, &right);
789 if (wret) { 851 if (wret) {
@@ -815,6 +877,7 @@ static int balance_level(struct btrfs_trans_handle *trans,
815 u32 blocksize = right->len; 877 u32 blocksize = right->len;
816 878
817 clean_tree_block(trans, root, right); 879 clean_tree_block(trans, root, right);
880 btrfs_tree_unlock(right);
818 free_extent_buffer(right); 881 free_extent_buffer(right);
819 right = NULL; 882 right = NULL;
820 wret = del_ptr(trans, root, path, level + 1, pslot + 883 wret = del_ptr(trans, root, path, level + 1, pslot +
@@ -862,7 +925,9 @@ static int balance_level(struct btrfs_trans_handle *trans,
862 u64 root_gen = btrfs_header_generation(parent); 925 u64 root_gen = btrfs_header_generation(parent);
863 u64 bytenr = mid->start; 926 u64 bytenr = mid->start;
864 u32 blocksize = mid->len; 927 u32 blocksize = mid->len;
928
865 clean_tree_block(trans, root, mid); 929 clean_tree_block(trans, root, mid);
930 btrfs_tree_unlock(mid);
866 free_extent_buffer(mid); 931 free_extent_buffer(mid);
867 mid = NULL; 932 mid = NULL;
868 wret = del_ptr(trans, root, path, level + 1, pslot); 933 wret = del_ptr(trans, root, path, level + 1, pslot);
@@ -885,11 +950,14 @@ static int balance_level(struct btrfs_trans_handle *trans,
885 if (left) { 950 if (left) {
886 if (btrfs_header_nritems(left) > orig_slot) { 951 if (btrfs_header_nritems(left) > orig_slot) {
887 extent_buffer_get(left); 952 extent_buffer_get(left);
953 /* left was locked after cow */
888 path->nodes[level] = left; 954 path->nodes[level] = left;
889 path->slots[level + 1] -= 1; 955 path->slots[level + 1] -= 1;
890 path->slots[level] = orig_slot; 956 path->slots[level] = orig_slot;
891 if (mid) 957 if (mid) {
958 btrfs_tree_unlock(mid);
892 free_extent_buffer(mid); 959 free_extent_buffer(mid);
960 }
893 } else { 961 } else {
894 orig_slot -= btrfs_header_nritems(left); 962 orig_slot -= btrfs_header_nritems(left);
895 path->slots[level] = orig_slot; 963 path->slots[level] = orig_slot;
@@ -901,10 +969,15 @@ static int balance_level(struct btrfs_trans_handle *trans,
901 btrfs_node_blockptr(path->nodes[level], path->slots[level])) 969 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
902 BUG(); 970 BUG();
903enospc: 971enospc:
904 if (right) 972 if (right) {
973 btrfs_tree_unlock(right);
905 free_extent_buffer(right); 974 free_extent_buffer(right);
906 if (left) 975 }
976 if (left) {
977 if (path->nodes[level] != left)
978 btrfs_tree_unlock(left);
907 free_extent_buffer(left); 979 free_extent_buffer(left);
980 }
908 return ret; 981 return ret;
909} 982}
910 983
@@ -942,6 +1015,8 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
942 /* first, try to make some room in the middle buffer */ 1015 /* first, try to make some room in the middle buffer */
943 if (left) { 1016 if (left) {
944 u32 left_nr; 1017 u32 left_nr;
1018
1019 btrfs_tree_lock(left);
945 left_nr = btrfs_header_nritems(left); 1020 left_nr = btrfs_header_nritems(left);
946 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 1021 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
947 wret = 1; 1022 wret = 1;
@@ -967,24 +1042,28 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
967 path->nodes[level] = left; 1042 path->nodes[level] = left;
968 path->slots[level + 1] -= 1; 1043 path->slots[level + 1] -= 1;
969 path->slots[level] = orig_slot; 1044 path->slots[level] = orig_slot;
1045 btrfs_tree_unlock(mid);
970 free_extent_buffer(mid); 1046 free_extent_buffer(mid);
971 } else { 1047 } else {
972 orig_slot -= 1048 orig_slot -=
973 btrfs_header_nritems(left); 1049 btrfs_header_nritems(left);
974 path->slots[level] = orig_slot; 1050 path->slots[level] = orig_slot;
1051 btrfs_tree_unlock(left);
975 free_extent_buffer(left); 1052 free_extent_buffer(left);
976 } 1053 }
977 return 0; 1054 return 0;
978 } 1055 }
1056 btrfs_tree_unlock(left);
979 free_extent_buffer(left); 1057 free_extent_buffer(left);
980 } 1058 }
981 right= read_node_slot(root, parent, pslot + 1); 1059 right = read_node_slot(root, parent, pslot + 1);
982 1060
983 /* 1061 /*
984 * then try to empty the right most buffer into the middle 1062 * then try to empty the right most buffer into the middle
985 */ 1063 */
986 if (right) { 1064 if (right) {
987 u32 right_nr; 1065 u32 right_nr;
1066 btrfs_tree_lock(right);
988 right_nr = btrfs_header_nritems(right); 1067 right_nr = btrfs_header_nritems(right);
989 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 1068 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
990 wret = 1; 1069 wret = 1;
@@ -1013,12 +1092,15 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
1013 path->slots[level + 1] += 1; 1092 path->slots[level + 1] += 1;
1014 path->slots[level] = orig_slot - 1093 path->slots[level] = orig_slot -
1015 btrfs_header_nritems(mid); 1094 btrfs_header_nritems(mid);
1095 btrfs_tree_unlock(mid);
1016 free_extent_buffer(mid); 1096 free_extent_buffer(mid);
1017 } else { 1097 } else {
1098 btrfs_tree_unlock(right);
1018 free_extent_buffer(right); 1099 free_extent_buffer(right);
1019 } 1100 }
1020 return 0; 1101 return 0;
1021 } 1102 }
1103 btrfs_tree_unlock(right);
1022 free_extent_buffer(right); 1104 free_extent_buffer(right);
1023 } 1105 }
1024 return 1; 1106 return 1;
@@ -1050,6 +1132,8 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
1050 return; 1132 return;
1051 1133
1052 node = path->nodes[level]; 1134 node = path->nodes[level];
1135 WARN_ON(!path->skip_locking && !btrfs_tree_locked(node));
1136
1053 search = btrfs_node_blockptr(node, slot); 1137 search = btrfs_node_blockptr(node, slot);
1054 blocksize = btrfs_level_size(root, level - 1); 1138 blocksize = btrfs_level_size(root, level - 1);
1055 eb = btrfs_find_tree_block(root, search, blocksize); 1139 eb = btrfs_find_tree_block(root, search, blocksize);
@@ -1098,6 +1182,39 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
1098 highest_read = search; 1182 highest_read = search;
1099 } 1183 }
1100} 1184}
1185
1186static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock)
1187{
1188 int i;
1189 int skip_level = level;
1190 struct extent_buffer *t;
1191
1192 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1193 if (!path->nodes[i])
1194 break;
1195 if (!path->locks[i])
1196 break;
1197 if (path->slots[i] == 0) {
1198 skip_level = i + 1;
1199 continue;
1200 }
1201 if (path->keep_locks) {
1202 u32 nritems;
1203 t = path->nodes[i];
1204 nritems = btrfs_header_nritems(t);
1205 if (path->slots[i] >= nritems - 1) {
1206 skip_level = i + 1;
1207 continue;
1208 }
1209 }
1210 t = path->nodes[i];
1211 if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
1212 btrfs_tree_unlock(t);
1213 path->locks[i] = 0;
1214 }
1215 }
1216}
1217
1101/* 1218/*
1102 * look for key in the tree. path is filled in with nodes along the way 1219 * look for key in the tree. path is filled in with nodes along the way
1103 * if key is found, we return zero and you can find the item in the leaf 1220 * if key is found, we return zero and you can find the item in the leaf
@@ -1120,15 +1237,27 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1120 int ret; 1237 int ret;
1121 int level; 1238 int level;
1122 int should_reada = p->reada; 1239 int should_reada = p->reada;
1240 int lowest_unlock = 1;
1123 u8 lowest_level = 0; 1241 u8 lowest_level = 0;
1124 1242
1125 lowest_level = p->lowest_level; 1243 lowest_level = p->lowest_level;
1126 WARN_ON(lowest_level && ins_len); 1244 WARN_ON(lowest_level && ins_len);
1127 WARN_ON(p->nodes[0] != NULL); 1245 WARN_ON(p->nodes[0] != NULL);
1128 WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); 1246 // WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
1247 WARN_ON(root == root->fs_info->extent_root &&
1248 !mutex_is_locked(&root->fs_info->alloc_mutex));
1249 WARN_ON(root == root->fs_info->chunk_root &&
1250 !mutex_is_locked(&root->fs_info->chunk_mutex));
1251 WARN_ON(root == root->fs_info->dev_root &&
1252 !mutex_is_locked(&root->fs_info->chunk_mutex));
1253 if (ins_len < 0)
1254 lowest_unlock = 2;
1129again: 1255again:
1130 b = root->node; 1256 if (!p->skip_locking)
1131 extent_buffer_get(b); 1257 b = btrfs_lock_root_node(root);
1258 else
1259 b = btrfs_root_node(root);
1260
1132 while (b) { 1261 while (b) {
1133 level = btrfs_header_level(b); 1262 level = btrfs_header_level(b);
1134 if (cow) { 1263 if (cow) {
@@ -1147,9 +1276,12 @@ again:
1147 WARN_ON(1); 1276 WARN_ON(1);
1148 level = btrfs_header_level(b); 1277 level = btrfs_header_level(b);
1149 p->nodes[level] = b; 1278 p->nodes[level] = b;
1279 if (!p->skip_locking)
1280 p->locks[level] = 1;
1150 ret = check_block(root, p, level); 1281 ret = check_block(root, p, level);
1151 if (ret) 1282 if (ret)
1152 return -1; 1283 return -1;
1284
1153 ret = bin_search(b, key, level, &slot); 1285 ret = bin_search(b, key, level, &slot);
1154 if (level != 0) { 1286 if (level != 0) {
1155 if (ret && slot > 0) 1287 if (ret && slot > 0)
@@ -1177,14 +1309,19 @@ again:
1177 BUG_ON(btrfs_header_nritems(b) == 1); 1309 BUG_ON(btrfs_header_nritems(b) == 1);
1178 } 1310 }
1179 /* this is only true while dropping a snapshot */ 1311 /* this is only true while dropping a snapshot */
1180 if (level == lowest_level) 1312 if (level == lowest_level) {
1313 unlock_up(p, level, lowest_unlock);
1181 break; 1314 break;
1315 }
1182 1316
1183 if (should_reada) 1317 if (should_reada)
1184 reada_for_search(root, p, level, slot, 1318 reada_for_search(root, p, level, slot,
1185 key->objectid); 1319 key->objectid);
1186 1320
1187 b = read_node_slot(root, b, slot); 1321 b = read_node_slot(root, b, slot);
1322 if (!p->skip_locking)
1323 btrfs_tree_lock(b);
1324 unlock_up(p, level, lowest_unlock);
1188 } else { 1325 } else {
1189 p->slots[level] = slot; 1326 p->slots[level] = slot;
1190 if (ins_len > 0 && btrfs_leaf_free_space(root, b) < 1327 if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
@@ -1195,6 +1332,7 @@ again:
1195 if (sret) 1332 if (sret)
1196 return sret; 1333 return sret;
1197 } 1334 }
1335 unlock_up(p, level, lowest_unlock);
1198 return ret; 1336 return ret;
1199 } 1337 }
1200 } 1338 }
@@ -1225,6 +1363,13 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans,
1225 break; 1363 break;
1226 t = path->nodes[i]; 1364 t = path->nodes[i];
1227 btrfs_set_node_key(t, key, tslot); 1365 btrfs_set_node_key(t, key, tslot);
1366 if (!btrfs_tree_locked(path->nodes[i])) {
1367 int ii;
1368printk("fixup without lock on level %d\n", btrfs_header_level(path->nodes[i]));
1369 for (ii = 0; ii < BTRFS_MAX_LEVEL; ii++) {
1370printk("level %d slot %d\n", ii, path->slots[ii]);
1371 }
1372 }
1228 btrfs_mark_buffer_dirty(path->nodes[i]); 1373 btrfs_mark_buffer_dirty(path->nodes[i]);
1229 if (tslot != 0) 1374 if (tslot != 0)
1230 break; 1375 break;
@@ -1370,6 +1515,7 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1370 u64 lower_gen; 1515 u64 lower_gen;
1371 struct extent_buffer *lower; 1516 struct extent_buffer *lower;
1372 struct extent_buffer *c; 1517 struct extent_buffer *c;
1518 struct extent_buffer *old;
1373 struct btrfs_disk_key lower_key; 1519 struct btrfs_disk_key lower_key;
1374 1520
1375 BUG_ON(path->nodes[level]); 1521 BUG_ON(path->nodes[level]);
@@ -1386,12 +1532,13 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1386 else 1532 else
1387 btrfs_node_key(lower, &lower_key, 0); 1533 btrfs_node_key(lower, &lower_key, 0);
1388 1534
1389 c = __btrfs_alloc_free_block(trans, root, root->nodesize, 1535 c = btrfs_alloc_free_block(trans, root, root->nodesize,
1390 root->root_key.objectid, 1536 root->root_key.objectid,
1391 root_gen, lower_key.objectid, level, 1537 root_gen, lower_key.objectid, level,
1392 root->node->start, 0); 1538 root->node->start, 0);
1393 if (IS_ERR(c)) 1539 if (IS_ERR(c))
1394 return PTR_ERR(c); 1540 return PTR_ERR(c);
1541
1395 memset_extent_buffer(c, 0, 0, root->nodesize); 1542 memset_extent_buffer(c, 0, 0, root->nodesize);
1396 btrfs_set_header_nritems(c, 1); 1543 btrfs_set_header_nritems(c, 1);
1397 btrfs_set_header_level(c, level); 1544 btrfs_set_header_level(c, level);
@@ -1416,23 +1563,31 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1416 1563
1417 btrfs_mark_buffer_dirty(c); 1564 btrfs_mark_buffer_dirty(c);
1418 1565
1419 /* the super has an extra ref to root->node */ 1566 spin_lock(&root->node_lock);
1420 free_extent_buffer(root->node); 1567 old = root->node;
1421 root->node = c; 1568 root->node = c;
1569 spin_unlock(&root->node_lock);
1570
1571 /* the super has an extra ref to root->node */
1572 free_extent_buffer(old);
1573
1422 add_root_to_dirty_list(root); 1574 add_root_to_dirty_list(root);
1423 extent_buffer_get(c); 1575 extent_buffer_get(c);
1424 path->nodes[level] = c; 1576 path->nodes[level] = c;
1577 path->locks[level] = 1;
1425 path->slots[level] = 0; 1578 path->slots[level] = 0;
1426 1579
1427 if (root->ref_cows && lower_gen != trans->transid) { 1580 if (root->ref_cows && lower_gen != trans->transid) {
1428 struct btrfs_path *back_path = btrfs_alloc_path(); 1581 struct btrfs_path *back_path = btrfs_alloc_path();
1429 int ret; 1582 int ret;
1583 mutex_lock(&root->fs_info->alloc_mutex);
1430 ret = btrfs_insert_extent_backref(trans, 1584 ret = btrfs_insert_extent_backref(trans,
1431 root->fs_info->extent_root, 1585 root->fs_info->extent_root,
1432 path, lower->start, 1586 path, lower->start,
1433 root->root_key.objectid, 1587 root->root_key.objectid,
1434 trans->transid, 0, 0); 1588 trans->transid, 0, 0);
1435 BUG_ON(ret); 1589 BUG_ON(ret);
1590 mutex_unlock(&root->fs_info->alloc_mutex);
1436 btrfs_free_path(back_path); 1591 btrfs_free_path(back_path);
1437 } 1592 }
1438 return 0; 1593 return 0;
@@ -1521,7 +1676,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1521 root_gen = 0; 1676 root_gen = 0;
1522 1677
1523 btrfs_node_key(c, &disk_key, 0); 1678 btrfs_node_key(c, &disk_key, 0);
1524 split = __btrfs_alloc_free_block(trans, root, root->nodesize, 1679 split = btrfs_alloc_free_block(trans, root, root->nodesize,
1525 root->root_key.objectid, 1680 root->root_key.objectid,
1526 root_gen, 1681 root_gen,
1527 btrfs_disk_key_objectid(&disk_key), 1682 btrfs_disk_key_objectid(&disk_key),
@@ -1564,10 +1719,12 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1564 1719
1565 if (path->slots[level] >= mid) { 1720 if (path->slots[level] >= mid) {
1566 path->slots[level] -= mid; 1721 path->slots[level] -= mid;
1722 btrfs_tree_unlock(c);
1567 free_extent_buffer(c); 1723 free_extent_buffer(c);
1568 path->nodes[level] = split; 1724 path->nodes[level] = split;
1569 path->slots[level + 1] += 1; 1725 path->slots[level + 1] += 1;
1570 } else { 1726 } else {
1727 btrfs_tree_unlock(split);
1571 free_extent_buffer(split); 1728 free_extent_buffer(split);
1572 } 1729 }
1573 return ret; 1730 return ret;
@@ -1648,30 +1805,24 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1648 return 1; 1805 return 1;
1649 1806
1650 right = read_node_slot(root, upper, slot + 1); 1807 right = read_node_slot(root, upper, slot + 1);
1808 btrfs_tree_lock(right);
1651 free_space = btrfs_leaf_free_space(root, right); 1809 free_space = btrfs_leaf_free_space(root, right);
1652 if (free_space < data_size + sizeof(struct btrfs_item)) { 1810 if (free_space < data_size + sizeof(struct btrfs_item))
1653 free_extent_buffer(right); 1811 goto out_unlock;
1654 return 1;
1655 }
1656 1812
1657 /* cow and double check */ 1813 /* cow and double check */
1658 ret = btrfs_cow_block(trans, root, right, upper, 1814 ret = btrfs_cow_block(trans, root, right, upper,
1659 slot + 1, &right); 1815 slot + 1, &right);
1660 if (ret) { 1816 if (ret)
1661 free_extent_buffer(right); 1817 goto out_unlock;
1662 return 1; 1818
1663 }
1664 free_space = btrfs_leaf_free_space(root, right); 1819 free_space = btrfs_leaf_free_space(root, right);
1665 if (free_space < data_size + sizeof(struct btrfs_item)) { 1820 if (free_space < data_size + sizeof(struct btrfs_item))
1666 free_extent_buffer(right); 1821 goto out_unlock;
1667 return 1;
1668 }
1669 1822
1670 left_nritems = btrfs_header_nritems(left); 1823 left_nritems = btrfs_header_nritems(left);
1671 if (left_nritems == 0) { 1824 if (left_nritems == 0)
1672 free_extent_buffer(right); 1825 goto out_unlock;
1673 return 1;
1674 }
1675 1826
1676 if (empty) 1827 if (empty)
1677 nr = 0; 1828 nr = 0;
@@ -1707,10 +1858,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1707 left->map_token = NULL; 1858 left->map_token = NULL;
1708 } 1859 }
1709 1860
1710 if (push_items == 0) { 1861 if (push_items == 0)
1711 free_extent_buffer(right); 1862 goto out_unlock;
1712 return 1;
1713 }
1714 1863
1715 if (!empty && push_items == left_nritems) 1864 if (!empty && push_items == left_nritems)
1716 WARN_ON(1); 1865 WARN_ON(1);
@@ -1778,14 +1927,24 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1778 /* then fixup the leaf pointer in the path */ 1927 /* then fixup the leaf pointer in the path */
1779 if (path->slots[0] >= left_nritems) { 1928 if (path->slots[0] >= left_nritems) {
1780 path->slots[0] -= left_nritems; 1929 path->slots[0] -= left_nritems;
1930 if (btrfs_header_nritems(path->nodes[0]) == 0)
1931 clean_tree_block(trans, root, path->nodes[0]);
1932 btrfs_tree_unlock(path->nodes[0]);
1781 free_extent_buffer(path->nodes[0]); 1933 free_extent_buffer(path->nodes[0]);
1782 path->nodes[0] = right; 1934 path->nodes[0] = right;
1783 path->slots[1] += 1; 1935 path->slots[1] += 1;
1784 } else { 1936 } else {
1937 btrfs_tree_unlock(right);
1785 free_extent_buffer(right); 1938 free_extent_buffer(right);
1786 } 1939 }
1787 return 0; 1940 return 0;
1941
1942out_unlock:
1943 btrfs_tree_unlock(right);
1944 free_extent_buffer(right);
1945 return 1;
1788} 1946}
1947
1789/* 1948/*
1790 * push some data in the path leaf to the left, trying to free up at 1949 * push some data in the path leaf to the left, trying to free up at
1791 * least data_size bytes. returns zero if the push worked, nonzero otherwise 1950 * least data_size bytes. returns zero if the push worked, nonzero otherwise
@@ -1823,10 +1982,11 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1823 } 1982 }
1824 1983
1825 left = read_node_slot(root, path->nodes[1], slot - 1); 1984 left = read_node_slot(root, path->nodes[1], slot - 1);
1985 btrfs_tree_lock(left);
1826 free_space = btrfs_leaf_free_space(root, left); 1986 free_space = btrfs_leaf_free_space(root, left);
1827 if (free_space < data_size + sizeof(struct btrfs_item)) { 1987 if (free_space < data_size + sizeof(struct btrfs_item)) {
1828 free_extent_buffer(left); 1988 ret = 1;
1829 return 1; 1989 goto out;
1830 } 1990 }
1831 1991
1832 /* cow and double check */ 1992 /* cow and double check */
@@ -1834,14 +1994,14 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1834 path->nodes[1], slot - 1, &left); 1994 path->nodes[1], slot - 1, &left);
1835 if (ret) { 1995 if (ret) {
1836 /* we hit -ENOSPC, but it isn't fatal here */ 1996 /* we hit -ENOSPC, but it isn't fatal here */
1837 free_extent_buffer(left); 1997 ret = 1;
1838 return 1; 1998 goto out;
1839 } 1999 }
1840 2000
1841 free_space = btrfs_leaf_free_space(root, left); 2001 free_space = btrfs_leaf_free_space(root, left);
1842 if (free_space < data_size + sizeof(struct btrfs_item)) { 2002 if (free_space < data_size + sizeof(struct btrfs_item)) {
1843 free_extent_buffer(left); 2003 ret = 1;
1844 return 1; 2004 goto out;
1845 } 2005 }
1846 2006
1847 if (empty) 2007 if (empty)
@@ -1876,8 +2036,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1876 } 2036 }
1877 2037
1878 if (push_items == 0) { 2038 if (push_items == 0) {
1879 free_extent_buffer(left); 2039 ret = 1;
1880 return 1; 2040 goto out;
1881 } 2041 }
1882 if (!empty && push_items == btrfs_header_nritems(right)) 2042 if (!empty && push_items == btrfs_header_nritems(right))
1883 WARN_ON(1); 2043 WARN_ON(1);
@@ -1975,15 +2135,23 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1975 /* then fixup the leaf pointer in the path */ 2135 /* then fixup the leaf pointer in the path */
1976 if (path->slots[0] < push_items) { 2136 if (path->slots[0] < push_items) {
1977 path->slots[0] += old_left_nritems; 2137 path->slots[0] += old_left_nritems;
2138 if (btrfs_header_nritems(path->nodes[0]) == 0)
2139 clean_tree_block(trans, root, path->nodes[0]);
2140 btrfs_tree_unlock(path->nodes[0]);
1978 free_extent_buffer(path->nodes[0]); 2141 free_extent_buffer(path->nodes[0]);
1979 path->nodes[0] = left; 2142 path->nodes[0] = left;
1980 path->slots[1] -= 1; 2143 path->slots[1] -= 1;
1981 } else { 2144 } else {
2145 btrfs_tree_unlock(left);
1982 free_extent_buffer(left); 2146 free_extent_buffer(left);
1983 path->slots[0] -= push_items; 2147 path->slots[0] -= push_items;
1984 } 2148 }
1985 BUG_ON(path->slots[0] < 0); 2149 BUG_ON(path->slots[0] < 0);
1986 return ret; 2150 return ret;
2151out:
2152 btrfs_tree_unlock(left);
2153 free_extent_buffer(left);
2154 return ret;
1987} 2155}
1988 2156
1989/* 2157/*
@@ -2052,7 +2220,7 @@ again:
2052 2220
2053 btrfs_item_key(l, &disk_key, 0); 2221 btrfs_item_key(l, &disk_key, 0);
2054 2222
2055 right = __btrfs_alloc_free_block(trans, root, root->leafsize, 2223 right = btrfs_alloc_free_block(trans, root, root->leafsize,
2056 root->root_key.objectid, 2224 root->root_key.objectid,
2057 root_gen, disk_key.objectid, 0, 2225 root_gen, disk_key.objectid, 0,
2058 l->start, 0); 2226 l->start, 0);
@@ -2085,6 +2253,8 @@ again:
2085 path->slots[1] + 1, 1); 2253 path->slots[1] + 1, 1);
2086 if (wret) 2254 if (wret)
2087 ret = wret; 2255 ret = wret;
2256
2257 btrfs_tree_unlock(path->nodes[0]);
2088 free_extent_buffer(path->nodes[0]); 2258 free_extent_buffer(path->nodes[0]);
2089 path->nodes[0] = right; 2259 path->nodes[0] = right;
2090 path->slots[0] = 0; 2260 path->slots[0] = 0;
@@ -2111,6 +2281,7 @@ again:
2111 path->slots[1], 1); 2281 path->slots[1], 1);
2112 if (wret) 2282 if (wret)
2113 ret = wret; 2283 ret = wret;
2284 btrfs_tree_unlock(path->nodes[0]);
2114 free_extent_buffer(path->nodes[0]); 2285 free_extent_buffer(path->nodes[0]);
2115 path->nodes[0] = right; 2286 path->nodes[0] = right;
2116 path->slots[0] = 0; 2287 path->slots[0] = 0;
@@ -2184,12 +2355,15 @@ again:
2184 BUG_ON(path->slots[0] != slot); 2355 BUG_ON(path->slots[0] != slot);
2185 2356
2186 if (mid <= slot) { 2357 if (mid <= slot) {
2358 btrfs_tree_unlock(path->nodes[0]);
2187 free_extent_buffer(path->nodes[0]); 2359 free_extent_buffer(path->nodes[0]);
2188 path->nodes[0] = right; 2360 path->nodes[0] = right;
2189 path->slots[0] -= mid; 2361 path->slots[0] -= mid;
2190 path->slots[1] += 1; 2362 path->slots[1] += 1;
2191 } else 2363 } else {
2364 btrfs_tree_unlock(right);
2192 free_extent_buffer(right); 2365 free_extent_buffer(right);
2366 }
2193 2367
2194 BUG_ON(path->slots[0] < 0); 2368 BUG_ON(path->slots[0] < 0);
2195 2369
@@ -2418,10 +2592,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
2418 total_data += data_size[i]; 2592 total_data += data_size[i];
2419 } 2593 }
2420 2594
2421 /* create a root if there isn't one */
2422 if (!root->node)
2423 BUG();
2424
2425 total_size = total_data + (nr - 1) * sizeof(struct btrfs_item); 2595 total_size = total_data + (nr - 1) * sizeof(struct btrfs_item);
2426 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); 2596 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
2427 if (ret == 0) { 2597 if (ret == 0) {
@@ -2516,7 +2686,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
2516 btrfs_print_leaf(root, leaf); 2686 btrfs_print_leaf(root, leaf);
2517 BUG(); 2687 BUG();
2518 } 2688 }
2519
2520out: 2689out:
2521 return ret; 2690 return ret;
2522} 2691}
@@ -2655,7 +2824,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2655 btrfs_set_header_level(leaf, 0); 2824 btrfs_set_header_level(leaf, 0);
2656 } else { 2825 } else {
2657 u64 root_gen = btrfs_header_generation(path->nodes[1]); 2826 u64 root_gen = btrfs_header_generation(path->nodes[1]);
2658 clean_tree_block(trans, root, leaf);
2659 wret = del_ptr(trans, root, path, 1, path->slots[1]); 2827 wret = del_ptr(trans, root, path, 1, path->slots[1]);
2660 if (wret) 2828 if (wret)
2661 ret = wret; 2829 ret = wret;
@@ -2706,8 +2874,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2706 root_gen = btrfs_header_generation( 2874 root_gen = btrfs_header_generation(
2707 path->nodes[1]); 2875 path->nodes[1]);
2708 2876
2709 clean_tree_block(trans, root, leaf);
2710
2711 wret = del_ptr(trans, root, path, 1, slot); 2877 wret = del_ptr(trans, root, path, 1, slot);
2712 if (wret) 2878 if (wret)
2713 ret = wret; 2879 ret = wret;
@@ -2720,7 +2886,13 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2720 if (wret) 2886 if (wret)
2721 ret = wret; 2887 ret = wret;
2722 } else { 2888 } else {
2723 btrfs_mark_buffer_dirty(leaf); 2889 /* if we're still in the path, make sure
2890 * we're dirty. Otherwise, one of the
2891 * push_leaf functions must have already
2892 * dirtied this buffer
2893 */
2894 if (path->nodes[0] == leaf)
2895 btrfs_mark_buffer_dirty(leaf);
2724 free_extent_buffer(leaf); 2896 free_extent_buffer(leaf);
2725 } 2897 }
2726 } else { 2898 } else {
@@ -2731,56 +2903,40 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2731} 2903}
2732 2904
2733/* 2905/*
2734 * walk up the tree as far as required to find the previous leaf. 2906 * search the tree again to find a leaf with lesser keys
2735 * returns 0 if it found something or 1 if there are no lesser leaves. 2907 * returns 0 if it found something or 1 if there are no lesser leaves.
2736 * returns < 0 on io errors. 2908 * returns < 0 on io errors.
2737 */ 2909 */
2738int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) 2910int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
2739{ 2911{
2740 int slot; 2912 struct btrfs_key key;
2741 int level = 1; 2913 struct btrfs_disk_key found_key;
2742 struct extent_buffer *c; 2914 int ret;
2743 struct extent_buffer *next = NULL;
2744 2915
2745 while(level < BTRFS_MAX_LEVEL) { 2916 btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
2746 if (!path->nodes[level])
2747 return 1;
2748 2917
2749 slot = path->slots[level]; 2918 if (key.offset > 0)
2750 c = path->nodes[level]; 2919 key.offset--;
2751 if (slot == 0) { 2920 else if (key.type > 0)
2752 level++; 2921 key.type--;
2753 if (level == BTRFS_MAX_LEVEL) 2922 else if (key.objectid > 0)
2754 return 1; 2923 key.objectid--;
2755 continue; 2924 else
2756 } 2925 return 1;
2757 slot--;
2758
2759 if (next)
2760 free_extent_buffer(next);
2761 2926
2762 next = read_node_slot(root, c, slot); 2927 btrfs_release_path(root, path);
2763 break; 2928 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2764 } 2929 if (ret < 0)
2765 path->slots[level] = slot; 2930 return ret;
2766 while(1) { 2931 btrfs_item_key(path->nodes[0], &found_key, 0);
2767 level--; 2932 ret = comp_keys(&found_key, &key);
2768 c = path->nodes[level]; 2933 if (ret < 0)
2769 free_extent_buffer(c); 2934 return 0;
2770 slot = btrfs_header_nritems(next); 2935 return 1;
2771 if (slot != 0)
2772 slot--;
2773 path->nodes[level] = next;
2774 path->slots[level] = slot;
2775 if (!level)
2776 break;
2777 next = read_node_slot(root, next, slot);
2778 }
2779 return 0;
2780} 2936}
2781 2937
2782/* 2938/*
2783 * walk up the tree as far as required to find the next leaf. 2939 * search the tree again to find a leaf with greater keys
2784 * returns 0 if it found something or 1 if there are no greater leaves. 2940 * returns 0 if it found something or 1 if there are no greater leaves.
2785 * returns < 0 on io errors. 2941 * returns < 0 on io errors.
2786 */ 2942 */
@@ -2790,6 +2946,28 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2790 int level = 1; 2946 int level = 1;
2791 struct extent_buffer *c; 2947 struct extent_buffer *c;
2792 struct extent_buffer *next = NULL; 2948 struct extent_buffer *next = NULL;
2949 struct btrfs_key key;
2950 u32 nritems;
2951 int ret;
2952
2953 nritems = btrfs_header_nritems(path->nodes[0]);
2954 if (nritems == 0) {
2955 return 1;
2956 }
2957
2958 btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
2959
2960 path->keep_locks = 1;
2961 btrfs_release_path(root, path);
2962 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2963 path->keep_locks = 0;
2964
2965 if (ret < 0)
2966 return ret;
2967
2968 if (path->slots[0] < nritems - 1) {
2969 goto done;
2970 }
2793 2971
2794 while(level < BTRFS_MAX_LEVEL) { 2972 while(level < BTRFS_MAX_LEVEL) {
2795 if (!path->nodes[level]) 2973 if (!path->nodes[level])
@@ -2799,33 +2977,45 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2799 c = path->nodes[level]; 2977 c = path->nodes[level];
2800 if (slot >= btrfs_header_nritems(c)) { 2978 if (slot >= btrfs_header_nritems(c)) {
2801 level++; 2979 level++;
2802 if (level == BTRFS_MAX_LEVEL) 2980 if (level == BTRFS_MAX_LEVEL) {
2803 return 1; 2981 return 1;
2982 }
2804 continue; 2983 continue;
2805 } 2984 }
2806 2985
2807 if (next) 2986 if (next) {
2987 btrfs_tree_unlock(next);
2808 free_extent_buffer(next); 2988 free_extent_buffer(next);
2989 }
2809 2990
2810 if (path->reada) 2991 if (level == 1 && path->locks[1] && path->reada)
2811 reada_for_search(root, path, level, slot, 0); 2992 reada_for_search(root, path, level, slot, 0);
2812 2993
2813 next = read_node_slot(root, c, slot); 2994 next = read_node_slot(root, c, slot);
2995 if (!path->skip_locking)
2996 btrfs_tree_lock(next);
2814 break; 2997 break;
2815 } 2998 }
2816 path->slots[level] = slot; 2999 path->slots[level] = slot;
2817 while(1) { 3000 while(1) {
2818 level--; 3001 level--;
2819 c = path->nodes[level]; 3002 c = path->nodes[level];
3003 if (path->locks[level])
3004 btrfs_tree_unlock(c);
2820 free_extent_buffer(c); 3005 free_extent_buffer(c);
2821 path->nodes[level] = next; 3006 path->nodes[level] = next;
2822 path->slots[level] = 0; 3007 path->slots[level] = 0;
3008 path->locks[level] = 1;
2823 if (!level) 3009 if (!level)
2824 break; 3010 break;
2825 if (path->reada) 3011 if (level == 1 && path->locks[1] && path->reada)
2826 reada_for_search(root, path, level, 0, 0); 3012 reada_for_search(root, path, level, slot, 0);
2827 next = read_node_slot(root, next, 0); 3013 next = read_node_slot(root, next, 0);
3014 if (!path->skip_locking)
3015 btrfs_tree_lock(next);
2828 } 3016 }
3017done:
3018 unlock_up(path, 0, 1);
2829 return 0; 3019 return 0;
2830} 3020}
2831 3021