about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author Chris Mason <chris.mason@oracle.com> 2008-06-25 16:01:30 -0400
committer Chris Mason <chris.mason@oracle.com> 2008-09-25 11:04:03 -0400
commit 925baeddc5b0764a53f2214a1253251bab0e0324 (patch)
tree 0e069bf9cc1c4ecd17c812fd1fb81bf807909ee6 /fs
parent 1cc127b5d1b71453091859301de4a7dd6ee96fa8 (diff)
Btrfs: Start btree concurrency work.
The allocation trees and the chunk trees are serialized via their own dedicated mutexes. This means allocation location is still not very fine grained.

The main FS btree is protected by locks on each block in the btree. Locks are taken top / down, and as processing finishes on a given level of the tree, the lock is released after locking the lower level.

The end result of a search is now a path where only the lowest level is locked. Releasing or freeing the path drops any locks held.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/Makefile 2
-rw-r--r-- fs/btrfs/ctree.c 386
-rw-r--r-- fs/btrfs/ctree.h 23
-rw-r--r-- fs/btrfs/disk-io.c 13
-rw-r--r-- fs/btrfs/extent-tree.c 252
-rw-r--r-- fs/btrfs/extent_io.c 8
-rw-r--r-- fs/btrfs/inode.c 8
-rw-r--r-- fs/btrfs/ioctl.c 8
-rw-r--r-- fs/btrfs/locking.c 50
-rw-r--r-- fs/btrfs/locking.h 26
-rw-r--r-- fs/btrfs/transaction.c 15
-rw-r--r-- fs/btrfs/tree-defrag.c 2
12 files changed, 579 insertions, 214 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 7ed6b39e42d2..0e2dcc718de2 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,7 +6,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
6 hash.o file-item.o inode-item.o inode-map.o disk-io.o \ 6 hash.o file-item.o inode-item.o inode-map.o disk-io.o \
7 transaction.o bit-radix.o inode.o file.o tree-defrag.o \ 7 transaction.o bit-radix.o inode.o file.o tree-defrag.o \
8 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ 8 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
9 extent_io.o volumes.o async-thread.o ioctl.o 9 extent_io.o volumes.o async-thread.o ioctl.o locking.o
10 10
11btrfs-$(CONFIG_FS_POSIX_ACL) += acl.o 11btrfs-$(CONFIG_FS_POSIX_ACL) += acl.o
12else 12else
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 1d404bde3cfa..75625c68fdd8 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -21,6 +21,7 @@
21#include "disk-io.h" 21#include "disk-io.h"
22#include "transaction.h" 22#include "transaction.h"
23#include "print-tree.h" 23#include "print-tree.h"
24#include "locking.h"
24 25
25static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root 26static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
26 *root, struct btrfs_path *path, int level); 27 *root, struct btrfs_path *path, int level);
@@ -64,12 +65,47 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
64 int i; 65 int i;
65 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 66 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
66 if (!p->nodes[i]) 67 if (!p->nodes[i])
67 break; 68 continue;
69 if (p->locks[i]) {
70 btrfs_tree_unlock(p->nodes[i]);
71 p->locks[i] = 0;
72 }
68 free_extent_buffer(p->nodes[i]); 73 free_extent_buffer(p->nodes[i]);
69 } 74 }
70 memset(p, 0, sizeof(*p)); 75 memset(p, 0, sizeof(*p));
71} 76}
72 77
78struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
79{
80 struct extent_buffer *eb;
81 spin_lock(&root->node_lock);
82 eb = root->node;
83 extent_buffer_get(eb);
84 spin_unlock(&root->node_lock);
85 return eb;
86}
87
88struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
89{
90 struct extent_buffer *eb;
91
92 while(1) {
93 eb = btrfs_root_node(root);
94 btrfs_tree_lock(eb);
95
96 spin_lock(&root->node_lock);
97 if (eb == root->node) {
98 spin_unlock(&root->node_lock);
99 break;
100 }
101 spin_unlock(&root->node_lock);
102
103 btrfs_tree_unlock(eb);
104 free_extent_buffer(eb);
105 }
106 return eb;
107}
108
73static void add_root_to_dirty_list(struct btrfs_root *root) 109static void add_root_to_dirty_list(struct btrfs_root *root)
74{ 110{
75 if (root->track_dirty && list_empty(&root->dirty_list)) { 111 if (root->track_dirty && list_empty(&root->dirty_list)) {
@@ -111,7 +147,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
111 } else { 147 } else {
112 first_key.objectid = 0; 148 first_key.objectid = 0;
113 } 149 }
114 cow = __btrfs_alloc_free_block(trans, new_root, buf->len, 150 cow = btrfs_alloc_free_block(trans, new_root, buf->len,
115 new_root_objectid, 151 new_root_objectid,
116 trans->transid, first_key.objectid, 152 trans->transid, first_key.objectid,
117 level, buf->start, 0); 153 level, buf->start, 0);
@@ -151,8 +187,14 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
151 int ret = 0; 187 int ret = 0;
152 int different_trans = 0; 188 int different_trans = 0;
153 int level; 189 int level;
190 int unlock_orig = 0;
154 struct btrfs_key first_key; 191 struct btrfs_key first_key;
155 192
193 if (*cow_ret == buf)
194 unlock_orig = 1;
195
196 WARN_ON(!btrfs_tree_locked(buf));
197
156 if (root->ref_cows) { 198 if (root->ref_cows) {
157 root_gen = trans->transid; 199 root_gen = trans->transid;
158 } else { 200 } else {
@@ -172,7 +214,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
172 } else { 214 } else {
173 first_key.objectid = 0; 215 first_key.objectid = 0;
174 } 216 }
175 cow = __btrfs_alloc_free_block(trans, root, buf->len, 217 cow = btrfs_alloc_free_block(trans, root, buf->len,
176 root->root_key.objectid, 218 root->root_key.objectid,
177 root_gen, first_key.objectid, level, 219 root_gen, first_key.objectid, level,
178 search_start, empty_size); 220 search_start, empty_size);
@@ -196,9 +238,14 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
196 } 238 }
197 239
198 if (buf == root->node) { 240 if (buf == root->node) {
241 WARN_ON(parent && parent != buf);
199 root_gen = btrfs_header_generation(buf); 242 root_gen = btrfs_header_generation(buf);
243
244 spin_lock(&root->node_lock);
200 root->node = cow; 245 root->node = cow;
201 extent_buffer_get(cow); 246 extent_buffer_get(cow);
247 spin_unlock(&root->node_lock);
248
202 if (buf != root->commit_root) { 249 if (buf != root->commit_root) {
203 btrfs_free_extent(trans, root, buf->start, 250 btrfs_free_extent(trans, root, buf->start,
204 buf->len, root->root_key.objectid, 251 buf->len, root->root_key.objectid,
@@ -219,6 +266,8 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
219 btrfs_header_owner(parent), root_gen, 266 btrfs_header_owner(parent), root_gen,
220 0, 0, 1); 267 0, 0, 1);
221 } 268 }
269 if (unlock_orig)
270 btrfs_tree_unlock(buf);
222 free_extent_buffer(buf); 271 free_extent_buffer(buf);
223 btrfs_mark_buffer_dirty(cow); 272 btrfs_mark_buffer_dirty(cow);
224 *cow_ret = cow; 273 *cow_ret = cow;
@@ -316,6 +365,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
316 int progress_passed = 0; 365 int progress_passed = 0;
317 struct btrfs_disk_key disk_key; 366 struct btrfs_disk_key disk_key;
318 367
368 /* FIXME this code needs locking */
369 return 0;
370
319 parent_level = btrfs_header_level(parent); 371 parent_level = btrfs_header_level(parent);
320 if (cache_only && parent_level != 1) 372 if (cache_only && parent_level != 1)
321 return 0; 373 return 0;
@@ -729,6 +781,7 @@ static int balance_level(struct btrfs_trans_handle *trans,
729 return 0; 781 return 0;
730 782
731 mid = path->nodes[level]; 783 mid = path->nodes[level];
784 WARN_ON(!path->locks[level]);
732 WARN_ON(btrfs_header_generation(mid) != trans->transid); 785 WARN_ON(btrfs_header_generation(mid) != trans->transid);
733 786
734 orig_ptr = btrfs_node_blockptr(mid, orig_slot); 787 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
@@ -749,14 +802,21 @@ static int balance_level(struct btrfs_trans_handle *trans,
749 802
750 /* promote the child to a root */ 803 /* promote the child to a root */
751 child = read_node_slot(root, mid, 0); 804 child = read_node_slot(root, mid, 0);
805 btrfs_tree_lock(child);
752 BUG_ON(!child); 806 BUG_ON(!child);
753 ret = btrfs_cow_block(trans, root, child, mid, 0, &child); 807 ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
754 BUG_ON(ret); 808 BUG_ON(ret);
755 809
810 spin_lock(&root->node_lock);
756 root->node = child; 811 root->node = child;
812 spin_unlock(&root->node_lock);
813
757 add_root_to_dirty_list(root); 814 add_root_to_dirty_list(root);
815 btrfs_tree_unlock(child);
816 path->locks[level] = 0;
758 path->nodes[level] = NULL; 817 path->nodes[level] = NULL;
759 clean_tree_block(trans, root, mid); 818 clean_tree_block(trans, root, mid);
819 btrfs_tree_unlock(mid);
760 /* once for the path */ 820 /* once for the path */
761 free_extent_buffer(mid); 821 free_extent_buffer(mid);
762 ret = btrfs_free_extent(trans, root, mid->start, mid->len, 822 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
@@ -775,6 +835,7 @@ static int balance_level(struct btrfs_trans_handle *trans,
775 835
776 left = read_node_slot(root, parent, pslot - 1); 836 left = read_node_slot(root, parent, pslot - 1);
777 if (left) { 837 if (left) {
838 btrfs_tree_lock(left);
778 wret = btrfs_cow_block(trans, root, left, 839 wret = btrfs_cow_block(trans, root, left,
779 parent, pslot - 1, &left); 840 parent, pslot - 1, &left);
780 if (wret) { 841 if (wret) {
@@ -784,6 +845,7 @@ static int balance_level(struct btrfs_trans_handle *trans,
784 } 845 }
785 right = read_node_slot(root, parent, pslot + 1); 846 right = read_node_slot(root, parent, pslot + 1);
786 if (right) { 847 if (right) {
848 btrfs_tree_lock(right);
787 wret = btrfs_cow_block(trans, root, right, 849 wret = btrfs_cow_block(trans, root, right,
788 parent, pslot + 1, &right); 850 parent, pslot + 1, &right);
789 if (wret) { 851 if (wret) {
@@ -815,6 +877,7 @@ static int balance_level(struct btrfs_trans_handle *trans,
815 u32 blocksize = right->len; 877 u32 blocksize = right->len;
816 878
817 clean_tree_block(trans, root, right); 879 clean_tree_block(trans, root, right);
880 btrfs_tree_unlock(right);
818 free_extent_buffer(right); 881 free_extent_buffer(right);
819 right = NULL; 882 right = NULL;
820 wret = del_ptr(trans, root, path, level + 1, pslot + 883 wret = del_ptr(trans, root, path, level + 1, pslot +
@@ -862,7 +925,9 @@ static int balance_level(struct btrfs_trans_handle *trans,
862 u64 root_gen = btrfs_header_generation(parent); 925 u64 root_gen = btrfs_header_generation(parent);
863 u64 bytenr = mid->start; 926 u64 bytenr = mid->start;
864 u32 blocksize = mid->len; 927 u32 blocksize = mid->len;
928
865 clean_tree_block(trans, root, mid); 929 clean_tree_block(trans, root, mid);
930 btrfs_tree_unlock(mid);
866 free_extent_buffer(mid); 931 free_extent_buffer(mid);
867 mid = NULL; 932 mid = NULL;
868 wret = del_ptr(trans, root, path, level + 1, pslot); 933 wret = del_ptr(trans, root, path, level + 1, pslot);
@@ -885,11 +950,14 @@ static int balance_level(struct btrfs_trans_handle *trans,
885 if (left) { 950 if (left) {
886 if (btrfs_header_nritems(left) > orig_slot) { 951 if (btrfs_header_nritems(left) > orig_slot) {
887 extent_buffer_get(left); 952 extent_buffer_get(left);
953 /* left was locked after cow */
888 path->nodes[level] = left; 954 path->nodes[level] = left;
889 path->slots[level + 1] -= 1; 955 path->slots[level + 1] -= 1;
890 path->slots[level] = orig_slot; 956 path->slots[level] = orig_slot;
891 if (mid) 957 if (mid) {
958 btrfs_tree_unlock(mid);
892 free_extent_buffer(mid); 959 free_extent_buffer(mid);
960 }
893 } else { 961 } else {
894 orig_slot -= btrfs_header_nritems(left); 962 orig_slot -= btrfs_header_nritems(left);
895 path->slots[level] = orig_slot; 963 path->slots[level] = orig_slot;
@@ -901,10 +969,15 @@ static int balance_level(struct btrfs_trans_handle *trans,
901 btrfs_node_blockptr(path->nodes[level], path->slots[level])) 969 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
902 BUG(); 970 BUG();
903enospc: 971enospc:
904 if (right) 972 if (right) {
973 btrfs_tree_unlock(right);
905 free_extent_buffer(right); 974 free_extent_buffer(right);
906 if (left) 975 }
976 if (left) {
977 if (path->nodes[level] != left)
978 btrfs_tree_unlock(left);
907 free_extent_buffer(left); 979 free_extent_buffer(left);
980 }
908 return ret; 981 return ret;
909} 982}
910 983
@@ -942,6 +1015,8 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
942 /* first, try to make some room in the middle buffer */ 1015 /* first, try to make some room in the middle buffer */
943 if (left) { 1016 if (left) {
944 u32 left_nr; 1017 u32 left_nr;
1018
1019 btrfs_tree_lock(left);
945 left_nr = btrfs_header_nritems(left); 1020 left_nr = btrfs_header_nritems(left);
946 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 1021 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
947 wret = 1; 1022 wret = 1;
@@ -967,24 +1042,28 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
967 path->nodes[level] = left; 1042 path->nodes[level] = left;
968 path->slots[level + 1] -= 1; 1043 path->slots[level + 1] -= 1;
969 path->slots[level] = orig_slot; 1044 path->slots[level] = orig_slot;
1045 btrfs_tree_unlock(mid);
970 free_extent_buffer(mid); 1046 free_extent_buffer(mid);
971 } else { 1047 } else {
972 orig_slot -= 1048 orig_slot -=
973 btrfs_header_nritems(left); 1049 btrfs_header_nritems(left);
974 path->slots[level] = orig_slot; 1050 path->slots[level] = orig_slot;
1051 btrfs_tree_unlock(left);
975 free_extent_buffer(left); 1052 free_extent_buffer(left);
976 } 1053 }
977 return 0; 1054 return 0;
978 } 1055 }
1056 btrfs_tree_unlock(left);
979 free_extent_buffer(left); 1057 free_extent_buffer(left);
980 } 1058 }
981 right= read_node_slot(root, parent, pslot + 1); 1059 right = read_node_slot(root, parent, pslot + 1);
982 1060
983 /* 1061 /*
984 * then try to empty the right most buffer into the middle 1062 * then try to empty the right most buffer into the middle
985 */ 1063 */
986 if (right) { 1064 if (right) {
987 u32 right_nr; 1065 u32 right_nr;
1066 btrfs_tree_lock(right);
988 right_nr = btrfs_header_nritems(right); 1067 right_nr = btrfs_header_nritems(right);
989 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 1068 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
990 wret = 1; 1069 wret = 1;
@@ -1013,12 +1092,15 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
1013 path->slots[level + 1] += 1; 1092 path->slots[level + 1] += 1;
1014 path->slots[level] = orig_slot - 1093 path->slots[level] = orig_slot -
1015 btrfs_header_nritems(mid); 1094 btrfs_header_nritems(mid);
1095 btrfs_tree_unlock(mid);
1016 free_extent_buffer(mid); 1096 free_extent_buffer(mid);
1017 } else { 1097 } else {
1098 btrfs_tree_unlock(right);
1018 free_extent_buffer(right); 1099 free_extent_buffer(right);
1019 } 1100 }
1020 return 0; 1101 return 0;
1021 } 1102 }
1103 btrfs_tree_unlock(right);
1022 free_extent_buffer(right); 1104 free_extent_buffer(right);
1023 } 1105 }
1024 return 1; 1106 return 1;
@@ -1050,6 +1132,8 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
1050 return; 1132 return;
1051 1133
1052 node = path->nodes[level]; 1134 node = path->nodes[level];
1135 WARN_ON(!path->skip_locking && !btrfs_tree_locked(node));
1136
1053 search = btrfs_node_blockptr(node, slot); 1137 search = btrfs_node_blockptr(node, slot);
1054 blocksize = btrfs_level_size(root, level - 1); 1138 blocksize = btrfs_level_size(root, level - 1);
1055 eb = btrfs_find_tree_block(root, search, blocksize); 1139 eb = btrfs_find_tree_block(root, search, blocksize);
@@ -1098,6 +1182,39 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
1098 highest_read = search; 1182 highest_read = search;
1099 } 1183 }
1100} 1184}
1185
1186static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock)
1187{
1188 int i;
1189 int skip_level = level;
1190 struct extent_buffer *t;
1191
1192 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1193 if (!path->nodes[i])
1194 break;
1195 if (!path->locks[i])
1196 break;
1197 if (path->slots[i] == 0) {
1198 skip_level = i + 1;
1199 continue;
1200 }
1201 if (path->keep_locks) {
1202 u32 nritems;
1203 t = path->nodes[i];
1204 nritems = btrfs_header_nritems(t);
1205 if (path->slots[i] >= nritems - 1) {
1206 skip_level = i + 1;
1207 continue;
1208 }
1209 }
1210 t = path->nodes[i];
1211 if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
1212 btrfs_tree_unlock(t);
1213 path->locks[i] = 0;
1214 }
1215 }
1216}
1217
1101/* 1218/*
1102 * look for key in the tree. path is filled in with nodes along the way 1219 * look for key in the tree. path is filled in with nodes along the way
1103 * if key is found, we return zero and you can find the item in the leaf 1220 * if key is found, we return zero and you can find the item in the leaf
@@ -1120,15 +1237,27 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1120 int ret; 1237 int ret;
1121 int level; 1238 int level;
1122 int should_reada = p->reada; 1239 int should_reada = p->reada;
1240 int lowest_unlock = 1;
1123 u8 lowest_level = 0; 1241 u8 lowest_level = 0;
1124 1242
1125 lowest_level = p->lowest_level; 1243 lowest_level = p->lowest_level;
1126 WARN_ON(lowest_level && ins_len); 1244 WARN_ON(lowest_level && ins_len);
1127 WARN_ON(p->nodes[0] != NULL); 1245 WARN_ON(p->nodes[0] != NULL);
1128 WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); 1246 // WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
1247 WARN_ON(root == root->fs_info->extent_root &&
1248 !mutex_is_locked(&root->fs_info->alloc_mutex));
1249 WARN_ON(root == root->fs_info->chunk_root &&
1250 !mutex_is_locked(&root->fs_info->chunk_mutex));
1251 WARN_ON(root == root->fs_info->dev_root &&
1252 !mutex_is_locked(&root->fs_info->chunk_mutex));
1253 if (ins_len < 0)
1254 lowest_unlock = 2;
1129again: 1255again:
1130 b = root->node; 1256 if (!p->skip_locking)
1131 extent_buffer_get(b); 1257 b = btrfs_lock_root_node(root);
1258 else
1259 b = btrfs_root_node(root);
1260
1132 while (b) { 1261 while (b) {
1133 level = btrfs_header_level(b); 1262 level = btrfs_header_level(b);
1134 if (cow) { 1263 if (cow) {
@@ -1147,9 +1276,12 @@ again:
1147 WARN_ON(1); 1276 WARN_ON(1);
1148 level = btrfs_header_level(b); 1277 level = btrfs_header_level(b);
1149 p->nodes[level] = b; 1278 p->nodes[level] = b;
1279 if (!p->skip_locking)
1280 p->locks[level] = 1;
1150 ret = check_block(root, p, level); 1281 ret = check_block(root, p, level);
1151 if (ret) 1282 if (ret)
1152 return -1; 1283 return -1;
1284
1153 ret = bin_search(b, key, level, &slot); 1285 ret = bin_search(b, key, level, &slot);
1154 if (level != 0) { 1286 if (level != 0) {
1155 if (ret && slot > 0) 1287 if (ret && slot > 0)
@@ -1177,14 +1309,19 @@ again:
1177 BUG_ON(btrfs_header_nritems(b) == 1); 1309 BUG_ON(btrfs_header_nritems(b) == 1);
1178 } 1310 }
1179 /* this is only true while dropping a snapshot */ 1311 /* this is only true while dropping a snapshot */
1180 if (level == lowest_level) 1312 if (level == lowest_level) {
1313 unlock_up(p, level, lowest_unlock);
1181 break; 1314 break;
1315 }
1182 1316
1183 if (should_reada) 1317 if (should_reada)
1184 reada_for_search(root, p, level, slot, 1318 reada_for_search(root, p, level, slot,
1185 key->objectid); 1319 key->objectid);
1186 1320
1187 b = read_node_slot(root, b, slot); 1321 b = read_node_slot(root, b, slot);
1322 if (!p->skip_locking)
1323 btrfs_tree_lock(b);
1324 unlock_up(p, level, lowest_unlock);
1188 } else { 1325 } else {
1189 p->slots[level] = slot; 1326 p->slots[level] = slot;
1190 if (ins_len > 0 && btrfs_leaf_free_space(root, b) < 1327 if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
@@ -1195,6 +1332,7 @@ again:
1195 if (sret) 1332 if (sret)
1196 return sret; 1333 return sret;
1197 } 1334 }
1335 unlock_up(p, level, lowest_unlock);
1198 return ret; 1336 return ret;
1199 } 1337 }
1200 } 1338 }
@@ -1225,6 +1363,13 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans,
1225 break; 1363 break;
1226 t = path->nodes[i]; 1364 t = path->nodes[i];
1227 btrfs_set_node_key(t, key, tslot); 1365 btrfs_set_node_key(t, key, tslot);
1366 if (!btrfs_tree_locked(path->nodes[i])) {
1367 int ii;
1368printk("fixup without lock on level %d\n", btrfs_header_level(path->nodes[i]));
1369 for (ii = 0; ii < BTRFS_MAX_LEVEL; ii++) {
1370printk("level %d slot %d\n", ii, path->slots[ii]);
1371 }
1372 }
1228 btrfs_mark_buffer_dirty(path->nodes[i]); 1373 btrfs_mark_buffer_dirty(path->nodes[i]);
1229 if (tslot != 0) 1374 if (tslot != 0)
1230 break; 1375 break;
@@ -1370,6 +1515,7 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1370 u64 lower_gen; 1515 u64 lower_gen;
1371 struct extent_buffer *lower; 1516 struct extent_buffer *lower;
1372 struct extent_buffer *c; 1517 struct extent_buffer *c;
1518 struct extent_buffer *old;
1373 struct btrfs_disk_key lower_key; 1519 struct btrfs_disk_key lower_key;
1374 1520
1375 BUG_ON(path->nodes[level]); 1521 BUG_ON(path->nodes[level]);
@@ -1386,12 +1532,13 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1386 else 1532 else
1387 btrfs_node_key(lower, &lower_key, 0); 1533 btrfs_node_key(lower, &lower_key, 0);
1388 1534
1389 c = __btrfs_alloc_free_block(trans, root, root->nodesize, 1535 c = btrfs_alloc_free_block(trans, root, root->nodesize,
1390 root->root_key.objectid, 1536 root->root_key.objectid,
1391 root_gen, lower_key.objectid, level, 1537 root_gen, lower_key.objectid, level,
1392 root->node->start, 0); 1538 root->node->start, 0);
1393 if (IS_ERR(c)) 1539 if (IS_ERR(c))
1394 return PTR_ERR(c); 1540 return PTR_ERR(c);
1541
1395 memset_extent_buffer(c, 0, 0, root->nodesize); 1542 memset_extent_buffer(c, 0, 0, root->nodesize);
1396 btrfs_set_header_nritems(c, 1); 1543 btrfs_set_header_nritems(c, 1);
1397 btrfs_set_header_level(c, level); 1544 btrfs_set_header_level(c, level);
@@ -1416,23 +1563,31 @@ static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1416 1563
1417 btrfs_mark_buffer_dirty(c); 1564 btrfs_mark_buffer_dirty(c);
1418 1565
1419 /* the super has an extra ref to root->node */ 1566 spin_lock(&root->node_lock);
1420 free_extent_buffer(root->node); 1567 old = root->node;
1421 root->node = c; 1568 root->node = c;
1569 spin_unlock(&root->node_lock);
1570
1571 /* the super has an extra ref to root->node */
1572 free_extent_buffer(old);
1573
1422 add_root_to_dirty_list(root); 1574 add_root_to_dirty_list(root);
1423 extent_buffer_get(c); 1575 extent_buffer_get(c);
1424 path->nodes[level] = c; 1576 path->nodes[level] = c;
1577 path->locks[level] = 1;
1425 path->slots[level] = 0; 1578 path->slots[level] = 0;
1426 1579
1427 if (root->ref_cows && lower_gen != trans->transid) { 1580 if (root->ref_cows && lower_gen != trans->transid) {
1428 struct btrfs_path *back_path = btrfs_alloc_path(); 1581 struct btrfs_path *back_path = btrfs_alloc_path();
1429 int ret; 1582 int ret;
1583 mutex_lock(&root->fs_info->alloc_mutex);
1430 ret = btrfs_insert_extent_backref(trans, 1584 ret = btrfs_insert_extent_backref(trans,
1431 root->fs_info->extent_root, 1585 root->fs_info->extent_root,
1432 path, lower->start, 1586 path, lower->start,
1433 root->root_key.objectid, 1587 root->root_key.objectid,
1434 trans->transid, 0, 0); 1588 trans->transid, 0, 0);
1435 BUG_ON(ret); 1589 BUG_ON(ret);
1590 mutex_unlock(&root->fs_info->alloc_mutex);
1436 btrfs_free_path(back_path); 1591 btrfs_free_path(back_path);
1437 } 1592 }
1438 return 0; 1593 return 0;
@@ -1521,7 +1676,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1521 root_gen = 0; 1676 root_gen = 0;
1522 1677
1523 btrfs_node_key(c, &disk_key, 0); 1678 btrfs_node_key(c, &disk_key, 0);
1524 split = __btrfs_alloc_free_block(trans, root, root->nodesize, 1679 split = btrfs_alloc_free_block(trans, root, root->nodesize,
1525 root->root_key.objectid, 1680 root->root_key.objectid,
1526 root_gen, 1681 root_gen,
1527 btrfs_disk_key_objectid(&disk_key), 1682 btrfs_disk_key_objectid(&disk_key),
@@ -1564,10 +1719,12 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1564 1719
1565 if (path->slots[level] >= mid) { 1720 if (path->slots[level] >= mid) {
1566 path->slots[level] -= mid; 1721 path->slots[level] -= mid;
1722 btrfs_tree_unlock(c);
1567 free_extent_buffer(c); 1723 free_extent_buffer(c);
1568 path->nodes[level] = split; 1724 path->nodes[level] = split;
1569 path->slots[level + 1] += 1; 1725 path->slots[level + 1] += 1;
1570 } else { 1726 } else {
1727 btrfs_tree_unlock(split);
1571 free_extent_buffer(split); 1728 free_extent_buffer(split);
1572 } 1729 }
1573 return ret; 1730 return ret;
@@ -1648,30 +1805,24 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1648 return 1; 1805 return 1;
1649 1806
1650 right = read_node_slot(root, upper, slot + 1); 1807 right = read_node_slot(root, upper, slot + 1);
1808 btrfs_tree_lock(right);
1651 free_space = btrfs_leaf_free_space(root, right); 1809 free_space = btrfs_leaf_free_space(root, right);
1652 if (free_space < data_size + sizeof(struct btrfs_item)) { 1810 if (free_space < data_size + sizeof(struct btrfs_item))
1653 free_extent_buffer(right); 1811 goto out_unlock;
1654 return 1;
1655 }
1656 1812
1657 /* cow and double check */ 1813 /* cow and double check */
1658 ret = btrfs_cow_block(trans, root, right, upper, 1814 ret = btrfs_cow_block(trans, root, right, upper,
1659 slot + 1, &right); 1815 slot + 1, &right);
1660 if (ret) { 1816 if (ret)
1661 free_extent_buffer(right); 1817 goto out_unlock;
1662 return 1; 1818
1663 }
1664 free_space = btrfs_leaf_free_space(root, right); 1819 free_space = btrfs_leaf_free_space(root, right);
1665 if (free_space < data_size + sizeof(struct btrfs_item)) { 1820 if (free_space < data_size + sizeof(struct btrfs_item))
1666 free_extent_buffer(right); 1821 goto out_unlock;
1667 return 1;
1668 }
1669 1822
1670 left_nritems = btrfs_header_nritems(left); 1823 left_nritems = btrfs_header_nritems(left);
1671 if (left_nritems == 0) { 1824 if (left_nritems == 0)
1672 free_extent_buffer(right); 1825 goto out_unlock;
1673 return 1;
1674 }
1675 1826
1676 if (empty) 1827 if (empty)
1677 nr = 0; 1828 nr = 0;
@@ -1707,10 +1858,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1707 left->map_token = NULL; 1858 left->map_token = NULL;
1708 } 1859 }
1709 1860
1710 if (push_items == 0) { 1861 if (push_items == 0)
1711 free_extent_buffer(right); 1862 goto out_unlock;
1712 return 1;
1713 }
1714 1863
1715 if (!empty && push_items == left_nritems) 1864 if (!empty && push_items == left_nritems)
1716 WARN_ON(1); 1865 WARN_ON(1);
@@ -1778,14 +1927,24 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1778 /* then fixup the leaf pointer in the path */ 1927 /* then fixup the leaf pointer in the path */
1779 if (path->slots[0] >= left_nritems) { 1928 if (path->slots[0] >= left_nritems) {
1780 path->slots[0] -= left_nritems; 1929 path->slots[0] -= left_nritems;
1930 if (btrfs_header_nritems(path->nodes[0]) == 0)
1931 clean_tree_block(trans, root, path->nodes[0]);
1932 btrfs_tree_unlock(path->nodes[0]);
1781 free_extent_buffer(path->nodes[0]); 1933 free_extent_buffer(path->nodes[0]);
1782 path->nodes[0] = right; 1934 path->nodes[0] = right;
1783 path->slots[1] += 1; 1935 path->slots[1] += 1;
1784 } else { 1936 } else {
1937 btrfs_tree_unlock(right);
1785 free_extent_buffer(right); 1938 free_extent_buffer(right);
1786 } 1939 }
1787 return 0; 1940 return 0;
1941
1942out_unlock:
1943 btrfs_tree_unlock(right);
1944 free_extent_buffer(right);
1945 return 1;
1788} 1946}
1947
1789/* 1948/*
1790 * push some data in the path leaf to the left, trying to free up at 1949 * push some data in the path leaf to the left, trying to free up at
1791 * least data_size bytes. returns zero if the push worked, nonzero otherwise 1950 * least data_size bytes. returns zero if the push worked, nonzero otherwise
@@ -1823,10 +1982,11 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1823 } 1982 }
1824 1983
1825 left = read_node_slot(root, path->nodes[1], slot - 1); 1984 left = read_node_slot(root, path->nodes[1], slot - 1);
1985 btrfs_tree_lock(left);
1826 free_space = btrfs_leaf_free_space(root, left); 1986 free_space = btrfs_leaf_free_space(root, left);
1827 if (free_space < data_size + sizeof(struct btrfs_item)) { 1987 if (free_space < data_size + sizeof(struct btrfs_item)) {
1828 free_extent_buffer(left); 1988 ret = 1;
1829 return 1; 1989 goto out;
1830 } 1990 }
1831 1991
1832 /* cow and double check */ 1992 /* cow and double check */
@@ -1834,14 +1994,14 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1834 path->nodes[1], slot - 1, &left); 1994 path->nodes[1], slot - 1, &left);
1835 if (ret) { 1995 if (ret) {
1836 /* we hit -ENOSPC, but it isn't fatal here */ 1996 /* we hit -ENOSPC, but it isn't fatal here */
1837 free_extent_buffer(left); 1997 ret = 1;
1838 return 1; 1998 goto out;
1839 } 1999 }
1840 2000
1841 free_space = btrfs_leaf_free_space(root, left); 2001 free_space = btrfs_leaf_free_space(root, left);
1842 if (free_space < data_size + sizeof(struct btrfs_item)) { 2002 if (free_space < data_size + sizeof(struct btrfs_item)) {
1843 free_extent_buffer(left); 2003 ret = 1;
1844 return 1; 2004 goto out;
1845 } 2005 }
1846 2006
1847 if (empty) 2007 if (empty)
@@ -1876,8 +2036,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1876 } 2036 }
1877 2037
1878 if (push_items == 0) { 2038 if (push_items == 0) {
1879 free_extent_buffer(left); 2039 ret = 1;
1880 return 1; 2040 goto out;
1881 } 2041 }
1882 if (!empty && push_items == btrfs_header_nritems(right)) 2042 if (!empty && push_items == btrfs_header_nritems(right))
1883 WARN_ON(1); 2043 WARN_ON(1);
@@ -1975,15 +2135,23 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1975 /* then fixup the leaf pointer in the path */ 2135 /* then fixup the leaf pointer in the path */
1976 if (path->slots[0] < push_items) { 2136 if (path->slots[0] < push_items) {
1977 path->slots[0] += old_left_nritems; 2137 path->slots[0] += old_left_nritems;
2138 if (btrfs_header_nritems(path->nodes[0]) == 0)
2139 clean_tree_block(trans, root, path->nodes[0]);
2140 btrfs_tree_unlock(path->nodes[0]);
1978 free_extent_buffer(path->nodes[0]); 2141 free_extent_buffer(path->nodes[0]);
1979 path->nodes[0] = left; 2142 path->nodes[0] = left;
1980 path->slots[1] -= 1; 2143 path->slots[1] -= 1;
1981 } else { 2144 } else {
2145 btrfs_tree_unlock(left);
1982 free_extent_buffer(left); 2146 free_extent_buffer(left);
1983 path->slots[0] -= push_items; 2147 path->slots[0] -= push_items;
1984 } 2148 }
1985 BUG_ON(path->slots[0] < 0); 2149 BUG_ON(path->slots[0] < 0);
1986 return ret; 2150 return ret;
2151out:
2152 btrfs_tree_unlock(left);
2153 free_extent_buffer(left);
2154 return ret;
1987} 2155}
1988 2156
1989/* 2157/*
@@ -2052,7 +2220,7 @@ again:
2052 2220
2053 btrfs_item_key(l, &disk_key, 0); 2221 btrfs_item_key(l, &disk_key, 0);
2054 2222
2055 right = __btrfs_alloc_free_block(trans, root, root->leafsize, 2223 right = btrfs_alloc_free_block(trans, root, root->leafsize,
2056 root->root_key.objectid, 2224 root->root_key.objectid,
2057 root_gen, disk_key.objectid, 0, 2225 root_gen, disk_key.objectid, 0,
2058 l->start, 0); 2226 l->start, 0);
@@ -2085,6 +2253,8 @@ again:
2085 path->slots[1] + 1, 1); 2253 path->slots[1] + 1, 1);
2086 if (wret) 2254 if (wret)
2087 ret = wret; 2255 ret = wret;
2256
2257 btrfs_tree_unlock(path->nodes[0]);
2088 free_extent_buffer(path->nodes[0]); 2258 free_extent_buffer(path->nodes[0]);
2089 path->nodes[0] = right; 2259 path->nodes[0] = right;
2090 path->slots[0] = 0; 2260 path->slots[0] = 0;
@@ -2111,6 +2281,7 @@ again:
2111 path->slots[1], 1); 2281 path->slots[1], 1);
2112 if (wret) 2282 if (wret)
2113 ret = wret; 2283 ret = wret;
2284 btrfs_tree_unlock(path->nodes[0]);
2114 free_extent_buffer(path->nodes[0]); 2285 free_extent_buffer(path->nodes[0]);
2115 path->nodes[0] = right; 2286 path->nodes[0] = right;
2116 path->slots[0] = 0; 2287 path->slots[0] = 0;
@@ -2184,12 +2355,15 @@ again:
2184 BUG_ON(path->slots[0] != slot); 2355 BUG_ON(path->slots[0] != slot);
2185 2356
2186 if (mid <= slot) { 2357 if (mid <= slot) {
2358 btrfs_tree_unlock(path->nodes[0]);
2187 free_extent_buffer(path->nodes[0]); 2359 free_extent_buffer(path->nodes[0]);
2188 path->nodes[0] = right; 2360 path->nodes[0] = right;
2189 path->slots[0] -= mid; 2361 path->slots[0] -= mid;
2190 path->slots[1] += 1; 2362 path->slots[1] += 1;
2191 } else 2363 } else {
2364 btrfs_tree_unlock(right);
2192 free_extent_buffer(right); 2365 free_extent_buffer(right);
2366 }
2193 2367
2194 BUG_ON(path->slots[0] < 0); 2368 BUG_ON(path->slots[0] < 0);
2195 2369
@@ -2418,10 +2592,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
2418 total_data += data_size[i]; 2592 total_data += data_size[i];
2419 } 2593 }
2420 2594
2421 /* create a root if there isn't one */
2422 if (!root->node)
2423 BUG();
2424
2425 total_size = total_data + (nr - 1) * sizeof(struct btrfs_item); 2595 total_size = total_data + (nr - 1) * sizeof(struct btrfs_item);
2426 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); 2596 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
2427 if (ret == 0) { 2597 if (ret == 0) {
@@ -2516,7 +2686,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
2516 btrfs_print_leaf(root, leaf); 2686 btrfs_print_leaf(root, leaf);
2517 BUG(); 2687 BUG();
2518 } 2688 }
2519
2520out: 2689out:
2521 return ret; 2690 return ret;
2522} 2691}
@@ -2655,7 +2824,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2655 btrfs_set_header_level(leaf, 0); 2824 btrfs_set_header_level(leaf, 0);
2656 } else { 2825 } else {
2657 u64 root_gen = btrfs_header_generation(path->nodes[1]); 2826 u64 root_gen = btrfs_header_generation(path->nodes[1]);
2658 clean_tree_block(trans, root, leaf);
2659 wret = del_ptr(trans, root, path, 1, path->slots[1]); 2827 wret = del_ptr(trans, root, path, 1, path->slots[1]);
2660 if (wret) 2828 if (wret)
2661 ret = wret; 2829 ret = wret;
@@ -2706,8 +2874,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2706 root_gen = btrfs_header_generation( 2874 root_gen = btrfs_header_generation(
2707 path->nodes[1]); 2875 path->nodes[1]);
2708 2876
2709 clean_tree_block(trans, root, leaf);
2710
2711 wret = del_ptr(trans, root, path, 1, slot); 2877 wret = del_ptr(trans, root, path, 1, slot);
2712 if (wret) 2878 if (wret)
2713 ret = wret; 2879 ret = wret;
@@ -2720,7 +2886,13 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2720 if (wret) 2886 if (wret)
2721 ret = wret; 2887 ret = wret;
2722 } else { 2888 } else {
2723 btrfs_mark_buffer_dirty(leaf); 2889 /* if we're still in the path, make sure
2890 * we're dirty. Otherwise, one of the
2891 * push_leaf functions must have already
2892 * dirtied this buffer
2893 */
2894 if (path->nodes[0] == leaf)
2895 btrfs_mark_buffer_dirty(leaf);
2724 free_extent_buffer(leaf); 2896 free_extent_buffer(leaf);
2725 } 2897 }
2726 } else { 2898 } else {
@@ -2731,56 +2903,40 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2731} 2903}
2732 2904
2733/* 2905/*
2734 * walk up the tree as far as required to find the previous leaf. 2906 * search the tree again to find a leaf with lesser keys
2735 * returns 0 if it found something or 1 if there are no lesser leaves. 2907 * returns 0 if it found something or 1 if there are no lesser leaves.
2736 * returns < 0 on io errors. 2908 * returns < 0 on io errors.
2737 */ 2909 */
2738int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) 2910int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
2739{ 2911{
2740 int slot; 2912 struct btrfs_key key;
2741 int level = 1; 2913 struct btrfs_disk_key found_key;
2742 struct extent_buffer *c; 2914 int ret;
2743 struct extent_buffer *next = NULL;
2744 2915
2745 while(level < BTRFS_MAX_LEVEL) { 2916 btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
2746 if (!path->nodes[level])
2747 return 1;
2748 2917
2749 slot = path->slots[level]; 2918 if (key.offset > 0)
2750 c = path->nodes[level]; 2919 key.offset--;
2751 if (slot == 0) { 2920 else if (key.type > 0)
2752 level++; 2921 key.type--;
2753 if (level == BTRFS_MAX_LEVEL) 2922 else if (key.objectid > 0)
2754 return 1; 2923 key.objectid--;
2755 continue; 2924 else
2756 } 2925 return 1;
2757 slot--;
2758
2759 if (next)
2760 free_extent_buffer(next);
2761 2926
2762 next = read_node_slot(root, c, slot); 2927 btrfs_release_path(root, path);
2763 break; 2928 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2764 } 2929 if (ret < 0)
2765 path->slots[level] = slot; 2930 return ret;
2766 while(1) { 2931 btrfs_item_key(path->nodes[0], &found_key, 0);
2767 level--; 2932 ret = comp_keys(&found_key, &key);
2768 c = path->nodes[level]; 2933 if (ret < 0)
2769 free_extent_buffer(c); 2934 return 0;
2770 slot = btrfs_header_nritems(next); 2935 return 1;
2771 if (slot != 0)
2772 slot--;
2773 path->nodes[level] = next;
2774 path->slots[level] = slot;
2775 if (!level)
2776 break;
2777 next = read_node_slot(root, next, slot);
2778 }
2779 return 0;
2780} 2936}
2781 2937
2782/* 2938/*
2783 * walk up the tree as far as required to find the next leaf. 2939 * search the tree again to find a leaf with greater keys
2784 * returns 0 if it found something or 1 if there are no greater leaves. 2940 * returns 0 if it found something or 1 if there are no greater leaves.
2785 * returns < 0 on io errors. 2941 * returns < 0 on io errors.
2786 */ 2942 */
@@ -2790,6 +2946,28 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2790 int level = 1; 2946 int level = 1;
2791 struct extent_buffer *c; 2947 struct extent_buffer *c;
2792 struct extent_buffer *next = NULL; 2948 struct extent_buffer *next = NULL;
2949 struct btrfs_key key;
2950 u32 nritems;
2951 int ret;
2952
2953 nritems = btrfs_header_nritems(path->nodes[0]);
2954 if (nritems == 0) {
2955 return 1;
2956 }
2957
2958 btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
2959
2960 path->keep_locks = 1;
2961 btrfs_release_path(root, path);
2962 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2963 path->keep_locks = 0;
2964
2965 if (ret < 0)
2966 return ret;
2967
2968 if (path->slots[0] < nritems - 1) {
2969 goto done;
2970 }
2793 2971
2794 while(level < BTRFS_MAX_LEVEL) { 2972 while(level < BTRFS_MAX_LEVEL) {
2795 if (!path->nodes[level]) 2973 if (!path->nodes[level])
@@ -2799,33 +2977,45 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2799 c = path->nodes[level]; 2977 c = path->nodes[level];
2800 if (slot >= btrfs_header_nritems(c)) { 2978 if (slot >= btrfs_header_nritems(c)) {
2801 level++; 2979 level++;
2802 if (level == BTRFS_MAX_LEVEL) 2980 if (level == BTRFS_MAX_LEVEL) {
2803 return 1; 2981 return 1;
2982 }
2804 continue; 2983 continue;
2805 } 2984 }
2806 2985
2807 if (next) 2986 if (next) {
2987 btrfs_tree_unlock(next);
2808 free_extent_buffer(next); 2988 free_extent_buffer(next);
2989 }
2809 2990
2810 if (path->reada) 2991 if (level == 1 && path->locks[1] && path->reada)
2811 reada_for_search(root, path, level, slot, 0); 2992 reada_for_search(root, path, level, slot, 0);
2812 2993
2813 next = read_node_slot(root, c, slot); 2994 next = read_node_slot(root, c, slot);
2995 if (!path->skip_locking)
2996 btrfs_tree_lock(next);
2814 break; 2997 break;
2815 } 2998 }
2816 path->slots[level] = slot; 2999 path->slots[level] = slot;
2817 while(1) { 3000 while(1) {
2818 level--; 3001 level--;
2819 c = path->nodes[level]; 3002 c = path->nodes[level];
3003 if (path->locks[level])
3004 btrfs_tree_unlock(c);
2820 free_extent_buffer(c); 3005 free_extent_buffer(c);
2821 path->nodes[level] = next; 3006 path->nodes[level] = next;
2822 path->slots[level] = 0; 3007 path->slots[level] = 0;
3008 path->locks[level] = 1;
2823 if (!level) 3009 if (!level)
2824 break; 3010 break;
2825 if (path->reada) 3011 if (level == 1 && path->locks[1] && path->reada)
2826 reada_for_search(root, path, level, 0, 0); 3012 reada_for_search(root, path, level, slot, 0);
2827 next = read_node_slot(root, next, 0); 3013 next = read_node_slot(root, next, 0);
3014 if (!path->skip_locking)
3015 btrfs_tree_lock(next);
2828 } 3016 }
3017done:
3018 unlock_up(path, 0, 1);
2829 return 0; 3019 return 0;
2830} 3020}
2831 3021
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index dcea9d706d9b..50891b39f366 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -330,8 +330,13 @@ struct btrfs_node {
330struct btrfs_path { 330struct btrfs_path {
331 struct extent_buffer *nodes[BTRFS_MAX_LEVEL]; 331 struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
332 int slots[BTRFS_MAX_LEVEL]; 332 int slots[BTRFS_MAX_LEVEL];
333 /* if there is real range locking, this locks field will change */
334 int locks[BTRFS_MAX_LEVEL];
333 int reada; 335 int reada;
336 /* keep some upper locks as we walk down */
337 int keep_locks;
334 int lowest_level; 338 int lowest_level;
339 int skip_locking;
335}; 340};
336 341
337/* 342/*
@@ -515,6 +520,8 @@ struct btrfs_fs_info {
515 spinlock_t hash_lock; 520 spinlock_t hash_lock;
516 struct mutex trans_mutex; 521 struct mutex trans_mutex;
517 struct mutex fs_mutex; 522 struct mutex fs_mutex;
523 struct mutex alloc_mutex;
524 struct mutex chunk_mutex;
518 struct list_head trans_list; 525 struct list_head trans_list;
519 struct list_head hashers; 526 struct list_head hashers;
520 struct list_head dead_roots; 527 struct list_head dead_roots;
@@ -576,6 +583,10 @@ struct btrfs_fs_info {
576 */ 583 */
577struct btrfs_root { 584struct btrfs_root {
578 struct extent_buffer *node; 585 struct extent_buffer *node;
586
587 /* the node lock is held while changing the node pointer */
588 spinlock_t node_lock;
589
579 struct extent_buffer *commit_root; 590 struct extent_buffer *commit_root;
580 struct btrfs_root_item root_item; 591 struct btrfs_root_item root_item;
581 struct btrfs_key root_key; 592 struct btrfs_key root_key;
@@ -1353,13 +1364,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
1353 struct btrfs_block_group_cache 1364 struct btrfs_block_group_cache
1354 *hint, u64 search_start, 1365 *hint, u64 search_start,
1355 int data, int owner); 1366 int data, int owner);
1356int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
1357 struct btrfs_root *root, u64 owner_objectid);
1358struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 1367struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1359 struct btrfs_root *root, u32 size,
1360 u64 root_objectid,
1361 u64 hint, u64 empty_size);
1362struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1363 struct btrfs_root *root, 1368 struct btrfs_root *root,
1364 u32 blocksize, 1369 u32 blocksize,
1365 u64 root_objectid, 1370 u64 root_objectid,
@@ -1368,8 +1373,6 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1368 int level, 1373 int level,
1369 u64 hint, 1374 u64 hint,
1370 u64 empty_size); 1375 u64 empty_size);
1371int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans,
1372 struct btrfs_root *root, u64 new_size);
1373int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size); 1376int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size);
1374int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, 1377int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
1375 struct btrfs_root *root, 1378 struct btrfs_root *root,
@@ -1409,6 +1412,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
1409int btrfs_previous_item(struct btrfs_root *root, 1412int btrfs_previous_item(struct btrfs_root *root,
1410 struct btrfs_path *path, u64 min_objectid, 1413 struct btrfs_path *path, u64 min_objectid,
1411 int type); 1414 int type);
1415
1416struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
1417struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
1418
1412int btrfs_cow_block(struct btrfs_trans_handle *trans, 1419int btrfs_cow_block(struct btrfs_trans_handle *trans,
1413 struct btrfs_root *root, struct extent_buffer *buf, 1420 struct btrfs_root *root, struct extent_buffer *buf,
1414 struct extent_buffer *parent, int parent_slot, 1421 struct extent_buffer *parent, int parent_slot,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e5c758e306d5..fe40bdd984ff 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -32,6 +32,7 @@
32#include "volumes.h" 32#include "volumes.h"
33#include "print-tree.h" 33#include "print-tree.h"
34#include "async-thread.h" 34#include "async-thread.h"
35#include "locking.h"
35 36
36#if 0 37#if 0
37static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) 38static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
@@ -681,9 +682,11 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
681{ 682{
682 struct inode *btree_inode = root->fs_info->btree_inode; 683 struct inode *btree_inode = root->fs_info->btree_inode;
683 if (btrfs_header_generation(buf) == 684 if (btrfs_header_generation(buf) ==
684 root->fs_info->running_transaction->transid) 685 root->fs_info->running_transaction->transid) {
686 WARN_ON(!btrfs_tree_locked(buf));
685 clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, 687 clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
686 buf); 688 buf);
689 }
687 return 0; 690 return 0;
688} 691}
689 692
@@ -720,6 +723,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
720 root->in_sysfs = 0; 723 root->in_sysfs = 0;
721 724
722 INIT_LIST_HEAD(&root->dirty_list); 725 INIT_LIST_HEAD(&root->dirty_list);
726 spin_lock_init(&root->node_lock);
723 memset(&root->root_key, 0, sizeof(root->root_key)); 727 memset(&root->root_key, 0, sizeof(root->root_key));
724 memset(&root->root_item, 0, sizeof(root->root_item)); 728 memset(&root->root_item, 0, sizeof(root->root_item));
725 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); 729 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
@@ -1196,6 +1200,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1196 1200
1197 mutex_init(&fs_info->trans_mutex); 1201 mutex_init(&fs_info->trans_mutex);
1198 mutex_init(&fs_info->fs_mutex); 1202 mutex_init(&fs_info->fs_mutex);
1203 mutex_init(&fs_info->alloc_mutex);
1204 mutex_init(&fs_info->chunk_mutex);
1199 1205
1200#if 0 1206#if 0
1201 ret = add_hasher(fs_info, "crc32c"); 1207 ret = add_hasher(fs_info, "crc32c");
@@ -1274,7 +1280,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1274 1280
1275 mutex_lock(&fs_info->fs_mutex); 1281 mutex_lock(&fs_info->fs_mutex);
1276 1282
1283 mutex_lock(&fs_info->chunk_mutex);
1277 ret = btrfs_read_sys_array(tree_root); 1284 ret = btrfs_read_sys_array(tree_root);
1285 mutex_unlock(&fs_info->chunk_mutex);
1278 if (ret) { 1286 if (ret) {
1279 printk("btrfs: failed to read the system array on %s\n", 1287 printk("btrfs: failed to read the system array on %s\n",
1280 sb->s_id); 1288 sb->s_id);
@@ -1296,7 +1304,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1296 (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), 1304 (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
1297 BTRFS_UUID_SIZE); 1305 BTRFS_UUID_SIZE);
1298 1306
1307 mutex_lock(&fs_info->chunk_mutex);
1299 ret = btrfs_read_chunk_tree(chunk_root); 1308 ret = btrfs_read_chunk_tree(chunk_root);
1309 mutex_unlock(&fs_info->chunk_mutex);
1300 BUG_ON(ret); 1310 BUG_ON(ret);
1301 1311
1302 btrfs_close_extra_devices(fs_devices); 1312 btrfs_close_extra_devices(fs_devices);
@@ -1654,6 +1664,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
1654 u64 transid = btrfs_header_generation(buf); 1664 u64 transid = btrfs_header_generation(buf);
1655 struct inode *btree_inode = root->fs_info->btree_inode; 1665 struct inode *btree_inode = root->fs_info->btree_inode;
1656 1666
1667 WARN_ON(!btrfs_tree_locked(buf));
1657 if (transid != root->fs_info->generation) { 1668 if (transid != root->fs_info->generation) {
1658 printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", 1669 printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n",
1659 (unsigned long long)buf->start, 1670 (unsigned long long)buf->start,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 41a63462d3eb..7e40c516fe62 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -25,6 +25,7 @@
25#include "print-tree.h" 25#include "print-tree.h"
26#include "transaction.h" 26#include "transaction.h"
27#include "volumes.h" 27#include "volumes.h"
28#include "locking.h"
28 29
29#define BLOCK_GROUP_DATA EXTENT_WRITEBACK 30#define BLOCK_GROUP_DATA EXTENT_WRITEBACK
30#define BLOCK_GROUP_METADATA EXTENT_UPTODATE 31#define BLOCK_GROUP_METADATA EXTENT_UPTODATE
@@ -36,7 +37,28 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct
36 btrfs_root *extent_root); 37 btrfs_root *extent_root);
37static int del_pending_extents(struct btrfs_trans_handle *trans, struct 38static int del_pending_extents(struct btrfs_trans_handle *trans, struct
38 btrfs_root *extent_root); 39 btrfs_root *extent_root);
40static struct btrfs_block_group_cache *
41__btrfs_find_block_group(struct btrfs_root *root,
42 struct btrfs_block_group_cache *hint,
43 u64 search_start, int data, int owner);
39 44
45void maybe_lock_mutex(struct btrfs_root *root)
46{
47 if (root != root->fs_info->extent_root &&
48 root != root->fs_info->chunk_root &&
49 root != root->fs_info->dev_root) {
50 mutex_lock(&root->fs_info->alloc_mutex);
51 }
52}
53
54void maybe_unlock_mutex(struct btrfs_root *root)
55{
56 if (root != root->fs_info->extent_root &&
57 root != root->fs_info->chunk_root &&
58 root != root->fs_info->dev_root) {
59 mutex_unlock(&root->fs_info->alloc_mutex);
60 }
61}
40 62
41static int cache_block_group(struct btrfs_root *root, 63static int cache_block_group(struct btrfs_root *root,
42 struct btrfs_block_group_cache *block_group) 64 struct btrfs_block_group_cache *block_group)
@@ -66,6 +88,7 @@ static int cache_block_group(struct btrfs_root *root,
66 return -ENOMEM; 88 return -ENOMEM;
67 89
68 path->reada = 2; 90 path->reada = 2;
91 path->skip_locking = 1;
69 first_free = block_group->key.objectid; 92 first_free = block_group->key.objectid;
70 key.objectid = block_group->key.objectid; 93 key.objectid = block_group->key.objectid;
71 key.offset = 0; 94 key.offset = 0;
@@ -290,7 +313,7 @@ no_cache:
290 cache = btrfs_lookup_first_block_group(root->fs_info, last); 313 cache = btrfs_lookup_first_block_group(root->fs_info, last);
291 } 314 }
292 cache_miss = 0; 315 cache_miss = 0;
293 cache = btrfs_find_block_group(root, cache, last, data, 0); 316 cache = __btrfs_find_block_group(root, cache, last, data, 0);
294 if (!cache) 317 if (!cache)
295 goto no_cache; 318 goto no_cache;
296 *cache_ret = cache; 319 *cache_ret = cache;
@@ -318,10 +341,10 @@ static int block_group_state_bits(u64 flags)
318 return bits; 341 return bits;
319} 342}
320 343
321struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, 344static struct btrfs_block_group_cache *
322 struct btrfs_block_group_cache 345__btrfs_find_block_group(struct btrfs_root *root,
323 *hint, u64 search_start, 346 struct btrfs_block_group_cache *hint,
324 int data, int owner) 347 u64 search_start, int data, int owner)
325{ 348{
326 struct btrfs_block_group_cache *cache; 349 struct btrfs_block_group_cache *cache;
327 struct extent_io_tree *block_group_cache; 350 struct extent_io_tree *block_group_cache;
@@ -411,6 +434,18 @@ found:
411 return found_group; 434 return found_group;
412} 435}
413 436
437struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
438 struct btrfs_block_group_cache
439 *hint, u64 search_start,
440 int data, int owner)
441{
442
443 struct btrfs_block_group_cache *ret;
444 mutex_lock(&root->fs_info->alloc_mutex);
445 ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
446 mutex_unlock(&root->fs_info->alloc_mutex);
447 return ret;
448}
414static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation, 449static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
415 u64 owner, u64 owner_offset) 450 u64 owner, u64 owner_offset)
416{ 451{
@@ -646,7 +681,7 @@ out:
646 return ret; 681 return ret;
647} 682}
648 683
649int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 684static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
650 struct btrfs_root *root, 685 struct btrfs_root *root,
651 u64 bytenr, u64 num_bytes, 686 u64 bytenr, u64 num_bytes,
652 u64 root_objectid, u64 ref_generation, 687 u64 root_objectid, u64 ref_generation,
@@ -696,6 +731,22 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
696 return 0; 731 return 0;
697} 732}
698 733
734int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
735 struct btrfs_root *root,
736 u64 bytenr, u64 num_bytes,
737 u64 root_objectid, u64 ref_generation,
738 u64 owner, u64 owner_offset)
739{
740 int ret;
741
742 mutex_lock(&root->fs_info->alloc_mutex);
743 ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
744 root_objectid, ref_generation,
745 owner, owner_offset);
746 mutex_unlock(&root->fs_info->alloc_mutex);
747 return ret;
748}
749
699int btrfs_extent_post_op(struct btrfs_trans_handle *trans, 750int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
700 struct btrfs_root *root) 751 struct btrfs_root *root)
701{ 752{
@@ -760,6 +811,10 @@ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
760 struct btrfs_extent_ref *ref_item; 811 struct btrfs_extent_ref *ref_item;
761 int level = -1; 812 int level = -1;
762 813
814 /* FIXME, needs locking */
815 BUG();
816
817 mutex_lock(&root->fs_info->alloc_mutex);
763 path = btrfs_alloc_path(); 818 path = btrfs_alloc_path();
764again: 819again:
765 if (level == -1) 820 if (level == -1)
@@ -854,33 +909,9 @@ again:
854 909
855out: 910out:
856 btrfs_free_path(path); 911 btrfs_free_path(path);
912 mutex_unlock(&root->fs_info->alloc_mutex);
857 return total_count; 913 return total_count;
858} 914}
859int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
860 struct btrfs_root *root, u64 owner_objectid)
861{
862 u64 generation;
863 u64 key_objectid;
864 u64 level;
865 u32 nritems;
866 struct btrfs_disk_key disk_key;
867
868 level = btrfs_header_level(root->node);
869 generation = trans->transid;
870 nritems = btrfs_header_nritems(root->node);
871 if (nritems > 0) {
872 if (level == 0)
873 btrfs_item_key(root->node, &disk_key, 0);
874 else
875 btrfs_node_key(root->node, &disk_key, 0);
876 key_objectid = btrfs_disk_key_objectid(&disk_key);
877 } else {
878 key_objectid = 0;
879 }
880 return btrfs_inc_extent_ref(trans, root, root->node->start,
881 root->node->len, owner_objectid,
882 generation, level, key_objectid);
883}
884 915
885int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 916int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
886 struct extent_buffer *buf) 917 struct extent_buffer *buf)
@@ -897,6 +928,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
897 if (!root->ref_cows) 928 if (!root->ref_cows)
898 return 0; 929 return 0;
899 930
931 mutex_lock(&root->fs_info->alloc_mutex);
900 level = btrfs_header_level(buf); 932 level = btrfs_header_level(buf);
901 nritems = btrfs_header_nritems(buf); 933 nritems = btrfs_header_nritems(buf);
902 for (i = 0; i < nritems; i++) { 934 for (i = 0; i < nritems; i++) {
@@ -913,7 +945,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
913 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 945 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
914 if (disk_bytenr == 0) 946 if (disk_bytenr == 0)
915 continue; 947 continue;
916 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, 948 ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr,
917 btrfs_file_extent_disk_num_bytes(buf, fi), 949 btrfs_file_extent_disk_num_bytes(buf, fi),
918 root->root_key.objectid, trans->transid, 950 root->root_key.objectid, trans->transid,
919 key.objectid, key.offset); 951 key.objectid, key.offset);
@@ -924,7 +956,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
924 } else { 956 } else {
925 bytenr = btrfs_node_blockptr(buf, i); 957 bytenr = btrfs_node_blockptr(buf, i);
926 btrfs_node_key_to_cpu(buf, &key, i); 958 btrfs_node_key_to_cpu(buf, &key, i);
927 ret = btrfs_inc_extent_ref(trans, root, bytenr, 959 ret = __btrfs_inc_extent_ref(trans, root, bytenr,
928 btrfs_level_size(root, level - 1), 960 btrfs_level_size(root, level - 1),
929 root->root_key.objectid, 961 root->root_key.objectid,
930 trans->transid, 962 trans->transid,
@@ -935,6 +967,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
935 } 967 }
936 } 968 }
937 } 969 }
970 mutex_unlock(&root->fs_info->alloc_mutex);
938 return 0; 971 return 0;
939fail: 972fail:
940 WARN_ON(1); 973 WARN_ON(1);
@@ -965,6 +998,7 @@ fail:
965 } 998 }
966 } 999 }
967#endif 1000#endif
1001 mutex_unlock(&root->fs_info->alloc_mutex);
968 return ret; 1002 return ret;
969} 1003}
970 1004
@@ -1019,6 +1053,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1019 if (!path) 1053 if (!path)
1020 return -ENOMEM; 1054 return -ENOMEM;
1021 1055
1056 mutex_lock(&root->fs_info->alloc_mutex);
1022 while(1) { 1057 while(1) {
1023 ret = find_first_extent_bit(block_group_cache, last, 1058 ret = find_first_extent_bit(block_group_cache, last,
1024 &start, &end, BLOCK_GROUP_DIRTY); 1059 &start, &end, BLOCK_GROUP_DIRTY);
@@ -1045,6 +1080,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1045 BLOCK_GROUP_DIRTY, GFP_NOFS); 1080 BLOCK_GROUP_DIRTY, GFP_NOFS);
1046 } 1081 }
1047 btrfs_free_path(path); 1082 btrfs_free_path(path);
1083 mutex_unlock(&root->fs_info->alloc_mutex);
1048 return werr; 1084 return werr;
1049} 1085}
1050 1086
@@ -1162,26 +1198,28 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1162 space_info->force_alloc = 0; 1198 space_info->force_alloc = 0;
1163 } 1199 }
1164 if (space_info->full) 1200 if (space_info->full)
1165 return 0; 1201 goto out;
1166 1202
1167 thresh = div_factor(space_info->total_bytes, 6); 1203 thresh = div_factor(space_info->total_bytes, 6);
1168 if (!force && 1204 if (!force &&
1169 (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < 1205 (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1170 thresh) 1206 thresh)
1171 return 0; 1207 goto out;
1172 1208
1209 mutex_lock(&extent_root->fs_info->chunk_mutex);
1173 ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags); 1210 ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1174 if (ret == -ENOSPC) { 1211 if (ret == -ENOSPC) {
1175printk("space info full %Lu\n", flags); 1212printk("space info full %Lu\n", flags);
1176 space_info->full = 1; 1213 space_info->full = 1;
1177 return 0; 1214 goto out;
1178 } 1215 }
1179 BUG_ON(ret); 1216 BUG_ON(ret);
1180 1217
1181 ret = btrfs_make_block_group(trans, extent_root, 0, flags, 1218 ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1182 BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); 1219 BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
1183 BUG_ON(ret); 1220 BUG_ON(ret);
1184 1221 mutex_unlock(&extent_root->fs_info->chunk_mutex);
1222out:
1185 return 0; 1223 return 0;
1186} 1224}
1187 1225
@@ -1318,6 +1356,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1318 struct extent_io_tree *free_space_cache; 1356 struct extent_io_tree *free_space_cache;
1319 free_space_cache = &root->fs_info->free_space_cache; 1357 free_space_cache = &root->fs_info->free_space_cache;
1320 1358
1359 mutex_lock(&root->fs_info->alloc_mutex);
1321 while(1) { 1360 while(1) {
1322 ret = find_first_extent_bit(unpin, 0, &start, &end, 1361 ret = find_first_extent_bit(unpin, 0, &start, &end,
1323 EXTENT_DIRTY); 1362 EXTENT_DIRTY);
@@ -1327,6 +1366,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1327 clear_extent_dirty(unpin, start, end, GFP_NOFS); 1366 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1328 set_extent_dirty(free_space_cache, start, end, GFP_NOFS); 1367 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1329 } 1368 }
1369 mutex_unlock(&root->fs_info->alloc_mutex);
1330 return 0; 1370 return 0;
1331} 1371}
1332 1372
@@ -1363,18 +1403,24 @@ static int finish_current_insert(struct btrfs_trans_handle *trans,
1363 GFP_NOFS); 1403 GFP_NOFS);
1364 eb = read_tree_block(extent_root, ins.objectid, ins.offset, 1404 eb = read_tree_block(extent_root, ins.objectid, ins.offset,
1365 trans->transid); 1405 trans->transid);
1406 btrfs_tree_lock(eb);
1366 level = btrfs_header_level(eb); 1407 level = btrfs_header_level(eb);
1367 if (level == 0) { 1408 if (level == 0) {
1368 btrfs_item_key(eb, &first, 0); 1409 btrfs_item_key(eb, &first, 0);
1369 } else { 1410 } else {
1370 btrfs_node_key(eb, &first, 0); 1411 btrfs_node_key(eb, &first, 0);
1371 } 1412 }
1413 btrfs_tree_unlock(eb);
1414 free_extent_buffer(eb);
1415 /*
1416 * the first key is just a hint, so the race we've created
1417 * against reading it is fine
1418 */
1372 err = btrfs_insert_extent_backref(trans, extent_root, path, 1419 err = btrfs_insert_extent_backref(trans, extent_root, path,
1373 start, extent_root->root_key.objectid, 1420 start, extent_root->root_key.objectid,
1374 0, level, 1421 0, level,
1375 btrfs_disk_key_objectid(&first)); 1422 btrfs_disk_key_objectid(&first));
1376 BUG_ON(err); 1423 BUG_ON(err);
1377 free_extent_buffer(eb);
1378 } 1424 }
1379 btrfs_free_path(path); 1425 btrfs_free_path(path);
1380 return 0; 1426 return 0;
@@ -1384,12 +1430,14 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1384 int pending) 1430 int pending)
1385{ 1431{
1386 int err = 0; 1432 int err = 0;
1387 struct extent_buffer *buf;
1388 1433
1389 if (!pending) { 1434 if (!pending) {
1435#if 0
1436 struct extent_buffer *buf;
1390 buf = btrfs_find_tree_block(root, bytenr, num_bytes); 1437 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1391 if (buf) { 1438 if (buf) {
1392 if (btrfs_buffer_uptodate(buf, 0)) { 1439 if (!btrfs_try_tree_lock(buf) &&
1440 btrfs_buffer_uptodate(buf, 0)) {
1393 u64 transid = 1441 u64 transid =
1394 root->fs_info->running_transaction->transid; 1442 root->fs_info->running_transaction->transid;
1395 u64 header_transid = 1443 u64 header_transid =
@@ -1398,12 +1446,15 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1398 !btrfs_header_flag(buf, 1446 !btrfs_header_flag(buf,
1399 BTRFS_HEADER_FLAG_WRITTEN)) { 1447 BTRFS_HEADER_FLAG_WRITTEN)) {
1400 clean_tree_block(NULL, root, buf); 1448 clean_tree_block(NULL, root, buf);
1449 btrfs_tree_unlock(buf);
1401 free_extent_buffer(buf); 1450 free_extent_buffer(buf);
1402 return 1; 1451 return 1;
1403 } 1452 }
1453 btrfs_tree_unlock(buf);
1404 } 1454 }
1405 free_extent_buffer(buf); 1455 free_extent_buffer(buf);
1406 } 1456 }
1457#endif
1407 update_pinned_extents(root, bytenr, num_bytes, 1); 1458 update_pinned_extents(root, bytenr, num_bytes, 1);
1408 } else { 1459 } else {
1409 set_extent_bits(&root->fs_info->pending_del, 1460 set_extent_bits(&root->fs_info->pending_del,
@@ -1586,10 +1637,11 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1586/* 1637/*
1587 * remove an extent from the root, returns 0 on success 1638 * remove an extent from the root, returns 0 on success
1588 */ 1639 */
1589int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root 1640static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
1590 *root, u64 bytenr, u64 num_bytes, 1641 struct btrfs_root *root, u64 bytenr,
1591 u64 root_objectid, u64 ref_generation, 1642 u64 num_bytes, u64 root_objectid,
1592 u64 owner_objectid, u64 owner_offset, int pin) 1643 u64 ref_generation, u64 owner_objectid,
1644 u64 owner_offset, int pin)
1593{ 1645{
1594 struct btrfs_root *extent_root = root->fs_info->extent_root; 1646 struct btrfs_root *extent_root = root->fs_info->extent_root;
1595 int pending_ret; 1647 int pending_ret;
@@ -1610,6 +1662,22 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1610 return ret ? ret : pending_ret; 1662 return ret ? ret : pending_ret;
1611} 1663}
1612 1664
1665int btrfs_free_extent(struct btrfs_trans_handle *trans,
1666 struct btrfs_root *root, u64 bytenr,
1667 u64 num_bytes, u64 root_objectid,
1668 u64 ref_generation, u64 owner_objectid,
1669 u64 owner_offset, int pin)
1670{
1671 int ret;
1672
1673 maybe_lock_mutex(root);
1674 ret = __btrfs_free_extent(trans, root, bytenr, num_bytes,
1675 root_objectid, ref_generation,
1676 owner_objectid, owner_offset, pin);
1677 maybe_unlock_mutex(root);
1678 return ret;
1679}
1680
1613static u64 stripe_align(struct btrfs_root *root, u64 val) 1681static u64 stripe_align(struct btrfs_root *root, u64 val)
1614{ 1682{
1615 u64 mask = ((u64)root->stripesize - 1); 1683 u64 mask = ((u64)root->stripesize - 1);
@@ -1679,12 +1747,12 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1679 block_group = btrfs_lookup_first_block_group(info, hint_byte); 1747 block_group = btrfs_lookup_first_block_group(info, hint_byte);
1680 if (!block_group) 1748 if (!block_group)
1681 hint_byte = search_start; 1749 hint_byte = search_start;
1682 block_group = btrfs_find_block_group(root, block_group, 1750 block_group = __btrfs_find_block_group(root, block_group,
1683 hint_byte, data, 1); 1751 hint_byte, data, 1);
1684 if (last_ptr && *last_ptr == 0 && block_group) 1752 if (last_ptr && *last_ptr == 0 && block_group)
1685 hint_byte = block_group->key.objectid; 1753 hint_byte = block_group->key.objectid;
1686 } else { 1754 } else {
1687 block_group = btrfs_find_block_group(root, 1755 block_group = __btrfs_find_block_group(root,
1688 trans->block_group, 1756 trans->block_group,
1689 search_start, data, 1); 1757 search_start, data, 1);
1690 } 1758 }
@@ -1806,7 +1874,7 @@ enospc:
1806 } 1874 }
1807 block_group = btrfs_lookup_first_block_group(info, search_start); 1875 block_group = btrfs_lookup_first_block_group(info, search_start);
1808 cond_resched(); 1876 cond_resched();
1809 block_group = btrfs_find_block_group(root, block_group, 1877 block_group = __btrfs_find_block_group(root, block_group,
1810 search_start, data, 0); 1878 search_start, data, 0);
1811 goto check_failed; 1879 goto check_failed;
1812 1880
@@ -1843,6 +1911,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1843 struct btrfs_path *path; 1911 struct btrfs_path *path;
1844 struct btrfs_key keys[2]; 1912 struct btrfs_key keys[2];
1845 1913
1914 maybe_lock_mutex(root);
1915
1846 if (data) { 1916 if (data) {
1847 alloc_profile = info->avail_data_alloc_bits & 1917 alloc_profile = info->avail_data_alloc_bits &
1848 info->data_alloc_profile; 1918 info->data_alloc_profile;
@@ -1892,9 +1962,10 @@ again:
1892 if (ret) { 1962 if (ret) {
1893 printk("allocation failed flags %Lu\n", data); 1963 printk("allocation failed flags %Lu\n", data);
1894 } 1964 }
1895 BUG_ON(ret); 1965 if (ret) {
1896 if (ret) 1966 BUG();
1897 return ret; 1967 goto out;
1968 }
1898 1969
1899 /* block accounting for super block */ 1970 /* block accounting for super block */
1900 super_used = btrfs_super_bytes_used(&info->super_copy); 1971 super_used = btrfs_super_bytes_used(&info->super_copy);
@@ -1953,11 +2024,11 @@ again:
1953 finish_current_insert(trans, extent_root); 2024 finish_current_insert(trans, extent_root);
1954 pending_ret = del_pending_extents(trans, extent_root); 2025 pending_ret = del_pending_extents(trans, extent_root);
1955 2026
1956 if (ret) { 2027 if (ret)
1957 return ret; 2028 goto out;
1958 }
1959 if (pending_ret) { 2029 if (pending_ret) {
1960 return pending_ret; 2030 ret = pending_ret;
2031 goto out;
1961 } 2032 }
1962 2033
1963update_block: 2034update_block:
@@ -1967,9 +2038,10 @@ update_block:
1967 ins->objectid, ins->offset); 2038 ins->objectid, ins->offset);
1968 BUG(); 2039 BUG();
1969 } 2040 }
1970 return 0; 2041out:
2042 maybe_unlock_mutex(root);
2043 return ret;
1971} 2044}
1972
1973/* 2045/*
1974 * helper function to allocate a block for a given tree 2046 * helper function to allocate a block for a given tree
1975 * returns the tree buffer or NULL. 2047 * returns the tree buffer or NULL.
@@ -1977,28 +2049,6 @@ update_block:
1977struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 2049struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1978 struct btrfs_root *root, 2050 struct btrfs_root *root,
1979 u32 blocksize, 2051 u32 blocksize,
1980 u64 root_objectid, u64 hint,
1981 u64 empty_size)
1982{
1983 u64 ref_generation;
1984
1985 if (root->ref_cows)
1986 ref_generation = trans->transid;
1987 else
1988 ref_generation = 0;
1989
1990
1991 return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
1992 ref_generation, 0, 0, hint, empty_size);
1993}
1994
1995/*
1996 * helper function to allocate a block for a given tree
1997 * returns the tree buffer or NULL.
1998 */
1999struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
2000 struct btrfs_root *root,
2001 u32 blocksize,
2002 u64 root_objectid, 2052 u64 root_objectid,
2003 u64 ref_generation, 2053 u64 ref_generation,
2004 u64 first_objectid, 2054 u64 first_objectid,
@@ -2026,6 +2076,7 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
2026 return ERR_PTR(-ENOMEM); 2076 return ERR_PTR(-ENOMEM);
2027 } 2077 }
2028 btrfs_set_header_generation(buf, trans->transid); 2078 btrfs_set_header_generation(buf, trans->transid);
2079 btrfs_tree_lock(buf);
2029 clean_tree_block(trans, root, buf); 2080 clean_tree_block(trans, root, buf);
2030 btrfs_set_buffer_uptodate(buf); 2081 btrfs_set_buffer_uptodate(buf);
2031 2082
@@ -2076,7 +2127,7 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
2076 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 2127 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2077 if (disk_bytenr == 0) 2128 if (disk_bytenr == 0)
2078 continue; 2129 continue;
2079 ret = btrfs_free_extent(trans, root, disk_bytenr, 2130 ret = __btrfs_free_extent(trans, root, disk_bytenr,
2080 btrfs_file_extent_disk_num_bytes(leaf, fi), 2131 btrfs_file_extent_disk_num_bytes(leaf, fi),
2081 leaf_owner, leaf_generation, 2132 leaf_owner, leaf_generation,
2082 key.objectid, key.offset, 0); 2133 key.objectid, key.offset, 0);
@@ -2151,6 +2202,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2151 int ret; 2202 int ret;
2152 u32 refs; 2203 u32 refs;
2153 2204
2205 mutex_lock(&root->fs_info->alloc_mutex);
2206
2154 WARN_ON(*level < 0); 2207 WARN_ON(*level < 0);
2155 WARN_ON(*level >= BTRFS_MAX_LEVEL); 2208 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2156 ret = lookup_extent_ref(trans, root, 2209 ret = lookup_extent_ref(trans, root,
@@ -2182,6 +2235,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2182 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 2235 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2183 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 2236 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2184 blocksize = btrfs_level_size(root, *level - 1); 2237 blocksize = btrfs_level_size(root, *level - 1);
2238
2185 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); 2239 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
2186 BUG_ON(ret); 2240 BUG_ON(ret);
2187 if (refs != 1) { 2241 if (refs != 1) {
@@ -2189,7 +2243,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2189 root_owner = btrfs_header_owner(parent); 2243 root_owner = btrfs_header_owner(parent);
2190 root_gen = btrfs_header_generation(parent); 2244 root_gen = btrfs_header_generation(parent);
2191 path->slots[*level]++; 2245 path->slots[*level]++;
2192 ret = btrfs_free_extent(trans, root, bytenr, 2246 ret = __btrfs_free_extent(trans, root, bytenr,
2193 blocksize, root_owner, 2247 blocksize, root_owner,
2194 root_gen, 0, 0, 1); 2248 root_gen, 0, 0, 1);
2195 BUG_ON(ret); 2249 BUG_ON(ret);
@@ -2201,9 +2255,11 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2201 reada_walk_down(root, cur, path->slots[*level]); 2255 reada_walk_down(root, cur, path->slots[*level]);
2202 2256
2203 mutex_unlock(&root->fs_info->fs_mutex); 2257 mutex_unlock(&root->fs_info->fs_mutex);
2258 mutex_unlock(&root->fs_info->alloc_mutex);
2204 next = read_tree_block(root, bytenr, blocksize, 2259 next = read_tree_block(root, bytenr, blocksize,
2205 ptr_gen); 2260 ptr_gen);
2206 mutex_lock(&root->fs_info->fs_mutex); 2261 mutex_lock(&root->fs_info->fs_mutex);
2262 mutex_lock(&root->fs_info->alloc_mutex);
2207 2263
2208 /* we've dropped the lock, double check */ 2264 /* we've dropped the lock, double check */
2209 ret = lookup_extent_ref(trans, root, bytenr, 2265 ret = lookup_extent_ref(trans, root, bytenr,
@@ -2216,7 +2272,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2216 2272
2217 path->slots[*level]++; 2273 path->slots[*level]++;
2218 free_extent_buffer(next); 2274 free_extent_buffer(next);
2219 ret = btrfs_free_extent(trans, root, bytenr, 2275 ret = __btrfs_free_extent(trans, root, bytenr,
2220 blocksize, 2276 blocksize,
2221 root_owner, 2277 root_owner,
2222 root_gen, 0, 0, 1); 2278 root_gen, 0, 0, 1);
@@ -2244,13 +2300,14 @@ out:
2244 } 2300 }
2245 2301
2246 root_gen = btrfs_header_generation(parent); 2302 root_gen = btrfs_header_generation(parent);
2247 ret = btrfs_free_extent(trans, root, path->nodes[*level]->start, 2303 ret = __btrfs_free_extent(trans, root, path->nodes[*level]->start,
2248 path->nodes[*level]->len, 2304 path->nodes[*level]->len,
2249 root_owner, root_gen, 0, 0, 1); 2305 root_owner, root_gen, 0, 0, 1);
2250 free_extent_buffer(path->nodes[*level]); 2306 free_extent_buffer(path->nodes[*level]);
2251 path->nodes[*level] = NULL; 2307 path->nodes[*level] = NULL;
2252 *level += 1; 2308 *level += 1;
2253 BUG_ON(ret); 2309 BUG_ON(ret);
2310 mutex_unlock(&root->fs_info->alloc_mutex);
2254 return 0; 2311 return 0;
2255} 2312}
2256 2313
@@ -2350,6 +2407,12 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2350 btrfs_node_key(node, &found_key, path->slots[level]); 2407 btrfs_node_key(node, &found_key, path->slots[level]);
2351 WARN_ON(memcmp(&found_key, &root_item->drop_progress, 2408 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2352 sizeof(found_key))); 2409 sizeof(found_key)));
2410 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
2411 if (path->nodes[i] && path->locks[i]) {
2412 path->locks[i] = 0;
2413 btrfs_tree_unlock(path->nodes[i]);
2414 }
2415 }
2353 } 2416 }
2354 while(1) { 2417 while(1) {
2355 wret = walk_down_tree(trans, root, path, &level); 2418 wret = walk_down_tree(trans, root, path, &level);
@@ -2383,6 +2446,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
2383 u64 end; 2446 u64 end;
2384 u64 ptr; 2447 u64 ptr;
2385 int ret; 2448 int ret;
2449
2450 mutex_lock(&info->alloc_mutex);
2386 while(1) { 2451 while(1) {
2387 ret = find_first_extent_bit(&info->block_group_cache, 0, 2452 ret = find_first_extent_bit(&info->block_group_cache, 0,
2388 &start, &end, (unsigned int)-1); 2453 &start, &end, (unsigned int)-1);
@@ -2402,6 +2467,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
2402 clear_extent_dirty(&info->free_space_cache, start, 2467 clear_extent_dirty(&info->free_space_cache, start,
2403 end, GFP_NOFS); 2468 end, GFP_NOFS);
2404 } 2469 }
2470 mutex_unlock(&info->alloc_mutex);
2405 return 0; 2471 return 0;
2406} 2472}
2407 2473
@@ -2678,6 +2744,7 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root,
2678 2744
2679 eb = read_tree_block(found_root, extent_key->objectid, 2745 eb = read_tree_block(found_root, extent_key->objectid,
2680 extent_key->offset, 0); 2746 extent_key->offset, 0);
2747 btrfs_tree_lock(eb);
2681 level = btrfs_header_level(eb); 2748 level = btrfs_header_level(eb);
2682 2749
2683 if (level == 0) 2750 if (level == 0)
@@ -2685,6 +2752,7 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root,
2685 else 2752 else
2686 btrfs_node_key_to_cpu(eb, &found_key, 0); 2753 btrfs_node_key_to_cpu(eb, &found_key, 0);
2687 2754
2755 btrfs_tree_unlock(eb);
2688 free_extent_buffer(eb); 2756 free_extent_buffer(eb);
2689 2757
2690 ret = find_root_for_ref(extent_root, path, &found_key, 2758 ret = find_root_for_ref(extent_root, path, &found_key,
@@ -2888,6 +2956,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
2888 int ret; 2956 int ret;
2889 int progress; 2957 int progress;
2890 2958
2959 mutex_lock(&root->fs_info->alloc_mutex);
2891 shrink_block_group = btrfs_lookup_block_group(root->fs_info, 2960 shrink_block_group = btrfs_lookup_block_group(root->fs_info,
2892 shrink_start); 2961 shrink_start);
2893 BUG_ON(!shrink_block_group); 2962 BUG_ON(!shrink_block_group);
@@ -3044,20 +3113,22 @@ next:
3044 (unsigned int)-1, GFP_NOFS); 3113 (unsigned int)-1, GFP_NOFS);
3045out: 3114out:
3046 btrfs_free_path(path); 3115 btrfs_free_path(path);
3116 mutex_unlock(&root->fs_info->alloc_mutex);
3047 return ret; 3117 return ret;
3048} 3118}
3049 3119
3050int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path, 3120int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
3051 struct btrfs_key *key) 3121 struct btrfs_key *key)
3052{ 3122{
3053 int ret; 3123 int ret = 0;
3054 struct btrfs_key found_key; 3124 struct btrfs_key found_key;
3055 struct extent_buffer *leaf; 3125 struct extent_buffer *leaf;
3056 int slot; 3126 int slot;
3057 3127
3058 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 3128 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
3059 if (ret < 0) 3129 if (ret < 0)
3060 return ret; 3130 goto out;
3131
3061 while(1) { 3132 while(1) {
3062 slot = path->slots[0]; 3133 slot = path->slots[0];
3063 leaf = path->nodes[0]; 3134 leaf = path->nodes[0];
@@ -3066,18 +3137,20 @@ int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
3066 if (ret == 0) 3137 if (ret == 0)
3067 continue; 3138 continue;
3068 if (ret < 0) 3139 if (ret < 0)
3069 goto error; 3140 goto out;
3070 break; 3141 break;
3071 } 3142 }
3072 btrfs_item_key_to_cpu(leaf, &found_key, slot); 3143 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3073 3144
3074 if (found_key.objectid >= key->objectid && 3145 if (found_key.objectid >= key->objectid &&
3075 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) 3146 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3076 return 0; 3147 ret = 0;
3148 goto out;
3149 }
3077 path->slots[0]++; 3150 path->slots[0]++;
3078 } 3151 }
3079 ret = -ENOENT; 3152 ret = -ENOENT;
3080error: 3153out:
3081 return ret; 3154 return ret;
3082} 3155}
3083 3156
@@ -3103,6 +3176,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
3103 if (!path) 3176 if (!path)
3104 return -ENOMEM; 3177 return -ENOMEM;
3105 3178
3179 mutex_lock(&root->fs_info->alloc_mutex);
3106 while(1) { 3180 while(1) {
3107 ret = find_first_block_group(root, path, &key); 3181 ret = find_first_block_group(root, path, &key);
3108 if (ret > 0) { 3182 if (ret > 0) {
@@ -3158,6 +3232,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
3158 ret = 0; 3232 ret = 0;
3159error: 3233error:
3160 btrfs_free_path(path); 3234 btrfs_free_path(path);
3235 mutex_unlock(&root->fs_info->alloc_mutex);
3161 return ret; 3236 return ret;
3162} 3237}
3163 3238
@@ -3205,5 +3280,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3205 ret = del_pending_extents(trans, extent_root); 3280 ret = del_pending_extents(trans, extent_root);
3206 BUG_ON(ret); 3281 BUG_ON(ret);
3207 set_avail_alloc_bits(extent_root->fs_info, type); 3282 set_avail_alloc_bits(extent_root->fs_info, type);
3283
3208 return 0; 3284 return 0;
3209} 3285}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 17c508a941eb..bd15cdcaba95 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2889,7 +2889,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
2889 2889
2890 for (i = 0; i < num_pages; i++) { 2890 for (i = 0; i < num_pages; i++) {
2891 page = extent_buffer_page(eb, i); 2891 page = extent_buffer_page(eb, i);
2892 lock_page(page);
2893 if (i == 0) 2892 if (i == 0)
2894 set_page_extent_head(page, eb->len); 2893 set_page_extent_head(page, eb->len);
2895 else 2894 else
@@ -2907,7 +2906,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
2907 end = start + PAGE_CACHE_SIZE - 1; 2906 end = start + PAGE_CACHE_SIZE - 1;
2908 if (test_range_bit(tree, start, end, 2907 if (test_range_bit(tree, start, end,
2909 EXTENT_DIRTY, 0)) { 2908 EXTENT_DIRTY, 0)) {
2910 unlock_page(page);
2911 continue; 2909 continue;
2912 } 2910 }
2913 } 2911 }
@@ -2919,7 +2917,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
2919 PAGECACHE_TAG_DIRTY); 2917 PAGECACHE_TAG_DIRTY);
2920 } 2918 }
2921 read_unlock_irq(&page->mapping->tree_lock); 2919 read_unlock_irq(&page->mapping->tree_lock);
2922 unlock_page(page);
2923 } 2920 }
2924 return 0; 2921 return 0;
2925} 2922}
@@ -2948,17 +2945,12 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
2948 * on us if the page isn't already dirty. 2945 * on us if the page isn't already dirty.
2949 */ 2946 */
2950 if (i == 0) { 2947 if (i == 0) {
2951 lock_page(page);
2952 set_page_extent_head(page, eb->len); 2948 set_page_extent_head(page, eb->len);
2953 } else if (PagePrivate(page) && 2949 } else if (PagePrivate(page) &&
2954 page->private != EXTENT_PAGE_PRIVATE) { 2950 page->private != EXTENT_PAGE_PRIVATE) {
2955 lock_page(page);
2956 set_page_extent_mapped(page); 2951 set_page_extent_mapped(page);
2957 unlock_page(page);
2958 } 2952 }
2959 __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); 2953 __set_page_dirty_nobuffers(extent_buffer_page(eb, i));
2960 if (i == 0)
2961 unlock_page(page);
2962 } 2954 }
2963 return set_extent_dirty(tree, eb->start, 2955 return set_extent_dirty(tree, eb->start,
2964 eb->start + eb->len - 1, GFP_NOFS); 2956 eb->start + eb->len - 1, GFP_NOFS);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0c79346fd2c9..61bd8953a683 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -115,6 +115,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
115 trans = btrfs_start_transaction(root, 1); 115 trans = btrfs_start_transaction(root, 1);
116 BUG_ON(!trans); 116 BUG_ON(!trans);
117 btrfs_set_trans_block_group(trans, inode); 117 btrfs_set_trans_block_group(trans, inode);
118 mutex_unlock(&root->fs_info->fs_mutex);
118 119
119 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 120 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
120 num_bytes = max(blocksize, num_bytes); 121 num_bytes = max(blocksize, num_bytes);
@@ -159,6 +160,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
159 btrfs_add_ordered_inode(inode); 160 btrfs_add_ordered_inode(inode);
160 btrfs_update_inode(trans, root, inode); 161 btrfs_update_inode(trans, root, inode);
161out: 162out:
163 mutex_lock(&root->fs_info->fs_mutex);
162 btrfs_end_transaction(trans, root); 164 btrfs_end_transaction(trans, root);
163 return ret; 165 return ret;
164} 166}
@@ -349,10 +351,12 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
349 351
350 mutex_lock(&root->fs_info->fs_mutex); 352 mutex_lock(&root->fs_info->fs_mutex);
351 trans = btrfs_start_transaction(root, 1); 353 trans = btrfs_start_transaction(root, 1);
354 mutex_unlock(&root->fs_info->fs_mutex);
352 355
353 btrfs_set_trans_block_group(trans, inode); 356 btrfs_set_trans_block_group(trans, inode);
354 btrfs_csum_file_blocks(trans, root, inode, bio, sums); 357 btrfs_csum_file_blocks(trans, root, inode, bio, sums);
355 358
359 mutex_lock(&root->fs_info->fs_mutex);
356 ret = btrfs_end_transaction(trans, root); 360 ret = btrfs_end_transaction(trans, root);
357 BUG_ON(ret); 361 BUG_ON(ret);
358 mutex_unlock(&root->fs_info->fs_mutex); 362 mutex_unlock(&root->fs_info->fs_mutex);
@@ -807,6 +811,7 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
807 goto err; 811 goto err;
808 } 812 }
809 ret = btrfs_delete_one_dir_name(trans, root, path, di); 813 ret = btrfs_delete_one_dir_name(trans, root, path, di);
814 btrfs_release_path(root, path);
810 815
811 dentry->d_inode->i_ctime = dir->i_ctime; 816 dentry->d_inode->i_ctime = dir->i_ctime;
812 ret = btrfs_del_inode_ref(trans, root, name, name_len, 817 ret = btrfs_del_inode_ref(trans, root, name, name_len,
@@ -881,8 +886,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
881 struct btrfs_trans_handle *trans; 886 struct btrfs_trans_handle *trans;
882 unsigned long nr = 0; 887 unsigned long nr = 0;
883 888
884 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) 889 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
885 return -ENOTEMPTY; 890 return -ENOTEMPTY;
891 }
886 892
887 mutex_lock(&root->fs_info->fs_mutex); 893 mutex_lock(&root->fs_info->fs_mutex);
888 ret = btrfs_check_free_space(root, 1, 1); 894 ret = btrfs_check_free_space(root, 1, 1);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 6fb455802759..3fbf74e93dba 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -43,6 +43,7 @@
43#include "ioctl.h" 43#include "ioctl.h"
44#include "print-tree.h" 44#include "print-tree.h"
45#include "volumes.h" 45#include "volumes.h"
46#include "locking.h"
46 47
47 48
48 49
@@ -75,9 +76,9 @@ static noinline int create_subvol(struct btrfs_root *root, char *name,
75 if (ret) 76 if (ret)
76 goto fail; 77 goto fail;
77 78
78 leaf = __btrfs_alloc_free_block(trans, root, root->leafsize, 79 leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
79 objectid, trans->transid, 0, 0, 80 objectid, trans->transid, 0, 0,
80 0, 0); 81 0, 0);
81 if (IS_ERR(leaf)) 82 if (IS_ERR(leaf))
82 return PTR_ERR(leaf); 83 return PTR_ERR(leaf);
83 84
@@ -108,6 +109,7 @@ static noinline int create_subvol(struct btrfs_root *root, char *name,
108 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); 109 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
109 root_item.drop_level = 0; 110 root_item.drop_level = 0;
110 111
112 btrfs_tree_unlock(leaf);
111 free_extent_buffer(leaf); 113 free_extent_buffer(leaf);
112 leaf = NULL; 114 leaf = NULL;
113 115
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
new file mode 100644
index 000000000000..80813a307b4b
--- /dev/null
+++ b/fs/btrfs/locking.c
@@ -0,0 +1,50 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18#include <linux/sched.h>
19#include <linux/gfp.h>
20#include <linux/pagemap.h>
21#include <linux/spinlock.h>
22#include <linux/page-flags.h>
23#include <linux/bug.h>
24#include "ctree.h"
25#include "extent_io.h"
26#include "locking.h"
27
28int btrfs_tree_lock(struct extent_buffer *eb)
29{
30 lock_page(eb->first_page);
31 return 0;
32}
33
34int btrfs_try_tree_lock(struct extent_buffer *eb)
35{
36 return TestSetPageLocked(eb->first_page);
37}
38
39int btrfs_tree_unlock(struct extent_buffer *eb)
40{
41 WARN_ON(!PageLocked(eb->first_page));
42 unlock_page(eb->first_page);
43 return 0;
44}
45
46int btrfs_tree_locked(struct extent_buffer *eb)
47{
48 return PageLocked(eb->first_page);
49}
50
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
new file mode 100644
index 000000000000..2dab96d8280e
--- /dev/null
+++ b/fs/btrfs/locking.h
@@ -0,0 +1,26 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19#ifndef __BTRFS_LOCKING_
20#define __BTRFS_LOCKING_
21
22int btrfs_tree_lock(struct extent_buffer *eb);
23int btrfs_tree_unlock(struct extent_buffer *eb);
24int btrfs_tree_locked(struct extent_buffer *eb);
25int btrfs_try_tree_lock(struct extent_buffer *eb);
26#endif
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index f04684f7fea3..1ed433a71493 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -23,6 +23,7 @@
23#include "ctree.h" 23#include "ctree.h"
24#include "disk-io.h" 24#include "disk-io.h"
25#include "transaction.h" 25#include "transaction.h"
26#include "locking.h"
26 27
27static int total_trans = 0; 28static int total_trans = 0;
28extern struct kmem_cache *btrfs_trans_handle_cachep; 29extern struct kmem_cache *btrfs_trans_handle_cachep;
@@ -96,8 +97,7 @@ static noinline int record_root_in_trans(struct btrfs_root *root)
96 radix_tree_tag_set(&root->fs_info->fs_roots_radix, 97 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
97 (unsigned long)root->root_key.objectid, 98 (unsigned long)root->root_key.objectid,
98 BTRFS_ROOT_DEFRAG_TAG); 99 BTRFS_ROOT_DEFRAG_TAG);
99 root->commit_root = root->node; 100 root->commit_root = btrfs_root_node(root);
100 extent_buffer_get(root->node);
101 } else { 101 } else {
102 WARN_ON(1); 102 WARN_ON(1);
103 } 103 }
@@ -559,6 +559,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
559 struct btrfs_root *tree_root = fs_info->tree_root; 559 struct btrfs_root *tree_root = fs_info->tree_root;
560 struct btrfs_root *root = pending->root; 560 struct btrfs_root *root = pending->root;
561 struct extent_buffer *tmp; 561 struct extent_buffer *tmp;
562 struct extent_buffer *old;
562 int ret; 563 int ret;
563 int namelen; 564 int namelen;
564 u64 objectid; 565 u64 objectid;
@@ -578,16 +579,18 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
578 key.offset = 1; 579 key.offset = 1;
579 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 580 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
580 581
581 extent_buffer_get(root->node); 582 old = btrfs_lock_root_node(root);
582 btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); 583 btrfs_cow_block(trans, root, old, NULL, 0, &old);
583 free_extent_buffer(tmp);
584 584
585 btrfs_copy_root(trans, root, root->node, &tmp, objectid); 585 btrfs_copy_root(trans, root, old, &tmp, objectid);
586 btrfs_tree_unlock(old);
587 free_extent_buffer(old);
586 588
587 btrfs_set_root_bytenr(new_root_item, tmp->start); 589 btrfs_set_root_bytenr(new_root_item, tmp->start);
588 btrfs_set_root_level(new_root_item, btrfs_header_level(tmp)); 590 btrfs_set_root_level(new_root_item, btrfs_header_level(tmp));
589 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, 591 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
590 new_root_item); 592 new_root_item);
593 btrfs_tree_unlock(tmp);
591 free_extent_buffer(tmp); 594 free_extent_buffer(tmp);
592 if (ret) 595 if (ret)
593 goto fail; 596 goto fail;
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index 155961c7b4d5..fab851d85383 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -181,6 +181,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
181 if (root->fs_info->extent_root == root) 181 if (root->fs_info->extent_root == root)
182 is_extent = 1; 182 is_extent = 1;
183 183
184 goto out;
185
184 if (root->ref_cows == 0 && !is_extent) 186 if (root->ref_cows == 0 && !is_extent)
185 goto out; 187 goto out;
186 188