aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ctree.c
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2008-08-01 15:11:20 -0400
committerChris Mason <chris.mason@oracle.com>2008-09-25 11:04:06 -0400
commit65b51a009e29e64c0951f21ea17fdc66bbb0fbd7 (patch)
tree800926527fad4c12ca64083816f33be3d716ec13 /fs/btrfs/ctree.c
parent18e35e0ab337ec99c7e03e9ae917745a352c0bb1 (diff)
btrfs_search_slot: reduce lock contention by cowing in two stages
A btree block cow has two parts, the first is to allocate a destination block and the second is to copy the old bock over. The first part needs locks in the extent allocation tree, and may need to do IO. This changeset splits that into a separate function that can be called without any tree locks held. btrfs_search_slot is changed to drop its path and start over if it has to COW a contended block. This often means that many writers will pre-alloc a new destination for a the same contended block, but they cache their prealloc for later use on lower levels in the tree. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r--fs/btrfs/ctree.c147
1 files changed, 120 insertions, 27 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c4792062dd53..7114faafa9d4 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -181,7 +181,8 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
181 struct extent_buffer *buf, 181 struct extent_buffer *buf,
182 struct extent_buffer *parent, int parent_slot, 182 struct extent_buffer *parent, int parent_slot,
183 struct extent_buffer **cow_ret, 183 struct extent_buffer **cow_ret,
184 u64 search_start, u64 empty_size) 184 u64 search_start, u64 empty_size,
185 u64 prealloc_dest)
185{ 186{
186 u64 root_gen; 187 u64 root_gen;
187 struct extent_buffer *cow; 188 struct extent_buffer *cow;
@@ -216,10 +217,27 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
216 } else { 217 } else {
217 first_key.objectid = 0; 218 first_key.objectid = 0;
218 } 219 }
219 cow = btrfs_alloc_free_block(trans, root, buf->len, 220 if (prealloc_dest) {
220 root->root_key.objectid, 221 struct btrfs_key ins;
221 root_gen, first_key.objectid, level, 222
222 search_start, empty_size); 223 ins.objectid = prealloc_dest;
224 ins.offset = buf->len;
225 ins.type = BTRFS_EXTENT_ITEM_KEY;
226
227 ret = btrfs_alloc_reserved_extent(trans, root,
228 root->root_key.objectid,
229 root_gen, level,
230 first_key.objectid,
231 &ins);
232 BUG_ON(ret);
233 cow = btrfs_init_new_buffer(trans, root, prealloc_dest,
234 buf->len);
235 } else {
236 cow = btrfs_alloc_free_block(trans, root, buf->len,
237 root->root_key.objectid,
238 root_gen, first_key.objectid,
239 level, search_start, empty_size);
240 }
223 if (IS_ERR(cow)) 241 if (IS_ERR(cow))
224 return PTR_ERR(cow); 242 return PTR_ERR(cow);
225 243
@@ -279,7 +297,7 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans,
279int btrfs_cow_block(struct btrfs_trans_handle *trans, 297int btrfs_cow_block(struct btrfs_trans_handle *trans,
280 struct btrfs_root *root, struct extent_buffer *buf, 298 struct btrfs_root *root, struct extent_buffer *buf,
281 struct extent_buffer *parent, int parent_slot, 299 struct extent_buffer *parent, int parent_slot,
282 struct extent_buffer **cow_ret) 300 struct extent_buffer **cow_ret, u64 prealloc_dest)
283{ 301{
284 u64 search_start; 302 u64 search_start;
285 u64 header_trans; 303 u64 header_trans;
@@ -302,12 +320,14 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans,
302 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 320 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
303 *cow_ret = buf; 321 *cow_ret = buf;
304 spin_unlock(&root->fs_info->hash_lock); 322 spin_unlock(&root->fs_info->hash_lock);
323 WARN_ON(prealloc_dest);
305 return 0; 324 return 0;
306 } 325 }
307 spin_unlock(&root->fs_info->hash_lock); 326 spin_unlock(&root->fs_info->hash_lock);
308 search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); 327 search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
309 ret = __btrfs_cow_block(trans, root, buf, parent, 328 ret = __btrfs_cow_block(trans, root, buf, parent,
310 parent_slot, cow_ret, search_start, 0); 329 parent_slot, cow_ret, search_start, 0,
330 prealloc_dest);
311 return ret; 331 return ret;
312} 332}
313 333
@@ -451,7 +471,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
451 err = __btrfs_cow_block(trans, root, cur, parent, i, 471 err = __btrfs_cow_block(trans, root, cur, parent, i,
452 &cur, search_start, 472 &cur, search_start,
453 min(16 * blocksize, 473 min(16 * blocksize,
454 (end_slot - i) * blocksize)); 474 (end_slot - i) * blocksize), 0);
455 if (err) { 475 if (err) {
456 btrfs_tree_unlock(cur); 476 btrfs_tree_unlock(cur);
457 free_extent_buffer(cur); 477 free_extent_buffer(cur);
@@ -803,7 +823,7 @@ static int balance_level(struct btrfs_trans_handle *trans,
803 child = read_node_slot(root, mid, 0); 823 child = read_node_slot(root, mid, 0);
804 btrfs_tree_lock(child); 824 btrfs_tree_lock(child);
805 BUG_ON(!child); 825 BUG_ON(!child);
806 ret = btrfs_cow_block(trans, root, child, mid, 0, &child); 826 ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0);
807 BUG_ON(ret); 827 BUG_ON(ret);
808 828
809 spin_lock(&root->node_lock); 829 spin_lock(&root->node_lock);
@@ -836,7 +856,7 @@ static int balance_level(struct btrfs_trans_handle *trans,
836 if (left) { 856 if (left) {
837 btrfs_tree_lock(left); 857 btrfs_tree_lock(left);
838 wret = btrfs_cow_block(trans, root, left, 858 wret = btrfs_cow_block(trans, root, left,
839 parent, pslot - 1, &left); 859 parent, pslot - 1, &left, 0);
840 if (wret) { 860 if (wret) {
841 ret = wret; 861 ret = wret;
842 goto enospc; 862 goto enospc;
@@ -846,7 +866,7 @@ static int balance_level(struct btrfs_trans_handle *trans,
846 if (right) { 866 if (right) {
847 btrfs_tree_lock(right); 867 btrfs_tree_lock(right);
848 wret = btrfs_cow_block(trans, root, right, 868 wret = btrfs_cow_block(trans, root, right,
849 parent, pslot + 1, &right); 869 parent, pslot + 1, &right, 0);
850 if (wret) { 870 if (wret) {
851 ret = wret; 871 ret = wret;
852 goto enospc; 872 goto enospc;
@@ -1021,7 +1041,7 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
1021 wret = 1; 1041 wret = 1;
1022 } else { 1042 } else {
1023 ret = btrfs_cow_block(trans, root, left, parent, 1043 ret = btrfs_cow_block(trans, root, left, parent,
1024 pslot - 1, &left); 1044 pslot - 1, &left, 0);
1025 if (ret) 1045 if (ret)
1026 wret = 1; 1046 wret = 1;
1027 else { 1047 else {
@@ -1069,7 +1089,7 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
1069 } else { 1089 } else {
1070 ret = btrfs_cow_block(trans, root, right, 1090 ret = btrfs_cow_block(trans, root, right,
1071 parent, pslot + 1, 1091 parent, pslot + 1,
1072 &right); 1092 &right, 0);
1073 if (ret) 1093 if (ret)
1074 wret = 1; 1094 wret = 1;
1075 else { 1095 else {
@@ -1245,6 +1265,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1245 u8 lowest_level = 0; 1265 u8 lowest_level = 0;
1246 u64 blocknr; 1266 u64 blocknr;
1247 u64 gen; 1267 u64 gen;
1268 struct btrfs_key prealloc_block;
1248 1269
1249 lowest_level = p->lowest_level; 1270 lowest_level = p->lowest_level;
1250 WARN_ON(lowest_level && ins_len); 1271 WARN_ON(lowest_level && ins_len);
@@ -1253,6 +1274,9 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1253 !mutex_is_locked(&root->fs_info->alloc_mutex)); 1274 !mutex_is_locked(&root->fs_info->alloc_mutex));
1254 if (ins_len < 0) 1275 if (ins_len < 0)
1255 lowest_unlock = 2; 1276 lowest_unlock = 2;
1277
1278 prealloc_block.objectid = 0;
1279
1256again: 1280again:
1257 if (p->skip_locking) 1281 if (p->skip_locking)
1258 b = btrfs_root_node(root); 1282 b = btrfs_root_node(root);
@@ -1261,27 +1285,82 @@ again:
1261 1285
1262 while (b) { 1286 while (b) {
1263 level = btrfs_header_level(b); 1287 level = btrfs_header_level(b);
1288
1289 /*
1290 * setup the path here so we can release it under lock
1291 * contention with the cow code
1292 */
1293 p->nodes[level] = b;
1294 if (!p->skip_locking)
1295 p->locks[level] = 1;
1296
1264 if (cow) { 1297 if (cow) {
1265 int wret; 1298 int wret;
1299
1300 /* is a cow on this block not required */
1301 spin_lock(&root->fs_info->hash_lock);
1302 if (btrfs_header_generation(b) == trans->transid &&
1303 !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
1304 spin_unlock(&root->fs_info->hash_lock);
1305 goto cow_done;
1306 }
1307 spin_unlock(&root->fs_info->hash_lock);
1308
1309 /* ok, we have to cow, is our old prealloc the right
1310 * size?
1311 */
1312 if (prealloc_block.objectid &&
1313 prealloc_block.offset != b->len) {
1314 btrfs_free_reserved_extent(root,
1315 prealloc_block.objectid,
1316 prealloc_block.offset);
1317 prealloc_block.objectid = 0;
1318 }
1319
1320 /*
1321 * for higher level blocks, try not to allocate blocks
1322 * with the block and the parent locks held.
1323 */
1324 if (level > 1 && !prealloc_block.objectid &&
1325 btrfs_path_lock_waiting(p, level)) {
1326 u32 size = b->len;
1327 u64 hint = b->start;
1328
1329 btrfs_release_path(root, p);
1330 ret = btrfs_reserve_extent(trans, root,
1331 size, size, 0,
1332 hint, (u64)-1,
1333 &prealloc_block, 0);
1334 BUG_ON(ret);
1335 goto again;
1336 }
1337
1266 wret = btrfs_cow_block(trans, root, b, 1338 wret = btrfs_cow_block(trans, root, b,
1267 p->nodes[level + 1], 1339 p->nodes[level + 1],
1268 p->slots[level + 1], 1340 p->slots[level + 1],
1269 &b); 1341 &b, prealloc_block.objectid);
1342 prealloc_block.objectid = 0;
1270 if (wret) { 1343 if (wret) {
1271 free_extent_buffer(b); 1344 free_extent_buffer(b);
1272 return wret; 1345 ret = wret;
1346 goto done;
1273 } 1347 }
1274 } 1348 }
1349cow_done:
1275 BUG_ON(!cow && ins_len); 1350 BUG_ON(!cow && ins_len);
1276 if (level != btrfs_header_level(b)) 1351 if (level != btrfs_header_level(b))
1277 WARN_ON(1); 1352 WARN_ON(1);
1278 level = btrfs_header_level(b); 1353 level = btrfs_header_level(b);
1354
1279 p->nodes[level] = b; 1355 p->nodes[level] = b;
1280 if (!p->skip_locking) 1356 if (!p->skip_locking)
1281 p->locks[level] = 1; 1357 p->locks[level] = 1;
1358
1282 ret = check_block(root, p, level); 1359 ret = check_block(root, p, level);
1283 if (ret) 1360 if (ret) {
1284 return -1; 1361 ret = -1;
1362 goto done;
1363 }
1285 1364
1286 ret = bin_search(b, key, level, &slot); 1365 ret = bin_search(b, key, level, &slot);
1287 if (level != 0) { 1366 if (level != 0) {
@@ -1292,15 +1371,19 @@ again:
1292 BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { 1371 BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
1293 int sret = split_node(trans, root, p, level); 1372 int sret = split_node(trans, root, p, level);
1294 BUG_ON(sret > 0); 1373 BUG_ON(sret > 0);
1295 if (sret) 1374 if (sret) {
1296 return sret; 1375 ret = sret;
1376 goto done;
1377 }
1297 b = p->nodes[level]; 1378 b = p->nodes[level];
1298 slot = p->slots[level]; 1379 slot = p->slots[level];
1299 } else if (ins_len < 0) { 1380 } else if (ins_len < 0) {
1300 int sret = balance_level(trans, root, p, 1381 int sret = balance_level(trans, root, p,
1301 level); 1382 level);
1302 if (sret) 1383 if (sret) {
1303 return sret; 1384 ret = sret;
1385 goto done;
1386 }
1304 b = p->nodes[level]; 1387 b = p->nodes[level];
1305 if (!b) { 1388 if (!b) {
1306 btrfs_release_path(NULL, p); 1389 btrfs_release_path(NULL, p);
@@ -1362,14 +1445,24 @@ again:
1362 int sret = split_leaf(trans, root, key, 1445 int sret = split_leaf(trans, root, key,
1363 p, ins_len, ret == 0); 1446 p, ins_len, ret == 0);
1364 BUG_ON(sret > 0); 1447 BUG_ON(sret > 0);
1365 if (sret) 1448 if (sret) {
1366 return sret; 1449 ret = sret;
1450 goto done;
1451 }
1367 } 1452 }
1368 unlock_up(p, level, lowest_unlock); 1453 unlock_up(p, level, lowest_unlock);
1369 return ret; 1454 goto done;
1370 } 1455 }
1371 } 1456 }
1372 return 1; 1457 ret = 1;
1458done:
1459 if (prealloc_block.objectid) {
1460 btrfs_free_reserved_extent(root,
1461 prealloc_block.objectid,
1462 prealloc_block.offset);
1463 }
1464
1465 return ret;
1373} 1466}
1374 1467
1375/* 1468/*
@@ -1840,7 +1933,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1840 1933
1841 /* cow and double check */ 1934 /* cow and double check */
1842 ret = btrfs_cow_block(trans, root, right, upper, 1935 ret = btrfs_cow_block(trans, root, right, upper,
1843 slot + 1, &right); 1936 slot + 1, &right, 0);
1844 if (ret) 1937 if (ret)
1845 goto out_unlock; 1938 goto out_unlock;
1846 1939
@@ -2021,7 +2114,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
2021 2114
2022 /* cow and double check */ 2115 /* cow and double check */
2023 ret = btrfs_cow_block(trans, root, left, 2116 ret = btrfs_cow_block(trans, root, left,
2024 path->nodes[1], slot - 1, &left); 2117 path->nodes[1], slot - 1, &left, 0);
2025 if (ret) { 2118 if (ret) {
2026 /* we hit -ENOSPC, but it isn't fatal here */ 2119 /* we hit -ENOSPC, but it isn't fatal here */
2027 ret = 1; 2120 ret = 1;