author    Yan Zheng <zheng.yan@oracle.com>	2009-06-10 10:45:14 -0400
committer Chris Mason <chris.mason@oracle.com>	2009-06-10 11:29:46 -0400
commit    5d4f98a28c7d334091c1b7744f48a1acdd2a4ae0
tree      c611d7d824cbcdb777dd2d8e33e2ed1c5df8a9c6 /fs/btrfs/delayed-ref.c
parent    5c939df56c3ea018b58e5aa76181284c2053d699
Btrfs: Mixed back reference (FORWARD ROLLING FORMAT CHANGE)
This commit introduces a new kind of back reference for btrfs metadata. Once a filesystem has been mounted with this commit, IT WILL NO LONGER BE MOUNTABLE BY OLDER KERNELS.

When a tree block in a subvolume tree is cow'd, the reference counts of all extents it points to are increased by one. At transaction commit time, the old root of the subvolume is recorded in a "dead root" data structure, and the btree it points to is later walked, dropping reference counts and freeing any blocks where the reference count goes to 0.

The increments done during cow and the decrements done after commit cancel out, and the walk is a very expensive way to go about freeing the blocks that are no longer referenced by the new btree root. This commit reduces the transaction overhead by avoiding the need for dead root records.

When a non-shared tree block is cow'd, we free the old block at once, and the new block inherits the old block's references. When a tree block with reference count > 1 is cow'd, we increase the reference counts of all extents the new block points to by one, and decrease the old block's reference count by one.

This dead tree avoidance code removes the need to modify the reference counts of lower level extents when a non-shared tree block is cow'd. But we still need to update back refs for all pointers in the block, because the location of the block is recorded in the back ref item.

We can solve this by introducing a new type of back ref. The new back ref provides information about the pointer's key, its level and the tree it lives in. This information allows us to find the pointer by searching the tree. The shortcoming of the new back ref is that it only works for pointers in tree blocks referenced by their owner trees. This is mostly a problem for snapshots, where resolving one of these fuzzy back references would be O(number_of_snapshots) and quite slow. The solution used here is to use the fuzzy back references in the common case where a given tree block is only referenced by one root, and use the full back references when multiple roots have a reference on a given block.

This commit adds a per-subvolume red-black tree to keep track of cached inodes. The red-black tree helps the balancing code find cached inodes whose inode numbers are within a given range.

This commit also improves the balancing code by introducing several data structures to keep the state of balancing. The most important one is the back ref cache, which caches how the upper level tree blocks are referenced. This greatly reduces the overhead of checking back refs.

The improved balancing code scales significantly better with a large number of snapshots.

This is a very large commit and was written in a number of pieces. But, they depend heavily on the disk format change and were squashed together to make sure git bisect didn't end up in a bad state wrt space balancing or the format change.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
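To make the cow rule above concrete, here is a minimal userspace model of it. The block type and cow_block() helper are hypothetical illustrations, not the btrfs structures:

#include <stdlib.h>

/* hypothetical toy type, not a btrfs struct */
struct block {
	int refs;		/* reference count of this tree block */
	int nchildren;		/* number of lower-level blocks pointed to */
	struct block **child;
};

/*
 * sketch of the cow rule described in the commit message: a non-shared
 * block (refs == 1) is freed at once and its copy inherits the old
 * references untouched; a shared block (refs > 1) stays live, every
 * child gains one reference for the copy, and the old block drops one.
 */
static struct block *cow_block(struct block *old)
{
	struct block *copy = malloc(sizeof(*copy));

	if (!copy)
		return NULL;
	copy->refs = 1;
	copy->nchildren = old->nchildren;
	copy->child = old->child;

	if (old->refs == 1) {
		/* non-shared: the children's counts are left alone */
		free(old);
	} else {
		for (int i = 0; i < old->nchildren; i++)
			copy->child[i]->refs++;
		old->refs--;
	}
	return copy;
}

Either way the net change seen by the children is the same as under the old cow-plus-dead-root scheme, but no per-commit tree walk is needed.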
Diffstat (limited to 'fs/btrfs/delayed-ref.c')
-rw-r--r--  fs/btrfs/delayed-ref.c  509
1 file changed, 380 insertions, 129 deletions
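The heart of the diff below is a new sort order for the delayed-ref rb tree: entries are keyed by extent bytenr first, the head node sorts after every plain ref of the same bytenr, and plain refs are further ordered by back-ref type and then by the type-specific fields. A stand-alone sketch of that composite comparator, using simplified stand-in types rather than the kernel structs:

#include <stdint.h>

/* simplified stand-in for btrfs_delayed_ref_node; only the fields
 * needed to show the ordering, not the real layout */
struct node {
	uint64_t bytenr;
	int is_head;		/* head sorts after all refs of its bytenr */
	int type;		/* back-ref type, compared next */
	uint64_t root_or_parent;	/* type-specific tiebreaker */
};

static int cmp_u64(uint64_t a, uint64_t b)
{
	return (a > b) - (a < b);
}

/* composite key: (bytenr, is_head, type, type-specific fields) */
static int comp_node(const struct node *a, const struct node *b)
{
	int cmp = cmp_u64(a->bytenr, b->bytenr);

	if (cmp)
		return cmp;
	if (a->is_head || b->is_head)
		return a->is_head - b->is_head;	/* head compares greater */
	cmp = (a->type > b->type) - (a->type < b->type);
	if (cmp)
		return cmp;
	return cmp_u64(a->root_or_parent, b->root_or_parent);
}

Because the head compares greater than every ref that shares its bytenr, the new find_ref_head() can treat any non-head node with a matching bytenr as "too small" and keep walking right, which is exactly what its cmp = 1 branch does.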
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index d6c01c096a40..84e6781413b1 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -29,27 +29,87 @@
  * add extents in the middle of btrfs_search_slot, and it allows
  * us to buffer up frequently modified backrefs in an rb tree instead
  * of hammering updates on the extent allocation tree.
- *
- * Right now this code is only used for reference counted trees, but
- * the long term goal is to get rid of the similar code for delayed
- * extent tree modifications.
  */
 
 /*
- * entries in the rb tree are ordered by the byte number of the extent
- * and by the byte number of the parent block.
+ * compare two delayed tree backrefs with same bytenr and type
+ */
+static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
+			  struct btrfs_delayed_tree_ref *ref1)
+{
+	if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
+		if (ref1->root < ref2->root)
+			return -1;
+		if (ref1->root > ref2->root)
+			return 1;
+	} else {
+		if (ref1->parent < ref2->parent)
+			return -1;
+		if (ref1->parent > ref2->parent)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * compare two delayed data backrefs with same bytenr and type
  */
-static int comp_entry(struct btrfs_delayed_ref_node *ref,
-		      u64 bytenr, u64 parent)
+static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
+			  struct btrfs_delayed_data_ref *ref1)
 {
-	if (bytenr < ref->bytenr)
+	if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
+		if (ref1->root < ref2->root)
+			return -1;
+		if (ref1->root > ref2->root)
+			return 1;
+		if (ref1->objectid < ref2->objectid)
+			return -1;
+		if (ref1->objectid > ref2->objectid)
+			return 1;
+		if (ref1->offset < ref2->offset)
+			return -1;
+		if (ref1->offset > ref2->offset)
+			return 1;
+	} else {
+		if (ref1->parent < ref2->parent)
+			return -1;
+		if (ref1->parent > ref2->parent)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * entries in the rb tree are ordered by the byte number of the extent,
+ * type of the delayed backrefs and content of delayed backrefs.
+ */
+static int comp_entry(struct btrfs_delayed_ref_node *ref2,
+		      struct btrfs_delayed_ref_node *ref1)
+{
+	if (ref1->bytenr < ref2->bytenr)
 		return -1;
-	if (bytenr > ref->bytenr)
+	if (ref1->bytenr > ref2->bytenr)
 		return 1;
-	if (parent < ref->parent)
+	if (ref1->is_head && ref2->is_head)
+		return 0;
+	if (ref2->is_head)
 		return -1;
-	if (parent > ref->parent)
+	if (ref1->is_head)
 		return 1;
+	if (ref1->type < ref2->type)
+		return -1;
+	if (ref1->type > ref2->type)
+		return 1;
+	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
+	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
+		return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
+				      btrfs_delayed_node_to_tree_ref(ref1));
+	} else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
+		   ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
+		return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
+				      btrfs_delayed_node_to_data_ref(ref1));
+	}
+	BUG();
 	return 0;
 }
 
@@ -59,20 +119,21 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref,
  * inserted.
  */
 static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
-						  u64 bytenr, u64 parent,
 						  struct rb_node *node)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent_node = NULL;
 	struct btrfs_delayed_ref_node *entry;
+	struct btrfs_delayed_ref_node *ins;
 	int cmp;
 
+	ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
 	while (*p) {
 		parent_node = *p;
 		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
 				 rb_node);
 
-		cmp = comp_entry(entry, bytenr, parent);
+		cmp = comp_entry(entry, ins);
 		if (cmp < 0)
 			p = &(*p)->rb_left;
 		else if (cmp > 0)
@@ -81,18 +142,17 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
 		return entry;
 	}
 
-	entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
 	rb_link_node(node, parent_node, p);
 	rb_insert_color(node, root);
 	return NULL;
 }
 
 /*
- * find an entry based on (bytenr,parent). This returns the delayed
- * ref if it was able to find one, or NULL if nothing was in that spot
+ * find an head entry based on bytenr. This returns the delayed ref
+ * head if it was able to find one, or NULL if nothing was in that spot
  */
-static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
-						  u64 bytenr, u64 parent,
+static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
+						  u64 bytenr,
 				struct btrfs_delayed_ref_node **last)
 {
 	struct rb_node *n = root->rb_node;
@@ -105,7 +165,15 @@ static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
 		if (last)
 			*last = entry;
 
-		cmp = comp_entry(entry, bytenr, parent);
+		if (bytenr < entry->bytenr)
+			cmp = -1;
+		else if (bytenr > entry->bytenr)
+			cmp = 1;
+		else if (!btrfs_delayed_ref_is_head(entry))
+			cmp = 1;
+		else
+			cmp = 0;
+
 		if (cmp < 0)
 			n = n->rb_left;
 		else if (cmp > 0)
@@ -154,7 +222,7 @@ int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
 		node = rb_first(&delayed_refs->root);
 	} else {
 		ref = NULL;
-		tree_search(&delayed_refs->root, start, (u64)-1, &ref);
+		find_ref_head(&delayed_refs->root, start, &ref);
 		if (ref) {
 			struct btrfs_delayed_ref_node *tmp;
 
@@ -234,7 +302,7 @@ int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
 	delayed_refs = &trans->transaction->delayed_refs;
 	spin_lock(&delayed_refs->lock);
 
-	ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
+	ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
 	if (ref) {
 		prev_node = rb_prev(&ref->rb_node);
 		if (!prev_node)
@@ -250,25 +318,28 @@ out:
 }
 
 /*
- * helper function to lookup reference count
+ * helper function to lookup reference count and flags of extent.
  *
  * the head node for delayed ref is used to store the sum of all the
- * reference count modifications queued up in the rbtree. This way you
- * can check to see what the reference count would be if all of the
- * delayed refs are processed.
+ * reference count modifications queued up in the rbtree. the head
+ * node may also store the extent flags to set. This way you can check
+ * to see what the reference count and extent flags would be if all of
+ * the delayed refs are not processed.
  */
-int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root, u64 bytenr,
-			    u64 num_bytes, u32 *refs)
+int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root, u64 bytenr,
+			     u64 num_bytes, u64 *refs, u64 *flags)
 {
 	struct btrfs_delayed_ref_node *ref;
 	struct btrfs_delayed_ref_head *head;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_path *path;
-	struct extent_buffer *leaf;
 	struct btrfs_extent_item *ei;
+	struct extent_buffer *leaf;
 	struct btrfs_key key;
-	u32 num_refs;
+	u32 item_size;
+	u64 num_refs;
+	u64 extent_flags;
 	int ret;
 
 	path = btrfs_alloc_path();
@@ -287,37 +358,60 @@ again:
 
 	if (ret == 0) {
 		leaf = path->nodes[0];
-		ei = btrfs_item_ptr(leaf, path->slots[0],
-				    struct btrfs_extent_item);
-		num_refs = btrfs_extent_refs(leaf, ei);
+		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+		if (item_size >= sizeof(*ei)) {
+			ei = btrfs_item_ptr(leaf, path->slots[0],
+					    struct btrfs_extent_item);
+			num_refs = btrfs_extent_refs(leaf, ei);
+			extent_flags = btrfs_extent_flags(leaf, ei);
+		} else {
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+			struct btrfs_extent_item_v0 *ei0;
+			BUG_ON(item_size != sizeof(*ei0));
+			ei0 = btrfs_item_ptr(leaf, path->slots[0],
+					     struct btrfs_extent_item_v0);
+			num_refs = btrfs_extent_refs_v0(leaf, ei0);
+			/* FIXME: this isn't correct for data */
+			extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
+#else
+			BUG();
+#endif
+		}
+		BUG_ON(num_refs == 0);
 	} else {
 		num_refs = 0;
+		extent_flags = 0;
 		ret = 0;
 	}
 
 	spin_lock(&delayed_refs->lock);
-	ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
+	ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
 	if (ref) {
 		head = btrfs_delayed_node_to_head(ref);
-		if (mutex_trylock(&head->mutex)) {
-			num_refs += ref->ref_mod;
-			mutex_unlock(&head->mutex);
-			*refs = num_refs;
-			goto out;
-		}
+		if (!mutex_trylock(&head->mutex)) {
+			atomic_inc(&ref->refs);
+			spin_unlock(&delayed_refs->lock);
 
-		atomic_inc(&ref->refs);
-		spin_unlock(&delayed_refs->lock);
+			btrfs_release_path(root->fs_info->extent_root, path);
 
-		btrfs_release_path(root->fs_info->extent_root, path);
+			mutex_lock(&head->mutex);
+			mutex_unlock(&head->mutex);
+			btrfs_put_delayed_ref(ref);
+			goto again;
+		}
+		if (head->extent_op && head->extent_op->update_flags)
+			extent_flags |= head->extent_op->flags_to_set;
+		else
+			BUG_ON(num_refs == 0);
 
-		mutex_lock(&head->mutex);
-		mutex_unlock(&head->mutex);
-		btrfs_put_delayed_ref(ref);
-		goto again;
-	} else {
-		*refs = num_refs;
+		num_refs += ref->ref_mod;
+		mutex_unlock(&head->mutex);
 	}
+	WARN_ON(num_refs == 0);
+	if (refs)
+		*refs = num_refs;
+	if (flags)
+		*flags = extent_flags;
 out:
 	spin_unlock(&delayed_refs->lock);
 	btrfs_free_path(path);
@@ -338,16 +432,7 @@ update_existing_ref(struct btrfs_trans_handle *trans,
 			  struct btrfs_delayed_ref_node *existing,
 			  struct btrfs_delayed_ref_node *update)
 {
-	struct btrfs_delayed_ref *existing_ref;
-	struct btrfs_delayed_ref *ref;
-
-	existing_ref = btrfs_delayed_node_to_ref(existing);
-	ref = btrfs_delayed_node_to_ref(update);
-
-	if (ref->pin)
-		existing_ref->pin = 1;
-
-	if (ref->action != existing_ref->action) {
+	if (update->action != existing->action) {
 		/*
 		 * this is effectively undoing either an add or a
 		 * drop. We decrement the ref_mod, and if it goes
@@ -363,20 +448,13 @@ update_existing_ref(struct btrfs_trans_handle *trans,
 			delayed_refs->num_entries--;
 			if (trans->delayed_ref_updates)
 				trans->delayed_ref_updates--;
+		} else {
+			WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
+				existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
 		}
 	} else {
-		if (existing_ref->action == BTRFS_ADD_DELAYED_REF) {
-			/* if we're adding refs, make sure all the
-			 * details match up. The extent could
-			 * have been totally freed and reallocated
-			 * by a different owner before the delayed
-			 * ref entries were removed.
-			 */
-			existing_ref->owner_objectid = ref->owner_objectid;
-			existing_ref->generation = ref->generation;
-			existing_ref->root = ref->root;
-			existing->num_bytes = update->num_bytes;
-		}
+		WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
+			existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
 		/*
 		 * the action on the existing ref matches
 		 * the action on the ref we're trying to add.
@@ -401,6 +479,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
 
 	existing_ref = btrfs_delayed_node_to_head(existing);
 	ref = btrfs_delayed_node_to_head(update);
+	BUG_ON(existing_ref->is_data != ref->is_data);
 
 	if (ref->must_insert_reserved) {
 		/* if the extent was freed and then
@@ -420,6 +499,24 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
 
 	}
 
+	if (ref->extent_op) {
+		if (!existing_ref->extent_op) {
+			existing_ref->extent_op = ref->extent_op;
+		} else {
+			if (ref->extent_op->update_key) {
+				memcpy(&existing_ref->extent_op->key,
+				       &ref->extent_op->key,
+				       sizeof(ref->extent_op->key));
+				existing_ref->extent_op->update_key = 1;
+			}
+			if (ref->extent_op->update_flags) {
+				existing_ref->extent_op->flags_to_set |=
+					ref->extent_op->flags_to_set;
+				existing_ref->extent_op->update_flags = 1;
+			}
+			kfree(ref->extent_op);
+		}
+	}
 	/*
 	 * update the reference mod on the head to reflect this new operation
 	 */
@@ -427,19 +524,16 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
 }
 
 /*
- * helper function to actually insert a delayed ref into the rbtree.
+ * helper function to actually insert a head node into the rbtree.
  * this does all the dirty work in terms of maintaining the correct
- * overall modification count in the head node and properly dealing
- * with updating existing nodes as new modifications are queued.
+ * overall modification count.
  */
-static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
+static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
 			struct btrfs_delayed_ref_node *ref,
-			u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
-			u64 ref_generation, u64 owner_objectid, int action,
-			int pin)
+			u64 bytenr, u64 num_bytes,
+			int action, int is_data)
 {
 	struct btrfs_delayed_ref_node *existing;
-	struct btrfs_delayed_ref *full_ref;
 	struct btrfs_delayed_ref_head *head_ref = NULL;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	int count_mod = 1;
@@ -449,12 +543,10 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
 	 * the head node stores the sum of all the mods, so dropping a ref
 	 * should drop the sum in the head node by one.
 	 */
-	if (parent == (u64)-1) {
-		if (action == BTRFS_DROP_DELAYED_REF)
-			count_mod = -1;
-		else if (action == BTRFS_UPDATE_DELAYED_HEAD)
-			count_mod = 0;
-	}
+	if (action == BTRFS_UPDATE_DELAYED_HEAD)
+		count_mod = 0;
+	else if (action == BTRFS_DROP_DELAYED_REF)
+		count_mod = -1;
 
 	/*
 	 * BTRFS_ADD_DELAYED_EXTENT means that we need to update
@@ -467,57 +559,148 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
 	 * Once we record must_insert_reserved, switch the action to
 	 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
 	 */
-	if (action == BTRFS_ADD_DELAYED_EXTENT) {
+	if (action == BTRFS_ADD_DELAYED_EXTENT)
 		must_insert_reserved = 1;
-		action = BTRFS_ADD_DELAYED_REF;
-	} else {
+	else
 		must_insert_reserved = 0;
-	}
-
 
 	delayed_refs = &trans->transaction->delayed_refs;
 
 	/* first set the basic ref node struct up */
 	atomic_set(&ref->refs, 1);
 	ref->bytenr = bytenr;
-	ref->parent = parent;
+	ref->num_bytes = num_bytes;
 	ref->ref_mod = count_mod;
+	ref->type = 0;
+	ref->action = 0;
+	ref->is_head = 1;
 	ref->in_tree = 1;
+
+	head_ref = btrfs_delayed_node_to_head(ref);
+	head_ref->must_insert_reserved = must_insert_reserved;
+	head_ref->is_data = is_data;
+
+	INIT_LIST_HEAD(&head_ref->cluster);
+	mutex_init(&head_ref->mutex);
+
+	existing = tree_insert(&delayed_refs->root, &ref->rb_node);
+
+	if (existing) {
+		update_existing_head_ref(existing, ref);
+		/*
+		 * we've updated the existing ref, free the newly
+		 * allocated ref
+		 */
+		kfree(ref);
+	} else {
+		delayed_refs->num_heads++;
+		delayed_refs->num_heads_ready++;
+		delayed_refs->num_entries++;
+		trans->delayed_ref_updates++;
+	}
+	return 0;
+}
+
+/*
+ * helper to insert a delayed tree ref into the rbtree.
+ */
+static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
+					 struct btrfs_delayed_ref_node *ref,
+					 u64 bytenr, u64 num_bytes, u64 parent,
+					 u64 ref_root, int level, int action)
+{
+	struct btrfs_delayed_ref_node *existing;
+	struct btrfs_delayed_tree_ref *full_ref;
+	struct btrfs_delayed_ref_root *delayed_refs;
+
+	if (action == BTRFS_ADD_DELAYED_EXTENT)
+		action = BTRFS_ADD_DELAYED_REF;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+
+	/* first set the basic ref node struct up */
+	atomic_set(&ref->refs, 1);
+	ref->bytenr = bytenr;
 	ref->num_bytes = num_bytes;
+	ref->ref_mod = 1;
+	ref->action = action;
+	ref->is_head = 0;
+	ref->in_tree = 1;
 
-	if (btrfs_delayed_ref_is_head(ref)) {
-		head_ref = btrfs_delayed_node_to_head(ref);
-		head_ref->must_insert_reserved = must_insert_reserved;
-		INIT_LIST_HEAD(&head_ref->cluster);
-		mutex_init(&head_ref->mutex);
+	full_ref = btrfs_delayed_node_to_tree_ref(ref);
+	if (parent) {
+		full_ref->parent = parent;
+		ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
 	} else {
-		full_ref = btrfs_delayed_node_to_ref(ref);
 		full_ref->root = ref_root;
-		full_ref->generation = ref_generation;
-		full_ref->owner_objectid = owner_objectid;
-		full_ref->pin = pin;
-		full_ref->action = action;
+		ref->type = BTRFS_TREE_BLOCK_REF_KEY;
 	}
+	full_ref->level = level;
 
-	existing = tree_insert(&delayed_refs->root, bytenr,
-			       parent, &ref->rb_node);
+	existing = tree_insert(&delayed_refs->root, &ref->rb_node);
 
 	if (existing) {
-		if (btrfs_delayed_ref_is_head(ref))
-			update_existing_head_ref(existing, ref);
-		else
-			update_existing_ref(trans, delayed_refs, existing, ref);
+		update_existing_ref(trans, delayed_refs, existing, ref);
+		/*
+		 * we've updated the existing ref, free the newly
+		 * allocated ref
+		 */
+		kfree(ref);
+	} else {
+		delayed_refs->num_entries++;
+		trans->delayed_ref_updates++;
+	}
+	return 0;
+}
+
+/*
+ * helper to insert a delayed data ref into the rbtree.
+ */
+static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
+					 struct btrfs_delayed_ref_node *ref,
+					 u64 bytenr, u64 num_bytes, u64 parent,
+					 u64 ref_root, u64 owner, u64 offset,
+					 int action)
+{
+	struct btrfs_delayed_ref_node *existing;
+	struct btrfs_delayed_data_ref *full_ref;
+	struct btrfs_delayed_ref_root *delayed_refs;
+
+	if (action == BTRFS_ADD_DELAYED_EXTENT)
+		action = BTRFS_ADD_DELAYED_REF;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+
+	/* first set the basic ref node struct up */
+	atomic_set(&ref->refs, 1);
+	ref->bytenr = bytenr;
+	ref->num_bytes = num_bytes;
+	ref->ref_mod = 1;
+	ref->action = action;
+	ref->is_head = 0;
+	ref->in_tree = 1;
+
+	full_ref = btrfs_delayed_node_to_data_ref(ref);
+	if (parent) {
+		full_ref->parent = parent;
+		ref->type = BTRFS_SHARED_DATA_REF_KEY;
+	} else {
+		full_ref->root = ref_root;
+		ref->type = BTRFS_EXTENT_DATA_REF_KEY;
+	}
+	full_ref->objectid = owner;
+	full_ref->offset = offset;
 
+	existing = tree_insert(&delayed_refs->root, &ref->rb_node);
+
+	if (existing) {
+		update_existing_ref(trans, delayed_refs, existing, ref);
 		/*
 		 * we've updated the existing ref, free the newly
 		 * allocated ref
 		 */
 		kfree(ref);
 	} else {
-		if (btrfs_delayed_ref_is_head(ref)) {
-			delayed_refs->num_heads++;
-			delayed_refs->num_heads_ready++;
-		}
 		delayed_refs->num_entries++;
 		trans->delayed_ref_updates++;
 	}
@@ -525,37 +708,78 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
 }
 
 /*
- * add a delayed ref to the tree. This does all of the accounting required
+ * add a delayed tree ref. This does all of the accounting required
  * to make sure the delayed ref is eventually processed before this
 * transaction commits.
  */
-int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
-			  u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
-			  u64 ref_generation, u64 owner_objectid, int action,
-			  int pin)
+int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
+			       u64 bytenr, u64 num_bytes, u64 parent,
+			       u64 ref_root, int level, int action,
+			       struct btrfs_delayed_extent_op *extent_op)
 {
-	struct btrfs_delayed_ref *ref;
+	struct btrfs_delayed_tree_ref *ref;
 	struct btrfs_delayed_ref_head *head_ref;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	int ret;
 
+	BUG_ON(extent_op && extent_op->is_data);
 	ref = kmalloc(sizeof(*ref), GFP_NOFS);
 	if (!ref)
 		return -ENOMEM;
 
+	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
+	if (!head_ref) {
+		kfree(ref);
+		return -ENOMEM;
+	}
+
+	head_ref->extent_op = extent_op;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+	spin_lock(&delayed_refs->lock);
+
 	/*
-	 * the parent = 0 case comes from cases where we don't actually
-	 * know the parent yet. It will get updated later via a add/drop
-	 * pair.
+	 * insert both the head node and the new ref without dropping
+	 * the spin lock
 	 */
-	if (parent == 0)
-		parent = bytenr;
+	ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
+				   action, 0);
+	BUG_ON(ret);
+
+	ret = add_delayed_tree_ref(trans, &ref->node, bytenr, num_bytes,
+				   parent, ref_root, level, action);
+	BUG_ON(ret);
+	spin_unlock(&delayed_refs->lock);
+	return 0;
+}
+
+/*
+ * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
+ */
+int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
+			       u64 bytenr, u64 num_bytes,
+			       u64 parent, u64 ref_root,
+			       u64 owner, u64 offset, int action,
+			       struct btrfs_delayed_extent_op *extent_op)
+{
+	struct btrfs_delayed_data_ref *ref;
+	struct btrfs_delayed_ref_head *head_ref;
+	struct btrfs_delayed_ref_root *delayed_refs;
+	int ret;
+
+	BUG_ON(extent_op && !extent_op->is_data);
+	ref = kmalloc(sizeof(*ref), GFP_NOFS);
+	if (!ref)
+		return -ENOMEM;
 
 	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
 	if (!head_ref) {
 		kfree(ref);
 		return -ENOMEM;
 	}
+
+	head_ref->extent_op = extent_op;
+
 	delayed_refs = &trans->transaction->delayed_refs;
 	spin_lock(&delayed_refs->lock);
 
@@ -563,14 +787,39 @@ int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
 	 * insert both the head node and the new ref without dropping
 	 * the spin lock
 	 */
-	ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
-				      (u64)-1, 0, 0, 0, action, pin);
+	ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
+				   action, 1);
 	BUG_ON(ret);
 
-	ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
-				      parent, ref_root, ref_generation,
-				      owner_objectid, action, pin);
+	ret = add_delayed_data_ref(trans, &ref->node, bytenr, num_bytes,
+				   parent, ref_root, owner, offset, action);
+	BUG_ON(ret);
+	spin_unlock(&delayed_refs->lock);
+	return 0;
+}
+
+int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
+				u64 bytenr, u64 num_bytes,
+				struct btrfs_delayed_extent_op *extent_op)
+{
+	struct btrfs_delayed_ref_head *head_ref;
+	struct btrfs_delayed_ref_root *delayed_refs;
+	int ret;
+
+	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
+	if (!head_ref)
+		return -ENOMEM;
+
+	head_ref->extent_op = extent_op;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+	spin_lock(&delayed_refs->lock);
+
+	ret = add_delayed_ref_head(trans, &head_ref->node, bytenr,
+				   num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
+				   extent_op->is_data);
 	BUG_ON(ret);
+
 	spin_unlock(&delayed_refs->lock);
 	return 0;
 }
@@ -587,7 +836,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
 	struct btrfs_delayed_ref_root *delayed_refs;
 
 	delayed_refs = &trans->transaction->delayed_refs;
-	ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
+	ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
 	if (ref)
 		return btrfs_delayed_node_to_head(ref);
 	return NULL;
@@ -603,6 +852,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
  *
  * It is the same as doing a ref add and delete in two separate calls.
  */
+#if 0
 int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
			     u64 bytenr, u64 num_bytes, u64 orig_parent,
			     u64 parent, u64 orig_ref_root, u64 ref_root,
@@ -666,3 +916,4 @@ int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
 	spin_unlock(&delayed_refs->lock);
 	return 0;
 }
+#endif
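One locking pattern added here is worth a note: btrfs_lookup_extent_info() must not block on the head's mutex while the delayed-refs spinlock is held, so on a failed trylock it pins the node with a reference, drops the spinlock, blocks on the mutex only to wait out the current holder, releases everything and retries from the top. A rough pthread model of that shape, with toy types and names rather than kernel code:

#include <pthread.h>
#include <stdatomic.h>

/* toy stand-in for a delayed-ref head, not the kernel struct */
struct head {
	pthread_mutex_t mutex;
	atomic_int refs;
	int ref_mod;
};

static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;
static struct head the_head = {
	.mutex = PTHREAD_MUTEX_INITIALIZER,
	.refs = 1,
};

/* mirrors the shape of the retry loop: never sleep on a node mutex
 * while the tree lock is held */
static int lookup_ref_mod(void)
{
	struct head *h;
	int sum;

again:
	pthread_mutex_lock(&tree_lock);
	h = &the_head;			/* stands in for find_ref_head() */
	if (pthread_mutex_trylock(&h->mutex) != 0) {
		/* pin the node, drop the tree lock, wait out the
		 * current holder, then start over from scratch */
		atomic_fetch_add(&h->refs, 1);
		pthread_mutex_unlock(&tree_lock);

		pthread_mutex_lock(&h->mutex);
		pthread_mutex_unlock(&h->mutex);
		atomic_fetch_sub(&h->refs, 1);	/* btrfs_put_delayed_ref's role */
		goto again;
	}
	sum = h->ref_mod;
	pthread_mutex_unlock(&h->mutex);
	pthread_mutex_unlock(&tree_lock);
	return sum;
}

The retry (rather than simply waiting in place) matters because the on-disk extent item has to be re-read once the holder is done; the delayed sums are only meaningful relative to a consistent base value.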