aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2009-03-13 10:10:06 -0400
committerChris Mason <chris.mason@oracle.com>2009-03-24 16:14:25 -0400
commit56bec294dea971335d4466b30f2d959f28f6e36d (patch)
treefc0b5bbf4bb6ab35582a4c7f58f5ac88f71c38bf /fs
parent9fa8cfe706f9c20067c042a064999d5825a35330 (diff)
Btrfs: do extent allocation and reference count updates in the background
The extent allocation tree maintains a reference count and full back reference information for every extent allocated in the filesystem. For subvolume and snapshot trees, every time a block goes through COW, the new copy of the block adds a reference on every block it points to. If a btree node points to 150 leaves, then the COW code needs to go and add backrefs on 150 different extents, which might be spread all over the extent allocation tree. These updates currently happen during btrfs_cow_block, and most COWs happen during btrfs_search_slot. btrfs_search_slot has locks held on both the parent and the node we are COWing, and so we really want to avoid IO during the COW if we can. This commit adds an rbtree of pending reference count updates and extent allocations. The tree is ordered by byte number of the extent and byte number of the parent for the back reference. The tree allows us to: 1) Modify back references in something close to disk order, reducing seeks 2) Significantly reduce the number of modifications made as block pointers are balanced around 3) Do all of the extent insertion and back reference modifications outside of the performance critical btrfs_search_slot code. #3 has the added benefit of greatly reducing the btrfs stack footprint. The extent allocation tree modifications are done without the deep (and somewhat recursive) call chains used in the past. These delayed back reference updates must be done before the transaction commits, and so the rbtree is tied to the transaction. Throttling is implemented to help keep the queue of backrefs at a reasonable size. Since there was a similar mechanism in place for the extent tree extents, that is removed and replaced by the delayed reference tree. Yan Zheng <yan.zheng@oracle.com> helped review and fixup this code. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/Makefile2
-rw-r--r--fs/btrfs/ctree.c3
-rw-r--r--fs/btrfs/ctree.h12
-rw-r--r--fs/btrfs/delayed-ref.c585
-rw-r--r--fs/btrfs/delayed-ref.h182
-rw-r--r--fs/btrfs/disk-io.c29
-rw-r--r--fs/btrfs/extent-tree.c1496
-rw-r--r--fs/btrfs/file.c6
-rw-r--r--fs/btrfs/transaction.c54
-rw-r--r--fs/btrfs/transaction.h3
-rw-r--r--fs/btrfs/tree-defrag.c2
11 files changed, 1234 insertions, 1140 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index d2cf5a54a4b8..9adf5e4f7e96 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -8,7 +8,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
8 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ 8 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
9 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ 9 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
10 ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ 10 ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
11 compression.o 11 compression.o delayed-ref.o
12else 12else
13 13
14# Normal Makefile 14# Normal Makefile
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 87c90387283b..bebc9fd16665 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -922,6 +922,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
922 spin_unlock(&root->node_lock); 922 spin_unlock(&root->node_lock);
923 923
924 ret = btrfs_update_extent_ref(trans, root, child->start, 924 ret = btrfs_update_extent_ref(trans, root, child->start,
925 child->len,
925 mid->start, child->start, 926 mid->start, child->start,
926 root->root_key.objectid, 927 root->root_key.objectid,
927 trans->transid, level - 1); 928 trans->transid, level - 1);
@@ -2075,7 +2076,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2075 spin_unlock(&root->node_lock); 2076 spin_unlock(&root->node_lock);
2076 2077
2077 ret = btrfs_update_extent_ref(trans, root, lower->start, 2078 ret = btrfs_update_extent_ref(trans, root, lower->start,
2078 lower->start, c->start, 2079 lower->len, lower->start, c->start,
2079 root->root_key.objectid, 2080 root->root_key.objectid,
2080 trans->transid, level - 1); 2081 trans->transid, level - 1);
2081 BUG_ON(ret); 2082 BUG_ON(ret);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3a37ba7a8d65..ced5fd85dc36 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -688,8 +688,6 @@ struct btrfs_fs_info {
688 struct rb_root block_group_cache_tree; 688 struct rb_root block_group_cache_tree;
689 689
690 struct extent_io_tree pinned_extents; 690 struct extent_io_tree pinned_extents;
691 struct extent_io_tree pending_del;
692 struct extent_io_tree extent_ins;
693 691
694 /* logical->physical extent mapping */ 692 /* logical->physical extent mapping */
695 struct btrfs_mapping_tree mapping_tree; 693 struct btrfs_mapping_tree mapping_tree;
@@ -717,7 +715,6 @@ struct btrfs_fs_info {
717 struct mutex tree_log_mutex; 715 struct mutex tree_log_mutex;
718 struct mutex transaction_kthread_mutex; 716 struct mutex transaction_kthread_mutex;
719 struct mutex cleaner_mutex; 717 struct mutex cleaner_mutex;
720 struct mutex extent_ins_mutex;
721 struct mutex pinned_mutex; 718 struct mutex pinned_mutex;
722 struct mutex chunk_mutex; 719 struct mutex chunk_mutex;
723 struct mutex drop_mutex; 720 struct mutex drop_mutex;
@@ -1704,18 +1701,15 @@ static inline struct dentry *fdentry(struct file *file)
1704} 1701}
1705 1702
1706/* extent-tree.c */ 1703/* extent-tree.c */
1704int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
1705 struct btrfs_root *root, unsigned long count);
1707int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); 1706int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
1708int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
1709 struct btrfs_root *root, u64 bytenr,
1710 u64 num_bytes, u32 *refs);
1711int btrfs_update_pinned_extents(struct btrfs_root *root, 1707int btrfs_update_pinned_extents(struct btrfs_root *root,
1712 u64 bytenr, u64 num, int pin); 1708 u64 bytenr, u64 num, int pin);
1713int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 1709int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
1714 struct btrfs_root *root, struct extent_buffer *leaf); 1710 struct btrfs_root *root, struct extent_buffer *leaf);
1715int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 1711int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
1716 struct btrfs_root *root, u64 objectid, u64 bytenr); 1712 struct btrfs_root *root, u64 objectid, u64 bytenr);
1717int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
1718 struct btrfs_root *root);
1719int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); 1713int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
1720struct btrfs_block_group_cache *btrfs_lookup_block_group( 1714struct btrfs_block_group_cache *btrfs_lookup_block_group(
1721 struct btrfs_fs_info *info, 1715 struct btrfs_fs_info *info,
@@ -1777,7 +1771,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1777 u64 root_objectid, u64 ref_generation, 1771 u64 root_objectid, u64 ref_generation,
1778 u64 owner_objectid); 1772 u64 owner_objectid);
1779int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, 1773int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1780 struct btrfs_root *root, u64 bytenr, 1774 struct btrfs_root *root, u64 bytenr, u64 num_bytes,
1781 u64 orig_parent, u64 parent, 1775 u64 orig_parent, u64 parent,
1782 u64 root_objectid, u64 ref_generation, 1776 u64 root_objectid, u64 ref_generation,
1783 u64 owner_objectid); 1777 u64 owner_objectid);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
new file mode 100644
index 000000000000..874565a1f634
--- /dev/null
+++ b/fs/btrfs/delayed-ref.c
@@ -0,0 +1,585 @@
1/*
2 * Copyright (C) 2009 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include <linux/sort.h>
21#include <linux/ftrace.h>
22#include "ctree.h"
23#include "delayed-ref.h"
24#include "transaction.h"
25
26/*
27 * delayed back reference update tracking. For subvolume trees
28 * we queue up extent allocations and backref maintenance for
29 * delayed processing. This avoids deep call chains where we
30 * add extents in the middle of btrfs_search_slot, and it allows
31 * us to buffer up frequently modified backrefs in an rb tree instead
32 * of hammering updates on the extent allocation tree.
33 *
34 * Right now this code is only used for reference counted trees, but
35 * the long term goal is to get rid of the similar code for delayed
36 * extent tree modifications.
37 */
38
39/*
40 * entries in the rb tree are ordered by the byte number of the extent
41 * and by the byte number of the parent block.
42 */
43static int comp_entry(struct btrfs_delayed_ref_node *ref,
44 u64 bytenr, u64 parent)
45{
46 if (bytenr < ref->bytenr)
47 return -1;
48 if (bytenr > ref->bytenr)
49 return 1;
50 if (parent < ref->parent)
51 return -1;
52 if (parent > ref->parent)
53 return 1;
54 return 0;
55}
56
57/*
58 * insert a new ref into the rbtree. This returns any existing refs
59 * for the same (bytenr,parent) tuple, or NULL if the new node was properly
60 * inserted.
61 */
62static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
63 u64 bytenr, u64 parent,
64 struct rb_node *node)
65{
66 struct rb_node **p = &root->rb_node;
67 struct rb_node *parent_node = NULL;
68 struct btrfs_delayed_ref_node *entry;
69 int cmp;
70
71 while (*p) {
72 parent_node = *p;
73 entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
74 rb_node);
75
76 cmp = comp_entry(entry, bytenr, parent);
77 if (cmp < 0)
78 p = &(*p)->rb_left;
79 else if (cmp > 0)
80 p = &(*p)->rb_right;
81 else
82 return entry;
83 }
84
85 entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
86 rb_link_node(node, parent_node, p);
87 rb_insert_color(node, root);
88 return NULL;
89}
90
91/*
92 * find an entry based on (bytenr,parent). This returns the delayed
93 * ref if it was able to find one, or NULL if nothing was in that spot
94 */
95static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
96 u64 bytenr, u64 parent)
97{
98 struct rb_node *n = root->rb_node;
99 struct btrfs_delayed_ref_node *entry;
100 int cmp;
101
102 while (n) {
103 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
104 WARN_ON(!entry->in_tree);
105
106 cmp = comp_entry(entry, bytenr, parent);
107 if (cmp < 0)
108 n = n->rb_left;
109 else if (cmp > 0)
110 n = n->rb_right;
111 else
112 return entry;
113 }
114 return NULL;
115}
116
117/*
118 * Locking on delayed refs is done by taking a lock on the head node,
119 * which has the (impossible) parent id of (u64)-1. Once a lock is held
120 * on the head node, you're allowed (and required) to process all the
121 * delayed refs for a given byte number in the tree.
122 *
123 * This will walk forward in the rbtree until it finds a head node it
124 * is able to lock. It might not lock the delayed ref you asked for,
125 * and so it will return the one it did lock in next_ret and return 0.
126 *
127 * If no locks are taken, next_ret is set to null and 1 is returned. This
128 * means there are no more unlocked head nodes in the rbtree.
129 */
130int btrfs_lock_delayed_ref(struct btrfs_trans_handle *trans,
131 struct btrfs_delayed_ref_node *ref,
132 struct btrfs_delayed_ref_head **next_ret)
133{
134 struct rb_node *node;
135 struct btrfs_delayed_ref_head *head;
136 int ret = 0;
137
138 while (1) {
139 if (btrfs_delayed_ref_is_head(ref)) {
140 head = btrfs_delayed_node_to_head(ref);
141 if (mutex_trylock(&head->mutex)) {
142 *next_ret = head;
143 ret = 0;
144 break;
145 }
146 }
147 node = rb_next(&ref->rb_node);
148 if (!node) {
149 ret = 1;
150 *next_ret = NULL;
151 break;
152 }
153 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
154 }
155 return ret;
156}
157
158/*
159 * This checks to see if there are any delayed refs in the
160 * btree for a given bytenr. It returns one if it finds any
161 * and zero otherwise.
162 *
163 * If it only finds a head node, it returns 0.
164 *
165 * The idea is to use this when deciding if you can safely delete an
166 * extent from the extent allocation tree. There may be a pending
167 * ref in the rbtree that adds or removes references, so as long as this
168 * returns one you need to leave the BTRFS_EXTENT_ITEM in the extent
169 * allocation tree.
170 */
171int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
172{
173 struct btrfs_delayed_ref_node *ref;
174 struct btrfs_delayed_ref_root *delayed_refs;
175 struct rb_node *prev_node;
176 int ret = 0;
177
178 delayed_refs = &trans->transaction->delayed_refs;
179 spin_lock(&delayed_refs->lock);
180
181 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1);
182 if (ref) {
183 prev_node = rb_prev(&ref->rb_node);
184 if (!prev_node)
185 goto out;
186 ref = rb_entry(prev_node, struct btrfs_delayed_ref_node,
187 rb_node);
188 if (ref->bytenr == bytenr)
189 ret = 1;
190 }
191out:
192 spin_unlock(&delayed_refs->lock);
193 return ret;
194}
195
196/*
197 * helper function to lookup reference count
198 *
199 * the head node for delayed ref is used to store the sum of all the
200 * reference count modifications queued up in the rbtree. This way you
201 * can check to see what the reference count would be if all of the
202 * delayed refs are processed.
203 */
204int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
205 struct btrfs_root *root, u64 bytenr,
206 u64 num_bytes, u32 *refs)
207{
208 struct btrfs_delayed_ref_node *ref;
209 struct btrfs_delayed_ref_head *head;
210 struct btrfs_delayed_ref_root *delayed_refs;
211 struct btrfs_path *path;
212 struct extent_buffer *leaf;
213 struct btrfs_extent_item *ei;
214 struct btrfs_key key;
215 u32 num_refs;
216 int ret;
217
218 path = btrfs_alloc_path();
219 if (!path)
220 return -ENOMEM;
221
222 key.objectid = bytenr;
223 key.type = BTRFS_EXTENT_ITEM_KEY;
224 key.offset = num_bytes;
225 delayed_refs = &trans->transaction->delayed_refs;
226again:
227 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
228 &key, path, 0, 0);
229 if (ret < 0)
230 goto out;
231
232 if (ret == 0) {
233 leaf = path->nodes[0];
234 ei = btrfs_item_ptr(leaf, path->slots[0],
235 struct btrfs_extent_item);
236 num_refs = btrfs_extent_refs(leaf, ei);
237 } else {
238 num_refs = 0;
239 ret = 0;
240 }
241
242 spin_lock(&delayed_refs->lock);
243 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1);
244 if (ref) {
245 head = btrfs_delayed_node_to_head(ref);
246 if (mutex_trylock(&head->mutex)) {
247 num_refs += ref->ref_mod;
248 mutex_unlock(&head->mutex);
249 *refs = num_refs;
250 goto out;
251 }
252
253 atomic_inc(&ref->refs);
254 spin_unlock(&delayed_refs->lock);
255
256 btrfs_release_path(root->fs_info->extent_root, path);
257
258 mutex_lock(&head->mutex);
259 mutex_unlock(&head->mutex);
260 btrfs_put_delayed_ref(ref);
261 goto again;
262 } else {
263 *refs = num_refs;
264 }
265out:
266 spin_unlock(&delayed_refs->lock);
267 btrfs_free_path(path);
268 return ret;
269}
270
271/*
272 * helper function to update an extent delayed ref in the
273 * rbtree. existing and update must both have the same
274 * bytenr and parent
275 *
276 * This may free existing if the update cancels out whatever
277 * operation it was doing.
278 */
279static noinline void
280update_existing_ref(struct btrfs_trans_handle *trans,
281 struct btrfs_delayed_ref_root *delayed_refs,
282 struct btrfs_delayed_ref_node *existing,
283 struct btrfs_delayed_ref_node *update)
284{
285 struct btrfs_delayed_ref *existing_ref;
286 struct btrfs_delayed_ref *ref;
287
288 existing_ref = btrfs_delayed_node_to_ref(existing);
289 ref = btrfs_delayed_node_to_ref(update);
290
291 if (ref->pin)
292 existing_ref->pin = 1;
293
294 if (ref->action != existing_ref->action) {
295 /*
296 * this is effectively undoing either an add or a
297 * drop. We decrement the ref_mod, and if it goes
298 * down to zero we just delete the entry without
299 * every changing the extent allocation tree.
300 */
301 existing->ref_mod--;
302 if (existing->ref_mod == 0) {
303 rb_erase(&existing->rb_node,
304 &delayed_refs->root);
305 existing->in_tree = 0;
306 btrfs_put_delayed_ref(existing);
307 delayed_refs->num_entries--;
308 if (trans->delayed_ref_updates)
309 trans->delayed_ref_updates--;
310 }
311 } else {
312 if (existing_ref->action == BTRFS_ADD_DELAYED_REF) {
313 /* if we're adding refs, make sure all the
314 * details match up. The extent could
315 * have been totally freed and reallocated
316 * by a different owner before the delayed
317 * ref entries were removed.
318 */
319 existing_ref->owner_objectid = ref->owner_objectid;
320 existing_ref->generation = ref->generation;
321 existing_ref->root = ref->root;
322 existing->num_bytes = update->num_bytes;
323 }
324 /*
325 * the action on the existing ref matches
326 * the action on the ref we're trying to add.
327 * Bump the ref_mod by one so the backref that
328 * is eventually added/removed has the correct
329 * reference count
330 */
331 existing->ref_mod += update->ref_mod;
332 }
333}
334
335/*
336 * helper function to update the accounting in the head ref
337 * existing and update must have the same bytenr
338 */
339static noinline void
340update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
341 struct btrfs_delayed_ref_node *update)
342{
343 struct btrfs_delayed_ref_head *existing_ref;
344 struct btrfs_delayed_ref_head *ref;
345
346 existing_ref = btrfs_delayed_node_to_head(existing);
347 ref = btrfs_delayed_node_to_head(update);
348
349 if (ref->must_insert_reserved) {
350 /* if the extent was freed and then
351 * reallocated before the delayed ref
352 * entries were processed, we can end up
353 * with an existing head ref without
354 * the must_insert_reserved flag set.
355 * Set it again here
356 */
357 existing_ref->must_insert_reserved = ref->must_insert_reserved;
358
359 /*
360 * update the num_bytes so we make sure the accounting
361 * is done correctly
362 */
363 existing->num_bytes = update->num_bytes;
364
365 }
366
367 /*
368 * update the reference mod on the head to reflect this new operation
369 */
370 existing->ref_mod += update->ref_mod;
371}
372
373/*
374 * helper function to actually insert a delayed ref into the rbtree.
375 * this does all the dirty work in terms of maintaining the correct
376 * overall modification count in the head node and properly dealing
377 * with updating existing nodes as new modifications are queued.
378 */
379static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
380 struct btrfs_delayed_ref_node *ref,
381 u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
382 u64 ref_generation, u64 owner_objectid, int action,
383 int pin)
384{
385 struct btrfs_delayed_ref_node *existing;
386 struct btrfs_delayed_ref *full_ref;
387 struct btrfs_delayed_ref_head *head_ref;
388 struct btrfs_delayed_ref_root *delayed_refs;
389 int count_mod = 1;
390 int must_insert_reserved = 0;
391
392 /*
393 * the head node stores the sum of all the mods, so dropping a ref
394 * should drop the sum in the head node by one.
395 */
396 if (parent == (u64)-1 && action == BTRFS_DROP_DELAYED_REF)
397 count_mod = -1;
398
399 /*
400 * BTRFS_ADD_DELAYED_EXTENT means that we need to update
401 * the reserved accounting when the extent is finally added, or
402 * if a later modification deletes the delayed ref without ever
403 * inserting the extent into the extent allocation tree.
404 * ref->must_insert_reserved is the flag used to record
405 * that accounting mods are required.
406 *
407 * Once we record must_insert_reserved, switch the action to
408 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
409 */
410 if (action == BTRFS_ADD_DELAYED_EXTENT) {
411 must_insert_reserved = 1;
412 action = BTRFS_ADD_DELAYED_REF;
413 } else {
414 must_insert_reserved = 0;
415 }
416
417
418 delayed_refs = &trans->transaction->delayed_refs;
419
420 /* first set the basic ref node struct up */
421 atomic_set(&ref->refs, 1);
422 ref->bytenr = bytenr;
423 ref->parent = parent;
424 ref->ref_mod = count_mod;
425 ref->in_tree = 1;
426 ref->num_bytes = num_bytes;
427
428 if (btrfs_delayed_ref_is_head(ref)) {
429 head_ref = btrfs_delayed_node_to_head(ref);
430 head_ref->must_insert_reserved = must_insert_reserved;
431 mutex_init(&head_ref->mutex);
432 } else {
433 full_ref = btrfs_delayed_node_to_ref(ref);
434 full_ref->root = ref_root;
435 full_ref->generation = ref_generation;
436 full_ref->owner_objectid = owner_objectid;
437 full_ref->pin = pin;
438 full_ref->action = action;
439 }
440
441 existing = tree_insert(&delayed_refs->root, bytenr,
442 parent, &ref->rb_node);
443
444 if (existing) {
445 if (btrfs_delayed_ref_is_head(ref))
446 update_existing_head_ref(existing, ref);
447 else
448 update_existing_ref(trans, delayed_refs, existing, ref);
449
450 /*
451 * we've updated the existing ref, free the newly
452 * allocated ref
453 */
454 kfree(ref);
455 } else {
456 delayed_refs->num_entries++;
457 trans->delayed_ref_updates++;
458 }
459 return 0;
460}
461
462/*
463 * add a delayed ref to the tree. This does all of the accounting required
464 * to make sure the delayed ref is eventually processed before this
465 * transaction commits.
466 */
467int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
468 u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
469 u64 ref_generation, u64 owner_objectid, int action,
470 int pin)
471{
472 struct btrfs_delayed_ref *ref;
473 struct btrfs_delayed_ref_head *head_ref;
474 struct btrfs_delayed_ref_root *delayed_refs;
475 int ret;
476
477 ref = kmalloc(sizeof(*ref), GFP_NOFS);
478 if (!ref)
479 return -ENOMEM;
480
481 /*
482 * the parent = 0 case comes from cases where we don't actually
483 * know the parent yet. It will get updated later via a add/drop
484 * pair.
485 */
486 if (parent == 0)
487 parent = bytenr;
488
489 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
490 if (!head_ref) {
491 kfree(ref);
492 return -ENOMEM;
493 }
494 delayed_refs = &trans->transaction->delayed_refs;
495 spin_lock(&delayed_refs->lock);
496
497 /*
498 * insert both the head node and the new ref without dropping
499 * the spin lock
500 */
501 ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
502 (u64)-1, 0, 0, 0, action, pin);
503 BUG_ON(ret);
504
505 ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
506 parent, ref_root, ref_generation,
507 owner_objectid, action, pin);
508 BUG_ON(ret);
509 spin_unlock(&delayed_refs->lock);
510 return 0;
511}
512
513/*
514 * add a delayed ref to the tree. This does all of the accounting required
515 * to make sure the delayed ref is eventually processed before this
516 * transaction commits.
517 *
518 * The main point of this call is to add and remove a backreference in a single
519 * shot, taking the lock only once, and only searching for the head node once.
520 *
521 * It is the same as doing a ref add and delete in two separate calls.
522 */
523int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
524 u64 bytenr, u64 num_bytes, u64 orig_parent,
525 u64 parent, u64 orig_ref_root, u64 ref_root,
526 u64 orig_ref_generation, u64 ref_generation,
527 u64 owner_objectid, int pin)
528{
529 struct btrfs_delayed_ref *ref;
530 struct btrfs_delayed_ref *old_ref;
531 struct btrfs_delayed_ref_head *head_ref;
532 struct btrfs_delayed_ref_root *delayed_refs;
533 int ret;
534
535 ref = kmalloc(sizeof(*ref), GFP_NOFS);
536 if (!ref)
537 return -ENOMEM;
538
539 old_ref = kmalloc(sizeof(*old_ref), GFP_NOFS);
540 if (!old_ref) {
541 kfree(ref);
542 return -ENOMEM;
543 }
544
545 /*
546 * the parent = 0 case comes from cases where we don't actually
547 * know the parent yet. It will get updated later via a add/drop
548 * pair.
549 */
550 if (parent == 0)
551 parent = bytenr;
552 if (orig_parent == 0)
553 orig_parent = bytenr;
554
555 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
556 if (!head_ref) {
557 kfree(ref);
558 kfree(old_ref);
559 return -ENOMEM;
560 }
561 delayed_refs = &trans->transaction->delayed_refs;
562 spin_lock(&delayed_refs->lock);
563
564 /*
565 * insert both the head node and the new ref without dropping
566 * the spin lock
567 */
568 ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
569 (u64)-1, 0, 0, 0,
570 BTRFS_ADD_DELAYED_REF, 0);
571 BUG_ON(ret);
572
573 ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
574 parent, ref_root, ref_generation,
575 owner_objectid, BTRFS_ADD_DELAYED_REF, 0);
576 BUG_ON(ret);
577
578 ret = __btrfs_add_delayed_ref(trans, &old_ref->node, bytenr, num_bytes,
579 orig_parent, orig_ref_root,
580 orig_ref_generation, owner_objectid,
581 BTRFS_DROP_DELAYED_REF, pin);
582 BUG_ON(ret);
583 spin_unlock(&delayed_refs->lock);
584 return 0;
585}
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
new file mode 100644
index 000000000000..37919e5c007f
--- /dev/null
+++ b/fs/btrfs/delayed-ref.h
@@ -0,0 +1,182 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18#ifndef __DELAYED_REF__
19#define __DELAYED_REF__
20
21/* these are the possible values of struct btrfs_delayed_ref->action */
22#define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */
23#define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */
24#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
25
26struct btrfs_delayed_ref_node {
27 struct rb_node rb_node;
28
29 /* the starting bytenr of the extent */
30 u64 bytenr;
31
32 /* the parent our backref will point to */
33 u64 parent;
34
35 /* the size of the extent */
36 u64 num_bytes;
37
38 /* ref count on this data structure */
39 atomic_t refs;
40
41 /*
42 * how many refs is this entry adding or deleting. For
43 * head refs, this may be a negative number because it is keeping
44 * track of the total mods done to the reference count.
45 * For individual refs, this will always be a positive number
46 *
47 * It may be more than one, since it is possible for a single
48 * parent to have more than one ref on an extent
49 */
50 int ref_mod;
51
52 /* is this node still in the rbtree? */
53 unsigned int in_tree:1;
54};
55
56/*
57 * the head refs are used to hold a lock on a given extent, which allows us
58 * to make sure that only one process is running the delayed refs
59 * at a time for a single extent. They also store the sum of all the
60 * reference count modifications we've queued up.
61 */
62struct btrfs_delayed_ref_head {
63 struct btrfs_delayed_ref_node node;
64
65 /*
66 * the mutex is held while running the refs, and it is also
67 * held when checking the sum of reference modifications.
68 */
69 struct mutex mutex;
70
71 /*
72 * when a new extent is allocated, it is just reserved in memory
73 * The actual extent isn't inserted into the extent allocation tree
74 * until the delayed ref is processed. must_insert_reserved is
75 * used to flag a delayed ref so the accounting can be updated
76 * when a full insert is done.
77 *
78 * It is possible the extent will be freed before it is ever
79 * inserted into the extent allocation tree. In this case
80 * we need to update the in ram accounting to properly reflect
81 * the free has happened.
82 */
83 unsigned int must_insert_reserved:1;
84};
85
86struct btrfs_delayed_ref {
87 struct btrfs_delayed_ref_node node;
88
89 /* the root objectid our ref will point to */
90 u64 root;
91
92 /* the generation for the backref */
93 u64 generation;
94
95 /* owner_objectid of the backref */
96 u64 owner_objectid;
97
98 /* operation done by this entry in the rbtree */
99 u8 action;
100
101 /* if pin == 1, when the extent is freed it will be pinned until
102 * transaction commit
103 */
104 unsigned int pin:1;
105};
106
107struct btrfs_delayed_ref_root {
108 struct rb_root root;
109
110 /* this spin lock protects the rbtree and the entries inside */
111 spinlock_t lock;
112
113 /* how many delayed ref updates we've queued, used by the
114 * throttling code
115 */
116 unsigned long num_entries;
117
118 /*
119 * set when the tree is flushing before a transaction commit,
120 * used by the throttling code to decide if new updates need
121 * to be run right away
122 */
123 int flushing;
124};
125
126static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
127{
128 WARN_ON(atomic_read(&ref->refs) == 0);
129 if (atomic_dec_and_test(&ref->refs)) {
130 WARN_ON(ref->in_tree);
131 kfree(ref);
132 }
133}
134
135int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
136 u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
137 u64 ref_generation, u64 owner_objectid, int action,
138 int pin);
139
140struct btrfs_delayed_ref *
141btrfs_find_delayed_ref(struct btrfs_trans_handle *trans, u64 bytenr,
142 u64 parent);
143int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr);
144int btrfs_lock_delayed_ref(struct btrfs_trans_handle *trans,
145 struct btrfs_delayed_ref_node *ref,
146 struct btrfs_delayed_ref_head **next_ret);
147int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
148 struct btrfs_root *root, u64 bytenr,
149 u64 num_bytes, u32 *refs);
150int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
151 u64 bytenr, u64 num_bytes, u64 orig_parent,
152 u64 parent, u64 orig_ref_root, u64 ref_root,
153 u64 orig_ref_generation, u64 ref_generation,
154 u64 owner_objectid, int pin);
155/*
156 * a node might live in a head or a regular ref, this lets you
157 * test for the proper type to use.
158 */
159static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node)
160{
161 return node->parent == (u64)-1;
162}
163
164/*
165 * helper functions to cast a node into its container
166 */
167static inline struct btrfs_delayed_ref *
168btrfs_delayed_node_to_ref(struct btrfs_delayed_ref_node *node)
169{
170 WARN_ON(btrfs_delayed_ref_is_head(node));
171 return container_of(node, struct btrfs_delayed_ref, node);
172
173}
174
175static inline struct btrfs_delayed_ref_head *
176btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node)
177{
178 WARN_ON(!btrfs_delayed_ref_is_head(node));
179 return container_of(node, struct btrfs_delayed_ref_head, node);
180
181}
182#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3e18175248e0..4f43e227a297 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1458,6 +1458,7 @@ static int transaction_kthread(void *arg)
1458 struct btrfs_root *root = arg; 1458 struct btrfs_root *root = arg;
1459 struct btrfs_trans_handle *trans; 1459 struct btrfs_trans_handle *trans;
1460 struct btrfs_transaction *cur; 1460 struct btrfs_transaction *cur;
1461 struct btrfs_fs_info *info = root->fs_info;
1461 unsigned long now; 1462 unsigned long now;
1462 unsigned long delay; 1463 unsigned long delay;
1463 int ret; 1464 int ret;
@@ -1471,12 +1472,6 @@ static int transaction_kthread(void *arg)
1471 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); 1472 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1472 mutex_lock(&root->fs_info->transaction_kthread_mutex); 1473 mutex_lock(&root->fs_info->transaction_kthread_mutex);
1473 1474
1474 if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) {
1475 printk(KERN_INFO "btrfs: total reference cache "
1476 "size %llu\n",
1477 root->fs_info->total_ref_cache_size);
1478 }
1479
1480 mutex_lock(&root->fs_info->trans_mutex); 1475 mutex_lock(&root->fs_info->trans_mutex);
1481 cur = root->fs_info->running_transaction; 1476 cur = root->fs_info->running_transaction;
1482 if (!cur) { 1477 if (!cur) {
@@ -1486,13 +1481,30 @@ static int transaction_kthread(void *arg)
1486 1481
1487 now = get_seconds(); 1482 now = get_seconds();
1488 if (now < cur->start_time || now - cur->start_time < 30) { 1483 if (now < cur->start_time || now - cur->start_time < 30) {
1484 unsigned long num_delayed;
1485 num_delayed = cur->delayed_refs.num_entries;
1489 mutex_unlock(&root->fs_info->trans_mutex); 1486 mutex_unlock(&root->fs_info->trans_mutex);
1490 delay = HZ * 5; 1487 delay = HZ * 5;
1488
1489 /*
1490 * we may have been woken up early to start
1491 * processing the delayed extent ref updates
1492 * If so, run some of them and then loop around again
1493 * to see if we need to force a commit
1494 */
1495 if (num_delayed > 64) {
1496 mutex_unlock(&info->transaction_kthread_mutex);
1497 trans = btrfs_start_transaction(root, 1);
1498 btrfs_run_delayed_refs(trans, root, 256);
1499 btrfs_end_transaction(trans, root);
1500 continue;
1501 }
1491 goto sleep; 1502 goto sleep;
1492 } 1503 }
1493 mutex_unlock(&root->fs_info->trans_mutex); 1504 mutex_unlock(&root->fs_info->trans_mutex);
1494 trans = btrfs_start_transaction(root, 1); 1505 trans = btrfs_start_transaction(root, 1);
1495 ret = btrfs_commit_transaction(trans, root); 1506 ret = btrfs_commit_transaction(trans, root);
1507
1496sleep: 1508sleep:
1497 wake_up_process(root->fs_info->cleaner_kthread); 1509 wake_up_process(root->fs_info->cleaner_kthread);
1498 mutex_unlock(&root->fs_info->transaction_kthread_mutex); 1510 mutex_unlock(&root->fs_info->transaction_kthread_mutex);
@@ -1611,10 +1623,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1611 1623
1612 extent_io_tree_init(&fs_info->pinned_extents, 1624 extent_io_tree_init(&fs_info->pinned_extents,
1613 fs_info->btree_inode->i_mapping, GFP_NOFS); 1625 fs_info->btree_inode->i_mapping, GFP_NOFS);
1614 extent_io_tree_init(&fs_info->pending_del,
1615 fs_info->btree_inode->i_mapping, GFP_NOFS);
1616 extent_io_tree_init(&fs_info->extent_ins,
1617 fs_info->btree_inode->i_mapping, GFP_NOFS);
1618 fs_info->do_barriers = 1; 1626 fs_info->do_barriers = 1;
1619 1627
1620 INIT_LIST_HEAD(&fs_info->dead_reloc_roots); 1628 INIT_LIST_HEAD(&fs_info->dead_reloc_roots);
@@ -1629,7 +1637,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1629 mutex_init(&fs_info->trans_mutex); 1637 mutex_init(&fs_info->trans_mutex);
1630 mutex_init(&fs_info->tree_log_mutex); 1638 mutex_init(&fs_info->tree_log_mutex);
1631 mutex_init(&fs_info->drop_mutex); 1639 mutex_init(&fs_info->drop_mutex);
1632 mutex_init(&fs_info->extent_ins_mutex);
1633 mutex_init(&fs_info->pinned_mutex); 1640 mutex_init(&fs_info->pinned_mutex);
1634 mutex_init(&fs_info->chunk_mutex); 1641 mutex_init(&fs_info->chunk_mutex);
1635 mutex_init(&fs_info->transaction_kthread_mutex); 1642 mutex_init(&fs_info->transaction_kthread_mutex);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fefe83ad2059..9b5da2b013e4 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -49,10 +49,13 @@ struct pending_extent_op {
49 int del; 49 int del;
50}; 50};
51 51
52static int finish_current_insert(struct btrfs_trans_handle *trans, 52static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
53 struct btrfs_root *extent_root, int all); 53 struct btrfs_root *root, u64 parent,
54static int del_pending_extents(struct btrfs_trans_handle *trans, 54 u64 root_objectid, u64 ref_generation,
55 struct btrfs_root *extent_root, int all); 55 u64 owner, struct btrfs_key *ins,
56 int ref_mod);
57static int update_reserved_extents(struct btrfs_root *root,
58 u64 bytenr, u64 num, int reserve);
56static int pin_down_bytes(struct btrfs_trans_handle *trans, 59static int pin_down_bytes(struct btrfs_trans_handle *trans,
57 struct btrfs_root *root, 60 struct btrfs_root *root,
58 u64 bytenr, u64 num_bytes, int is_data); 61 u64 bytenr, u64 num_bytes, int is_data);
@@ -60,6 +63,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
60 struct btrfs_root *root, 63 struct btrfs_root *root,
61 u64 bytenr, u64 num_bytes, int alloc, 64 u64 bytenr, u64 num_bytes, int alloc,
62 int mark_free); 65 int mark_free);
66static noinline int __btrfs_free_extent(struct btrfs_trans_handle *trans,
67 struct btrfs_root *root,
68 u64 bytenr, u64 num_bytes, u64 parent,
69 u64 root_objectid, u64 ref_generation,
70 u64 owner_objectid, int pin,
71 int ref_to_drop);
63 72
64static int do_chunk_alloc(struct btrfs_trans_handle *trans, 73static int do_chunk_alloc(struct btrfs_trans_handle *trans,
65 struct btrfs_root *extent_root, u64 alloc_bytes, 74 struct btrfs_root *extent_root, u64 alloc_bytes,
@@ -554,262 +563,13 @@ out:
554 return ret; 563 return ret;
555} 564}
556 565
557/*
558 * updates all the backrefs that are pending on update_list for the
559 * extent_root
560 */
561static noinline int update_backrefs(struct btrfs_trans_handle *trans,
562 struct btrfs_root *extent_root,
563 struct btrfs_path *path,
564 struct list_head *update_list)
565{
566 struct btrfs_key key;
567 struct btrfs_extent_ref *ref;
568 struct btrfs_fs_info *info = extent_root->fs_info;
569 struct pending_extent_op *op;
570 struct extent_buffer *leaf;
571 int ret = 0;
572 struct list_head *cur = update_list->next;
573 u64 ref_objectid;
574 u64 ref_root = extent_root->root_key.objectid;
575
576 op = list_entry(cur, struct pending_extent_op, list);
577
578search:
579 key.objectid = op->bytenr;
580 key.type = BTRFS_EXTENT_REF_KEY;
581 key.offset = op->orig_parent;
582
583 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
584 BUG_ON(ret);
585
586 leaf = path->nodes[0];
587
588loop:
589 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
590
591 ref_objectid = btrfs_ref_objectid(leaf, ref);
592
593 if (btrfs_ref_root(leaf, ref) != ref_root ||
594 btrfs_ref_generation(leaf, ref) != op->orig_generation ||
595 (ref_objectid != op->level &&
596 ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
597 printk(KERN_ERR "btrfs couldn't find %llu, parent %llu, "
598 "root %llu, owner %u\n",
599 (unsigned long long)op->bytenr,
600 (unsigned long long)op->orig_parent,
601 (unsigned long long)ref_root, op->level);
602 btrfs_print_leaf(extent_root, leaf);
603 BUG();
604 }
605
606 key.objectid = op->bytenr;
607 key.offset = op->parent;
608 key.type = BTRFS_EXTENT_REF_KEY;
609 ret = btrfs_set_item_key_safe(trans, extent_root, path, &key);
610 BUG_ON(ret);
611 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
612 btrfs_set_ref_generation(leaf, ref, op->generation);
613
614 cur = cur->next;
615
616 list_del_init(&op->list);
617 unlock_extent(&info->extent_ins, op->bytenr,
618 op->bytenr + op->num_bytes - 1, GFP_NOFS);
619 kfree(op);
620
621 if (cur == update_list) {
622 btrfs_mark_buffer_dirty(path->nodes[0]);
623 btrfs_release_path(extent_root, path);
624 goto out;
625 }
626
627 op = list_entry(cur, struct pending_extent_op, list);
628
629 path->slots[0]++;
630 while (path->slots[0] < btrfs_header_nritems(leaf)) {
631 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
632 if (key.objectid == op->bytenr &&
633 key.type == BTRFS_EXTENT_REF_KEY)
634 goto loop;
635 path->slots[0]++;
636 }
637
638 btrfs_mark_buffer_dirty(path->nodes[0]);
639 btrfs_release_path(extent_root, path);
640 goto search;
641
642out:
643 return 0;
644}
645
646static noinline int insert_extents(struct btrfs_trans_handle *trans,
647 struct btrfs_root *extent_root,
648 struct btrfs_path *path,
649 struct list_head *insert_list, int nr)
650{
651 struct btrfs_key *keys;
652 u32 *data_size;
653 struct pending_extent_op *op;
654 struct extent_buffer *leaf;
655 struct list_head *cur = insert_list->next;
656 struct btrfs_fs_info *info = extent_root->fs_info;
657 u64 ref_root = extent_root->root_key.objectid;
658 int i = 0, last = 0, ret;
659 int total = nr * 2;
660
661 if (!nr)
662 return 0;
663
664 keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS);
665 if (!keys)
666 return -ENOMEM;
667
668 data_size = kzalloc(total * sizeof(u32), GFP_NOFS);
669 if (!data_size) {
670 kfree(keys);
671 return -ENOMEM;
672 }
673
674 list_for_each_entry(op, insert_list, list) {
675 keys[i].objectid = op->bytenr;
676 keys[i].offset = op->num_bytes;
677 keys[i].type = BTRFS_EXTENT_ITEM_KEY;
678 data_size[i] = sizeof(struct btrfs_extent_item);
679 i++;
680
681 keys[i].objectid = op->bytenr;
682 keys[i].offset = op->parent;
683 keys[i].type = BTRFS_EXTENT_REF_KEY;
684 data_size[i] = sizeof(struct btrfs_extent_ref);
685 i++;
686 }
687
688 op = list_entry(cur, struct pending_extent_op, list);
689 i = 0;
690 while (i < total) {
691 int c;
692 ret = btrfs_insert_some_items(trans, extent_root, path,
693 keys+i, data_size+i, total-i);
694 BUG_ON(ret < 0);
695
696 if (last && ret > 1)
697 BUG();
698
699 leaf = path->nodes[0];
700 for (c = 0; c < ret; c++) {
701 int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY;
702
703 /*
704 * if the first item we inserted was a backref, then
705 * the EXTENT_ITEM will be the odd c's, else it will
706 * be the even c's
707 */
708 if ((ref_first && (c % 2)) ||
709 (!ref_first && !(c % 2))) {
710 struct btrfs_extent_item *itm;
711
712 itm = btrfs_item_ptr(leaf, path->slots[0] + c,
713 struct btrfs_extent_item);
714 btrfs_set_extent_refs(path->nodes[0], itm, 1);
715 op->del++;
716 } else {
717 struct btrfs_extent_ref *ref;
718
719 ref = btrfs_item_ptr(leaf, path->slots[0] + c,
720 struct btrfs_extent_ref);
721 btrfs_set_ref_root(leaf, ref, ref_root);
722 btrfs_set_ref_generation(leaf, ref,
723 op->generation);
724 btrfs_set_ref_objectid(leaf, ref, op->level);
725 btrfs_set_ref_num_refs(leaf, ref, 1);
726 op->del++;
727 }
728
729 /*
730 * using del to see when its ok to free up the
731 * pending_extent_op. In the case where we insert the
732 * last item on the list in order to help do batching
733 * we need to not free the extent op until we actually
734 * insert the extent_item
735 */
736 if (op->del == 2) {
737 unlock_extent(&info->extent_ins, op->bytenr,
738 op->bytenr + op->num_bytes - 1,
739 GFP_NOFS);
740 cur = cur->next;
741 list_del_init(&op->list);
742 kfree(op);
743 if (cur != insert_list)
744 op = list_entry(cur,
745 struct pending_extent_op,
746 list);
747 }
748 }
749 btrfs_mark_buffer_dirty(leaf);
750 btrfs_release_path(extent_root, path);
751
752 /*
753 * Ok backref's and items usually go right next to eachother,
754 * but if we could only insert 1 item that means that we
755 * inserted on the end of a leaf, and we have no idea what may
756 * be on the next leaf so we just play it safe. In order to
757 * try and help this case we insert the last thing on our
758 * insert list so hopefully it will end up being the last
759 * thing on the leaf and everything else will be before it,
760 * which will let us insert a whole bunch of items at the same
761 * time.
762 */
763 if (ret == 1 && !last && (i + ret < total)) {
764 /*
765 * last: where we will pick up the next time around
766 * i: our current key to insert, will be total - 1
767 * cur: the current op we are screwing with
768 * op: duh
769 */
770 last = i + ret;
771 i = total - 1;
772 cur = insert_list->prev;
773 op = list_entry(cur, struct pending_extent_op, list);
774 } else if (last) {
775 /*
776 * ok we successfully inserted the last item on the
777 * list, lets reset everything
778 *
779 * i: our current key to insert, so where we left off
780 * last time
781 * last: done with this
782 * cur: the op we are messing with
783 * op: duh
784 * total: since we inserted the last key, we need to
785 * decrement total so we dont overflow
786 */
787 i = last;
788 last = 0;
789 total--;
790 if (i < total) {
791 cur = insert_list->next;
792 op = list_entry(cur, struct pending_extent_op,
793 list);
794 }
795 } else {
796 i += ret;
797 }
798
799 cond_resched();
800 }
801 ret = 0;
802 kfree(keys);
803 kfree(data_size);
804 return ret;
805}
806
807static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, 566static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
808 struct btrfs_root *root, 567 struct btrfs_root *root,
809 struct btrfs_path *path, 568 struct btrfs_path *path,
810 u64 bytenr, u64 parent, 569 u64 bytenr, u64 parent,
811 u64 ref_root, u64 ref_generation, 570 u64 ref_root, u64 ref_generation,
812 u64 owner_objectid) 571 u64 owner_objectid,
572 int refs_to_add)
813{ 573{
814 struct btrfs_key key; 574 struct btrfs_key key;
815 struct extent_buffer *leaf; 575 struct extent_buffer *leaf;
@@ -829,9 +589,10 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
829 btrfs_set_ref_root(leaf, ref, ref_root); 589 btrfs_set_ref_root(leaf, ref, ref_root);
830 btrfs_set_ref_generation(leaf, ref, ref_generation); 590 btrfs_set_ref_generation(leaf, ref, ref_generation);
831 btrfs_set_ref_objectid(leaf, ref, owner_objectid); 591 btrfs_set_ref_objectid(leaf, ref, owner_objectid);
832 btrfs_set_ref_num_refs(leaf, ref, 1); 592 btrfs_set_ref_num_refs(leaf, ref, refs_to_add);
833 } else if (ret == -EEXIST) { 593 } else if (ret == -EEXIST) {
834 u64 existing_owner; 594 u64 existing_owner;
595
835 BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID); 596 BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
836 leaf = path->nodes[0]; 597 leaf = path->nodes[0];
837 ref = btrfs_item_ptr(leaf, path->slots[0], 598 ref = btrfs_item_ptr(leaf, path->slots[0],
@@ -845,7 +606,7 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
845 606
846 num_refs = btrfs_ref_num_refs(leaf, ref); 607 num_refs = btrfs_ref_num_refs(leaf, ref);
847 BUG_ON(num_refs == 0); 608 BUG_ON(num_refs == 0);
848 btrfs_set_ref_num_refs(leaf, ref, num_refs + 1); 609 btrfs_set_ref_num_refs(leaf, ref, num_refs + refs_to_add);
849 610
850 existing_owner = btrfs_ref_objectid(leaf, ref); 611 existing_owner = btrfs_ref_objectid(leaf, ref);
851 if (existing_owner != owner_objectid && 612 if (existing_owner != owner_objectid &&
@@ -865,7 +626,8 @@ out:
865 626
866static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, 627static noinline int remove_extent_backref(struct btrfs_trans_handle *trans,
867 struct btrfs_root *root, 628 struct btrfs_root *root,
868 struct btrfs_path *path) 629 struct btrfs_path *path,
630 int refs_to_drop)
869{ 631{
870 struct extent_buffer *leaf; 632 struct extent_buffer *leaf;
871 struct btrfs_extent_ref *ref; 633 struct btrfs_extent_ref *ref;
@@ -875,8 +637,8 @@ static noinline int remove_extent_backref(struct btrfs_trans_handle *trans,
875 leaf = path->nodes[0]; 637 leaf = path->nodes[0];
876 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); 638 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
877 num_refs = btrfs_ref_num_refs(leaf, ref); 639 num_refs = btrfs_ref_num_refs(leaf, ref);
878 BUG_ON(num_refs == 0); 640 BUG_ON(num_refs < refs_to_drop);
879 num_refs -= 1; 641 num_refs -= refs_to_drop;
880 if (num_refs == 0) { 642 if (num_refs == 0) {
881 ret = btrfs_del_item(trans, root, path); 643 ret = btrfs_del_item(trans, root, path);
882 } else { 644 } else {
@@ -927,332 +689,28 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
927#endif 689#endif
928} 690}
929 691
930static noinline int free_extents(struct btrfs_trans_handle *trans,
931 struct btrfs_root *extent_root,
932 struct list_head *del_list)
933{
934 struct btrfs_fs_info *info = extent_root->fs_info;
935 struct btrfs_path *path;
936 struct btrfs_key key, found_key;
937 struct extent_buffer *leaf;
938 struct list_head *cur;
939 struct pending_extent_op *op;
940 struct btrfs_extent_item *ei;
941 int ret, num_to_del, extent_slot = 0, found_extent = 0;
942 u32 refs;
943 u64 bytes_freed = 0;
944
945 path = btrfs_alloc_path();
946 if (!path)
947 return -ENOMEM;
948 path->reada = 1;
949
950search:
951 /* search for the backref for the current ref we want to delete */
952 cur = del_list->next;
953 op = list_entry(cur, struct pending_extent_op, list);
954 ret = lookup_extent_backref(trans, extent_root, path, op->bytenr,
955 op->orig_parent,
956 extent_root->root_key.objectid,
957 op->orig_generation, op->level, 1);
958 if (ret) {
959 printk(KERN_ERR "btrfs unable to find backref byte nr %llu "
960 "root %llu gen %llu owner %u\n",
961 (unsigned long long)op->bytenr,
962 (unsigned long long)extent_root->root_key.objectid,
963 (unsigned long long)op->orig_generation, op->level);
964 btrfs_print_leaf(extent_root, path->nodes[0]);
965 WARN_ON(1);
966 goto out;
967 }
968
969 extent_slot = path->slots[0];
970 num_to_del = 1;
971 found_extent = 0;
972
973 /*
974 * if we aren't the first item on the leaf we can move back one and see
975 * if our ref is right next to our extent item
976 */
977 if (likely(extent_slot)) {
978 extent_slot--;
979 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
980 extent_slot);
981 if (found_key.objectid == op->bytenr &&
982 found_key.type == BTRFS_EXTENT_ITEM_KEY &&
983 found_key.offset == op->num_bytes) {
984 num_to_del++;
985 found_extent = 1;
986 }
987 }
988
989 /*
990 * if we didn't find the extent we need to delete the backref and then
991 * search for the extent item key so we can update its ref count
992 */
993 if (!found_extent) {
994 key.objectid = op->bytenr;
995 key.type = BTRFS_EXTENT_ITEM_KEY;
996 key.offset = op->num_bytes;
997
998 ret = remove_extent_backref(trans, extent_root, path);
999 BUG_ON(ret);
1000 btrfs_release_path(extent_root, path);
1001 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1002 BUG_ON(ret);
1003 extent_slot = path->slots[0];
1004 }
1005
1006 /* this is where we update the ref count for the extent */
1007 leaf = path->nodes[0];
1008 ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item);
1009 refs = btrfs_extent_refs(leaf, ei);
1010 BUG_ON(refs == 0);
1011 refs--;
1012 btrfs_set_extent_refs(leaf, ei, refs);
1013
1014 btrfs_mark_buffer_dirty(leaf);
1015
1016 /*
1017 * This extent needs deleting. The reason cur_slot is extent_slot +
1018 * num_to_del is because extent_slot points to the slot where the extent
1019 * is, and if the backref was not right next to the extent we will be
1020 * deleting at least 1 item, and will want to start searching at the
1021 * slot directly next to extent_slot. However if we did find the
1022 * backref next to the extent item them we will be deleting at least 2
1023 * items and will want to start searching directly after the ref slot
1024 */
1025 if (!refs) {
1026 struct list_head *pos, *n, *end;
1027 int cur_slot = extent_slot+num_to_del;
1028 u64 super_used;
1029 u64 root_used;
1030
1031 path->slots[0] = extent_slot;
1032 bytes_freed = op->num_bytes;
1033
1034 mutex_lock(&info->pinned_mutex);
1035 ret = pin_down_bytes(trans, extent_root, op->bytenr,
1036 op->num_bytes, op->level >=
1037 BTRFS_FIRST_FREE_OBJECTID);
1038 mutex_unlock(&info->pinned_mutex);
1039 BUG_ON(ret < 0);
1040 op->del = ret;
1041
1042 /*
1043 * we need to see if we can delete multiple things at once, so
1044 * start looping through the list of extents we are wanting to
1045 * delete and see if their extent/backref's are right next to
1046 * eachother and the extents only have 1 ref
1047 */
1048 for (pos = cur->next; pos != del_list; pos = pos->next) {
1049 struct pending_extent_op *tmp;
1050
1051 tmp = list_entry(pos, struct pending_extent_op, list);
1052
1053 /* we only want to delete extent+ref at this stage */
1054 if (cur_slot >= btrfs_header_nritems(leaf) - 1)
1055 break;
1056
1057 btrfs_item_key_to_cpu(leaf, &found_key, cur_slot);
1058 if (found_key.objectid != tmp->bytenr ||
1059 found_key.type != BTRFS_EXTENT_ITEM_KEY ||
1060 found_key.offset != tmp->num_bytes)
1061 break;
1062
1063 /* check to make sure this extent only has one ref */
1064 ei = btrfs_item_ptr(leaf, cur_slot,
1065 struct btrfs_extent_item);
1066 if (btrfs_extent_refs(leaf, ei) != 1)
1067 break;
1068
1069 btrfs_item_key_to_cpu(leaf, &found_key, cur_slot+1);
1070 if (found_key.objectid != tmp->bytenr ||
1071 found_key.type != BTRFS_EXTENT_REF_KEY ||
1072 found_key.offset != tmp->orig_parent)
1073 break;
1074
1075 /*
1076 * the ref is right next to the extent, we can set the
1077 * ref count to 0 since we will delete them both now
1078 */
1079 btrfs_set_extent_refs(leaf, ei, 0);
1080
1081 /* pin down the bytes for this extent */
1082 mutex_lock(&info->pinned_mutex);
1083 ret = pin_down_bytes(trans, extent_root, tmp->bytenr,
1084 tmp->num_bytes, tmp->level >=
1085 BTRFS_FIRST_FREE_OBJECTID);
1086 mutex_unlock(&info->pinned_mutex);
1087 BUG_ON(ret < 0);
1088
1089 /*
1090 * use the del field to tell if we need to go ahead and
1091 * free up the extent when we delete the item or not.
1092 */
1093 tmp->del = ret;
1094 bytes_freed += tmp->num_bytes;
1095
1096 num_to_del += 2;
1097 cur_slot += 2;
1098 }
1099 end = pos;
1100
1101 /* update the free space counters */
1102 spin_lock(&info->delalloc_lock);
1103 super_used = btrfs_super_bytes_used(&info->super_copy);
1104 btrfs_set_super_bytes_used(&info->super_copy,
1105 super_used - bytes_freed);
1106
1107 root_used = btrfs_root_used(&extent_root->root_item);
1108 btrfs_set_root_used(&extent_root->root_item,
1109 root_used - bytes_freed);
1110 spin_unlock(&info->delalloc_lock);
1111
1112 /* delete the items */
1113 ret = btrfs_del_items(trans, extent_root, path,
1114 path->slots[0], num_to_del);
1115 BUG_ON(ret);
1116
1117 /*
1118 * loop through the extents we deleted and do the cleanup work
1119 * on them
1120 */
1121 for (pos = cur, n = pos->next; pos != end;
1122 pos = n, n = pos->next) {
1123 struct pending_extent_op *tmp;
1124 tmp = list_entry(pos, struct pending_extent_op, list);
1125
1126 /*
1127 * remember tmp->del tells us wether or not we pinned
1128 * down the extent
1129 */
1130 ret = update_block_group(trans, extent_root,
1131 tmp->bytenr, tmp->num_bytes, 0,
1132 tmp->del);
1133 BUG_ON(ret);
1134
1135 list_del_init(&tmp->list);
1136 unlock_extent(&info->extent_ins, tmp->bytenr,
1137 tmp->bytenr + tmp->num_bytes - 1,
1138 GFP_NOFS);
1139 kfree(tmp);
1140 }
1141 } else if (refs && found_extent) {
1142 /*
1143 * the ref and extent were right next to eachother, but the
1144 * extent still has a ref, so just free the backref and keep
1145 * going
1146 */
1147 ret = remove_extent_backref(trans, extent_root, path);
1148 BUG_ON(ret);
1149
1150 list_del_init(&op->list);
1151 unlock_extent(&info->extent_ins, op->bytenr,
1152 op->bytenr + op->num_bytes - 1, GFP_NOFS);
1153 kfree(op);
1154 } else {
1155 /*
1156 * the extent has multiple refs and the backref we were looking
1157 * for was not right next to it, so just unlock and go next,
1158 * we're good to go
1159 */
1160 list_del_init(&op->list);
1161 unlock_extent(&info->extent_ins, op->bytenr,
1162 op->bytenr + op->num_bytes - 1, GFP_NOFS);
1163 kfree(op);
1164 }
1165
1166 btrfs_release_path(extent_root, path);
1167 if (!list_empty(del_list))
1168 goto search;
1169
1170out:
1171 btrfs_free_path(path);
1172 return ret;
1173}
1174
1175static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, 692static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1176 struct btrfs_root *root, u64 bytenr, 693 struct btrfs_root *root, u64 bytenr,
694 u64 num_bytes,
1177 u64 orig_parent, u64 parent, 695 u64 orig_parent, u64 parent,
1178 u64 orig_root, u64 ref_root, 696 u64 orig_root, u64 ref_root,
1179 u64 orig_generation, u64 ref_generation, 697 u64 orig_generation, u64 ref_generation,
1180 u64 owner_objectid) 698 u64 owner_objectid)
1181{ 699{
1182 int ret; 700 int ret;
1183 struct btrfs_root *extent_root = root->fs_info->extent_root; 701 int pin = owner_objectid < BTRFS_FIRST_FREE_OBJECTID;
1184 struct btrfs_path *path;
1185
1186 if (root == root->fs_info->extent_root) {
1187 struct pending_extent_op *extent_op;
1188 u64 num_bytes;
1189
1190 BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL);
1191 num_bytes = btrfs_level_size(root, (int)owner_objectid);
1192 mutex_lock(&root->fs_info->extent_ins_mutex);
1193 if (test_range_bit(&root->fs_info->extent_ins, bytenr,
1194 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) {
1195 u64 priv;
1196 ret = get_state_private(&root->fs_info->extent_ins,
1197 bytenr, &priv);
1198 BUG_ON(ret);
1199 extent_op = (struct pending_extent_op *)
1200 (unsigned long)priv;
1201 BUG_ON(extent_op->parent != orig_parent);
1202 BUG_ON(extent_op->generation != orig_generation);
1203
1204 extent_op->parent = parent;
1205 extent_op->generation = ref_generation;
1206 } else {
1207 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
1208 BUG_ON(!extent_op);
1209
1210 extent_op->type = PENDING_BACKREF_UPDATE;
1211 extent_op->bytenr = bytenr;
1212 extent_op->num_bytes = num_bytes;
1213 extent_op->parent = parent;
1214 extent_op->orig_parent = orig_parent;
1215 extent_op->generation = ref_generation;
1216 extent_op->orig_generation = orig_generation;
1217 extent_op->level = (int)owner_objectid;
1218 INIT_LIST_HEAD(&extent_op->list);
1219 extent_op->del = 0;
1220
1221 set_extent_bits(&root->fs_info->extent_ins,
1222 bytenr, bytenr + num_bytes - 1,
1223 EXTENT_WRITEBACK, GFP_NOFS);
1224 set_state_private(&root->fs_info->extent_ins,
1225 bytenr, (unsigned long)extent_op);
1226 }
1227 mutex_unlock(&root->fs_info->extent_ins_mutex);
1228 return 0;
1229 }
1230 702
1231 path = btrfs_alloc_path(); 703 ret = btrfs_update_delayed_ref(trans, bytenr, num_bytes,
1232 if (!path) 704 orig_parent, parent, orig_root,
1233 return -ENOMEM; 705 ref_root, orig_generation,
1234 ret = lookup_extent_backref(trans, extent_root, path, 706 ref_generation, owner_objectid, pin);
1235 bytenr, orig_parent, orig_root,
1236 orig_generation, owner_objectid, 1);
1237 if (ret)
1238 goto out;
1239 ret = remove_extent_backref(trans, extent_root, path);
1240 if (ret)
1241 goto out;
1242 ret = insert_extent_backref(trans, extent_root, path, bytenr,
1243 parent, ref_root, ref_generation,
1244 owner_objectid);
1245 BUG_ON(ret); 707 BUG_ON(ret);
1246 finish_current_insert(trans, extent_root, 0);
1247 del_pending_extents(trans, extent_root, 0);
1248out:
1249 btrfs_free_path(path);
1250 return ret; 708 return ret;
1251} 709}
1252 710
1253int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, 711int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1254 struct btrfs_root *root, u64 bytenr, 712 struct btrfs_root *root, u64 bytenr,
1255 u64 orig_parent, u64 parent, 713 u64 num_bytes, u64 orig_parent, u64 parent,
1256 u64 ref_root, u64 ref_generation, 714 u64 ref_root, u64 ref_generation,
1257 u64 owner_objectid) 715 u64 owner_objectid)
1258{ 716{
@@ -1260,20 +718,36 @@ int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1260 if (ref_root == BTRFS_TREE_LOG_OBJECTID && 718 if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
1261 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 719 owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
1262 return 0; 720 return 0;
1263 ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent, 721
1264 parent, ref_root, ref_root, 722 ret = __btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
1265 ref_generation, ref_generation, 723 orig_parent, parent, ref_root,
1266 owner_objectid); 724 ref_root, ref_generation,
725 ref_generation, owner_objectid);
1267 return ret; 726 return ret;
1268} 727}
1269
1270static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 728static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1271 struct btrfs_root *root, u64 bytenr, 729 struct btrfs_root *root, u64 bytenr,
730 u64 num_bytes,
1272 u64 orig_parent, u64 parent, 731 u64 orig_parent, u64 parent,
1273 u64 orig_root, u64 ref_root, 732 u64 orig_root, u64 ref_root,
1274 u64 orig_generation, u64 ref_generation, 733 u64 orig_generation, u64 ref_generation,
1275 u64 owner_objectid) 734 u64 owner_objectid)
1276{ 735{
736 int ret;
737
738 ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, ref_root,
739 ref_generation, owner_objectid,
740 BTRFS_ADD_DELAYED_REF, 0);
741 BUG_ON(ret);
742 return ret;
743}
744
745static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
746 struct btrfs_root *root, u64 bytenr,
747 u64 num_bytes, u64 parent, u64 ref_root,
748 u64 ref_generation, u64 owner_objectid,
749 int refs_to_add)
750{
1277 struct btrfs_path *path; 751 struct btrfs_path *path;
1278 int ret; 752 int ret;
1279 struct btrfs_key key; 753 struct btrfs_key key;
@@ -1288,15 +762,19 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1288 path->reada = 1; 762 path->reada = 1;
1289 key.objectid = bytenr; 763 key.objectid = bytenr;
1290 key.type = BTRFS_EXTENT_ITEM_KEY; 764 key.type = BTRFS_EXTENT_ITEM_KEY;
1291 key.offset = (u64)-1; 765 key.offset = num_bytes;
1292 766
1293 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 767 /* first find the extent item and update its reference count */
1294 0, 1); 768 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
769 path, 0, 1);
1295 if (ret < 0) 770 if (ret < 0)
1296 return ret; 771 return ret;
1297 BUG_ON(ret == 0 || path->slots[0] == 0);
1298 772
1299 path->slots[0]--; 773 if (ret > 0) {
774 WARN_ON(1);
775 btrfs_free_path(path);
776 return -EIO;
777 }
1300 l = path->nodes[0]; 778 l = path->nodes[0];
1301 779
1302 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 780 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
@@ -1310,21 +788,20 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1310 BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); 788 BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);
1311 789
1312 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 790 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
791
1313 refs = btrfs_extent_refs(l, item); 792 refs = btrfs_extent_refs(l, item);
1314 btrfs_set_extent_refs(l, item, refs + 1); 793 btrfs_set_extent_refs(l, item, refs + refs_to_add);
1315 btrfs_mark_buffer_dirty(path->nodes[0]); 794 btrfs_mark_buffer_dirty(path->nodes[0]);
1316 795
1317 btrfs_release_path(root->fs_info->extent_root, path); 796 btrfs_release_path(root->fs_info->extent_root, path);
1318 797
1319 path->reada = 1; 798 path->reada = 1;
799 /* now insert the actual backref */
1320 ret = insert_extent_backref(trans, root->fs_info->extent_root, 800 ret = insert_extent_backref(trans, root->fs_info->extent_root,
1321 path, bytenr, parent, 801 path, bytenr, parent,
1322 ref_root, ref_generation, 802 ref_root, ref_generation,
1323 owner_objectid); 803 owner_objectid, refs_to_add);
1324 BUG_ON(ret); 804 BUG_ON(ret);
1325 finish_current_insert(trans, root->fs_info->extent_root, 0);
1326 del_pending_extents(trans, root->fs_info->extent_root, 0);
1327
1328 btrfs_free_path(path); 805 btrfs_free_path(path);
1329 return 0; 806 return 0;
1330} 807}
@@ -1339,68 +816,245 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1339 if (ref_root == BTRFS_TREE_LOG_OBJECTID && 816 if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
1340 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 817 owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
1341 return 0; 818 return 0;
1342 ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent, 819
820 ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, parent,
1343 0, ref_root, 0, ref_generation, 821 0, ref_root, 0, ref_generation,
1344 owner_objectid); 822 owner_objectid);
1345 return ret; 823 return ret;
1346} 824}
1347 825
1348int btrfs_extent_post_op(struct btrfs_trans_handle *trans, 826static int drop_delayed_ref(struct btrfs_trans_handle *trans,
1349 struct btrfs_root *root) 827 struct btrfs_root *root,
828 struct btrfs_delayed_ref_node *node)
829{
830 int ret = 0;
831 struct btrfs_delayed_ref *ref = btrfs_delayed_node_to_ref(node);
832
833 BUG_ON(node->ref_mod == 0);
834 ret = __btrfs_free_extent(trans, root, node->bytenr, node->num_bytes,
835 node->parent, ref->root, ref->generation,
836 ref->owner_objectid, ref->pin, node->ref_mod);
837
838 return ret;
839}
840
841/* helper function to actually process a single delayed ref entry */
842static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans,
843 struct btrfs_root *root,
844 struct btrfs_delayed_ref_node *node,
845 int insert_reserved)
1350{ 846{
1351 u64 start;
1352 u64 end;
1353 int ret; 847 int ret;
848 struct btrfs_delayed_ref *ref;
1354 849
1355 while(1) { 850 if (node->parent == (u64)-1) {
1356 finish_current_insert(trans, root->fs_info->extent_root, 1); 851 struct btrfs_delayed_ref_head *head;
1357 del_pending_extents(trans, root->fs_info->extent_root, 1); 852 /*
853 * we've hit the end of the chain and we were supposed
854 * to insert this extent into the tree. But, it got
855 * deleted before we ever needed to insert it, so all
856 * we have to do is clean up the accounting
857 */
858 if (insert_reserved) {
859 update_reserved_extents(root, node->bytenr,
860 node->num_bytes, 0);
861 }
862 head = btrfs_delayed_node_to_head(node);
863 mutex_unlock(&head->mutex);
864 return 0;
865 }
1358 866
1359 /* is there more work to do? */ 867 ref = btrfs_delayed_node_to_ref(node);
1360 ret = find_first_extent_bit(&root->fs_info->pending_del, 868 if (ref->action == BTRFS_ADD_DELAYED_REF) {
1361 0, &start, &end, EXTENT_WRITEBACK); 869 if (insert_reserved) {
1362 if (!ret) 870 struct btrfs_key ins;
1363 continue; 871
1364 ret = find_first_extent_bit(&root->fs_info->extent_ins, 872 ins.objectid = node->bytenr;
1365 0, &start, &end, EXTENT_WRITEBACK); 873 ins.offset = node->num_bytes;
1366 if (!ret) 874 ins.type = BTRFS_EXTENT_ITEM_KEY;
1367 continue; 875
1368 break; 876 /* record the full extent allocation */
877 ret = __btrfs_alloc_reserved_extent(trans, root,
878 node->parent, ref->root,
879 ref->generation, ref->owner_objectid,
880 &ins, node->ref_mod);
881 update_reserved_extents(root, node->bytenr,
882 node->num_bytes, 0);
883 } else {
884 /* just add one backref */
885 ret = add_extent_ref(trans, root, node->bytenr,
886 node->num_bytes,
887 node->parent, ref->root, ref->generation,
888 ref->owner_objectid, node->ref_mod);
889 }
890 BUG_ON(ret);
891 } else if (ref->action == BTRFS_DROP_DELAYED_REF) {
892 WARN_ON(insert_reserved);
893 ret = drop_delayed_ref(trans, root, node);
1369 } 894 }
1370 return 0; 895 return 0;
1371} 896}
1372 897
1373int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, 898static noinline struct btrfs_delayed_ref_node *
1374 struct btrfs_root *root, u64 bytenr, 899select_delayed_ref(struct btrfs_delayed_ref_head *head)
1375 u64 num_bytes, u32 *refs)
1376{ 900{
1377 struct btrfs_path *path; 901 struct rb_node *node;
902 struct btrfs_delayed_ref_node *ref;
903 int action = BTRFS_ADD_DELAYED_REF;
904again:
905 /*
906 * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
907 * this prevents ref count from going down to zero when
908 * there still are pending delayed ref.
909 */
910 node = rb_prev(&head->node.rb_node);
911 while (1) {
912 if (!node)
913 break;
914 ref = rb_entry(node, struct btrfs_delayed_ref_node,
915 rb_node);
916 if (ref->bytenr != head->node.bytenr)
917 break;
918 if (btrfs_delayed_node_to_ref(ref)->action == action)
919 return ref;
920 node = rb_prev(node);
921 }
922 if (action == BTRFS_ADD_DELAYED_REF) {
923 action = BTRFS_DROP_DELAYED_REF;
924 goto again;
925 }
926 return NULL;
927}
928
929/*
930 * this starts processing the delayed reference count updates and
931 * extent insertions we have queued up so far. count can be
932 * 0, which means to process everything in the tree at the start
933 * of the run (but not newly added entries), or it can be some target
934 * number you'd like to process.
935 */
936int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
937 struct btrfs_root *root, unsigned long count)
938{
939 struct rb_node *node;
940 struct btrfs_delayed_ref_root *delayed_refs;
941 struct btrfs_delayed_ref_node *ref;
942 struct btrfs_delayed_ref_head *locked_ref = NULL;
1378 int ret; 943 int ret;
1379 struct btrfs_key key; 944 int must_insert_reserved = 0;
1380 struct extent_buffer *l; 945 int run_all = count == (unsigned long)-1;
1381 struct btrfs_extent_item *item;
1382 946
1383 WARN_ON(num_bytes < root->sectorsize); 947 if (root == root->fs_info->extent_root)
1384 path = btrfs_alloc_path(); 948 root = root->fs_info->tree_root;
1385 path->reada = 1; 949
1386 key.objectid = bytenr; 950 delayed_refs = &trans->transaction->delayed_refs;
1387 key.offset = num_bytes; 951again:
1388 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 952 spin_lock(&delayed_refs->lock);
1389 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 953 if (count == 0)
1390 0, 0); 954 count = delayed_refs->num_entries;
1391 if (ret < 0) 955 while (1) {
1392 goto out; 956 if (!locked_ref) {
1393 if (ret != 0) { 957 /*
1394 btrfs_print_leaf(root, path->nodes[0]); 958 * no locked ref, go find something we can
1395 printk(KERN_INFO "btrfs failed to find block number %llu\n", 959 * process in the rbtree. We start at
1396 (unsigned long long)bytenr); 960 * the beginning of the tree, there may be less
1397 BUG(); 961 * lock contention if we do something smarter here.
962 */
963 node = rb_first(&delayed_refs->root);
964 if (!node) {
965 spin_unlock(&delayed_refs->lock);
966 break;
967 }
968
969 ref = rb_entry(node, struct btrfs_delayed_ref_node,
970 rb_node);
971 ret = btrfs_lock_delayed_ref(trans, ref, &locked_ref);
972 if (ret) {
973 spin_unlock(&delayed_refs->lock);
974 break;
975 }
976 }
977
978 /*
979 * record the must insert reserved flag before we
980 * drop the spin lock.
981 */
982 must_insert_reserved = locked_ref->must_insert_reserved;
983 locked_ref->must_insert_reserved = 0;
984
985 /*
986 * locked_ref is the head node, so we have to go one
987 * node back for any delayed ref updates
988 */
989
990 ref = select_delayed_ref(locked_ref);
991 if (!ref) {
992 /* All delayed refs have been processed, Go ahead
993 * and send the head node to run_one_delayed_ref,
994 * so that any accounting fixes can happen
995 */
996 ref = &locked_ref->node;
997 locked_ref = NULL;
998 }
999
1000 ref->in_tree = 0;
1001 rb_erase(&ref->rb_node, &delayed_refs->root);
1002 delayed_refs->num_entries--;
1003 spin_unlock(&delayed_refs->lock);
1004
1005 ret = run_one_delayed_ref(trans, root, ref,
1006 must_insert_reserved);
1007 BUG_ON(ret);
1008 btrfs_put_delayed_ref(ref);
1009
1010 /* once we lock the head ref, we have to process all the
1011 * entries for it. So, we might end up doing more entries
1012 * that count was asking us to do.
1013 */
1014 if (count > 0)
1015 count--;
1016
1017 /*
1018 * we set locked_ref to null above if we're all done
1019 * with this bytenr
1020 */
1021 if (!locked_ref && count == 0)
1022 break;
1023
1024 spin_lock(&delayed_refs->lock);
1025 }
1026 if (run_all) {
1027 spin_lock(&delayed_refs->lock);
1028 node = rb_first(&delayed_refs->root);
1029 if (!node) {
1030 spin_unlock(&delayed_refs->lock);
1031 goto out;
1032 }
1033
1034 while (node) {
1035 ref = rb_entry(node, struct btrfs_delayed_ref_node,
1036 rb_node);
1037 if (btrfs_delayed_ref_is_head(ref)) {
1038 struct btrfs_delayed_ref_head *head;
1039
1040 head = btrfs_delayed_node_to_head(ref);
1041 atomic_inc(&ref->refs);
1042
1043 spin_unlock(&delayed_refs->lock);
1044 mutex_lock(&head->mutex);
1045 mutex_unlock(&head->mutex);
1046
1047 btrfs_put_delayed_ref(ref);
1048 goto again;
1049 }
1050 node = rb_next(node);
1051 }
1052 spin_unlock(&delayed_refs->lock);
1053 count = (unsigned long)-1;
1054 schedule_timeout(1);
1055 goto again;
1398 } 1056 }
1399 l = path->nodes[0];
1400 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
1401 *refs = btrfs_extent_refs(l, item);
1402out: 1057out:
1403 btrfs_free_path(path);
1404 return 0; 1058 return 0;
1405} 1059}
1406 1060
@@ -1624,7 +1278,7 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
1624 int refi = 0; 1278 int refi = 0;
1625 int slot; 1279 int slot;
1626 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 1280 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
1627 u64, u64, u64, u64, u64, u64, u64, u64); 1281 u64, u64, u64, u64, u64, u64, u64, u64, u64);
1628 1282
1629 ref_root = btrfs_header_owner(buf); 1283 ref_root = btrfs_header_owner(buf);
1630 ref_generation = btrfs_header_generation(buf); 1284 ref_generation = btrfs_header_generation(buf);
@@ -1696,12 +1350,19 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
1696 1350
1697 if (level == 0) { 1351 if (level == 0) {
1698 btrfs_item_key_to_cpu(buf, &key, slot); 1352 btrfs_item_key_to_cpu(buf, &key, slot);
1353 fi = btrfs_item_ptr(buf, slot,
1354 struct btrfs_file_extent_item);
1355
1356 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1357 if (bytenr == 0)
1358 continue;
1699 1359
1700 ret = process_func(trans, root, bytenr, 1360 ret = process_func(trans, root, bytenr,
1701 orig_buf->start, buf->start, 1361 btrfs_file_extent_disk_num_bytes(buf, fi),
1702 orig_root, ref_root, 1362 orig_buf->start, buf->start,
1703 orig_generation, ref_generation, 1363 orig_root, ref_root,
1704 key.objectid); 1364 orig_generation, ref_generation,
1365 key.objectid);
1705 1366
1706 if (ret) { 1367 if (ret) {
1707 faili = slot; 1368 faili = slot;
@@ -1709,7 +1370,7 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
1709 goto fail; 1370 goto fail;
1710 } 1371 }
1711 } else { 1372 } else {
1712 ret = process_func(trans, root, bytenr, 1373 ret = process_func(trans, root, bytenr, buf->len,
1713 orig_buf->start, buf->start, 1374 orig_buf->start, buf->start,
1714 orig_root, ref_root, 1375 orig_root, ref_root,
1715 orig_generation, ref_generation, 1376 orig_generation, ref_generation,
@@ -1786,17 +1447,17 @@ int btrfs_update_ref(struct btrfs_trans_handle *trans,
1786 if (bytenr == 0) 1447 if (bytenr == 0)
1787 continue; 1448 continue;
1788 ret = __btrfs_update_extent_ref(trans, root, bytenr, 1449 ret = __btrfs_update_extent_ref(trans, root, bytenr,
1789 orig_buf->start, buf->start, 1450 btrfs_file_extent_disk_num_bytes(buf, fi),
1790 orig_root, ref_root, 1451 orig_buf->start, buf->start,
1791 orig_generation, ref_generation, 1452 orig_root, ref_root, orig_generation,
1792 key.objectid); 1453 ref_generation, key.objectid);
1793 if (ret) 1454 if (ret)
1794 goto fail; 1455 goto fail;
1795 } else { 1456 } else {
1796 bytenr = btrfs_node_blockptr(buf, slot); 1457 bytenr = btrfs_node_blockptr(buf, slot);
1797 ret = __btrfs_update_extent_ref(trans, root, bytenr, 1458 ret = __btrfs_update_extent_ref(trans, root, bytenr,
1798 orig_buf->start, buf->start, 1459 buf->len, orig_buf->start,
1799 orig_root, ref_root, 1460 buf->start, orig_root, ref_root,
1800 orig_generation, ref_generation, 1461 orig_generation, ref_generation,
1801 level - 1); 1462 level - 1);
1802 if (ret) 1463 if (ret)
@@ -1815,7 +1476,6 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
1815 struct btrfs_block_group_cache *cache) 1476 struct btrfs_block_group_cache *cache)
1816{ 1477{
1817 int ret; 1478 int ret;
1818 int pending_ret;
1819 struct btrfs_root *extent_root = root->fs_info->extent_root; 1479 struct btrfs_root *extent_root = root->fs_info->extent_root;
1820 unsigned long bi; 1480 unsigned long bi;
1821 struct extent_buffer *leaf; 1481 struct extent_buffer *leaf;
@@ -1831,12 +1491,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
1831 btrfs_mark_buffer_dirty(leaf); 1491 btrfs_mark_buffer_dirty(leaf);
1832 btrfs_release_path(extent_root, path); 1492 btrfs_release_path(extent_root, path);
1833fail: 1493fail:
1834 finish_current_insert(trans, extent_root, 0);
1835 pending_ret = del_pending_extents(trans, extent_root, 0);
1836 if (ret) 1494 if (ret)
1837 return ret; 1495 return ret;
1838 if (pending_ret)
1839 return pending_ret;
1840 return 0; 1496 return 0;
1841 1497
1842} 1498}
@@ -2474,193 +2130,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
2474 return ret; 2130 return ret;
2475} 2131}
2476 2132
2477static int finish_current_insert(struct btrfs_trans_handle *trans,
2478 struct btrfs_root *extent_root, int all)
2479{
2480 u64 start;
2481 u64 end;
2482 u64 priv;
2483 u64 search = 0;
2484 struct btrfs_fs_info *info = extent_root->fs_info;
2485 struct btrfs_path *path;
2486 struct pending_extent_op *extent_op, *tmp;
2487 struct list_head insert_list, update_list;
2488 int ret;
2489 int num_inserts = 0, max_inserts, restart = 0;
2490
2491 path = btrfs_alloc_path();
2492 INIT_LIST_HEAD(&insert_list);
2493 INIT_LIST_HEAD(&update_list);
2494
2495 max_inserts = extent_root->leafsize /
2496 (2 * sizeof(struct btrfs_key) + 2 * sizeof(struct btrfs_item) +
2497 sizeof(struct btrfs_extent_ref) +
2498 sizeof(struct btrfs_extent_item));
2499again:
2500 mutex_lock(&info->extent_ins_mutex);
2501 while (1) {
2502 ret = find_first_extent_bit(&info->extent_ins, search, &start,
2503 &end, EXTENT_WRITEBACK);
2504 if (ret) {
2505 if (restart && !num_inserts &&
2506 list_empty(&update_list)) {
2507 restart = 0;
2508 search = 0;
2509 continue;
2510 }
2511 break;
2512 }
2513
2514 ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS);
2515 if (!ret) {
2516 if (all)
2517 restart = 1;
2518 search = end + 1;
2519 if (need_resched()) {
2520 mutex_unlock(&info->extent_ins_mutex);
2521 cond_resched();
2522 mutex_lock(&info->extent_ins_mutex);
2523 }
2524 continue;
2525 }
2526
2527 ret = get_state_private(&info->extent_ins, start, &priv);
2528 BUG_ON(ret);
2529 extent_op = (struct pending_extent_op *)(unsigned long) priv;
2530
2531 if (extent_op->type == PENDING_EXTENT_INSERT) {
2532 num_inserts++;
2533 list_add_tail(&extent_op->list, &insert_list);
2534 search = end + 1;
2535 if (num_inserts == max_inserts) {
2536 restart = 1;
2537 break;
2538 }
2539 } else if (extent_op->type == PENDING_BACKREF_UPDATE) {
2540 list_add_tail(&extent_op->list, &update_list);
2541 search = end + 1;
2542 } else {
2543 BUG();
2544 }
2545 }
2546
2547 /*
2548 * process the update list, clear the writeback bit for it, and if
2549 * somebody marked this thing for deletion then just unlock it and be
2550 * done, the free_extents will handle it
2551 */
2552 list_for_each_entry_safe(extent_op, tmp, &update_list, list) {
2553 clear_extent_bits(&info->extent_ins, extent_op->bytenr,
2554 extent_op->bytenr + extent_op->num_bytes - 1,
2555 EXTENT_WRITEBACK, GFP_NOFS);
2556 if (extent_op->del) {
2557 list_del_init(&extent_op->list);
2558 unlock_extent(&info->extent_ins, extent_op->bytenr,
2559 extent_op->bytenr + extent_op->num_bytes
2560 - 1, GFP_NOFS);
2561 kfree(extent_op);
2562 }
2563 }
2564 mutex_unlock(&info->extent_ins_mutex);
2565
2566 /*
2567 * still have things left on the update list, go ahead an update
2568 * everything
2569 */
2570 if (!list_empty(&update_list)) {
2571 ret = update_backrefs(trans, extent_root, path, &update_list);
2572 BUG_ON(ret);
2573
2574 /* we may have COW'ed new blocks, so lets start over */
2575 if (all)
2576 restart = 1;
2577 }
2578
2579 /*
2580 * if no inserts need to be done, but we skipped some extents and we
2581 * need to make sure everything is cleaned then reset everything and
2582 * go back to the beginning
2583 */
2584 if (!num_inserts && restart) {
2585 search = 0;
2586 restart = 0;
2587 INIT_LIST_HEAD(&update_list);
2588 INIT_LIST_HEAD(&insert_list);
2589 goto again;
2590 } else if (!num_inserts) {
2591 goto out;
2592 }
2593
2594 /*
2595 * process the insert extents list. Again if we are deleting this
2596 * extent, then just unlock it, pin down the bytes if need be, and be
2597 * done with it. Saves us from having to actually insert the extent
2598 * into the tree and then subsequently come along and delete it
2599 */
2600 mutex_lock(&info->extent_ins_mutex);
2601 list_for_each_entry_safe(extent_op, tmp, &insert_list, list) {
2602 clear_extent_bits(&info->extent_ins, extent_op->bytenr,
2603 extent_op->bytenr + extent_op->num_bytes - 1,
2604 EXTENT_WRITEBACK, GFP_NOFS);
2605 if (extent_op->del) {
2606 u64 used;
2607 list_del_init(&extent_op->list);
2608 unlock_extent(&info->extent_ins, extent_op->bytenr,
2609 extent_op->bytenr + extent_op->num_bytes
2610 - 1, GFP_NOFS);
2611
2612 mutex_lock(&extent_root->fs_info->pinned_mutex);
2613 ret = pin_down_bytes(trans, extent_root,
2614 extent_op->bytenr,
2615 extent_op->num_bytes, 0);
2616 mutex_unlock(&extent_root->fs_info->pinned_mutex);
2617
2618 spin_lock(&info->delalloc_lock);
2619 used = btrfs_super_bytes_used(&info->super_copy);
2620 btrfs_set_super_bytes_used(&info->super_copy,
2621 used - extent_op->num_bytes);
2622 used = btrfs_root_used(&extent_root->root_item);
2623 btrfs_set_root_used(&extent_root->root_item,
2624 used - extent_op->num_bytes);
2625 spin_unlock(&info->delalloc_lock);
2626
2627 ret = update_block_group(trans, extent_root,
2628 extent_op->bytenr,
2629 extent_op->num_bytes,
2630 0, ret > 0);
2631 BUG_ON(ret);
2632 kfree(extent_op);
2633 num_inserts--;
2634 }
2635 }
2636 mutex_unlock(&info->extent_ins_mutex);
2637
2638 ret = insert_extents(trans, extent_root, path, &insert_list,
2639 num_inserts);
2640 BUG_ON(ret);
2641
2642 /*
2643 * if restart is set for whatever reason we need to go back and start
2644 * searching through the pending list again.
2645 *
2646 * We just inserted some extents, which could have resulted in new
2647 * blocks being allocated, which would result in new blocks needing
2648 * updates, so if all is set we _must_ restart to get the updated
2649 * blocks.
2650 */
2651 if (restart || all) {
2652 INIT_LIST_HEAD(&insert_list);
2653 INIT_LIST_HEAD(&update_list);
2654 search = 0;
2655 restart = 0;
2656 num_inserts = 0;
2657 goto again;
2658 }
2659out:
2660 btrfs_free_path(path);
2661 return 0;
2662}
2663
2664static int pin_down_bytes(struct btrfs_trans_handle *trans, 2133static int pin_down_bytes(struct btrfs_trans_handle *trans,
2665 struct btrfs_root *root, 2134 struct btrfs_root *root,
2666 u64 bytenr, u64 num_bytes, int is_data) 2135 u64 bytenr, u64 num_bytes, int is_data)
@@ -2686,6 +2155,7 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
2686 u64 header_transid = btrfs_header_generation(buf); 2155 u64 header_transid = btrfs_header_generation(buf);
2687 if (header_owner != BTRFS_TREE_LOG_OBJECTID && 2156 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
2688 header_owner != BTRFS_TREE_RELOC_OBJECTID && 2157 header_owner != BTRFS_TREE_RELOC_OBJECTID &&
2158 header_owner != BTRFS_DATA_RELOC_TREE_OBJECTID &&
2689 header_transid == trans->transid && 2159 header_transid == trans->transid &&
2690 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 2160 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
2691 clean_tree_block(NULL, root, buf); 2161 clean_tree_block(NULL, root, buf);
@@ -2710,7 +2180,8 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2710 struct btrfs_root *root, 2180 struct btrfs_root *root,
2711 u64 bytenr, u64 num_bytes, u64 parent, 2181 u64 bytenr, u64 num_bytes, u64 parent,
2712 u64 root_objectid, u64 ref_generation, 2182 u64 root_objectid, u64 ref_generation,
2713 u64 owner_objectid, int pin, int mark_free) 2183 u64 owner_objectid, int pin, int mark_free,
2184 int refs_to_drop)
2714{ 2185{
2715 struct btrfs_path *path; 2186 struct btrfs_path *path;
2716 struct btrfs_key key; 2187 struct btrfs_key key;
@@ -2753,7 +2224,8 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2753 break; 2224 break;
2754 } 2225 }
2755 if (!found_extent) { 2226 if (!found_extent) {
2756 ret = remove_extent_backref(trans, extent_root, path); 2227 ret = remove_extent_backref(trans, extent_root, path,
2228 refs_to_drop);
2757 BUG_ON(ret); 2229 BUG_ON(ret);
2758 btrfs_release_path(extent_root, path); 2230 btrfs_release_path(extent_root, path);
2759 ret = btrfs_search_slot(trans, extent_root, 2231 ret = btrfs_search_slot(trans, extent_root,
@@ -2771,8 +2243,9 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2771 btrfs_print_leaf(extent_root, path->nodes[0]); 2243 btrfs_print_leaf(extent_root, path->nodes[0]);
2772 WARN_ON(1); 2244 WARN_ON(1);
2773 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 2245 printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
2774 "root %llu gen %llu owner %llu\n", 2246 "parent %llu root %llu gen %llu owner %llu\n",
2775 (unsigned long long)bytenr, 2247 (unsigned long long)bytenr,
2248 (unsigned long long)parent,
2776 (unsigned long long)root_objectid, 2249 (unsigned long long)root_objectid,
2777 (unsigned long long)ref_generation, 2250 (unsigned long long)ref_generation,
2778 (unsigned long long)owner_objectid); 2251 (unsigned long long)owner_objectid);
@@ -2782,17 +2255,23 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2782 ei = btrfs_item_ptr(leaf, extent_slot, 2255 ei = btrfs_item_ptr(leaf, extent_slot,
2783 struct btrfs_extent_item); 2256 struct btrfs_extent_item);
2784 refs = btrfs_extent_refs(leaf, ei); 2257 refs = btrfs_extent_refs(leaf, ei);
2785 BUG_ON(refs == 0);
2786 refs -= 1;
2787 btrfs_set_extent_refs(leaf, ei, refs);
2788 2258
2259 /*
2260 * we're not allowed to delete the extent item if there
2261 * are other delayed ref updates pending
2262 */
2263
2264 BUG_ON(refs < refs_to_drop);
2265 refs -= refs_to_drop;
2266 btrfs_set_extent_refs(leaf, ei, refs);
2789 btrfs_mark_buffer_dirty(leaf); 2267 btrfs_mark_buffer_dirty(leaf);
2790 2268
2791 if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) { 2269 if (refs == 0 && found_extent &&
2270 path->slots[0] == extent_slot + 1) {
2792 struct btrfs_extent_ref *ref; 2271 struct btrfs_extent_ref *ref;
2793 ref = btrfs_item_ptr(leaf, path->slots[0], 2272 ref = btrfs_item_ptr(leaf, path->slots[0],
2794 struct btrfs_extent_ref); 2273 struct btrfs_extent_ref);
2795 BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1); 2274 BUG_ON(btrfs_ref_num_refs(leaf, ref) != refs_to_drop);
2796 /* if the back ref and the extent are next to each other 2275 /* if the back ref and the extent are next to each other
2797 * they get deleted below in one shot 2276 * they get deleted below in one shot
2798 */ 2277 */
@@ -2800,7 +2279,8 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2800 num_to_del = 2; 2279 num_to_del = 2;
2801 } else if (found_extent) { 2280 } else if (found_extent) {
2802 /* otherwise delete the extent back ref */ 2281 /* otherwise delete the extent back ref */
2803 ret = remove_extent_backref(trans, extent_root, path); 2282 ret = remove_extent_backref(trans, extent_root, path,
2283 refs_to_drop);
2804 BUG_ON(ret); 2284 BUG_ON(ret);
2805 /* if refs are 0, we need to setup the path for deletion */ 2285 /* if refs are 0, we need to setup the path for deletion */
2806 if (refs == 0) { 2286 if (refs == 0) {
@@ -2850,218 +2330,35 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2850 BUG_ON(ret); 2330 BUG_ON(ret);
2851 } 2331 }
2852 btrfs_free_path(path); 2332 btrfs_free_path(path);
2853 finish_current_insert(trans, extent_root, 0);
2854 return ret; 2333 return ret;
2855} 2334}
2856 2335
2857/* 2336/*
2858 * find all the blocks marked as pending in the radix tree and remove
2859 * them from the extent map
2860 */
2861static int del_pending_extents(struct btrfs_trans_handle *trans,
2862 struct btrfs_root *extent_root, int all)
2863{
2864 int ret;
2865 int err = 0;
2866 u64 start;
2867 u64 end;
2868 u64 priv;
2869 u64 search = 0;
2870 int nr = 0, skipped = 0;
2871 struct extent_io_tree *pending_del;
2872 struct extent_io_tree *extent_ins;
2873 struct pending_extent_op *extent_op;
2874 struct btrfs_fs_info *info = extent_root->fs_info;
2875 struct list_head delete_list;
2876
2877 INIT_LIST_HEAD(&delete_list);
2878 extent_ins = &extent_root->fs_info->extent_ins;
2879 pending_del = &extent_root->fs_info->pending_del;
2880
2881again:
2882 mutex_lock(&info->extent_ins_mutex);
2883 while (1) {
2884 ret = find_first_extent_bit(pending_del, search, &start, &end,
2885 EXTENT_WRITEBACK);
2886 if (ret) {
2887 if (all && skipped && !nr) {
2888 search = 0;
2889 skipped = 0;
2890 continue;
2891 }
2892 mutex_unlock(&info->extent_ins_mutex);
2893 break;
2894 }
2895
2896 ret = try_lock_extent(extent_ins, start, end, GFP_NOFS);
2897 if (!ret) {
2898 search = end+1;
2899 skipped = 1;
2900
2901 if (need_resched()) {
2902 mutex_unlock(&info->extent_ins_mutex);
2903 cond_resched();
2904 mutex_lock(&info->extent_ins_mutex);
2905 }
2906
2907 continue;
2908 }
2909 BUG_ON(ret < 0);
2910
2911 ret = get_state_private(pending_del, start, &priv);
2912 BUG_ON(ret);
2913 extent_op = (struct pending_extent_op *)(unsigned long)priv;
2914
2915 clear_extent_bits(pending_del, start, end, EXTENT_WRITEBACK,
2916 GFP_NOFS);
2917 if (!test_range_bit(extent_ins, start, end,
2918 EXTENT_WRITEBACK, 0)) {
2919 list_add_tail(&extent_op->list, &delete_list);
2920 nr++;
2921 } else {
2922 kfree(extent_op);
2923
2924 ret = get_state_private(&info->extent_ins, start,
2925 &priv);
2926 BUG_ON(ret);
2927 extent_op = (struct pending_extent_op *)
2928 (unsigned long)priv;
2929
2930 clear_extent_bits(&info->extent_ins, start, end,
2931 EXTENT_WRITEBACK, GFP_NOFS);
2932
2933 if (extent_op->type == PENDING_BACKREF_UPDATE) {
2934 list_add_tail(&extent_op->list, &delete_list);
2935 search = end + 1;
2936 nr++;
2937 continue;
2938 }
2939
2940 mutex_lock(&extent_root->fs_info->pinned_mutex);
2941 ret = pin_down_bytes(trans, extent_root, start,
2942 end + 1 - start, 0);
2943 mutex_unlock(&extent_root->fs_info->pinned_mutex);
2944
2945 ret = update_block_group(trans, extent_root, start,
2946 end + 1 - start, 0, ret > 0);
2947
2948 unlock_extent(extent_ins, start, end, GFP_NOFS);
2949 BUG_ON(ret);
2950 kfree(extent_op);
2951 }
2952 if (ret)
2953 err = ret;
2954
2955 search = end + 1;
2956
2957 if (need_resched()) {
2958 mutex_unlock(&info->extent_ins_mutex);
2959 cond_resched();
2960 mutex_lock(&info->extent_ins_mutex);
2961 }
2962 }
2963
2964 if (nr) {
2965 ret = free_extents(trans, extent_root, &delete_list);
2966 BUG_ON(ret);
2967 }
2968
2969 if (all && skipped) {
2970 INIT_LIST_HEAD(&delete_list);
2971 search = 0;
2972 nr = 0;
2973 goto again;
2974 }
2975
2976 if (!err)
2977 finish_current_insert(trans, extent_root, 0);
2978 return err;
2979}
2980
2981/*
2982 * remove an extent from the root, returns 0 on success 2337 * remove an extent from the root, returns 0 on success
2983 */ 2338 */
2984static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 2339static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2985 struct btrfs_root *root, 2340 struct btrfs_root *root,
2986 u64 bytenr, u64 num_bytes, u64 parent, 2341 u64 bytenr, u64 num_bytes, u64 parent,
2987 u64 root_objectid, u64 ref_generation, 2342 u64 root_objectid, u64 ref_generation,
2988 u64 owner_objectid, int pin) 2343 u64 owner_objectid, int pin,
2344 int refs_to_drop)
2989{ 2345{
2990 struct btrfs_root *extent_root = root->fs_info->extent_root;
2991 int pending_ret;
2992 int ret;
2993
2994 WARN_ON(num_bytes < root->sectorsize); 2346 WARN_ON(num_bytes < root->sectorsize);
2995 if (root == extent_root) {
2996 struct pending_extent_op *extent_op = NULL;
2997
2998 mutex_lock(&root->fs_info->extent_ins_mutex);
2999 if (test_range_bit(&root->fs_info->extent_ins, bytenr,
3000 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) {
3001 u64 priv;
3002 ret = get_state_private(&root->fs_info->extent_ins,
3003 bytenr, &priv);
3004 BUG_ON(ret);
3005 extent_op = (struct pending_extent_op *)
3006 (unsigned long)priv;
3007
3008 extent_op->del = 1;
3009 if (extent_op->type == PENDING_EXTENT_INSERT) {
3010 mutex_unlock(&root->fs_info->extent_ins_mutex);
3011 return 0;
3012 }
3013 }
3014
3015 if (extent_op) {
3016 ref_generation = extent_op->orig_generation;
3017 parent = extent_op->orig_parent;
3018 }
3019 2347
3020 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 2348 /*
3021 BUG_ON(!extent_op); 2349 * if metadata always pin
3022 2350 * if data pin when any transaction has committed this
3023 extent_op->type = PENDING_EXTENT_DELETE; 2351 */
3024 extent_op->bytenr = bytenr; 2352 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID ||
3025 extent_op->num_bytes = num_bytes; 2353 ref_generation != trans->transid)
3026 extent_op->parent = parent;
3027 extent_op->orig_parent = parent;
3028 extent_op->generation = ref_generation;
3029 extent_op->orig_generation = ref_generation;
3030 extent_op->level = (int)owner_objectid;
3031 INIT_LIST_HEAD(&extent_op->list);
3032 extent_op->del = 0;
3033
3034 set_extent_bits(&root->fs_info->pending_del,
3035 bytenr, bytenr + num_bytes - 1,
3036 EXTENT_WRITEBACK, GFP_NOFS);
3037 set_state_private(&root->fs_info->pending_del,
3038 bytenr, (unsigned long)extent_op);
3039 mutex_unlock(&root->fs_info->extent_ins_mutex);
3040 return 0;
3041 }
3042 /* if metadata always pin */
3043 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
3044 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
3045 mutex_lock(&root->fs_info->pinned_mutex);
3046 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
3047 mutex_unlock(&root->fs_info->pinned_mutex);
3048 update_reserved_extents(root, bytenr, num_bytes, 0);
3049 return 0;
3050 }
3051 pin = 1; 2354 pin = 1;
3052 }
3053 2355
3054 /* if data pin when any transaction has committed this */
3055 if (ref_generation != trans->transid) 2356 if (ref_generation != trans->transid)
3056 pin = 1; 2357 pin = 1;
3057 2358
3058 ret = __free_extent(trans, root, bytenr, num_bytes, parent, 2359 return __free_extent(trans, root, bytenr, num_bytes, parent,
3059 root_objectid, ref_generation, 2360 root_objectid, ref_generation,
3060 owner_objectid, pin, pin == 0); 2361 owner_objectid, pin, pin == 0, refs_to_drop);
3061
3062 finish_current_insert(trans, root->fs_info->extent_root, 0);
3063 pending_ret = del_pending_extents(trans, root->fs_info->extent_root, 0);
3064 return ret ? ret : pending_ret;
3065} 2362}
3066 2363
3067int btrfs_free_extent(struct btrfs_trans_handle *trans, 2364int btrfs_free_extent(struct btrfs_trans_handle *trans,
@@ -3072,9 +2369,26 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
3072{ 2369{
3073 int ret; 2370 int ret;
3074 2371
3075 ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent, 2372 /*
3076 root_objectid, ref_generation, 2373 * tree log blocks never actually go into the extent allocation
3077 owner_objectid, pin); 2374 * tree, just update pinning info and exit early.
2375 *
2376 * data extents referenced by the tree log do need to have
2377 * their reference counts bumped.
2378 */
2379 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID &&
2380 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
2381 mutex_lock(&root->fs_info->pinned_mutex);
2382 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
2383 mutex_unlock(&root->fs_info->pinned_mutex);
2384 update_reserved_extents(root, bytenr, num_bytes, 0);
2385 ret = 0;
2386 } else {
2387 ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent,
2388 root_objectid, ref_generation,
2389 owner_objectid,
2390 BTRFS_DROP_DELAYED_REF, 1);
2391 }
3078 return ret; 2392 return ret;
3079} 2393}
3080 2394
@@ -3475,10 +2789,10 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3475static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, 2789static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3476 struct btrfs_root *root, u64 parent, 2790 struct btrfs_root *root, u64 parent,
3477 u64 root_objectid, u64 ref_generation, 2791 u64 root_objectid, u64 ref_generation,
3478 u64 owner, struct btrfs_key *ins) 2792 u64 owner, struct btrfs_key *ins,
2793 int ref_mod)
3479{ 2794{
3480 int ret; 2795 int ret;
3481 int pending_ret;
3482 u64 super_used; 2796 u64 super_used;
3483 u64 root_used; 2797 u64 root_used;
3484 u64 num_bytes = ins->offset; 2798 u64 num_bytes = ins->offset;
@@ -3503,33 +2817,6 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3503 btrfs_set_root_used(&root->root_item, root_used + num_bytes); 2817 btrfs_set_root_used(&root->root_item, root_used + num_bytes);
3504 spin_unlock(&info->delalloc_lock); 2818 spin_unlock(&info->delalloc_lock);
3505 2819
3506 if (root == extent_root) {
3507 struct pending_extent_op *extent_op;
3508
3509 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
3510 BUG_ON(!extent_op);
3511
3512 extent_op->type = PENDING_EXTENT_INSERT;
3513 extent_op->bytenr = ins->objectid;
3514 extent_op->num_bytes = ins->offset;
3515 extent_op->parent = parent;
3516 extent_op->orig_parent = 0;
3517 extent_op->generation = ref_generation;
3518 extent_op->orig_generation = 0;
3519 extent_op->level = (int)owner;
3520 INIT_LIST_HEAD(&extent_op->list);
3521 extent_op->del = 0;
3522
3523 mutex_lock(&root->fs_info->extent_ins_mutex);
3524 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
3525 ins->objectid + ins->offset - 1,
3526 EXTENT_WRITEBACK, GFP_NOFS);
3527 set_state_private(&root->fs_info->extent_ins,
3528 ins->objectid, (unsigned long)extent_op);
3529 mutex_unlock(&root->fs_info->extent_ins_mutex);
3530 goto update_block;
3531 }
3532
3533 memcpy(&keys[0], ins, sizeof(*ins)); 2820 memcpy(&keys[0], ins, sizeof(*ins));
3534 keys[1].objectid = ins->objectid; 2821 keys[1].objectid = ins->objectid;
3535 keys[1].type = BTRFS_EXTENT_REF_KEY; 2822 keys[1].type = BTRFS_EXTENT_REF_KEY;
@@ -3546,31 +2833,24 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3546 2833
3547 extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 2834 extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3548 struct btrfs_extent_item); 2835 struct btrfs_extent_item);
3549 btrfs_set_extent_refs(path->nodes[0], extent_item, 1); 2836 btrfs_set_extent_refs(path->nodes[0], extent_item, ref_mod);
3550 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, 2837 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
3551 struct btrfs_extent_ref); 2838 struct btrfs_extent_ref);
3552 2839
3553 btrfs_set_ref_root(path->nodes[0], ref, root_objectid); 2840 btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
3554 btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); 2841 btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
3555 btrfs_set_ref_objectid(path->nodes[0], ref, owner); 2842 btrfs_set_ref_objectid(path->nodes[0], ref, owner);
3556 btrfs_set_ref_num_refs(path->nodes[0], ref, 1); 2843 btrfs_set_ref_num_refs(path->nodes[0], ref, ref_mod);
3557 2844
3558 btrfs_mark_buffer_dirty(path->nodes[0]); 2845 btrfs_mark_buffer_dirty(path->nodes[0]);
3559 2846
3560 trans->alloc_exclude_start = 0; 2847 trans->alloc_exclude_start = 0;
3561 trans->alloc_exclude_nr = 0; 2848 trans->alloc_exclude_nr = 0;
3562 btrfs_free_path(path); 2849 btrfs_free_path(path);
3563 finish_current_insert(trans, extent_root, 0);
3564 pending_ret = del_pending_extents(trans, extent_root, 0);
3565 2850
3566 if (ret) 2851 if (ret)
3567 goto out; 2852 goto out;
3568 if (pending_ret) {
3569 ret = pending_ret;
3570 goto out;
3571 }
3572 2853
3573update_block:
3574 ret = update_block_group(trans, root, ins->objectid, 2854 ret = update_block_group(trans, root, ins->objectid,
3575 ins->offset, 1, 0); 2855 ins->offset, 1, 0);
3576 if (ret) { 2856 if (ret) {
@@ -3592,9 +2872,12 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3592 2872
3593 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) 2873 if (root_objectid == BTRFS_TREE_LOG_OBJECTID)
3594 return 0; 2874 return 0;
3595 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, 2875
3596 ref_generation, owner, ins); 2876 ret = btrfs_add_delayed_ref(trans, ins->objectid,
3597 update_reserved_extents(root, ins->objectid, ins->offset, 0); 2877 ins->offset, parent, root_objectid,
2878 ref_generation, owner,
2879 BTRFS_ADD_DELAYED_EXTENT, 0);
2880 BUG_ON(ret);
3598 return ret; 2881 return ret;
3599} 2882}
3600 2883
@@ -3621,7 +2904,7 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
3621 BUG_ON(ret); 2904 BUG_ON(ret);
3622 put_block_group(block_group); 2905 put_block_group(block_group);
3623 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, 2906 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
3624 ref_generation, owner, ins); 2907 ref_generation, owner, ins, 1);
3625 return ret; 2908 return ret;
3626} 2909}
3627 2910
@@ -3640,20 +2923,18 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
3640 u64 search_end, struct btrfs_key *ins, u64 data) 2923 u64 search_end, struct btrfs_key *ins, u64 data)
3641{ 2924{
3642 int ret; 2925 int ret;
3643
3644 ret = __btrfs_reserve_extent(trans, root, num_bytes, 2926 ret = __btrfs_reserve_extent(trans, root, num_bytes,
3645 min_alloc_size, empty_size, hint_byte, 2927 min_alloc_size, empty_size, hint_byte,
3646 search_end, ins, data); 2928 search_end, ins, data);
3647 BUG_ON(ret); 2929 BUG_ON(ret);
3648 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 2930 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
3649 ret = __btrfs_alloc_reserved_extent(trans, root, parent, 2931 ret = btrfs_add_delayed_ref(trans, ins->objectid,
3650 root_objectid, ref_generation, 2932 ins->offset, parent, root_objectid,
3651 owner_objectid, ins); 2933 ref_generation, owner_objectid,
2934 BTRFS_ADD_DELAYED_EXTENT, 0);
3652 BUG_ON(ret); 2935 BUG_ON(ret);
3653
3654 } else {
3655 update_reserved_extents(root, ins->objectid, ins->offset, 1);
3656 } 2936 }
2937 update_reserved_extents(root, ins->objectid, ins->offset, 1);
3657 return ret; 2938 return ret;
3658} 2939}
3659 2940
@@ -3789,7 +3070,7 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
3789 3070
3790 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 3071 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
3791 3072
3792 ret = __btrfs_free_extent(trans, root, disk_bytenr, 3073 ret = btrfs_free_extent(trans, root, disk_bytenr,
3793 btrfs_file_extent_disk_num_bytes(leaf, fi), 3074 btrfs_file_extent_disk_num_bytes(leaf, fi),
3794 leaf->start, leaf_owner, leaf_generation, 3075 leaf->start, leaf_owner, leaf_generation,
3795 key.objectid, 0); 3076 key.objectid, 0);
@@ -3829,7 +3110,7 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
3829 */ 3110 */
3830 for (i = 0; i < ref->nritems; i++) { 3111 for (i = 0; i < ref->nritems; i++) {
3831 info = ref->extents + sorted[i].slot; 3112 info = ref->extents + sorted[i].slot;
3832 ret = __btrfs_free_extent(trans, root, info->bytenr, 3113 ret = btrfs_free_extent(trans, root, info->bytenr,
3833 info->num_bytes, ref->bytenr, 3114 info->num_bytes, ref->bytenr,
3834 ref->owner, ref->generation, 3115 ref->owner, ref->generation,
3835 info->objectid, 0); 3116 info->objectid, 0);
@@ -3846,12 +3127,13 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
3846 return 0; 3127 return 0;
3847} 3128}
3848 3129
3849static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, 3130static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
3131 struct btrfs_root *root, u64 start,
3850 u64 len, u32 *refs) 3132 u64 len, u32 *refs)
3851{ 3133{
3852 int ret; 3134 int ret;
3853 3135
3854 ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs); 3136 ret = btrfs_lookup_extent_ref(trans, root, start, len, refs);
3855 BUG_ON(ret); 3137 BUG_ON(ret);
3856 3138
3857#if 0 /* some debugging code in case we see problems here */ 3139#if 0 /* some debugging code in case we see problems here */
@@ -3959,7 +3241,8 @@ static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,
3959 * we just decrement it below and don't update any 3241 * we just decrement it below and don't update any
3960 * of the refs the leaf points to. 3242 * of the refs the leaf points to.
3961 */ 3243 */
3962 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); 3244 ret = drop_snap_lookup_refcount(trans, root, bytenr,
3245 blocksize, &refs);
3963 BUG_ON(ret); 3246 BUG_ON(ret);
3964 if (refs != 1) 3247 if (refs != 1)
3965 continue; 3248 continue;
@@ -4010,7 +3293,7 @@ static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,
4010 */ 3293 */
4011 for (i = 0; i < refi; i++) { 3294 for (i = 0; i < refi; i++) {
4012 bytenr = sorted[i].bytenr; 3295 bytenr = sorted[i].bytenr;
4013 ret = __btrfs_free_extent(trans, root, bytenr, 3296 ret = btrfs_free_extent(trans, root, bytenr,
4014 blocksize, eb->start, 3297 blocksize, eb->start,
4015 root_owner, root_gen, 0, 1); 3298 root_owner, root_gen, 0, 1);
4016 BUG_ON(ret); 3299 BUG_ON(ret);
@@ -4053,7 +3336,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4053 3336
4054 WARN_ON(*level < 0); 3337 WARN_ON(*level < 0);
4055 WARN_ON(*level >= BTRFS_MAX_LEVEL); 3338 WARN_ON(*level >= BTRFS_MAX_LEVEL);
4056 ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start, 3339 ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start,
4057 path->nodes[*level]->len, &refs); 3340 path->nodes[*level]->len, &refs);
4058 BUG_ON(ret); 3341 BUG_ON(ret);
4059 if (refs > 1) 3342 if (refs > 1)
@@ -4104,7 +3387,8 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4104 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 3387 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
4105 blocksize = btrfs_level_size(root, *level - 1); 3388 blocksize = btrfs_level_size(root, *level - 1);
4106 3389
4107 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); 3390 ret = drop_snap_lookup_refcount(trans, root, bytenr,
3391 blocksize, &refs);
4108 BUG_ON(ret); 3392 BUG_ON(ret);
4109 3393
4110 /* 3394 /*
@@ -4119,7 +3403,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4119 root_gen = btrfs_header_generation(parent); 3403 root_gen = btrfs_header_generation(parent);
4120 path->slots[*level]++; 3404 path->slots[*level]++;
4121 3405
4122 ret = __btrfs_free_extent(trans, root, bytenr, 3406 ret = btrfs_free_extent(trans, root, bytenr,
4123 blocksize, parent->start, 3407 blocksize, parent->start,
4124 root_owner, root_gen, 3408 root_owner, root_gen,
4125 *level - 1, 1); 3409 *level - 1, 1);
@@ -4165,7 +3449,7 @@ out:
4165 * cleanup and free the reference on the last node 3449 * cleanup and free the reference on the last node
4166 * we processed 3450 * we processed
4167 */ 3451 */
4168 ret = __btrfs_free_extent(trans, root, bytenr, blocksize, 3452 ret = btrfs_free_extent(trans, root, bytenr, blocksize,
4169 parent->start, root_owner, root_gen, 3453 parent->start, root_owner, root_gen,
4170 *level, 1); 3454 *level, 1);
4171 free_extent_buffer(path->nodes[*level]); 3455 free_extent_buffer(path->nodes[*level]);
@@ -5457,6 +4741,7 @@ static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
5457 root->root_key.objectid, 4741 root->root_key.objectid,
5458 trans->transid, key.objectid); 4742 trans->transid, key.objectid);
5459 BUG_ON(ret); 4743 BUG_ON(ret);
4744
5460 ret = btrfs_free_extent(trans, root, 4745 ret = btrfs_free_extent(trans, root,
5461 bytenr, num_bytes, leaf->start, 4746 bytenr, num_bytes, leaf->start,
5462 btrfs_header_owner(leaf), 4747 btrfs_header_owner(leaf),
@@ -5768,9 +5053,6 @@ static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,
5768 ref_path, NULL, NULL); 5053 ref_path, NULL, NULL);
5769 BUG_ON(ret); 5054 BUG_ON(ret);
5770 5055
5771 if (root == root->fs_info->extent_root)
5772 btrfs_extent_post_op(trans, root);
5773
5774 return 0; 5056 return 0;
5775} 5057}
5776 5058
@@ -6208,6 +5490,9 @@ again:
6208 btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); 5490 btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
6209 mutex_unlock(&root->fs_info->cleaner_mutex); 5491 mutex_unlock(&root->fs_info->cleaner_mutex);
6210 5492
5493 trans = btrfs_start_transaction(info->tree_root, 1);
5494 btrfs_commit_transaction(trans, info->tree_root);
5495
6211 while (1) { 5496 while (1) {
6212 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5497 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6213 if (ret < 0) 5498 if (ret < 0)
@@ -6500,9 +5785,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
6500 sizeof(cache->item)); 5785 sizeof(cache->item));
6501 BUG_ON(ret); 5786 BUG_ON(ret);
6502 5787
6503 finish_current_insert(trans, extent_root, 0);
6504 ret = del_pending_extents(trans, extent_root, 0);
6505 BUG_ON(ret);
6506 set_avail_alloc_bits(extent_root->fs_info, type); 5788 set_avail_alloc_bits(extent_root->fs_info, type);
6507 5789
6508 return 0; 5790 return 0;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index dc78954861b3..c80075497645 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -643,7 +643,9 @@ next_slot:
643 643
644 if (disk_bytenr != 0) { 644 if (disk_bytenr != 0) {
645 ret = btrfs_update_extent_ref(trans, root, 645 ret = btrfs_update_extent_ref(trans, root,
646 disk_bytenr, orig_parent, 646 disk_bytenr,
647 le64_to_cpu(old.disk_num_bytes),
648 orig_parent,
647 leaf->start, 649 leaf->start,
648 root->root_key.objectid, 650 root->root_key.objectid,
649 trans->transid, ins.objectid); 651 trans->transid, ins.objectid);
@@ -912,7 +914,7 @@ again:
912 btrfs_set_file_extent_other_encoding(leaf, fi, 0); 914 btrfs_set_file_extent_other_encoding(leaf, fi, 0);
913 915
914 if (orig_parent != leaf->start) { 916 if (orig_parent != leaf->start) {
915 ret = btrfs_update_extent_ref(trans, root, bytenr, 917 ret = btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
916 orig_parent, leaf->start, 918 orig_parent, leaf->start,
917 root->root_key.objectid, 919 root->root_key.objectid,
918 trans->transid, inode->i_ino); 920 trans->transid, inode->i_ino);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index d638c54d39e9..f94c2ad8996c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -65,6 +65,12 @@ static noinline int join_transaction(struct btrfs_root *root)
65 cur_trans->use_count = 1; 65 cur_trans->use_count = 1;
66 cur_trans->commit_done = 0; 66 cur_trans->commit_done = 0;
67 cur_trans->start_time = get_seconds(); 67 cur_trans->start_time = get_seconds();
68
69 cur_trans->delayed_refs.root.rb_node = NULL;
70 cur_trans->delayed_refs.num_entries = 0;
71 cur_trans->delayed_refs.flushing = 0;
72 spin_lock_init(&cur_trans->delayed_refs.lock);
73
68 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 74 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
69 list_add_tail(&cur_trans->list, &root->fs_info->trans_list); 75 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
70 extent_io_tree_init(&cur_trans->dirty_pages, 76 extent_io_tree_init(&cur_trans->dirty_pages,
@@ -182,6 +188,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
182 h->block_group = 0; 188 h->block_group = 0;
183 h->alloc_exclude_nr = 0; 189 h->alloc_exclude_nr = 0;
184 h->alloc_exclude_start = 0; 190 h->alloc_exclude_start = 0;
191 h->delayed_ref_updates = 0;
185 root->fs_info->running_transaction->use_count++; 192 root->fs_info->running_transaction->use_count++;
186 mutex_unlock(&root->fs_info->trans_mutex); 193 mutex_unlock(&root->fs_info->trans_mutex);
187 return h; 194 return h;
@@ -281,6 +288,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
281 struct btrfs_transaction *cur_trans; 288 struct btrfs_transaction *cur_trans;
282 struct btrfs_fs_info *info = root->fs_info; 289 struct btrfs_fs_info *info = root->fs_info;
283 290
291 if (trans->delayed_ref_updates &&
292 (trans->transaction->delayed_refs.flushing ||
293 trans->transaction->delayed_refs.num_entries > 16384)) {
294 btrfs_run_delayed_refs(trans, root, trans->delayed_ref_updates);
295 } else if (trans->transaction->delayed_refs.num_entries > 64) {
296 wake_up_process(root->fs_info->transaction_kthread);
297 }
298
284 mutex_lock(&info->trans_mutex); 299 mutex_lock(&info->trans_mutex);
285 cur_trans = info->running_transaction; 300 cur_trans = info->running_transaction;
286 WARN_ON(cur_trans != trans->transaction); 301 WARN_ON(cur_trans != trans->transaction);
@@ -424,9 +439,10 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
424 u64 old_root_bytenr; 439 u64 old_root_bytenr;
425 struct btrfs_root *tree_root = root->fs_info->tree_root; 440 struct btrfs_root *tree_root = root->fs_info->tree_root;
426 441
427 btrfs_extent_post_op(trans, root);
428 btrfs_write_dirty_block_groups(trans, root); 442 btrfs_write_dirty_block_groups(trans, root);
429 btrfs_extent_post_op(trans, root); 443
444 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
445 BUG_ON(ret);
430 446
431 while (1) { 447 while (1) {
432 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 448 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
@@ -438,14 +454,14 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
438 btrfs_header_level(root->node)); 454 btrfs_header_level(root->node));
439 btrfs_set_root_generation(&root->root_item, trans->transid); 455 btrfs_set_root_generation(&root->root_item, trans->transid);
440 456
441 btrfs_extent_post_op(trans, root);
442
443 ret = btrfs_update_root(trans, tree_root, 457 ret = btrfs_update_root(trans, tree_root,
444 &root->root_key, 458 &root->root_key,
445 &root->root_item); 459 &root->root_item);
446 BUG_ON(ret); 460 BUG_ON(ret);
447 btrfs_write_dirty_block_groups(trans, root); 461 btrfs_write_dirty_block_groups(trans, root);
448 btrfs_extent_post_op(trans, root); 462
463 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
464 BUG_ON(ret);
449 } 465 }
450 return 0; 466 return 0;
451} 467}
@@ -459,15 +475,18 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
459 struct btrfs_fs_info *fs_info = root->fs_info; 475 struct btrfs_fs_info *fs_info = root->fs_info;
460 struct list_head *next; 476 struct list_head *next;
461 struct extent_buffer *eb; 477 struct extent_buffer *eb;
478 int ret;
462 479
463 btrfs_extent_post_op(trans, fs_info->tree_root); 480 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
481 BUG_ON(ret);
464 482
465 eb = btrfs_lock_root_node(fs_info->tree_root); 483 eb = btrfs_lock_root_node(fs_info->tree_root);
466 btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb); 484 btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb);
467 btrfs_tree_unlock(eb); 485 btrfs_tree_unlock(eb);
468 free_extent_buffer(eb); 486 free_extent_buffer(eb);
469 487
470 btrfs_extent_post_op(trans, fs_info->tree_root); 488 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
489 BUG_ON(ret);
471 490
472 while (!list_empty(&fs_info->dirty_cowonly_roots)) { 491 while (!list_empty(&fs_info->dirty_cowonly_roots)) {
473 next = fs_info->dirty_cowonly_roots.next; 492 next = fs_info->dirty_cowonly_roots.next;
@@ -475,6 +494,9 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
475 root = list_entry(next, struct btrfs_root, dirty_list); 494 root = list_entry(next, struct btrfs_root, dirty_list);
476 495
477 update_cowonly_root(trans, root); 496 update_cowonly_root(trans, root);
497
498 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
499 BUG_ON(ret);
478 } 500 }
479 return 0; 501 return 0;
480} 502}
@@ -895,6 +917,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
895 DEFINE_WAIT(wait); 917 DEFINE_WAIT(wait);
896 int ret; 918 int ret;
897 919
920 /* make a pass through all the delayed refs we have so far
921 * any runnings procs may add more while we are here
922 */
923 ret = btrfs_run_delayed_refs(trans, root, 0);
924 BUG_ON(ret);
925
926 /*
927 * set the flushing flag so procs in this transaction have to
928 * start sending their work down.
929 */
930 trans->transaction->delayed_refs.flushing = 1;
931
932 ret = btrfs_run_delayed_refs(trans, root, (u64)-1);
933 BUG_ON(ret);
934
898 INIT_LIST_HEAD(&dirty_fs_roots); 935 INIT_LIST_HEAD(&dirty_fs_roots);
899 mutex_lock(&root->fs_info->trans_mutex); 936 mutex_lock(&root->fs_info->trans_mutex);
900 if (trans->transaction->in_commit) { 937 if (trans->transaction->in_commit) {
@@ -969,6 +1006,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
969 ret = create_pending_snapshots(trans, root->fs_info); 1006 ret = create_pending_snapshots(trans, root->fs_info);
970 BUG_ON(ret); 1007 BUG_ON(ret);
971 1008
1009 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1010 BUG_ON(ret);
1011
972 WARN_ON(cur_trans != trans->transaction); 1012 WARN_ON(cur_trans != trans->transaction);
973 1013
974 /* btrfs_commit_tree_roots is responsible for getting the 1014 /* btrfs_commit_tree_roots is responsible for getting the
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index ea292117f882..94876709217f 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -19,6 +19,7 @@
19#ifndef __BTRFS_TRANSACTION__ 19#ifndef __BTRFS_TRANSACTION__
20#define __BTRFS_TRANSACTION__ 20#define __BTRFS_TRANSACTION__
21#include "btrfs_inode.h" 21#include "btrfs_inode.h"
22#include "delayed-ref.h"
22 23
23struct btrfs_transaction { 24struct btrfs_transaction {
24 u64 transid; 25 u64 transid;
@@ -34,6 +35,7 @@ struct btrfs_transaction {
34 wait_queue_head_t writer_wait; 35 wait_queue_head_t writer_wait;
35 wait_queue_head_t commit_wait; 36 wait_queue_head_t commit_wait;
36 struct list_head pending_snapshots; 37 struct list_head pending_snapshots;
38 struct btrfs_delayed_ref_root delayed_refs;
37}; 39};
38 40
39struct btrfs_trans_handle { 41struct btrfs_trans_handle {
@@ -44,6 +46,7 @@ struct btrfs_trans_handle {
44 u64 block_group; 46 u64 block_group;
45 u64 alloc_exclude_start; 47 u64 alloc_exclude_start;
46 u64 alloc_exclude_nr; 48 u64 alloc_exclude_nr;
49 unsigned long delayed_ref_updates;
47}; 50};
48 51
49struct btrfs_pending_snapshot { 52struct btrfs_pending_snapshot {
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index 98d25fa4570e..b10eacdb1620 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -124,8 +124,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
124 } 124 }
125 125
126 btrfs_release_path(root, path); 126 btrfs_release_path(root, path);
127 if (is_extent)
128 btrfs_extent_post_op(trans, root);
129out: 127out:
130 if (path) 128 if (path)
131 btrfs_free_path(path); 129 btrfs_free_path(path);