aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-06-11 17:23:12 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-06-11 17:23:12 -0400
commita525890cb6a2949b644d212ae290b658967d3919 (patch)
treea2c6c1f6cefff89b235bf6556212527e47a5d50b /fs
parent3bb66d7f8cc31537a3170c9bb82b38e538b984c5 (diff)
parentb263c2c8bf13c273485bd99dbbeba79c844409dd (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (23 commits) Btrfs: fix extent_buffer leak during tree log replay Btrfs: fix oops when btrfs_inherit_iflags called with a NULL dir Btrfs: fix -o nodatasum printk spelling Btrfs: check duplicate backrefs for both data and metadata Btrfs: init worker struct fields before kthread-run Btrfs: pin buffers during write_dev_supers Btrfs: avoid races between super writeout and device list updates Fix btrfs when ACLs are configured out Btrfs: fdatasync should skip metadata writeout Btrfs: remove crc32c.h and use libcrc32c directly. Btrfs: implement FS_IOC_GETFLAGS/SETFLAGS/GETVERSION Btrfs: autodetect SSD devices Btrfs: add mount -o ssd_spread to spread allocations out Btrfs: avoid allocation clusters that are too spread out Btrfs: Add mount -o nossd Btrfs: avoid IO stalls behind congested devices in a multi-device FS Btrfs: don't allow WRITE_SYNC bios to starve out regular writes Btrfs: fix metadata dirty throttling limits Btrfs: reduce mount -o ssd CPU usage Btrfs: balance btree more often ...
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/Makefile4
-rw-r--r--fs/btrfs/acl.c5
-rw-r--r--fs/btrfs/async-thread.c2
-rw-r--r--fs/btrfs/btrfs_inode.h4
-rw-r--r--fs/btrfs/compression.c6
-rw-r--r--fs/btrfs/crc32c.h29
-rw-r--r--fs/btrfs/ctree.c698
-rw-r--r--fs/btrfs/ctree.h330
-rw-r--r--fs/btrfs/delayed-ref.c509
-rw-r--r--fs/btrfs/delayed-ref.h85
-rw-r--r--fs/btrfs/disk-io.c164
-rw-r--r--fs/btrfs/export.c4
-rw-r--r--fs/btrfs/extent-tree.c2610
-rw-r--r--fs/btrfs/extent_io.c18
-rw-r--r--fs/btrfs/file.c78
-rw-r--r--fs/btrfs/free-space-cache.c10
-rw-r--r--fs/btrfs/free-space-cache.h1
-rw-r--r--fs/btrfs/hash.h4
-rw-r--r--fs/btrfs/inode.c159
-rw-r--r--fs/btrfs/ioctl.c197
-rw-r--r--fs/btrfs/print-tree.c155
-rw-r--r--fs/btrfs/relocation.c3711
-rw-r--r--fs/btrfs/root-tree.c17
-rw-r--r--fs/btrfs/super.c53
-rw-r--r--fs/btrfs/transaction.c410
-rw-r--r--fs/btrfs/transaction.h12
-rw-r--r--fs/btrfs/tree-log.c103
-rw-r--r--fs/btrfs/volumes.c69
-rw-r--r--fs/btrfs/volumes.h12
29 files changed, 7306 insertions, 2153 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 94212844a9bc..a35eb36b32fd 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
6 transaction.o inode.o file.o tree-defrag.o \ 6 transaction.o inode.o file.o tree-defrag.o \
7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ 7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ 8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
9 ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ 9 export.o tree-log.o acl.o free-space-cache.o zlib.o \
10 compression.o delayed-ref.o 10 compression.o delayed-ref.o relocation.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index cbba000dccbe..603972576f0f 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -351,9 +351,4 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
351 return 0; 351 return 0;
352} 352}
353 353
354int btrfs_check_acl(struct inode *inode, int mask)
355{
356 return 0;
357}
358
359#endif /* CONFIG_FS_POSIX_ACL */ 354#endif /* CONFIG_FS_POSIX_ACL */
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 502c3d61de62..7f88628a1a72 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -294,10 +294,10 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
294 INIT_LIST_HEAD(&worker->worker_list); 294 INIT_LIST_HEAD(&worker->worker_list);
295 spin_lock_init(&worker->lock); 295 spin_lock_init(&worker->lock);
296 atomic_set(&worker->num_pending, 0); 296 atomic_set(&worker->num_pending, 0);
297 worker->workers = workers;
297 worker->task = kthread_run(worker_loop, worker, 298 worker->task = kthread_run(worker_loop, worker,
298 "btrfs-%s-%d", workers->name, 299 "btrfs-%s-%d", workers->name,
299 workers->num_workers + i); 300 workers->num_workers + i);
300 worker->workers = workers;
301 if (IS_ERR(worker->task)) { 301 if (IS_ERR(worker->task)) {
302 kfree(worker); 302 kfree(worker);
303 ret = PTR_ERR(worker->task); 303 ret = PTR_ERR(worker->task);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index b30986f00b9d..acb4f3517582 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -72,6 +72,9 @@ struct btrfs_inode {
72 */ 72 */
73 struct list_head ordered_operations; 73 struct list_head ordered_operations;
74 74
75 /* node for the red-black tree that links inodes in subvolume root */
76 struct rb_node rb_node;
77
75 /* the space_info for where this inode's data allocations are done */ 78 /* the space_info for where this inode's data allocations are done */
76 struct btrfs_space_info *space_info; 79 struct btrfs_space_info *space_info;
77 80
@@ -154,5 +157,4 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size)
154 BTRFS_I(inode)->disk_i_size = size; 157 BTRFS_I(inode)->disk_i_size = size;
155} 158}
156 159
157
158#endif 160#endif
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index ab07627084f1..de1e2fd32080 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -123,7 +123,7 @@ static int check_compressed_csum(struct inode *inode,
123 u32 csum; 123 u32 csum;
124 u32 *cb_sum = &cb->sums; 124 u32 *cb_sum = &cb->sums;
125 125
126 if (btrfs_test_flag(inode, NODATASUM)) 126 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
127 return 0; 127 return 0;
128 128
129 for (i = 0; i < cb->nr_pages; i++) { 129 for (i = 0; i < cb->nr_pages; i++) {
@@ -670,7 +670,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
670 */ 670 */
671 atomic_inc(&cb->pending_bios); 671 atomic_inc(&cb->pending_bios);
672 672
673 if (!btrfs_test_flag(inode, NODATASUM)) { 673 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
674 btrfs_lookup_bio_sums(root, inode, comp_bio, 674 btrfs_lookup_bio_sums(root, inode, comp_bio,
675 sums); 675 sums);
676 } 676 }
@@ -697,7 +697,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
697 ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); 697 ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
698 BUG_ON(ret); 698 BUG_ON(ret);
699 699
700 if (!btrfs_test_flag(inode, NODATASUM)) 700 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
701 btrfs_lookup_bio_sums(root, inode, comp_bio, sums); 701 btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
702 702
703 ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0); 703 ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
diff --git a/fs/btrfs/crc32c.h b/fs/btrfs/crc32c.h
deleted file mode 100644
index 6e1b3de36700..000000000000
--- a/fs/btrfs/crc32c.h
+++ /dev/null
@@ -1,29 +0,0 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __BTRFS_CRC32C__
20#define __BTRFS_CRC32C__
21#include <linux/crc32c.h>
22
23/*
24 * this file used to do more for selecting the HW version of crc32c,
25 * perhaps it will one day again soon.
26 */
27#define btrfs_crc32c(seed, data, length) crc32c(seed, data, length)
28#endif
29
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index fedf8b9f03a2..60a45f3a4e91 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -197,14 +197,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
197 u32 nritems; 197 u32 nritems;
198 int ret = 0; 198 int ret = 0;
199 int level; 199 int level;
200 struct btrfs_root *new_root; 200 struct btrfs_disk_key disk_key;
201
202 new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
203 if (!new_root)
204 return -ENOMEM;
205
206 memcpy(new_root, root, sizeof(*new_root));
207 new_root->root_key.objectid = new_root_objectid;
208 201
209 WARN_ON(root->ref_cows && trans->transid != 202 WARN_ON(root->ref_cows && trans->transid !=
210 root->fs_info->running_transaction->transid); 203 root->fs_info->running_transaction->transid);
@@ -212,28 +205,37 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
212 205
213 level = btrfs_header_level(buf); 206 level = btrfs_header_level(buf);
214 nritems = btrfs_header_nritems(buf); 207 nritems = btrfs_header_nritems(buf);
208 if (level == 0)
209 btrfs_item_key(buf, &disk_key, 0);
210 else
211 btrfs_node_key(buf, &disk_key, 0);
215 212
216 cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0, 213 cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
217 new_root_objectid, trans->transid, 214 new_root_objectid, &disk_key, level,
218 level, buf->start, 0); 215 buf->start, 0);
219 if (IS_ERR(cow)) { 216 if (IS_ERR(cow))
220 kfree(new_root);
221 return PTR_ERR(cow); 217 return PTR_ERR(cow);
222 }
223 218
224 copy_extent_buffer(cow, buf, 0, 0, cow->len); 219 copy_extent_buffer(cow, buf, 0, 0, cow->len);
225 btrfs_set_header_bytenr(cow, cow->start); 220 btrfs_set_header_bytenr(cow, cow->start);
226 btrfs_set_header_generation(cow, trans->transid); 221 btrfs_set_header_generation(cow, trans->transid);
227 btrfs_set_header_owner(cow, new_root_objectid); 222 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
228 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); 223 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
224 BTRFS_HEADER_FLAG_RELOC);
225 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
226 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
227 else
228 btrfs_set_header_owner(cow, new_root_objectid);
229 229
230 write_extent_buffer(cow, root->fs_info->fsid, 230 write_extent_buffer(cow, root->fs_info->fsid,
231 (unsigned long)btrfs_header_fsid(cow), 231 (unsigned long)btrfs_header_fsid(cow),
232 BTRFS_FSID_SIZE); 232 BTRFS_FSID_SIZE);
233 233
234 WARN_ON(btrfs_header_generation(buf) > trans->transid); 234 WARN_ON(btrfs_header_generation(buf) > trans->transid);
235 ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL); 235 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
236 kfree(new_root); 236 ret = btrfs_inc_ref(trans, root, cow, 1);
237 else
238 ret = btrfs_inc_ref(trans, root, cow, 0);
237 239
238 if (ret) 240 if (ret)
239 return ret; 241 return ret;
@@ -244,6 +246,125 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
244} 246}
245 247
246/* 248/*
249 * check if the tree block can be shared by multiple trees
250 */
251int btrfs_block_can_be_shared(struct btrfs_root *root,
252 struct extent_buffer *buf)
253{
254 /*
255 * Tree blocks not in refernece counted trees and tree roots
256 * are never shared. If a block was allocated after the last
257 * snapshot and the block was not allocated by tree relocation,
258 * we know the block is not shared.
259 */
260 if (root->ref_cows &&
261 buf != root->node && buf != root->commit_root &&
262 (btrfs_header_generation(buf) <=
263 btrfs_root_last_snapshot(&root->root_item) ||
264 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
265 return 1;
266#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
267 if (root->ref_cows &&
268 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
269 return 1;
270#endif
271 return 0;
272}
273
274static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
275 struct btrfs_root *root,
276 struct extent_buffer *buf,
277 struct extent_buffer *cow)
278{
279 u64 refs;
280 u64 owner;
281 u64 flags;
282 u64 new_flags = 0;
283 int ret;
284
285 /*
286 * Backrefs update rules:
287 *
288 * Always use full backrefs for extent pointers in tree block
289 * allocated by tree relocation.
290 *
291 * If a shared tree block is no longer referenced by its owner
292 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
293 * use full backrefs for extent pointers in tree block.
294 *
295 * If a tree block is been relocating
296 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
297 * use full backrefs for extent pointers in tree block.
298 * The reason for this is some operations (such as drop tree)
299 * are only allowed for blocks use full backrefs.
300 */
301
302 if (btrfs_block_can_be_shared(root, buf)) {
303 ret = btrfs_lookup_extent_info(trans, root, buf->start,
304 buf->len, &refs, &flags);
305 BUG_ON(ret);
306 BUG_ON(refs == 0);
307 } else {
308 refs = 1;
309 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
310 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
311 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
312 else
313 flags = 0;
314 }
315
316 owner = btrfs_header_owner(buf);
317 BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
318 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
319
320 if (refs > 1) {
321 if ((owner == root->root_key.objectid ||
322 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
323 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
324 ret = btrfs_inc_ref(trans, root, buf, 1);
325 BUG_ON(ret);
326
327 if (root->root_key.objectid ==
328 BTRFS_TREE_RELOC_OBJECTID) {
329 ret = btrfs_dec_ref(trans, root, buf, 0);
330 BUG_ON(ret);
331 ret = btrfs_inc_ref(trans, root, cow, 1);
332 BUG_ON(ret);
333 }
334 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
335 } else {
336
337 if (root->root_key.objectid ==
338 BTRFS_TREE_RELOC_OBJECTID)
339 ret = btrfs_inc_ref(trans, root, cow, 1);
340 else
341 ret = btrfs_inc_ref(trans, root, cow, 0);
342 BUG_ON(ret);
343 }
344 if (new_flags != 0) {
345 ret = btrfs_set_disk_extent_flags(trans, root,
346 buf->start,
347 buf->len,
348 new_flags, 0);
349 BUG_ON(ret);
350 }
351 } else {
352 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
353 if (root->root_key.objectid ==
354 BTRFS_TREE_RELOC_OBJECTID)
355 ret = btrfs_inc_ref(trans, root, cow, 1);
356 else
357 ret = btrfs_inc_ref(trans, root, cow, 0);
358 BUG_ON(ret);
359 ret = btrfs_dec_ref(trans, root, buf, 1);
360 BUG_ON(ret);
361 }
362 clean_tree_block(trans, root, buf);
363 }
364 return 0;
365}
366
367/*
247 * does the dirty work in cow of a single block. The parent block (if 368 * does the dirty work in cow of a single block. The parent block (if
248 * supplied) is updated to point to the new cow copy. The new buffer is marked 369 * supplied) is updated to point to the new cow copy. The new buffer is marked
249 * dirty and returned locked. If you modify the block it needs to be marked 370 * dirty and returned locked. If you modify the block it needs to be marked
@@ -262,34 +383,39 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
262 struct extent_buffer **cow_ret, 383 struct extent_buffer **cow_ret,
263 u64 search_start, u64 empty_size) 384 u64 search_start, u64 empty_size)
264{ 385{
265 u64 parent_start; 386 struct btrfs_disk_key disk_key;
266 struct extent_buffer *cow; 387 struct extent_buffer *cow;
267 u32 nritems;
268 int ret = 0;
269 int level; 388 int level;
270 int unlock_orig = 0; 389 int unlock_orig = 0;
390 u64 parent_start;
271 391
272 if (*cow_ret == buf) 392 if (*cow_ret == buf)
273 unlock_orig = 1; 393 unlock_orig = 1;
274 394
275 btrfs_assert_tree_locked(buf); 395 btrfs_assert_tree_locked(buf);
276 396
277 if (parent)
278 parent_start = parent->start;
279 else
280 parent_start = 0;
281
282 WARN_ON(root->ref_cows && trans->transid != 397 WARN_ON(root->ref_cows && trans->transid !=
283 root->fs_info->running_transaction->transid); 398 root->fs_info->running_transaction->transid);
284 WARN_ON(root->ref_cows && trans->transid != root->last_trans); 399 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
285 400
286 level = btrfs_header_level(buf); 401 level = btrfs_header_level(buf);
287 nritems = btrfs_header_nritems(buf);
288 402
289 cow = btrfs_alloc_free_block(trans, root, buf->len, 403 if (level == 0)
290 parent_start, root->root_key.objectid, 404 btrfs_item_key(buf, &disk_key, 0);
291 trans->transid, level, 405 else
292 search_start, empty_size); 406 btrfs_node_key(buf, &disk_key, 0);
407
408 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
409 if (parent)
410 parent_start = parent->start;
411 else
412 parent_start = 0;
413 } else
414 parent_start = 0;
415
416 cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
417 root->root_key.objectid, &disk_key,
418 level, search_start, empty_size);
293 if (IS_ERR(cow)) 419 if (IS_ERR(cow))
294 return PTR_ERR(cow); 420 return PTR_ERR(cow);
295 421
@@ -298,83 +424,53 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
298 copy_extent_buffer(cow, buf, 0, 0, cow->len); 424 copy_extent_buffer(cow, buf, 0, 0, cow->len);
299 btrfs_set_header_bytenr(cow, cow->start); 425 btrfs_set_header_bytenr(cow, cow->start);
300 btrfs_set_header_generation(cow, trans->transid); 426 btrfs_set_header_generation(cow, trans->transid);
301 btrfs_set_header_owner(cow, root->root_key.objectid); 427 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
302 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); 428 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
429 BTRFS_HEADER_FLAG_RELOC);
430 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
431 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
432 else
433 btrfs_set_header_owner(cow, root->root_key.objectid);
303 434
304 write_extent_buffer(cow, root->fs_info->fsid, 435 write_extent_buffer(cow, root->fs_info->fsid,
305 (unsigned long)btrfs_header_fsid(cow), 436 (unsigned long)btrfs_header_fsid(cow),
306 BTRFS_FSID_SIZE); 437 BTRFS_FSID_SIZE);
307 438
308 WARN_ON(btrfs_header_generation(buf) > trans->transid); 439 update_ref_for_cow(trans, root, buf, cow);
309 if (btrfs_header_generation(buf) != trans->transid) {
310 u32 nr_extents;
311 ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents);
312 if (ret)
313 return ret;
314
315 ret = btrfs_cache_ref(trans, root, buf, nr_extents);
316 WARN_ON(ret);
317 } else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) {
318 /*
319 * There are only two places that can drop reference to
320 * tree blocks owned by living reloc trees, one is here,
321 * the other place is btrfs_drop_subtree. In both places,
322 * we check reference count while tree block is locked.
323 * Furthermore, if reference count is one, it won't get
324 * increased by someone else.
325 */
326 u32 refs;
327 ret = btrfs_lookup_extent_ref(trans, root, buf->start,
328 buf->len, &refs);
329 BUG_ON(ret);
330 if (refs == 1) {
331 ret = btrfs_update_ref(trans, root, buf, cow,
332 0, nritems);
333 clean_tree_block(trans, root, buf);
334 } else {
335 ret = btrfs_inc_ref(trans, root, buf, cow, NULL);
336 }
337 BUG_ON(ret);
338 } else {
339 ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
340 if (ret)
341 return ret;
342 clean_tree_block(trans, root, buf);
343 }
344
345 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
346 ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start);
347 WARN_ON(ret);
348 }
349 440
350 if (buf == root->node) { 441 if (buf == root->node) {
351 WARN_ON(parent && parent != buf); 442 WARN_ON(parent && parent != buf);
443 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
444 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
445 parent_start = buf->start;
446 else
447 parent_start = 0;
352 448
353 spin_lock(&root->node_lock); 449 spin_lock(&root->node_lock);
354 root->node = cow; 450 root->node = cow;
355 extent_buffer_get(cow); 451 extent_buffer_get(cow);
356 spin_unlock(&root->node_lock); 452 spin_unlock(&root->node_lock);
357 453
358 if (buf != root->commit_root) { 454 btrfs_free_extent(trans, root, buf->start, buf->len,
359 btrfs_free_extent(trans, root, buf->start, 455 parent_start, root->root_key.objectid,
360 buf->len, buf->start, 456 level, 0);
361 root->root_key.objectid,
362 btrfs_header_generation(buf),
363 level, 1);
364 }
365 free_extent_buffer(buf); 457 free_extent_buffer(buf);
366 add_root_to_dirty_list(root); 458 add_root_to_dirty_list(root);
367 } else { 459 } else {
460 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
461 parent_start = parent->start;
462 else
463 parent_start = 0;
464
465 WARN_ON(trans->transid != btrfs_header_generation(parent));
368 btrfs_set_node_blockptr(parent, parent_slot, 466 btrfs_set_node_blockptr(parent, parent_slot,
369 cow->start); 467 cow->start);
370 WARN_ON(trans->transid == 0);
371 btrfs_set_node_ptr_generation(parent, parent_slot, 468 btrfs_set_node_ptr_generation(parent, parent_slot,
372 trans->transid); 469 trans->transid);
373 btrfs_mark_buffer_dirty(parent); 470 btrfs_mark_buffer_dirty(parent);
374 WARN_ON(btrfs_header_generation(parent) != trans->transid);
375 btrfs_free_extent(trans, root, buf->start, buf->len, 471 btrfs_free_extent(trans, root, buf->start, buf->len,
376 parent_start, btrfs_header_owner(parent), 472 parent_start, root->root_key.objectid,
377 btrfs_header_generation(parent), level, 1); 473 level, 0);
378 } 474 }
379 if (unlock_orig) 475 if (unlock_orig)
380 btrfs_tree_unlock(buf); 476 btrfs_tree_unlock(buf);
@@ -384,6 +480,18 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
384 return 0; 480 return 0;
385} 481}
386 482
483static inline int should_cow_block(struct btrfs_trans_handle *trans,
484 struct btrfs_root *root,
485 struct extent_buffer *buf)
486{
487 if (btrfs_header_generation(buf) == trans->transid &&
488 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
489 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
490 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
491 return 0;
492 return 1;
493}
494
387/* 495/*
388 * cows a single block, see __btrfs_cow_block for the real work. 496 * cows a single block, see __btrfs_cow_block for the real work.
389 * This version of it has extra checks so that a block isn't cow'd more than 497 * This version of it has extra checks so that a block isn't cow'd more than
@@ -411,9 +519,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
411 WARN_ON(1); 519 WARN_ON(1);
412 } 520 }
413 521
414 if (btrfs_header_generation(buf) == trans->transid && 522 if (!should_cow_block(trans, root, buf)) {
415 btrfs_header_owner(buf) == root->root_key.objectid &&
416 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
417 *cow_ret = buf; 523 *cow_ret = buf;
418 return 0; 524 return 0;
419 } 525 }
@@ -469,7 +575,7 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
469/* 575/*
470 * same as comp_keys only with two btrfs_key's 576 * same as comp_keys only with two btrfs_key's
471 */ 577 */
472static int comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) 578int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
473{ 579{
474 if (k1->objectid > k2->objectid) 580 if (k1->objectid > k2->objectid)
475 return 1; 581 return 1;
@@ -845,6 +951,12 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
845 return -1; 951 return -1;
846} 952}
847 953
954int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
955 int level, int *slot)
956{
957 return bin_search(eb, key, level, slot);
958}
959
848/* given a node and slot number, this reads the blocks it points to. The 960/* given a node and slot number, this reads the blocks it points to. The
849 * extent buffer is returned with a reference taken (but unlocked). 961 * extent buffer is returned with a reference taken (but unlocked).
850 * NULL is returned on error. 962 * NULL is returned on error.
@@ -921,13 +1033,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
921 root->node = child; 1033 root->node = child;
922 spin_unlock(&root->node_lock); 1034 spin_unlock(&root->node_lock);
923 1035
924 ret = btrfs_update_extent_ref(trans, root, child->start,
925 child->len,
926 mid->start, child->start,
927 root->root_key.objectid,
928 trans->transid, level - 1);
929 BUG_ON(ret);
930
931 add_root_to_dirty_list(root); 1036 add_root_to_dirty_list(root);
932 btrfs_tree_unlock(child); 1037 btrfs_tree_unlock(child);
933 1038
@@ -938,9 +1043,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
938 /* once for the path */ 1043 /* once for the path */
939 free_extent_buffer(mid); 1044 free_extent_buffer(mid);
940 ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1045 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
941 mid->start, root->root_key.objectid, 1046 0, root->root_key.objectid, level, 1);
942 btrfs_header_generation(mid),
943 level, 1);
944 /* once for the root ptr */ 1047 /* once for the root ptr */
945 free_extent_buffer(mid); 1048 free_extent_buffer(mid);
946 return ret; 1049 return ret;
@@ -949,8 +1052,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
949 BTRFS_NODEPTRS_PER_BLOCK(root) / 4) 1052 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
950 return 0; 1053 return 0;
951 1054
952 if (trans->transaction->delayed_refs.flushing && 1055 if (btrfs_header_nritems(mid) > 2)
953 btrfs_header_nritems(mid) > 2)
954 return 0; 1056 return 0;
955 1057
956 if (btrfs_header_nritems(mid) < 2) 1058 if (btrfs_header_nritems(mid) < 2)
@@ -998,7 +1100,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
998 ret = wret; 1100 ret = wret;
999 if (btrfs_header_nritems(right) == 0) { 1101 if (btrfs_header_nritems(right) == 0) {
1000 u64 bytenr = right->start; 1102 u64 bytenr = right->start;
1001 u64 generation = btrfs_header_generation(parent);
1002 u32 blocksize = right->len; 1103 u32 blocksize = right->len;
1003 1104
1004 clean_tree_block(trans, root, right); 1105 clean_tree_block(trans, root, right);
@@ -1010,9 +1111,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1010 if (wret) 1111 if (wret)
1011 ret = wret; 1112 ret = wret;
1012 wret = btrfs_free_extent(trans, root, bytenr, 1113 wret = btrfs_free_extent(trans, root, bytenr,
1013 blocksize, parent->start, 1114 blocksize, 0,
1014 btrfs_header_owner(parent), 1115 root->root_key.objectid,
1015 generation, level, 1); 1116 level, 0);
1016 if (wret) 1117 if (wret)
1017 ret = wret; 1118 ret = wret;
1018 } else { 1119 } else {
@@ -1047,7 +1148,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1047 } 1148 }
1048 if (btrfs_header_nritems(mid) == 0) { 1149 if (btrfs_header_nritems(mid) == 0) {
1049 /* we've managed to empty the middle node, drop it */ 1150 /* we've managed to empty the middle node, drop it */
1050 u64 root_gen = btrfs_header_generation(parent);
1051 u64 bytenr = mid->start; 1151 u64 bytenr = mid->start;
1052 u32 blocksize = mid->len; 1152 u32 blocksize = mid->len;
1053 1153
@@ -1059,9 +1159,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1059 if (wret) 1159 if (wret)
1060 ret = wret; 1160 ret = wret;
1061 wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1161 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
1062 parent->start, 1162 0, root->root_key.objectid,
1063 btrfs_header_owner(parent), 1163 level, 0);
1064 root_gen, level, 1);
1065 if (wret) 1164 if (wret)
1066 ret = wret; 1165 ret = wret;
1067 } else { 1166 } else {
@@ -1437,7 +1536,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
1437{ 1536{
1438 int i; 1537 int i;
1439 1538
1440 if (path->keep_locks || path->lowest_level) 1539 if (path->keep_locks)
1441 return; 1540 return;
1442 1541
1443 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 1542 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
@@ -1552,7 +1651,7 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
1552 } 1651 }
1553 b = p->nodes[level]; 1652 b = p->nodes[level];
1554 } else if (ins_len < 0 && btrfs_header_nritems(b) < 1653 } else if (ins_len < 0 && btrfs_header_nritems(b) <
1555 BTRFS_NODEPTRS_PER_BLOCK(root) / 4) { 1654 BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
1556 int sret; 1655 int sret;
1557 1656
1558 sret = reada_for_balance(root, p, level); 1657 sret = reada_for_balance(root, p, level);
@@ -1614,10 +1713,17 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1614 lowest_unlock = 2; 1713 lowest_unlock = 2;
1615 1714
1616again: 1715again:
1617 if (p->skip_locking) 1716 if (p->search_commit_root) {
1618 b = btrfs_root_node(root); 1717 b = root->commit_root;
1619 else 1718 extent_buffer_get(b);
1620 b = btrfs_lock_root_node(root); 1719 if (!p->skip_locking)
1720 btrfs_tree_lock(b);
1721 } else {
1722 if (p->skip_locking)
1723 b = btrfs_root_node(root);
1724 else
1725 b = btrfs_lock_root_node(root);
1726 }
1621 1727
1622 while (b) { 1728 while (b) {
1623 level = btrfs_header_level(b); 1729 level = btrfs_header_level(b);
@@ -1638,11 +1744,9 @@ again:
1638 * then we don't want to set the path blocking, 1744 * then we don't want to set the path blocking,
1639 * so we test it here 1745 * so we test it here
1640 */ 1746 */
1641 if (btrfs_header_generation(b) == trans->transid && 1747 if (!should_cow_block(trans, root, b))
1642 btrfs_header_owner(b) == root->root_key.objectid &&
1643 !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
1644 goto cow_done; 1748 goto cow_done;
1645 } 1749
1646 btrfs_set_path_blocking(p); 1750 btrfs_set_path_blocking(p);
1647 1751
1648 wret = btrfs_cow_block(trans, root, b, 1752 wret = btrfs_cow_block(trans, root, b,
@@ -1764,138 +1868,6 @@ done:
1764 return ret; 1868 return ret;
1765} 1869}
1766 1870
1767int btrfs_merge_path(struct btrfs_trans_handle *trans,
1768 struct btrfs_root *root,
1769 struct btrfs_key *node_keys,
1770 u64 *nodes, int lowest_level)
1771{
1772 struct extent_buffer *eb;
1773 struct extent_buffer *parent;
1774 struct btrfs_key key;
1775 u64 bytenr;
1776 u64 generation;
1777 u32 blocksize;
1778 int level;
1779 int slot;
1780 int key_match;
1781 int ret;
1782
1783 eb = btrfs_lock_root_node(root);
1784 ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb);
1785 BUG_ON(ret);
1786
1787 btrfs_set_lock_blocking(eb);
1788
1789 parent = eb;
1790 while (1) {
1791 level = btrfs_header_level(parent);
1792 if (level == 0 || level <= lowest_level)
1793 break;
1794
1795 ret = bin_search(parent, &node_keys[lowest_level], level,
1796 &slot);
1797 if (ret && slot > 0)
1798 slot--;
1799
1800 bytenr = btrfs_node_blockptr(parent, slot);
1801 if (nodes[level - 1] == bytenr)
1802 break;
1803
1804 blocksize = btrfs_level_size(root, level - 1);
1805 generation = btrfs_node_ptr_generation(parent, slot);
1806 btrfs_node_key_to_cpu(eb, &key, slot);
1807 key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));
1808
1809 if (generation == trans->transid) {
1810 eb = read_tree_block(root, bytenr, blocksize,
1811 generation);
1812 btrfs_tree_lock(eb);
1813 btrfs_set_lock_blocking(eb);
1814 }
1815
1816 /*
1817 * if node keys match and node pointer hasn't been modified
1818 * in the running transaction, we can merge the path. for
1819 * blocks owened by reloc trees, the node pointer check is
1820 * skipped, this is because these blocks are fully controlled
1821 * by the space balance code, no one else can modify them.
1822 */
1823 if (!nodes[level - 1] || !key_match ||
1824 (generation == trans->transid &&
1825 btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) {
1826 if (level == 1 || level == lowest_level + 1) {
1827 if (generation == trans->transid) {
1828 btrfs_tree_unlock(eb);
1829 free_extent_buffer(eb);
1830 }
1831 break;
1832 }
1833
1834 if (generation != trans->transid) {
1835 eb = read_tree_block(root, bytenr, blocksize,
1836 generation);
1837 btrfs_tree_lock(eb);
1838 btrfs_set_lock_blocking(eb);
1839 }
1840
1841 ret = btrfs_cow_block(trans, root, eb, parent, slot,
1842 &eb);
1843 BUG_ON(ret);
1844
1845 if (root->root_key.objectid ==
1846 BTRFS_TREE_RELOC_OBJECTID) {
1847 if (!nodes[level - 1]) {
1848 nodes[level - 1] = eb->start;
1849 memcpy(&node_keys[level - 1], &key,
1850 sizeof(node_keys[0]));
1851 } else {
1852 WARN_ON(1);
1853 }
1854 }
1855
1856 btrfs_tree_unlock(parent);
1857 free_extent_buffer(parent);
1858 parent = eb;
1859 continue;
1860 }
1861
1862 btrfs_set_node_blockptr(parent, slot, nodes[level - 1]);
1863 btrfs_set_node_ptr_generation(parent, slot, trans->transid);
1864 btrfs_mark_buffer_dirty(parent);
1865
1866 ret = btrfs_inc_extent_ref(trans, root,
1867 nodes[level - 1],
1868 blocksize, parent->start,
1869 btrfs_header_owner(parent),
1870 btrfs_header_generation(parent),
1871 level - 1);
1872 BUG_ON(ret);
1873
1874 /*
1875 * If the block was created in the running transaction,
1876 * it's possible this is the last reference to it, so we
1877 * should drop the subtree.
1878 */
1879 if (generation == trans->transid) {
1880 ret = btrfs_drop_subtree(trans, root, eb, parent);
1881 BUG_ON(ret);
1882 btrfs_tree_unlock(eb);
1883 free_extent_buffer(eb);
1884 } else {
1885 ret = btrfs_free_extent(trans, root, bytenr,
1886 blocksize, parent->start,
1887 btrfs_header_owner(parent),
1888 btrfs_header_generation(parent),
1889 level - 1, 1);
1890 BUG_ON(ret);
1891 }
1892 break;
1893 }
1894 btrfs_tree_unlock(parent);
1895 free_extent_buffer(parent);
1896 return 0;
1897}
1898
1899/* 1871/*
1900 * adjust the pointers going up the tree, starting at level 1872 * adjust the pointers going up the tree, starting at level
1901 * making sure the right key of each node is points to 'key'. 1873 * making sure the right key of each node is points to 'key'.
@@ -2021,9 +1993,6 @@ static int push_node_left(struct btrfs_trans_handle *trans,
2021 btrfs_mark_buffer_dirty(src); 1993 btrfs_mark_buffer_dirty(src);
2022 btrfs_mark_buffer_dirty(dst); 1994 btrfs_mark_buffer_dirty(dst);
2023 1995
2024 ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items);
2025 BUG_ON(ret);
2026
2027 return ret; 1996 return ret;
2028} 1997}
2029 1998
@@ -2083,9 +2052,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
2083 btrfs_mark_buffer_dirty(src); 2052 btrfs_mark_buffer_dirty(src);
2084 btrfs_mark_buffer_dirty(dst); 2053 btrfs_mark_buffer_dirty(dst);
2085 2054
2086 ret = btrfs_update_ref(trans, root, src, dst, 0, push_items);
2087 BUG_ON(ret);
2088
2089 return ret; 2055 return ret;
2090} 2056}
2091 2057
@@ -2105,7 +2071,6 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2105 struct extent_buffer *c; 2071 struct extent_buffer *c;
2106 struct extent_buffer *old; 2072 struct extent_buffer *old;
2107 struct btrfs_disk_key lower_key; 2073 struct btrfs_disk_key lower_key;
2108 int ret;
2109 2074
2110 BUG_ON(path->nodes[level]); 2075 BUG_ON(path->nodes[level]);
2111 BUG_ON(path->nodes[level-1] != root->node); 2076 BUG_ON(path->nodes[level-1] != root->node);
@@ -2117,16 +2082,17 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2117 btrfs_node_key(lower, &lower_key, 0); 2082 btrfs_node_key(lower, &lower_key, 0);
2118 2083
2119 c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, 2084 c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
2120 root->root_key.objectid, trans->transid, 2085 root->root_key.objectid, &lower_key,
2121 level, root->node->start, 0); 2086 level, root->node->start, 0);
2122 if (IS_ERR(c)) 2087 if (IS_ERR(c))
2123 return PTR_ERR(c); 2088 return PTR_ERR(c);
2124 2089
2125 memset_extent_buffer(c, 0, 0, root->nodesize); 2090 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
2126 btrfs_set_header_nritems(c, 1); 2091 btrfs_set_header_nritems(c, 1);
2127 btrfs_set_header_level(c, level); 2092 btrfs_set_header_level(c, level);
2128 btrfs_set_header_bytenr(c, c->start); 2093 btrfs_set_header_bytenr(c, c->start);
2129 btrfs_set_header_generation(c, trans->transid); 2094 btrfs_set_header_generation(c, trans->transid);
2095 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
2130 btrfs_set_header_owner(c, root->root_key.objectid); 2096 btrfs_set_header_owner(c, root->root_key.objectid);
2131 2097
2132 write_extent_buffer(c, root->fs_info->fsid, 2098 write_extent_buffer(c, root->fs_info->fsid,
@@ -2151,12 +2117,6 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2151 root->node = c; 2117 root->node = c;
2152 spin_unlock(&root->node_lock); 2118 spin_unlock(&root->node_lock);
2153 2119
2154 ret = btrfs_update_extent_ref(trans, root, lower->start,
2155 lower->len, lower->start, c->start,
2156 root->root_key.objectid,
2157 trans->transid, level - 1);
2158 BUG_ON(ret);
2159
2160 /* the super has an extra ref to root->node */ 2120 /* the super has an extra ref to root->node */
2161 free_extent_buffer(old); 2121 free_extent_buffer(old);
2162 2122
@@ -2233,7 +2193,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
2233 ret = insert_new_root(trans, root, path, level + 1); 2193 ret = insert_new_root(trans, root, path, level + 1);
2234 if (ret) 2194 if (ret)
2235 return ret; 2195 return ret;
2236 } else if (!trans->transaction->delayed_refs.flushing) { 2196 } else {
2237 ret = push_nodes_for_insert(trans, root, path, level); 2197 ret = push_nodes_for_insert(trans, root, path, level);
2238 c = path->nodes[level]; 2198 c = path->nodes[level];
2239 if (!ret && btrfs_header_nritems(c) < 2199 if (!ret && btrfs_header_nritems(c) <
@@ -2244,20 +2204,21 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
2244 } 2204 }
2245 2205
2246 c_nritems = btrfs_header_nritems(c); 2206 c_nritems = btrfs_header_nritems(c);
2207 mid = (c_nritems + 1) / 2;
2208 btrfs_node_key(c, &disk_key, mid);
2247 2209
2248 split = btrfs_alloc_free_block(trans, root, root->nodesize, 2210 split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
2249 path->nodes[level + 1]->start,
2250 root->root_key.objectid, 2211 root->root_key.objectid,
2251 trans->transid, level, c->start, 0); 2212 &disk_key, level, c->start, 0);
2252 if (IS_ERR(split)) 2213 if (IS_ERR(split))
2253 return PTR_ERR(split); 2214 return PTR_ERR(split);
2254 2215
2255 btrfs_set_header_flags(split, btrfs_header_flags(c)); 2216 memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
2256 btrfs_set_header_level(split, btrfs_header_level(c)); 2217 btrfs_set_header_level(split, btrfs_header_level(c));
2257 btrfs_set_header_bytenr(split, split->start); 2218 btrfs_set_header_bytenr(split, split->start);
2258 btrfs_set_header_generation(split, trans->transid); 2219 btrfs_set_header_generation(split, trans->transid);
2220 btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
2259 btrfs_set_header_owner(split, root->root_key.objectid); 2221 btrfs_set_header_owner(split, root->root_key.objectid);
2260 btrfs_set_header_flags(split, 0);
2261 write_extent_buffer(split, root->fs_info->fsid, 2222 write_extent_buffer(split, root->fs_info->fsid,
2262 (unsigned long)btrfs_header_fsid(split), 2223 (unsigned long)btrfs_header_fsid(split),
2263 BTRFS_FSID_SIZE); 2224 BTRFS_FSID_SIZE);
@@ -2265,7 +2226,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
2265 (unsigned long)btrfs_header_chunk_tree_uuid(split), 2226 (unsigned long)btrfs_header_chunk_tree_uuid(split),
2266 BTRFS_UUID_SIZE); 2227 BTRFS_UUID_SIZE);
2267 2228
2268 mid = (c_nritems + 1) / 2;
2269 2229
2270 copy_extent_buffer(split, c, 2230 copy_extent_buffer(split, c,
2271 btrfs_node_key_ptr_offset(0), 2231 btrfs_node_key_ptr_offset(0),
@@ -2278,16 +2238,12 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
2278 btrfs_mark_buffer_dirty(c); 2238 btrfs_mark_buffer_dirty(c);
2279 btrfs_mark_buffer_dirty(split); 2239 btrfs_mark_buffer_dirty(split);
2280 2240
2281 btrfs_node_key(split, &disk_key, 0);
2282 wret = insert_ptr(trans, root, path, &disk_key, split->start, 2241 wret = insert_ptr(trans, root, path, &disk_key, split->start,
2283 path->slots[level + 1] + 1, 2242 path->slots[level + 1] + 1,
2284 level + 1); 2243 level + 1);
2285 if (wret) 2244 if (wret)
2286 ret = wret; 2245 ret = wret;
2287 2246
2288 ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid);
2289 BUG_ON(ret);
2290
2291 if (path->slots[level] >= mid) { 2247 if (path->slots[level] >= mid) {
2292 path->slots[level] -= mid; 2248 path->slots[level] -= mid;
2293 btrfs_tree_unlock(c); 2249 btrfs_tree_unlock(c);
@@ -2360,7 +2316,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
2360 u32 right_nritems; 2316 u32 right_nritems;
2361 u32 data_end; 2317 u32 data_end;
2362 u32 this_item_size; 2318 u32 this_item_size;
2363 int ret;
2364 2319
2365 if (empty) 2320 if (empty)
2366 nr = 0; 2321 nr = 0;
@@ -2473,9 +2428,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
2473 btrfs_mark_buffer_dirty(left); 2428 btrfs_mark_buffer_dirty(left);
2474 btrfs_mark_buffer_dirty(right); 2429 btrfs_mark_buffer_dirty(right);
2475 2430
2476 ret = btrfs_update_ref(trans, root, left, right, 0, push_items);
2477 BUG_ON(ret);
2478
2479 btrfs_item_key(right, &disk_key, 0); 2431 btrfs_item_key(right, &disk_key, 0);
2480 btrfs_set_node_key(upper, &disk_key, slot + 1); 2432 btrfs_set_node_key(upper, &disk_key, slot + 1);
2481 btrfs_mark_buffer_dirty(upper); 2433 btrfs_mark_buffer_dirty(upper);
@@ -2720,10 +2672,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
2720 if (right_nritems) 2672 if (right_nritems)
2721 btrfs_mark_buffer_dirty(right); 2673 btrfs_mark_buffer_dirty(right);
2722 2674
2723 ret = btrfs_update_ref(trans, root, right, left,
2724 old_left_nritems, push_items);
2725 BUG_ON(ret);
2726
2727 btrfs_item_key(right, &disk_key, 0); 2675 btrfs_item_key(right, &disk_key, 0);
2728 wret = fixup_low_keys(trans, root, path, &disk_key, 1); 2676 wret = fixup_low_keys(trans, root, path, &disk_key, 1);
2729 if (wret) 2677 if (wret)
@@ -2880,9 +2828,6 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
2880 btrfs_mark_buffer_dirty(l); 2828 btrfs_mark_buffer_dirty(l);
2881 BUG_ON(path->slots[0] != slot); 2829 BUG_ON(path->slots[0] != slot);
2882 2830
2883 ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
2884 BUG_ON(ret);
2885
2886 if (mid <= slot) { 2831 if (mid <= slot) {
2887 btrfs_tree_unlock(path->nodes[0]); 2832 btrfs_tree_unlock(path->nodes[0]);
2888 free_extent_buffer(path->nodes[0]); 2833 free_extent_buffer(path->nodes[0]);
@@ -2911,6 +2856,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
2911 struct btrfs_path *path, int data_size, 2856 struct btrfs_path *path, int data_size,
2912 int extend) 2857 int extend)
2913{ 2858{
2859 struct btrfs_disk_key disk_key;
2914 struct extent_buffer *l; 2860 struct extent_buffer *l;
2915 u32 nritems; 2861 u32 nritems;
2916 int mid; 2862 int mid;
@@ -2918,12 +2864,11 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
2918 struct extent_buffer *right; 2864 struct extent_buffer *right;
2919 int ret = 0; 2865 int ret = 0;
2920 int wret; 2866 int wret;
2921 int double_split; 2867 int split;
2922 int num_doubles = 0; 2868 int num_doubles = 0;
2923 2869
2924 /* first try to make some room by pushing left and right */ 2870 /* first try to make some room by pushing left and right */
2925 if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY && 2871 if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
2926 !trans->transaction->delayed_refs.flushing) {
2927 wret = push_leaf_right(trans, root, path, data_size, 0); 2872 wret = push_leaf_right(trans, root, path, data_size, 0);
2928 if (wret < 0) 2873 if (wret < 0)
2929 return wret; 2874 return wret;
@@ -2945,16 +2890,53 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
2945 return ret; 2890 return ret;
2946 } 2891 }
2947again: 2892again:
2948 double_split = 0; 2893 split = 1;
2949 l = path->nodes[0]; 2894 l = path->nodes[0];
2950 slot = path->slots[0]; 2895 slot = path->slots[0];
2951 nritems = btrfs_header_nritems(l); 2896 nritems = btrfs_header_nritems(l);
2952 mid = (nritems + 1) / 2; 2897 mid = (nritems + 1) / 2;
2953 2898
2954 right = btrfs_alloc_free_block(trans, root, root->leafsize, 2899 if (mid <= slot) {
2955 path->nodes[1]->start, 2900 if (nritems == 1 ||
2901 leaf_space_used(l, mid, nritems - mid) + data_size >
2902 BTRFS_LEAF_DATA_SIZE(root)) {
2903 if (slot >= nritems) {
2904 split = 0;
2905 } else {
2906 mid = slot;
2907 if (mid != nritems &&
2908 leaf_space_used(l, mid, nritems - mid) +
2909 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2910 split = 2;
2911 }
2912 }
2913 }
2914 } else {
2915 if (leaf_space_used(l, 0, mid) + data_size >
2916 BTRFS_LEAF_DATA_SIZE(root)) {
2917 if (!extend && data_size && slot == 0) {
2918 split = 0;
2919 } else if ((extend || !data_size) && slot == 0) {
2920 mid = 1;
2921 } else {
2922 mid = slot;
2923 if (mid != nritems &&
2924 leaf_space_used(l, mid, nritems - mid) +
2925 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2926 split = 2 ;
2927 }
2928 }
2929 }
2930 }
2931
2932 if (split == 0)
2933 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2934 else
2935 btrfs_item_key(l, &disk_key, mid);
2936
2937 right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
2956 root->root_key.objectid, 2938 root->root_key.objectid,
2957 trans->transid, 0, l->start, 0); 2939 &disk_key, 0, l->start, 0);
2958 if (IS_ERR(right)) { 2940 if (IS_ERR(right)) {
2959 BUG_ON(1); 2941 BUG_ON(1);
2960 return PTR_ERR(right); 2942 return PTR_ERR(right);
@@ -2963,6 +2945,7 @@ again:
2963 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); 2945 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2964 btrfs_set_header_bytenr(right, right->start); 2946 btrfs_set_header_bytenr(right, right->start);
2965 btrfs_set_header_generation(right, trans->transid); 2947 btrfs_set_header_generation(right, trans->transid);
2948 btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
2966 btrfs_set_header_owner(right, root->root_key.objectid); 2949 btrfs_set_header_owner(right, root->root_key.objectid);
2967 btrfs_set_header_level(right, 0); 2950 btrfs_set_header_level(right, 0);
2968 write_extent_buffer(right, root->fs_info->fsid, 2951 write_extent_buffer(right, root->fs_info->fsid,
@@ -2973,79 +2956,47 @@ again:
2973 (unsigned long)btrfs_header_chunk_tree_uuid(right), 2956 (unsigned long)btrfs_header_chunk_tree_uuid(right),
2974 BTRFS_UUID_SIZE); 2957 BTRFS_UUID_SIZE);
2975 2958
2976 if (mid <= slot) { 2959 if (split == 0) {
2977 if (nritems == 1 || 2960 if (mid <= slot) {
2978 leaf_space_used(l, mid, nritems - mid) + data_size > 2961 btrfs_set_header_nritems(right, 0);
2979 BTRFS_LEAF_DATA_SIZE(root)) { 2962 wret = insert_ptr(trans, root, path,
2980 if (slot >= nritems) { 2963 &disk_key, right->start,
2981 struct btrfs_disk_key disk_key; 2964 path->slots[1] + 1, 1);
2982 2965 if (wret)
2983 btrfs_cpu_key_to_disk(&disk_key, ins_key); 2966 ret = wret;
2984 btrfs_set_header_nritems(right, 0);
2985 wret = insert_ptr(trans, root, path,
2986 &disk_key, right->start,
2987 path->slots[1] + 1, 1);
2988 if (wret)
2989 ret = wret;
2990 2967
2991 btrfs_tree_unlock(path->nodes[0]); 2968 btrfs_tree_unlock(path->nodes[0]);
2992 free_extent_buffer(path->nodes[0]); 2969 free_extent_buffer(path->nodes[0]);
2993 path->nodes[0] = right; 2970 path->nodes[0] = right;
2994 path->slots[0] = 0; 2971 path->slots[0] = 0;
2995 path->slots[1] += 1; 2972 path->slots[1] += 1;
2996 btrfs_mark_buffer_dirty(right); 2973 } else {
2997 return ret; 2974 btrfs_set_header_nritems(right, 0);
2998 } 2975 wret = insert_ptr(trans, root, path,
2999 mid = slot; 2976 &disk_key,
3000 if (mid != nritems && 2977 right->start,
3001 leaf_space_used(l, mid, nritems - mid) + 2978 path->slots[1], 1);
3002 data_size > BTRFS_LEAF_DATA_SIZE(root)) { 2979 if (wret)
3003 double_split = 1; 2980 ret = wret;
3004 } 2981 btrfs_tree_unlock(path->nodes[0]);
3005 } 2982 free_extent_buffer(path->nodes[0]);
3006 } else { 2983 path->nodes[0] = right;
3007 if (leaf_space_used(l, 0, mid) + data_size > 2984 path->slots[0] = 0;
3008 BTRFS_LEAF_DATA_SIZE(root)) { 2985 if (path->slots[1] == 0) {
3009 if (!extend && data_size && slot == 0) { 2986 wret = fixup_low_keys(trans, root,
3010 struct btrfs_disk_key disk_key; 2987 path, &disk_key, 1);
3011
3012 btrfs_cpu_key_to_disk(&disk_key, ins_key);
3013 btrfs_set_header_nritems(right, 0);
3014 wret = insert_ptr(trans, root, path,
3015 &disk_key,
3016 right->start,
3017 path->slots[1], 1);
3018 if (wret) 2988 if (wret)
3019 ret = wret; 2989 ret = wret;
3020 btrfs_tree_unlock(path->nodes[0]);
3021 free_extent_buffer(path->nodes[0]);
3022 path->nodes[0] = right;
3023 path->slots[0] = 0;
3024 if (path->slots[1] == 0) {
3025 wret = fixup_low_keys(trans, root,
3026 path, &disk_key, 1);
3027 if (wret)
3028 ret = wret;
3029 }
3030 btrfs_mark_buffer_dirty(right);
3031 return ret;
3032 } else if ((extend || !data_size) && slot == 0) {
3033 mid = 1;
3034 } else {
3035 mid = slot;
3036 if (mid != nritems &&
3037 leaf_space_used(l, mid, nritems - mid) +
3038 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
3039 double_split = 1;
3040 }
3041 } 2990 }
3042 } 2991 }
2992 btrfs_mark_buffer_dirty(right);
2993 return ret;
3043 } 2994 }
3044 2995
3045 ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems); 2996 ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
3046 BUG_ON(ret); 2997 BUG_ON(ret);
3047 2998
3048 if (double_split) { 2999 if (split == 2) {
3049 BUG_ON(num_doubles != 0); 3000 BUG_ON(num_doubles != 0);
3050 num_doubles++; 3001 num_doubles++;
3051 goto again; 3002 goto again;
@@ -3447,7 +3398,7 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3447 /* figure out how many keys we can insert in here */ 3398 /* figure out how many keys we can insert in here */
3448 total_data = data_size[0]; 3399 total_data = data_size[0];
3449 for (i = 1; i < nr; i++) { 3400 for (i = 1; i < nr; i++) {
3450 if (comp_cpu_keys(&found_key, cpu_key + i) <= 0) 3401 if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0)
3451 break; 3402 break;
3452 total_data += data_size[i]; 3403 total_data += data_size[i];
3453 } 3404 }
@@ -3745,9 +3696,7 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3745 3696
3746/* 3697/*
3747 * a helper function to delete the leaf pointed to by path->slots[1] and 3698 * a helper function to delete the leaf pointed to by path->slots[1] and
3748 * path->nodes[1]. bytenr is the node block pointer, but since the callers 3699 * path->nodes[1].
3749 * already know it, it is faster to have them pass it down than to
3750 * read it out of the node again.
3751 * 3700 *
3752 * This deletes the pointer in path->nodes[1] and frees the leaf 3701 * This deletes the pointer in path->nodes[1] and frees the leaf
3753 * block extent. zero is returned if it all worked out, < 0 otherwise. 3702 * block extent. zero is returned if it all worked out, < 0 otherwise.
@@ -3755,15 +3704,14 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3755 * The path must have already been setup for deleting the leaf, including 3704 * The path must have already been setup for deleting the leaf, including
3756 * all the proper balancing. path->nodes[1] must be locked. 3705 * all the proper balancing. path->nodes[1] must be locked.
3757 */ 3706 */
3758noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, 3707static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3759 struct btrfs_root *root, 3708 struct btrfs_root *root,
3760 struct btrfs_path *path, u64 bytenr) 3709 struct btrfs_path *path,
3710 struct extent_buffer *leaf)
3761{ 3711{
3762 int ret; 3712 int ret;
3763 u64 root_gen = btrfs_header_generation(path->nodes[1]);
3764 u64 parent_start = path->nodes[1]->start;
3765 u64 parent_owner = btrfs_header_owner(path->nodes[1]);
3766 3713
3714 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
3767 ret = del_ptr(trans, root, path, 1, path->slots[1]); 3715 ret = del_ptr(trans, root, path, 1, path->slots[1]);
3768 if (ret) 3716 if (ret)
3769 return ret; 3717 return ret;
@@ -3774,10 +3722,8 @@ noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3774 */ 3722 */
3775 btrfs_unlock_up_safe(path, 0); 3723 btrfs_unlock_up_safe(path, 0);
3776 3724
3777 ret = btrfs_free_extent(trans, root, bytenr, 3725 ret = btrfs_free_extent(trans, root, leaf->start, leaf->len,
3778 btrfs_level_size(root, 0), 3726 0, root->root_key.objectid, 0, 0);
3779 parent_start, parent_owner,
3780 root_gen, 0, 1);
3781 return ret; 3727 return ret;
3782} 3728}
3783/* 3729/*
@@ -3845,7 +3791,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3845 if (leaf == root->node) { 3791 if (leaf == root->node) {
3846 btrfs_set_header_level(leaf, 0); 3792 btrfs_set_header_level(leaf, 0);
3847 } else { 3793 } else {
3848 ret = btrfs_del_leaf(trans, root, path, leaf->start); 3794 ret = btrfs_del_leaf(trans, root, path, leaf);
3849 BUG_ON(ret); 3795 BUG_ON(ret);
3850 } 3796 }
3851 } else { 3797 } else {
@@ -3861,8 +3807,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3861 } 3807 }
3862 3808
3863 /* delete the leaf if it is mostly empty */ 3809 /* delete the leaf if it is mostly empty */
3864 if (used < BTRFS_LEAF_DATA_SIZE(root) / 4 && 3810 if (used < BTRFS_LEAF_DATA_SIZE(root) / 2) {
3865 !trans->transaction->delayed_refs.flushing) {
3866 /* push_leaf_left fixes the path. 3811 /* push_leaf_left fixes the path.
3867 * make sure the path still points to our leaf 3812 * make sure the path still points to our leaf
3868 * for possible call to del_ptr below 3813 * for possible call to del_ptr below
@@ -3884,8 +3829,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3884 3829
3885 if (btrfs_header_nritems(leaf) == 0) { 3830 if (btrfs_header_nritems(leaf) == 0) {
3886 path->slots[1] = slot; 3831 path->slots[1] = slot;
3887 ret = btrfs_del_leaf(trans, root, path, 3832 ret = btrfs_del_leaf(trans, root, path, leaf);
3888 leaf->start);
3889 BUG_ON(ret); 3833 BUG_ON(ret);
3890 free_extent_buffer(leaf); 3834 free_extent_buffer(leaf);
3891 } else { 3835 } else {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4414a5d9983a..03441a99ea38 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -45,6 +45,8 @@ struct btrfs_ordered_sum;
45 45
46#define BTRFS_MAX_LEVEL 8 46#define BTRFS_MAX_LEVEL 8
47 47
48#define BTRFS_COMPAT_EXTENT_TREE_V0
49
48/* 50/*
49 * files bigger than this get some pre-flushing when they are added 51 * files bigger than this get some pre-flushing when they are added
50 * to the ordered operations list. That way we limit the total 52 * to the ordered operations list. That way we limit the total
@@ -267,7 +269,18 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
267} 269}
268 270
269#define BTRFS_FSID_SIZE 16 271#define BTRFS_FSID_SIZE 16
270#define BTRFS_HEADER_FLAG_WRITTEN (1 << 0) 272#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
273#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1)
274#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
275#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33)
276
277#define BTRFS_BACKREF_REV_MAX 256
278#define BTRFS_BACKREF_REV_SHIFT 56
279#define BTRFS_BACKREF_REV_MASK (((u64)BTRFS_BACKREF_REV_MAX - 1) << \
280 BTRFS_BACKREF_REV_SHIFT)
281
282#define BTRFS_OLD_BACKREF_REV 0
283#define BTRFS_MIXED_BACKREF_REV 1
271 284
272/* 285/*
273 * every tree block (leaf or node) starts with this header. 286 * every tree block (leaf or node) starts with this header.
@@ -296,7 +309,6 @@ struct btrfs_header {
296 sizeof(struct btrfs_item) - \ 309 sizeof(struct btrfs_item) - \
297 sizeof(struct btrfs_file_extent_item)) 310 sizeof(struct btrfs_file_extent_item))
298 311
299#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
300 312
301/* 313/*
302 * this is a very generous portion of the super block, giving us 314 * this is a very generous portion of the super block, giving us
@@ -355,9 +367,12 @@ struct btrfs_super_block {
355 * Compat flags that we support. If any incompat flags are set other than the 367 * Compat flags that we support. If any incompat flags are set other than the
356 * ones specified below then we will fail to mount 368 * ones specified below then we will fail to mount
357 */ 369 */
358#define BTRFS_FEATURE_COMPAT_SUPP 0x0 370#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
359#define BTRFS_FEATURE_COMPAT_RO_SUPP 0x0 371
360#define BTRFS_FEATURE_INCOMPAT_SUPP 0x0 372#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
373#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
374#define BTRFS_FEATURE_INCOMPAT_SUPP \
375 BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF
361 376
362/* 377/*
363 * A leaf is full of items. offset and size tell us where to find 378 * A leaf is full of items. offset and size tell us where to find
@@ -421,23 +436,65 @@ struct btrfs_path {
421 unsigned int keep_locks:1; 436 unsigned int keep_locks:1;
422 unsigned int skip_locking:1; 437 unsigned int skip_locking:1;
423 unsigned int leave_spinning:1; 438 unsigned int leave_spinning:1;
439 unsigned int search_commit_root:1;
424}; 440};
425 441
426/* 442/*
427 * items in the extent btree are used to record the objectid of the 443 * items in the extent btree are used to record the objectid of the
428 * owner of the block and the number of references 444 * owner of the block and the number of references
429 */ 445 */
446
430struct btrfs_extent_item { 447struct btrfs_extent_item {
448 __le64 refs;
449 __le64 generation;
450 __le64 flags;
451} __attribute__ ((__packed__));
452
453struct btrfs_extent_item_v0 {
431 __le32 refs; 454 __le32 refs;
432} __attribute__ ((__packed__)); 455} __attribute__ ((__packed__));
433 456
434struct btrfs_extent_ref { 457#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r) >> 4) - \
458 sizeof(struct btrfs_item))
459
460#define BTRFS_EXTENT_FLAG_DATA (1ULL << 0)
461#define BTRFS_EXTENT_FLAG_TREE_BLOCK (1ULL << 1)
462
463/* following flags only apply to tree blocks */
464
465/* use full backrefs for extent pointers in the block */
466#define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8)
467
468struct btrfs_tree_block_info {
469 struct btrfs_disk_key key;
470 u8 level;
471} __attribute__ ((__packed__));
472
473struct btrfs_extent_data_ref {
474 __le64 root;
475 __le64 objectid;
476 __le64 offset;
477 __le32 count;
478} __attribute__ ((__packed__));
479
480struct btrfs_shared_data_ref {
481 __le32 count;
482} __attribute__ ((__packed__));
483
484struct btrfs_extent_inline_ref {
485 u8 type;
486 u64 offset;
487} __attribute__ ((__packed__));
488
489/* old style backrefs item */
490struct btrfs_extent_ref_v0 {
435 __le64 root; 491 __le64 root;
436 __le64 generation; 492 __le64 generation;
437 __le64 objectid; 493 __le64 objectid;
438 __le32 num_refs; 494 __le32 count;
439} __attribute__ ((__packed__)); 495} __attribute__ ((__packed__));
440 496
497
441/* dev extents record free space on individual devices. The owner 498/* dev extents record free space on individual devices. The owner
442 * field points back to the chunk allocation mapping tree that allocated 499 * field points back to the chunk allocation mapping tree that allocated
443 * the extent. The chunk tree uuid field is a way to double check the owner 500 * the extent. The chunk tree uuid field is a way to double check the owner
@@ -695,12 +752,7 @@ struct btrfs_block_group_cache {
695 struct list_head cluster_list; 752 struct list_head cluster_list;
696}; 753};
697 754
698struct btrfs_leaf_ref_tree { 755struct reloc_control;
699 struct rb_root root;
700 struct list_head list;
701 spinlock_t lock;
702};
703
704struct btrfs_device; 756struct btrfs_device;
705struct btrfs_fs_devices; 757struct btrfs_fs_devices;
706struct btrfs_fs_info { 758struct btrfs_fs_info {
@@ -831,18 +883,11 @@ struct btrfs_fs_info {
831 struct task_struct *cleaner_kthread; 883 struct task_struct *cleaner_kthread;
832 int thread_pool_size; 884 int thread_pool_size;
833 885
834 /* tree relocation relocated fields */
835 struct list_head dead_reloc_roots;
836 struct btrfs_leaf_ref_tree reloc_ref_tree;
837 struct btrfs_leaf_ref_tree shared_ref_tree;
838
839 struct kobject super_kobj; 886 struct kobject super_kobj;
840 struct completion kobj_unregister; 887 struct completion kobj_unregister;
841 int do_barriers; 888 int do_barriers;
842 int closing; 889 int closing;
843 int log_root_recovering; 890 int log_root_recovering;
844 atomic_t throttles;
845 atomic_t throttle_gen;
846 891
847 u64 total_pinned; 892 u64 total_pinned;
848 893
@@ -861,6 +906,8 @@ struct btrfs_fs_info {
861 */ 906 */
862 struct list_head space_info; 907 struct list_head space_info;
863 908
909 struct reloc_control *reloc_ctl;
910
864 spinlock_t delalloc_lock; 911 spinlock_t delalloc_lock;
865 spinlock_t new_trans_lock; 912 spinlock_t new_trans_lock;
866 u64 delalloc_bytes; 913 u64 delalloc_bytes;
@@ -891,7 +938,6 @@ struct btrfs_fs_info {
891 * in ram representation of the tree. extent_root is used for all allocations 938 * in ram representation of the tree. extent_root is used for all allocations
892 * and for the extent tree extent_root root. 939 * and for the extent tree extent_root root.
893 */ 940 */
894struct btrfs_dirty_root;
895struct btrfs_root { 941struct btrfs_root {
896 struct extent_buffer *node; 942 struct extent_buffer *node;
897 943
@@ -899,9 +945,6 @@ struct btrfs_root {
899 spinlock_t node_lock; 945 spinlock_t node_lock;
900 946
901 struct extent_buffer *commit_root; 947 struct extent_buffer *commit_root;
902 struct btrfs_leaf_ref_tree *ref_tree;
903 struct btrfs_leaf_ref_tree ref_tree_struct;
904 struct btrfs_dirty_root *dirty_root;
905 struct btrfs_root *log_root; 948 struct btrfs_root *log_root;
906 struct btrfs_root *reloc_root; 949 struct btrfs_root *reloc_root;
907 950
@@ -952,10 +995,15 @@ struct btrfs_root {
952 /* the dirty list is only used by non-reference counted roots */ 995 /* the dirty list is only used by non-reference counted roots */
953 struct list_head dirty_list; 996 struct list_head dirty_list;
954 997
998 struct list_head root_list;
999
955 spinlock_t list_lock; 1000 spinlock_t list_lock;
956 struct list_head dead_list;
957 struct list_head orphan_list; 1001 struct list_head orphan_list;
958 1002
1003 spinlock_t inode_lock;
1004 /* red-black tree that keeps track of in-memory inodes */
1005 struct rb_root inode_tree;
1006
959 /* 1007 /*
960 * right now this just gets used so that a root has its own devid 1008 * right now this just gets used so that a root has its own devid
961 * for stat. It may be used for more later 1009 * for stat. It may be used for more later
@@ -1017,7 +1065,16 @@ struct btrfs_root {
1017 * are used, and how many references there are to each block 1065 * are used, and how many references there are to each block
1018 */ 1066 */
1019#define BTRFS_EXTENT_ITEM_KEY 168 1067#define BTRFS_EXTENT_ITEM_KEY 168
1020#define BTRFS_EXTENT_REF_KEY 180 1068
1069#define BTRFS_TREE_BLOCK_REF_KEY 176
1070
1071#define BTRFS_EXTENT_DATA_REF_KEY 178
1072
1073#define BTRFS_EXTENT_REF_V0_KEY 180
1074
1075#define BTRFS_SHARED_BLOCK_REF_KEY 182
1076
1077#define BTRFS_SHARED_DATA_REF_KEY 184
1021 1078
1022/* 1079/*
1023 * block groups give us hints into the extent allocation trees. Which 1080 * block groups give us hints into the extent allocation trees. Which
@@ -1043,6 +1100,8 @@ struct btrfs_root {
1043#define BTRFS_MOUNT_COMPRESS (1 << 5) 1100#define BTRFS_MOUNT_COMPRESS (1 << 5)
1044#define BTRFS_MOUNT_NOTREELOG (1 << 6) 1101#define BTRFS_MOUNT_NOTREELOG (1 << 6)
1045#define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7) 1102#define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7)
1103#define BTRFS_MOUNT_SSD_SPREAD (1 << 8)
1104#define BTRFS_MOUNT_NOSSD (1 << 9)
1046 1105
1047#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1106#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1048#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1107#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -1056,12 +1115,14 @@ struct btrfs_root {
1056#define BTRFS_INODE_READONLY (1 << 2) 1115#define BTRFS_INODE_READONLY (1 << 2)
1057#define BTRFS_INODE_NOCOMPRESS (1 << 3) 1116#define BTRFS_INODE_NOCOMPRESS (1 << 3)
1058#define BTRFS_INODE_PREALLOC (1 << 4) 1117#define BTRFS_INODE_PREALLOC (1 << 4)
1059#define btrfs_clear_flag(inode, flag) (BTRFS_I(inode)->flags &= \ 1118#define BTRFS_INODE_SYNC (1 << 5)
1060 ~BTRFS_INODE_##flag) 1119#define BTRFS_INODE_IMMUTABLE (1 << 6)
1061#define btrfs_set_flag(inode, flag) (BTRFS_I(inode)->flags |= \ 1120#define BTRFS_INODE_APPEND (1 << 7)
1062 BTRFS_INODE_##flag) 1121#define BTRFS_INODE_NODUMP (1 << 8)
1063#define btrfs_test_flag(inode, flag) (BTRFS_I(inode)->flags & \ 1122#define BTRFS_INODE_NOATIME (1 << 9)
1064 BTRFS_INODE_##flag) 1123#define BTRFS_INODE_DIRSYNC (1 << 10)
1124
1125
1065/* some macros to generate set/get funcs for the struct fields. This 1126/* some macros to generate set/get funcs for the struct fields. This
1066 * assumes there is a lefoo_to_cpu for every type, so lets make a simple 1127 * assumes there is a lefoo_to_cpu for every type, so lets make a simple
1067 * one for u8: 1128 * one for u8:
@@ -1317,24 +1378,67 @@ static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
1317 return (u8 *)((unsigned long)dev + ptr); 1378 return (u8 *)((unsigned long)dev + ptr);
1318} 1379}
1319 1380
1320/* struct btrfs_extent_ref */ 1381BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64);
1321BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64); 1382BTRFS_SETGET_FUNCS(extent_generation, struct btrfs_extent_item,
1322BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64); 1383 generation, 64);
1323BTRFS_SETGET_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64); 1384BTRFS_SETGET_FUNCS(extent_flags, struct btrfs_extent_item, flags, 64);
1324BTRFS_SETGET_FUNCS(ref_num_refs, struct btrfs_extent_ref, num_refs, 32);
1325 1385
1326BTRFS_SETGET_STACK_FUNCS(stack_ref_root, struct btrfs_extent_ref, root, 64); 1386BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32);
1327BTRFS_SETGET_STACK_FUNCS(stack_ref_generation, struct btrfs_extent_ref, 1387
1328 generation, 64); 1388
1329BTRFS_SETGET_STACK_FUNCS(stack_ref_objectid, struct btrfs_extent_ref, 1389BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8);
1330 objectid, 64); 1390
1331BTRFS_SETGET_STACK_FUNCS(stack_ref_num_refs, struct btrfs_extent_ref, 1391static inline void btrfs_tree_block_key(struct extent_buffer *eb,
1332 num_refs, 32); 1392 struct btrfs_tree_block_info *item,
1393 struct btrfs_disk_key *key)
1394{
1395 read_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
1396}
1397
1398static inline void btrfs_set_tree_block_key(struct extent_buffer *eb,
1399 struct btrfs_tree_block_info *item,
1400 struct btrfs_disk_key *key)
1401{
1402 write_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
1403}
1404
1405BTRFS_SETGET_FUNCS(extent_data_ref_root, struct btrfs_extent_data_ref,
1406 root, 64);
1407BTRFS_SETGET_FUNCS(extent_data_ref_objectid, struct btrfs_extent_data_ref,
1408 objectid, 64);
1409BTRFS_SETGET_FUNCS(extent_data_ref_offset, struct btrfs_extent_data_ref,
1410 offset, 64);
1411BTRFS_SETGET_FUNCS(extent_data_ref_count, struct btrfs_extent_data_ref,
1412 count, 32);
1413
1414BTRFS_SETGET_FUNCS(shared_data_ref_count, struct btrfs_shared_data_ref,
1415 count, 32);
1333 1416
1334/* struct btrfs_extent_item */ 1417BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref,
1335BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); 1418 type, 8);
1336BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, 1419BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref,
1337 refs, 32); 1420 offset, 64);
1421
1422static inline u32 btrfs_extent_inline_ref_size(int type)
1423{
1424 if (type == BTRFS_TREE_BLOCK_REF_KEY ||
1425 type == BTRFS_SHARED_BLOCK_REF_KEY)
1426 return sizeof(struct btrfs_extent_inline_ref);
1427 if (type == BTRFS_SHARED_DATA_REF_KEY)
1428 return sizeof(struct btrfs_shared_data_ref) +
1429 sizeof(struct btrfs_extent_inline_ref);
1430 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1431 return sizeof(struct btrfs_extent_data_ref) +
1432 offsetof(struct btrfs_extent_inline_ref, offset);
1433 BUG();
1434 return 0;
1435}
1436
1437BTRFS_SETGET_FUNCS(ref_root_v0, struct btrfs_extent_ref_v0, root, 64);
1438BTRFS_SETGET_FUNCS(ref_generation_v0, struct btrfs_extent_ref_v0,
1439 generation, 64);
1440BTRFS_SETGET_FUNCS(ref_objectid_v0, struct btrfs_extent_ref_v0, objectid, 64);
1441BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32);
1338 1442
1339/* struct btrfs_node */ 1443/* struct btrfs_node */
1340BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); 1444BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64);
@@ -1558,6 +1662,21 @@ static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
1558 return (flags & flag) == flag; 1662 return (flags & flag) == flag;
1559} 1663}
1560 1664
1665static inline int btrfs_header_backref_rev(struct extent_buffer *eb)
1666{
1667 u64 flags = btrfs_header_flags(eb);
1668 return flags >> BTRFS_BACKREF_REV_SHIFT;
1669}
1670
1671static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb,
1672 int rev)
1673{
1674 u64 flags = btrfs_header_flags(eb);
1675 flags &= ~BTRFS_BACKREF_REV_MASK;
1676 flags |= (u64)rev << BTRFS_BACKREF_REV_SHIFT;
1677 btrfs_set_header_flags(eb, flags);
1678}
1679
1561static inline u8 *btrfs_header_fsid(struct extent_buffer *eb) 1680static inline u8 *btrfs_header_fsid(struct extent_buffer *eb)
1562{ 1681{
1563 unsigned long ptr = offsetof(struct btrfs_header, fsid); 1682 unsigned long ptr = offsetof(struct btrfs_header, fsid);
@@ -1790,39 +1909,32 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
1790int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 1909int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
1791 struct btrfs_root *root, struct extent_buffer *leaf); 1910 struct btrfs_root *root, struct extent_buffer *leaf);
1792int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 1911int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
1793 struct btrfs_root *root, u64 objectid, u64 bytenr); 1912 struct btrfs_root *root,
1913 u64 objectid, u64 offset, u64 bytenr);
1794int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); 1914int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
1795struct btrfs_block_group_cache *btrfs_lookup_block_group( 1915struct btrfs_block_group_cache *btrfs_lookup_block_group(
1796 struct btrfs_fs_info *info, 1916 struct btrfs_fs_info *info,
1797 u64 bytenr); 1917 u64 bytenr);
1918void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
1798u64 btrfs_find_block_group(struct btrfs_root *root, 1919u64 btrfs_find_block_group(struct btrfs_root *root,
1799 u64 search_start, u64 search_hint, int owner); 1920 u64 search_start, u64 search_hint, int owner);
1800struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 1921struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1801 struct btrfs_root *root, 1922 struct btrfs_root *root, u32 blocksize,
1802 u32 blocksize, u64 parent, 1923 u64 parent, u64 root_objectid,
1803 u64 root_objectid, 1924 struct btrfs_disk_key *key, int level,
1804 u64 ref_generation, 1925 u64 hint, u64 empty_size);
1805 int level,
1806 u64 hint,
1807 u64 empty_size);
1808struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 1926struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
1809 struct btrfs_root *root, 1927 struct btrfs_root *root,
1810 u64 bytenr, u32 blocksize, 1928 u64 bytenr, u32 blocksize,
1811 int level); 1929 int level);
1812int btrfs_alloc_extent(struct btrfs_trans_handle *trans, 1930int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
1813 struct btrfs_root *root, 1931 struct btrfs_root *root,
1814 u64 num_bytes, u64 parent, u64 min_bytes, 1932 u64 root_objectid, u64 owner,
1815 u64 root_objectid, u64 ref_generation, 1933 u64 offset, struct btrfs_key *ins);
1816 u64 owner, u64 empty_size, u64 hint_byte, 1934int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
1817 u64 search_end, struct btrfs_key *ins, u64 data); 1935 struct btrfs_root *root,
1818int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, 1936 u64 root_objectid, u64 owner, u64 offset,
1819 struct btrfs_root *root, u64 parent, 1937 struct btrfs_key *ins);
1820 u64 root_objectid, u64 ref_generation,
1821 u64 owner, struct btrfs_key *ins);
1822int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
1823 struct btrfs_root *root, u64 parent,
1824 u64 root_objectid, u64 ref_generation,
1825 u64 owner, struct btrfs_key *ins);
1826int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 1938int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
1827 struct btrfs_root *root, 1939 struct btrfs_root *root,
1828 u64 num_bytes, u64 min_alloc_size, 1940 u64 num_bytes, u64 min_alloc_size,
@@ -1830,18 +1942,18 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
1830 u64 search_end, struct btrfs_key *ins, 1942 u64 search_end, struct btrfs_key *ins,
1831 u64 data); 1943 u64 data);
1832int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1944int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1833 struct extent_buffer *orig_buf, struct extent_buffer *buf, 1945 struct extent_buffer *buf, int full_backref);
1834 u32 *nr_extents); 1946int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1835int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1947 struct extent_buffer *buf, int full_backref);
1836 struct extent_buffer *buf, u32 nr_extents); 1948int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
1837int btrfs_update_ref(struct btrfs_trans_handle *trans, 1949 struct btrfs_root *root,
1838 struct btrfs_root *root, struct extent_buffer *orig_buf, 1950 u64 bytenr, u64 num_bytes, u64 flags,
1839 struct extent_buffer *buf, int start_slot, int nr); 1951 int is_data);
1840int btrfs_free_extent(struct btrfs_trans_handle *trans, 1952int btrfs_free_extent(struct btrfs_trans_handle *trans,
1841 struct btrfs_root *root, 1953 struct btrfs_root *root,
1842 u64 bytenr, u64 num_bytes, u64 parent, 1954 u64 bytenr, u64 num_bytes, u64 parent,
1843 u64 root_objectid, u64 ref_generation, 1955 u64 root_objectid, u64 owner, u64 offset);
1844 u64 owner_objectid, int pin); 1956
1845int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); 1957int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
1846int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 1958int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1847 struct btrfs_root *root, 1959 struct btrfs_root *root,
@@ -1849,13 +1961,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1849int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1961int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1850 struct btrfs_root *root, 1962 struct btrfs_root *root,
1851 u64 bytenr, u64 num_bytes, u64 parent, 1963 u64 bytenr, u64 num_bytes, u64 parent,
1852 u64 root_objectid, u64 ref_generation, 1964 u64 root_objectid, u64 owner, u64 offset);
1853 u64 owner_objectid); 1965
1854int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1855 struct btrfs_root *root, u64 bytenr, u64 num_bytes,
1856 u64 orig_parent, u64 parent,
1857 u64 root_objectid, u64 ref_generation,
1858 u64 owner_objectid);
1859int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 1966int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1860 struct btrfs_root *root); 1967 struct btrfs_root *root);
1861int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); 1968int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
@@ -1867,16 +1974,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
1867 u64 size); 1974 u64 size);
1868int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 1975int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
1869 struct btrfs_root *root, u64 group_start); 1976 struct btrfs_root *root, u64 group_start);
1870int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); 1977int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
1871int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, 1978 struct btrfs_block_group_cache *group);
1872 struct btrfs_root *root); 1979
1873int btrfs_drop_dead_reloc_roots(struct btrfs_root *root);
1874int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
1875 struct btrfs_root *root,
1876 struct extent_buffer *buf, u64 orig_start);
1877int btrfs_add_dead_reloc_root(struct btrfs_root *root);
1878int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
1879int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
1880u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); 1980u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
1881void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); 1981void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
1882void btrfs_clear_space_info_full(struct btrfs_fs_info *info); 1982void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
@@ -1891,13 +1991,12 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
1891void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, 1991void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
1892 u64 bytes); 1992 u64 bytes);
1893/* ctree.c */ 1993/* ctree.c */
1994int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
1995 int level, int *slot);
1996int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
1894int btrfs_previous_item(struct btrfs_root *root, 1997int btrfs_previous_item(struct btrfs_root *root,
1895 struct btrfs_path *path, u64 min_objectid, 1998 struct btrfs_path *path, u64 min_objectid,
1896 int type); 1999 int type);
1897int btrfs_merge_path(struct btrfs_trans_handle *trans,
1898 struct btrfs_root *root,
1899 struct btrfs_key *node_keys,
1900 u64 *nodes, int lowest_level);
1901int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, 2000int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
1902 struct btrfs_root *root, struct btrfs_path *path, 2001 struct btrfs_root *root, struct btrfs_path *path,
1903 struct btrfs_key *new_key); 2002 struct btrfs_key *new_key);
@@ -1918,6 +2017,8 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
1918 struct btrfs_root *root, 2017 struct btrfs_root *root,
1919 struct extent_buffer *buf, 2018 struct extent_buffer *buf,
1920 struct extent_buffer **cow_ret, u64 new_root_objectid); 2019 struct extent_buffer **cow_ret, u64 new_root_objectid);
2020int btrfs_block_can_be_shared(struct btrfs_root *root,
2021 struct extent_buffer *buf);
1921int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root 2022int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
1922 *root, struct btrfs_path *path, u32 data_size); 2023 *root, struct btrfs_path *path, u32 data_size);
1923int btrfs_truncate_item(struct btrfs_trans_handle *trans, 2024int btrfs_truncate_item(struct btrfs_trans_handle *trans,
@@ -1944,9 +2045,6 @@ void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
1944 2045
1945int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2046int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1946 struct btrfs_path *path, int slot, int nr); 2047 struct btrfs_path *path, int slot, int nr);
1947int btrfs_del_leaf(struct btrfs_trans_handle *trans,
1948 struct btrfs_root *root,
1949 struct btrfs_path *path, u64 bytenr);
1950static inline int btrfs_del_item(struct btrfs_trans_handle *trans, 2048static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
1951 struct btrfs_root *root, 2049 struct btrfs_root *root,
1952 struct btrfs_path *path) 2050 struct btrfs_path *path)
@@ -2005,8 +2103,9 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
2005 btrfs_root_item *item, struct btrfs_key *key); 2103 btrfs_root_item *item, struct btrfs_key *key);
2006int btrfs_search_root(struct btrfs_root *root, u64 search_start, 2104int btrfs_search_root(struct btrfs_root *root, u64 search_start,
2007 u64 *found_objectid); 2105 u64 *found_objectid);
2008int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, 2106int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
2009 struct btrfs_root *latest_root); 2107int btrfs_set_root_node(struct btrfs_root_item *item,
2108 struct extent_buffer *node);
2010/* dir-item.c */ 2109/* dir-item.c */
2011int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, 2110int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
2012 struct btrfs_root *root, const char *name, 2111 struct btrfs_root *root, const char *name,
@@ -2139,7 +2238,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
2139int btrfs_readpage(struct file *file, struct page *page); 2238int btrfs_readpage(struct file *file, struct page *page);
2140void btrfs_delete_inode(struct inode *inode); 2239void btrfs_delete_inode(struct inode *inode);
2141void btrfs_put_inode(struct inode *inode); 2240void btrfs_put_inode(struct inode *inode);
2142void btrfs_read_locked_inode(struct inode *inode);
2143int btrfs_write_inode(struct inode *inode, int wait); 2241int btrfs_write_inode(struct inode *inode, int wait);
2144void btrfs_dirty_inode(struct inode *inode); 2242void btrfs_dirty_inode(struct inode *inode);
2145struct inode *btrfs_alloc_inode(struct super_block *sb); 2243struct inode *btrfs_alloc_inode(struct super_block *sb);
@@ -2147,12 +2245,8 @@ void btrfs_destroy_inode(struct inode *inode);
2147int btrfs_init_cachep(void); 2245int btrfs_init_cachep(void);
2148void btrfs_destroy_cachep(void); 2246void btrfs_destroy_cachep(void);
2149long btrfs_ioctl_trans_end(struct file *file); 2247long btrfs_ioctl_trans_end(struct file *file);
2150struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
2151 struct btrfs_root *root, int wait);
2152struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
2153 struct btrfs_root *root);
2154struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, 2248struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
2155 struct btrfs_root *root, int *is_new); 2249 struct btrfs_root *root);
2156int btrfs_commit_write(struct file *file, struct page *page, 2250int btrfs_commit_write(struct file *file, struct page *page,
2157 unsigned from, unsigned to); 2251 unsigned from, unsigned to);
2158struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, 2252struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
@@ -2168,6 +2262,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t size);
2168 2262
2169/* ioctl.c */ 2263/* ioctl.c */
2170long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 2264long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
2265void btrfs_update_iflags(struct inode *inode);
2266void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
2171 2267
2172/* file.c */ 2268/* file.c */
2173int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync); 2269int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync);
@@ -2205,8 +2301,20 @@ int btrfs_parse_options(struct btrfs_root *root, char *options);
2205int btrfs_sync_fs(struct super_block *sb, int wait); 2301int btrfs_sync_fs(struct super_block *sb, int wait);
2206 2302
2207/* acl.c */ 2303/* acl.c */
2304#ifdef CONFIG_FS_POSIX_ACL
2208int btrfs_check_acl(struct inode *inode, int mask); 2305int btrfs_check_acl(struct inode *inode, int mask);
2306#else
2307#define btrfs_check_acl NULL
2308#endif
2209int btrfs_init_acl(struct inode *inode, struct inode *dir); 2309int btrfs_init_acl(struct inode *inode, struct inode *dir);
2210int btrfs_acl_chmod(struct inode *inode); 2310int btrfs_acl_chmod(struct inode *inode);
2211 2311
2312/* relocation.c */
2313int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start);
2314int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
2315 struct btrfs_root *root);
2316int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
2317 struct btrfs_root *root);
2318int btrfs_recover_relocation(struct btrfs_root *root);
2319int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
2212#endif 2320#endif
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index d6c01c096a40..84e6781413b1 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -29,27 +29,87 @@
29 * add extents in the middle of btrfs_search_slot, and it allows 29 * add extents in the middle of btrfs_search_slot, and it allows
30 * us to buffer up frequently modified backrefs in an rb tree instead 30 * us to buffer up frequently modified backrefs in an rb tree instead
31 * of hammering updates on the extent allocation tree. 31 * of hammering updates on the extent allocation tree.
32 *
33 * Right now this code is only used for reference counted trees, but
34 * the long term goal is to get rid of the similar code for delayed
35 * extent tree modifications.
36 */ 32 */
37 33
38/* 34/*
39 * entries in the rb tree are ordered by the byte number of the extent 35 * compare two delayed tree backrefs with same bytenr and type
40 * and by the byte number of the parent block. 36 */
37static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
38 struct btrfs_delayed_tree_ref *ref1)
39{
40 if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
41 if (ref1->root < ref2->root)
42 return -1;
43 if (ref1->root > ref2->root)
44 return 1;
45 } else {
46 if (ref1->parent < ref2->parent)
47 return -1;
48 if (ref1->parent > ref2->parent)
49 return 1;
50 }
51 return 0;
52}
53
54/*
55 * compare two delayed data backrefs with same bytenr and type
41 */ 56 */
42static int comp_entry(struct btrfs_delayed_ref_node *ref, 57static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
43 u64 bytenr, u64 parent) 58 struct btrfs_delayed_data_ref *ref1)
44{ 59{
45 if (bytenr < ref->bytenr) 60 if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
61 if (ref1->root < ref2->root)
62 return -1;
63 if (ref1->root > ref2->root)
64 return 1;
65 if (ref1->objectid < ref2->objectid)
66 return -1;
67 if (ref1->objectid > ref2->objectid)
68 return 1;
69 if (ref1->offset < ref2->offset)
70 return -1;
71 if (ref1->offset > ref2->offset)
72 return 1;
73 } else {
74 if (ref1->parent < ref2->parent)
75 return -1;
76 if (ref1->parent > ref2->parent)
77 return 1;
78 }
79 return 0;
80}
81
82/*
83 * entries in the rb tree are ordered by the byte number of the extent,
84 * type of the delayed backrefs and content of delayed backrefs.
85 */
86static int comp_entry(struct btrfs_delayed_ref_node *ref2,
87 struct btrfs_delayed_ref_node *ref1)
88{
89 if (ref1->bytenr < ref2->bytenr)
46 return -1; 90 return -1;
47 if (bytenr > ref->bytenr) 91 if (ref1->bytenr > ref2->bytenr)
48 return 1; 92 return 1;
49 if (parent < ref->parent) 93 if (ref1->is_head && ref2->is_head)
94 return 0;
95 if (ref2->is_head)
50 return -1; 96 return -1;
51 if (parent > ref->parent) 97 if (ref1->is_head)
52 return 1; 98 return 1;
99 if (ref1->type < ref2->type)
100 return -1;
101 if (ref1->type > ref2->type)
102 return 1;
103 if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
104 ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
105 return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
106 btrfs_delayed_node_to_tree_ref(ref1));
107 } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
108 ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
109 return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
110 btrfs_delayed_node_to_data_ref(ref1));
111 }
112 BUG();
53 return 0; 113 return 0;
54} 114}
55 115
@@ -59,20 +119,21 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref,
59 * inserted. 119 * inserted.
60 */ 120 */
61static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, 121static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
62 u64 bytenr, u64 parent,
63 struct rb_node *node) 122 struct rb_node *node)
64{ 123{
65 struct rb_node **p = &root->rb_node; 124 struct rb_node **p = &root->rb_node;
66 struct rb_node *parent_node = NULL; 125 struct rb_node *parent_node = NULL;
67 struct btrfs_delayed_ref_node *entry; 126 struct btrfs_delayed_ref_node *entry;
127 struct btrfs_delayed_ref_node *ins;
68 int cmp; 128 int cmp;
69 129
130 ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
70 while (*p) { 131 while (*p) {
71 parent_node = *p; 132 parent_node = *p;
72 entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, 133 entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
73 rb_node); 134 rb_node);
74 135
75 cmp = comp_entry(entry, bytenr, parent); 136 cmp = comp_entry(entry, ins);
76 if (cmp < 0) 137 if (cmp < 0)
77 p = &(*p)->rb_left; 138 p = &(*p)->rb_left;
78 else if (cmp > 0) 139 else if (cmp > 0)
@@ -81,18 +142,17 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
81 return entry; 142 return entry;
82 } 143 }
83 144
84 entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
85 rb_link_node(node, parent_node, p); 145 rb_link_node(node, parent_node, p);
86 rb_insert_color(node, root); 146 rb_insert_color(node, root);
87 return NULL; 147 return NULL;
88} 148}
89 149
90/* 150/*
91 * find an entry based on (bytenr,parent). This returns the delayed 151 * find an head entry based on bytenr. This returns the delayed ref
92 * ref if it was able to find one, or NULL if nothing was in that spot 152 * head if it was able to find one, or NULL if nothing was in that spot
93 */ 153 */
94static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root, 154static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
95 u64 bytenr, u64 parent, 155 u64 bytenr,
96 struct btrfs_delayed_ref_node **last) 156 struct btrfs_delayed_ref_node **last)
97{ 157{
98 struct rb_node *n = root->rb_node; 158 struct rb_node *n = root->rb_node;
@@ -105,7 +165,15 @@ static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
105 if (last) 165 if (last)
106 *last = entry; 166 *last = entry;
107 167
108 cmp = comp_entry(entry, bytenr, parent); 168 if (bytenr < entry->bytenr)
169 cmp = -1;
170 else if (bytenr > entry->bytenr)
171 cmp = 1;
172 else if (!btrfs_delayed_ref_is_head(entry))
173 cmp = 1;
174 else
175 cmp = 0;
176
109 if (cmp < 0) 177 if (cmp < 0)
110 n = n->rb_left; 178 n = n->rb_left;
111 else if (cmp > 0) 179 else if (cmp > 0)
@@ -154,7 +222,7 @@ int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
154 node = rb_first(&delayed_refs->root); 222 node = rb_first(&delayed_refs->root);
155 } else { 223 } else {
156 ref = NULL; 224 ref = NULL;
157 tree_search(&delayed_refs->root, start, (u64)-1, &ref); 225 find_ref_head(&delayed_refs->root, start, &ref);
158 if (ref) { 226 if (ref) {
159 struct btrfs_delayed_ref_node *tmp; 227 struct btrfs_delayed_ref_node *tmp;
160 228
@@ -234,7 +302,7 @@ int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
234 delayed_refs = &trans->transaction->delayed_refs; 302 delayed_refs = &trans->transaction->delayed_refs;
235 spin_lock(&delayed_refs->lock); 303 spin_lock(&delayed_refs->lock);
236 304
237 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); 305 ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
238 if (ref) { 306 if (ref) {
239 prev_node = rb_prev(&ref->rb_node); 307 prev_node = rb_prev(&ref->rb_node);
240 if (!prev_node) 308 if (!prev_node)
@@ -250,25 +318,28 @@ out:
250} 318}
251 319
252/* 320/*
253 * helper function to lookup reference count 321 * helper function to lookup reference count and flags of extent.
254 * 322 *
255 * the head node for delayed ref is used to store the sum of all the 323 * the head node for delayed ref is used to store the sum of all the
256 * reference count modifications queued up in the rbtree. This way you 324 * reference count modifications queued up in the rbtree. the head
257 * can check to see what the reference count would be if all of the 325 * node may also store the extent flags to set. This way you can check
258 * delayed refs are processed. 326 * to see what the reference count and extent flags would be if all of
327 * the delayed refs are not processed.
259 */ 328 */
260int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, 329int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
261 struct btrfs_root *root, u64 bytenr, 330 struct btrfs_root *root, u64 bytenr,
262 u64 num_bytes, u32 *refs) 331 u64 num_bytes, u64 *refs, u64 *flags)
263{ 332{
264 struct btrfs_delayed_ref_node *ref; 333 struct btrfs_delayed_ref_node *ref;
265 struct btrfs_delayed_ref_head *head; 334 struct btrfs_delayed_ref_head *head;
266 struct btrfs_delayed_ref_root *delayed_refs; 335 struct btrfs_delayed_ref_root *delayed_refs;
267 struct btrfs_path *path; 336 struct btrfs_path *path;
268 struct extent_buffer *leaf;
269 struct btrfs_extent_item *ei; 337 struct btrfs_extent_item *ei;
338 struct extent_buffer *leaf;
270 struct btrfs_key key; 339 struct btrfs_key key;
271 u32 num_refs; 340 u32 item_size;
341 u64 num_refs;
342 u64 extent_flags;
272 int ret; 343 int ret;
273 344
274 path = btrfs_alloc_path(); 345 path = btrfs_alloc_path();
@@ -287,37 +358,60 @@ again:
287 358
288 if (ret == 0) { 359 if (ret == 0) {
289 leaf = path->nodes[0]; 360 leaf = path->nodes[0];
290 ei = btrfs_item_ptr(leaf, path->slots[0], 361 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
291 struct btrfs_extent_item); 362 if (item_size >= sizeof(*ei)) {
292 num_refs = btrfs_extent_refs(leaf, ei); 363 ei = btrfs_item_ptr(leaf, path->slots[0],
364 struct btrfs_extent_item);
365 num_refs = btrfs_extent_refs(leaf, ei);
366 extent_flags = btrfs_extent_flags(leaf, ei);
367 } else {
368#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
369 struct btrfs_extent_item_v0 *ei0;
370 BUG_ON(item_size != sizeof(*ei0));
371 ei0 = btrfs_item_ptr(leaf, path->slots[0],
372 struct btrfs_extent_item_v0);
373 num_refs = btrfs_extent_refs_v0(leaf, ei0);
374 /* FIXME: this isn't correct for data */
375 extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
376#else
377 BUG();
378#endif
379 }
380 BUG_ON(num_refs == 0);
293 } else { 381 } else {
294 num_refs = 0; 382 num_refs = 0;
383 extent_flags = 0;
295 ret = 0; 384 ret = 0;
296 } 385 }
297 386
298 spin_lock(&delayed_refs->lock); 387 spin_lock(&delayed_refs->lock);
299 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); 388 ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
300 if (ref) { 389 if (ref) {
301 head = btrfs_delayed_node_to_head(ref); 390 head = btrfs_delayed_node_to_head(ref);
302 if (mutex_trylock(&head->mutex)) { 391 if (!mutex_trylock(&head->mutex)) {
303 num_refs += ref->ref_mod; 392 atomic_inc(&ref->refs);
304 mutex_unlock(&head->mutex); 393 spin_unlock(&delayed_refs->lock);
305 *refs = num_refs;
306 goto out;
307 }
308 394
309 atomic_inc(&ref->refs); 395 btrfs_release_path(root->fs_info->extent_root, path);
310 spin_unlock(&delayed_refs->lock);
311 396
312 btrfs_release_path(root->fs_info->extent_root, path); 397 mutex_lock(&head->mutex);
398 mutex_unlock(&head->mutex);
399 btrfs_put_delayed_ref(ref);
400 goto again;
401 }
402 if (head->extent_op && head->extent_op->update_flags)
403 extent_flags |= head->extent_op->flags_to_set;
404 else
405 BUG_ON(num_refs == 0);
313 406
314 mutex_lock(&head->mutex); 407 num_refs += ref->ref_mod;
315 mutex_unlock(&head->mutex); 408 mutex_unlock(&head->mutex);
316 btrfs_put_delayed_ref(ref);
317 goto again;
318 } else {
319 *refs = num_refs;
320 } 409 }
410 WARN_ON(num_refs == 0);
411 if (refs)
412 *refs = num_refs;
413 if (flags)
414 *flags = extent_flags;
321out: 415out:
322 spin_unlock(&delayed_refs->lock); 416 spin_unlock(&delayed_refs->lock);
323 btrfs_free_path(path); 417 btrfs_free_path(path);
@@ -338,16 +432,7 @@ update_existing_ref(struct btrfs_trans_handle *trans,
338 struct btrfs_delayed_ref_node *existing, 432 struct btrfs_delayed_ref_node *existing,
339 struct btrfs_delayed_ref_node *update) 433 struct btrfs_delayed_ref_node *update)
340{ 434{
341 struct btrfs_delayed_ref *existing_ref; 435 if (update->action != existing->action) {
342 struct btrfs_delayed_ref *ref;
343
344 existing_ref = btrfs_delayed_node_to_ref(existing);
345 ref = btrfs_delayed_node_to_ref(update);
346
347 if (ref->pin)
348 existing_ref->pin = 1;
349
350 if (ref->action != existing_ref->action) {
351 /* 436 /*
352 * this is effectively undoing either an add or a 437 * this is effectively undoing either an add or a
353 * drop. We decrement the ref_mod, and if it goes 438 * drop. We decrement the ref_mod, and if it goes
@@ -363,20 +448,13 @@ update_existing_ref(struct btrfs_trans_handle *trans,
363 delayed_refs->num_entries--; 448 delayed_refs->num_entries--;
364 if (trans->delayed_ref_updates) 449 if (trans->delayed_ref_updates)
365 trans->delayed_ref_updates--; 450 trans->delayed_ref_updates--;
451 } else {
452 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
453 existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
366 } 454 }
367 } else { 455 } else {
368 if (existing_ref->action == BTRFS_ADD_DELAYED_REF) { 456 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
369 /* if we're adding refs, make sure all the 457 existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
370 * details match up. The extent could
371 * have been totally freed and reallocated
372 * by a different owner before the delayed
373 * ref entries were removed.
374 */
375 existing_ref->owner_objectid = ref->owner_objectid;
376 existing_ref->generation = ref->generation;
377 existing_ref->root = ref->root;
378 existing->num_bytes = update->num_bytes;
379 }
380 /* 458 /*
381 * the action on the existing ref matches 459 * the action on the existing ref matches
382 * the action on the ref we're trying to add. 460 * the action on the ref we're trying to add.
@@ -401,6 +479,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
401 479
402 existing_ref = btrfs_delayed_node_to_head(existing); 480 existing_ref = btrfs_delayed_node_to_head(existing);
403 ref = btrfs_delayed_node_to_head(update); 481 ref = btrfs_delayed_node_to_head(update);
482 BUG_ON(existing_ref->is_data != ref->is_data);
404 483
405 if (ref->must_insert_reserved) { 484 if (ref->must_insert_reserved) {
406 /* if the extent was freed and then 485 /* if the extent was freed and then
@@ -420,6 +499,24 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
420 499
421 } 500 }
422 501
502 if (ref->extent_op) {
503 if (!existing_ref->extent_op) {
504 existing_ref->extent_op = ref->extent_op;
505 } else {
506 if (ref->extent_op->update_key) {
507 memcpy(&existing_ref->extent_op->key,
508 &ref->extent_op->key,
509 sizeof(ref->extent_op->key));
510 existing_ref->extent_op->update_key = 1;
511 }
512 if (ref->extent_op->update_flags) {
513 existing_ref->extent_op->flags_to_set |=
514 ref->extent_op->flags_to_set;
515 existing_ref->extent_op->update_flags = 1;
516 }
517 kfree(ref->extent_op);
518 }
519 }
423 /* 520 /*
424 * update the reference mod on the head to reflect this new operation 521 * update the reference mod on the head to reflect this new operation
425 */ 522 */
@@ -427,19 +524,16 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
427} 524}
428 525
429/* 526/*
430 * helper function to actually insert a delayed ref into the rbtree. 527 * helper function to actually insert a head node into the rbtree.
431 * this does all the dirty work in terms of maintaining the correct 528 * this does all the dirty work in terms of maintaining the correct
432 * overall modification count in the head node and properly dealing 529 * overall modification count.
433 * with updating existing nodes as new modifications are queued.
434 */ 530 */
435static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, 531static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
436 struct btrfs_delayed_ref_node *ref, 532 struct btrfs_delayed_ref_node *ref,
437 u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, 533 u64 bytenr, u64 num_bytes,
438 u64 ref_generation, u64 owner_objectid, int action, 534 int action, int is_data)
439 int pin)
440{ 535{
441 struct btrfs_delayed_ref_node *existing; 536 struct btrfs_delayed_ref_node *existing;
442 struct btrfs_delayed_ref *full_ref;
443 struct btrfs_delayed_ref_head *head_ref = NULL; 537 struct btrfs_delayed_ref_head *head_ref = NULL;
444 struct btrfs_delayed_ref_root *delayed_refs; 538 struct btrfs_delayed_ref_root *delayed_refs;
445 int count_mod = 1; 539 int count_mod = 1;
@@ -449,12 +543,10 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
449 * the head node stores the sum of all the mods, so dropping a ref 543 * the head node stores the sum of all the mods, so dropping a ref
450 * should drop the sum in the head node by one. 544 * should drop the sum in the head node by one.
451 */ 545 */
452 if (parent == (u64)-1) { 546 if (action == BTRFS_UPDATE_DELAYED_HEAD)
453 if (action == BTRFS_DROP_DELAYED_REF) 547 count_mod = 0;
454 count_mod = -1; 548 else if (action == BTRFS_DROP_DELAYED_REF)
455 else if (action == BTRFS_UPDATE_DELAYED_HEAD) 549 count_mod = -1;
456 count_mod = 0;
457 }
458 550
459 /* 551 /*
460 * BTRFS_ADD_DELAYED_EXTENT means that we need to update 552 * BTRFS_ADD_DELAYED_EXTENT means that we need to update
@@ -467,57 +559,148 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
467 * Once we record must_insert_reserved, switch the action to 559 * Once we record must_insert_reserved, switch the action to
468 * BTRFS_ADD_DELAYED_REF because other special casing is not required. 560 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
469 */ 561 */
470 if (action == BTRFS_ADD_DELAYED_EXTENT) { 562 if (action == BTRFS_ADD_DELAYED_EXTENT)
471 must_insert_reserved = 1; 563 must_insert_reserved = 1;
472 action = BTRFS_ADD_DELAYED_REF; 564 else
473 } else {
474 must_insert_reserved = 0; 565 must_insert_reserved = 0;
475 }
476
477 566
478 delayed_refs = &trans->transaction->delayed_refs; 567 delayed_refs = &trans->transaction->delayed_refs;
479 568
480 /* first set the basic ref node struct up */ 569 /* first set the basic ref node struct up */
481 atomic_set(&ref->refs, 1); 570 atomic_set(&ref->refs, 1);
482 ref->bytenr = bytenr; 571 ref->bytenr = bytenr;
483 ref->parent = parent; 572 ref->num_bytes = num_bytes;
484 ref->ref_mod = count_mod; 573 ref->ref_mod = count_mod;
574 ref->type = 0;
575 ref->action = 0;
576 ref->is_head = 1;
485 ref->in_tree = 1; 577 ref->in_tree = 1;
578
579 head_ref = btrfs_delayed_node_to_head(ref);
580 head_ref->must_insert_reserved = must_insert_reserved;
581 head_ref->is_data = is_data;
582
583 INIT_LIST_HEAD(&head_ref->cluster);
584 mutex_init(&head_ref->mutex);
585
586 existing = tree_insert(&delayed_refs->root, &ref->rb_node);
587
588 if (existing) {
589 update_existing_head_ref(existing, ref);
590 /*
591 * we've updated the existing ref, free the newly
592 * allocated ref
593 */
594 kfree(ref);
595 } else {
596 delayed_refs->num_heads++;
597 delayed_refs->num_heads_ready++;
598 delayed_refs->num_entries++;
599 trans->delayed_ref_updates++;
600 }
601 return 0;
602}
603
604/*
605 * helper to insert a delayed tree ref into the rbtree.
606 */
607static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
608 struct btrfs_delayed_ref_node *ref,
609 u64 bytenr, u64 num_bytes, u64 parent,
610 u64 ref_root, int level, int action)
611{
612 struct btrfs_delayed_ref_node *existing;
613 struct btrfs_delayed_tree_ref *full_ref;
614 struct btrfs_delayed_ref_root *delayed_refs;
615
616 if (action == BTRFS_ADD_DELAYED_EXTENT)
617 action = BTRFS_ADD_DELAYED_REF;
618
619 delayed_refs = &trans->transaction->delayed_refs;
620
621 /* first set the basic ref node struct up */
622 atomic_set(&ref->refs, 1);
623 ref->bytenr = bytenr;
486 ref->num_bytes = num_bytes; 624 ref->num_bytes = num_bytes;
625 ref->ref_mod = 1;
626 ref->action = action;
627 ref->is_head = 0;
628 ref->in_tree = 1;
487 629
488 if (btrfs_delayed_ref_is_head(ref)) { 630 full_ref = btrfs_delayed_node_to_tree_ref(ref);
489 head_ref = btrfs_delayed_node_to_head(ref); 631 if (parent) {
490 head_ref->must_insert_reserved = must_insert_reserved; 632 full_ref->parent = parent;
491 INIT_LIST_HEAD(&head_ref->cluster); 633 ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
492 mutex_init(&head_ref->mutex);
493 } else { 634 } else {
494 full_ref = btrfs_delayed_node_to_ref(ref);
495 full_ref->root = ref_root; 635 full_ref->root = ref_root;
496 full_ref->generation = ref_generation; 636 ref->type = BTRFS_TREE_BLOCK_REF_KEY;
497 full_ref->owner_objectid = owner_objectid;
498 full_ref->pin = pin;
499 full_ref->action = action;
500 } 637 }
638 full_ref->level = level;
501 639
502 existing = tree_insert(&delayed_refs->root, bytenr, 640 existing = tree_insert(&delayed_refs->root, &ref->rb_node);
503 parent, &ref->rb_node);
504 641
505 if (existing) { 642 if (existing) {
506 if (btrfs_delayed_ref_is_head(ref)) 643 update_existing_ref(trans, delayed_refs, existing, ref);
507 update_existing_head_ref(existing, ref); 644 /*
508 else 645 * we've updated the existing ref, free the newly
509 update_existing_ref(trans, delayed_refs, existing, ref); 646 * allocated ref
647 */
648 kfree(ref);
649 } else {
650 delayed_refs->num_entries++;
651 trans->delayed_ref_updates++;
652 }
653 return 0;
654}
655
656/*
657 * helper to insert a delayed data ref into the rbtree.
658 */
659static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
660 struct btrfs_delayed_ref_node *ref,
661 u64 bytenr, u64 num_bytes, u64 parent,
662 u64 ref_root, u64 owner, u64 offset,
663 int action)
664{
665 struct btrfs_delayed_ref_node *existing;
666 struct btrfs_delayed_data_ref *full_ref;
667 struct btrfs_delayed_ref_root *delayed_refs;
668
669 if (action == BTRFS_ADD_DELAYED_EXTENT)
670 action = BTRFS_ADD_DELAYED_REF;
671
672 delayed_refs = &trans->transaction->delayed_refs;
673
674 /* first set the basic ref node struct up */
675 atomic_set(&ref->refs, 1);
676 ref->bytenr = bytenr;
677 ref->num_bytes = num_bytes;
678 ref->ref_mod = 1;
679 ref->action = action;
680 ref->is_head = 0;
681 ref->in_tree = 1;
682
683 full_ref = btrfs_delayed_node_to_data_ref(ref);
684 if (parent) {
685 full_ref->parent = parent;
686 ref->type = BTRFS_SHARED_DATA_REF_KEY;
687 } else {
688 full_ref->root = ref_root;
689 ref->type = BTRFS_EXTENT_DATA_REF_KEY;
690 }
691 full_ref->objectid = owner;
692 full_ref->offset = offset;
510 693
694 existing = tree_insert(&delayed_refs->root, &ref->rb_node);
695
696 if (existing) {
697 update_existing_ref(trans, delayed_refs, existing, ref);
511 /* 698 /*
512 * we've updated the existing ref, free the newly 699 * we've updated the existing ref, free the newly
513 * allocated ref 700 * allocated ref
514 */ 701 */
515 kfree(ref); 702 kfree(ref);
516 } else { 703 } else {
517 if (btrfs_delayed_ref_is_head(ref)) {
518 delayed_refs->num_heads++;
519 delayed_refs->num_heads_ready++;
520 }
521 delayed_refs->num_entries++; 704 delayed_refs->num_entries++;
522 trans->delayed_ref_updates++; 705 trans->delayed_ref_updates++;
523 } 706 }
@@ -525,37 +708,78 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
525} 708}
526 709
527/* 710/*
528 * add a delayed ref to the tree. This does all of the accounting required 711 * add a delayed tree ref. This does all of the accounting required
529 * to make sure the delayed ref is eventually processed before this 712 * to make sure the delayed ref is eventually processed before this
530 * transaction commits. 713 * transaction commits.
531 */ 714 */
532int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, 715int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
533 u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, 716 u64 bytenr, u64 num_bytes, u64 parent,
534 u64 ref_generation, u64 owner_objectid, int action, 717 u64 ref_root, int level, int action,
535 int pin) 718 struct btrfs_delayed_extent_op *extent_op)
536{ 719{
537 struct btrfs_delayed_ref *ref; 720 struct btrfs_delayed_tree_ref *ref;
538 struct btrfs_delayed_ref_head *head_ref; 721 struct btrfs_delayed_ref_head *head_ref;
539 struct btrfs_delayed_ref_root *delayed_refs; 722 struct btrfs_delayed_ref_root *delayed_refs;
540 int ret; 723 int ret;
541 724
725 BUG_ON(extent_op && extent_op->is_data);
542 ref = kmalloc(sizeof(*ref), GFP_NOFS); 726 ref = kmalloc(sizeof(*ref), GFP_NOFS);
543 if (!ref) 727 if (!ref)
544 return -ENOMEM; 728 return -ENOMEM;
545 729
730 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
731 if (!head_ref) {
732 kfree(ref);
733 return -ENOMEM;
734 }
735
736 head_ref->extent_op = extent_op;
737
738 delayed_refs = &trans->transaction->delayed_refs;
739 spin_lock(&delayed_refs->lock);
740
546 /* 741 /*
547 * the parent = 0 case comes from cases where we don't actually 742 * insert both the head node and the new ref without dropping
548 * know the parent yet. It will get updated later via a add/drop 743 * the spin lock
549 * pair.
550 */ 744 */
551 if (parent == 0) 745 ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
552 parent = bytenr; 746 action, 0);
747 BUG_ON(ret);
748
749 ret = add_delayed_tree_ref(trans, &ref->node, bytenr, num_bytes,
750 parent, ref_root, level, action);
751 BUG_ON(ret);
752 spin_unlock(&delayed_refs->lock);
753 return 0;
754}
755
756/*
757 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
758 */
759int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
760 u64 bytenr, u64 num_bytes,
761 u64 parent, u64 ref_root,
762 u64 owner, u64 offset, int action,
763 struct btrfs_delayed_extent_op *extent_op)
764{
765 struct btrfs_delayed_data_ref *ref;
766 struct btrfs_delayed_ref_head *head_ref;
767 struct btrfs_delayed_ref_root *delayed_refs;
768 int ret;
769
770 BUG_ON(extent_op && !extent_op->is_data);
771 ref = kmalloc(sizeof(*ref), GFP_NOFS);
772 if (!ref)
773 return -ENOMEM;
553 774
554 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); 775 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
555 if (!head_ref) { 776 if (!head_ref) {
556 kfree(ref); 777 kfree(ref);
557 return -ENOMEM; 778 return -ENOMEM;
558 } 779 }
780
781 head_ref->extent_op = extent_op;
782
559 delayed_refs = &trans->transaction->delayed_refs; 783 delayed_refs = &trans->transaction->delayed_refs;
560 spin_lock(&delayed_refs->lock); 784 spin_lock(&delayed_refs->lock);
561 785
@@ -563,14 +787,39 @@ int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
563 * insert both the head node and the new ref without dropping 787 * insert both the head node and the new ref without dropping
564 * the spin lock 788 * the spin lock
565 */ 789 */
566 ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes, 790 ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
567 (u64)-1, 0, 0, 0, action, pin); 791 action, 1);
568 BUG_ON(ret); 792 BUG_ON(ret);
569 793
570 ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes, 794 ret = add_delayed_data_ref(trans, &ref->node, bytenr, num_bytes,
571 parent, ref_root, ref_generation, 795 parent, ref_root, owner, offset, action);
572 owner_objectid, action, pin); 796 BUG_ON(ret);
797 spin_unlock(&delayed_refs->lock);
798 return 0;
799}
800
801int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
802 u64 bytenr, u64 num_bytes,
803 struct btrfs_delayed_extent_op *extent_op)
804{
805 struct btrfs_delayed_ref_head *head_ref;
806 struct btrfs_delayed_ref_root *delayed_refs;
807 int ret;
808
809 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
810 if (!head_ref)
811 return -ENOMEM;
812
813 head_ref->extent_op = extent_op;
814
815 delayed_refs = &trans->transaction->delayed_refs;
816 spin_lock(&delayed_refs->lock);
817
818 ret = add_delayed_ref_head(trans, &head_ref->node, bytenr,
819 num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
820 extent_op->is_data);
573 BUG_ON(ret); 821 BUG_ON(ret);
822
574 spin_unlock(&delayed_refs->lock); 823 spin_unlock(&delayed_refs->lock);
575 return 0; 824 return 0;
576} 825}
@@ -587,7 +836,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
587 struct btrfs_delayed_ref_root *delayed_refs; 836 struct btrfs_delayed_ref_root *delayed_refs;
588 837
589 delayed_refs = &trans->transaction->delayed_refs; 838 delayed_refs = &trans->transaction->delayed_refs;
590 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); 839 ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
591 if (ref) 840 if (ref)
592 return btrfs_delayed_node_to_head(ref); 841 return btrfs_delayed_node_to_head(ref);
593 return NULL; 842 return NULL;
@@ -603,6 +852,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
603 * 852 *
604 * It is the same as doing a ref add and delete in two separate calls. 853 * It is the same as doing a ref add and delete in two separate calls.
605 */ 854 */
855#if 0
606int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, 856int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
607 u64 bytenr, u64 num_bytes, u64 orig_parent, 857 u64 bytenr, u64 num_bytes, u64 orig_parent,
608 u64 parent, u64 orig_ref_root, u64 ref_root, 858 u64 parent, u64 orig_ref_root, u64 ref_root,
@@ -666,3 +916,4 @@ int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
666 spin_unlock(&delayed_refs->lock); 916 spin_unlock(&delayed_refs->lock);
667 return 0; 917 return 0;
668} 918}
919#endif
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 3bec2ff0b15c..f6fc67ddad36 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -30,9 +30,6 @@ struct btrfs_delayed_ref_node {
30 /* the starting bytenr of the extent */ 30 /* the starting bytenr of the extent */
31 u64 bytenr; 31 u64 bytenr;
32 32
33 /* the parent our backref will point to */
34 u64 parent;
35
36 /* the size of the extent */ 33 /* the size of the extent */
37 u64 num_bytes; 34 u64 num_bytes;
38 35
@@ -50,10 +47,21 @@ struct btrfs_delayed_ref_node {
50 */ 47 */
51 int ref_mod; 48 int ref_mod;
52 49
50 unsigned int action:8;
51 unsigned int type:8;
53 /* is this node still in the rbtree? */ 52 /* is this node still in the rbtree? */
53 unsigned int is_head:1;
54 unsigned int in_tree:1; 54 unsigned int in_tree:1;
55}; 55};
56 56
57struct btrfs_delayed_extent_op {
58 struct btrfs_disk_key key;
59 u64 flags_to_set;
60 unsigned int update_key:1;
61 unsigned int update_flags:1;
62 unsigned int is_data:1;
63};
64
57/* 65/*
58 * the head refs are used to hold a lock on a given extent, which allows us 66 * the head refs are used to hold a lock on a given extent, which allows us
59 * to make sure that only one process is running the delayed refs 67 * to make sure that only one process is running the delayed refs
@@ -71,6 +79,7 @@ struct btrfs_delayed_ref_head {
71 79
72 struct list_head cluster; 80 struct list_head cluster;
73 81
82 struct btrfs_delayed_extent_op *extent_op;
74 /* 83 /*
75 * when a new extent is allocated, it is just reserved in memory 84 * when a new extent is allocated, it is just reserved in memory
76 * The actual extent isn't inserted into the extent allocation tree 85 * The actual extent isn't inserted into the extent allocation tree
@@ -84,27 +93,26 @@ struct btrfs_delayed_ref_head {
84 * the free has happened. 93 * the free has happened.
85 */ 94 */
86 unsigned int must_insert_reserved:1; 95 unsigned int must_insert_reserved:1;
96 unsigned int is_data:1;
87}; 97};
88 98
89struct btrfs_delayed_ref { 99struct btrfs_delayed_tree_ref {
90 struct btrfs_delayed_ref_node node; 100 struct btrfs_delayed_ref_node node;
101 union {
102 u64 root;
103 u64 parent;
104 };
105 int level;
106};
91 107
92 /* the root objectid our ref will point to */ 108struct btrfs_delayed_data_ref {
93 u64 root; 109 struct btrfs_delayed_ref_node node;
94 110 union {
95 /* the generation for the backref */ 111 u64 root;
96 u64 generation; 112 u64 parent;
97 113 };
98 /* owner_objectid of the backref */ 114 u64 objectid;
99 u64 owner_objectid; 115 u64 offset;
100
101 /* operation done by this entry in the rbtree */
102 u8 action;
103
104 /* if pin == 1, when the extent is freed it will be pinned until
105 * transaction commit
106 */
107 unsigned int pin:1;
108}; 116};
109 117
110struct btrfs_delayed_ref_root { 118struct btrfs_delayed_ref_root {
@@ -143,17 +151,25 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
143 } 151 }
144} 152}
145 153
146int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, 154int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
147 u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, 155 u64 bytenr, u64 num_bytes, u64 parent,
148 u64 ref_generation, u64 owner_objectid, int action, 156 u64 ref_root, int level, int action,
149 int pin); 157 struct btrfs_delayed_extent_op *extent_op);
158int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
159 u64 bytenr, u64 num_bytes,
160 u64 parent, u64 ref_root,
161 u64 owner, u64 offset, int action,
162 struct btrfs_delayed_extent_op *extent_op);
163int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
164 u64 bytenr, u64 num_bytes,
165 struct btrfs_delayed_extent_op *extent_op);
150 166
151struct btrfs_delayed_ref_head * 167struct btrfs_delayed_ref_head *
152btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); 168btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
153int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); 169int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr);
154int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, 170int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
155 struct btrfs_root *root, u64 bytenr, 171 struct btrfs_root *root, u64 bytenr,
156 u64 num_bytes, u32 *refs); 172 u64 num_bytes, u64 *refs, u64 *flags);
157int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, 173int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
158 u64 bytenr, u64 num_bytes, u64 orig_parent, 174 u64 bytenr, u64 num_bytes, u64 orig_parent,
159 u64 parent, u64 orig_ref_root, u64 ref_root, 175 u64 parent, u64 orig_ref_root, u64 ref_root,
@@ -169,18 +185,24 @@ int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
169 */ 185 */
170static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node) 186static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node)
171{ 187{
172 return node->parent == (u64)-1; 188 return node->is_head;
173} 189}
174 190
175/* 191/*
176 * helper functions to cast a node into its container 192 * helper functions to cast a node into its container
177 */ 193 */
178static inline struct btrfs_delayed_ref * 194static inline struct btrfs_delayed_tree_ref *
179btrfs_delayed_node_to_ref(struct btrfs_delayed_ref_node *node) 195btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node)
180{ 196{
181 WARN_ON(btrfs_delayed_ref_is_head(node)); 197 WARN_ON(btrfs_delayed_ref_is_head(node));
182 return container_of(node, struct btrfs_delayed_ref, node); 198 return container_of(node, struct btrfs_delayed_tree_ref, node);
199}
183 200
201static inline struct btrfs_delayed_data_ref *
202btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node)
203{
204 WARN_ON(btrfs_delayed_ref_is_head(node));
205 return container_of(node, struct btrfs_delayed_data_ref, node);
184} 206}
185 207
186static inline struct btrfs_delayed_ref_head * 208static inline struct btrfs_delayed_ref_head *
@@ -188,6 +210,5 @@ btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node)
188{ 210{
189 WARN_ON(!btrfs_delayed_ref_is_head(node)); 211 WARN_ON(!btrfs_delayed_ref_is_head(node));
190 return container_of(node, struct btrfs_delayed_ref_head, node); 212 return container_of(node, struct btrfs_delayed_ref_head, node);
191
192} 213}
193#endif 214#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4b0ea0b80c23..0d50d49d990a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -26,8 +26,8 @@
26#include <linux/workqueue.h> 26#include <linux/workqueue.h>
27#include <linux/kthread.h> 27#include <linux/kthread.h>
28#include <linux/freezer.h> 28#include <linux/freezer.h>
29#include <linux/crc32c.h>
29#include "compat.h" 30#include "compat.h"
30#include "crc32c.h"
31#include "ctree.h" 31#include "ctree.h"
32#include "disk-io.h" 32#include "disk-io.h"
33#include "transaction.h" 33#include "transaction.h"
@@ -36,7 +36,6 @@
36#include "print-tree.h" 36#include "print-tree.h"
37#include "async-thread.h" 37#include "async-thread.h"
38#include "locking.h" 38#include "locking.h"
39#include "ref-cache.h"
40#include "tree-log.h" 39#include "tree-log.h"
41#include "free-space-cache.h" 40#include "free-space-cache.h"
42 41
@@ -172,7 +171,7 @@ out:
172 171
173u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) 172u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
174{ 173{
175 return btrfs_crc32c(seed, data, len); 174 return crc32c(seed, data, len);
176} 175}
177 176
178void btrfs_csum_final(u32 crc, char *result) 177void btrfs_csum_final(u32 crc, char *result)
@@ -884,7 +883,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
884{ 883{
885 root->node = NULL; 884 root->node = NULL;
886 root->commit_root = NULL; 885 root->commit_root = NULL;
887 root->ref_tree = NULL;
888 root->sectorsize = sectorsize; 886 root->sectorsize = sectorsize;
889 root->nodesize = nodesize; 887 root->nodesize = nodesize;
890 root->leafsize = leafsize; 888 root->leafsize = leafsize;
@@ -899,12 +897,14 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
899 root->last_inode_alloc = 0; 897 root->last_inode_alloc = 0;
900 root->name = NULL; 898 root->name = NULL;
901 root->in_sysfs = 0; 899 root->in_sysfs = 0;
900 root->inode_tree.rb_node = NULL;
902 901
903 INIT_LIST_HEAD(&root->dirty_list); 902 INIT_LIST_HEAD(&root->dirty_list);
904 INIT_LIST_HEAD(&root->orphan_list); 903 INIT_LIST_HEAD(&root->orphan_list);
905 INIT_LIST_HEAD(&root->dead_list); 904 INIT_LIST_HEAD(&root->root_list);
906 spin_lock_init(&root->node_lock); 905 spin_lock_init(&root->node_lock);
907 spin_lock_init(&root->list_lock); 906 spin_lock_init(&root->list_lock);
907 spin_lock_init(&root->inode_lock);
908 mutex_init(&root->objectid_mutex); 908 mutex_init(&root->objectid_mutex);
909 mutex_init(&root->log_mutex); 909 mutex_init(&root->log_mutex);
910 init_waitqueue_head(&root->log_writer_wait); 910 init_waitqueue_head(&root->log_writer_wait);
@@ -918,9 +918,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
918 extent_io_tree_init(&root->dirty_log_pages, 918 extent_io_tree_init(&root->dirty_log_pages,
919 fs_info->btree_inode->i_mapping, GFP_NOFS); 919 fs_info->btree_inode->i_mapping, GFP_NOFS);
920 920
921 btrfs_leaf_ref_tree_init(&root->ref_tree_struct);
922 root->ref_tree = &root->ref_tree_struct;
923
924 memset(&root->root_key, 0, sizeof(root->root_key)); 921 memset(&root->root_key, 0, sizeof(root->root_key));
925 memset(&root->root_item, 0, sizeof(root->root_item)); 922 memset(&root->root_item, 0, sizeof(root->root_item));
926 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); 923 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
@@ -959,6 +956,7 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
959 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 956 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
960 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 957 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
961 blocksize, generation); 958 blocksize, generation);
959 root->commit_root = btrfs_root_node(root);
962 BUG_ON(!root->node); 960 BUG_ON(!root->node);
963 return 0; 961 return 0;
964} 962}
@@ -1025,20 +1023,19 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
1025 */ 1023 */
1026 root->ref_cows = 0; 1024 root->ref_cows = 0;
1027 1025
1028 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 1026 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
1029 0, BTRFS_TREE_LOG_OBJECTID, 1027 BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0);
1030 trans->transid, 0, 0, 0);
1031 if (IS_ERR(leaf)) { 1028 if (IS_ERR(leaf)) {
1032 kfree(root); 1029 kfree(root);
1033 return ERR_CAST(leaf); 1030 return ERR_CAST(leaf);
1034 } 1031 }
1035 1032
1033 memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
1034 btrfs_set_header_bytenr(leaf, leaf->start);
1035 btrfs_set_header_generation(leaf, trans->transid);
1036 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
1037 btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID);
1036 root->node = leaf; 1038 root->node = leaf;
1037 btrfs_set_header_nritems(root->node, 0);
1038 btrfs_set_header_level(root->node, 0);
1039 btrfs_set_header_bytenr(root->node, root->node->start);
1040 btrfs_set_header_generation(root->node, trans->transid);
1041 btrfs_set_header_owner(root->node, BTRFS_TREE_LOG_OBJECTID);
1042 1039
1043 write_extent_buffer(root->node, root->fs_info->fsid, 1040 write_extent_buffer(root->node, root->fs_info->fsid,
1044 (unsigned long)btrfs_header_fsid(root->node), 1041 (unsigned long)btrfs_header_fsid(root->node),
@@ -1081,8 +1078,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
1081 inode_item->nbytes = cpu_to_le64(root->leafsize); 1078 inode_item->nbytes = cpu_to_le64(root->leafsize);
1082 inode_item->mode = cpu_to_le32(S_IFDIR | 0755); 1079 inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
1083 1080
1084 btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start); 1081 btrfs_set_root_node(&log_root->root_item, log_root->node);
1085 btrfs_set_root_generation(&log_root->root_item, trans->transid);
1086 1082
1087 WARN_ON(root->log_root); 1083 WARN_ON(root->log_root);
1088 root->log_root = log_root; 1084 root->log_root = log_root;
@@ -1144,6 +1140,7 @@ out:
1144 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 1140 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
1145 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 1141 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
1146 blocksize, generation); 1142 blocksize, generation);
1143 root->commit_root = btrfs_root_node(root);
1147 BUG_ON(!root->node); 1144 BUG_ON(!root->node);
1148insert: 1145insert:
1149 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { 1146 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
@@ -1210,7 +1207,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
1210 } 1207 }
1211 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 1208 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
1212 ret = btrfs_find_dead_roots(fs_info->tree_root, 1209 ret = btrfs_find_dead_roots(fs_info->tree_root,
1213 root->root_key.objectid, root); 1210 root->root_key.objectid);
1214 BUG_ON(ret); 1211 BUG_ON(ret);
1215 btrfs_orphan_cleanup(root); 1212 btrfs_orphan_cleanup(root);
1216 } 1213 }
@@ -1569,8 +1566,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1569 atomic_set(&fs_info->async_delalloc_pages, 0); 1566 atomic_set(&fs_info->async_delalloc_pages, 0);
1570 atomic_set(&fs_info->async_submit_draining, 0); 1567 atomic_set(&fs_info->async_submit_draining, 0);
1571 atomic_set(&fs_info->nr_async_bios, 0); 1568 atomic_set(&fs_info->nr_async_bios, 0);
1572 atomic_set(&fs_info->throttles, 0);
1573 atomic_set(&fs_info->throttle_gen, 0);
1574 fs_info->sb = sb; 1569 fs_info->sb = sb;
1575 fs_info->max_extent = (u64)-1; 1570 fs_info->max_extent = (u64)-1;
1576 fs_info->max_inline = 8192 * 1024; 1571 fs_info->max_inline = 8192 * 1024;
@@ -1598,6 +1593,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1598 fs_info->btree_inode->i_mapping->a_ops = &btree_aops; 1593 fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
1599 fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi; 1594 fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi;
1600 1595
1596 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
1601 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, 1597 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
1602 fs_info->btree_inode->i_mapping, 1598 fs_info->btree_inode->i_mapping,
1603 GFP_NOFS); 1599 GFP_NOFS);
@@ -1613,10 +1609,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1613 fs_info->btree_inode->i_mapping, GFP_NOFS); 1609 fs_info->btree_inode->i_mapping, GFP_NOFS);
1614 fs_info->do_barriers = 1; 1610 fs_info->do_barriers = 1;
1615 1611
1616 INIT_LIST_HEAD(&fs_info->dead_reloc_roots);
1617 btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree);
1618 btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree);
1619
1620 BTRFS_I(fs_info->btree_inode)->root = tree_root; 1612 BTRFS_I(fs_info->btree_inode)->root = tree_root;
1621 memset(&BTRFS_I(fs_info->btree_inode)->location, 0, 1613 memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
1622 sizeof(struct btrfs_key)); 1614 sizeof(struct btrfs_key));
@@ -1674,6 +1666,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1674 goto fail_iput; 1666 goto fail_iput;
1675 } 1667 }
1676 1668
1669 features = btrfs_super_incompat_flags(disk_super);
1670 if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
1671 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
1672 btrfs_set_super_incompat_flags(disk_super, features);
1673 }
1674
1677 features = btrfs_super_compat_ro_flags(disk_super) & 1675 features = btrfs_super_compat_ro_flags(disk_super) &
1678 ~BTRFS_FEATURE_COMPAT_RO_SUPP; 1676 ~BTRFS_FEATURE_COMPAT_RO_SUPP;
1679 if (!(sb->s_flags & MS_RDONLY) && features) { 1677 if (!(sb->s_flags & MS_RDONLY) && features) {
@@ -1771,7 +1769,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1771 if (ret) { 1769 if (ret) {
1772 printk(KERN_WARNING "btrfs: failed to read the system " 1770 printk(KERN_WARNING "btrfs: failed to read the system "
1773 "array on %s\n", sb->s_id); 1771 "array on %s\n", sb->s_id);
1774 goto fail_sys_array; 1772 goto fail_sb_buffer;
1775 } 1773 }
1776 1774
1777 blocksize = btrfs_level_size(tree_root, 1775 blocksize = btrfs_level_size(tree_root,
@@ -1785,6 +1783,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1785 btrfs_super_chunk_root(disk_super), 1783 btrfs_super_chunk_root(disk_super),
1786 blocksize, generation); 1784 blocksize, generation);
1787 BUG_ON(!chunk_root->node); 1785 BUG_ON(!chunk_root->node);
1786 btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
1787 chunk_root->commit_root = btrfs_root_node(chunk_root);
1788 1788
1789 read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, 1789 read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
1790 (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), 1790 (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
@@ -1810,7 +1810,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1810 blocksize, generation); 1810 blocksize, generation);
1811 if (!tree_root->node) 1811 if (!tree_root->node)
1812 goto fail_chunk_root; 1812 goto fail_chunk_root;
1813 1813 btrfs_set_root_node(&tree_root->root_item, tree_root->node);
1814 tree_root->commit_root = btrfs_root_node(tree_root);
1814 1815
1815 ret = find_and_setup_root(tree_root, fs_info, 1816 ret = find_and_setup_root(tree_root, fs_info,
1816 BTRFS_EXTENT_TREE_OBJECTID, extent_root); 1817 BTRFS_EXTENT_TREE_OBJECTID, extent_root);
@@ -1820,14 +1821,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1820 1821
1821 ret = find_and_setup_root(tree_root, fs_info, 1822 ret = find_and_setup_root(tree_root, fs_info,
1822 BTRFS_DEV_TREE_OBJECTID, dev_root); 1823 BTRFS_DEV_TREE_OBJECTID, dev_root);
1823 dev_root->track_dirty = 1;
1824 if (ret) 1824 if (ret)
1825 goto fail_extent_root; 1825 goto fail_extent_root;
1826 dev_root->track_dirty = 1;
1826 1827
1827 ret = find_and_setup_root(tree_root, fs_info, 1828 ret = find_and_setup_root(tree_root, fs_info,
1828 BTRFS_CSUM_TREE_OBJECTID, csum_root); 1829 BTRFS_CSUM_TREE_OBJECTID, csum_root);
1829 if (ret) 1830 if (ret)
1830 goto fail_extent_root; 1831 goto fail_dev_root;
1831 1832
1832 csum_root->track_dirty = 1; 1833 csum_root->track_dirty = 1;
1833 1834
@@ -1849,6 +1850,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1849 if (IS_ERR(fs_info->transaction_kthread)) 1850 if (IS_ERR(fs_info->transaction_kthread))
1850 goto fail_cleaner; 1851 goto fail_cleaner;
1851 1852
1853 if (!btrfs_test_opt(tree_root, SSD) &&
1854 !btrfs_test_opt(tree_root, NOSSD) &&
1855 !fs_info->fs_devices->rotating) {
1856 printk(KERN_INFO "Btrfs detected SSD devices, enabling SSD "
1857 "mode\n");
1858 btrfs_set_opt(fs_info->mount_opt, SSD);
1859 }
1860
1852 if (btrfs_super_log_root(disk_super) != 0) { 1861 if (btrfs_super_log_root(disk_super) != 0) {
1853 u64 bytenr = btrfs_super_log_root(disk_super); 1862 u64 bytenr = btrfs_super_log_root(disk_super);
1854 1863
@@ -1881,7 +1890,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1881 } 1890 }
1882 1891
1883 if (!(sb->s_flags & MS_RDONLY)) { 1892 if (!(sb->s_flags & MS_RDONLY)) {
1884 ret = btrfs_cleanup_reloc_trees(tree_root); 1893 ret = btrfs_recover_relocation(tree_root);
1885 BUG_ON(ret); 1894 BUG_ON(ret);
1886 } 1895 }
1887 1896
@@ -1892,6 +1901,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1892 fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); 1901 fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
1893 if (!fs_info->fs_root) 1902 if (!fs_info->fs_root)
1894 goto fail_trans_kthread; 1903 goto fail_trans_kthread;
1904
1895 return tree_root; 1905 return tree_root;
1896 1906
1897fail_trans_kthread: 1907fail_trans_kthread:
@@ -1908,14 +1918,19 @@ fail_cleaner:
1908 1918
1909fail_csum_root: 1919fail_csum_root:
1910 free_extent_buffer(csum_root->node); 1920 free_extent_buffer(csum_root->node);
1921 free_extent_buffer(csum_root->commit_root);
1922fail_dev_root:
1923 free_extent_buffer(dev_root->node);
1924 free_extent_buffer(dev_root->commit_root);
1911fail_extent_root: 1925fail_extent_root:
1912 free_extent_buffer(extent_root->node); 1926 free_extent_buffer(extent_root->node);
1927 free_extent_buffer(extent_root->commit_root);
1913fail_tree_root: 1928fail_tree_root:
1914 free_extent_buffer(tree_root->node); 1929 free_extent_buffer(tree_root->node);
1930 free_extent_buffer(tree_root->commit_root);
1915fail_chunk_root: 1931fail_chunk_root:
1916 free_extent_buffer(chunk_root->node); 1932 free_extent_buffer(chunk_root->node);
1917fail_sys_array: 1933 free_extent_buffer(chunk_root->commit_root);
1918 free_extent_buffer(dev_root->node);
1919fail_sb_buffer: 1934fail_sb_buffer:
1920 btrfs_stop_workers(&fs_info->fixup_workers); 1935 btrfs_stop_workers(&fs_info->fixup_workers);
1921 btrfs_stop_workers(&fs_info->delalloc_workers); 1936 btrfs_stop_workers(&fs_info->delalloc_workers);
@@ -2005,6 +2020,17 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
2005 return latest; 2020 return latest;
2006} 2021}
2007 2022
2023/*
2024 * this should be called twice, once with wait == 0 and
2025 * once with wait == 1. When wait == 0 is done, all the buffer heads
2026 * we write are pinned.
2027 *
2028 * They are released when wait == 1 is done.
2029 * max_mirrors must be the same for both runs, and it indicates how
2030 * many supers on this one device should be written.
2031 *
2032 * max_mirrors == 0 means to write them all.
2033 */
2008static int write_dev_supers(struct btrfs_device *device, 2034static int write_dev_supers(struct btrfs_device *device,
2009 struct btrfs_super_block *sb, 2035 struct btrfs_super_block *sb,
2010 int do_barriers, int wait, int max_mirrors) 2036 int do_barriers, int wait, int max_mirrors)
@@ -2040,12 +2066,16 @@ static int write_dev_supers(struct btrfs_device *device,
2040 bh = __find_get_block(device->bdev, bytenr / 4096, 2066 bh = __find_get_block(device->bdev, bytenr / 4096,
2041 BTRFS_SUPER_INFO_SIZE); 2067 BTRFS_SUPER_INFO_SIZE);
2042 BUG_ON(!bh); 2068 BUG_ON(!bh);
2043 brelse(bh);
2044 wait_on_buffer(bh); 2069 wait_on_buffer(bh);
2045 if (buffer_uptodate(bh)) { 2070 if (!buffer_uptodate(bh))
2046 brelse(bh); 2071 errors++;
2047 continue; 2072
2048 } 2073 /* drop our reference */
2074 brelse(bh);
2075
2076 /* drop the reference from the wait == 0 run */
2077 brelse(bh);
2078 continue;
2049 } else { 2079 } else {
2050 btrfs_set_super_bytenr(sb, bytenr); 2080 btrfs_set_super_bytenr(sb, bytenr);
2051 2081
@@ -2056,12 +2086,18 @@ static int write_dev_supers(struct btrfs_device *device,
2056 BTRFS_CSUM_SIZE); 2086 BTRFS_CSUM_SIZE);
2057 btrfs_csum_final(crc, sb->csum); 2087 btrfs_csum_final(crc, sb->csum);
2058 2088
2089 /*
2090 * one reference for us, and we leave it for the
2091 * caller
2092 */
2059 bh = __getblk(device->bdev, bytenr / 4096, 2093 bh = __getblk(device->bdev, bytenr / 4096,
2060 BTRFS_SUPER_INFO_SIZE); 2094 BTRFS_SUPER_INFO_SIZE);
2061 memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); 2095 memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
2062 2096
2063 set_buffer_uptodate(bh); 2097 /* one reference for submit_bh */
2064 get_bh(bh); 2098 get_bh(bh);
2099
2100 set_buffer_uptodate(bh);
2065 lock_buffer(bh); 2101 lock_buffer(bh);
2066 bh->b_end_io = btrfs_end_buffer_write_sync; 2102 bh->b_end_io = btrfs_end_buffer_write_sync;
2067 } 2103 }
@@ -2073,6 +2109,7 @@ static int write_dev_supers(struct btrfs_device *device,
2073 device->name); 2109 device->name);
2074 set_buffer_uptodate(bh); 2110 set_buffer_uptodate(bh);
2075 device->barriers = 0; 2111 device->barriers = 0;
2112 /* one reference for submit_bh */
2076 get_bh(bh); 2113 get_bh(bh);
2077 lock_buffer(bh); 2114 lock_buffer(bh);
2078 ret = submit_bh(WRITE_SYNC, bh); 2115 ret = submit_bh(WRITE_SYNC, bh);
@@ -2081,22 +2118,15 @@ static int write_dev_supers(struct btrfs_device *device,
2081 ret = submit_bh(WRITE_SYNC, bh); 2118 ret = submit_bh(WRITE_SYNC, bh);
2082 } 2119 }
2083 2120
2084 if (!ret && wait) { 2121 if (ret)
2085 wait_on_buffer(bh);
2086 if (!buffer_uptodate(bh))
2087 errors++;
2088 } else if (ret) {
2089 errors++; 2122 errors++;
2090 }
2091 if (wait)
2092 brelse(bh);
2093 } 2123 }
2094 return errors < i ? 0 : -1; 2124 return errors < i ? 0 : -1;
2095} 2125}
2096 2126
2097int write_all_supers(struct btrfs_root *root, int max_mirrors) 2127int write_all_supers(struct btrfs_root *root, int max_mirrors)
2098{ 2128{
2099 struct list_head *head = &root->fs_info->fs_devices->devices; 2129 struct list_head *head;
2100 struct btrfs_device *dev; 2130 struct btrfs_device *dev;
2101 struct btrfs_super_block *sb; 2131 struct btrfs_super_block *sb;
2102 struct btrfs_dev_item *dev_item; 2132 struct btrfs_dev_item *dev_item;
@@ -2111,6 +2141,9 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2111 2141
2112 sb = &root->fs_info->super_for_commit; 2142 sb = &root->fs_info->super_for_commit;
2113 dev_item = &sb->dev_item; 2143 dev_item = &sb->dev_item;
2144
2145 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
2146 head = &root->fs_info->fs_devices->devices;
2114 list_for_each_entry(dev, head, dev_list) { 2147 list_for_each_entry(dev, head, dev_list) {
2115 if (!dev->bdev) { 2148 if (!dev->bdev) {
2116 total_errors++; 2149 total_errors++;
@@ -2154,6 +2187,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2154 if (ret) 2187 if (ret)
2155 total_errors++; 2188 total_errors++;
2156 } 2189 }
2190 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
2157 if (total_errors > max_errors) { 2191 if (total_errors > max_errors) {
2158 printk(KERN_ERR "btrfs: %d errors while writing supers\n", 2192 printk(KERN_ERR "btrfs: %d errors while writing supers\n",
2159 total_errors); 2193 total_errors);
@@ -2173,6 +2207,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
2173 2207
2174int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 2208int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2175{ 2209{
2210 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
2176 radix_tree_delete(&fs_info->fs_roots_radix, 2211 radix_tree_delete(&fs_info->fs_roots_radix,
2177 (unsigned long)root->root_key.objectid); 2212 (unsigned long)root->root_key.objectid);
2178 if (root->anon_super.s_dev) { 2213 if (root->anon_super.s_dev) {
@@ -2219,10 +2254,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
2219 ARRAY_SIZE(gang)); 2254 ARRAY_SIZE(gang));
2220 if (!ret) 2255 if (!ret)
2221 break; 2256 break;
2257
2258 root_objectid = gang[ret - 1]->root_key.objectid + 1;
2222 for (i = 0; i < ret; i++) { 2259 for (i = 0; i < ret; i++) {
2223 root_objectid = gang[i]->root_key.objectid; 2260 root_objectid = gang[i]->root_key.objectid;
2224 ret = btrfs_find_dead_roots(fs_info->tree_root, 2261 ret = btrfs_find_dead_roots(fs_info->tree_root,
2225 root_objectid, gang[i]); 2262 root_objectid);
2226 BUG_ON(ret); 2263 BUG_ON(ret);
2227 btrfs_orphan_cleanup(gang[i]); 2264 btrfs_orphan_cleanup(gang[i]);
2228 } 2265 }
@@ -2278,20 +2315,16 @@ int close_ctree(struct btrfs_root *root)
2278 (unsigned long long)fs_info->total_ref_cache_size); 2315 (unsigned long long)fs_info->total_ref_cache_size);
2279 } 2316 }
2280 2317
2281 if (fs_info->extent_root->node) 2318 free_extent_buffer(fs_info->extent_root->node);
2282 free_extent_buffer(fs_info->extent_root->node); 2319 free_extent_buffer(fs_info->extent_root->commit_root);
2283 2320 free_extent_buffer(fs_info->tree_root->node);
2284 if (fs_info->tree_root->node) 2321 free_extent_buffer(fs_info->tree_root->commit_root);
2285 free_extent_buffer(fs_info->tree_root->node); 2322 free_extent_buffer(root->fs_info->chunk_root->node);
2286 2323 free_extent_buffer(root->fs_info->chunk_root->commit_root);
2287 if (root->fs_info->chunk_root->node) 2324 free_extent_buffer(root->fs_info->dev_root->node);
2288 free_extent_buffer(root->fs_info->chunk_root->node); 2325 free_extent_buffer(root->fs_info->dev_root->commit_root);
2289 2326 free_extent_buffer(root->fs_info->csum_root->node);
2290 if (root->fs_info->dev_root->node) 2327 free_extent_buffer(root->fs_info->csum_root->commit_root);
2291 free_extent_buffer(root->fs_info->dev_root->node);
2292
2293 if (root->fs_info->csum_root->node)
2294 free_extent_buffer(root->fs_info->csum_root->node);
2295 2328
2296 btrfs_free_block_groups(root->fs_info); 2329 btrfs_free_block_groups(root->fs_info);
2297 2330
@@ -2373,17 +2406,14 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
2373 * looks as though older kernels can get into trouble with 2406 * looks as though older kernels can get into trouble with
2374 * this code, they end up stuck in balance_dirty_pages forever 2407 * this code, they end up stuck in balance_dirty_pages forever
2375 */ 2408 */
2376 struct extent_io_tree *tree;
2377 u64 num_dirty; 2409 u64 num_dirty;
2378 u64 start = 0;
2379 unsigned long thresh = 32 * 1024 * 1024; 2410 unsigned long thresh = 32 * 1024 * 1024;
2380 tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
2381 2411
2382 if (current->flags & PF_MEMALLOC) 2412 if (current->flags & PF_MEMALLOC)
2383 return; 2413 return;
2384 2414
2385 num_dirty = count_range_bits(tree, &start, (u64)-1, 2415 num_dirty = root->fs_info->dirty_metadata_bytes;
2386 thresh, EXTENT_DIRTY); 2416
2387 if (num_dirty > thresh) { 2417 if (num_dirty > thresh) {
2388 balance_dirty_pages_ratelimited_nr( 2418 balance_dirty_pages_ratelimited_nr(
2389 root->fs_info->btree_inode->i_mapping, 1); 2419 root->fs_info->btree_inode->i_mapping, 1);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 85315d2c90de..9596b40caa4e 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -78,7 +78,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
78 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); 78 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
79 key.offset = 0; 79 key.offset = 0;
80 80
81 inode = btrfs_iget(sb, &key, root, NULL); 81 inode = btrfs_iget(sb, &key, root);
82 if (IS_ERR(inode)) 82 if (IS_ERR(inode))
83 return (void *)inode; 83 return (void *)inode;
84 84
@@ -192,7 +192,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
192 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); 192 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
193 key.offset = 0; 193 key.offset = 0;
194 194
195 return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); 195 return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root));
196} 196}
197 197
198const struct export_operations btrfs_export_ops = { 198const struct export_operations btrfs_export_ops = {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 35af93355063..edc7d208c5ce 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -23,50 +23,39 @@
23#include <linux/rcupdate.h> 23#include <linux/rcupdate.h>
24#include "compat.h" 24#include "compat.h"
25#include "hash.h" 25#include "hash.h"
26#include "crc32c.h"
27#include "ctree.h" 26#include "ctree.h"
28#include "disk-io.h" 27#include "disk-io.h"
29#include "print-tree.h" 28#include "print-tree.h"
30#include "transaction.h" 29#include "transaction.h"
31#include "volumes.h" 30#include "volumes.h"
32#include "locking.h" 31#include "locking.h"
33#include "ref-cache.h"
34#include "free-space-cache.h" 32#include "free-space-cache.h"
35 33
36#define PENDING_EXTENT_INSERT 0
37#define PENDING_EXTENT_DELETE 1
38#define PENDING_BACKREF_UPDATE 2
39
40struct pending_extent_op {
41 int type;
42 u64 bytenr;
43 u64 num_bytes;
44 u64 parent;
45 u64 orig_parent;
46 u64 generation;
47 u64 orig_generation;
48 int level;
49 struct list_head list;
50 int del;
51};
52
53static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
54 struct btrfs_root *root, u64 parent,
55 u64 root_objectid, u64 ref_generation,
56 u64 owner, struct btrfs_key *ins,
57 int ref_mod);
58static int update_reserved_extents(struct btrfs_root *root, 34static int update_reserved_extents(struct btrfs_root *root,
59 u64 bytenr, u64 num, int reserve); 35 u64 bytenr, u64 num, int reserve);
60static int update_block_group(struct btrfs_trans_handle *trans, 36static int update_block_group(struct btrfs_trans_handle *trans,
61 struct btrfs_root *root, 37 struct btrfs_root *root,
62 u64 bytenr, u64 num_bytes, int alloc, 38 u64 bytenr, u64 num_bytes, int alloc,
63 int mark_free); 39 int mark_free);
64static noinline int __btrfs_free_extent(struct btrfs_trans_handle *trans, 40static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
65 struct btrfs_root *root, 41 struct btrfs_root *root,
66 u64 bytenr, u64 num_bytes, u64 parent, 42 u64 bytenr, u64 num_bytes, u64 parent,
67 u64 root_objectid, u64 ref_generation, 43 u64 root_objectid, u64 owner_objectid,
68 u64 owner_objectid, int pin, 44 u64 owner_offset, int refs_to_drop,
69 int ref_to_drop); 45 struct btrfs_delayed_extent_op *extra_op);
46static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
47 struct extent_buffer *leaf,
48 struct btrfs_extent_item *ei);
49static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
50 struct btrfs_root *root,
51 u64 parent, u64 root_objectid,
52 u64 flags, u64 owner, u64 offset,
53 struct btrfs_key *ins, int ref_mod);
54static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
55 struct btrfs_root *root,
56 u64 parent, u64 root_objectid,
57 u64 flags, struct btrfs_disk_key *key,
58 int level, struct btrfs_key *ins);
70 59
71static int do_chunk_alloc(struct btrfs_trans_handle *trans, 60static int do_chunk_alloc(struct btrfs_trans_handle *trans,
72 struct btrfs_root *extent_root, u64 alloc_bytes, 61 struct btrfs_root *extent_root, u64 alloc_bytes,
@@ -453,199 +442,969 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
453 * maintenance. This is actually the same as #2, but with a slightly 442 * maintenance. This is actually the same as #2, but with a slightly
454 * different use case. 443 * different use case.
455 * 444 *
445 * There are two kinds of back refs. The implicit back refs is optimized
446 * for pointers in non-shared tree blocks. For a given pointer in a block,
447 * back refs of this kind provide information about the block's owner tree
448 * and the pointer's key. These information allow us to find the block by
449 * b-tree searching. The full back refs is for pointers in tree blocks not
450 * referenced by their owner trees. The location of tree block is recorded
451 * in the back refs. Actually the full back refs is generic, and can be
452 * used in all cases the implicit back refs is used. The major shortcoming
453 * of the full back refs is its overhead. Every time a tree block gets
454 * COWed, we have to update back refs entry for all pointers in it.
455 *
456 * For a newly allocated tree block, we use implicit back refs for
457 * pointers in it. This means most tree related operations only involve
458 * implicit back refs. For a tree block created in old transaction, the
459 * only way to drop a reference to it is COW it. So we can detect the
460 * event that tree block loses its owner tree's reference and do the
461 * back refs conversion.
462 *
463 * When a tree block is COW'd through a tree, there are four cases:
464 *
465 * The reference count of the block is one and the tree is the block's
466 * owner tree. Nothing to do in this case.
467 *
468 * The reference count of the block is one and the tree is not the
469 * block's owner tree. In this case, full back refs is used for pointers
470 * in the block. Remove these full back refs, add implicit back refs for
471 * every pointers in the new block.
472 *
473 * The reference count of the block is greater than one and the tree is
474 * the block's owner tree. In this case, implicit back refs is used for
475 * pointers in the block. Add full back refs for every pointers in the
476 * block, increase lower level extents' reference counts. The original
477 * implicit back refs are entailed to the new block.
478 *
479 * The reference count of the block is greater than one and the tree is
480 * not the block's owner tree. Add implicit back refs for every pointer in
481 * the new block, increase lower level extents' reference count.
482 *
483 * Back Reference Key composing:
484 *
485 * The key objectid corresponds to the first byte in the extent,
486 * The key type is used to differentiate between types of back refs.
487 * There are different meanings of the key offset for different types
488 * of back refs.
489 *
456 * File extents can be referenced by: 490 * File extents can be referenced by:
457 * 491 *
458 * - multiple snapshots, subvolumes, or different generations in one subvol 492 * - multiple snapshots, subvolumes, or different generations in one subvol
459 * - different files inside a single subvolume 493 * - different files inside a single subvolume
460 * - different offsets inside a file (bookend extents in file.c) 494 * - different offsets inside a file (bookend extents in file.c)
461 * 495 *
462 * The extent ref structure has fields for: 496 * The extent ref structure for the implicit back refs has fields for:
463 * 497 *
464 * - Objectid of the subvolume root 498 * - Objectid of the subvolume root
465 * - Generation number of the tree holding the reference
466 * - objectid of the file holding the reference 499 * - objectid of the file holding the reference
467 * - number of references holding by parent node (alway 1 for tree blocks) 500 * - original offset in the file
468 * 501 * - how many bookend extents
469 * Btree leaf may hold multiple references to a file extent. In most cases,
470 * these references are from same file and the corresponding offsets inside
471 * the file are close together.
472 *
473 * When a file extent is allocated the fields are filled in:
474 * (root_key.objectid, trans->transid, inode objectid, 1)
475 * 502 *
476 * When a leaf is cow'd new references are added for every file extent found 503 * The key offset for the implicit back refs is hash of the first
477 * in the leaf. It looks similar to the create case, but trans->transid will 504 * three fields.
478 * be different when the block is cow'd.
479 * 505 *
480 * (root_key.objectid, trans->transid, inode objectid, 506 * The extent ref structure for the full back refs has field for:
481 * number of references in the leaf)
482 * 507 *
483 * When a file extent is removed either during snapshot deletion or 508 * - number of pointers in the tree leaf
484 * file truncation, we find the corresponding back reference and check
485 * the following fields:
486 * 509 *
487 * (btrfs_header_owner(leaf), btrfs_header_generation(leaf), 510 * The key offset for the implicit back refs is the first byte of
488 * inode objectid) 511 * the tree leaf
489 * 512 *
490 * Btree extents can be referenced by: 513 * When a file extent is allocated, The implicit back refs is used.
491 * 514 * the fields are filled in:
492 * - Different subvolumes
493 * - Different generations of the same subvolume
494 *
495 * When a tree block is created, back references are inserted:
496 * 515 *
497 * (root->root_key.objectid, trans->transid, level, 1) 516 * (root_key.objectid, inode objectid, offset in file, 1)
498 * 517 *
499 * When a tree block is cow'd, new back references are added for all the 518 * When a file extent is removed file truncation, we find the
500 * blocks it points to. If the tree block isn't in reference counted root, 519 * corresponding implicit back refs and check the following fields:
501 * the old back references are removed. These new back references are of
502 * the form (trans->transid will have increased since creation):
503 * 520 *
504 * (root->root_key.objectid, trans->transid, level, 1) 521 * (btrfs_header_owner(leaf), inode objectid, offset in file)
505 * 522 *
506 * When a backref is in deleting, the following fields are checked: 523 * Btree extents can be referenced by:
507 * 524 *
508 * if backref was for a tree root: 525 * - Different subvolumes
509 * (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
510 * else
511 * (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
512 * 526 *
513 * Back Reference Key composing: 527 * Both the implicit back refs and the full back refs for tree blocks
528 * only consist of key. The key offset for the implicit back refs is
529 * objectid of block's owner tree. The key offset for the full back refs
530 * is the first byte of parent block.
514 * 531 *
515 * The key objectid corresponds to the first byte in the extent, the key 532 * When implicit back refs is used, information about the lowest key and
516 * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first 533 * level of the tree block are required. These information are stored in
517 * byte of parent extent. If a extent is tree root, the key offset is set 534 * tree block info structure.
518 * to the key objectid.
519 */ 535 */
520 536
521static noinline int lookup_extent_backref(struct btrfs_trans_handle *trans, 537#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
522 struct btrfs_root *root, 538static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
523 struct btrfs_path *path, 539 struct btrfs_root *root,
524 u64 bytenr, u64 parent, 540 struct btrfs_path *path,
525 u64 ref_root, u64 ref_generation, 541 u64 owner, u32 extra_size)
526 u64 owner_objectid, int del)
527{ 542{
543 struct btrfs_extent_item *item;
544 struct btrfs_extent_item_v0 *ei0;
545 struct btrfs_extent_ref_v0 *ref0;
546 struct btrfs_tree_block_info *bi;
547 struct extent_buffer *leaf;
528 struct btrfs_key key; 548 struct btrfs_key key;
529 struct btrfs_extent_ref *ref; 549 struct btrfs_key found_key;
550 u32 new_size = sizeof(*item);
551 u64 refs;
552 int ret;
553
554 leaf = path->nodes[0];
555 BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));
556
557 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
558 ei0 = btrfs_item_ptr(leaf, path->slots[0],
559 struct btrfs_extent_item_v0);
560 refs = btrfs_extent_refs_v0(leaf, ei0);
561
562 if (owner == (u64)-1) {
563 while (1) {
564 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
565 ret = btrfs_next_leaf(root, path);
566 if (ret < 0)
567 return ret;
568 BUG_ON(ret > 0);
569 leaf = path->nodes[0];
570 }
571 btrfs_item_key_to_cpu(leaf, &found_key,
572 path->slots[0]);
573 BUG_ON(key.objectid != found_key.objectid);
574 if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
575 path->slots[0]++;
576 continue;
577 }
578 ref0 = btrfs_item_ptr(leaf, path->slots[0],
579 struct btrfs_extent_ref_v0);
580 owner = btrfs_ref_objectid_v0(leaf, ref0);
581 break;
582 }
583 }
584 btrfs_release_path(root, path);
585
586 if (owner < BTRFS_FIRST_FREE_OBJECTID)
587 new_size += sizeof(*bi);
588
589 new_size -= sizeof(*ei0);
590 ret = btrfs_search_slot(trans, root, &key, path,
591 new_size + extra_size, 1);
592 if (ret < 0)
593 return ret;
594 BUG_ON(ret);
595
596 ret = btrfs_extend_item(trans, root, path, new_size);
597 BUG_ON(ret);
598
599 leaf = path->nodes[0];
600 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
601 btrfs_set_extent_refs(leaf, item, refs);
602 /* FIXME: get real generation */
603 btrfs_set_extent_generation(leaf, item, 0);
604 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
605 btrfs_set_extent_flags(leaf, item,
606 BTRFS_EXTENT_FLAG_TREE_BLOCK |
607 BTRFS_BLOCK_FLAG_FULL_BACKREF);
608 bi = (struct btrfs_tree_block_info *)(item + 1);
609 /* FIXME: get first key of the block */
610 memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
611 btrfs_set_tree_block_level(leaf, bi, (int)owner);
612 } else {
613 btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
614 }
615 btrfs_mark_buffer_dirty(leaf);
616 return 0;
617}
618#endif
619
620static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
621{
622 u32 high_crc = ~(u32)0;
623 u32 low_crc = ~(u32)0;
624 __le64 lenum;
625
626 lenum = cpu_to_le64(root_objectid);
627 high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
628 lenum = cpu_to_le64(owner);
629 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
630 lenum = cpu_to_le64(offset);
631 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
632
633 return ((u64)high_crc << 31) ^ (u64)low_crc;
634}
635
636static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
637 struct btrfs_extent_data_ref *ref)
638{
639 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
640 btrfs_extent_data_ref_objectid(leaf, ref),
641 btrfs_extent_data_ref_offset(leaf, ref));
642}
643
644static int match_extent_data_ref(struct extent_buffer *leaf,
645 struct btrfs_extent_data_ref *ref,
646 u64 root_objectid, u64 owner, u64 offset)
647{
648 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
649 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
650 btrfs_extent_data_ref_offset(leaf, ref) != offset)
651 return 0;
652 return 1;
653}
654
655static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
656 struct btrfs_root *root,
657 struct btrfs_path *path,
658 u64 bytenr, u64 parent,
659 u64 root_objectid,
660 u64 owner, u64 offset)
661{
662 struct btrfs_key key;
663 struct btrfs_extent_data_ref *ref;
530 struct extent_buffer *leaf; 664 struct extent_buffer *leaf;
531 u64 ref_objectid; 665 u32 nritems;
532 int ret; 666 int ret;
667 int recow;
668 int err = -ENOENT;
533 669
534 key.objectid = bytenr; 670 key.objectid = bytenr;
535 key.type = BTRFS_EXTENT_REF_KEY; 671 if (parent) {
536 key.offset = parent; 672 key.type = BTRFS_SHARED_DATA_REF_KEY;
673 key.offset = parent;
674 } else {
675 key.type = BTRFS_EXTENT_DATA_REF_KEY;
676 key.offset = hash_extent_data_ref(root_objectid,
677 owner, offset);
678 }
679again:
680 recow = 0;
681 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
682 if (ret < 0) {
683 err = ret;
684 goto fail;
685 }
537 686
538 ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1); 687 if (parent) {
539 if (ret < 0) 688 if (!ret)
540 goto out; 689 return 0;
541 if (ret > 0) { 690#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
542 ret = -ENOENT; 691 key.type = BTRFS_EXTENT_REF_V0_KEY;
543 goto out; 692 btrfs_release_path(root, path);
693 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
694 if (ret < 0) {
695 err = ret;
696 goto fail;
697 }
698 if (!ret)
699 return 0;
700#endif
701 goto fail;
544 } 702 }
545 703
546 leaf = path->nodes[0]; 704 leaf = path->nodes[0];
547 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); 705 nritems = btrfs_header_nritems(leaf);
548 ref_objectid = btrfs_ref_objectid(leaf, ref); 706 while (1) {
549 if (btrfs_ref_root(leaf, ref) != ref_root || 707 if (path->slots[0] >= nritems) {
550 btrfs_ref_generation(leaf, ref) != ref_generation || 708 ret = btrfs_next_leaf(root, path);
551 (ref_objectid != owner_objectid && 709 if (ret < 0)
552 ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { 710 err = ret;
553 ret = -EIO; 711 if (ret)
554 WARN_ON(1); 712 goto fail;
555 goto out; 713
714 leaf = path->nodes[0];
715 nritems = btrfs_header_nritems(leaf);
716 recow = 1;
717 }
718
719 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
720 if (key.objectid != bytenr ||
721 key.type != BTRFS_EXTENT_DATA_REF_KEY)
722 goto fail;
723
724 ref = btrfs_item_ptr(leaf, path->slots[0],
725 struct btrfs_extent_data_ref);
726
727 if (match_extent_data_ref(leaf, ref, root_objectid,
728 owner, offset)) {
729 if (recow) {
730 btrfs_release_path(root, path);
731 goto again;
732 }
733 err = 0;
734 break;
735 }
736 path->slots[0]++;
556 } 737 }
557 ret = 0; 738fail:
558out: 739 return err;
559 return ret;
560} 740}
561 741
562static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, 742static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
563 struct btrfs_root *root, 743 struct btrfs_root *root,
564 struct btrfs_path *path, 744 struct btrfs_path *path,
565 u64 bytenr, u64 parent, 745 u64 bytenr, u64 parent,
566 u64 ref_root, u64 ref_generation, 746 u64 root_objectid, u64 owner,
567 u64 owner_objectid, 747 u64 offset, int refs_to_add)
568 int refs_to_add)
569{ 748{
570 struct btrfs_key key; 749 struct btrfs_key key;
571 struct extent_buffer *leaf; 750 struct extent_buffer *leaf;
572 struct btrfs_extent_ref *ref; 751 u32 size;
573 u32 num_refs; 752 u32 num_refs;
574 int ret; 753 int ret;
575 754
576 key.objectid = bytenr; 755 key.objectid = bytenr;
577 key.type = BTRFS_EXTENT_REF_KEY; 756 if (parent) {
578 key.offset = parent; 757 key.type = BTRFS_SHARED_DATA_REF_KEY;
758 key.offset = parent;
759 size = sizeof(struct btrfs_shared_data_ref);
760 } else {
761 key.type = BTRFS_EXTENT_DATA_REF_KEY;
762 key.offset = hash_extent_data_ref(root_objectid,
763 owner, offset);
764 size = sizeof(struct btrfs_extent_data_ref);
765 }
579 766
580 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref)); 767 ret = btrfs_insert_empty_item(trans, root, path, &key, size);
581 if (ret == 0) { 768 if (ret && ret != -EEXIST)
582 leaf = path->nodes[0]; 769 goto fail;
583 ref = btrfs_item_ptr(leaf, path->slots[0], 770
584 struct btrfs_extent_ref); 771 leaf = path->nodes[0];
585 btrfs_set_ref_root(leaf, ref, ref_root); 772 if (parent) {
586 btrfs_set_ref_generation(leaf, ref, ref_generation); 773 struct btrfs_shared_data_ref *ref;
587 btrfs_set_ref_objectid(leaf, ref, owner_objectid);
588 btrfs_set_ref_num_refs(leaf, ref, refs_to_add);
589 } else if (ret == -EEXIST) {
590 u64 existing_owner;
591
592 BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
593 leaf = path->nodes[0];
594 ref = btrfs_item_ptr(leaf, path->slots[0], 774 ref = btrfs_item_ptr(leaf, path->slots[0],
595 struct btrfs_extent_ref); 775 struct btrfs_shared_data_ref);
596 if (btrfs_ref_root(leaf, ref) != ref_root || 776 if (ret == 0) {
597 btrfs_ref_generation(leaf, ref) != ref_generation) { 777 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
598 ret = -EIO; 778 } else {
599 WARN_ON(1); 779 num_refs = btrfs_shared_data_ref_count(leaf, ref);
600 goto out; 780 num_refs += refs_to_add;
781 btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
601 } 782 }
783 } else {
784 struct btrfs_extent_data_ref *ref;
785 while (ret == -EEXIST) {
786 ref = btrfs_item_ptr(leaf, path->slots[0],
787 struct btrfs_extent_data_ref);
788 if (match_extent_data_ref(leaf, ref, root_objectid,
789 owner, offset))
790 break;
791 btrfs_release_path(root, path);
792 key.offset++;
793 ret = btrfs_insert_empty_item(trans, root, path, &key,
794 size);
795 if (ret && ret != -EEXIST)
796 goto fail;
602 797
603 num_refs = btrfs_ref_num_refs(leaf, ref); 798 leaf = path->nodes[0];
604 BUG_ON(num_refs == 0); 799 }
605 btrfs_set_ref_num_refs(leaf, ref, num_refs + refs_to_add); 800 ref = btrfs_item_ptr(leaf, path->slots[0],
606 801 struct btrfs_extent_data_ref);
607 existing_owner = btrfs_ref_objectid(leaf, ref); 802 if (ret == 0) {
608 if (existing_owner != owner_objectid && 803 btrfs_set_extent_data_ref_root(leaf, ref,
609 existing_owner != BTRFS_MULTIPLE_OBJECTIDS) { 804 root_objectid);
610 btrfs_set_ref_objectid(leaf, ref, 805 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
611 BTRFS_MULTIPLE_OBJECTIDS); 806 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
807 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
808 } else {
809 num_refs = btrfs_extent_data_ref_count(leaf, ref);
810 num_refs += refs_to_add;
811 btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
612 } 812 }
613 ret = 0;
614 } else {
615 goto out;
616 } 813 }
617 btrfs_unlock_up_safe(path, 1); 814 btrfs_mark_buffer_dirty(leaf);
618 btrfs_mark_buffer_dirty(path->nodes[0]); 815 ret = 0;
619out: 816fail:
620 btrfs_release_path(root, path); 817 btrfs_release_path(root, path);
621 return ret; 818 return ret;
622} 819}
623 820
624static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, 821static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
625 struct btrfs_root *root, 822 struct btrfs_root *root,
626 struct btrfs_path *path, 823 struct btrfs_path *path,
627 int refs_to_drop) 824 int refs_to_drop)
628{ 825{
826 struct btrfs_key key;
827 struct btrfs_extent_data_ref *ref1 = NULL;
828 struct btrfs_shared_data_ref *ref2 = NULL;
629 struct extent_buffer *leaf; 829 struct extent_buffer *leaf;
630 struct btrfs_extent_ref *ref; 830 u32 num_refs = 0;
631 u32 num_refs;
632 int ret = 0; 831 int ret = 0;
633 832
634 leaf = path->nodes[0]; 833 leaf = path->nodes[0];
635 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); 834 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
636 num_refs = btrfs_ref_num_refs(leaf, ref); 835
836 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
837 ref1 = btrfs_item_ptr(leaf, path->slots[0],
838 struct btrfs_extent_data_ref);
839 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
840 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
841 ref2 = btrfs_item_ptr(leaf, path->slots[0],
842 struct btrfs_shared_data_ref);
843 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
844#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
845 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
846 struct btrfs_extent_ref_v0 *ref0;
847 ref0 = btrfs_item_ptr(leaf, path->slots[0],
848 struct btrfs_extent_ref_v0);
849 num_refs = btrfs_ref_count_v0(leaf, ref0);
850#endif
851 } else {
852 BUG();
853 }
854
637 BUG_ON(num_refs < refs_to_drop); 855 BUG_ON(num_refs < refs_to_drop);
638 num_refs -= refs_to_drop; 856 num_refs -= refs_to_drop;
857
639 if (num_refs == 0) { 858 if (num_refs == 0) {
640 ret = btrfs_del_item(trans, root, path); 859 ret = btrfs_del_item(trans, root, path);
641 } else { 860 } else {
642 btrfs_set_ref_num_refs(leaf, ref, num_refs); 861 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
862 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
863 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
864 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
865#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
866 else {
867 struct btrfs_extent_ref_v0 *ref0;
868 ref0 = btrfs_item_ptr(leaf, path->slots[0],
869 struct btrfs_extent_ref_v0);
870 btrfs_set_ref_count_v0(leaf, ref0, num_refs);
871 }
872#endif
643 btrfs_mark_buffer_dirty(leaf); 873 btrfs_mark_buffer_dirty(leaf);
644 } 874 }
875 return ret;
876}
877
878static noinline u32 extent_data_ref_count(struct btrfs_root *root,
879 struct btrfs_path *path,
880 struct btrfs_extent_inline_ref *iref)
881{
882 struct btrfs_key key;
883 struct extent_buffer *leaf;
884 struct btrfs_extent_data_ref *ref1;
885 struct btrfs_shared_data_ref *ref2;
886 u32 num_refs = 0;
887
888 leaf = path->nodes[0];
889 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
890 if (iref) {
891 if (btrfs_extent_inline_ref_type(leaf, iref) ==
892 BTRFS_EXTENT_DATA_REF_KEY) {
893 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
894 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
895 } else {
896 ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
897 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
898 }
899 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
900 ref1 = btrfs_item_ptr(leaf, path->slots[0],
901 struct btrfs_extent_data_ref);
902 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
903 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
904 ref2 = btrfs_item_ptr(leaf, path->slots[0],
905 struct btrfs_shared_data_ref);
906 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
907#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
908 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
909 struct btrfs_extent_ref_v0 *ref0;
910 ref0 = btrfs_item_ptr(leaf, path->slots[0],
911 struct btrfs_extent_ref_v0);
912 num_refs = btrfs_ref_count_v0(leaf, ref0);
913#endif
914 } else {
915 WARN_ON(1);
916 }
917 return num_refs;
918}
919
920static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
921 struct btrfs_root *root,
922 struct btrfs_path *path,
923 u64 bytenr, u64 parent,
924 u64 root_objectid)
925{
926 struct btrfs_key key;
927 int ret;
928
929 key.objectid = bytenr;
930 if (parent) {
931 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
932 key.offset = parent;
933 } else {
934 key.type = BTRFS_TREE_BLOCK_REF_KEY;
935 key.offset = root_objectid;
936 }
937
938 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
939 if (ret > 0)
940 ret = -ENOENT;
941#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
942 if (ret == -ENOENT && parent) {
943 btrfs_release_path(root, path);
944 key.type = BTRFS_EXTENT_REF_V0_KEY;
945 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
946 if (ret > 0)
947 ret = -ENOENT;
948 }
949#endif
950 return ret;
951}
952
953static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
954 struct btrfs_root *root,
955 struct btrfs_path *path,
956 u64 bytenr, u64 parent,
957 u64 root_objectid)
958{
959 struct btrfs_key key;
960 int ret;
961
962 key.objectid = bytenr;
963 if (parent) {
964 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
965 key.offset = parent;
966 } else {
967 key.type = BTRFS_TREE_BLOCK_REF_KEY;
968 key.offset = root_objectid;
969 }
970
971 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
645 btrfs_release_path(root, path); 972 btrfs_release_path(root, path);
646 return ret; 973 return ret;
647} 974}
648 975
976static inline int extent_ref_type(u64 parent, u64 owner)
977{
978 int type;
979 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
980 if (parent > 0)
981 type = BTRFS_SHARED_BLOCK_REF_KEY;
982 else
983 type = BTRFS_TREE_BLOCK_REF_KEY;
984 } else {
985 if (parent > 0)
986 type = BTRFS_SHARED_DATA_REF_KEY;
987 else
988 type = BTRFS_EXTENT_DATA_REF_KEY;
989 }
990 return type;
991}
992
993static int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
994
995{
996 int level;
997 BUG_ON(!path->keep_locks);
998 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
999 if (!path->nodes[level])
1000 break;
1001 btrfs_assert_tree_locked(path->nodes[level]);
1002 if (path->slots[level] + 1 >=
1003 btrfs_header_nritems(path->nodes[level]))
1004 continue;
1005 if (level == 0)
1006 btrfs_item_key_to_cpu(path->nodes[level], key,
1007 path->slots[level] + 1);
1008 else
1009 btrfs_node_key_to_cpu(path->nodes[level], key,
1010 path->slots[level] + 1);
1011 return 0;
1012 }
1013 return 1;
1014}
1015
1016/*
1017 * look for inline back ref. if back ref is found, *ref_ret is set
1018 * to the address of inline back ref, and 0 is returned.
1019 *
1020 * if back ref isn't found, *ref_ret is set to the address where it
1021 * should be inserted, and -ENOENT is returned.
1022 *
1023 * if insert is true and there are too many inline back refs, the path
1024 * points to the extent item, and -EAGAIN is returned.
1025 *
1026 * NOTE: inline back refs are ordered in the same way that back ref
1027 * items in the tree are ordered.
1028 */
1029static noinline_for_stack
1030int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1031 struct btrfs_root *root,
1032 struct btrfs_path *path,
1033 struct btrfs_extent_inline_ref **ref_ret,
1034 u64 bytenr, u64 num_bytes,
1035 u64 parent, u64 root_objectid,
1036 u64 owner, u64 offset, int insert)
1037{
1038 struct btrfs_key key;
1039 struct extent_buffer *leaf;
1040 struct btrfs_extent_item *ei;
1041 struct btrfs_extent_inline_ref *iref;
1042 u64 flags;
1043 u64 item_size;
1044 unsigned long ptr;
1045 unsigned long end;
1046 int extra_size;
1047 int type;
1048 int want;
1049 int ret;
1050 int err = 0;
1051
1052 key.objectid = bytenr;
1053 key.type = BTRFS_EXTENT_ITEM_KEY;
1054 key.offset = num_bytes;
1055
1056 want = extent_ref_type(parent, owner);
1057 if (insert) {
1058 extra_size = btrfs_extent_inline_ref_size(want);
1059 path->keep_locks = 1;
1060 } else
1061 extra_size = -1;
1062 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
1063 if (ret < 0) {
1064 err = ret;
1065 goto out;
1066 }
1067 BUG_ON(ret);
1068
1069 leaf = path->nodes[0];
1070 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1071#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1072 if (item_size < sizeof(*ei)) {
1073 if (!insert) {
1074 err = -ENOENT;
1075 goto out;
1076 }
1077 ret = convert_extent_item_v0(trans, root, path, owner,
1078 extra_size);
1079 if (ret < 0) {
1080 err = ret;
1081 goto out;
1082 }
1083 leaf = path->nodes[0];
1084 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1085 }
1086#endif
1087 BUG_ON(item_size < sizeof(*ei));
1088
1089 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1090 flags = btrfs_extent_flags(leaf, ei);
1091
1092 ptr = (unsigned long)(ei + 1);
1093 end = (unsigned long)ei + item_size;
1094
1095 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1096 ptr += sizeof(struct btrfs_tree_block_info);
1097 BUG_ON(ptr > end);
1098 } else {
1099 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
1100 }
1101
1102 err = -ENOENT;
1103 while (1) {
1104 if (ptr >= end) {
1105 WARN_ON(ptr > end);
1106 break;
1107 }
1108 iref = (struct btrfs_extent_inline_ref *)ptr;
1109 type = btrfs_extent_inline_ref_type(leaf, iref);
1110 if (want < type)
1111 break;
1112 if (want > type) {
1113 ptr += btrfs_extent_inline_ref_size(type);
1114 continue;
1115 }
1116
1117 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1118 struct btrfs_extent_data_ref *dref;
1119 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1120 if (match_extent_data_ref(leaf, dref, root_objectid,
1121 owner, offset)) {
1122 err = 0;
1123 break;
1124 }
1125 if (hash_extent_data_ref_item(leaf, dref) <
1126 hash_extent_data_ref(root_objectid, owner, offset))
1127 break;
1128 } else {
1129 u64 ref_offset;
1130 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
1131 if (parent > 0) {
1132 if (parent == ref_offset) {
1133 err = 0;
1134 break;
1135 }
1136 if (ref_offset < parent)
1137 break;
1138 } else {
1139 if (root_objectid == ref_offset) {
1140 err = 0;
1141 break;
1142 }
1143 if (ref_offset < root_objectid)
1144 break;
1145 }
1146 }
1147 ptr += btrfs_extent_inline_ref_size(type);
1148 }
1149 if (err == -ENOENT && insert) {
1150 if (item_size + extra_size >=
1151 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
1152 err = -EAGAIN;
1153 goto out;
1154 }
1155 /*
1156 * To add new inline back ref, we have to make sure
1157 * there is no corresponding back ref item.
1158 * For simplicity, we just do not add new inline back
1159 * ref if there is any kind of item for this block
1160 */
1161 if (find_next_key(path, &key) == 0 && key.objectid == bytenr &&
1162 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
1163 err = -EAGAIN;
1164 goto out;
1165 }
1166 }
1167 *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
1168out:
1169 if (insert) {
1170 path->keep_locks = 0;
1171 btrfs_unlock_up_safe(path, 1);
1172 }
1173 return err;
1174}
1175
1176/*
1177 * helper to add new inline back ref
1178 */
1179static noinline_for_stack
1180int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
1181 struct btrfs_root *root,
1182 struct btrfs_path *path,
1183 struct btrfs_extent_inline_ref *iref,
1184 u64 parent, u64 root_objectid,
1185 u64 owner, u64 offset, int refs_to_add,
1186 struct btrfs_delayed_extent_op *extent_op)
1187{
1188 struct extent_buffer *leaf;
1189 struct btrfs_extent_item *ei;
1190 unsigned long ptr;
1191 unsigned long end;
1192 unsigned long item_offset;
1193 u64 refs;
1194 int size;
1195 int type;
1196 int ret;
1197
1198 leaf = path->nodes[0];
1199 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1200 item_offset = (unsigned long)iref - (unsigned long)ei;
1201
1202 type = extent_ref_type(parent, owner);
1203 size = btrfs_extent_inline_ref_size(type);
1204
1205 ret = btrfs_extend_item(trans, root, path, size);
1206 BUG_ON(ret);
1207
1208 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1209 refs = btrfs_extent_refs(leaf, ei);
1210 refs += refs_to_add;
1211 btrfs_set_extent_refs(leaf, ei, refs);
1212 if (extent_op)
1213 __run_delayed_extent_op(extent_op, leaf, ei);
1214
1215 ptr = (unsigned long)ei + item_offset;
1216 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
1217 if (ptr < end - size)
1218 memmove_extent_buffer(leaf, ptr + size, ptr,
1219 end - size - ptr);
1220
1221 iref = (struct btrfs_extent_inline_ref *)ptr;
1222 btrfs_set_extent_inline_ref_type(leaf, iref, type);
1223 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1224 struct btrfs_extent_data_ref *dref;
1225 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1226 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
1227 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
1228 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
1229 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
1230 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1231 struct btrfs_shared_data_ref *sref;
1232 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1233 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
1234 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1235 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1236 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1237 } else {
1238 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1239 }
1240 btrfs_mark_buffer_dirty(leaf);
1241 return 0;
1242}
1243
1244static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1245 struct btrfs_root *root,
1246 struct btrfs_path *path,
1247 struct btrfs_extent_inline_ref **ref_ret,
1248 u64 bytenr, u64 num_bytes, u64 parent,
1249 u64 root_objectid, u64 owner, u64 offset)
1250{
1251 int ret;
1252
1253 ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
1254 bytenr, num_bytes, parent,
1255 root_objectid, owner, offset, 0);
1256 if (ret != -ENOENT)
1257 return ret;
1258
1259 btrfs_release_path(root, path);
1260 *ref_ret = NULL;
1261
1262 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1263 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
1264 root_objectid);
1265 } else {
1266 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
1267 root_objectid, owner, offset);
1268 }
1269 return ret;
1270}
1271
1272/*
1273 * helper to update/remove inline back ref
1274 */
1275static noinline_for_stack
1276int update_inline_extent_backref(struct btrfs_trans_handle *trans,
1277 struct btrfs_root *root,
1278 struct btrfs_path *path,
1279 struct btrfs_extent_inline_ref *iref,
1280 int refs_to_mod,
1281 struct btrfs_delayed_extent_op *extent_op)
1282{
1283 struct extent_buffer *leaf;
1284 struct btrfs_extent_item *ei;
1285 struct btrfs_extent_data_ref *dref = NULL;
1286 struct btrfs_shared_data_ref *sref = NULL;
1287 unsigned long ptr;
1288 unsigned long end;
1289 u32 item_size;
1290 int size;
1291 int type;
1292 int ret;
1293 u64 refs;
1294
1295 leaf = path->nodes[0];
1296 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1297 refs = btrfs_extent_refs(leaf, ei);
1298 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
1299 refs += refs_to_mod;
1300 btrfs_set_extent_refs(leaf, ei, refs);
1301 if (extent_op)
1302 __run_delayed_extent_op(extent_op, leaf, ei);
1303
1304 type = btrfs_extent_inline_ref_type(leaf, iref);
1305
1306 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1307 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1308 refs = btrfs_extent_data_ref_count(leaf, dref);
1309 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1310 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1311 refs = btrfs_shared_data_ref_count(leaf, sref);
1312 } else {
1313 refs = 1;
1314 BUG_ON(refs_to_mod != -1);
1315 }
1316
1317 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
1318 refs += refs_to_mod;
1319
1320 if (refs > 0) {
1321 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1322 btrfs_set_extent_data_ref_count(leaf, dref, refs);
1323 else
1324 btrfs_set_shared_data_ref_count(leaf, sref, refs);
1325 } else {
1326 size = btrfs_extent_inline_ref_size(type);
1327 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1328 ptr = (unsigned long)iref;
1329 end = (unsigned long)ei + item_size;
1330 if (ptr + size < end)
1331 memmove_extent_buffer(leaf, ptr, ptr + size,
1332 end - ptr - size);
1333 item_size -= size;
1334 ret = btrfs_truncate_item(trans, root, path, item_size, 1);
1335 BUG_ON(ret);
1336 }
1337 btrfs_mark_buffer_dirty(leaf);
1338 return 0;
1339}
1340
1341static noinline_for_stack
1342int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1343 struct btrfs_root *root,
1344 struct btrfs_path *path,
1345 u64 bytenr, u64 num_bytes, u64 parent,
1346 u64 root_objectid, u64 owner,
1347 u64 offset, int refs_to_add,
1348 struct btrfs_delayed_extent_op *extent_op)
1349{
1350 struct btrfs_extent_inline_ref *iref;
1351 int ret;
1352
1353 ret = lookup_inline_extent_backref(trans, root, path, &iref,
1354 bytenr, num_bytes, parent,
1355 root_objectid, owner, offset, 1);
1356 if (ret == 0) {
1357 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1358 ret = update_inline_extent_backref(trans, root, path, iref,
1359 refs_to_add, extent_op);
1360 } else if (ret == -ENOENT) {
1361 ret = setup_inline_extent_backref(trans, root, path, iref,
1362 parent, root_objectid,
1363 owner, offset, refs_to_add,
1364 extent_op);
1365 }
1366 return ret;
1367}
1368
1369static int insert_extent_backref(struct btrfs_trans_handle *trans,
1370 struct btrfs_root *root,
1371 struct btrfs_path *path,
1372 u64 bytenr, u64 parent, u64 root_objectid,
1373 u64 owner, u64 offset, int refs_to_add)
1374{
1375 int ret;
1376 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1377 BUG_ON(refs_to_add != 1);
1378 ret = insert_tree_block_ref(trans, root, path, bytenr,
1379 parent, root_objectid);
1380 } else {
1381 ret = insert_extent_data_ref(trans, root, path, bytenr,
1382 parent, root_objectid,
1383 owner, offset, refs_to_add);
1384 }
1385 return ret;
1386}
1387
1388static int remove_extent_backref(struct btrfs_trans_handle *trans,
1389 struct btrfs_root *root,
1390 struct btrfs_path *path,
1391 struct btrfs_extent_inline_ref *iref,
1392 int refs_to_drop, int is_data)
1393{
1394 int ret;
1395
1396 BUG_ON(!is_data && refs_to_drop != 1);
1397 if (iref) {
1398 ret = update_inline_extent_backref(trans, root, path, iref,
1399 -refs_to_drop, NULL);
1400 } else if (is_data) {
1401 ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
1402 } else {
1403 ret = btrfs_del_item(trans, root, path);
1404 }
1405 return ret;
1406}
1407
649#ifdef BIO_RW_DISCARD 1408#ifdef BIO_RW_DISCARD
650static void btrfs_issue_discard(struct block_device *bdev, 1409static void btrfs_issue_discard(struct block_device *bdev,
651 u64 start, u64 len) 1410 u64 start, u64 len)
@@ -686,71 +1445,40 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
686#endif 1445#endif
687} 1446}
688 1447
689static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, 1448int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
690 struct btrfs_root *root, u64 bytenr, 1449 struct btrfs_root *root,
691 u64 num_bytes, 1450 u64 bytenr, u64 num_bytes, u64 parent,
692 u64 orig_parent, u64 parent, 1451 u64 root_objectid, u64 owner, u64 offset)
693 u64 orig_root, u64 ref_root,
694 u64 orig_generation, u64 ref_generation,
695 u64 owner_objectid)
696{ 1452{
697 int ret; 1453 int ret;
698 int pin = owner_objectid < BTRFS_FIRST_FREE_OBJECTID; 1454 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
1455 root_objectid == BTRFS_TREE_LOG_OBJECTID);
699 1456
700 ret = btrfs_update_delayed_ref(trans, bytenr, num_bytes, 1457 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
701 orig_parent, parent, orig_root, 1458 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
702 ref_root, orig_generation, 1459 parent, root_objectid, (int)owner,
703 ref_generation, owner_objectid, pin); 1460 BTRFS_ADD_DELAYED_REF, NULL);
704 BUG_ON(ret); 1461 } else {
1462 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
1463 parent, root_objectid, owner, offset,
1464 BTRFS_ADD_DELAYED_REF, NULL);
1465 }
705 return ret; 1466 return ret;
706} 1467}
707 1468
708int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
709 struct btrfs_root *root, u64 bytenr,
710 u64 num_bytes, u64 orig_parent, u64 parent,
711 u64 ref_root, u64 ref_generation,
712 u64 owner_objectid)
713{
714 int ret;
715 if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
716 owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
717 return 0;
718
719 ret = __btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
720 orig_parent, parent, ref_root,
721 ref_root, ref_generation,
722 ref_generation, owner_objectid);
723 return ret;
724}
725static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1469static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
726 struct btrfs_root *root, u64 bytenr, 1470 struct btrfs_root *root,
727 u64 num_bytes, 1471 u64 bytenr, u64 num_bytes,
728 u64 orig_parent, u64 parent, 1472 u64 parent, u64 root_objectid,
729 u64 orig_root, u64 ref_root, 1473 u64 owner, u64 offset, int refs_to_add,
730 u64 orig_generation, u64 ref_generation, 1474 struct btrfs_delayed_extent_op *extent_op)
731 u64 owner_objectid)
732{
733 int ret;
734
735 ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, ref_root,
736 ref_generation, owner_objectid,
737 BTRFS_ADD_DELAYED_REF, 0);
738 BUG_ON(ret);
739 return ret;
740}
741
742static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
743 struct btrfs_root *root, u64 bytenr,
744 u64 num_bytes, u64 parent, u64 ref_root,
745 u64 ref_generation, u64 owner_objectid,
746 int refs_to_add)
747{ 1475{
748 struct btrfs_path *path; 1476 struct btrfs_path *path;
749 int ret; 1477 struct extent_buffer *leaf;
750 struct btrfs_key key;
751 struct extent_buffer *l;
752 struct btrfs_extent_item *item; 1478 struct btrfs_extent_item *item;
753 u32 refs; 1479 u64 refs;
1480 int ret;
1481 int err = 0;
754 1482
755 path = btrfs_alloc_path(); 1483 path = btrfs_alloc_path();
756 if (!path) 1484 if (!path)
@@ -758,43 +1486,27 @@ static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
758 1486
759 path->reada = 1; 1487 path->reada = 1;
760 path->leave_spinning = 1; 1488 path->leave_spinning = 1;
761 key.objectid = bytenr; 1489 /* this will setup the path even if it fails to insert the back ref */
762 key.type = BTRFS_EXTENT_ITEM_KEY; 1490 ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
763 key.offset = num_bytes; 1491 path, bytenr, num_bytes, parent,
764 1492 root_objectid, owner, offset,
765 /* first find the extent item and update its reference count */ 1493 refs_to_add, extent_op);
766 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, 1494 if (ret == 0)
767 path, 0, 1); 1495 goto out;
768 if (ret < 0) {
769 btrfs_set_path_blocking(path);
770 return ret;
771 }
772
773 if (ret > 0) {
774 WARN_ON(1);
775 btrfs_free_path(path);
776 return -EIO;
777 }
778 l = path->nodes[0];
779 1496
780 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 1497 if (ret != -EAGAIN) {
781 if (key.objectid != bytenr) { 1498 err = ret;
782 btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]); 1499 goto out;
783 printk(KERN_ERR "btrfs wanted %llu found %llu\n",
784 (unsigned long long)bytenr,
785 (unsigned long long)key.objectid);
786 BUG();
787 } 1500 }
788 BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);
789
790 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
791
792 refs = btrfs_extent_refs(l, item);
793 btrfs_set_extent_refs(l, item, refs + refs_to_add);
794 btrfs_unlock_up_safe(path, 1);
795 1501
796 btrfs_mark_buffer_dirty(path->nodes[0]); 1502 leaf = path->nodes[0];
1503 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1504 refs = btrfs_extent_refs(leaf, item);
1505 btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
1506 if (extent_op)
1507 __run_delayed_extent_op(extent_op, leaf, item);
797 1508
1509 btrfs_mark_buffer_dirty(leaf);
798 btrfs_release_path(root->fs_info->extent_root, path); 1510 btrfs_release_path(root->fs_info->extent_root, path);
799 1511
800 path->reada = 1; 1512 path->reada = 1;
@@ -802,56 +1514,197 @@ static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
802 1514
803 /* now insert the actual backref */ 1515 /* now insert the actual backref */
804 ret = insert_extent_backref(trans, root->fs_info->extent_root, 1516 ret = insert_extent_backref(trans, root->fs_info->extent_root,
805 path, bytenr, parent, 1517 path, bytenr, parent, root_objectid,
806 ref_root, ref_generation, 1518 owner, offset, refs_to_add);
807 owner_objectid, refs_to_add);
808 BUG_ON(ret); 1519 BUG_ON(ret);
1520out:
809 btrfs_free_path(path); 1521 btrfs_free_path(path);
810 return 0; 1522 return err;
811} 1523}
812 1524
813int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1525static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
814 struct btrfs_root *root, 1526 struct btrfs_root *root,
815 u64 bytenr, u64 num_bytes, u64 parent, 1527 struct btrfs_delayed_ref_node *node,
816 u64 ref_root, u64 ref_generation, 1528 struct btrfs_delayed_extent_op *extent_op,
817 u64 owner_objectid) 1529 int insert_reserved)
818{ 1530{
819 int ret; 1531 int ret = 0;
820 if (ref_root == BTRFS_TREE_LOG_OBJECTID && 1532 struct btrfs_delayed_data_ref *ref;
821 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 1533 struct btrfs_key ins;
822 return 0; 1534 u64 parent = 0;
1535 u64 ref_root = 0;
1536 u64 flags = 0;
823 1537
824 ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, parent, 1538 ins.objectid = node->bytenr;
825 0, ref_root, 0, ref_generation, 1539 ins.offset = node->num_bytes;
826 owner_objectid); 1540 ins.type = BTRFS_EXTENT_ITEM_KEY;
1541
1542 ref = btrfs_delayed_node_to_data_ref(node);
1543 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
1544 parent = ref->parent;
1545 else
1546 ref_root = ref->root;
1547
1548 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1549 if (extent_op) {
1550 BUG_ON(extent_op->update_key);
1551 flags |= extent_op->flags_to_set;
1552 }
1553 ret = alloc_reserved_file_extent(trans, root,
1554 parent, ref_root, flags,
1555 ref->objectid, ref->offset,
1556 &ins, node->ref_mod);
1557 update_reserved_extents(root, ins.objectid, ins.offset, 0);
1558 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1559 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
1560 node->num_bytes, parent,
1561 ref_root, ref->objectid,
1562 ref->offset, node->ref_mod,
1563 extent_op);
1564 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1565 ret = __btrfs_free_extent(trans, root, node->bytenr,
1566 node->num_bytes, parent,
1567 ref_root, ref->objectid,
1568 ref->offset, node->ref_mod,
1569 extent_op);
1570 } else {
1571 BUG();
1572 }
827 return ret; 1573 return ret;
828} 1574}
829 1575
830static int drop_delayed_ref(struct btrfs_trans_handle *trans, 1576static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
831 struct btrfs_root *root, 1577 struct extent_buffer *leaf,
832 struct btrfs_delayed_ref_node *node) 1578 struct btrfs_extent_item *ei)
1579{
1580 u64 flags = btrfs_extent_flags(leaf, ei);
1581 if (extent_op->update_flags) {
1582 flags |= extent_op->flags_to_set;
1583 btrfs_set_extent_flags(leaf, ei, flags);
1584 }
1585
1586 if (extent_op->update_key) {
1587 struct btrfs_tree_block_info *bi;
1588 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
1589 bi = (struct btrfs_tree_block_info *)(ei + 1);
1590 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
1591 }
1592}
1593
1594static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
1595 struct btrfs_root *root,
1596 struct btrfs_delayed_ref_node *node,
1597 struct btrfs_delayed_extent_op *extent_op)
1598{
1599 struct btrfs_key key;
1600 struct btrfs_path *path;
1601 struct btrfs_extent_item *ei;
1602 struct extent_buffer *leaf;
1603 u32 item_size;
1604 int ret;
1605 int err = 0;
1606
1607 path = btrfs_alloc_path();
1608 if (!path)
1609 return -ENOMEM;
1610
1611 key.objectid = node->bytenr;
1612 key.type = BTRFS_EXTENT_ITEM_KEY;
1613 key.offset = node->num_bytes;
1614
1615 path->reada = 1;
1616 path->leave_spinning = 1;
1617 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
1618 path, 0, 1);
1619 if (ret < 0) {
1620 err = ret;
1621 goto out;
1622 }
1623 if (ret > 0) {
1624 err = -EIO;
1625 goto out;
1626 }
1627
1628 leaf = path->nodes[0];
1629 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1630#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1631 if (item_size < sizeof(*ei)) {
1632 ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
1633 path, (u64)-1, 0);
1634 if (ret < 0) {
1635 err = ret;
1636 goto out;
1637 }
1638 leaf = path->nodes[0];
1639 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1640 }
1641#endif
1642 BUG_ON(item_size < sizeof(*ei));
1643 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1644 __run_delayed_extent_op(extent_op, leaf, ei);
1645
1646 btrfs_mark_buffer_dirty(leaf);
1647out:
1648 btrfs_free_path(path);
1649 return err;
1650}
1651
1652static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
1653 struct btrfs_root *root,
1654 struct btrfs_delayed_ref_node *node,
1655 struct btrfs_delayed_extent_op *extent_op,
1656 int insert_reserved)
833{ 1657{
834 int ret = 0; 1658 int ret = 0;
835 struct btrfs_delayed_ref *ref = btrfs_delayed_node_to_ref(node); 1659 struct btrfs_delayed_tree_ref *ref;
1660 struct btrfs_key ins;
1661 u64 parent = 0;
1662 u64 ref_root = 0;
836 1663
837 BUG_ON(node->ref_mod == 0); 1664 ins.objectid = node->bytenr;
838 ret = __btrfs_free_extent(trans, root, node->bytenr, node->num_bytes, 1665 ins.offset = node->num_bytes;
839 node->parent, ref->root, ref->generation, 1666 ins.type = BTRFS_EXTENT_ITEM_KEY;
840 ref->owner_objectid, ref->pin, node->ref_mod);
841 1667
1668 ref = btrfs_delayed_node_to_tree_ref(node);
1669 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1670 parent = ref->parent;
1671 else
1672 ref_root = ref->root;
1673
1674 BUG_ON(node->ref_mod != 1);
1675 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1676 BUG_ON(!extent_op || !extent_op->update_flags ||
1677 !extent_op->update_key);
1678 ret = alloc_reserved_tree_block(trans, root,
1679 parent, ref_root,
1680 extent_op->flags_to_set,
1681 &extent_op->key,
1682 ref->level, &ins);
1683 update_reserved_extents(root, ins.objectid, ins.offset, 0);
1684 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1685 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
1686 node->num_bytes, parent, ref_root,
1687 ref->level, 0, 1, extent_op);
1688 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1689 ret = __btrfs_free_extent(trans, root, node->bytenr,
1690 node->num_bytes, parent, ref_root,
1691 ref->level, 0, 1, extent_op);
1692 } else {
1693 BUG();
1694 }
842 return ret; 1695 return ret;
843} 1696}
844 1697
1698
845/* helper function to actually process a single delayed ref entry */ 1699/* helper function to actually process a single delayed ref entry */
846static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans, 1700static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
847 struct btrfs_root *root, 1701 struct btrfs_root *root,
848 struct btrfs_delayed_ref_node *node, 1702 struct btrfs_delayed_ref_node *node,
849 int insert_reserved) 1703 struct btrfs_delayed_extent_op *extent_op,
1704 int insert_reserved)
850{ 1705{
851 int ret; 1706 int ret;
852 struct btrfs_delayed_ref *ref; 1707 if (btrfs_delayed_ref_is_head(node)) {
853
854 if (node->parent == (u64)-1) {
855 struct btrfs_delayed_ref_head *head; 1708 struct btrfs_delayed_ref_head *head;
856 /* 1709 /*
857 * we've hit the end of the chain and we were supposed 1710 * we've hit the end of the chain and we were supposed
@@ -859,44 +1712,35 @@ static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans,
859 * deleted before we ever needed to insert it, so all 1712 * deleted before we ever needed to insert it, so all
860 * we have to do is clean up the accounting 1713 * we have to do is clean up the accounting
861 */ 1714 */
1715 BUG_ON(extent_op);
1716 head = btrfs_delayed_node_to_head(node);
862 if (insert_reserved) { 1717 if (insert_reserved) {
1718 if (head->is_data) {
1719 ret = btrfs_del_csums(trans, root,
1720 node->bytenr,
1721 node->num_bytes);
1722 BUG_ON(ret);
1723 }
1724 btrfs_update_pinned_extents(root, node->bytenr,
1725 node->num_bytes, 1);
863 update_reserved_extents(root, node->bytenr, 1726 update_reserved_extents(root, node->bytenr,
864 node->num_bytes, 0); 1727 node->num_bytes, 0);
865 } 1728 }
866 head = btrfs_delayed_node_to_head(node);
867 mutex_unlock(&head->mutex); 1729 mutex_unlock(&head->mutex);
868 return 0; 1730 return 0;
869 } 1731 }
870 1732
871 ref = btrfs_delayed_node_to_ref(node); 1733 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
872 if (ref->action == BTRFS_ADD_DELAYED_REF) { 1734 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
873 if (insert_reserved) { 1735 ret = run_delayed_tree_ref(trans, root, node, extent_op,
874 struct btrfs_key ins; 1736 insert_reserved);
875 1737 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
876 ins.objectid = node->bytenr; 1738 node->type == BTRFS_SHARED_DATA_REF_KEY)
877 ins.offset = node->num_bytes; 1739 ret = run_delayed_data_ref(trans, root, node, extent_op,
878 ins.type = BTRFS_EXTENT_ITEM_KEY; 1740 insert_reserved);
879 1741 else
880 /* record the full extent allocation */ 1742 BUG();
881 ret = __btrfs_alloc_reserved_extent(trans, root, 1743 return ret;
882 node->parent, ref->root,
883 ref->generation, ref->owner_objectid,
884 &ins, node->ref_mod);
885 update_reserved_extents(root, node->bytenr,
886 node->num_bytes, 0);
887 } else {
888 /* just add one backref */
889 ret = add_extent_ref(trans, root, node->bytenr,
890 node->num_bytes,
891 node->parent, ref->root, ref->generation,
892 ref->owner_objectid, node->ref_mod);
893 }
894 BUG_ON(ret);
895 } else if (ref->action == BTRFS_DROP_DELAYED_REF) {
896 WARN_ON(insert_reserved);
897 ret = drop_delayed_ref(trans, root, node);
898 }
899 return 0;
900} 1744}
901 1745
902static noinline struct btrfs_delayed_ref_node * 1746static noinline struct btrfs_delayed_ref_node *
@@ -919,7 +1763,7 @@ again:
919 rb_node); 1763 rb_node);
920 if (ref->bytenr != head->node.bytenr) 1764 if (ref->bytenr != head->node.bytenr)
921 break; 1765 break;
922 if (btrfs_delayed_node_to_ref(ref)->action == action) 1766 if (ref->action == action)
923 return ref; 1767 return ref;
924 node = rb_prev(node); 1768 node = rb_prev(node);
925 } 1769 }
@@ -937,6 +1781,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
937 struct btrfs_delayed_ref_root *delayed_refs; 1781 struct btrfs_delayed_ref_root *delayed_refs;
938 struct btrfs_delayed_ref_node *ref; 1782 struct btrfs_delayed_ref_node *ref;
939 struct btrfs_delayed_ref_head *locked_ref = NULL; 1783 struct btrfs_delayed_ref_head *locked_ref = NULL;
1784 struct btrfs_delayed_extent_op *extent_op;
940 int ret; 1785 int ret;
941 int count = 0; 1786 int count = 0;
942 int must_insert_reserved = 0; 1787 int must_insert_reserved = 0;
@@ -975,6 +1820,9 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
975 must_insert_reserved = locked_ref->must_insert_reserved; 1820 must_insert_reserved = locked_ref->must_insert_reserved;
976 locked_ref->must_insert_reserved = 0; 1821 locked_ref->must_insert_reserved = 0;
977 1822
1823 extent_op = locked_ref->extent_op;
1824 locked_ref->extent_op = NULL;
1825
978 /* 1826 /*
979 * locked_ref is the head node, so we have to go one 1827 * locked_ref is the head node, so we have to go one
980 * node back for any delayed ref updates 1828 * node back for any delayed ref updates
@@ -986,6 +1834,25 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
986 * so that any accounting fixes can happen 1834 * so that any accounting fixes can happen
987 */ 1835 */
988 ref = &locked_ref->node; 1836 ref = &locked_ref->node;
1837
1838 if (extent_op && must_insert_reserved) {
1839 kfree(extent_op);
1840 extent_op = NULL;
1841 }
1842
1843 if (extent_op) {
1844 spin_unlock(&delayed_refs->lock);
1845
1846 ret = run_delayed_extent_op(trans, root,
1847 ref, extent_op);
1848 BUG_ON(ret);
1849 kfree(extent_op);
1850
1851 cond_resched();
1852 spin_lock(&delayed_refs->lock);
1853 continue;
1854 }
1855
989 list_del_init(&locked_ref->cluster); 1856 list_del_init(&locked_ref->cluster);
990 locked_ref = NULL; 1857 locked_ref = NULL;
991 } 1858 }
@@ -993,14 +1860,17 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
993 ref->in_tree = 0; 1860 ref->in_tree = 0;
994 rb_erase(&ref->rb_node, &delayed_refs->root); 1861 rb_erase(&ref->rb_node, &delayed_refs->root);
995 delayed_refs->num_entries--; 1862 delayed_refs->num_entries--;
1863
996 spin_unlock(&delayed_refs->lock); 1864 spin_unlock(&delayed_refs->lock);
997 1865
998 ret = run_one_delayed_ref(trans, root, ref, 1866 ret = run_one_delayed_ref(trans, root, ref, extent_op,
999 must_insert_reserved); 1867 must_insert_reserved);
1000 BUG_ON(ret); 1868 BUG_ON(ret);
1001 btrfs_put_delayed_ref(ref);
1002 1869
1870 btrfs_put_delayed_ref(ref);
1871 kfree(extent_op);
1003 count++; 1872 count++;
1873
1004 cond_resched(); 1874 cond_resched();
1005 spin_lock(&delayed_refs->lock); 1875 spin_lock(&delayed_refs->lock);
1006 } 1876 }
@@ -1095,25 +1965,112 @@ out:
1095 return 0; 1965 return 0;
1096} 1966}
1097 1967
1098int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 1968int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
1099 struct btrfs_root *root, u64 objectid, u64 bytenr) 1969 struct btrfs_root *root,
1970 u64 bytenr, u64 num_bytes, u64 flags,
1971 int is_data)
1972{
1973 struct btrfs_delayed_extent_op *extent_op;
1974 int ret;
1975
1976 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
1977 if (!extent_op)
1978 return -ENOMEM;
1979
1980 extent_op->flags_to_set = flags;
1981 extent_op->update_flags = 1;
1982 extent_op->update_key = 0;
1983 extent_op->is_data = is_data ? 1 : 0;
1984
1985 ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
1986 if (ret)
1987 kfree(extent_op);
1988 return ret;
1989}
1990
1991static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
1992 struct btrfs_root *root,
1993 struct btrfs_path *path,
1994 u64 objectid, u64 offset, u64 bytenr)
1995{
1996 struct btrfs_delayed_ref_head *head;
1997 struct btrfs_delayed_ref_node *ref;
1998 struct btrfs_delayed_data_ref *data_ref;
1999 struct btrfs_delayed_ref_root *delayed_refs;
2000 struct rb_node *node;
2001 int ret = 0;
2002
2003 ret = -ENOENT;
2004 delayed_refs = &trans->transaction->delayed_refs;
2005 spin_lock(&delayed_refs->lock);
2006 head = btrfs_find_delayed_ref_head(trans, bytenr);
2007 if (!head)
2008 goto out;
2009
2010 if (!mutex_trylock(&head->mutex)) {
2011 atomic_inc(&head->node.refs);
2012 spin_unlock(&delayed_refs->lock);
2013
2014 btrfs_release_path(root->fs_info->extent_root, path);
2015
2016 mutex_lock(&head->mutex);
2017 mutex_unlock(&head->mutex);
2018 btrfs_put_delayed_ref(&head->node);
2019 return -EAGAIN;
2020 }
2021
2022 node = rb_prev(&head->node.rb_node);
2023 if (!node)
2024 goto out_unlock;
2025
2026 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2027
2028 if (ref->bytenr != bytenr)
2029 goto out_unlock;
2030
2031 ret = 1;
2032 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY)
2033 goto out_unlock;
2034
2035 data_ref = btrfs_delayed_node_to_data_ref(ref);
2036
2037 node = rb_prev(node);
2038 if (node) {
2039 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2040 if (ref->bytenr == bytenr)
2041 goto out_unlock;
2042 }
2043
2044 if (data_ref->root != root->root_key.objectid ||
2045 data_ref->objectid != objectid || data_ref->offset != offset)
2046 goto out_unlock;
2047
2048 ret = 0;
2049out_unlock:
2050 mutex_unlock(&head->mutex);
2051out:
2052 spin_unlock(&delayed_refs->lock);
2053 return ret;
2054}
2055
2056static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
2057 struct btrfs_root *root,
2058 struct btrfs_path *path,
2059 u64 objectid, u64 offset, u64 bytenr)
1100{ 2060{
1101 struct btrfs_root *extent_root = root->fs_info->extent_root; 2061 struct btrfs_root *extent_root = root->fs_info->extent_root;
1102 struct btrfs_path *path;
1103 struct extent_buffer *leaf; 2062 struct extent_buffer *leaf;
1104 struct btrfs_extent_ref *ref_item; 2063 struct btrfs_extent_data_ref *ref;
2064 struct btrfs_extent_inline_ref *iref;
2065 struct btrfs_extent_item *ei;
1105 struct btrfs_key key; 2066 struct btrfs_key key;
1106 struct btrfs_key found_key; 2067 u32 item_size;
1107 u64 ref_root;
1108 u64 last_snapshot;
1109 u32 nritems;
1110 int ret; 2068 int ret;
1111 2069
1112 key.objectid = bytenr; 2070 key.objectid = bytenr;
1113 key.offset = (u64)-1; 2071 key.offset = (u64)-1;
1114 key.type = BTRFS_EXTENT_ITEM_KEY; 2072 key.type = BTRFS_EXTENT_ITEM_KEY;
1115 2073
1116 path = btrfs_alloc_path();
1117 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2074 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1118 if (ret < 0) 2075 if (ret < 0)
1119 goto out; 2076 goto out;
@@ -1125,55 +2082,83 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
1125 2082
1126 path->slots[0]--; 2083 path->slots[0]--;
1127 leaf = path->nodes[0]; 2084 leaf = path->nodes[0];
1128 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2085 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1129 2086
1130 if (found_key.objectid != bytenr || 2087 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
1131 found_key.type != BTRFS_EXTENT_ITEM_KEY)
1132 goto out; 2088 goto out;
1133 2089
1134 last_snapshot = btrfs_root_last_snapshot(&root->root_item); 2090 ret = 1;
1135 while (1) { 2091 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1136 leaf = path->nodes[0]; 2092#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1137 nritems = btrfs_header_nritems(leaf); 2093 if (item_size < sizeof(*ei)) {
1138 if (path->slots[0] >= nritems) { 2094 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
1139 ret = btrfs_next_leaf(extent_root, path); 2095 goto out;
1140 if (ret < 0) 2096 }
1141 goto out; 2097#endif
1142 if (ret == 0) 2098 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1143 continue;
1144 break;
1145 }
1146 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1147 if (found_key.objectid != bytenr)
1148 break;
1149 2099
1150 if (found_key.type != BTRFS_EXTENT_REF_KEY) { 2100 if (item_size != sizeof(*ei) +
1151 path->slots[0]++; 2101 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
1152 continue; 2102 goto out;
1153 }
1154 2103
1155 ref_item = btrfs_item_ptr(leaf, path->slots[0], 2104 if (btrfs_extent_generation(leaf, ei) <=
1156 struct btrfs_extent_ref); 2105 btrfs_root_last_snapshot(&root->root_item))
1157 ref_root = btrfs_ref_root(leaf, ref_item); 2106 goto out;
1158 if ((ref_root != root->root_key.objectid && 2107
1159 ref_root != BTRFS_TREE_LOG_OBJECTID) || 2108 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
1160 objectid != btrfs_ref_objectid(leaf, ref_item)) { 2109 if (btrfs_extent_inline_ref_type(leaf, iref) !=
1161 ret = 1; 2110 BTRFS_EXTENT_DATA_REF_KEY)
1162 goto out; 2111 goto out;
1163 } 2112
1164 if (btrfs_ref_generation(leaf, ref_item) <= last_snapshot) { 2113 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
1165 ret = 1; 2114 if (btrfs_extent_refs(leaf, ei) !=
2115 btrfs_extent_data_ref_count(leaf, ref) ||
2116 btrfs_extent_data_ref_root(leaf, ref) !=
2117 root->root_key.objectid ||
2118 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
2119 btrfs_extent_data_ref_offset(leaf, ref) != offset)
2120 goto out;
2121
2122 ret = 0;
2123out:
2124 return ret;
2125}
2126
2127int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
2128 struct btrfs_root *root,
2129 u64 objectid, u64 offset, u64 bytenr)
2130{
2131 struct btrfs_path *path;
2132 int ret;
2133 int ret2;
2134
2135 path = btrfs_alloc_path();
2136 if (!path)
2137 return -ENOENT;
2138
2139 do {
2140 ret = check_committed_ref(trans, root, path, objectid,
2141 offset, bytenr);
2142 if (ret && ret != -ENOENT)
1166 goto out; 2143 goto out;
1167 }
1168 2144
1169 path->slots[0]++; 2145 ret2 = check_delayed_ref(trans, root, path, objectid,
2146 offset, bytenr);
2147 } while (ret2 == -EAGAIN);
2148
2149 if (ret2 && ret2 != -ENOENT) {
2150 ret = ret2;
2151 goto out;
1170 } 2152 }
1171 ret = 0; 2153
2154 if (ret != -ENOENT || ret2 != -ENOENT)
2155 ret = 0;
1172out: 2156out:
1173 btrfs_free_path(path); 2157 btrfs_free_path(path);
1174 return ret; 2158 return ret;
1175} 2159}
1176 2160
2161#if 0
1177int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2162int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1178 struct extent_buffer *buf, u32 nr_extents) 2163 struct extent_buffer *buf, u32 nr_extents)
1179{ 2164{
@@ -1291,62 +2276,44 @@ static int refsort_cmp(const void *a_void, const void *b_void)
1291 return 1; 2276 return 1;
1292 return 0; 2277 return 0;
1293} 2278}
2279#endif
1294 2280
1295 2281static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
1296noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
1297 struct btrfs_root *root, 2282 struct btrfs_root *root,
1298 struct extent_buffer *orig_buf, 2283 struct extent_buffer *buf,
1299 struct extent_buffer *buf, u32 *nr_extents) 2284 int full_backref, int inc)
1300{ 2285{
1301 u64 bytenr; 2286 u64 bytenr;
2287 u64 num_bytes;
2288 u64 parent;
1302 u64 ref_root; 2289 u64 ref_root;
1303 u64 orig_root;
1304 u64 ref_generation;
1305 u64 orig_generation;
1306 struct refsort *sorted;
1307 u32 nritems; 2290 u32 nritems;
1308 u32 nr_file_extents = 0;
1309 struct btrfs_key key; 2291 struct btrfs_key key;
1310 struct btrfs_file_extent_item *fi; 2292 struct btrfs_file_extent_item *fi;
1311 int i; 2293 int i;
1312 int level; 2294 int level;
1313 int ret = 0; 2295 int ret = 0;
1314 int faili = 0;
1315 int refi = 0;
1316 int slot;
1317 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 2296 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
1318 u64, u64, u64, u64, u64, u64, u64, u64, u64); 2297 u64, u64, u64, u64, u64, u64);
1319 2298
1320 ref_root = btrfs_header_owner(buf); 2299 ref_root = btrfs_header_owner(buf);
1321 ref_generation = btrfs_header_generation(buf);
1322 orig_root = btrfs_header_owner(orig_buf);
1323 orig_generation = btrfs_header_generation(orig_buf);
1324
1325 nritems = btrfs_header_nritems(buf); 2300 nritems = btrfs_header_nritems(buf);
1326 level = btrfs_header_level(buf); 2301 level = btrfs_header_level(buf);
1327 2302
1328 sorted = kmalloc(sizeof(struct refsort) * nritems, GFP_NOFS); 2303 if (!root->ref_cows && level == 0)
1329 BUG_ON(!sorted); 2304 return 0;
1330 2305
1331 if (root->ref_cows) { 2306 if (inc)
1332 process_func = __btrfs_inc_extent_ref; 2307 process_func = btrfs_inc_extent_ref;
1333 } else { 2308 else
1334 if (level == 0 && 2309 process_func = btrfs_free_extent;
1335 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) 2310
1336 goto out; 2311 if (full_backref)
1337 if (level != 0 && 2312 parent = buf->start;
1338 root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) 2313 else
1339 goto out; 2314 parent = 0;
1340 process_func = __btrfs_update_extent_ref;
1341 }
1342 2315
1343 /*
1344 * we make two passes through the items. In the first pass we
1345 * only record the byte number and slot. Then we sort based on
1346 * byte number and do the actual work based on the sorted results
1347 */
1348 for (i = 0; i < nritems; i++) { 2316 for (i = 0; i < nritems; i++) {
1349 cond_resched();
1350 if (level == 0) { 2317 if (level == 0) {
1351 btrfs_item_key_to_cpu(buf, &key, i); 2318 btrfs_item_key_to_cpu(buf, &key, i);
1352 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2319 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
@@ -1360,151 +2327,38 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
1360 if (bytenr == 0) 2327 if (bytenr == 0)
1361 continue; 2328 continue;
1362 2329
1363 nr_file_extents++; 2330 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
1364 sorted[refi].bytenr = bytenr; 2331 key.offset -= btrfs_file_extent_offset(buf, fi);
1365 sorted[refi].slot = i; 2332 ret = process_func(trans, root, bytenr, num_bytes,
1366 refi++; 2333 parent, ref_root, key.objectid,
1367 } else { 2334 key.offset);
1368 bytenr = btrfs_node_blockptr(buf, i); 2335 if (ret)
1369 sorted[refi].bytenr = bytenr;
1370 sorted[refi].slot = i;
1371 refi++;
1372 }
1373 }
1374 /*
1375 * if refi == 0, we didn't actually put anything into the sorted
1376 * array and we're done
1377 */
1378 if (refi == 0)
1379 goto out;
1380
1381 sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
1382
1383 for (i = 0; i < refi; i++) {
1384 cond_resched();
1385 slot = sorted[i].slot;
1386 bytenr = sorted[i].bytenr;
1387
1388 if (level == 0) {
1389 btrfs_item_key_to_cpu(buf, &key, slot);
1390 fi = btrfs_item_ptr(buf, slot,
1391 struct btrfs_file_extent_item);
1392
1393 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1394 if (bytenr == 0)
1395 continue;
1396
1397 ret = process_func(trans, root, bytenr,
1398 btrfs_file_extent_disk_num_bytes(buf, fi),
1399 orig_buf->start, buf->start,
1400 orig_root, ref_root,
1401 orig_generation, ref_generation,
1402 key.objectid);
1403
1404 if (ret) {
1405 faili = slot;
1406 WARN_ON(1);
1407 goto fail; 2336 goto fail;
1408 }
1409 } else { 2337 } else {
1410 ret = process_func(trans, root, bytenr, buf->len, 2338 bytenr = btrfs_node_blockptr(buf, i);
1411 orig_buf->start, buf->start, 2339 num_bytes = btrfs_level_size(root, level - 1);
1412 orig_root, ref_root, 2340 ret = process_func(trans, root, bytenr, num_bytes,
1413 orig_generation, ref_generation, 2341 parent, ref_root, level - 1, 0);
1414 level - 1); 2342 if (ret)
1415 if (ret) {
1416 faili = slot;
1417 WARN_ON(1);
1418 goto fail; 2343 goto fail;
1419 }
1420 } 2344 }
1421 } 2345 }
1422out:
1423 kfree(sorted);
1424 if (nr_extents) {
1425 if (level == 0)
1426 *nr_extents = nr_file_extents;
1427 else
1428 *nr_extents = nritems;
1429 }
1430 return 0; 2346 return 0;
1431fail: 2347fail:
1432 kfree(sorted); 2348 BUG();
1433 WARN_ON(1);
1434 return ret; 2349 return ret;
1435} 2350}
1436 2351
1437int btrfs_update_ref(struct btrfs_trans_handle *trans, 2352int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1438 struct btrfs_root *root, struct extent_buffer *orig_buf, 2353 struct extent_buffer *buf, int full_backref)
1439 struct extent_buffer *buf, int start_slot, int nr)
1440
1441{ 2354{
1442 u64 bytenr; 2355 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
1443 u64 ref_root; 2356}
1444 u64 orig_root;
1445 u64 ref_generation;
1446 u64 orig_generation;
1447 struct btrfs_key key;
1448 struct btrfs_file_extent_item *fi;
1449 int i;
1450 int ret;
1451 int slot;
1452 int level;
1453
1454 BUG_ON(start_slot < 0);
1455 BUG_ON(start_slot + nr > btrfs_header_nritems(buf));
1456
1457 ref_root = btrfs_header_owner(buf);
1458 ref_generation = btrfs_header_generation(buf);
1459 orig_root = btrfs_header_owner(orig_buf);
1460 orig_generation = btrfs_header_generation(orig_buf);
1461 level = btrfs_header_level(buf);
1462
1463 if (!root->ref_cows) {
1464 if (level == 0 &&
1465 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
1466 return 0;
1467 if (level != 0 &&
1468 root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
1469 return 0;
1470 }
1471 2357
1472 for (i = 0, slot = start_slot; i < nr; i++, slot++) { 2358int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1473 cond_resched(); 2359 struct extent_buffer *buf, int full_backref)
1474 if (level == 0) { 2360{
1475 btrfs_item_key_to_cpu(buf, &key, slot); 2361 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
1476 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1477 continue;
1478 fi = btrfs_item_ptr(buf, slot,
1479 struct btrfs_file_extent_item);
1480 if (btrfs_file_extent_type(buf, fi) ==
1481 BTRFS_FILE_EXTENT_INLINE)
1482 continue;
1483 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1484 if (bytenr == 0)
1485 continue;
1486 ret = __btrfs_update_extent_ref(trans, root, bytenr,
1487 btrfs_file_extent_disk_num_bytes(buf, fi),
1488 orig_buf->start, buf->start,
1489 orig_root, ref_root, orig_generation,
1490 ref_generation, key.objectid);
1491 if (ret)
1492 goto fail;
1493 } else {
1494 bytenr = btrfs_node_blockptr(buf, slot);
1495 ret = __btrfs_update_extent_ref(trans, root, bytenr,
1496 buf->len, orig_buf->start,
1497 buf->start, orig_root, ref_root,
1498 orig_generation, ref_generation,
1499 level - 1);
1500 if (ret)
1501 goto fail;
1502 }
1503 }
1504 return 0;
1505fail:
1506 WARN_ON(1);
1507 return -1;
1508} 2362}
1509 2363
1510static int write_one_cache_group(struct btrfs_trans_handle *trans, 2364static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -2007,6 +2861,24 @@ static int update_block_group(struct btrfs_trans_handle *trans,
2007 u64 old_val; 2861 u64 old_val;
2008 u64 byte_in_group; 2862 u64 byte_in_group;
2009 2863
2864 /* block accounting for super block */
2865 spin_lock(&info->delalloc_lock);
2866 old_val = btrfs_super_bytes_used(&info->super_copy);
2867 if (alloc)
2868 old_val += num_bytes;
2869 else
2870 old_val -= num_bytes;
2871 btrfs_set_super_bytes_used(&info->super_copy, old_val);
2872
2873 /* block accounting for root item */
2874 old_val = btrfs_root_used(&root->root_item);
2875 if (alloc)
2876 old_val += num_bytes;
2877 else
2878 old_val -= num_bytes;
2879 btrfs_set_root_used(&root->root_item, old_val);
2880 spin_unlock(&info->delalloc_lock);
2881
2010 while (total) { 2882 while (total) {
2011 cache = btrfs_lookup_block_group(info, bytenr); 2883 cache = btrfs_lookup_block_group(info, bytenr);
2012 if (!cache) 2884 if (!cache)
@@ -2216,8 +3088,6 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
2216 u64 header_owner = btrfs_header_owner(buf); 3088 u64 header_owner = btrfs_header_owner(buf);
2217 u64 header_transid = btrfs_header_generation(buf); 3089 u64 header_transid = btrfs_header_generation(buf);
2218 if (header_owner != BTRFS_TREE_LOG_OBJECTID && 3090 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
2219 header_owner != BTRFS_TREE_RELOC_OBJECTID &&
2220 header_owner != BTRFS_DATA_RELOC_TREE_OBJECTID &&
2221 header_transid == trans->transid && 3091 header_transid == trans->transid &&
2222 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 3092 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
2223 *must_clean = buf; 3093 *must_clean = buf;
@@ -2235,63 +3105,77 @@ pinit:
2235 return 0; 3105 return 0;
2236} 3106}
2237 3107
2238/* 3108
2239 * remove an extent from the root, returns 0 on success 3109static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2240 */ 3110 struct btrfs_root *root,
2241static int __free_extent(struct btrfs_trans_handle *trans, 3111 u64 bytenr, u64 num_bytes, u64 parent,
2242 struct btrfs_root *root, 3112 u64 root_objectid, u64 owner_objectid,
2243 u64 bytenr, u64 num_bytes, u64 parent, 3113 u64 owner_offset, int refs_to_drop,
2244 u64 root_objectid, u64 ref_generation, 3114 struct btrfs_delayed_extent_op *extent_op)
2245 u64 owner_objectid, int pin, int mark_free,
2246 int refs_to_drop)
2247{ 3115{
2248 struct btrfs_path *path;
2249 struct btrfs_key key; 3116 struct btrfs_key key;
3117 struct btrfs_path *path;
2250 struct btrfs_fs_info *info = root->fs_info; 3118 struct btrfs_fs_info *info = root->fs_info;
2251 struct btrfs_root *extent_root = info->extent_root; 3119 struct btrfs_root *extent_root = info->extent_root;
2252 struct extent_buffer *leaf; 3120 struct extent_buffer *leaf;
3121 struct btrfs_extent_item *ei;
3122 struct btrfs_extent_inline_ref *iref;
2253 int ret; 3123 int ret;
3124 int is_data;
2254 int extent_slot = 0; 3125 int extent_slot = 0;
2255 int found_extent = 0; 3126 int found_extent = 0;
2256 int num_to_del = 1; 3127 int num_to_del = 1;
2257 struct btrfs_extent_item *ei; 3128 u32 item_size;
2258 u32 refs; 3129 u64 refs;
2259 3130
2260 key.objectid = bytenr;
2261 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
2262 key.offset = num_bytes;
2263 path = btrfs_alloc_path(); 3131 path = btrfs_alloc_path();
2264 if (!path) 3132 if (!path)
2265 return -ENOMEM; 3133 return -ENOMEM;
2266 3134
2267 path->reada = 1; 3135 path->reada = 1;
2268 path->leave_spinning = 1; 3136 path->leave_spinning = 1;
2269 ret = lookup_extent_backref(trans, extent_root, path, 3137
2270 bytenr, parent, root_objectid, 3138 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
2271 ref_generation, owner_objectid, 1); 3139 BUG_ON(!is_data && refs_to_drop != 1);
3140
3141 ret = lookup_extent_backref(trans, extent_root, path, &iref,
3142 bytenr, num_bytes, parent,
3143 root_objectid, owner_objectid,
3144 owner_offset);
2272 if (ret == 0) { 3145 if (ret == 0) {
2273 struct btrfs_key found_key;
2274 extent_slot = path->slots[0]; 3146 extent_slot = path->slots[0];
2275 while (extent_slot > 0) { 3147 while (extent_slot >= 0) {
2276 extent_slot--; 3148 btrfs_item_key_to_cpu(path->nodes[0], &key,
2277 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2278 extent_slot); 3149 extent_slot);
2279 if (found_key.objectid != bytenr) 3150 if (key.objectid != bytenr)
2280 break; 3151 break;
2281 if (found_key.type == BTRFS_EXTENT_ITEM_KEY && 3152 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
2282 found_key.offset == num_bytes) { 3153 key.offset == num_bytes) {
2283 found_extent = 1; 3154 found_extent = 1;
2284 break; 3155 break;
2285 } 3156 }
2286 if (path->slots[0] - extent_slot > 5) 3157 if (path->slots[0] - extent_slot > 5)
2287 break; 3158 break;
3159 extent_slot--;
2288 } 3160 }
3161#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3162 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
3163 if (found_extent && item_size < sizeof(*ei))
3164 found_extent = 0;
3165#endif
2289 if (!found_extent) { 3166 if (!found_extent) {
3167 BUG_ON(iref);
2290 ret = remove_extent_backref(trans, extent_root, path, 3168 ret = remove_extent_backref(trans, extent_root, path,
2291 refs_to_drop); 3169 NULL, refs_to_drop,
3170 is_data);
2292 BUG_ON(ret); 3171 BUG_ON(ret);
2293 btrfs_release_path(extent_root, path); 3172 btrfs_release_path(extent_root, path);
2294 path->leave_spinning = 1; 3173 path->leave_spinning = 1;
3174
3175 key.objectid = bytenr;
3176 key.type = BTRFS_EXTENT_ITEM_KEY;
3177 key.offset = num_bytes;
3178
2295 ret = btrfs_search_slot(trans, extent_root, 3179 ret = btrfs_search_slot(trans, extent_root,
2296 &key, path, -1, 1); 3180 &key, path, -1, 1);
2297 if (ret) { 3181 if (ret) {
@@ -2307,82 +3191,98 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2307 btrfs_print_leaf(extent_root, path->nodes[0]); 3191 btrfs_print_leaf(extent_root, path->nodes[0]);
2308 WARN_ON(1); 3192 WARN_ON(1);
2309 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 3193 printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
2310 "parent %llu root %llu gen %llu owner %llu\n", 3194 "parent %llu root %llu owner %llu offset %llu\n",
2311 (unsigned long long)bytenr, 3195 (unsigned long long)bytenr,
2312 (unsigned long long)parent, 3196 (unsigned long long)parent,
2313 (unsigned long long)root_objectid, 3197 (unsigned long long)root_objectid,
2314 (unsigned long long)ref_generation, 3198 (unsigned long long)owner_objectid,
2315 (unsigned long long)owner_objectid); 3199 (unsigned long long)owner_offset);
2316 } 3200 }
2317 3201
2318 leaf = path->nodes[0]; 3202 leaf = path->nodes[0];
3203 item_size = btrfs_item_size_nr(leaf, extent_slot);
3204#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3205 if (item_size < sizeof(*ei)) {
3206 BUG_ON(found_extent || extent_slot != path->slots[0]);
3207 ret = convert_extent_item_v0(trans, extent_root, path,
3208 owner_objectid, 0);
3209 BUG_ON(ret < 0);
3210
3211 btrfs_release_path(extent_root, path);
3212 path->leave_spinning = 1;
3213
3214 key.objectid = bytenr;
3215 key.type = BTRFS_EXTENT_ITEM_KEY;
3216 key.offset = num_bytes;
3217
3218 ret = btrfs_search_slot(trans, extent_root, &key, path,
3219 -1, 1);
3220 if (ret) {
3221 printk(KERN_ERR "umm, got %d back from search"
3222 ", was looking for %llu\n", ret,
3223 (unsigned long long)bytenr);
3224 btrfs_print_leaf(extent_root, path->nodes[0]);
3225 }
3226 BUG_ON(ret);
3227 extent_slot = path->slots[0];
3228 leaf = path->nodes[0];
3229 item_size = btrfs_item_size_nr(leaf, extent_slot);
3230 }
3231#endif
3232 BUG_ON(item_size < sizeof(*ei));
2319 ei = btrfs_item_ptr(leaf, extent_slot, 3233 ei = btrfs_item_ptr(leaf, extent_slot,
2320 struct btrfs_extent_item); 3234 struct btrfs_extent_item);
2321 refs = btrfs_extent_refs(leaf, ei); 3235 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
2322 3236 struct btrfs_tree_block_info *bi;
2323 /* 3237 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
2324 * we're not allowed to delete the extent item if there 3238 bi = (struct btrfs_tree_block_info *)(ei + 1);
2325 * are other delayed ref updates pending 3239 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
2326 */ 3240 }
2327 3241
3242 refs = btrfs_extent_refs(leaf, ei);
2328 BUG_ON(refs < refs_to_drop); 3243 BUG_ON(refs < refs_to_drop);
2329 refs -= refs_to_drop; 3244 refs -= refs_to_drop;
2330 btrfs_set_extent_refs(leaf, ei, refs);
2331 btrfs_mark_buffer_dirty(leaf);
2332 3245
2333 if (refs == 0 && found_extent && 3246 if (refs > 0) {
2334 path->slots[0] == extent_slot + 1) { 3247 if (extent_op)
2335 struct btrfs_extent_ref *ref; 3248 __run_delayed_extent_op(extent_op, leaf, ei);
2336 ref = btrfs_item_ptr(leaf, path->slots[0], 3249 /*
2337 struct btrfs_extent_ref); 3250 * In the case of inline back ref, reference count will
2338 BUG_ON(btrfs_ref_num_refs(leaf, ref) != refs_to_drop); 3251 * be updated by remove_extent_backref
2339 /* if the back ref and the extent are next to each other
2340 * they get deleted below in one shot
2341 */ 3252 */
2342 path->slots[0] = extent_slot; 3253 if (iref) {
2343 num_to_del = 2; 3254 BUG_ON(!found_extent);
2344 } else if (found_extent) { 3255 } else {
2345 /* otherwise delete the extent back ref */ 3256 btrfs_set_extent_refs(leaf, ei, refs);
2346 ret = remove_extent_backref(trans, extent_root, path, 3257 btrfs_mark_buffer_dirty(leaf);
2347 refs_to_drop); 3258 }
2348 BUG_ON(ret); 3259 if (found_extent) {
2349 /* if refs are 0, we need to setup the path for deletion */ 3260 ret = remove_extent_backref(trans, extent_root, path,
2350 if (refs == 0) { 3261 iref, refs_to_drop,
2351 btrfs_release_path(extent_root, path); 3262 is_data);
2352 path->leave_spinning = 1;
2353 ret = btrfs_search_slot(trans, extent_root, &key, path,
2354 -1, 1);
2355 BUG_ON(ret); 3263 BUG_ON(ret);
2356 } 3264 }
2357 } 3265 } else {
2358 3266 int mark_free = 0;
2359 if (refs == 0) {
2360 u64 super_used;
2361 u64 root_used;
2362 struct extent_buffer *must_clean = NULL; 3267 struct extent_buffer *must_clean = NULL;
2363 3268
2364 if (pin) { 3269 if (found_extent) {
2365 ret = pin_down_bytes(trans, root, path, 3270 BUG_ON(is_data && refs_to_drop !=
2366 bytenr, num_bytes, 3271 extent_data_ref_count(root, path, iref));
2367 owner_objectid >= BTRFS_FIRST_FREE_OBJECTID, 3272 if (iref) {
2368 &must_clean); 3273 BUG_ON(path->slots[0] != extent_slot);
2369 if (ret > 0) 3274 } else {
2370 mark_free = 1; 3275 BUG_ON(path->slots[0] != extent_slot + 1);
2371 BUG_ON(ret < 0); 3276 path->slots[0] = extent_slot;
3277 num_to_del = 2;
3278 }
2372 } 3279 }
2373 3280
2374 /* block accounting for super block */ 3281 ret = pin_down_bytes(trans, root, path, bytenr,
2375 spin_lock(&info->delalloc_lock); 3282 num_bytes, is_data, &must_clean);
2376 super_used = btrfs_super_bytes_used(&info->super_copy); 3283 if (ret > 0)
2377 btrfs_set_super_bytes_used(&info->super_copy, 3284 mark_free = 1;
2378 super_used - num_bytes); 3285 BUG_ON(ret < 0);
2379
2380 /* block accounting for root item */
2381 root_used = btrfs_root_used(&root->root_item);
2382 btrfs_set_root_used(&root->root_item,
2383 root_used - num_bytes);
2384 spin_unlock(&info->delalloc_lock);
2385
2386 /* 3286 /*
2387 * it is going to be very rare for someone to be waiting 3287 * it is going to be very rare for someone to be waiting
2388 * on the block we're freeing. del_items might need to 3288 * on the block we're freeing. del_items might need to
@@ -2403,7 +3303,7 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2403 free_extent_buffer(must_clean); 3303 free_extent_buffer(must_clean);
2404 } 3304 }
2405 3305
2406 if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 3306 if (is_data) {
2407 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 3307 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
2408 BUG_ON(ret); 3308 BUG_ON(ret);
2409 } else { 3309 } else {
@@ -2421,34 +3321,6 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2421} 3321}
2422 3322
2423/* 3323/*
2424 * remove an extent from the root, returns 0 on success
2425 */
2426static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2427 struct btrfs_root *root,
2428 u64 bytenr, u64 num_bytes, u64 parent,
2429 u64 root_objectid, u64 ref_generation,
2430 u64 owner_objectid, int pin,
2431 int refs_to_drop)
2432{
2433 WARN_ON(num_bytes < root->sectorsize);
2434
2435 /*
2436 * if metadata always pin
2437 * if data pin when any transaction has committed this
2438 */
2439 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID ||
2440 ref_generation != trans->transid)
2441 pin = 1;
2442
2443 if (ref_generation != trans->transid)
2444 pin = 1;
2445
2446 return __free_extent(trans, root, bytenr, num_bytes, parent,
2447 root_objectid, ref_generation,
2448 owner_objectid, pin, pin == 0, refs_to_drop);
2449}
2450
2451/*
2452 * when we free an extent, it is possible (and likely) that we free the last 3324 * when we free an extent, it is possible (and likely) that we free the last
2453 * delayed ref for that extent as well. This searches the delayed ref tree for 3325 * delayed ref for that extent as well. This searches the delayed ref tree for
2454 * a given extent, and if there are no other delayed refs to be processed, it 3326 * a given extent, and if there are no other delayed refs to be processed, it
@@ -2479,6 +3351,13 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
2479 if (ref->bytenr == bytenr) 3351 if (ref->bytenr == bytenr)
2480 goto out; 3352 goto out;
2481 3353
3354 if (head->extent_op) {
3355 if (!head->must_insert_reserved)
3356 goto out;
3357 kfree(head->extent_op);
3358 head->extent_op = NULL;
3359 }
3360
2482 /* 3361 /*
2483 * waiting for the lock here would deadlock. If someone else has it 3362 * waiting for the lock here would deadlock. If someone else has it
2484 * locked they are already in the process of dropping it anyway 3363 * locked they are already in the process of dropping it anyway
@@ -2507,7 +3386,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
2507 spin_unlock(&delayed_refs->lock); 3386 spin_unlock(&delayed_refs->lock);
2508 3387
2509 ret = run_one_delayed_ref(trans, root->fs_info->tree_root, 3388 ret = run_one_delayed_ref(trans, root->fs_info->tree_root,
2510 &head->node, head->must_insert_reserved); 3389 &head->node, head->extent_op,
3390 head->must_insert_reserved);
2511 BUG_ON(ret); 3391 BUG_ON(ret);
2512 btrfs_put_delayed_ref(&head->node); 3392 btrfs_put_delayed_ref(&head->node);
2513 return 0; 3393 return 0;
@@ -2519,32 +3399,32 @@ out:
2519int btrfs_free_extent(struct btrfs_trans_handle *trans, 3399int btrfs_free_extent(struct btrfs_trans_handle *trans,
2520 struct btrfs_root *root, 3400 struct btrfs_root *root,
2521 u64 bytenr, u64 num_bytes, u64 parent, 3401 u64 bytenr, u64 num_bytes, u64 parent,
2522 u64 root_objectid, u64 ref_generation, 3402 u64 root_objectid, u64 owner, u64 offset)
2523 u64 owner_objectid, int pin)
2524{ 3403{
2525 int ret; 3404 int ret;
2526 3405
2527 /* 3406 /*
2528 * tree log blocks never actually go into the extent allocation 3407 * tree log blocks never actually go into the extent allocation
2529 * tree, just update pinning info and exit early. 3408 * tree, just update pinning info and exit early.
2530 *
2531 * data extents referenced by the tree log do need to have
2532 * their reference counts bumped.
2533 */ 3409 */
2534 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID && 3410 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
2535 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 3411 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
2536 /* unlocks the pinned mutex */ 3412 /* unlocks the pinned mutex */
2537 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3413 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
2538 update_reserved_extents(root, bytenr, num_bytes, 0); 3414 update_reserved_extents(root, bytenr, num_bytes, 0);
2539 ret = 0; 3415 ret = 0;
2540 } else { 3416 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
2541 ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, 3417 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
2542 root_objectid, ref_generation, 3418 parent, root_objectid, (int)owner,
2543 owner_objectid, 3419 BTRFS_DROP_DELAYED_REF, NULL);
2544 BTRFS_DROP_DELAYED_REF, 1);
2545 BUG_ON(ret); 3420 BUG_ON(ret);
2546 ret = check_ref_cleanup(trans, root, bytenr); 3421 ret = check_ref_cleanup(trans, root, bytenr);
2547 BUG_ON(ret); 3422 BUG_ON(ret);
3423 } else {
3424 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
3425 parent, root_objectid, owner,
3426 offset, BTRFS_DROP_DELAYED_REF, NULL);
3427 BUG_ON(ret);
2548 } 3428 }
2549 return ret; 3429 return ret;
2550} 3430}
@@ -2719,7 +3599,7 @@ refill_cluster:
2719 last_ptr_loop = 0; 3599 last_ptr_loop = 0;
2720 3600
2721 /* allocate a cluster in this block group */ 3601 /* allocate a cluster in this block group */
2722 ret = btrfs_find_space_cluster(trans, 3602 ret = btrfs_find_space_cluster(trans, root,
2723 block_group, last_ptr, 3603 block_group, last_ptr,
2724 offset, num_bytes, 3604 offset, num_bytes,
2725 empty_cluster + empty_size); 3605 empty_cluster + empty_size);
@@ -2969,99 +3849,147 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2969 return ret; 3849 return ret;
2970} 3850}
2971 3851
2972static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, 3852static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
2973 struct btrfs_root *root, u64 parent, 3853 struct btrfs_root *root,
2974 u64 root_objectid, u64 ref_generation, 3854 u64 parent, u64 root_objectid,
2975 u64 owner, struct btrfs_key *ins, 3855 u64 flags, u64 owner, u64 offset,
2976 int ref_mod) 3856 struct btrfs_key *ins, int ref_mod)
2977{ 3857{
2978 int ret; 3858 int ret;
2979 u64 super_used; 3859 struct btrfs_fs_info *fs_info = root->fs_info;
2980 u64 root_used;
2981 u64 num_bytes = ins->offset;
2982 u32 sizes[2];
2983 struct btrfs_fs_info *info = root->fs_info;
2984 struct btrfs_root *extent_root = info->extent_root;
2985 struct btrfs_extent_item *extent_item; 3860 struct btrfs_extent_item *extent_item;
2986 struct btrfs_extent_ref *ref; 3861 struct btrfs_extent_inline_ref *iref;
2987 struct btrfs_path *path; 3862 struct btrfs_path *path;
2988 struct btrfs_key keys[2]; 3863 struct extent_buffer *leaf;
2989 3864 int type;
2990 if (parent == 0) 3865 u32 size;
2991 parent = ins->objectid;
2992
2993 /* block accounting for super block */
2994 spin_lock(&info->delalloc_lock);
2995 super_used = btrfs_super_bytes_used(&info->super_copy);
2996 btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
2997 3866
2998 /* block accounting for root item */ 3867 if (parent > 0)
2999 root_used = btrfs_root_used(&root->root_item); 3868 type = BTRFS_SHARED_DATA_REF_KEY;
3000 btrfs_set_root_used(&root->root_item, root_used + num_bytes); 3869 else
3001 spin_unlock(&info->delalloc_lock); 3870 type = BTRFS_EXTENT_DATA_REF_KEY;
3002 3871
3003 memcpy(&keys[0], ins, sizeof(*ins)); 3872 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
3004 keys[1].objectid = ins->objectid;
3005 keys[1].type = BTRFS_EXTENT_REF_KEY;
3006 keys[1].offset = parent;
3007 sizes[0] = sizeof(*extent_item);
3008 sizes[1] = sizeof(*ref);
3009 3873
3010 path = btrfs_alloc_path(); 3874 path = btrfs_alloc_path();
3011 BUG_ON(!path); 3875 BUG_ON(!path);
3012 3876
3013 path->leave_spinning = 1; 3877 path->leave_spinning = 1;
3014 ret = btrfs_insert_empty_items(trans, extent_root, path, keys, 3878 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
3015 sizes, 2); 3879 ins, size);
3016 BUG_ON(ret); 3880 BUG_ON(ret);
3017 3881
3018 extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 3882 leaf = path->nodes[0];
3883 extent_item = btrfs_item_ptr(leaf, path->slots[0],
3019 struct btrfs_extent_item); 3884 struct btrfs_extent_item);
3020 btrfs_set_extent_refs(path->nodes[0], extent_item, ref_mod); 3885 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
3021 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, 3886 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
3022 struct btrfs_extent_ref); 3887 btrfs_set_extent_flags(leaf, extent_item,
3023 3888 flags | BTRFS_EXTENT_FLAG_DATA);
3024 btrfs_set_ref_root(path->nodes[0], ref, root_objectid); 3889
3025 btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); 3890 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
3026 btrfs_set_ref_objectid(path->nodes[0], ref, owner); 3891 btrfs_set_extent_inline_ref_type(leaf, iref, type);
3027 btrfs_set_ref_num_refs(path->nodes[0], ref, ref_mod); 3892 if (parent > 0) {
3893 struct btrfs_shared_data_ref *ref;
3894 ref = (struct btrfs_shared_data_ref *)(iref + 1);
3895 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
3896 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
3897 } else {
3898 struct btrfs_extent_data_ref *ref;
3899 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
3900 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
3901 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
3902 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
3903 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
3904 }
3028 3905
3029 btrfs_mark_buffer_dirty(path->nodes[0]); 3906 btrfs_mark_buffer_dirty(path->nodes[0]);
3030
3031 trans->alloc_exclude_start = 0;
3032 trans->alloc_exclude_nr = 0;
3033 btrfs_free_path(path); 3907 btrfs_free_path(path);
3034 3908
3035 if (ret) 3909 ret = update_block_group(trans, root, ins->objectid, ins->offset,
3036 goto out; 3910 1, 0);
3037
3038 ret = update_block_group(trans, root, ins->objectid,
3039 ins->offset, 1, 0);
3040 if (ret) { 3911 if (ret) {
3041 printk(KERN_ERR "btrfs update block group failed for %llu " 3912 printk(KERN_ERR "btrfs update block group failed for %llu "
3042 "%llu\n", (unsigned long long)ins->objectid, 3913 "%llu\n", (unsigned long long)ins->objectid,
3043 (unsigned long long)ins->offset); 3914 (unsigned long long)ins->offset);
3044 BUG(); 3915 BUG();
3045 } 3916 }
3046out:
3047 return ret; 3917 return ret;
3048} 3918}
3049 3919
3050int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, 3920static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
3051 struct btrfs_root *root, u64 parent, 3921 struct btrfs_root *root,
3052 u64 root_objectid, u64 ref_generation, 3922 u64 parent, u64 root_objectid,
3053 u64 owner, struct btrfs_key *ins) 3923 u64 flags, struct btrfs_disk_key *key,
3924 int level, struct btrfs_key *ins)
3054{ 3925{
3055 int ret; 3926 int ret;
3927 struct btrfs_fs_info *fs_info = root->fs_info;
3928 struct btrfs_extent_item *extent_item;
3929 struct btrfs_tree_block_info *block_info;
3930 struct btrfs_extent_inline_ref *iref;
3931 struct btrfs_path *path;
3932 struct extent_buffer *leaf;
3933 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref);
3056 3934
3057 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) 3935 path = btrfs_alloc_path();
3058 return 0; 3936 BUG_ON(!path);
3059 3937
3060 ret = btrfs_add_delayed_ref(trans, ins->objectid, 3938 path->leave_spinning = 1;
3061 ins->offset, parent, root_objectid, 3939 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
3062 ref_generation, owner, 3940 ins, size);
3063 BTRFS_ADD_DELAYED_EXTENT, 0);
3064 BUG_ON(ret); 3941 BUG_ON(ret);
3942
3943 leaf = path->nodes[0];
3944 extent_item = btrfs_item_ptr(leaf, path->slots[0],
3945 struct btrfs_extent_item);
3946 btrfs_set_extent_refs(leaf, extent_item, 1);
3947 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
3948 btrfs_set_extent_flags(leaf, extent_item,
3949 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
3950 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
3951
3952 btrfs_set_tree_block_key(leaf, block_info, key);
3953 btrfs_set_tree_block_level(leaf, block_info, level);
3954
3955 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
3956 if (parent > 0) {
3957 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
3958 btrfs_set_extent_inline_ref_type(leaf, iref,
3959 BTRFS_SHARED_BLOCK_REF_KEY);
3960 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
3961 } else {
3962 btrfs_set_extent_inline_ref_type(leaf, iref,
3963 BTRFS_TREE_BLOCK_REF_KEY);
3964 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
3965 }
3966
3967 btrfs_mark_buffer_dirty(leaf);
3968 btrfs_free_path(path);
3969
3970 ret = update_block_group(trans, root, ins->objectid, ins->offset,
3971 1, 0);
3972 if (ret) {
3973 printk(KERN_ERR "btrfs update block group failed for %llu "
3974 "%llu\n", (unsigned long long)ins->objectid,
3975 (unsigned long long)ins->offset);
3976 BUG();
3977 }
3978 return ret;
3979}
3980
3981int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
3982 struct btrfs_root *root,
3983 u64 root_objectid, u64 owner,
3984 u64 offset, struct btrfs_key *ins)
3985{
3986 int ret;
3987
3988 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
3989
3990 ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset,
3991 0, root_objectid, owner, offset,
3992 BTRFS_ADD_DELAYED_EXTENT, NULL);
3065 return ret; 3993 return ret;
3066} 3994}
3067 3995
@@ -3070,10 +3998,10 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3070 * an extent has been allocated and makes sure to clear the free 3998 * an extent has been allocated and makes sure to clear the free
3071 * space cache bits as well 3999 * space cache bits as well
3072 */ 4000 */
3073int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, 4001int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
3074 struct btrfs_root *root, u64 parent, 4002 struct btrfs_root *root,
3075 u64 root_objectid, u64 ref_generation, 4003 u64 root_objectid, u64 owner, u64 offset,
3076 u64 owner, struct btrfs_key *ins) 4004 struct btrfs_key *ins)
3077{ 4005{
3078 int ret; 4006 int ret;
3079 struct btrfs_block_group_cache *block_group; 4007 struct btrfs_block_group_cache *block_group;
@@ -3087,8 +4015,8 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
3087 ins->offset); 4015 ins->offset);
3088 BUG_ON(ret); 4016 BUG_ON(ret);
3089 btrfs_put_block_group(block_group); 4017 btrfs_put_block_group(block_group);
3090 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, 4018 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
3091 ref_generation, owner, ins, 1); 4019 0, owner, offset, ins, 1);
3092 return ret; 4020 return ret;
3093} 4021}
3094 4022
@@ -3099,26 +4027,48 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
3099 * 4027 *
3100 * returns 0 if everything worked, non-zero otherwise. 4028 * returns 0 if everything worked, non-zero otherwise.
3101 */ 4029 */
3102int btrfs_alloc_extent(struct btrfs_trans_handle *trans, 4030static int alloc_tree_block(struct btrfs_trans_handle *trans,
3103 struct btrfs_root *root, 4031 struct btrfs_root *root,
3104 u64 num_bytes, u64 parent, u64 min_alloc_size, 4032 u64 num_bytes, u64 parent, u64 root_objectid,
3105 u64 root_objectid, u64 ref_generation, 4033 struct btrfs_disk_key *key, int level,
3106 u64 owner_objectid, u64 empty_size, u64 hint_byte, 4034 u64 empty_size, u64 hint_byte, u64 search_end,
3107 u64 search_end, struct btrfs_key *ins, u64 data) 4035 struct btrfs_key *ins)
3108{ 4036{
3109 int ret; 4037 int ret;
3110 ret = __btrfs_reserve_extent(trans, root, num_bytes, 4038 u64 flags = 0;
3111 min_alloc_size, empty_size, hint_byte, 4039
3112 search_end, ins, data); 4040 ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
4041 empty_size, hint_byte, search_end,
4042 ins, 0);
3113 BUG_ON(ret); 4043 BUG_ON(ret);
4044
4045 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
4046 if (parent == 0)
4047 parent = ins->objectid;
4048 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
4049 } else
4050 BUG_ON(parent > 0);
4051
4052 update_reserved_extents(root, ins->objectid, ins->offset, 1);
3114 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 4053 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
3115 ret = btrfs_add_delayed_ref(trans, ins->objectid, 4054 struct btrfs_delayed_extent_op *extent_op;
3116 ins->offset, parent, root_objectid, 4055 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
3117 ref_generation, owner_objectid, 4056 BUG_ON(!extent_op);
3118 BTRFS_ADD_DELAYED_EXTENT, 0); 4057 if (key)
4058 memcpy(&extent_op->key, key, sizeof(extent_op->key));
4059 else
4060 memset(&extent_op->key, 0, sizeof(extent_op->key));
4061 extent_op->flags_to_set = flags;
4062 extent_op->update_key = 1;
4063 extent_op->update_flags = 1;
4064 extent_op->is_data = 0;
4065
4066 ret = btrfs_add_delayed_tree_ref(trans, ins->objectid,
4067 ins->offset, parent, root_objectid,
4068 level, BTRFS_ADD_DELAYED_EXTENT,
4069 extent_op);
3119 BUG_ON(ret); 4070 BUG_ON(ret);
3120 } 4071 }
3121 update_reserved_extents(root, ins->objectid, ins->offset, 1);
3122 return ret; 4072 return ret;
3123} 4073}
3124 4074
@@ -3157,21 +4107,17 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
3157 * returns the tree buffer or NULL. 4107 * returns the tree buffer or NULL.
3158 */ 4108 */
3159struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 4109struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
3160 struct btrfs_root *root, 4110 struct btrfs_root *root, u32 blocksize,
3161 u32 blocksize, u64 parent, 4111 u64 parent, u64 root_objectid,
3162 u64 root_objectid, 4112 struct btrfs_disk_key *key, int level,
3163 u64 ref_generation, 4113 u64 hint, u64 empty_size)
3164 int level,
3165 u64 hint,
3166 u64 empty_size)
3167{ 4114{
3168 struct btrfs_key ins; 4115 struct btrfs_key ins;
3169 int ret; 4116 int ret;
3170 struct extent_buffer *buf; 4117 struct extent_buffer *buf;
3171 4118
3172 ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize, 4119 ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid,
3173 root_objectid, ref_generation, level, 4120 key, level, empty_size, hint, (u64)-1, &ins);
3174 empty_size, hint, (u64)-1, &ins, 0);
3175 if (ret) { 4121 if (ret) {
3176 BUG_ON(ret > 0); 4122 BUG_ON(ret > 0);
3177 return ERR_PTR(ret); 4123 return ERR_PTR(ret);
@@ -3185,32 +4131,19 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
3185int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 4131int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
3186 struct btrfs_root *root, struct extent_buffer *leaf) 4132 struct btrfs_root *root, struct extent_buffer *leaf)
3187{ 4133{
3188 u64 leaf_owner; 4134 u64 disk_bytenr;
3189 u64 leaf_generation; 4135 u64 num_bytes;
3190 struct refsort *sorted;
3191 struct btrfs_key key; 4136 struct btrfs_key key;
3192 struct btrfs_file_extent_item *fi; 4137 struct btrfs_file_extent_item *fi;
4138 u32 nritems;
3193 int i; 4139 int i;
3194 int nritems;
3195 int ret; 4140 int ret;
3196 int refi = 0;
3197 int slot;
3198 4141
3199 BUG_ON(!btrfs_is_leaf(leaf)); 4142 BUG_ON(!btrfs_is_leaf(leaf));
3200 nritems = btrfs_header_nritems(leaf); 4143 nritems = btrfs_header_nritems(leaf);
3201 leaf_owner = btrfs_header_owner(leaf);
3202 leaf_generation = btrfs_header_generation(leaf);
3203 4144
3204 sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS);
3205 /* we do this loop twice. The first time we build a list
3206 * of the extents we have a reference on, then we sort the list
3207 * by bytenr. The second time around we actually do the
3208 * extent freeing.
3209 */
3210 for (i = 0; i < nritems; i++) { 4145 for (i = 0; i < nritems; i++) {
3211 u64 disk_bytenr;
3212 cond_resched(); 4146 cond_resched();
3213
3214 btrfs_item_key_to_cpu(leaf, &key, i); 4147 btrfs_item_key_to_cpu(leaf, &key, i);
3215 4148
3216 /* only extents have references, skip everything else */ 4149 /* only extents have references, skip everything else */
@@ -3230,45 +4163,16 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
3230 if (disk_bytenr == 0) 4163 if (disk_bytenr == 0)
3231 continue; 4164 continue;
3232 4165
3233 sorted[refi].bytenr = disk_bytenr; 4166 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
3234 sorted[refi].slot = i; 4167 ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes,
3235 refi++; 4168 leaf->start, 0, key.objectid, 0);
3236 }
3237
3238 if (refi == 0)
3239 goto out;
3240
3241 sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
3242
3243 for (i = 0; i < refi; i++) {
3244 u64 disk_bytenr;
3245
3246 disk_bytenr = sorted[i].bytenr;
3247 slot = sorted[i].slot;
3248
3249 cond_resched();
3250
3251 btrfs_item_key_to_cpu(leaf, &key, slot);
3252 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
3253 continue;
3254
3255 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
3256
3257 ret = btrfs_free_extent(trans, root, disk_bytenr,
3258 btrfs_file_extent_disk_num_bytes(leaf, fi),
3259 leaf->start, leaf_owner, leaf_generation,
3260 key.objectid, 0);
3261 BUG_ON(ret); 4169 BUG_ON(ret);
3262
3263 atomic_inc(&root->fs_info->throttle_gen);
3264 wake_up(&root->fs_info->transaction_throttle);
3265 cond_resched();
3266 } 4170 }
3267out:
3268 kfree(sorted);
3269 return 0; 4171 return 0;
3270} 4172}
3271 4173
4174#if 0
4175
3272static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, 4176static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
3273 struct btrfs_root *root, 4177 struct btrfs_root *root,
3274 struct btrfs_leaf_ref *ref) 4178 struct btrfs_leaf_ref *ref)
@@ -3311,13 +4215,14 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
3311 return 0; 4215 return 0;
3312} 4216}
3313 4217
4218
3314static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, 4219static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
3315 struct btrfs_root *root, u64 start, 4220 struct btrfs_root *root, u64 start,
3316 u64 len, u32 *refs) 4221 u64 len, u32 *refs)
3317{ 4222{
3318 int ret; 4223 int ret;
3319 4224
3320 ret = btrfs_lookup_extent_ref(trans, root, start, len, refs); 4225 ret = btrfs_lookup_extent_refs(trans, root, start, len, refs);
3321 BUG_ON(ret); 4226 BUG_ON(ret);
3322 4227
3323#if 0 /* some debugging code in case we see problems here */ 4228#if 0 /* some debugging code in case we see problems here */
@@ -3352,6 +4257,7 @@ static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
3352 return ret; 4257 return ret;
3353} 4258}
3354 4259
4260
3355/* 4261/*
3356 * this is used while deleting old snapshots, and it drops the refs 4262 * this is used while deleting old snapshots, and it drops the refs
3357 * on a whole subtree starting from a level 1 node. 4263 * on a whole subtree starting from a level 1 node.
@@ -3645,32 +4551,36 @@ out:
3645 cond_resched(); 4551 cond_resched();
3646 return 0; 4552 return 0;
3647} 4553}
4554#endif
3648 4555
3649/* 4556/*
3650 * helper function for drop_subtree, this function is similar to 4557 * helper function for drop_subtree, this function is similar to
3651 * walk_down_tree. The main difference is that it checks reference 4558 * walk_down_tree. The main difference is that it checks reference
3652 * counts while tree blocks are locked. 4559 * counts while tree blocks are locked.
3653 */ 4560 */
3654static noinline int walk_down_subtree(struct btrfs_trans_handle *trans, 4561static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
3655 struct btrfs_root *root, 4562 struct btrfs_root *root,
3656 struct btrfs_path *path, int *level) 4563 struct btrfs_path *path, int *level)
3657{ 4564{
3658 struct extent_buffer *next; 4565 struct extent_buffer *next;
3659 struct extent_buffer *cur; 4566 struct extent_buffer *cur;
3660 struct extent_buffer *parent; 4567 struct extent_buffer *parent;
3661 u64 bytenr; 4568 u64 bytenr;
3662 u64 ptr_gen; 4569 u64 ptr_gen;
4570 u64 refs;
4571 u64 flags;
3663 u32 blocksize; 4572 u32 blocksize;
3664 u32 refs;
3665 int ret; 4573 int ret;
3666 4574
3667 cur = path->nodes[*level]; 4575 cur = path->nodes[*level];
3668 ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len, 4576 ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len,
3669 &refs); 4577 &refs, &flags);
3670 BUG_ON(ret); 4578 BUG_ON(ret);
3671 if (refs > 1) 4579 if (refs > 1)
3672 goto out; 4580 goto out;
3673 4581
4582 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
4583
3674 while (*level >= 0) { 4584 while (*level >= 0) {
3675 cur = path->nodes[*level]; 4585 cur = path->nodes[*level];
3676 if (*level == 0) { 4586 if (*level == 0) {
@@ -3692,16 +4602,15 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans,
3692 btrfs_tree_lock(next); 4602 btrfs_tree_lock(next);
3693 btrfs_set_lock_blocking(next); 4603 btrfs_set_lock_blocking(next);
3694 4604
3695 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, 4605 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
3696 &refs); 4606 &refs, &flags);
3697 BUG_ON(ret); 4607 BUG_ON(ret);
3698 if (refs > 1) { 4608 if (refs > 1) {
3699 parent = path->nodes[*level]; 4609 parent = path->nodes[*level];
3700 ret = btrfs_free_extent(trans, root, bytenr, 4610 ret = btrfs_free_extent(trans, root, bytenr,
3701 blocksize, parent->start, 4611 blocksize, parent->start,
3702 btrfs_header_owner(parent), 4612 btrfs_header_owner(parent),
3703 btrfs_header_generation(parent), 4613 *level - 1, 0);
3704 *level - 1, 1);
3705 BUG_ON(ret); 4614 BUG_ON(ret);
3706 path->slots[*level]++; 4615 path->slots[*level]++;
3707 btrfs_tree_unlock(next); 4616 btrfs_tree_unlock(next);
@@ -3709,6 +4618,8 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans,
3709 continue; 4618 continue;
3710 } 4619 }
3711 4620
4621 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
4622
3712 *level = btrfs_header_level(next); 4623 *level = btrfs_header_level(next);
3713 path->nodes[*level] = next; 4624 path->nodes[*level] = next;
3714 path->slots[*level] = 0; 4625 path->slots[*level] = 0;
@@ -3716,13 +4627,15 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans,
3716 cond_resched(); 4627 cond_resched();
3717 } 4628 }
3718out: 4629out:
3719 parent = path->nodes[*level + 1]; 4630 if (path->nodes[*level] == root->node)
4631 parent = path->nodes[*level];
4632 else
4633 parent = path->nodes[*level + 1];
3720 bytenr = path->nodes[*level]->start; 4634 bytenr = path->nodes[*level]->start;
3721 blocksize = path->nodes[*level]->len; 4635 blocksize = path->nodes[*level]->len;
3722 4636
3723 ret = btrfs_free_extent(trans, root, bytenr, blocksize, 4637 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start,
3724 parent->start, btrfs_header_owner(parent), 4638 btrfs_header_owner(parent), *level, 0);
3725 btrfs_header_generation(parent), *level, 1);
3726 BUG_ON(ret); 4639 BUG_ON(ret);
3727 4640
3728 if (path->locks[*level]) { 4641 if (path->locks[*level]) {
@@ -3746,8 +4659,6 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
3746 struct btrfs_path *path, 4659 struct btrfs_path *path,
3747 int *level, int max_level) 4660 int *level, int max_level)
3748{ 4661{
3749 u64 root_owner;
3750 u64 root_gen;
3751 struct btrfs_root_item *root_item = &root->root_item; 4662 struct btrfs_root_item *root_item = &root->root_item;
3752 int i; 4663 int i;
3753 int slot; 4664 int slot;
@@ -3755,24 +4666,22 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
3755 4666
3756 for (i = *level; i < max_level && path->nodes[i]; i++) { 4667 for (i = *level; i < max_level && path->nodes[i]; i++) {
3757 slot = path->slots[i]; 4668 slot = path->slots[i];
3758 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { 4669 if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
3759 struct extent_buffer *node;
3760 struct btrfs_disk_key disk_key;
3761
3762 /* 4670 /*
3763 * there is more work to do in this level. 4671 * there is more work to do in this level.
3764 * Update the drop_progress marker to reflect 4672 * Update the drop_progress marker to reflect
3765 * the work we've done so far, and then bump 4673 * the work we've done so far, and then bump
3766 * the slot number 4674 * the slot number
3767 */ 4675 */
3768 node = path->nodes[i];
3769 path->slots[i]++; 4676 path->slots[i]++;
3770 *level = i;
3771 WARN_ON(*level == 0); 4677 WARN_ON(*level == 0);
3772 btrfs_node_key(node, &disk_key, path->slots[i]); 4678 if (max_level == BTRFS_MAX_LEVEL) {
3773 memcpy(&root_item->drop_progress, 4679 btrfs_node_key(path->nodes[i],
3774 &disk_key, sizeof(disk_key)); 4680 &root_item->drop_progress,
3775 root_item->drop_level = i; 4681 path->slots[i]);
4682 root_item->drop_level = i;
4683 }
4684 *level = i;
3776 return 0; 4685 return 0;
3777 } else { 4686 } else {
3778 struct extent_buffer *parent; 4687 struct extent_buffer *parent;
@@ -3786,22 +4695,20 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
3786 else 4695 else
3787 parent = path->nodes[*level + 1]; 4696 parent = path->nodes[*level + 1];
3788 4697
3789 root_owner = btrfs_header_owner(parent); 4698 clean_tree_block(trans, root, path->nodes[i]);
3790 root_gen = btrfs_header_generation(parent);
3791
3792 clean_tree_block(trans, root, path->nodes[*level]);
3793 ret = btrfs_free_extent(trans, root, 4699 ret = btrfs_free_extent(trans, root,
3794 path->nodes[*level]->start, 4700 path->nodes[i]->start,
3795 path->nodes[*level]->len, 4701 path->nodes[i]->len,
3796 parent->start, root_owner, 4702 parent->start,
3797 root_gen, *level, 1); 4703 btrfs_header_owner(parent),
4704 *level, 0);
3798 BUG_ON(ret); 4705 BUG_ON(ret);
3799 if (path->locks[*level]) { 4706 if (path->locks[*level]) {
3800 btrfs_tree_unlock(path->nodes[*level]); 4707 btrfs_tree_unlock(path->nodes[i]);
3801 path->locks[*level] = 0; 4708 path->locks[i] = 0;
3802 } 4709 }
3803 free_extent_buffer(path->nodes[*level]); 4710 free_extent_buffer(path->nodes[i]);
3804 path->nodes[*level] = NULL; 4711 path->nodes[i] = NULL;
3805 *level = i + 1; 4712 *level = i + 1;
3806 } 4713 }
3807 } 4714 }
@@ -3820,21 +4727,18 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3820 int wret; 4727 int wret;
3821 int level; 4728 int level;
3822 struct btrfs_path *path; 4729 struct btrfs_path *path;
3823 int i;
3824 int orig_level;
3825 int update_count; 4730 int update_count;
3826 struct btrfs_root_item *root_item = &root->root_item; 4731 struct btrfs_root_item *root_item = &root->root_item;
3827 4732
3828 WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
3829 path = btrfs_alloc_path(); 4733 path = btrfs_alloc_path();
3830 BUG_ON(!path); 4734 BUG_ON(!path);
3831 4735
3832 level = btrfs_header_level(root->node); 4736 level = btrfs_header_level(root->node);
3833 orig_level = level;
3834 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 4737 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3835 path->nodes[level] = root->node; 4738 path->nodes[level] = btrfs_lock_root_node(root);
3836 extent_buffer_get(root->node); 4739 btrfs_set_lock_blocking(path->nodes[level]);
3837 path->slots[level] = 0; 4740 path->slots[level] = 0;
4741 path->locks[level] = 1;
3838 } else { 4742 } else {
3839 struct btrfs_key key; 4743 struct btrfs_key key;
3840 struct btrfs_disk_key found_key; 4744 struct btrfs_disk_key found_key;
@@ -3856,12 +4760,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3856 * unlock our path, this is safe because only this 4760 * unlock our path, this is safe because only this
3857 * function is allowed to delete this snapshot 4761 * function is allowed to delete this snapshot
3858 */ 4762 */
3859 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 4763 btrfs_unlock_up_safe(path, 0);
3860 if (path->nodes[i] && path->locks[i]) {
3861 path->locks[i] = 0;
3862 btrfs_tree_unlock(path->nodes[i]);
3863 }
3864 }
3865 } 4764 }
3866 while (1) { 4765 while (1) {
3867 unsigned long update; 4766 unsigned long update;
@@ -3882,8 +4781,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3882 ret = -EAGAIN; 4781 ret = -EAGAIN;
3883 break; 4782 break;
3884 } 4783 }
3885 atomic_inc(&root->fs_info->throttle_gen);
3886 wake_up(&root->fs_info->transaction_throttle);
3887 for (update_count = 0; update_count < 16; update_count++) { 4784 for (update_count = 0; update_count < 16; update_count++) {
3888 update = trans->delayed_ref_updates; 4785 update = trans->delayed_ref_updates;
3889 trans->delayed_ref_updates = 0; 4786 trans->delayed_ref_updates = 0;
@@ -3893,12 +4790,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3893 break; 4790 break;
3894 } 4791 }
3895 } 4792 }
3896 for (i = 0; i <= orig_level; i++) {
3897 if (path->nodes[i]) {
3898 free_extent_buffer(path->nodes[i]);
3899 path->nodes[i] = NULL;
3900 }
3901 }
3902out: 4793out:
3903 btrfs_free_path(path); 4794 btrfs_free_path(path);
3904 return ret; 4795 return ret;
@@ -3931,7 +4822,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
3931 path->slots[level] = 0; 4822 path->slots[level] = 0;
3932 4823
3933 while (1) { 4824 while (1) {
3934 wret = walk_down_subtree(trans, root, path, &level); 4825 wret = walk_down_tree(trans, root, path, &level);
3935 if (wret < 0) 4826 if (wret < 0)
3936 ret = wret; 4827 ret = wret;
3937 if (wret != 0) 4828 if (wret != 0)
@@ -3948,6 +4839,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
3948 return ret; 4839 return ret;
3949} 4840}
3950 4841
4842#if 0
3951static unsigned long calc_ra(unsigned long start, unsigned long last, 4843static unsigned long calc_ra(unsigned long start, unsigned long last,
3952 unsigned long nr) 4844 unsigned long nr)
3953{ 4845{
@@ -5429,6 +6321,7 @@ out:
5429 kfree(ref_path); 6321 kfree(ref_path);
5430 return ret; 6322 return ret;
5431} 6323}
6324#endif
5432 6325
5433static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 6326static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
5434{ 6327{
@@ -5477,7 +6370,8 @@ static int __alloc_chunk_for_shrink(struct btrfs_root *root,
5477 u64 calc; 6370 u64 calc;
5478 6371
5479 spin_lock(&shrink_block_group->lock); 6372 spin_lock(&shrink_block_group->lock);
5480 if (btrfs_block_group_used(&shrink_block_group->item) > 0) { 6373 if (btrfs_block_group_used(&shrink_block_group->item) +
6374 shrink_block_group->reserved > 0) {
5481 spin_unlock(&shrink_block_group->lock); 6375 spin_unlock(&shrink_block_group->lock);
5482 6376
5483 trans = btrfs_start_transaction(root, 1); 6377 trans = btrfs_start_transaction(root, 1);
@@ -5502,6 +6396,17 @@ static int __alloc_chunk_for_shrink(struct btrfs_root *root,
5502 return 0; 6396 return 0;
5503} 6397}
5504 6398
6399
6400int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
6401 struct btrfs_block_group_cache *group)
6402
6403{
6404 __alloc_chunk_for_shrink(root, group, 1);
6405 set_block_group_readonly(group);
6406 return 0;
6407}
6408
6409#if 0
5505static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 6410static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
5506 struct btrfs_root *root, 6411 struct btrfs_root *root,
5507 u64 objectid, u64 size) 6412 u64 objectid, u64 size)
@@ -5781,6 +6686,7 @@ out:
5781 btrfs_free_path(path); 6686 btrfs_free_path(path);
5782 return ret; 6687 return ret;
5783} 6688}
6689#endif
5784 6690
5785static int find_first_block_group(struct btrfs_root *root, 6691static int find_first_block_group(struct btrfs_root *root,
5786 struct btrfs_path *path, struct btrfs_key *key) 6692 struct btrfs_path *path, struct btrfs_key *key)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fe9eb990e443..68260180f587 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -476,6 +476,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
476 struct extent_state *state; 476 struct extent_state *state;
477 struct extent_state *prealloc = NULL; 477 struct extent_state *prealloc = NULL;
478 struct rb_node *node; 478 struct rb_node *node;
479 u64 last_end;
479 int err; 480 int err;
480 int set = 0; 481 int set = 0;
481 482
@@ -498,6 +499,7 @@ again:
498 if (state->start > end) 499 if (state->start > end)
499 goto out; 500 goto out;
500 WARN_ON(state->end < start); 501 WARN_ON(state->end < start);
502 last_end = state->end;
501 503
502 /* 504 /*
503 * | ---- desired range ---- | 505 * | ---- desired range ---- |
@@ -524,9 +526,11 @@ again:
524 if (err) 526 if (err)
525 goto out; 527 goto out;
526 if (state->end <= end) { 528 if (state->end <= end) {
527 start = state->end + 1;
528 set |= clear_state_bit(tree, state, bits, 529 set |= clear_state_bit(tree, state, bits,
529 wake, delete); 530 wake, delete);
531 if (last_end == (u64)-1)
532 goto out;
533 start = last_end + 1;
530 } else { 534 } else {
531 start = state->start; 535 start = state->start;
532 } 536 }
@@ -552,8 +556,10 @@ again:
552 goto out; 556 goto out;
553 } 557 }
554 558
555 start = state->end + 1;
556 set |= clear_state_bit(tree, state, bits, wake, delete); 559 set |= clear_state_bit(tree, state, bits, wake, delete);
560 if (last_end == (u64)-1)
561 goto out;
562 start = last_end + 1;
557 goto search_again; 563 goto search_again;
558 564
559out: 565out:
@@ -707,8 +713,10 @@ again:
707 goto out; 713 goto out;
708 } 714 }
709 set_state_bits(tree, state, bits); 715 set_state_bits(tree, state, bits);
710 start = state->end + 1;
711 merge_state(tree, state); 716 merge_state(tree, state);
717 if (last_end == (u64)-1)
718 goto out;
719 start = last_end + 1;
712 goto search_again; 720 goto search_again;
713 } 721 }
714 722
@@ -742,8 +750,10 @@ again:
742 goto out; 750 goto out;
743 if (state->end <= end) { 751 if (state->end <= end) {
744 set_state_bits(tree, state, bits); 752 set_state_bits(tree, state, bits);
745 start = state->end + 1;
746 merge_state(tree, state); 753 merge_state(tree, state);
754 if (last_end == (u64)-1)
755 goto out;
756 start = last_end + 1;
747 } else { 757 } else {
748 start = state->start; 758 start = state->start;
749 } 759 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1d51dc38bb49..126477eaecf5 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -291,16 +291,12 @@ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
291{ 291{
292 u64 extent_end = 0; 292 u64 extent_end = 0;
293 u64 search_start = start; 293 u64 search_start = start;
294 u64 leaf_start;
295 u64 ram_bytes = 0; 294 u64 ram_bytes = 0;
296 u64 orig_parent = 0;
297 u64 disk_bytenr = 0; 295 u64 disk_bytenr = 0;
298 u64 orig_locked_end = locked_end; 296 u64 orig_locked_end = locked_end;
299 u8 compression; 297 u8 compression;
300 u8 encryption; 298 u8 encryption;
301 u16 other_encoding = 0; 299 u16 other_encoding = 0;
302 u64 root_gen;
303 u64 root_owner;
304 struct extent_buffer *leaf; 300 struct extent_buffer *leaf;
305 struct btrfs_file_extent_item *extent; 301 struct btrfs_file_extent_item *extent;
306 struct btrfs_path *path; 302 struct btrfs_path *path;
@@ -340,9 +336,6 @@ next_slot:
340 bookend = 0; 336 bookend = 0;
341 found_extent = 0; 337 found_extent = 0;
342 found_inline = 0; 338 found_inline = 0;
343 leaf_start = 0;
344 root_gen = 0;
345 root_owner = 0;
346 compression = 0; 339 compression = 0;
347 encryption = 0; 340 encryption = 0;
348 extent = NULL; 341 extent = NULL;
@@ -417,9 +410,6 @@ next_slot:
417 if (found_extent) { 410 if (found_extent) {
418 read_extent_buffer(leaf, &old, (unsigned long)extent, 411 read_extent_buffer(leaf, &old, (unsigned long)extent,
419 sizeof(old)); 412 sizeof(old));
420 root_gen = btrfs_header_generation(leaf);
421 root_owner = btrfs_header_owner(leaf);
422 leaf_start = leaf->start;
423 } 413 }
424 414
425 if (end < extent_end && end >= key.offset) { 415 if (end < extent_end && end >= key.offset) {
@@ -443,14 +433,14 @@ next_slot:
443 } 433 }
444 locked_end = extent_end; 434 locked_end = extent_end;
445 } 435 }
446 orig_parent = path->nodes[0]->start;
447 disk_bytenr = le64_to_cpu(old.disk_bytenr); 436 disk_bytenr = le64_to_cpu(old.disk_bytenr);
448 if (disk_bytenr != 0) { 437 if (disk_bytenr != 0) {
449 ret = btrfs_inc_extent_ref(trans, root, 438 ret = btrfs_inc_extent_ref(trans, root,
450 disk_bytenr, 439 disk_bytenr,
451 le64_to_cpu(old.disk_num_bytes), 440 le64_to_cpu(old.disk_num_bytes), 0,
452 orig_parent, root->root_key.objectid, 441 root->root_key.objectid,
453 trans->transid, inode->i_ino); 442 key.objectid, key.offset -
443 le64_to_cpu(old.offset));
454 BUG_ON(ret); 444 BUG_ON(ret);
455 } 445 }
456 } 446 }
@@ -568,17 +558,6 @@ next_slot:
568 btrfs_mark_buffer_dirty(path->nodes[0]); 558 btrfs_mark_buffer_dirty(path->nodes[0]);
569 btrfs_set_lock_blocking(path->nodes[0]); 559 btrfs_set_lock_blocking(path->nodes[0]);
570 560
571 if (disk_bytenr != 0) {
572 ret = btrfs_update_extent_ref(trans, root,
573 disk_bytenr,
574 le64_to_cpu(old.disk_num_bytes),
575 orig_parent,
576 leaf->start,
577 root->root_key.objectid,
578 trans->transid, ins.objectid);
579
580 BUG_ON(ret);
581 }
582 path->leave_spinning = 0; 561 path->leave_spinning = 0;
583 btrfs_release_path(root, path); 562 btrfs_release_path(root, path);
584 if (disk_bytenr != 0) 563 if (disk_bytenr != 0)
@@ -594,8 +573,9 @@ next_slot:
594 ret = btrfs_free_extent(trans, root, 573 ret = btrfs_free_extent(trans, root,
595 old_disk_bytenr, 574 old_disk_bytenr,
596 le64_to_cpu(old.disk_num_bytes), 575 le64_to_cpu(old.disk_num_bytes),
597 leaf_start, root_owner, 576 0, root->root_key.objectid,
598 root_gen, key.objectid, 0); 577 key.objectid, key.offset -
578 le64_to_cpu(old.offset));
599 BUG_ON(ret); 579 BUG_ON(ret);
600 *hint_byte = old_disk_bytenr; 580 *hint_byte = old_disk_bytenr;
601 } 581 }
@@ -664,12 +644,11 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
664 u64 bytenr; 644 u64 bytenr;
665 u64 num_bytes; 645 u64 num_bytes;
666 u64 extent_end; 646 u64 extent_end;
667 u64 extent_offset; 647 u64 orig_offset;
668 u64 other_start; 648 u64 other_start;
669 u64 other_end; 649 u64 other_end;
670 u64 split = start; 650 u64 split = start;
671 u64 locked_end = end; 651 u64 locked_end = end;
672 u64 orig_parent;
673 int extent_type; 652 int extent_type;
674 int split_end = 1; 653 int split_end = 1;
675 int ret; 654 int ret;
@@ -703,7 +682,7 @@ again:
703 682
704 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 683 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
705 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 684 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
706 extent_offset = btrfs_file_extent_offset(leaf, fi); 685 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
707 686
708 if (key.offset == start) 687 if (key.offset == start)
709 split = end; 688 split = end;
@@ -711,8 +690,6 @@ again:
711 if (key.offset == start && extent_end == end) { 690 if (key.offset == start && extent_end == end) {
712 int del_nr = 0; 691 int del_nr = 0;
713 int del_slot = 0; 692 int del_slot = 0;
714 u64 leaf_owner = btrfs_header_owner(leaf);
715 u64 leaf_gen = btrfs_header_generation(leaf);
716 other_start = end; 693 other_start = end;
717 other_end = 0; 694 other_end = 0;
718 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, 695 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
@@ -721,8 +698,8 @@ again:
721 del_slot = path->slots[0] + 1; 698 del_slot = path->slots[0] + 1;
722 del_nr++; 699 del_nr++;
723 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 700 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
724 leaf->start, leaf_owner, 701 0, root->root_key.objectid,
725 leaf_gen, inode->i_ino, 0); 702 inode->i_ino, orig_offset);
726 BUG_ON(ret); 703 BUG_ON(ret);
727 } 704 }
728 other_start = 0; 705 other_start = 0;
@@ -733,8 +710,8 @@ again:
733 del_slot = path->slots[0]; 710 del_slot = path->slots[0];
734 del_nr++; 711 del_nr++;
735 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 712 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
736 leaf->start, leaf_owner, 713 0, root->root_key.objectid,
737 leaf_gen, inode->i_ino, 0); 714 inode->i_ino, orig_offset);
738 BUG_ON(ret); 715 BUG_ON(ret);
739 } 716 }
740 split_end = 0; 717 split_end = 0;
@@ -768,13 +745,12 @@ again:
768 locked_end = extent_end; 745 locked_end = extent_end;
769 } 746 }
770 btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset); 747 btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset);
771 extent_offset += split - key.offset;
772 } else { 748 } else {
773 BUG_ON(key.offset != start); 749 BUG_ON(key.offset != start);
774 btrfs_set_file_extent_offset(leaf, fi, extent_offset +
775 split - key.offset);
776 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
777 key.offset = split; 750 key.offset = split;
751 btrfs_set_file_extent_offset(leaf, fi, key.offset -
752 orig_offset);
753 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
778 btrfs_set_item_key_safe(trans, root, path, &key); 754 btrfs_set_item_key_safe(trans, root, path, &key);
779 extent_end = split; 755 extent_end = split;
780 } 756 }
@@ -793,7 +769,8 @@ again:
793 struct btrfs_file_extent_item); 769 struct btrfs_file_extent_item);
794 key.offset = split; 770 key.offset = split;
795 btrfs_set_item_key_safe(trans, root, path, &key); 771 btrfs_set_item_key_safe(trans, root, path, &key);
796 btrfs_set_file_extent_offset(leaf, fi, extent_offset); 772 btrfs_set_file_extent_offset(leaf, fi, key.offset -
773 orig_offset);
797 btrfs_set_file_extent_num_bytes(leaf, fi, 774 btrfs_set_file_extent_num_bytes(leaf, fi,
798 other_end - split); 775 other_end - split);
799 goto done; 776 goto done;
@@ -815,10 +792,9 @@ again:
815 792
816 btrfs_mark_buffer_dirty(leaf); 793 btrfs_mark_buffer_dirty(leaf);
817 794
818 orig_parent = leaf->start; 795 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
819 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 796 root->root_key.objectid,
820 orig_parent, root->root_key.objectid, 797 inode->i_ino, orig_offset);
821 trans->transid, inode->i_ino);
822 BUG_ON(ret); 798 BUG_ON(ret);
823 btrfs_release_path(root, path); 799 btrfs_release_path(root, path);
824 800
@@ -833,20 +809,12 @@ again:
833 btrfs_set_file_extent_type(leaf, fi, extent_type); 809 btrfs_set_file_extent_type(leaf, fi, extent_type);
834 btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr); 810 btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr);
835 btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes); 811 btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
836 btrfs_set_file_extent_offset(leaf, fi, extent_offset); 812 btrfs_set_file_extent_offset(leaf, fi, key.offset - orig_offset);
837 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset); 813 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset);
838 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); 814 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
839 btrfs_set_file_extent_compression(leaf, fi, 0); 815 btrfs_set_file_extent_compression(leaf, fi, 0);
840 btrfs_set_file_extent_encryption(leaf, fi, 0); 816 btrfs_set_file_extent_encryption(leaf, fi, 0);
841 btrfs_set_file_extent_other_encoding(leaf, fi, 0); 817 btrfs_set_file_extent_other_encoding(leaf, fi, 0);
842
843 if (orig_parent != leaf->start) {
844 ret = btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
845 orig_parent, leaf->start,
846 root->root_key.objectid,
847 trans->transid, inode->i_ino);
848 BUG_ON(ret);
849 }
850done: 818done:
851 btrfs_mark_buffer_dirty(leaf); 819 btrfs_mark_buffer_dirty(leaf);
852 820
@@ -1189,6 +1157,8 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
1189 btrfs_wait_ordered_range(inode, 0, (u64)-1); 1157 btrfs_wait_ordered_range(inode, 0, (u64)-1);
1190 root->log_batch++; 1158 root->log_batch++;
1191 1159
1160 if (datasync && !(inode->i_state & I_DIRTY_PAGES))
1161 goto out;
1192 /* 1162 /*
1193 * ok we haven't committed the transaction yet, lets do a commit 1163 * ok we haven't committed the transaction yet, lets do a commit
1194 */ 1164 */
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 0bc93657b460..4538e48581a5 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -579,6 +579,7 @@ out:
579 * it returns -enospc 579 * it returns -enospc
580 */ 580 */
581int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, 581int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
582 struct btrfs_root *root,
582 struct btrfs_block_group_cache *block_group, 583 struct btrfs_block_group_cache *block_group,
583 struct btrfs_free_cluster *cluster, 584 struct btrfs_free_cluster *cluster,
584 u64 offset, u64 bytes, u64 empty_size) 585 u64 offset, u64 bytes, u64 empty_size)
@@ -595,7 +596,9 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
595 int ret; 596 int ret;
596 597
597 /* for metadata, allow allocates with more holes */ 598 /* for metadata, allow allocates with more holes */
598 if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 599 if (btrfs_test_opt(root, SSD_SPREAD)) {
600 min_bytes = bytes + empty_size;
601 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
599 /* 602 /*
600 * we want to do larger allocations when we are 603 * we want to do larger allocations when we are
601 * flushing out the delayed refs, it helps prevent 604 * flushing out the delayed refs, it helps prevent
@@ -645,14 +648,15 @@ again:
645 * we haven't filled the empty size and the window is 648 * we haven't filled the empty size and the window is
646 * very large. reset and try again 649 * very large. reset and try again
647 */ 650 */
648 if (next->offset - window_start > (bytes + empty_size) * 2) { 651 if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
652 next->offset - window_start > (bytes + empty_size) * 2) {
649 entry = next; 653 entry = next;
650 window_start = entry->offset; 654 window_start = entry->offset;
651 window_free = entry->bytes; 655 window_free = entry->bytes;
652 last = entry; 656 last = entry;
653 max_extent = 0; 657 max_extent = 0;
654 total_retries++; 658 total_retries++;
655 if (total_retries % 256 == 0) { 659 if (total_retries % 64 == 0) {
656 if (min_bytes >= (bytes + empty_size)) { 660 if (min_bytes >= (bytes + empty_size)) {
657 ret = -ENOSPC; 661 ret = -ENOSPC;
658 goto out; 662 goto out;
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index ab0bdc0a63ce..266fb8764054 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -31,6 +31,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
31 u64 bytes); 31 u64 bytes);
32u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group); 32u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
33int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, 33int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
34 struct btrfs_root *root,
34 struct btrfs_block_group_cache *block_group, 35 struct btrfs_block_group_cache *block_group,
35 struct btrfs_free_cluster *cluster, 36 struct btrfs_free_cluster *cluster,
36 u64 offset, u64 bytes, u64 empty_size); 37 u64 offset, u64 bytes, u64 empty_size);
diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h
index 2a020b276768..db2ff9773b99 100644
--- a/fs/btrfs/hash.h
+++ b/fs/btrfs/hash.h
@@ -19,9 +19,9 @@
19#ifndef __HASH__ 19#ifndef __HASH__
20#define __HASH__ 20#define __HASH__
21 21
22#include "crc32c.h" 22#include <linux/crc32c.h>
23static inline u64 btrfs_name_hash(const char *name, int len) 23static inline u64 btrfs_name_hash(const char *name, int len)
24{ 24{
25 return btrfs_crc32c((u32)~1, name, len); 25 return crc32c((u32)~1, name, len);
26} 26}
27#endif 27#endif
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1c8b0190d031..5b68330f8585 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -48,7 +48,6 @@
48#include "ordered-data.h" 48#include "ordered-data.h"
49#include "xattr.h" 49#include "xattr.h"
50#include "tree-log.h" 50#include "tree-log.h"
51#include "ref-cache.h"
52#include "compression.h" 51#include "compression.h"
53#include "locking.h" 52#include "locking.h"
54 53
@@ -369,7 +368,7 @@ again:
369 * inode has not been flagged as nocompress. This flag can 368 * inode has not been flagged as nocompress. This flag can
370 * change at any time if we discover bad compression ratios. 369 * change at any time if we discover bad compression ratios.
371 */ 370 */
372 if (!btrfs_test_flag(inode, NOCOMPRESS) && 371 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) &&
373 btrfs_test_opt(root, COMPRESS)) { 372 btrfs_test_opt(root, COMPRESS)) {
374 WARN_ON(pages); 373 WARN_ON(pages);
375 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); 374 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
@@ -470,7 +469,7 @@ again:
470 nr_pages_ret = 0; 469 nr_pages_ret = 0;
471 470
472 /* flag the file so we don't compress in the future */ 471 /* flag the file so we don't compress in the future */
473 btrfs_set_flag(inode, NOCOMPRESS); 472 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
474 } 473 }
475 if (will_compress) { 474 if (will_compress) {
476 *num_added += 1; 475 *num_added += 1;
@@ -863,7 +862,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
863 async_cow->locked_page = locked_page; 862 async_cow->locked_page = locked_page;
864 async_cow->start = start; 863 async_cow->start = start;
865 864
866 if (btrfs_test_flag(inode, NOCOMPRESS)) 865 if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
867 cur_end = end; 866 cur_end = end;
868 else 867 else
869 cur_end = min(end, start + 512 * 1024 - 1); 868 cur_end = min(end, start + 512 * 1024 - 1);
@@ -944,6 +943,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
944 u64 cow_start; 943 u64 cow_start;
945 u64 cur_offset; 944 u64 cur_offset;
946 u64 extent_end; 945 u64 extent_end;
946 u64 extent_offset;
947 u64 disk_bytenr; 947 u64 disk_bytenr;
948 u64 num_bytes; 948 u64 num_bytes;
949 int extent_type; 949 int extent_type;
@@ -1005,6 +1005,7 @@ next_slot:
1005 if (extent_type == BTRFS_FILE_EXTENT_REG || 1005 if (extent_type == BTRFS_FILE_EXTENT_REG ||
1006 extent_type == BTRFS_FILE_EXTENT_PREALLOC) { 1006 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1007 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 1007 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1008 extent_offset = btrfs_file_extent_offset(leaf, fi);
1008 extent_end = found_key.offset + 1009 extent_end = found_key.offset +
1009 btrfs_file_extent_num_bytes(leaf, fi); 1010 btrfs_file_extent_num_bytes(leaf, fi);
1010 if (extent_end <= start) { 1011 if (extent_end <= start) {
@@ -1022,9 +1023,10 @@ next_slot:
1022 if (btrfs_extent_readonly(root, disk_bytenr)) 1023 if (btrfs_extent_readonly(root, disk_bytenr))
1023 goto out_check; 1024 goto out_check;
1024 if (btrfs_cross_ref_exist(trans, root, inode->i_ino, 1025 if (btrfs_cross_ref_exist(trans, root, inode->i_ino,
1025 disk_bytenr)) 1026 found_key.offset -
1027 extent_offset, disk_bytenr))
1026 goto out_check; 1028 goto out_check;
1027 disk_bytenr += btrfs_file_extent_offset(leaf, fi); 1029 disk_bytenr += extent_offset;
1028 disk_bytenr += cur_offset - found_key.offset; 1030 disk_bytenr += cur_offset - found_key.offset;
1029 num_bytes = min(end + 1, extent_end) - cur_offset; 1031 num_bytes = min(end + 1, extent_end) - cur_offset;
1030 /* 1032 /*
@@ -1131,10 +1133,10 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1131 int ret; 1133 int ret;
1132 struct btrfs_root *root = BTRFS_I(inode)->root; 1134 struct btrfs_root *root = BTRFS_I(inode)->root;
1133 1135
1134 if (btrfs_test_flag(inode, NODATACOW)) 1136 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)
1135 ret = run_delalloc_nocow(inode, locked_page, start, end, 1137 ret = run_delalloc_nocow(inode, locked_page, start, end,
1136 page_started, 1, nr_written); 1138 page_started, 1, nr_written);
1137 else if (btrfs_test_flag(inode, PREALLOC)) 1139 else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)
1138 ret = run_delalloc_nocow(inode, locked_page, start, end, 1140 ret = run_delalloc_nocow(inode, locked_page, start, end,
1139 page_started, 0, nr_written); 1141 page_started, 0, nr_written);
1140 else if (!btrfs_test_opt(root, COMPRESS)) 1142 else if (!btrfs_test_opt(root, COMPRESS))
@@ -1288,7 +1290,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1288 int ret = 0; 1290 int ret = 0;
1289 int skip_sum; 1291 int skip_sum;
1290 1292
1291 skip_sum = btrfs_test_flag(inode, NODATASUM); 1293 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
1292 1294
1293 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 1295 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
1294 BUG_ON(ret); 1296 BUG_ON(ret);
@@ -1489,9 +1491,9 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1489 ins.objectid = disk_bytenr; 1491 ins.objectid = disk_bytenr;
1490 ins.offset = disk_num_bytes; 1492 ins.offset = disk_num_bytes;
1491 ins.type = BTRFS_EXTENT_ITEM_KEY; 1493 ins.type = BTRFS_EXTENT_ITEM_KEY;
1492 ret = btrfs_alloc_reserved_extent(trans, root, leaf->start, 1494 ret = btrfs_alloc_reserved_file_extent(trans, root,
1493 root->root_key.objectid, 1495 root->root_key.objectid,
1494 trans->transid, inode->i_ino, &ins); 1496 inode->i_ino, file_pos, &ins);
1495 BUG_ON(ret); 1497 BUG_ON(ret);
1496 btrfs_free_path(path); 1498 btrfs_free_path(path);
1497 1499
@@ -1788,7 +1790,8 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
1788 ClearPageChecked(page); 1790 ClearPageChecked(page);
1789 goto good; 1791 goto good;
1790 } 1792 }
1791 if (btrfs_test_flag(inode, NODATASUM)) 1793
1794 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
1792 return 0; 1795 return 0;
1793 1796
1794 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && 1797 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
@@ -1956,23 +1959,13 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
1956 * crossing root thing. we store the inode number in the 1959 * crossing root thing. we store the inode number in the
1957 * offset of the orphan item. 1960 * offset of the orphan item.
1958 */ 1961 */
1959 inode = btrfs_iget_locked(root->fs_info->sb, 1962 found_key.objectid = found_key.offset;
1960 found_key.offset, root); 1963 found_key.type = BTRFS_INODE_ITEM_KEY;
1961 if (!inode) 1964 found_key.offset = 0;
1965 inode = btrfs_iget(root->fs_info->sb, &found_key, root);
1966 if (IS_ERR(inode))
1962 break; 1967 break;
1963 1968
1964 if (inode->i_state & I_NEW) {
1965 BTRFS_I(inode)->root = root;
1966
1967 /* have to set the location manually */
1968 BTRFS_I(inode)->location.objectid = inode->i_ino;
1969 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
1970 BTRFS_I(inode)->location.offset = 0;
1971
1972 btrfs_read_locked_inode(inode);
1973 unlock_new_inode(inode);
1974 }
1975
1976 /* 1969 /*
1977 * add this inode to the orphan list so btrfs_orphan_del does 1970 * add this inode to the orphan list so btrfs_orphan_del does
1978 * the proper thing when we hit it 1971 * the proper thing when we hit it
@@ -2069,7 +2062,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
2069/* 2062/*
2070 * read an inode from the btree into the in-memory inode 2063 * read an inode from the btree into the in-memory inode
2071 */ 2064 */
2072void btrfs_read_locked_inode(struct inode *inode) 2065static void btrfs_read_locked_inode(struct inode *inode)
2073{ 2066{
2074 struct btrfs_path *path; 2067 struct btrfs_path *path;
2075 struct extent_buffer *leaf; 2068 struct extent_buffer *leaf;
@@ -2164,6 +2157,8 @@ void btrfs_read_locked_inode(struct inode *inode)
2164 init_special_inode(inode, inode->i_mode, rdev); 2157 init_special_inode(inode, inode->i_mode, rdev);
2165 break; 2158 break;
2166 } 2159 }
2160
2161 btrfs_update_iflags(inode);
2167 return; 2162 return;
2168 2163
2169make_bad: 2164make_bad:
@@ -2599,9 +2594,8 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2599 struct btrfs_file_extent_item *fi; 2594 struct btrfs_file_extent_item *fi;
2600 u64 extent_start = 0; 2595 u64 extent_start = 0;
2601 u64 extent_num_bytes = 0; 2596 u64 extent_num_bytes = 0;
2597 u64 extent_offset = 0;
2602 u64 item_end = 0; 2598 u64 item_end = 0;
2603 u64 root_gen = 0;
2604 u64 root_owner = 0;
2605 int found_extent; 2599 int found_extent;
2606 int del_item; 2600 int del_item;
2607 int pending_del_nr = 0; 2601 int pending_del_nr = 0;
@@ -2716,6 +2710,9 @@ search_again:
2716 extent_num_bytes = 2710 extent_num_bytes =
2717 btrfs_file_extent_disk_num_bytes(leaf, 2711 btrfs_file_extent_disk_num_bytes(leaf,
2718 fi); 2712 fi);
2713 extent_offset = found_key.offset -
2714 btrfs_file_extent_offset(leaf, fi);
2715
2719 /* FIXME blocksize != 4096 */ 2716 /* FIXME blocksize != 4096 */
2720 num_dec = btrfs_file_extent_num_bytes(leaf, fi); 2717 num_dec = btrfs_file_extent_num_bytes(leaf, fi);
2721 if (extent_start != 0) { 2718 if (extent_start != 0) {
@@ -2723,8 +2720,6 @@ search_again:
2723 if (root->ref_cows) 2720 if (root->ref_cows)
2724 inode_sub_bytes(inode, num_dec); 2721 inode_sub_bytes(inode, num_dec);
2725 } 2722 }
2726 root_gen = btrfs_header_generation(leaf);
2727 root_owner = btrfs_header_owner(leaf);
2728 } 2723 }
2729 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 2724 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
2730 /* 2725 /*
@@ -2768,12 +2763,12 @@ delete:
2768 } else { 2763 } else {
2769 break; 2764 break;
2770 } 2765 }
2771 if (found_extent) { 2766 if (found_extent && root->ref_cows) {
2772 btrfs_set_path_blocking(path); 2767 btrfs_set_path_blocking(path);
2773 ret = btrfs_free_extent(trans, root, extent_start, 2768 ret = btrfs_free_extent(trans, root, extent_start,
2774 extent_num_bytes, 2769 extent_num_bytes, 0,
2775 leaf->start, root_owner, 2770 btrfs_header_owner(leaf),
2776 root_gen, inode->i_ino, 0); 2771 inode->i_ino, extent_offset);
2777 BUG_ON(ret); 2772 BUG_ON(ret);
2778 } 2773 }
2779next: 2774next:
@@ -3105,6 +3100,45 @@ static int fixup_tree_root_location(struct btrfs_root *root,
3105 return 0; 3100 return 0;
3106} 3101}
3107 3102
3103static void inode_tree_add(struct inode *inode)
3104{
3105 struct btrfs_root *root = BTRFS_I(inode)->root;
3106 struct btrfs_inode *entry;
3107 struct rb_node **p = &root->inode_tree.rb_node;
3108 struct rb_node *parent = NULL;
3109
3110 spin_lock(&root->inode_lock);
3111 while (*p) {
3112 parent = *p;
3113 entry = rb_entry(parent, struct btrfs_inode, rb_node);
3114
3115 if (inode->i_ino < entry->vfs_inode.i_ino)
3116 p = &(*p)->rb_left;
3117 else if (inode->i_ino > entry->vfs_inode.i_ino)
3118 p = &(*p)->rb_right;
3119 else {
3120 WARN_ON(!(entry->vfs_inode.i_state &
3121 (I_WILL_FREE | I_FREEING | I_CLEAR)));
3122 break;
3123 }
3124 }
3125 rb_link_node(&BTRFS_I(inode)->rb_node, parent, p);
3126 rb_insert_color(&BTRFS_I(inode)->rb_node, &root->inode_tree);
3127 spin_unlock(&root->inode_lock);
3128}
3129
3130static void inode_tree_del(struct inode *inode)
3131{
3132 struct btrfs_root *root = BTRFS_I(inode)->root;
3133
3134 if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
3135 spin_lock(&root->inode_lock);
3136 rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
3137 spin_unlock(&root->inode_lock);
3138 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3139 }
3140}
3141
3108static noinline void init_btrfs_i(struct inode *inode) 3142static noinline void init_btrfs_i(struct inode *inode)
3109{ 3143{
3110 struct btrfs_inode *bi = BTRFS_I(inode); 3144 struct btrfs_inode *bi = BTRFS_I(inode);
@@ -3130,6 +3164,7 @@ static noinline void init_btrfs_i(struct inode *inode)
3130 inode->i_mapping, GFP_NOFS); 3164 inode->i_mapping, GFP_NOFS);
3131 INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); 3165 INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes);
3132 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); 3166 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
3167 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3133 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); 3168 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
3134 mutex_init(&BTRFS_I(inode)->extent_mutex); 3169 mutex_init(&BTRFS_I(inode)->extent_mutex);
3135 mutex_init(&BTRFS_I(inode)->log_mutex); 3170 mutex_init(&BTRFS_I(inode)->log_mutex);
@@ -3152,26 +3187,9 @@ static int btrfs_find_actor(struct inode *inode, void *opaque)
3152 args->root == BTRFS_I(inode)->root; 3187 args->root == BTRFS_I(inode)->root;
3153} 3188}
3154 3189
3155struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, 3190static struct inode *btrfs_iget_locked(struct super_block *s,
3156 struct btrfs_root *root, int wait) 3191 u64 objectid,
3157{ 3192 struct btrfs_root *root)
3158 struct inode *inode;
3159 struct btrfs_iget_args args;
3160 args.ino = objectid;
3161 args.root = root;
3162
3163 if (wait) {
3164 inode = ilookup5(s, objectid, btrfs_find_actor,
3165 (void *)&args);
3166 } else {
3167 inode = ilookup5_nowait(s, objectid, btrfs_find_actor,
3168 (void *)&args);
3169 }
3170 return inode;
3171}
3172
3173struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
3174 struct btrfs_root *root)
3175{ 3193{
3176 struct inode *inode; 3194 struct inode *inode;
3177 struct btrfs_iget_args args; 3195 struct btrfs_iget_args args;
@@ -3188,24 +3206,21 @@ struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
3188 * Returns in *is_new if the inode was read from disk 3206 * Returns in *is_new if the inode was read from disk
3189 */ 3207 */
3190struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, 3208struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3191 struct btrfs_root *root, int *is_new) 3209 struct btrfs_root *root)
3192{ 3210{
3193 struct inode *inode; 3211 struct inode *inode;
3194 3212
3195 inode = btrfs_iget_locked(s, location->objectid, root); 3213 inode = btrfs_iget_locked(s, location->objectid, root);
3196 if (!inode) 3214 if (!inode)
3197 return ERR_PTR(-EACCES); 3215 return ERR_PTR(-ENOMEM);
3198 3216
3199 if (inode->i_state & I_NEW) { 3217 if (inode->i_state & I_NEW) {
3200 BTRFS_I(inode)->root = root; 3218 BTRFS_I(inode)->root = root;
3201 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); 3219 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
3202 btrfs_read_locked_inode(inode); 3220 btrfs_read_locked_inode(inode);
3221
3222 inode_tree_add(inode);
3203 unlock_new_inode(inode); 3223 unlock_new_inode(inode);
3204 if (is_new)
3205 *is_new = 1;
3206 } else {
3207 if (is_new)
3208 *is_new = 0;
3209 } 3224 }
3210 3225
3211 return inode; 3226 return inode;
@@ -3218,7 +3233,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3218 struct btrfs_root *root = bi->root; 3233 struct btrfs_root *root = bi->root;
3219 struct btrfs_root *sub_root = root; 3234 struct btrfs_root *sub_root = root;
3220 struct btrfs_key location; 3235 struct btrfs_key location;
3221 int ret, new; 3236 int ret;
3222 3237
3223 if (dentry->d_name.len > BTRFS_NAME_LEN) 3238 if (dentry->d_name.len > BTRFS_NAME_LEN)
3224 return ERR_PTR(-ENAMETOOLONG); 3239 return ERR_PTR(-ENAMETOOLONG);
@@ -3236,7 +3251,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3236 return ERR_PTR(ret); 3251 return ERR_PTR(ret);
3237 if (ret > 0) 3252 if (ret > 0)
3238 return ERR_PTR(-ENOENT); 3253 return ERR_PTR(-ENOENT);
3239 inode = btrfs_iget(dir->i_sb, &location, sub_root, &new); 3254 inode = btrfs_iget(dir->i_sb, &location, sub_root);
3240 if (IS_ERR(inode)) 3255 if (IS_ERR(inode))
3241 return ERR_CAST(inode); 3256 return ERR_CAST(inode);
3242 } 3257 }
@@ -3574,9 +3589,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
3574 btrfs_find_block_group(root, 0, alloc_hint, owner); 3589 btrfs_find_block_group(root, 0, alloc_hint, owner);
3575 if ((mode & S_IFREG)) { 3590 if ((mode & S_IFREG)) {
3576 if (btrfs_test_opt(root, NODATASUM)) 3591 if (btrfs_test_opt(root, NODATASUM))
3577 btrfs_set_flag(inode, NODATASUM); 3592 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
3578 if (btrfs_test_opt(root, NODATACOW)) 3593 if (btrfs_test_opt(root, NODATACOW))
3579 btrfs_set_flag(inode, NODATACOW); 3594 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
3580 } 3595 }
3581 3596
3582 key[0].objectid = objectid; 3597 key[0].objectid = objectid;
@@ -3630,7 +3645,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
3630 location->offset = 0; 3645 location->offset = 0;
3631 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); 3646 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
3632 3647
3648 btrfs_inherit_iflags(inode, dir);
3649
3633 insert_inode_hash(inode); 3650 insert_inode_hash(inode);
3651 inode_tree_add(inode);
3634 return inode; 3652 return inode;
3635fail: 3653fail:
3636 if (dir) 3654 if (dir)
@@ -4683,6 +4701,7 @@ void btrfs_destroy_inode(struct inode *inode)
4683 btrfs_put_ordered_extent(ordered); 4701 btrfs_put_ordered_extent(ordered);
4684 } 4702 }
4685 } 4703 }
4704 inode_tree_del(inode);
4686 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); 4705 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
4687 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); 4706 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
4688} 4707}
@@ -5061,7 +5080,7 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans,
5061out: 5080out:
5062 if (cur_offset > start) { 5081 if (cur_offset > start) {
5063 inode->i_ctime = CURRENT_TIME; 5082 inode->i_ctime = CURRENT_TIME;
5064 btrfs_set_flag(inode, PREALLOC); 5083 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
5065 if (!(mode & FALLOC_FL_KEEP_SIZE) && 5084 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
5066 cur_offset > i_size_read(inode)) 5085 cur_offset > i_size_read(inode))
5067 btrfs_i_size_write(inode, cur_offset); 5086 btrfs_i_size_write(inode, cur_offset);
@@ -5182,7 +5201,7 @@ static int btrfs_set_page_dirty(struct page *page)
5182 5201
5183static int btrfs_permission(struct inode *inode, int mask) 5202static int btrfs_permission(struct inode *inode, int mask)
5184{ 5203{
5185 if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE)) 5204 if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
5186 return -EACCES; 5205 return -EACCES;
5187 return generic_permission(inode, mask, btrfs_check_acl); 5206 return generic_permission(inode, mask, btrfs_check_acl);
5188} 5207}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2624b53ea783..eff18f5b5362 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -50,7 +50,177 @@
50#include "volumes.h" 50#include "volumes.h"
51#include "locking.h" 51#include "locking.h"
52 52
53/* Mask out flags that are inappropriate for the given type of inode. */
54static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
55{
56 if (S_ISDIR(mode))
57 return flags;
58 else if (S_ISREG(mode))
59 return flags & ~FS_DIRSYNC_FL;
60 else
61 return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
62}
63
64/*
65 * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl.
66 */
67static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
68{
69 unsigned int iflags = 0;
70
71 if (flags & BTRFS_INODE_SYNC)
72 iflags |= FS_SYNC_FL;
73 if (flags & BTRFS_INODE_IMMUTABLE)
74 iflags |= FS_IMMUTABLE_FL;
75 if (flags & BTRFS_INODE_APPEND)
76 iflags |= FS_APPEND_FL;
77 if (flags & BTRFS_INODE_NODUMP)
78 iflags |= FS_NODUMP_FL;
79 if (flags & BTRFS_INODE_NOATIME)
80 iflags |= FS_NOATIME_FL;
81 if (flags & BTRFS_INODE_DIRSYNC)
82 iflags |= FS_DIRSYNC_FL;
83
84 return iflags;
85}
86
87/*
88 * Update inode->i_flags based on the btrfs internal flags.
89 */
90void btrfs_update_iflags(struct inode *inode)
91{
92 struct btrfs_inode *ip = BTRFS_I(inode);
93
94 inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
95
96 if (ip->flags & BTRFS_INODE_SYNC)
97 inode->i_flags |= S_SYNC;
98 if (ip->flags & BTRFS_INODE_IMMUTABLE)
99 inode->i_flags |= S_IMMUTABLE;
100 if (ip->flags & BTRFS_INODE_APPEND)
101 inode->i_flags |= S_APPEND;
102 if (ip->flags & BTRFS_INODE_NOATIME)
103 inode->i_flags |= S_NOATIME;
104 if (ip->flags & BTRFS_INODE_DIRSYNC)
105 inode->i_flags |= S_DIRSYNC;
106}
107
108/*
109 * Inherit flags from the parent inode.
110 *
111 * Unlike extN we don't have any flags we don't want to inherit currently.
112 */
113void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
114{
115 unsigned int flags;
116
117 if (!dir)
118 return;
119
120 flags = BTRFS_I(dir)->flags;
121
122 if (S_ISREG(inode->i_mode))
123 flags &= ~BTRFS_INODE_DIRSYNC;
124 else if (!S_ISDIR(inode->i_mode))
125 flags &= (BTRFS_INODE_NODUMP | BTRFS_INODE_NOATIME);
126
127 BTRFS_I(inode)->flags = flags;
128 btrfs_update_iflags(inode);
129}
130
131static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
132{
133 struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode);
134 unsigned int flags = btrfs_flags_to_ioctl(ip->flags);
135
136 if (copy_to_user(arg, &flags, sizeof(flags)))
137 return -EFAULT;
138 return 0;
139}
140
141static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
142{
143 struct inode *inode = file->f_path.dentry->d_inode;
144 struct btrfs_inode *ip = BTRFS_I(inode);
145 struct btrfs_root *root = ip->root;
146 struct btrfs_trans_handle *trans;
147 unsigned int flags, oldflags;
148 int ret;
149
150 if (copy_from_user(&flags, arg, sizeof(flags)))
151 return -EFAULT;
152
153 if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
154 FS_NOATIME_FL | FS_NODUMP_FL | \
155 FS_SYNC_FL | FS_DIRSYNC_FL))
156 return -EOPNOTSUPP;
53 157
158 if (!is_owner_or_cap(inode))
159 return -EACCES;
160
161 mutex_lock(&inode->i_mutex);
162
163 flags = btrfs_mask_flags(inode->i_mode, flags);
164 oldflags = btrfs_flags_to_ioctl(ip->flags);
165 if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
166 if (!capable(CAP_LINUX_IMMUTABLE)) {
167 ret = -EPERM;
168 goto out_unlock;
169 }
170 }
171
172 ret = mnt_want_write(file->f_path.mnt);
173 if (ret)
174 goto out_unlock;
175
176 if (flags & FS_SYNC_FL)
177 ip->flags |= BTRFS_INODE_SYNC;
178 else
179 ip->flags &= ~BTRFS_INODE_SYNC;
180 if (flags & FS_IMMUTABLE_FL)
181 ip->flags |= BTRFS_INODE_IMMUTABLE;
182 else
183 ip->flags &= ~BTRFS_INODE_IMMUTABLE;
184 if (flags & FS_APPEND_FL)
185 ip->flags |= BTRFS_INODE_APPEND;
186 else
187 ip->flags &= ~BTRFS_INODE_APPEND;
188 if (flags & FS_NODUMP_FL)
189 ip->flags |= BTRFS_INODE_NODUMP;
190 else
191 ip->flags &= ~BTRFS_INODE_NODUMP;
192 if (flags & FS_NOATIME_FL)
193 ip->flags |= BTRFS_INODE_NOATIME;
194 else
195 ip->flags &= ~BTRFS_INODE_NOATIME;
196 if (flags & FS_DIRSYNC_FL)
197 ip->flags |= BTRFS_INODE_DIRSYNC;
198 else
199 ip->flags &= ~BTRFS_INODE_DIRSYNC;
200
201
202 trans = btrfs_join_transaction(root, 1);
203 BUG_ON(!trans);
204
205 ret = btrfs_update_inode(trans, root, inode);
206 BUG_ON(ret);
207
208 btrfs_update_iflags(inode);
209 inode->i_ctime = CURRENT_TIME;
210 btrfs_end_transaction(trans, root);
211
212 mnt_drop_write(file->f_path.mnt);
213 out_unlock:
214 mutex_unlock(&inode->i_mutex);
215 return 0;
216}
217
218static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
219{
220 struct inode *inode = file->f_path.dentry->d_inode;
221
222 return put_user(inode->i_generation, arg);
223}
54 224
55static noinline int create_subvol(struct btrfs_root *root, 225static noinline int create_subvol(struct btrfs_root *root,
56 struct dentry *dentry, 226 struct dentry *dentry,
@@ -82,22 +252,25 @@ static noinline int create_subvol(struct btrfs_root *root,
82 if (ret) 252 if (ret)
83 goto fail; 253 goto fail;
84 254
85 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, 255 leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
86 objectid, trans->transid, 0, 0, 0); 256 0, objectid, NULL, 0, 0, 0);
87 if (IS_ERR(leaf)) { 257 if (IS_ERR(leaf)) {
88 ret = PTR_ERR(leaf); 258 ret = PTR_ERR(leaf);
89 goto fail; 259 goto fail;
90 } 260 }
91 261
92 btrfs_set_header_nritems(leaf, 0); 262 memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
93 btrfs_set_header_level(leaf, 0);
94 btrfs_set_header_bytenr(leaf, leaf->start); 263 btrfs_set_header_bytenr(leaf, leaf->start);
95 btrfs_set_header_generation(leaf, trans->transid); 264 btrfs_set_header_generation(leaf, trans->transid);
265 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
96 btrfs_set_header_owner(leaf, objectid); 266 btrfs_set_header_owner(leaf, objectid);
97 267
98 write_extent_buffer(leaf, root->fs_info->fsid, 268 write_extent_buffer(leaf, root->fs_info->fsid,
99 (unsigned long)btrfs_header_fsid(leaf), 269 (unsigned long)btrfs_header_fsid(leaf),
100 BTRFS_FSID_SIZE); 270 BTRFS_FSID_SIZE);
271 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
272 (unsigned long)btrfs_header_chunk_tree_uuid(leaf),
273 BTRFS_UUID_SIZE);
101 btrfs_mark_buffer_dirty(leaf); 274 btrfs_mark_buffer_dirty(leaf);
102 275
103 inode_item = &root_item.inode; 276 inode_item = &root_item.inode;
@@ -125,7 +298,7 @@ static noinline int create_subvol(struct btrfs_root *root,
125 btrfs_set_root_dirid(&root_item, new_dirid); 298 btrfs_set_root_dirid(&root_item, new_dirid);
126 299
127 key.objectid = objectid; 300 key.objectid = objectid;
128 key.offset = 1; 301 key.offset = 0;
129 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 302 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
130 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, 303 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
131 &root_item); 304 &root_item);
@@ -911,10 +1084,10 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
911 if (disko) { 1084 if (disko) {
912 inode_add_bytes(inode, datal); 1085 inode_add_bytes(inode, datal);
913 ret = btrfs_inc_extent_ref(trans, root, 1086 ret = btrfs_inc_extent_ref(trans, root,
914 disko, diskl, leaf->start, 1087 disko, diskl, 0,
915 root->root_key.objectid, 1088 root->root_key.objectid,
916 trans->transid, 1089 inode->i_ino,
917 inode->i_ino); 1090 new_key.offset - datao);
918 BUG_ON(ret); 1091 BUG_ON(ret);
919 } 1092 }
920 } else if (type == BTRFS_FILE_EXTENT_INLINE) { 1093 } else if (type == BTRFS_FILE_EXTENT_INLINE) {
@@ -1074,6 +1247,12 @@ long btrfs_ioctl(struct file *file, unsigned int
1074 void __user *argp = (void __user *)arg; 1247 void __user *argp = (void __user *)arg;
1075 1248
1076 switch (cmd) { 1249 switch (cmd) {
1250 case FS_IOC_GETFLAGS:
1251 return btrfs_ioctl_getflags(file, argp);
1252 case FS_IOC_SETFLAGS:
1253 return btrfs_ioctl_setflags(file, argp);
1254 case FS_IOC_GETVERSION:
1255 return btrfs_ioctl_getversion(file, argp);
1077 case BTRFS_IOC_SNAP_CREATE: 1256 case BTRFS_IOC_SNAP_CREATE:
1078 return btrfs_ioctl_snap_create(file, argp, 0); 1257 return btrfs_ioctl_snap_create(file, argp, 0);
1079 case BTRFS_IOC_SUBVOL_CREATE: 1258 case BTRFS_IOC_SUBVOL_CREATE:
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 5f8f218c1005..6d6523da0a30 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -45,22 +45,132 @@ static void print_dev_item(struct extent_buffer *eb,
45 (unsigned long long)btrfs_device_total_bytes(eb, dev_item), 45 (unsigned long long)btrfs_device_total_bytes(eb, dev_item),
46 (unsigned long long)btrfs_device_bytes_used(eb, dev_item)); 46 (unsigned long long)btrfs_device_bytes_used(eb, dev_item));
47} 47}
48static void print_extent_data_ref(struct extent_buffer *eb,
49 struct btrfs_extent_data_ref *ref)
50{
51 printk(KERN_INFO "\t\textent data backref root %llu "
52 "objectid %llu offset %llu count %u\n",
53 (unsigned long long)btrfs_extent_data_ref_root(eb, ref),
54 (unsigned long long)btrfs_extent_data_ref_objectid(eb, ref),
55 (unsigned long long)btrfs_extent_data_ref_offset(eb, ref),
56 btrfs_extent_data_ref_count(eb, ref));
57}
58
59static void print_extent_item(struct extent_buffer *eb, int slot)
60{
61 struct btrfs_extent_item *ei;
62 struct btrfs_extent_inline_ref *iref;
63 struct btrfs_extent_data_ref *dref;
64 struct btrfs_shared_data_ref *sref;
65 struct btrfs_disk_key key;
66 unsigned long end;
67 unsigned long ptr;
68 int type;
69 u32 item_size = btrfs_item_size_nr(eb, slot);
70 u64 flags;
71 u64 offset;
72
73 if (item_size < sizeof(*ei)) {
74#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
75 struct btrfs_extent_item_v0 *ei0;
76 BUG_ON(item_size != sizeof(*ei0));
77 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
78 printk(KERN_INFO "\t\textent refs %u\n",
79 btrfs_extent_refs_v0(eb, ei0));
80 return;
81#else
82 BUG();
83#endif
84 }
85
86 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
87 flags = btrfs_extent_flags(eb, ei);
88
89 printk(KERN_INFO "\t\textent refs %llu gen %llu flags %llu\n",
90 (unsigned long long)btrfs_extent_refs(eb, ei),
91 (unsigned long long)btrfs_extent_generation(eb, ei),
92 (unsigned long long)flags);
93
94 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
95 struct btrfs_tree_block_info *info;
96 info = (struct btrfs_tree_block_info *)(ei + 1);
97 btrfs_tree_block_key(eb, info, &key);
98 printk(KERN_INFO "\t\ttree block key (%llu %x %llu) "
99 "level %d\n",
100 (unsigned long long)btrfs_disk_key_objectid(&key),
101 key.type,
102 (unsigned long long)btrfs_disk_key_offset(&key),
103 btrfs_tree_block_level(eb, info));
104 iref = (struct btrfs_extent_inline_ref *)(info + 1);
105 } else {
106 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
107 }
108
109 ptr = (unsigned long)iref;
110 end = (unsigned long)ei + item_size;
111 while (ptr < end) {
112 iref = (struct btrfs_extent_inline_ref *)ptr;
113 type = btrfs_extent_inline_ref_type(eb, iref);
114 offset = btrfs_extent_inline_ref_offset(eb, iref);
115 switch (type) {
116 case BTRFS_TREE_BLOCK_REF_KEY:
117 printk(KERN_INFO "\t\ttree block backref "
118 "root %llu\n", (unsigned long long)offset);
119 break;
120 case BTRFS_SHARED_BLOCK_REF_KEY:
121 printk(KERN_INFO "\t\tshared block backref "
122 "parent %llu\n", (unsigned long long)offset);
123 break;
124 case BTRFS_EXTENT_DATA_REF_KEY:
125 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
126 print_extent_data_ref(eb, dref);
127 break;
128 case BTRFS_SHARED_DATA_REF_KEY:
129 sref = (struct btrfs_shared_data_ref *)(iref + 1);
130 printk(KERN_INFO "\t\tshared data backref "
131 "parent %llu count %u\n",
132 (unsigned long long)offset,
133 btrfs_shared_data_ref_count(eb, sref));
134 break;
135 default:
136 BUG();
137 }
138 ptr += btrfs_extent_inline_ref_size(type);
139 }
140 WARN_ON(ptr > end);
141}
142
143#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
144static void print_extent_ref_v0(struct extent_buffer *eb, int slot)
145{
146 struct btrfs_extent_ref_v0 *ref0;
147
148 ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0);
149 printk("\t\textent back ref root %llu gen %llu "
150 "owner %llu num_refs %lu\n",
151 (unsigned long long)btrfs_ref_root_v0(eb, ref0),
152 (unsigned long long)btrfs_ref_generation_v0(eb, ref0),
153 (unsigned long long)btrfs_ref_objectid_v0(eb, ref0),
154 (unsigned long)btrfs_ref_count_v0(eb, ref0));
155}
156#endif
157
48void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) 158void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
49{ 159{
50 int i; 160 int i;
161 u32 type;
51 u32 nr = btrfs_header_nritems(l); 162 u32 nr = btrfs_header_nritems(l);
52 struct btrfs_item *item; 163 struct btrfs_item *item;
53 struct btrfs_extent_item *ei;
54 struct btrfs_root_item *ri; 164 struct btrfs_root_item *ri;
55 struct btrfs_dir_item *di; 165 struct btrfs_dir_item *di;
56 struct btrfs_inode_item *ii; 166 struct btrfs_inode_item *ii;
57 struct btrfs_block_group_item *bi; 167 struct btrfs_block_group_item *bi;
58 struct btrfs_file_extent_item *fi; 168 struct btrfs_file_extent_item *fi;
169 struct btrfs_extent_data_ref *dref;
170 struct btrfs_shared_data_ref *sref;
171 struct btrfs_dev_extent *dev_extent;
59 struct btrfs_key key; 172 struct btrfs_key key;
60 struct btrfs_key found_key; 173 struct btrfs_key found_key;
61 struct btrfs_extent_ref *ref;
62 struct btrfs_dev_extent *dev_extent;
63 u32 type;
64 174
65 printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n", 175 printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n",
66 (unsigned long long)btrfs_header_bytenr(l), nr, 176 (unsigned long long)btrfs_header_bytenr(l), nr,
@@ -100,20 +210,25 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
100 btrfs_disk_root_refs(l, ri)); 210 btrfs_disk_root_refs(l, ri));
101 break; 211 break;
102 case BTRFS_EXTENT_ITEM_KEY: 212 case BTRFS_EXTENT_ITEM_KEY:
103 ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); 213 print_extent_item(l, i);
104 printk(KERN_INFO "\t\textent data refs %u\n", 214 break;
105 btrfs_extent_refs(l, ei)); 215 case BTRFS_TREE_BLOCK_REF_KEY:
106 break; 216 printk(KERN_INFO "\t\ttree block backref\n");
107 case BTRFS_EXTENT_REF_KEY: 217 break;
108 ref = btrfs_item_ptr(l, i, struct btrfs_extent_ref); 218 case BTRFS_SHARED_BLOCK_REF_KEY:
109 printk(KERN_INFO "\t\textent back ref root %llu " 219 printk(KERN_INFO "\t\tshared block backref\n");
110 "gen %llu owner %llu num_refs %lu\n", 220 break;
111 (unsigned long long)btrfs_ref_root(l, ref), 221 case BTRFS_EXTENT_DATA_REF_KEY:
112 (unsigned long long)btrfs_ref_generation(l, ref), 222 dref = btrfs_item_ptr(l, i,
113 (unsigned long long)btrfs_ref_objectid(l, ref), 223 struct btrfs_extent_data_ref);
114 (unsigned long)btrfs_ref_num_refs(l, ref)); 224 print_extent_data_ref(l, dref);
225 break;
226 case BTRFS_SHARED_DATA_REF_KEY:
227 sref = btrfs_item_ptr(l, i,
228 struct btrfs_shared_data_ref);
229 printk(KERN_INFO "\t\tshared data backref count %u\n",
230 btrfs_shared_data_ref_count(l, sref));
115 break; 231 break;
116
117 case BTRFS_EXTENT_DATA_KEY: 232 case BTRFS_EXTENT_DATA_KEY:
118 fi = btrfs_item_ptr(l, i, 233 fi = btrfs_item_ptr(l, i,
119 struct btrfs_file_extent_item); 234 struct btrfs_file_extent_item);
@@ -139,6 +254,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
139 (unsigned long long) 254 (unsigned long long)
140 btrfs_file_extent_ram_bytes(l, fi)); 255 btrfs_file_extent_ram_bytes(l, fi));
141 break; 256 break;
257 case BTRFS_EXTENT_REF_V0_KEY:
258#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
259 print_extent_ref_v0(l, i);
260#else
261 BUG();
262#endif
142 case BTRFS_BLOCK_GROUP_ITEM_KEY: 263 case BTRFS_BLOCK_GROUP_ITEM_KEY:
143 bi = btrfs_item_ptr(l, i, 264 bi = btrfs_item_ptr(l, i,
144 struct btrfs_block_group_item); 265 struct btrfs_block_group_item);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
new file mode 100644
index 000000000000..b23dc209ae10
--- /dev/null
+++ b/fs/btrfs/relocation.c
@@ -0,0 +1,3711 @@
1/*
2 * Copyright (C) 2009 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include <linux/pagemap.h>
21#include <linux/writeback.h>
22#include <linux/blkdev.h>
23#include <linux/rbtree.h>
24#include "ctree.h"
25#include "disk-io.h"
26#include "transaction.h"
27#include "volumes.h"
28#include "locking.h"
29#include "btrfs_inode.h"
30#include "async-thread.h"
31
32/*
33 * backref_node, mapping_node and tree_block start with this
34 */
35struct tree_entry {
36 struct rb_node rb_node;
37 u64 bytenr;
38};
39
40/*
41 * present a tree block in the backref cache
42 */
43struct backref_node {
44 struct rb_node rb_node;
45 u64 bytenr;
46 /* objectid tree block owner */
47 u64 owner;
48 /* list of upper level blocks reference this block */
49 struct list_head upper;
50 /* list of child blocks in the cache */
51 struct list_head lower;
52 /* NULL if this node is not tree root */
53 struct btrfs_root *root;
54 /* extent buffer got by COW the block */
55 struct extent_buffer *eb;
56 /* level of tree block */
57 unsigned int level:8;
58 /* 1 if the block is root of old snapshot */
59 unsigned int old_root:1;
60 /* 1 if no child blocks in the cache */
61 unsigned int lowest:1;
62 /* is the extent buffer locked */
63 unsigned int locked:1;
64 /* has the block been processed */
65 unsigned int processed:1;
66 /* have backrefs of this block been checked */
67 unsigned int checked:1;
68};
69
70/*
71 * present a block pointer in the backref cache
72 */
73struct backref_edge {
74 struct list_head list[2];
75 struct backref_node *node[2];
76 u64 blockptr;
77};
78
79#define LOWER 0
80#define UPPER 1
81
82struct backref_cache {
83 /* red black tree of all backref nodes in the cache */
84 struct rb_root rb_root;
85 /* list of backref nodes with no child block in the cache */
86 struct list_head pending[BTRFS_MAX_LEVEL];
87 spinlock_t lock;
88};
89
90/*
91 * map address of tree root to tree
92 */
93struct mapping_node {
94 struct rb_node rb_node;
95 u64 bytenr;
96 void *data;
97};
98
99struct mapping_tree {
100 struct rb_root rb_root;
101 spinlock_t lock;
102};
103
104/*
105 * present a tree block to process
106 */
107struct tree_block {
108 struct rb_node rb_node;
109 u64 bytenr;
110 struct btrfs_key key;
111 unsigned int level:8;
112 unsigned int key_ready:1;
113};
114
115/* inode vector */
116#define INODEVEC_SIZE 16
117
118struct inodevec {
119 struct list_head list;
120 struct inode *inode[INODEVEC_SIZE];
121 int nr;
122};
123
124struct reloc_control {
125 /* block group to relocate */
126 struct btrfs_block_group_cache *block_group;
127 /* extent tree */
128 struct btrfs_root *extent_root;
129 /* inode for moving data */
130 struct inode *data_inode;
131 struct btrfs_workers workers;
132 /* tree blocks have been processed */
133 struct extent_io_tree processed_blocks;
134 /* map start of tree root to corresponding reloc tree */
135 struct mapping_tree reloc_root_tree;
136 /* list of reloc trees */
137 struct list_head reloc_roots;
138 u64 search_start;
139 u64 extents_found;
140 u64 extents_skipped;
141 int stage;
142 int create_reloc_root;
143 unsigned int found_file_extent:1;
144 unsigned int found_old_snapshot:1;
145};
146
147/* stages of data relocation */
148#define MOVE_DATA_EXTENTS 0
149#define UPDATE_DATA_PTRS 1
150
151/*
152 * merge reloc tree to corresponding fs tree in worker threads
153 */
154struct async_merge {
155 struct btrfs_work work;
156 struct reloc_control *rc;
157 struct btrfs_root *root;
158 struct completion *done;
159 atomic_t *num_pending;
160};
161
162static void mapping_tree_init(struct mapping_tree *tree)
163{
164 tree->rb_root.rb_node = NULL;
165 spin_lock_init(&tree->lock);
166}
167
168static void backref_cache_init(struct backref_cache *cache)
169{
170 int i;
171 cache->rb_root.rb_node = NULL;
172 for (i = 0; i < BTRFS_MAX_LEVEL; i++)
173 INIT_LIST_HEAD(&cache->pending[i]);
174 spin_lock_init(&cache->lock);
175}
176
177static void backref_node_init(struct backref_node *node)
178{
179 memset(node, 0, sizeof(*node));
180 INIT_LIST_HEAD(&node->upper);
181 INIT_LIST_HEAD(&node->lower);
182 RB_CLEAR_NODE(&node->rb_node);
183}
184
185static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
186 struct rb_node *node)
187{
188 struct rb_node **p = &root->rb_node;
189 struct rb_node *parent = NULL;
190 struct tree_entry *entry;
191
192 while (*p) {
193 parent = *p;
194 entry = rb_entry(parent, struct tree_entry, rb_node);
195
196 if (bytenr < entry->bytenr)
197 p = &(*p)->rb_left;
198 else if (bytenr > entry->bytenr)
199 p = &(*p)->rb_right;
200 else
201 return parent;
202 }
203
204 rb_link_node(node, parent, p);
205 rb_insert_color(node, root);
206 return NULL;
207}
208
209static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
210{
211 struct rb_node *n = root->rb_node;
212 struct tree_entry *entry;
213
214 while (n) {
215 entry = rb_entry(n, struct tree_entry, rb_node);
216
217 if (bytenr < entry->bytenr)
218 n = n->rb_left;
219 else if (bytenr > entry->bytenr)
220 n = n->rb_right;
221 else
222 return n;
223 }
224 return NULL;
225}
226
227/*
228 * walk up backref nodes until reach node presents tree root
229 */
230static struct backref_node *walk_up_backref(struct backref_node *node,
231 struct backref_edge *edges[],
232 int *index)
233{
234 struct backref_edge *edge;
235 int idx = *index;
236
237 while (!list_empty(&node->upper)) {
238 edge = list_entry(node->upper.next,
239 struct backref_edge, list[LOWER]);
240 edges[idx++] = edge;
241 node = edge->node[UPPER];
242 }
243 *index = idx;
244 return node;
245}
246
247/*
248 * walk down backref nodes to find start of next reference path
249 */
250static struct backref_node *walk_down_backref(struct backref_edge *edges[],
251 int *index)
252{
253 struct backref_edge *edge;
254 struct backref_node *lower;
255 int idx = *index;
256
257 while (idx > 0) {
258 edge = edges[idx - 1];
259 lower = edge->node[LOWER];
260 if (list_is_last(&edge->list[LOWER], &lower->upper)) {
261 idx--;
262 continue;
263 }
264 edge = list_entry(edge->list[LOWER].next,
265 struct backref_edge, list[LOWER]);
266 edges[idx - 1] = edge;
267 *index = idx;
268 return edge->node[UPPER];
269 }
270 *index = 0;
271 return NULL;
272}
273
274static void drop_node_buffer(struct backref_node *node)
275{
276 if (node->eb) {
277 if (node->locked) {
278 btrfs_tree_unlock(node->eb);
279 node->locked = 0;
280 }
281 free_extent_buffer(node->eb);
282 node->eb = NULL;
283 }
284}
285
286static void drop_backref_node(struct backref_cache *tree,
287 struct backref_node *node)
288{
289 BUG_ON(!node->lowest);
290 BUG_ON(!list_empty(&node->upper));
291
292 drop_node_buffer(node);
293 list_del(&node->lower);
294
295 rb_erase(&node->rb_node, &tree->rb_root);
296 kfree(node);
297}
298
299/*
300 * remove a backref node from the backref cache
301 */
302static void remove_backref_node(struct backref_cache *cache,
303 struct backref_node *node)
304{
305 struct backref_node *upper;
306 struct backref_edge *edge;
307
308 if (!node)
309 return;
310
311 BUG_ON(!node->lowest);
312 while (!list_empty(&node->upper)) {
313 edge = list_entry(node->upper.next, struct backref_edge,
314 list[LOWER]);
315 upper = edge->node[UPPER];
316 list_del(&edge->list[LOWER]);
317 list_del(&edge->list[UPPER]);
318 kfree(edge);
319 /*
320 * add the node to pending list if no other
321 * child block cached.
322 */
323 if (list_empty(&upper->lower)) {
324 list_add_tail(&upper->lower,
325 &cache->pending[upper->level]);
326 upper->lowest = 1;
327 }
328 }
329 drop_backref_node(cache, node);
330}
331
332/*
333 * find reloc tree by address of tree root
334 */
335static struct btrfs_root *find_reloc_root(struct reloc_control *rc,
336 u64 bytenr)
337{
338 struct rb_node *rb_node;
339 struct mapping_node *node;
340 struct btrfs_root *root = NULL;
341
342 spin_lock(&rc->reloc_root_tree.lock);
343 rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr);
344 if (rb_node) {
345 node = rb_entry(rb_node, struct mapping_node, rb_node);
346 root = (struct btrfs_root *)node->data;
347 }
348 spin_unlock(&rc->reloc_root_tree.lock);
349 return root;
350}
351
352static int is_cowonly_root(u64 root_objectid)
353{
354 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
355 root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
356 root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
357 root_objectid == BTRFS_DEV_TREE_OBJECTID ||
358 root_objectid == BTRFS_TREE_LOG_OBJECTID ||
359 root_objectid == BTRFS_CSUM_TREE_OBJECTID)
360 return 1;
361 return 0;
362}
363
364static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
365 u64 root_objectid)
366{
367 struct btrfs_key key;
368
369 key.objectid = root_objectid;
370 key.type = BTRFS_ROOT_ITEM_KEY;
371 if (is_cowonly_root(root_objectid))
372 key.offset = 0;
373 else
374 key.offset = (u64)-1;
375
376 return btrfs_read_fs_root_no_name(fs_info, &key);
377}
378
379#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
380static noinline_for_stack
381struct btrfs_root *find_tree_root(struct reloc_control *rc,
382 struct extent_buffer *leaf,
383 struct btrfs_extent_ref_v0 *ref0)
384{
385 struct btrfs_root *root;
386 u64 root_objectid = btrfs_ref_root_v0(leaf, ref0);
387 u64 generation = btrfs_ref_generation_v0(leaf, ref0);
388
389 BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID);
390
391 root = read_fs_root(rc->extent_root->fs_info, root_objectid);
392 BUG_ON(IS_ERR(root));
393
394 if (root->ref_cows &&
395 generation != btrfs_root_generation(&root->root_item))
396 return NULL;
397
398 return root;
399}
400#endif
401
402static noinline_for_stack
403int find_inline_backref(struct extent_buffer *leaf, int slot,
404 unsigned long *ptr, unsigned long *end)
405{
406 struct btrfs_extent_item *ei;
407 struct btrfs_tree_block_info *bi;
408 u32 item_size;
409
410 item_size = btrfs_item_size_nr(leaf, slot);
411#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
412 if (item_size < sizeof(*ei)) {
413 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
414 return 1;
415 }
416#endif
417 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
418 WARN_ON(!(btrfs_extent_flags(leaf, ei) &
419 BTRFS_EXTENT_FLAG_TREE_BLOCK));
420
421 if (item_size <= sizeof(*ei) + sizeof(*bi)) {
422 WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
423 return 1;
424 }
425
426 bi = (struct btrfs_tree_block_info *)(ei + 1);
427 *ptr = (unsigned long)(bi + 1);
428 *end = (unsigned long)ei + item_size;
429 return 0;
430}
431
432/*
433 * build backref tree for a given tree block. root of the backref tree
434 * corresponds the tree block, leaves of the backref tree correspond
435 * roots of b-trees that reference the tree block.
436 *
437 * the basic idea of this function is check backrefs of a given block
438 * to find upper level blocks that refernece the block, and then check
439 * bakcrefs of these upper level blocks recursively. the recursion stop
440 * when tree root is reached or backrefs for the block is cached.
441 *
442 * NOTE: if we find backrefs for a block are cached, we know backrefs
443 * for all upper level blocks that directly/indirectly reference the
444 * block are also cached.
445 */
446static struct backref_node *build_backref_tree(struct reloc_control *rc,
447 struct backref_cache *cache,
448 struct btrfs_key *node_key,
449 int level, u64 bytenr)
450{
451 struct btrfs_path *path1;
452 struct btrfs_path *path2;
453 struct extent_buffer *eb;
454 struct btrfs_root *root;
455 struct backref_node *cur;
456 struct backref_node *upper;
457 struct backref_node *lower;
458 struct backref_node *node = NULL;
459 struct backref_node *exist = NULL;
460 struct backref_edge *edge;
461 struct rb_node *rb_node;
462 struct btrfs_key key;
463 unsigned long end;
464 unsigned long ptr;
465 LIST_HEAD(list);
466 int ret;
467 int err = 0;
468
469 path1 = btrfs_alloc_path();
470 path2 = btrfs_alloc_path();
471 if (!path1 || !path2) {
472 err = -ENOMEM;
473 goto out;
474 }
475
476 node = kmalloc(sizeof(*node), GFP_NOFS);
477 if (!node) {
478 err = -ENOMEM;
479 goto out;
480 }
481
482 backref_node_init(node);
483 node->bytenr = bytenr;
484 node->owner = 0;
485 node->level = level;
486 node->lowest = 1;
487 cur = node;
488again:
489 end = 0;
490 ptr = 0;
491 key.objectid = cur->bytenr;
492 key.type = BTRFS_EXTENT_ITEM_KEY;
493 key.offset = (u64)-1;
494
495 path1->search_commit_root = 1;
496 path1->skip_locking = 1;
497 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1,
498 0, 0);
499 if (ret < 0) {
500 err = ret;
501 goto out;
502 }
503 BUG_ON(!ret || !path1->slots[0]);
504
505 path1->slots[0]--;
506
507 WARN_ON(cur->checked);
508 if (!list_empty(&cur->upper)) {
509 /*
510 * the backref was added previously when processsing
511 * backref of type BTRFS_TREE_BLOCK_REF_KEY
512 */
513 BUG_ON(!list_is_singular(&cur->upper));
514 edge = list_entry(cur->upper.next, struct backref_edge,
515 list[LOWER]);
516 BUG_ON(!list_empty(&edge->list[UPPER]));
517 exist = edge->node[UPPER];
518 /*
519 * add the upper level block to pending list if we need
520 * check its backrefs
521 */
522 if (!exist->checked)
523 list_add_tail(&edge->list[UPPER], &list);
524 } else {
525 exist = NULL;
526 }
527
528 while (1) {
529 cond_resched();
530 eb = path1->nodes[0];
531
532 if (ptr >= end) {
533 if (path1->slots[0] >= btrfs_header_nritems(eb)) {
534 ret = btrfs_next_leaf(rc->extent_root, path1);
535 if (ret < 0) {
536 err = ret;
537 goto out;
538 }
539 if (ret > 0)
540 break;
541 eb = path1->nodes[0];
542 }
543
544 btrfs_item_key_to_cpu(eb, &key, path1->slots[0]);
545 if (key.objectid != cur->bytenr) {
546 WARN_ON(exist);
547 break;
548 }
549
550 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
551 ret = find_inline_backref(eb, path1->slots[0],
552 &ptr, &end);
553 if (ret)
554 goto next;
555 }
556 }
557
558 if (ptr < end) {
559 /* update key for inline back ref */
560 struct btrfs_extent_inline_ref *iref;
561 iref = (struct btrfs_extent_inline_ref *)ptr;
562 key.type = btrfs_extent_inline_ref_type(eb, iref);
563 key.offset = btrfs_extent_inline_ref_offset(eb, iref);
564 WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY &&
565 key.type != BTRFS_SHARED_BLOCK_REF_KEY);
566 }
567
568 if (exist &&
569 ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
570 exist->owner == key.offset) ||
571 (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
572 exist->bytenr == key.offset))) {
573 exist = NULL;
574 goto next;
575 }
576
577#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
578 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY ||
579 key.type == BTRFS_EXTENT_REF_V0_KEY) {
580 if (key.objectid == key.offset &&
581 key.type == BTRFS_EXTENT_REF_V0_KEY) {
582 struct btrfs_extent_ref_v0 *ref0;
583 ref0 = btrfs_item_ptr(eb, path1->slots[0],
584 struct btrfs_extent_ref_v0);
585 root = find_tree_root(rc, eb, ref0);
586 if (root)
587 cur->root = root;
588 else
589 cur->old_root = 1;
590 break;
591 }
592#else
593 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
594 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
595#endif
596 if (key.objectid == key.offset) {
597 /*
598 * only root blocks of reloc trees use
599 * backref of this type.
600 */
601 root = find_reloc_root(rc, cur->bytenr);
602 BUG_ON(!root);
603 cur->root = root;
604 break;
605 }
606
607 edge = kzalloc(sizeof(*edge), GFP_NOFS);
608 if (!edge) {
609 err = -ENOMEM;
610 goto out;
611 }
612 rb_node = tree_search(&cache->rb_root, key.offset);
613 if (!rb_node) {
614 upper = kmalloc(sizeof(*upper), GFP_NOFS);
615 if (!upper) {
616 kfree(edge);
617 err = -ENOMEM;
618 goto out;
619 }
620 backref_node_init(upper);
621 upper->bytenr = key.offset;
622 upper->owner = 0;
623 upper->level = cur->level + 1;
624 /*
625 * backrefs for the upper level block isn't
626 * cached, add the block to pending list
627 */
628 list_add_tail(&edge->list[UPPER], &list);
629 } else {
630 upper = rb_entry(rb_node, struct backref_node,
631 rb_node);
632 INIT_LIST_HEAD(&edge->list[UPPER]);
633 }
634 list_add(&edge->list[LOWER], &cur->upper);
635 edge->node[UPPER] = upper;
636 edge->node[LOWER] = cur;
637
638 goto next;
639 } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
640 goto next;
641 }
642
643 /* key.type == BTRFS_TREE_BLOCK_REF_KEY */
644 root = read_fs_root(rc->extent_root->fs_info, key.offset);
645 if (IS_ERR(root)) {
646 err = PTR_ERR(root);
647 goto out;
648 }
649
650 if (btrfs_root_level(&root->root_item) == cur->level) {
651 /* tree root */
652 BUG_ON(btrfs_root_bytenr(&root->root_item) !=
653 cur->bytenr);
654 cur->root = root;
655 break;
656 }
657
658 level = cur->level + 1;
659
660 /*
661 * searching the tree to find upper level blocks
662 * reference the block.
663 */
664 path2->search_commit_root = 1;
665 path2->skip_locking = 1;
666 path2->lowest_level = level;
667 ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0);
668 path2->lowest_level = 0;
669 if (ret < 0) {
670 err = ret;
671 goto out;
672 }
673
674 eb = path2->nodes[level];
675 WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
676 cur->bytenr);
677
678 lower = cur;
679 for (; level < BTRFS_MAX_LEVEL; level++) {
680 if (!path2->nodes[level]) {
681 BUG_ON(btrfs_root_bytenr(&root->root_item) !=
682 lower->bytenr);
683 lower->root = root;
684 break;
685 }
686
687 edge = kzalloc(sizeof(*edge), GFP_NOFS);
688 if (!edge) {
689 err = -ENOMEM;
690 goto out;
691 }
692
693 eb = path2->nodes[level];
694 rb_node = tree_search(&cache->rb_root, eb->start);
695 if (!rb_node) {
696 upper = kmalloc(sizeof(*upper), GFP_NOFS);
697 if (!upper) {
698 kfree(edge);
699 err = -ENOMEM;
700 goto out;
701 }
702 backref_node_init(upper);
703 upper->bytenr = eb->start;
704 upper->owner = btrfs_header_owner(eb);
705 upper->level = lower->level + 1;
706
707 /*
708 * if we know the block isn't shared
709 * we can void checking its backrefs.
710 */
711 if (btrfs_block_can_be_shared(root, eb))
712 upper->checked = 0;
713 else
714 upper->checked = 1;
715
716 /*
717 * add the block to pending list if we
718 * need check its backrefs. only block
719 * at 'cur->level + 1' is added to the
720 * tail of pending list. this guarantees
721 * we check backrefs from lower level
722 * blocks to upper level blocks.
723 */
724 if (!upper->checked &&
725 level == cur->level + 1) {
726 list_add_tail(&edge->list[UPPER],
727 &list);
728 } else
729 INIT_LIST_HEAD(&edge->list[UPPER]);
730 } else {
731 upper = rb_entry(rb_node, struct backref_node,
732 rb_node);
733 BUG_ON(!upper->checked);
734 INIT_LIST_HEAD(&edge->list[UPPER]);
735 }
736 list_add_tail(&edge->list[LOWER], &lower->upper);
737 edge->node[UPPER] = upper;
738 edge->node[LOWER] = lower;
739
740 if (rb_node)
741 break;
742 lower = upper;
743 upper = NULL;
744 }
745 btrfs_release_path(root, path2);
746next:
747 if (ptr < end) {
748 ptr += btrfs_extent_inline_ref_size(key.type);
749 if (ptr >= end) {
750 WARN_ON(ptr > end);
751 ptr = 0;
752 end = 0;
753 }
754 }
755 if (ptr >= end)
756 path1->slots[0]++;
757 }
758 btrfs_release_path(rc->extent_root, path1);
759
760 cur->checked = 1;
761 WARN_ON(exist);
762
763 /* the pending list isn't empty, take the first block to process */
764 if (!list_empty(&list)) {
765 edge = list_entry(list.next, struct backref_edge, list[UPPER]);
766 list_del_init(&edge->list[UPPER]);
767 cur = edge->node[UPPER];
768 goto again;
769 }
770
771 /*
772 * everything goes well, connect backref nodes and insert backref nodes
773 * into the cache.
774 */
775 BUG_ON(!node->checked);
776 rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
777 BUG_ON(rb_node);
778
779 list_for_each_entry(edge, &node->upper, list[LOWER])
780 list_add_tail(&edge->list[UPPER], &list);
781
782 while (!list_empty(&list)) {
783 edge = list_entry(list.next, struct backref_edge, list[UPPER]);
784 list_del_init(&edge->list[UPPER]);
785 upper = edge->node[UPPER];
786
787 if (!RB_EMPTY_NODE(&upper->rb_node)) {
788 if (upper->lowest) {
789 list_del_init(&upper->lower);
790 upper->lowest = 0;
791 }
792
793 list_add_tail(&edge->list[UPPER], &upper->lower);
794 continue;
795 }
796
797 BUG_ON(!upper->checked);
798 rb_node = tree_insert(&cache->rb_root, upper->bytenr,
799 &upper->rb_node);
800 BUG_ON(rb_node);
801
802 list_add_tail(&edge->list[UPPER], &upper->lower);
803
804 list_for_each_entry(edge, &upper->upper, list[LOWER])
805 list_add_tail(&edge->list[UPPER], &list);
806 }
807out:
808 btrfs_free_path(path1);
809 btrfs_free_path(path2);
810 if (err) {
811 INIT_LIST_HEAD(&list);
812 upper = node;
813 while (upper) {
814 if (RB_EMPTY_NODE(&upper->rb_node)) {
815 list_splice_tail(&upper->upper, &list);
816 kfree(upper);
817 }
818
819 if (list_empty(&list))
820 break;
821
822 edge = list_entry(list.next, struct backref_edge,
823 list[LOWER]);
824 upper = edge->node[UPPER];
825 kfree(edge);
826 }
827 return ERR_PTR(err);
828 }
829 return node;
830}
831
832/*
833 * helper to add 'address of tree root -> reloc tree' mapping
834 */
835static int __add_reloc_root(struct btrfs_root *root)
836{
837 struct rb_node *rb_node;
838 struct mapping_node *node;
839 struct reloc_control *rc = root->fs_info->reloc_ctl;
840
841 node = kmalloc(sizeof(*node), GFP_NOFS);
842 BUG_ON(!node);
843
844 node->bytenr = root->node->start;
845 node->data = root;
846
847 spin_lock(&rc->reloc_root_tree.lock);
848 rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
849 node->bytenr, &node->rb_node);
850 spin_unlock(&rc->reloc_root_tree.lock);
851 BUG_ON(rb_node);
852
853 list_add_tail(&root->root_list, &rc->reloc_roots);
854 return 0;
855}
856
857/*
858 * helper to update/delete the 'address of tree root -> reloc tree'
859 * mapping
860 */
861static int __update_reloc_root(struct btrfs_root *root, int del)
862{
863 struct rb_node *rb_node;
864 struct mapping_node *node = NULL;
865 struct reloc_control *rc = root->fs_info->reloc_ctl;
866
867 spin_lock(&rc->reloc_root_tree.lock);
868 rb_node = tree_search(&rc->reloc_root_tree.rb_root,
869 root->commit_root->start);
870 if (rb_node) {
871 node = rb_entry(rb_node, struct mapping_node, rb_node);
872 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
873 }
874 spin_unlock(&rc->reloc_root_tree.lock);
875
876 BUG_ON((struct btrfs_root *)node->data != root);
877
878 if (!del) {
879 spin_lock(&rc->reloc_root_tree.lock);
880 node->bytenr = root->node->start;
881 rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
882 node->bytenr, &node->rb_node);
883 spin_unlock(&rc->reloc_root_tree.lock);
884 BUG_ON(rb_node);
885 } else {
886 list_del_init(&root->root_list);
887 kfree(node);
888 }
889 return 0;
890}
891
892/*
893 * create reloc tree for a given fs tree. reloc tree is just a
894 * snapshot of the fs tree with special root objectid.
895 */
896int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
897 struct btrfs_root *root)
898{
899 struct btrfs_root *reloc_root;
900 struct extent_buffer *eb;
901 struct btrfs_root_item *root_item;
902 struct btrfs_key root_key;
903 int ret;
904
905 if (root->reloc_root) {
906 reloc_root = root->reloc_root;
907 reloc_root->last_trans = trans->transid;
908 return 0;
909 }
910
911 if (!root->fs_info->reloc_ctl ||
912 !root->fs_info->reloc_ctl->create_reloc_root ||
913 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
914 return 0;
915
916 root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
917 BUG_ON(!root_item);
918
919 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
920 root_key.type = BTRFS_ROOT_ITEM_KEY;
921 root_key.offset = root->root_key.objectid;
922
923 ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
924 BTRFS_TREE_RELOC_OBJECTID);
925 BUG_ON(ret);
926
927 btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1);
928 memcpy(root_item, &root->root_item, sizeof(*root_item));
929 btrfs_set_root_refs(root_item, 1);
930 btrfs_set_root_bytenr(root_item, eb->start);
931 btrfs_set_root_level(root_item, btrfs_header_level(eb));
932 btrfs_set_root_generation(root_item, trans->transid);
933 memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key));
934 root_item->drop_level = 0;
935
936 btrfs_tree_unlock(eb);
937 free_extent_buffer(eb);
938
939 ret = btrfs_insert_root(trans, root->fs_info->tree_root,
940 &root_key, root_item);
941 BUG_ON(ret);
942 kfree(root_item);
943
944 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
945 &root_key);
946 BUG_ON(IS_ERR(reloc_root));
947 reloc_root->last_trans = trans->transid;
948
949 __add_reloc_root(reloc_root);
950 root->reloc_root = reloc_root;
951 return 0;
952}
953
954/*
955 * update root item of reloc tree
956 */
957int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
958 struct btrfs_root *root)
959{
960 struct btrfs_root *reloc_root;
961 struct btrfs_root_item *root_item;
962 int del = 0;
963 int ret;
964
965 if (!root->reloc_root)
966 return 0;
967
968 reloc_root = root->reloc_root;
969 root_item = &reloc_root->root_item;
970
971 if (btrfs_root_refs(root_item) == 0) {
972 root->reloc_root = NULL;
973 del = 1;
974 }
975
976 __update_reloc_root(reloc_root, del);
977
978 if (reloc_root->commit_root != reloc_root->node) {
979 btrfs_set_root_node(root_item, reloc_root->node);
980 free_extent_buffer(reloc_root->commit_root);
981 reloc_root->commit_root = btrfs_root_node(reloc_root);
982 }
983
984 ret = btrfs_update_root(trans, root->fs_info->tree_root,
985 &reloc_root->root_key, root_item);
986 BUG_ON(ret);
987 return 0;
988}
989
990/*
991 * helper to find first cached inode with inode number >= objectid
992 * in a subvolume
993 */
994static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
995{
996 struct rb_node *node;
997 struct rb_node *prev;
998 struct btrfs_inode *entry;
999 struct inode *inode;
1000
1001 spin_lock(&root->inode_lock);
1002again:
1003 node = root->inode_tree.rb_node;
1004 prev = NULL;
1005 while (node) {
1006 prev = node;
1007 entry = rb_entry(node, struct btrfs_inode, rb_node);
1008
1009 if (objectid < entry->vfs_inode.i_ino)
1010 node = node->rb_left;
1011 else if (objectid > entry->vfs_inode.i_ino)
1012 node = node->rb_right;
1013 else
1014 break;
1015 }
1016 if (!node) {
1017 while (prev) {
1018 entry = rb_entry(prev, struct btrfs_inode, rb_node);
1019 if (objectid <= entry->vfs_inode.i_ino) {
1020 node = prev;
1021 break;
1022 }
1023 prev = rb_next(prev);
1024 }
1025 }
1026 while (node) {
1027 entry = rb_entry(node, struct btrfs_inode, rb_node);
1028 inode = igrab(&entry->vfs_inode);
1029 if (inode) {
1030 spin_unlock(&root->inode_lock);
1031 return inode;
1032 }
1033
1034 objectid = entry->vfs_inode.i_ino + 1;
1035 if (cond_resched_lock(&root->inode_lock))
1036 goto again;
1037
1038 node = rb_next(node);
1039 }
1040 spin_unlock(&root->inode_lock);
1041 return NULL;
1042}
1043
1044static int in_block_group(u64 bytenr,
1045 struct btrfs_block_group_cache *block_group)
1046{
1047 if (bytenr >= block_group->key.objectid &&
1048 bytenr < block_group->key.objectid + block_group->key.offset)
1049 return 1;
1050 return 0;
1051}
1052
1053/*
1054 * get new location of data
1055 */
1056static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
1057 u64 bytenr, u64 num_bytes)
1058{
1059 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
1060 struct btrfs_path *path;
1061 struct btrfs_file_extent_item *fi;
1062 struct extent_buffer *leaf;
1063 int ret;
1064
1065 path = btrfs_alloc_path();
1066 if (!path)
1067 return -ENOMEM;
1068
1069 bytenr -= BTRFS_I(reloc_inode)->index_cnt;
1070 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
1071 bytenr, 0);
1072 if (ret < 0)
1073 goto out;
1074 if (ret > 0) {
1075 ret = -ENOENT;
1076 goto out;
1077 }
1078
1079 leaf = path->nodes[0];
1080 fi = btrfs_item_ptr(leaf, path->slots[0],
1081 struct btrfs_file_extent_item);
1082
1083 BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
1084 btrfs_file_extent_compression(leaf, fi) ||
1085 btrfs_file_extent_encryption(leaf, fi) ||
1086 btrfs_file_extent_other_encoding(leaf, fi));
1087
1088 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
1089 ret = 1;
1090 goto out;
1091 }
1092
1093 if (new_bytenr)
1094 *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1095 ret = 0;
1096out:
1097 btrfs_free_path(path);
1098 return ret;
1099}
1100
1101/*
1102 * update file extent items in the tree leaf to point to
1103 * the new locations.
1104 */
1105static int replace_file_extents(struct btrfs_trans_handle *trans,
1106 struct reloc_control *rc,
1107 struct btrfs_root *root,
1108 struct extent_buffer *leaf,
1109 struct list_head *inode_list)
1110{
1111 struct btrfs_key key;
1112 struct btrfs_file_extent_item *fi;
1113 struct inode *inode = NULL;
1114 struct inodevec *ivec = NULL;
1115 u64 parent;
1116 u64 bytenr;
1117 u64 new_bytenr;
1118 u64 num_bytes;
1119 u64 end;
1120 u32 nritems;
1121 u32 i;
1122 int ret;
1123 int first = 1;
1124 int dirty = 0;
1125
1126 if (rc->stage != UPDATE_DATA_PTRS)
1127 return 0;
1128
1129 /* reloc trees always use full backref */
1130 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1131 parent = leaf->start;
1132 else
1133 parent = 0;
1134
1135 nritems = btrfs_header_nritems(leaf);
1136 for (i = 0; i < nritems; i++) {
1137 cond_resched();
1138 btrfs_item_key_to_cpu(leaf, &key, i);
1139 if (key.type != BTRFS_EXTENT_DATA_KEY)
1140 continue;
1141 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1142 if (btrfs_file_extent_type(leaf, fi) ==
1143 BTRFS_FILE_EXTENT_INLINE)
1144 continue;
1145 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1146 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1147 if (bytenr == 0)
1148 continue;
1149 if (!in_block_group(bytenr, rc->block_group))
1150 continue;
1151
1152 /*
1153 * if we are modifying block in fs tree, wait for readpage
1154 * to complete and drop the extent cache
1155 */
1156 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1157 if (!ivec || ivec->nr == INODEVEC_SIZE) {
1158 ivec = kmalloc(sizeof(*ivec), GFP_NOFS);
1159 BUG_ON(!ivec);
1160 ivec->nr = 0;
1161 list_add_tail(&ivec->list, inode_list);
1162 }
1163 if (first) {
1164 inode = find_next_inode(root, key.objectid);
1165 if (inode)
1166 ivec->inode[ivec->nr++] = inode;
1167 first = 0;
1168 } else if (inode && inode->i_ino < key.objectid) {
1169 inode = find_next_inode(root, key.objectid);
1170 if (inode)
1171 ivec->inode[ivec->nr++] = inode;
1172 }
1173 if (inode && inode->i_ino == key.objectid) {
1174 end = key.offset +
1175 btrfs_file_extent_num_bytes(leaf, fi);
1176 WARN_ON(!IS_ALIGNED(key.offset,
1177 root->sectorsize));
1178 WARN_ON(!IS_ALIGNED(end, root->sectorsize));
1179 end--;
1180 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
1181 key.offset, end,
1182 GFP_NOFS);
1183 if (!ret)
1184 continue;
1185
1186 btrfs_drop_extent_cache(inode, key.offset, end,
1187 1);
1188 unlock_extent(&BTRFS_I(inode)->io_tree,
1189 key.offset, end, GFP_NOFS);
1190 }
1191 }
1192
1193 ret = get_new_location(rc->data_inode, &new_bytenr,
1194 bytenr, num_bytes);
1195 if (ret > 0)
1196 continue;
1197 BUG_ON(ret < 0);
1198
1199 btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
1200 dirty = 1;
1201
1202 key.offset -= btrfs_file_extent_offset(leaf, fi);
1203 ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
1204 num_bytes, parent,
1205 btrfs_header_owner(leaf),
1206 key.objectid, key.offset);
1207 BUG_ON(ret);
1208
1209 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
1210 parent, btrfs_header_owner(leaf),
1211 key.objectid, key.offset);
1212 BUG_ON(ret);
1213 }
1214 if (dirty)
1215 btrfs_mark_buffer_dirty(leaf);
1216 return 0;
1217}
1218
1219static noinline_for_stack
1220int memcmp_node_keys(struct extent_buffer *eb, int slot,
1221 struct btrfs_path *path, int level)
1222{
1223 struct btrfs_disk_key key1;
1224 struct btrfs_disk_key key2;
1225 btrfs_node_key(eb, &key1, slot);
1226 btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
1227 return memcmp(&key1, &key2, sizeof(key1));
1228}
1229
1230/*
1231 * try to replace tree blocks in fs tree with the new blocks
1232 * in reloc tree. tree blocks haven't been modified since the
1233 * reloc tree was create can be replaced.
1234 *
1235 * if a block was replaced, level of the block + 1 is returned.
1236 * if no block got replaced, 0 is returned. if there are other
1237 * errors, a negative error number is returned.
1238 */
1239static int replace_path(struct btrfs_trans_handle *trans,
1240 struct btrfs_root *dest, struct btrfs_root *src,
1241 struct btrfs_path *path, struct btrfs_key *next_key,
1242 struct extent_buffer **leaf,
1243 int lowest_level, int max_level)
1244{
1245 struct extent_buffer *eb;
1246 struct extent_buffer *parent;
1247 struct btrfs_key key;
1248 u64 old_bytenr;
1249 u64 new_bytenr;
1250 u64 old_ptr_gen;
1251 u64 new_ptr_gen;
1252 u64 last_snapshot;
1253 u32 blocksize;
1254 int level;
1255 int ret;
1256 int slot;
1257
1258 BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
1259 BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
1260 BUG_ON(lowest_level > 1 && leaf);
1261
1262 last_snapshot = btrfs_root_last_snapshot(&src->root_item);
1263
1264 slot = path->slots[lowest_level];
1265 btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
1266
1267 eb = btrfs_lock_root_node(dest);
1268 btrfs_set_lock_blocking(eb);
1269 level = btrfs_header_level(eb);
1270
1271 if (level < lowest_level) {
1272 btrfs_tree_unlock(eb);
1273 free_extent_buffer(eb);
1274 return 0;
1275 }
1276
1277 ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
1278 BUG_ON(ret);
1279 btrfs_set_lock_blocking(eb);
1280
1281 if (next_key) {
1282 next_key->objectid = (u64)-1;
1283 next_key->type = (u8)-1;
1284 next_key->offset = (u64)-1;
1285 }
1286
1287 parent = eb;
1288 while (1) {
1289 level = btrfs_header_level(parent);
1290 BUG_ON(level < lowest_level);
1291
1292 ret = btrfs_bin_search(parent, &key, level, &slot);
1293 if (ret && slot > 0)
1294 slot--;
1295
1296 if (next_key && slot + 1 < btrfs_header_nritems(parent))
1297 btrfs_node_key_to_cpu(parent, next_key, slot + 1);
1298
1299 old_bytenr = btrfs_node_blockptr(parent, slot);
1300 blocksize = btrfs_level_size(dest, level - 1);
1301 old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
1302
1303 if (level <= max_level) {
1304 eb = path->nodes[level];
1305 new_bytenr = btrfs_node_blockptr(eb,
1306 path->slots[level]);
1307 new_ptr_gen = btrfs_node_ptr_generation(eb,
1308 path->slots[level]);
1309 } else {
1310 new_bytenr = 0;
1311 new_ptr_gen = 0;
1312 }
1313
1314 if (new_bytenr > 0 && new_bytenr == old_bytenr) {
1315 WARN_ON(1);
1316 ret = level;
1317 break;
1318 }
1319
1320 if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
1321 memcmp_node_keys(parent, slot, path, level)) {
1322 if (level <= lowest_level && !leaf) {
1323 ret = 0;
1324 break;
1325 }
1326
1327 eb = read_tree_block(dest, old_bytenr, blocksize,
1328 old_ptr_gen);
1329 btrfs_tree_lock(eb);
1330 ret = btrfs_cow_block(trans, dest, eb, parent,
1331 slot, &eb);
1332 BUG_ON(ret);
1333 btrfs_set_lock_blocking(eb);
1334
1335 if (level <= lowest_level) {
1336 *leaf = eb;
1337 ret = 0;
1338 break;
1339 }
1340
1341 btrfs_tree_unlock(parent);
1342 free_extent_buffer(parent);
1343
1344 parent = eb;
1345 continue;
1346 }
1347
1348 btrfs_node_key_to_cpu(path->nodes[level], &key,
1349 path->slots[level]);
1350 btrfs_release_path(src, path);
1351
1352 path->lowest_level = level;
1353 ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
1354 path->lowest_level = 0;
1355 BUG_ON(ret);
1356
1357 /*
1358 * swap blocks in fs tree and reloc tree.
1359 */
1360 btrfs_set_node_blockptr(parent, slot, new_bytenr);
1361 btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
1362 btrfs_mark_buffer_dirty(parent);
1363
1364 btrfs_set_node_blockptr(path->nodes[level],
1365 path->slots[level], old_bytenr);
1366 btrfs_set_node_ptr_generation(path->nodes[level],
1367 path->slots[level], old_ptr_gen);
1368 btrfs_mark_buffer_dirty(path->nodes[level]);
1369
1370 ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
1371 path->nodes[level]->start,
1372 src->root_key.objectid, level - 1, 0);
1373 BUG_ON(ret);
1374 ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
1375 0, dest->root_key.objectid, level - 1,
1376 0);
1377 BUG_ON(ret);
1378
1379 ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
1380 path->nodes[level]->start,
1381 src->root_key.objectid, level - 1, 0);
1382 BUG_ON(ret);
1383
1384 ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
1385 0, dest->root_key.objectid, level - 1,
1386 0);
1387 BUG_ON(ret);
1388
1389 btrfs_unlock_up_safe(path, 0);
1390
1391 ret = level;
1392 break;
1393 }
1394 btrfs_tree_unlock(parent);
1395 free_extent_buffer(parent);
1396 return ret;
1397}
1398
1399/*
1400 * helper to find next relocated block in reloc tree
1401 */
1402static noinline_for_stack
1403int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1404 int *level)
1405{
1406 struct extent_buffer *eb;
1407 int i;
1408 u64 last_snapshot;
1409 u32 nritems;
1410
1411 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1412
1413 for (i = 0; i < *level; i++) {
1414 free_extent_buffer(path->nodes[i]);
1415 path->nodes[i] = NULL;
1416 }
1417
1418 for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
1419 eb = path->nodes[i];
1420 nritems = btrfs_header_nritems(eb);
1421 while (path->slots[i] + 1 < nritems) {
1422 path->slots[i]++;
1423 if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
1424 last_snapshot)
1425 continue;
1426
1427 *level = i;
1428 return 0;
1429 }
1430 free_extent_buffer(path->nodes[i]);
1431 path->nodes[i] = NULL;
1432 }
1433 return 1;
1434}
1435
1436/*
1437 * walk down reloc tree to find relocated block of lowest level
1438 */
1439static noinline_for_stack
1440int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1441 int *level)
1442{
1443 struct extent_buffer *eb = NULL;
1444 int i;
1445 u64 bytenr;
1446 u64 ptr_gen = 0;
1447 u64 last_snapshot;
1448 u32 blocksize;
1449 u32 nritems;
1450
1451 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1452
1453 for (i = *level; i > 0; i--) {
1454 eb = path->nodes[i];
1455 nritems = btrfs_header_nritems(eb);
1456 while (path->slots[i] < nritems) {
1457 ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
1458 if (ptr_gen > last_snapshot)
1459 break;
1460 path->slots[i]++;
1461 }
1462 if (path->slots[i] >= nritems) {
1463 if (i == *level)
1464 break;
1465 *level = i + 1;
1466 return 0;
1467 }
1468 if (i == 1) {
1469 *level = i;
1470 return 0;
1471 }
1472
1473 bytenr = btrfs_node_blockptr(eb, path->slots[i]);
1474 blocksize = btrfs_level_size(root, i - 1);
1475 eb = read_tree_block(root, bytenr, blocksize, ptr_gen);
1476 BUG_ON(btrfs_header_level(eb) != i - 1);
1477 path->nodes[i - 1] = eb;
1478 path->slots[i - 1] = 0;
1479 }
1480 return 1;
1481}
1482
1483/*
1484 * invalidate extent cache for file extents whose key in range of
1485 * [min_key, max_key)
1486 */
1487static int invalidate_extent_cache(struct btrfs_root *root,
1488 struct btrfs_key *min_key,
1489 struct btrfs_key *max_key)
1490{
1491 struct inode *inode = NULL;
1492 u64 objectid;
1493 u64 start, end;
1494
1495 objectid = min_key->objectid;
1496 while (1) {
1497 cond_resched();
1498 iput(inode);
1499
1500 if (objectid > max_key->objectid)
1501 break;
1502
1503 inode = find_next_inode(root, objectid);
1504 if (!inode)
1505 break;
1506
1507 if (inode->i_ino > max_key->objectid) {
1508 iput(inode);
1509 break;
1510 }
1511
1512 objectid = inode->i_ino + 1;
1513 if (!S_ISREG(inode->i_mode))
1514 continue;
1515
1516 if (unlikely(min_key->objectid == inode->i_ino)) {
1517 if (min_key->type > BTRFS_EXTENT_DATA_KEY)
1518 continue;
1519 if (min_key->type < BTRFS_EXTENT_DATA_KEY)
1520 start = 0;
1521 else {
1522 start = min_key->offset;
1523 WARN_ON(!IS_ALIGNED(start, root->sectorsize));
1524 }
1525 } else {
1526 start = 0;
1527 }
1528
1529 if (unlikely(max_key->objectid == inode->i_ino)) {
1530 if (max_key->type < BTRFS_EXTENT_DATA_KEY)
1531 continue;
1532 if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
1533 end = (u64)-1;
1534 } else {
1535 if (max_key->offset == 0)
1536 continue;
1537 end = max_key->offset;
1538 WARN_ON(!IS_ALIGNED(end, root->sectorsize));
1539 end--;
1540 }
1541 } else {
1542 end = (u64)-1;
1543 }
1544
1545 /* the lock_extent waits for readpage to complete */
1546 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
1547 btrfs_drop_extent_cache(inode, start, end, 1);
1548 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
1549 }
1550 return 0;
1551}
1552
1553static int find_next_key(struct btrfs_path *path, int level,
1554 struct btrfs_key *key)
1555
1556{
1557 while (level < BTRFS_MAX_LEVEL) {
1558 if (!path->nodes[level])
1559 break;
1560 if (path->slots[level] + 1 <
1561 btrfs_header_nritems(path->nodes[level])) {
1562 btrfs_node_key_to_cpu(path->nodes[level], key,
1563 path->slots[level] + 1);
1564 return 0;
1565 }
1566 level++;
1567 }
1568 return 1;
1569}
1570
1571/*
1572 * merge the relocated tree blocks in reloc tree with corresponding
1573 * fs tree.
1574 */
1575static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1576 struct btrfs_root *root)
1577{
1578 LIST_HEAD(inode_list);
1579 struct btrfs_key key;
1580 struct btrfs_key next_key;
1581 struct btrfs_trans_handle *trans;
1582 struct btrfs_root *reloc_root;
1583 struct btrfs_root_item *root_item;
1584 struct btrfs_path *path;
1585 struct extent_buffer *leaf = NULL;
1586 unsigned long nr;
1587 int level;
1588 int max_level;
1589 int replaced = 0;
1590 int ret;
1591 int err = 0;
1592
1593 path = btrfs_alloc_path();
1594 if (!path)
1595 return -ENOMEM;
1596
1597 reloc_root = root->reloc_root;
1598 root_item = &reloc_root->root_item;
1599
1600 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1601 level = btrfs_root_level(root_item);
1602 extent_buffer_get(reloc_root->node);
1603 path->nodes[level] = reloc_root->node;
1604 path->slots[level] = 0;
1605 } else {
1606 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1607
1608 level = root_item->drop_level;
1609 BUG_ON(level == 0);
1610 path->lowest_level = level;
1611 ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
1612 if (ret < 0) {
1613 btrfs_free_path(path);
1614 return ret;
1615 }
1616
1617 btrfs_node_key_to_cpu(path->nodes[level], &next_key,
1618 path->slots[level]);
1619 WARN_ON(memcmp(&key, &next_key, sizeof(key)));
1620
1621 btrfs_unlock_up_safe(path, 0);
1622 }
1623
1624 if (level == 0 && rc->stage == UPDATE_DATA_PTRS) {
1625 trans = btrfs_start_transaction(root, 1);
1626
1627 leaf = path->nodes[0];
1628 btrfs_item_key_to_cpu(leaf, &key, 0);
1629 btrfs_release_path(reloc_root, path);
1630
1631 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1632 if (ret < 0) {
1633 err = ret;
1634 goto out;
1635 }
1636
1637 leaf = path->nodes[0];
1638 btrfs_unlock_up_safe(path, 1);
1639 ret = replace_file_extents(trans, rc, root, leaf,
1640 &inode_list);
1641 if (ret < 0)
1642 err = ret;
1643 goto out;
1644 }
1645
1646 memset(&next_key, 0, sizeof(next_key));
1647
1648 while (1) {
1649 leaf = NULL;
1650 replaced = 0;
1651 trans = btrfs_start_transaction(root, 1);
1652 max_level = level;
1653
1654 ret = walk_down_reloc_tree(reloc_root, path, &level);
1655 if (ret < 0) {
1656 err = ret;
1657 goto out;
1658 }
1659 if (ret > 0)
1660 break;
1661
1662 if (!find_next_key(path, level, &key) &&
1663 btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
1664 ret = 0;
1665 } else if (level == 1 && rc->stage == UPDATE_DATA_PTRS) {
1666 ret = replace_path(trans, root, reloc_root,
1667 path, &next_key, &leaf,
1668 level, max_level);
1669 } else {
1670 ret = replace_path(trans, root, reloc_root,
1671 path, &next_key, NULL,
1672 level, max_level);
1673 }
1674 if (ret < 0) {
1675 err = ret;
1676 goto out;
1677 }
1678
1679 if (ret > 0) {
1680 level = ret;
1681 btrfs_node_key_to_cpu(path->nodes[level], &key,
1682 path->slots[level]);
1683 replaced = 1;
1684 } else if (leaf) {
1685 /*
1686 * no block got replaced, try replacing file extents
1687 */
1688 btrfs_item_key_to_cpu(leaf, &key, 0);
1689 ret = replace_file_extents(trans, rc, root, leaf,
1690 &inode_list);
1691 btrfs_tree_unlock(leaf);
1692 free_extent_buffer(leaf);
1693 BUG_ON(ret < 0);
1694 }
1695
1696 ret = walk_up_reloc_tree(reloc_root, path, &level);
1697 if (ret > 0)
1698 break;
1699
1700 BUG_ON(level == 0);
1701 /*
1702 * save the merging progress in the drop_progress.
1703 * this is OK since root refs == 1 in this case.
1704 */
1705 btrfs_node_key(path->nodes[level], &root_item->drop_progress,
1706 path->slots[level]);
1707 root_item->drop_level = level;
1708
1709 nr = trans->blocks_used;
1710 btrfs_end_transaction(trans, root);
1711
1712 btrfs_btree_balance_dirty(root, nr);
1713
1714 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1715 invalidate_extent_cache(root, &key, &next_key);
1716 }
1717
1718 /*
1719 * handle the case only one block in the fs tree need to be
1720 * relocated and the block is tree root.
1721 */
1722 leaf = btrfs_lock_root_node(root);
1723 ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf);
1724 btrfs_tree_unlock(leaf);
1725 free_extent_buffer(leaf);
1726 if (ret < 0)
1727 err = ret;
1728out:
1729 btrfs_free_path(path);
1730
1731 if (err == 0) {
1732 memset(&root_item->drop_progress, 0,
1733 sizeof(root_item->drop_progress));
1734 root_item->drop_level = 0;
1735 btrfs_set_root_refs(root_item, 0);
1736 }
1737
1738 nr = trans->blocks_used;
1739 btrfs_end_transaction(trans, root);
1740
1741 btrfs_btree_balance_dirty(root, nr);
1742
1743 /*
1744 * put inodes while we aren't holding the tree locks
1745 */
1746 while (!list_empty(&inode_list)) {
1747 struct inodevec *ivec;
1748 ivec = list_entry(inode_list.next, struct inodevec, list);
1749 list_del(&ivec->list);
1750 while (ivec->nr > 0) {
1751 ivec->nr--;
1752 iput(ivec->inode[ivec->nr]);
1753 }
1754 kfree(ivec);
1755 }
1756
1757 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1758 invalidate_extent_cache(root, &key, &next_key);
1759
1760 return err;
1761}
1762
1763/*
1764 * callback for the work threads.
1765 * this function merges reloc tree with corresponding fs tree,
1766 * and then drops the reloc tree.
1767 */
1768static void merge_func(struct btrfs_work *work)
1769{
1770 struct btrfs_trans_handle *trans;
1771 struct btrfs_root *root;
1772 struct btrfs_root *reloc_root;
1773 struct async_merge *async;
1774
1775 async = container_of(work, struct async_merge, work);
1776 reloc_root = async->root;
1777
1778 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
1779 root = read_fs_root(reloc_root->fs_info,
1780 reloc_root->root_key.offset);
1781 BUG_ON(IS_ERR(root));
1782 BUG_ON(root->reloc_root != reloc_root);
1783
1784 merge_reloc_root(async->rc, root);
1785
1786 trans = btrfs_start_transaction(root, 1);
1787 btrfs_update_reloc_root(trans, root);
1788 btrfs_end_transaction(trans, root);
1789 }
1790
1791 btrfs_drop_dead_root(reloc_root);
1792
1793 if (atomic_dec_and_test(async->num_pending))
1794 complete(async->done);
1795
1796 kfree(async);
1797}
1798
1799static int merge_reloc_roots(struct reloc_control *rc)
1800{
1801 struct async_merge *async;
1802 struct btrfs_root *root;
1803 struct completion done;
1804 atomic_t num_pending;
1805
1806 init_completion(&done);
1807 atomic_set(&num_pending, 1);
1808
1809 while (!list_empty(&rc->reloc_roots)) {
1810 root = list_entry(rc->reloc_roots.next,
1811 struct btrfs_root, root_list);
1812 list_del_init(&root->root_list);
1813
1814 async = kmalloc(sizeof(*async), GFP_NOFS);
1815 BUG_ON(!async);
1816 async->work.func = merge_func;
1817 async->work.flags = 0;
1818 async->rc = rc;
1819 async->root = root;
1820 async->done = &done;
1821 async->num_pending = &num_pending;
1822 atomic_inc(&num_pending);
1823 btrfs_queue_worker(&rc->workers, &async->work);
1824 }
1825
1826 if (!atomic_dec_and_test(&num_pending))
1827 wait_for_completion(&done);
1828
1829 BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
1830 return 0;
1831}
1832
1833static void free_block_list(struct rb_root *blocks)
1834{
1835 struct tree_block *block;
1836 struct rb_node *rb_node;
1837 while ((rb_node = rb_first(blocks))) {
1838 block = rb_entry(rb_node, struct tree_block, rb_node);
1839 rb_erase(rb_node, blocks);
1840 kfree(block);
1841 }
1842}
1843
1844static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
1845 struct btrfs_root *reloc_root)
1846{
1847 struct btrfs_root *root;
1848
1849 if (reloc_root->last_trans == trans->transid)
1850 return 0;
1851
1852 root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset);
1853 BUG_ON(IS_ERR(root));
1854 BUG_ON(root->reloc_root != reloc_root);
1855
1856 return btrfs_record_root_in_trans(trans, root);
1857}
1858
1859/*
1860 * select one tree from trees that references the block.
1861 * for blocks in refernce counted trees, we preper reloc tree.
1862 * if no reloc tree found and reloc_only is true, NULL is returned.
1863 */
1864static struct btrfs_root *__select_one_root(struct btrfs_trans_handle *trans,
1865 struct backref_node *node,
1866 struct backref_edge *edges[],
1867 int *nr, int reloc_only)
1868{
1869 struct backref_node *next;
1870 struct btrfs_root *root;
1871 int index;
1872 int loop = 0;
1873again:
1874 index = 0;
1875 next = node;
1876 while (1) {
1877 cond_resched();
1878 next = walk_up_backref(next, edges, &index);
1879 root = next->root;
1880 if (!root) {
1881 BUG_ON(!node->old_root);
1882 goto skip;
1883 }
1884
1885 /* no other choice for non-refernce counted tree */
1886 if (!root->ref_cows) {
1887 BUG_ON(reloc_only);
1888 break;
1889 }
1890
1891 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
1892 record_reloc_root_in_trans(trans, root);
1893 break;
1894 }
1895
1896 if (loop) {
1897 btrfs_record_root_in_trans(trans, root);
1898 break;
1899 }
1900
1901 if (reloc_only || next != node) {
1902 if (!root->reloc_root)
1903 btrfs_record_root_in_trans(trans, root);
1904 root = root->reloc_root;
1905 /*
1906 * if the reloc tree was created in current
1907 * transation, there is no node in backref tree
1908 * corresponds to the root of the reloc tree.
1909 */
1910 if (btrfs_root_last_snapshot(&root->root_item) ==
1911 trans->transid - 1)
1912 break;
1913 }
1914skip:
1915 root = NULL;
1916 next = walk_down_backref(edges, &index);
1917 if (!next || next->level <= node->level)
1918 break;
1919 }
1920
1921 if (!root && !loop && !reloc_only) {
1922 loop = 1;
1923 goto again;
1924 }
1925
1926 if (root)
1927 *nr = index;
1928 else
1929 *nr = 0;
1930
1931 return root;
1932}
1933
1934static noinline_for_stack
1935struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,
1936 struct backref_node *node)
1937{
1938 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
1939 int nr;
1940 return __select_one_root(trans, node, edges, &nr, 0);
1941}
1942
1943static noinline_for_stack
1944struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
1945 struct backref_node *node,
1946 struct backref_edge *edges[], int *nr)
1947{
1948 return __select_one_root(trans, node, edges, nr, 1);
1949}
1950
1951static void grab_path_buffers(struct btrfs_path *path,
1952 struct backref_node *node,
1953 struct backref_edge *edges[], int nr)
1954{
1955 int i = 0;
1956 while (1) {
1957 drop_node_buffer(node);
1958 node->eb = path->nodes[node->level];
1959 BUG_ON(!node->eb);
1960 if (path->locks[node->level])
1961 node->locked = 1;
1962 path->nodes[node->level] = NULL;
1963 path->locks[node->level] = 0;
1964
1965 if (i >= nr)
1966 break;
1967
1968 edges[i]->blockptr = node->eb->start;
1969 node = edges[i]->node[UPPER];
1970 i++;
1971 }
1972}
1973
1974/*
1975 * relocate a block tree, and then update pointers in upper level
1976 * blocks that reference the block to point to the new location.
1977 *
1978 * if called by link_to_upper, the block has already been relocated.
1979 * in that case this function just updates pointers.
1980 */
1981static int do_relocation(struct btrfs_trans_handle *trans,
1982 struct backref_node *node,
1983 struct btrfs_key *key,
1984 struct btrfs_path *path, int lowest)
1985{
1986 struct backref_node *upper;
1987 struct backref_edge *edge;
1988 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
1989 struct btrfs_root *root;
1990 struct extent_buffer *eb;
1991 u32 blocksize;
1992 u64 bytenr;
1993 u64 generation;
1994 int nr;
1995 int slot;
1996 int ret;
1997 int err = 0;
1998
1999 BUG_ON(lowest && node->eb);
2000
2001 path->lowest_level = node->level + 1;
2002 list_for_each_entry(edge, &node->upper, list[LOWER]) {
2003 cond_resched();
2004 if (node->eb && node->eb->start == edge->blockptr)
2005 continue;
2006
2007 upper = edge->node[UPPER];
2008 root = select_reloc_root(trans, upper, edges, &nr);
2009 if (!root)
2010 continue;
2011
2012 if (upper->eb && !upper->locked)
2013 drop_node_buffer(upper);
2014
2015 if (!upper->eb) {
2016 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2017 if (ret < 0) {
2018 err = ret;
2019 break;
2020 }
2021 BUG_ON(ret > 0);
2022
2023 slot = path->slots[upper->level];
2024
2025 btrfs_unlock_up_safe(path, upper->level + 1);
2026 grab_path_buffers(path, upper, edges, nr);
2027
2028 btrfs_release_path(NULL, path);
2029 } else {
2030 ret = btrfs_bin_search(upper->eb, key, upper->level,
2031 &slot);
2032 BUG_ON(ret);
2033 }
2034
2035 bytenr = btrfs_node_blockptr(upper->eb, slot);
2036 if (!lowest) {
2037 if (node->eb->start == bytenr) {
2038 btrfs_tree_unlock(upper->eb);
2039 upper->locked = 0;
2040 continue;
2041 }
2042 } else {
2043 BUG_ON(node->bytenr != bytenr);
2044 }
2045
2046 blocksize = btrfs_level_size(root, node->level);
2047 generation = btrfs_node_ptr_generation(upper->eb, slot);
2048 eb = read_tree_block(root, bytenr, blocksize, generation);
2049 btrfs_tree_lock(eb);
2050 btrfs_set_lock_blocking(eb);
2051
2052 if (!node->eb) {
2053 ret = btrfs_cow_block(trans, root, eb, upper->eb,
2054 slot, &eb);
2055 if (ret < 0) {
2056 err = ret;
2057 break;
2058 }
2059 btrfs_set_lock_blocking(eb);
2060 node->eb = eb;
2061 node->locked = 1;
2062 } else {
2063 btrfs_set_node_blockptr(upper->eb, slot,
2064 node->eb->start);
2065 btrfs_set_node_ptr_generation(upper->eb, slot,
2066 trans->transid);
2067 btrfs_mark_buffer_dirty(upper->eb);
2068
2069 ret = btrfs_inc_extent_ref(trans, root,
2070 node->eb->start, blocksize,
2071 upper->eb->start,
2072 btrfs_header_owner(upper->eb),
2073 node->level, 0);
2074 BUG_ON(ret);
2075
2076 ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
2077 BUG_ON(ret);
2078
2079 btrfs_tree_unlock(eb);
2080 free_extent_buffer(eb);
2081 }
2082 if (!lowest) {
2083 btrfs_tree_unlock(upper->eb);
2084 upper->locked = 0;
2085 }
2086 }
2087 path->lowest_level = 0;
2088 return err;
2089}
2090
2091static int link_to_upper(struct btrfs_trans_handle *trans,
2092 struct backref_node *node,
2093 struct btrfs_path *path)
2094{
2095 struct btrfs_key key;
2096 if (!node->eb || list_empty(&node->upper))
2097 return 0;
2098
2099 btrfs_node_key_to_cpu(node->eb, &key, 0);
2100 return do_relocation(trans, node, &key, path, 0);
2101}
2102
2103static int finish_pending_nodes(struct btrfs_trans_handle *trans,
2104 struct backref_cache *cache,
2105 struct btrfs_path *path)
2106{
2107 struct backref_node *node;
2108 int level;
2109 int ret;
2110 int err = 0;
2111
2112 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2113 while (!list_empty(&cache->pending[level])) {
2114 node = list_entry(cache->pending[level].next,
2115 struct backref_node, lower);
2116 BUG_ON(node->level != level);
2117
2118 ret = link_to_upper(trans, node, path);
2119 if (ret < 0)
2120 err = ret;
2121 /*
2122 * this remove the node from the pending list and
2123 * may add some other nodes to the level + 1
2124 * pending list
2125 */
2126 remove_backref_node(cache, node);
2127 }
2128 }
2129 BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root));
2130 return err;
2131}
2132
2133static void mark_block_processed(struct reloc_control *rc,
2134 struct backref_node *node)
2135{
2136 u32 blocksize;
2137 if (node->level == 0 ||
2138 in_block_group(node->bytenr, rc->block_group)) {
2139 blocksize = btrfs_level_size(rc->extent_root, node->level);
2140 set_extent_bits(&rc->processed_blocks, node->bytenr,
2141 node->bytenr + blocksize - 1, EXTENT_DIRTY,
2142 GFP_NOFS);
2143 }
2144 node->processed = 1;
2145}
2146
2147/*
2148 * mark a block and all blocks directly/indirectly reference the block
2149 * as processed.
2150 */
2151static void update_processed_blocks(struct reloc_control *rc,
2152 struct backref_node *node)
2153{
2154 struct backref_node *next = node;
2155 struct backref_edge *edge;
2156 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2157 int index = 0;
2158
2159 while (next) {
2160 cond_resched();
2161 while (1) {
2162 if (next->processed)
2163 break;
2164
2165 mark_block_processed(rc, next);
2166
2167 if (list_empty(&next->upper))
2168 break;
2169
2170 edge = list_entry(next->upper.next,
2171 struct backref_edge, list[LOWER]);
2172 edges[index++] = edge;
2173 next = edge->node[UPPER];
2174 }
2175 next = walk_down_backref(edges, &index);
2176 }
2177}
2178
2179static int tree_block_processed(u64 bytenr, u32 blocksize,
2180 struct reloc_control *rc)
2181{
2182 if (test_range_bit(&rc->processed_blocks, bytenr,
2183 bytenr + blocksize - 1, EXTENT_DIRTY, 1))
2184 return 1;
2185 return 0;
2186}
2187
2188/*
2189 * check if there are any file extent pointers in the leaf point to
2190 * data require processing
2191 */
2192static int check_file_extents(struct reloc_control *rc,
2193 u64 bytenr, u32 blocksize, u64 ptr_gen)
2194{
2195 struct btrfs_key found_key;
2196 struct btrfs_file_extent_item *fi;
2197 struct extent_buffer *leaf;
2198 u32 nritems;
2199 int i;
2200 int ret = 0;
2201
2202 leaf = read_tree_block(rc->extent_root, bytenr, blocksize, ptr_gen);
2203
2204 nritems = btrfs_header_nritems(leaf);
2205 for (i = 0; i < nritems; i++) {
2206 cond_resched();
2207 btrfs_item_key_to_cpu(leaf, &found_key, i);
2208 if (found_key.type != BTRFS_EXTENT_DATA_KEY)
2209 continue;
2210 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
2211 if (btrfs_file_extent_type(leaf, fi) ==
2212 BTRFS_FILE_EXTENT_INLINE)
2213 continue;
2214 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2215 if (bytenr == 0)
2216 continue;
2217 if (in_block_group(bytenr, rc->block_group)) {
2218 ret = 1;
2219 break;
2220 }
2221 }
2222 free_extent_buffer(leaf);
2223 return ret;
2224}
2225
2226/*
2227 * scan child blocks of a given block to find blocks require processing
2228 */
2229static int add_child_blocks(struct btrfs_trans_handle *trans,
2230 struct reloc_control *rc,
2231 struct backref_node *node,
2232 struct rb_root *blocks)
2233{
2234 struct tree_block *block;
2235 struct rb_node *rb_node;
2236 u64 bytenr;
2237 u64 ptr_gen;
2238 u32 blocksize;
2239 u32 nritems;
2240 int i;
2241 int err = 0;
2242
2243 nritems = btrfs_header_nritems(node->eb);
2244 blocksize = btrfs_level_size(rc->extent_root, node->level - 1);
2245 for (i = 0; i < nritems; i++) {
2246 cond_resched();
2247 bytenr = btrfs_node_blockptr(node->eb, i);
2248 ptr_gen = btrfs_node_ptr_generation(node->eb, i);
2249 if (ptr_gen == trans->transid)
2250 continue;
2251 if (!in_block_group(bytenr, rc->block_group) &&
2252 (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS))
2253 continue;
2254 if (tree_block_processed(bytenr, blocksize, rc))
2255 continue;
2256
2257 readahead_tree_block(rc->extent_root,
2258 bytenr, blocksize, ptr_gen);
2259 }
2260
2261 for (i = 0; i < nritems; i++) {
2262 cond_resched();
2263 bytenr = btrfs_node_blockptr(node->eb, i);
2264 ptr_gen = btrfs_node_ptr_generation(node->eb, i);
2265 if (ptr_gen == trans->transid)
2266 continue;
2267 if (!in_block_group(bytenr, rc->block_group) &&
2268 (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS))
2269 continue;
2270 if (tree_block_processed(bytenr, blocksize, rc))
2271 continue;
2272 if (!in_block_group(bytenr, rc->block_group) &&
2273 !check_file_extents(rc, bytenr, blocksize, ptr_gen))
2274 continue;
2275
2276 block = kmalloc(sizeof(*block), GFP_NOFS);
2277 if (!block) {
2278 err = -ENOMEM;
2279 break;
2280 }
2281 block->bytenr = bytenr;
2282 btrfs_node_key_to_cpu(node->eb, &block->key, i);
2283 block->level = node->level - 1;
2284 block->key_ready = 1;
2285 rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
2286 BUG_ON(rb_node);
2287 }
2288 if (err)
2289 free_block_list(blocks);
2290 return err;
2291}
2292
2293/*
2294 * find adjacent blocks require processing
2295 */
2296static noinline_for_stack
2297int add_adjacent_blocks(struct btrfs_trans_handle *trans,
2298 struct reloc_control *rc,
2299 struct backref_cache *cache,
2300 struct rb_root *blocks, int level,
2301 struct backref_node **upper)
2302{
2303 struct backref_node *node;
2304 int ret = 0;
2305
2306 WARN_ON(!list_empty(&cache->pending[level]));
2307
2308 if (list_empty(&cache->pending[level + 1]))
2309 return 1;
2310
2311 node = list_entry(cache->pending[level + 1].next,
2312 struct backref_node, lower);
2313 if (node->eb)
2314 ret = add_child_blocks(trans, rc, node, blocks);
2315
2316 *upper = node;
2317 return ret;
2318}
2319
2320static int get_tree_block_key(struct reloc_control *rc,
2321 struct tree_block *block)
2322{
2323 struct extent_buffer *eb;
2324
2325 BUG_ON(block->key_ready);
2326 eb = read_tree_block(rc->extent_root, block->bytenr,
2327 block->key.objectid, block->key.offset);
2328 WARN_ON(btrfs_header_level(eb) != block->level);
2329 if (block->level == 0)
2330 btrfs_item_key_to_cpu(eb, &block->key, 0);
2331 else
2332 btrfs_node_key_to_cpu(eb, &block->key, 0);
2333 free_extent_buffer(eb);
2334 block->key_ready = 1;
2335 return 0;
2336}
2337
2338static int reada_tree_block(struct reloc_control *rc,
2339 struct tree_block *block)
2340{
2341 BUG_ON(block->key_ready);
2342 readahead_tree_block(rc->extent_root, block->bytenr,
2343 block->key.objectid, block->key.offset);
2344 return 0;
2345}
2346
2347/*
2348 * helper function to relocate a tree block
2349 */
2350static int relocate_tree_block(struct btrfs_trans_handle *trans,
2351 struct reloc_control *rc,
2352 struct backref_node *node,
2353 struct btrfs_key *key,
2354 struct btrfs_path *path)
2355{
2356 struct btrfs_root *root;
2357 int ret;
2358
2359 root = select_one_root(trans, node);
2360 if (unlikely(!root)) {
2361 rc->found_old_snapshot = 1;
2362 update_processed_blocks(rc, node);
2363 return 0;
2364 }
2365
2366 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2367 ret = do_relocation(trans, node, key, path, 1);
2368 if (ret < 0)
2369 goto out;
2370 if (node->level == 0 && rc->stage == UPDATE_DATA_PTRS) {
2371 ret = replace_file_extents(trans, rc, root,
2372 node->eb, NULL);
2373 if (ret < 0)
2374 goto out;
2375 }
2376 drop_node_buffer(node);
2377 } else if (!root->ref_cows) {
2378 path->lowest_level = node->level;
2379 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2380 btrfs_release_path(root, path);
2381 if (ret < 0)
2382 goto out;
2383 } else if (root != node->root) {
2384 WARN_ON(node->level > 0 || rc->stage != UPDATE_DATA_PTRS);
2385 }
2386
2387 update_processed_blocks(rc, node);
2388 ret = 0;
2389out:
2390 drop_node_buffer(node);
2391 return ret;
2392}
2393
2394/*
2395 * relocate a list of blocks
2396 */
2397static noinline_for_stack
2398int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2399 struct reloc_control *rc, struct rb_root *blocks)
2400{
2401 struct backref_cache *cache;
2402 struct backref_node *node;
2403 struct btrfs_path *path;
2404 struct tree_block *block;
2405 struct rb_node *rb_node;
2406 int level = -1;
2407 int ret;
2408 int err = 0;
2409
2410 path = btrfs_alloc_path();
2411 if (!path)
2412 return -ENOMEM;
2413
2414 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2415 if (!cache) {
2416 btrfs_free_path(path);
2417 return -ENOMEM;
2418 }
2419
2420 backref_cache_init(cache);
2421
2422 rb_node = rb_first(blocks);
2423 while (rb_node) {
2424 block = rb_entry(rb_node, struct tree_block, rb_node);
2425 if (level == -1)
2426 level = block->level;
2427 else
2428 BUG_ON(level != block->level);
2429 if (!block->key_ready)
2430 reada_tree_block(rc, block);
2431 rb_node = rb_next(rb_node);
2432 }
2433
2434 rb_node = rb_first(blocks);
2435 while (rb_node) {
2436 block = rb_entry(rb_node, struct tree_block, rb_node);
2437 if (!block->key_ready)
2438 get_tree_block_key(rc, block);
2439 rb_node = rb_next(rb_node);
2440 }
2441
2442 rb_node = rb_first(blocks);
2443 while (rb_node) {
2444 block = rb_entry(rb_node, struct tree_block, rb_node);
2445
2446 node = build_backref_tree(rc, cache, &block->key,
2447 block->level, block->bytenr);
2448 if (IS_ERR(node)) {
2449 err = PTR_ERR(node);
2450 goto out;
2451 }
2452
2453 ret = relocate_tree_block(trans, rc, node, &block->key,
2454 path);
2455 if (ret < 0) {
2456 err = ret;
2457 goto out;
2458 }
2459 remove_backref_node(cache, node);
2460 rb_node = rb_next(rb_node);
2461 }
2462
2463 if (level > 0)
2464 goto out;
2465
2466 free_block_list(blocks);
2467
2468 /*
2469 * now backrefs of some upper level tree blocks have been cached,
2470 * try relocating blocks referenced by these upper level blocks.
2471 */
2472 while (1) {
2473 struct backref_node *upper = NULL;
2474 if (trans->transaction->in_commit ||
2475 trans->transaction->delayed_refs.flushing)
2476 break;
2477
2478 ret = add_adjacent_blocks(trans, rc, cache, blocks, level,
2479 &upper);
2480 if (ret < 0)
2481 err = ret;
2482 if (ret != 0)
2483 break;
2484
2485 rb_node = rb_first(blocks);
2486 while (rb_node) {
2487 block = rb_entry(rb_node, struct tree_block, rb_node);
2488 if (trans->transaction->in_commit ||
2489 trans->transaction->delayed_refs.flushing)
2490 goto out;
2491 BUG_ON(!block->key_ready);
2492 node = build_backref_tree(rc, cache, &block->key,
2493 level, block->bytenr);
2494 if (IS_ERR(node)) {
2495 err = PTR_ERR(node);
2496 goto out;
2497 }
2498
2499 ret = relocate_tree_block(trans, rc, node,
2500 &block->key, path);
2501 if (ret < 0) {
2502 err = ret;
2503 goto out;
2504 }
2505 remove_backref_node(cache, node);
2506 rb_node = rb_next(rb_node);
2507 }
2508 free_block_list(blocks);
2509
2510 if (upper) {
2511 ret = link_to_upper(trans, upper, path);
2512 if (ret < 0) {
2513 err = ret;
2514 break;
2515 }
2516 remove_backref_node(cache, upper);
2517 }
2518 }
2519out:
2520 free_block_list(blocks);
2521
2522 ret = finish_pending_nodes(trans, cache, path);
2523 if (ret < 0)
2524 err = ret;
2525
2526 kfree(cache);
2527 btrfs_free_path(path);
2528 return err;
2529}
2530
2531static noinline_for_stack
2532int relocate_inode_pages(struct inode *inode, u64 start, u64 len)
2533{
2534 u64 page_start;
2535 u64 page_end;
2536 unsigned long i;
2537 unsigned long first_index;
2538 unsigned long last_index;
2539 unsigned int total_read = 0;
2540 unsigned int total_dirty = 0;
2541 struct page *page;
2542 struct file_ra_state *ra;
2543 struct btrfs_ordered_extent *ordered;
2544 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2545 int ret = 0;
2546
2547 ra = kzalloc(sizeof(*ra), GFP_NOFS);
2548 if (!ra)
2549 return -ENOMEM;
2550
2551 mutex_lock(&inode->i_mutex);
2552 first_index = start >> PAGE_CACHE_SHIFT;
2553 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2554
2555 /* make sure the dirty trick played by the caller work */
2556 ret = invalidate_inode_pages2_range(inode->i_mapping,
2557 first_index, last_index);
2558 if (ret)
2559 goto out_unlock;
2560
2561 file_ra_state_init(ra, inode->i_mapping);
2562
2563 for (i = first_index ; i <= last_index; i++) {
2564 if (total_read % ra->ra_pages == 0) {
2565 btrfs_force_ra(inode->i_mapping, ra, NULL, i,
2566 min(last_index, ra->ra_pages + i - 1));
2567 }
2568 total_read++;
2569again:
2570 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
2571 BUG_ON(1);
2572 page = grab_cache_page(inode->i_mapping, i);
2573 if (!page) {
2574 ret = -ENOMEM;
2575 goto out_unlock;
2576 }
2577 if (!PageUptodate(page)) {
2578 btrfs_readpage(NULL, page);
2579 lock_page(page);
2580 if (!PageUptodate(page)) {
2581 unlock_page(page);
2582 page_cache_release(page);
2583 ret = -EIO;
2584 goto out_unlock;
2585 }
2586 }
2587 wait_on_page_writeback(page);
2588
2589 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2590 page_end = page_start + PAGE_CACHE_SIZE - 1;
2591 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2592
2593 ordered = btrfs_lookup_ordered_extent(inode, page_start);
2594 if (ordered) {
2595 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2596 unlock_page(page);
2597 page_cache_release(page);
2598 btrfs_start_ordered_extent(inode, ordered, 1);
2599 btrfs_put_ordered_extent(ordered);
2600 goto again;
2601 }
2602 set_page_extent_mapped(page);
2603
2604 if (i == first_index)
2605 set_extent_bits(io_tree, page_start, page_end,
2606 EXTENT_BOUNDARY, GFP_NOFS);
2607 btrfs_set_extent_delalloc(inode, page_start, page_end);
2608
2609 set_page_dirty(page);
2610 total_dirty++;
2611
2612 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2613 unlock_page(page);
2614 page_cache_release(page);
2615 }
2616out_unlock:
2617 mutex_unlock(&inode->i_mutex);
2618 kfree(ra);
2619 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
2620 return ret;
2621}
2622
2623static noinline_for_stack
2624int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key)
2625{
2626 struct btrfs_root *root = BTRFS_I(inode)->root;
2627 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2628 struct extent_map *em;
2629 u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt;
2630 u64 end = start + extent_key->offset - 1;
2631
2632 em = alloc_extent_map(GFP_NOFS);
2633 em->start = start;
2634 em->len = extent_key->offset;
2635 em->block_len = extent_key->offset;
2636 em->block_start = extent_key->objectid;
2637 em->bdev = root->fs_info->fs_devices->latest_bdev;
2638 set_bit(EXTENT_FLAG_PINNED, &em->flags);
2639
2640 /* setup extent map to cheat btrfs_readpage */
2641 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2642 while (1) {
2643 int ret;
2644 spin_lock(&em_tree->lock);
2645 ret = add_extent_mapping(em_tree, em);
2646 spin_unlock(&em_tree->lock);
2647 if (ret != -EEXIST) {
2648 free_extent_map(em);
2649 break;
2650 }
2651 btrfs_drop_extent_cache(inode, start, end, 0);
2652 }
2653 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2654
2655 return relocate_inode_pages(inode, start, extent_key->offset);
2656}
2657
2658#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2659static int get_ref_objectid_v0(struct reloc_control *rc,
2660 struct btrfs_path *path,
2661 struct btrfs_key *extent_key,
2662 u64 *ref_objectid, int *path_change)
2663{
2664 struct btrfs_key key;
2665 struct extent_buffer *leaf;
2666 struct btrfs_extent_ref_v0 *ref0;
2667 int ret;
2668 int slot;
2669
2670 leaf = path->nodes[0];
2671 slot = path->slots[0];
2672 while (1) {
2673 if (slot >= btrfs_header_nritems(leaf)) {
2674 ret = btrfs_next_leaf(rc->extent_root, path);
2675 if (ret < 0)
2676 return ret;
2677 BUG_ON(ret > 0);
2678 leaf = path->nodes[0];
2679 slot = path->slots[0];
2680 if (path_change)
2681 *path_change = 1;
2682 }
2683 btrfs_item_key_to_cpu(leaf, &key, slot);
2684 if (key.objectid != extent_key->objectid)
2685 return -ENOENT;
2686
2687 if (key.type != BTRFS_EXTENT_REF_V0_KEY) {
2688 slot++;
2689 continue;
2690 }
2691 ref0 = btrfs_item_ptr(leaf, slot,
2692 struct btrfs_extent_ref_v0);
2693 *ref_objectid = btrfs_ref_objectid_v0(leaf, ref0);
2694 break;
2695 }
2696 return 0;
2697}
2698#endif
2699
2700/*
2701 * helper to add a tree block to the list.
2702 * the major work is getting the generation and level of the block
2703 */
2704static int add_tree_block(struct reloc_control *rc,
2705 struct btrfs_key *extent_key,
2706 struct btrfs_path *path,
2707 struct rb_root *blocks)
2708{
2709 struct extent_buffer *eb;
2710 struct btrfs_extent_item *ei;
2711 struct btrfs_tree_block_info *bi;
2712 struct tree_block *block;
2713 struct rb_node *rb_node;
2714 u32 item_size;
2715 int level = -1;
2716 int generation;
2717
2718 eb = path->nodes[0];
2719 item_size = btrfs_item_size_nr(eb, path->slots[0]);
2720
2721 if (item_size >= sizeof(*ei) + sizeof(*bi)) {
2722 ei = btrfs_item_ptr(eb, path->slots[0],
2723 struct btrfs_extent_item);
2724 bi = (struct btrfs_tree_block_info *)(ei + 1);
2725 generation = btrfs_extent_generation(eb, ei);
2726 level = btrfs_tree_block_level(eb, bi);
2727 } else {
2728#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2729 u64 ref_owner;
2730 int ret;
2731
2732 BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
2733 ret = get_ref_objectid_v0(rc, path, extent_key,
2734 &ref_owner, NULL);
2735 BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
2736 level = (int)ref_owner;
2737 /* FIXME: get real generation */
2738 generation = 0;
2739#else
2740 BUG();
2741#endif
2742 }
2743
2744 btrfs_release_path(rc->extent_root, path);
2745
2746 BUG_ON(level == -1);
2747
2748 block = kmalloc(sizeof(*block), GFP_NOFS);
2749 if (!block)
2750 return -ENOMEM;
2751
2752 block->bytenr = extent_key->objectid;
2753 block->key.objectid = extent_key->offset;
2754 block->key.offset = generation;
2755 block->level = level;
2756 block->key_ready = 0;
2757
2758 rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
2759 BUG_ON(rb_node);
2760
2761 return 0;
2762}
2763
2764/*
2765 * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY
2766 */
2767static int __add_tree_block(struct reloc_control *rc,
2768 u64 bytenr, u32 blocksize,
2769 struct rb_root *blocks)
2770{
2771 struct btrfs_path *path;
2772 struct btrfs_key key;
2773 int ret;
2774
2775 if (tree_block_processed(bytenr, blocksize, rc))
2776 return 0;
2777
2778 if (tree_search(blocks, bytenr))
2779 return 0;
2780
2781 path = btrfs_alloc_path();
2782 if (!path)
2783 return -ENOMEM;
2784
2785 key.objectid = bytenr;
2786 key.type = BTRFS_EXTENT_ITEM_KEY;
2787 key.offset = blocksize;
2788
2789 path->search_commit_root = 1;
2790 path->skip_locking = 1;
2791 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
2792 if (ret < 0)
2793 goto out;
2794 BUG_ON(ret);
2795
2796 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2797 ret = add_tree_block(rc, &key, path, blocks);
2798out:
2799 btrfs_free_path(path);
2800 return ret;
2801}
2802
2803/*
2804 * helper to check if the block use full backrefs for pointers in it
2805 */
2806static int block_use_full_backref(struct reloc_control *rc,
2807 struct extent_buffer *eb)
2808{
2809 struct btrfs_path *path;
2810 struct btrfs_extent_item *ei;
2811 struct btrfs_key key;
2812 u64 flags;
2813 int ret;
2814
2815 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) ||
2816 btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV)
2817 return 1;
2818
2819 path = btrfs_alloc_path();
2820 BUG_ON(!path);
2821
2822 key.objectid = eb->start;
2823 key.type = BTRFS_EXTENT_ITEM_KEY;
2824 key.offset = eb->len;
2825
2826 path->search_commit_root = 1;
2827 path->skip_locking = 1;
2828 ret = btrfs_search_slot(NULL, rc->extent_root,
2829 &key, path, 0, 0);
2830 BUG_ON(ret);
2831
2832 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2833 struct btrfs_extent_item);
2834 flags = btrfs_extent_flags(path->nodes[0], ei);
2835 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
2836 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2837 ret = 1;
2838 else
2839 ret = 0;
2840 btrfs_free_path(path);
2841 return ret;
2842}
2843
2844/*
2845 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
2846 * this function scans fs tree to find blocks reference the data extent
2847 */
2848static int find_data_references(struct reloc_control *rc,
2849 struct btrfs_key *extent_key,
2850 struct extent_buffer *leaf,
2851 struct btrfs_extent_data_ref *ref,
2852 struct rb_root *blocks)
2853{
2854 struct btrfs_path *path;
2855 struct tree_block *block;
2856 struct btrfs_root *root;
2857 struct btrfs_file_extent_item *fi;
2858 struct rb_node *rb_node;
2859 struct btrfs_key key;
2860 u64 ref_root;
2861 u64 ref_objectid;
2862 u64 ref_offset;
2863 u32 ref_count;
2864 u32 nritems;
2865 int err = 0;
2866 int added = 0;
2867 int counted;
2868 int ret;
2869
2870 path = btrfs_alloc_path();
2871 if (!path)
2872 return -ENOMEM;
2873
2874 ref_root = btrfs_extent_data_ref_root(leaf, ref);
2875 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
2876 ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
2877 ref_count = btrfs_extent_data_ref_count(leaf, ref);
2878
2879 root = read_fs_root(rc->extent_root->fs_info, ref_root);
2880 if (IS_ERR(root)) {
2881 err = PTR_ERR(root);
2882 goto out;
2883 }
2884
2885 key.objectid = ref_objectid;
2886 key.offset = ref_offset;
2887 key.type = BTRFS_EXTENT_DATA_KEY;
2888
2889 path->search_commit_root = 1;
2890 path->skip_locking = 1;
2891 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2892 if (ret < 0) {
2893 err = ret;
2894 goto out;
2895 }
2896
2897 leaf = path->nodes[0];
2898 nritems = btrfs_header_nritems(leaf);
2899 /*
2900 * the references in tree blocks that use full backrefs
2901 * are not counted in
2902 */
2903 if (block_use_full_backref(rc, leaf))
2904 counted = 0;
2905 else
2906 counted = 1;
2907 rb_node = tree_search(blocks, leaf->start);
2908 if (rb_node) {
2909 if (counted)
2910 added = 1;
2911 else
2912 path->slots[0] = nritems;
2913 }
2914
2915 while (ref_count > 0) {
2916 while (path->slots[0] >= nritems) {
2917 ret = btrfs_next_leaf(root, path);
2918 if (ret < 0) {
2919 err = ret;
2920 goto out;
2921 }
2922 if (ret > 0) {
2923 WARN_ON(1);
2924 goto out;
2925 }
2926
2927 leaf = path->nodes[0];
2928 nritems = btrfs_header_nritems(leaf);
2929 added = 0;
2930
2931 if (block_use_full_backref(rc, leaf))
2932 counted = 0;
2933 else
2934 counted = 1;
2935 rb_node = tree_search(blocks, leaf->start);
2936 if (rb_node) {
2937 if (counted)
2938 added = 1;
2939 else
2940 path->slots[0] = nritems;
2941 }
2942 }
2943
2944 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2945 if (key.objectid != ref_objectid ||
2946 key.type != BTRFS_EXTENT_DATA_KEY) {
2947 WARN_ON(1);
2948 break;
2949 }
2950
2951 fi = btrfs_item_ptr(leaf, path->slots[0],
2952 struct btrfs_file_extent_item);
2953
2954 if (btrfs_file_extent_type(leaf, fi) ==
2955 BTRFS_FILE_EXTENT_INLINE)
2956 goto next;
2957
2958 if (btrfs_file_extent_disk_bytenr(leaf, fi) !=
2959 extent_key->objectid)
2960 goto next;
2961
2962 key.offset -= btrfs_file_extent_offset(leaf, fi);
2963 if (key.offset != ref_offset)
2964 goto next;
2965
2966 if (counted)
2967 ref_count--;
2968 if (added)
2969 goto next;
2970
2971 if (!tree_block_processed(leaf->start, leaf->len, rc)) {
2972 block = kmalloc(sizeof(*block), GFP_NOFS);
2973 if (!block) {
2974 err = -ENOMEM;
2975 break;
2976 }
2977 block->bytenr = leaf->start;
2978 btrfs_item_key_to_cpu(leaf, &block->key, 0);
2979 block->level = 0;
2980 block->key_ready = 1;
2981 rb_node = tree_insert(blocks, block->bytenr,
2982 &block->rb_node);
2983 BUG_ON(rb_node);
2984 }
2985 if (counted)
2986 added = 1;
2987 else
2988 path->slots[0] = nritems;
2989next:
2990 path->slots[0]++;
2991
2992 }
2993out:
2994 btrfs_free_path(path);
2995 return err;
2996}
2997
2998/*
2999 * hepler to find all tree blocks that reference a given data extent
3000 */
3001static noinline_for_stack
3002int add_data_references(struct reloc_control *rc,
3003 struct btrfs_key *extent_key,
3004 struct btrfs_path *path,
3005 struct rb_root *blocks)
3006{
3007 struct btrfs_key key;
3008 struct extent_buffer *eb;
3009 struct btrfs_extent_data_ref *dref;
3010 struct btrfs_extent_inline_ref *iref;
3011 unsigned long ptr;
3012 unsigned long end;
3013 u32 blocksize;
3014 int ret;
3015 int err = 0;
3016
3017 ret = get_new_location(rc->data_inode, NULL, extent_key->objectid,
3018 extent_key->offset);
3019 BUG_ON(ret < 0);
3020 if (ret > 0) {
3021 /* the relocated data is fragmented */
3022 rc->extents_skipped++;
3023 btrfs_release_path(rc->extent_root, path);
3024 return 0;
3025 }
3026
3027 blocksize = btrfs_level_size(rc->extent_root, 0);
3028
3029 eb = path->nodes[0];
3030 ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
3031 end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
3032#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3033 if (ptr + sizeof(struct btrfs_extent_item_v0) == end)
3034 ptr = end;
3035 else
3036#endif
3037 ptr += sizeof(struct btrfs_extent_item);
3038
3039 while (ptr < end) {
3040 iref = (struct btrfs_extent_inline_ref *)ptr;
3041 key.type = btrfs_extent_inline_ref_type(eb, iref);
3042 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3043 key.offset = btrfs_extent_inline_ref_offset(eb, iref);
3044 ret = __add_tree_block(rc, key.offset, blocksize,
3045 blocks);
3046 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3047 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
3048 ret = find_data_references(rc, extent_key,
3049 eb, dref, blocks);
3050 } else {
3051 BUG();
3052 }
3053 ptr += btrfs_extent_inline_ref_size(key.type);
3054 }
3055 WARN_ON(ptr > end);
3056
3057 while (1) {
3058 cond_resched();
3059 eb = path->nodes[0];
3060 if (path->slots[0] >= btrfs_header_nritems(eb)) {
3061 ret = btrfs_next_leaf(rc->extent_root, path);
3062 if (ret < 0) {
3063 err = ret;
3064 break;
3065 }
3066 if (ret > 0)
3067 break;
3068 eb = path->nodes[0];
3069 }
3070
3071 btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
3072 if (key.objectid != extent_key->objectid)
3073 break;
3074
3075#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3076 if (key.type == BTRFS_SHARED_DATA_REF_KEY ||
3077 key.type == BTRFS_EXTENT_REF_V0_KEY) {
3078#else
3079 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
3080 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3081#endif
3082 ret = __add_tree_block(rc, key.offset, blocksize,
3083 blocks);
3084 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3085 dref = btrfs_item_ptr(eb, path->slots[0],
3086 struct btrfs_extent_data_ref);
3087 ret = find_data_references(rc, extent_key,
3088 eb, dref, blocks);
3089 } else {
3090 ret = 0;
3091 }
3092 if (ret) {
3093 err = ret;
3094 break;
3095 }
3096 path->slots[0]++;
3097 }
3098 btrfs_release_path(rc->extent_root, path);
3099 if (err)
3100 free_block_list(blocks);
3101 return err;
3102}
3103
3104/*
3105 * hepler to find next unprocessed extent
3106 */
3107static noinline_for_stack
3108int find_next_extent(struct btrfs_trans_handle *trans,
3109 struct reloc_control *rc, struct btrfs_path *path)
3110{
3111 struct btrfs_key key;
3112 struct extent_buffer *leaf;
3113 u64 start, end, last;
3114 int ret;
3115
3116 last = rc->block_group->key.objectid + rc->block_group->key.offset;
3117 while (1) {
3118 cond_resched();
3119 if (rc->search_start >= last) {
3120 ret = 1;
3121 break;
3122 }
3123
3124 key.objectid = rc->search_start;
3125 key.type = BTRFS_EXTENT_ITEM_KEY;
3126 key.offset = 0;
3127
3128 path->search_commit_root = 1;
3129 path->skip_locking = 1;
3130 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
3131 0, 0);
3132 if (ret < 0)
3133 break;
3134next:
3135 leaf = path->nodes[0];
3136 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
3137 ret = btrfs_next_leaf(rc->extent_root, path);
3138 if (ret != 0)
3139 break;
3140 leaf = path->nodes[0];
3141 }
3142
3143 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3144 if (key.objectid >= last) {
3145 ret = 1;
3146 break;
3147 }
3148
3149 if (key.type != BTRFS_EXTENT_ITEM_KEY ||
3150 key.objectid + key.offset <= rc->search_start) {
3151 path->slots[0]++;
3152 goto next;
3153 }
3154
3155 ret = find_first_extent_bit(&rc->processed_blocks,
3156 key.objectid, &start, &end,
3157 EXTENT_DIRTY);
3158
3159 if (ret == 0 && start <= key.objectid) {
3160 btrfs_release_path(rc->extent_root, path);
3161 rc->search_start = end + 1;
3162 } else {
3163 rc->search_start = key.objectid + key.offset;
3164 return 0;
3165 }
3166 }
3167 btrfs_release_path(rc->extent_root, path);
3168 return ret;
3169}
3170
3171static void set_reloc_control(struct reloc_control *rc)
3172{
3173 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3174 mutex_lock(&fs_info->trans_mutex);
3175 fs_info->reloc_ctl = rc;
3176 mutex_unlock(&fs_info->trans_mutex);
3177}
3178
3179static void unset_reloc_control(struct reloc_control *rc)
3180{
3181 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3182 mutex_lock(&fs_info->trans_mutex);
3183 fs_info->reloc_ctl = NULL;
3184 mutex_unlock(&fs_info->trans_mutex);
3185}
3186
3187static int check_extent_flags(u64 flags)
3188{
3189 if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
3190 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
3191 return 1;
3192 if (!(flags & BTRFS_EXTENT_FLAG_DATA) &&
3193 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
3194 return 1;
3195 if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
3196 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
3197 return 1;
3198 return 0;
3199}
3200
3201static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3202{
3203 struct rb_root blocks = RB_ROOT;
3204 struct btrfs_key key;
3205 struct btrfs_trans_handle *trans = NULL;
3206 struct btrfs_path *path;
3207 struct btrfs_extent_item *ei;
3208 unsigned long nr;
3209 u64 flags;
3210 u32 item_size;
3211 int ret;
3212 int err = 0;
3213
3214 path = btrfs_alloc_path();
3215 if (!path)
3216 return -ENOMEM;
3217
3218 rc->search_start = rc->block_group->key.objectid;
3219 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
3220 GFP_NOFS);
3221
3222 rc->create_reloc_root = 1;
3223 set_reloc_control(rc);
3224
3225 trans = btrfs_start_transaction(rc->extent_root, 1);
3226 btrfs_commit_transaction(trans, rc->extent_root);
3227
3228 while (1) {
3229 trans = btrfs_start_transaction(rc->extent_root, 1);
3230
3231 ret = find_next_extent(trans, rc, path);
3232 if (ret < 0)
3233 err = ret;
3234 if (ret != 0)
3235 break;
3236
3237 rc->extents_found++;
3238
3239 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
3240 struct btrfs_extent_item);
3241 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3242 item_size = btrfs_item_size_nr(path->nodes[0],
3243 path->slots[0]);
3244 if (item_size >= sizeof(*ei)) {
3245 flags = btrfs_extent_flags(path->nodes[0], ei);
3246 ret = check_extent_flags(flags);
3247 BUG_ON(ret);
3248
3249 } else {
3250#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3251 u64 ref_owner;
3252 int path_change = 0;
3253
3254 BUG_ON(item_size !=
3255 sizeof(struct btrfs_extent_item_v0));
3256 ret = get_ref_objectid_v0(rc, path, &key, &ref_owner,
3257 &path_change);
3258 if (ref_owner < BTRFS_FIRST_FREE_OBJECTID)
3259 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
3260 else
3261 flags = BTRFS_EXTENT_FLAG_DATA;
3262
3263 if (path_change) {
3264 btrfs_release_path(rc->extent_root, path);
3265
3266 path->search_commit_root = 1;
3267 path->skip_locking = 1;
3268 ret = btrfs_search_slot(NULL, rc->extent_root,
3269 &key, path, 0, 0);
3270 if (ret < 0) {
3271 err = ret;
3272 break;
3273 }
3274 BUG_ON(ret > 0);
3275 }
3276#else
3277 BUG();
3278#endif
3279 }
3280
3281 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
3282 ret = add_tree_block(rc, &key, path, &blocks);
3283 } else if (rc->stage == UPDATE_DATA_PTRS &&
3284 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3285 ret = add_data_references(rc, &key, path, &blocks);
3286 } else {
3287 btrfs_release_path(rc->extent_root, path);
3288 ret = 0;
3289 }
3290 if (ret < 0) {
3291 err = 0;
3292 break;
3293 }
3294
3295 if (!RB_EMPTY_ROOT(&blocks)) {
3296 ret = relocate_tree_blocks(trans, rc, &blocks);
3297 if (ret < 0) {
3298 err = ret;
3299 break;
3300 }
3301 }
3302
3303 nr = trans->blocks_used;
3304 btrfs_end_transaction_throttle(trans, rc->extent_root);
3305 trans = NULL;
3306 btrfs_btree_balance_dirty(rc->extent_root, nr);
3307
3308 if (rc->stage == MOVE_DATA_EXTENTS &&
3309 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3310 rc->found_file_extent = 1;
3311 ret = relocate_data_extent(rc->data_inode, &key);
3312 if (ret < 0) {
3313 err = ret;
3314 break;
3315 }
3316 }
3317 }
3318 btrfs_free_path(path);
3319
3320 if (trans) {
3321 nr = trans->blocks_used;
3322 btrfs_end_transaction(trans, rc->extent_root);
3323 btrfs_btree_balance_dirty(rc->extent_root, nr);
3324 }
3325
3326 rc->create_reloc_root = 0;
3327 smp_mb();
3328
3329 if (rc->extents_found > 0) {
3330 trans = btrfs_start_transaction(rc->extent_root, 1);
3331 btrfs_commit_transaction(trans, rc->extent_root);
3332 }
3333
3334 merge_reloc_roots(rc);
3335
3336 unset_reloc_control(rc);
3337
3338 /* get rid of pinned extents */
3339 trans = btrfs_start_transaction(rc->extent_root, 1);
3340 btrfs_commit_transaction(trans, rc->extent_root);
3341
3342 return err;
3343}
3344
3345static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
3346 struct btrfs_root *root,
3347 u64 objectid, u64 size)
3348{
3349 struct btrfs_path *path;
3350 struct btrfs_inode_item *item;
3351 struct extent_buffer *leaf;
3352 int ret;
3353
3354 path = btrfs_alloc_path();
3355 if (!path)
3356 return -ENOMEM;
3357
3358 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
3359 if (ret)
3360 goto out;
3361
3362 leaf = path->nodes[0];
3363 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
3364 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
3365 btrfs_set_inode_generation(leaf, item, 1);
3366 btrfs_set_inode_size(leaf, item, size);
3367 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
3368 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
3369 btrfs_mark_buffer_dirty(leaf);
3370 btrfs_release_path(root, path);
3371out:
3372 btrfs_free_path(path);
3373 return ret;
3374}
3375
3376/*
3377 * helper to create inode for data relocation.
3378 * the inode is in data relocation tree and its link count is 0
3379 */
3380static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3381 struct btrfs_block_group_cache *group)
3382{
3383 struct inode *inode = NULL;
3384 struct btrfs_trans_handle *trans;
3385 struct btrfs_root *root;
3386 struct btrfs_key key;
3387 unsigned long nr;
3388 u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
3389 int err = 0;
3390
3391 root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
3392 if (IS_ERR(root))
3393 return ERR_CAST(root);
3394
3395 trans = btrfs_start_transaction(root, 1);
3396 BUG_ON(!trans);
3397
3398 err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
3399 if (err)
3400 goto out;
3401
3402 err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
3403 BUG_ON(err);
3404
3405 err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
3406 group->key.offset, 0, group->key.offset,
3407 0, 0, 0);
3408 BUG_ON(err);
3409
3410 key.objectid = objectid;
3411 key.type = BTRFS_INODE_ITEM_KEY;
3412 key.offset = 0;
3413 inode = btrfs_iget(root->fs_info->sb, &key, root);
3414 BUG_ON(IS_ERR(inode) || is_bad_inode(inode));
3415 BTRFS_I(inode)->index_cnt = group->key.objectid;
3416
3417 err = btrfs_orphan_add(trans, inode);
3418out:
3419 nr = trans->blocks_used;
3420 btrfs_end_transaction(trans, root);
3421
3422 btrfs_btree_balance_dirty(root, nr);
3423 if (err) {
3424 if (inode)
3425 iput(inode);
3426 inode = ERR_PTR(err);
3427 }
3428 return inode;
3429}
3430
3431/*
3432 * function to relocate all extents in a block group.
3433 */
3434int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3435{
3436 struct btrfs_fs_info *fs_info = extent_root->fs_info;
3437 struct reloc_control *rc;
3438 int ret;
3439 int err = 0;
3440
3441 rc = kzalloc(sizeof(*rc), GFP_NOFS);
3442 if (!rc)
3443 return -ENOMEM;
3444
3445 mapping_tree_init(&rc->reloc_root_tree);
3446 extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS);
3447 INIT_LIST_HEAD(&rc->reloc_roots);
3448
3449 rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
3450 BUG_ON(!rc->block_group);
3451
3452 btrfs_init_workers(&rc->workers, "relocate",
3453 fs_info->thread_pool_size);
3454
3455 rc->extent_root = extent_root;
3456 btrfs_prepare_block_group_relocation(extent_root, rc->block_group);
3457
3458 rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
3459 if (IS_ERR(rc->data_inode)) {
3460 err = PTR_ERR(rc->data_inode);
3461 rc->data_inode = NULL;
3462 goto out;
3463 }
3464
3465 printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
3466 (unsigned long long)rc->block_group->key.objectid,
3467 (unsigned long long)rc->block_group->flags);
3468
3469 btrfs_start_delalloc_inodes(fs_info->tree_root);
3470 btrfs_wait_ordered_extents(fs_info->tree_root, 0);
3471
3472 while (1) {
3473 mutex_lock(&fs_info->cleaner_mutex);
3474 btrfs_clean_old_snapshots(fs_info->tree_root);
3475 mutex_unlock(&fs_info->cleaner_mutex);
3476
3477 rc->extents_found = 0;
3478 rc->extents_skipped = 0;
3479
3480 ret = relocate_block_group(rc);
3481 if (ret < 0) {
3482 err = ret;
3483 break;
3484 }
3485
3486 if (rc->extents_found == 0)
3487 break;
3488
3489 printk(KERN_INFO "btrfs: found %llu extents\n",
3490 (unsigned long long)rc->extents_found);
3491
3492 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
3493 btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1);
3494 invalidate_mapping_pages(rc->data_inode->i_mapping,
3495 0, -1);
3496 rc->stage = UPDATE_DATA_PTRS;
3497 } else if (rc->stage == UPDATE_DATA_PTRS &&
3498 rc->extents_skipped >= rc->extents_found) {
3499 iput(rc->data_inode);
3500 rc->data_inode = create_reloc_inode(fs_info,
3501 rc->block_group);
3502 if (IS_ERR(rc->data_inode)) {
3503 err = PTR_ERR(rc->data_inode);
3504 rc->data_inode = NULL;
3505 break;
3506 }
3507 rc->stage = MOVE_DATA_EXTENTS;
3508 rc->found_file_extent = 0;
3509 }
3510 }
3511
3512 filemap_fdatawrite_range(fs_info->btree_inode->i_mapping,
3513 rc->block_group->key.objectid,
3514 rc->block_group->key.objectid +
3515 rc->block_group->key.offset - 1);
3516
3517 WARN_ON(rc->block_group->pinned > 0);
3518 WARN_ON(rc->block_group->reserved > 0);
3519 WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
3520out:
3521 iput(rc->data_inode);
3522 btrfs_stop_workers(&rc->workers);
3523 btrfs_put_block_group(rc->block_group);
3524 kfree(rc);
3525 return err;
3526}
3527
3528/*
3529 * recover relocation interrupted by system crash.
3530 *
3531 * this function resumes merging reloc trees with corresponding fs trees.
3532 * this is important for keeping the sharing of tree blocks
3533 */
3534int btrfs_recover_relocation(struct btrfs_root *root)
3535{
3536 LIST_HEAD(reloc_roots);
3537 struct btrfs_key key;
3538 struct btrfs_root *fs_root;
3539 struct btrfs_root *reloc_root;
3540 struct btrfs_path *path;
3541 struct extent_buffer *leaf;
3542 struct reloc_control *rc = NULL;
3543 struct btrfs_trans_handle *trans;
3544 int ret;
3545 int err = 0;
3546
3547 path = btrfs_alloc_path();
3548 if (!path)
3549 return -ENOMEM;
3550
3551 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
3552 key.type = BTRFS_ROOT_ITEM_KEY;
3553 key.offset = (u64)-1;
3554
3555 while (1) {
3556 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key,
3557 path, 0, 0);
3558 if (ret < 0) {
3559 err = ret;
3560 goto out;
3561 }
3562 if (ret > 0) {
3563 if (path->slots[0] == 0)
3564 break;
3565 path->slots[0]--;
3566 }
3567 leaf = path->nodes[0];
3568 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3569 btrfs_release_path(root->fs_info->tree_root, path);
3570
3571 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
3572 key.type != BTRFS_ROOT_ITEM_KEY)
3573 break;
3574
3575 reloc_root = btrfs_read_fs_root_no_radix(root, &key);
3576 if (IS_ERR(reloc_root)) {
3577 err = PTR_ERR(reloc_root);
3578 goto out;
3579 }
3580
3581 list_add(&reloc_root->root_list, &reloc_roots);
3582
3583 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
3584 fs_root = read_fs_root(root->fs_info,
3585 reloc_root->root_key.offset);
3586 if (IS_ERR(fs_root)) {
3587 err = PTR_ERR(fs_root);
3588 goto out;
3589 }
3590 }
3591
3592 if (key.offset == 0)
3593 break;
3594
3595 key.offset--;
3596 }
3597 btrfs_release_path(root->fs_info->tree_root, path);
3598
3599 if (list_empty(&reloc_roots))
3600 goto out;
3601
3602 rc = kzalloc(sizeof(*rc), GFP_NOFS);
3603 if (!rc) {
3604 err = -ENOMEM;
3605 goto out;
3606 }
3607
3608 mapping_tree_init(&rc->reloc_root_tree);
3609 INIT_LIST_HEAD(&rc->reloc_roots);
3610 btrfs_init_workers(&rc->workers, "relocate",
3611 root->fs_info->thread_pool_size);
3612 rc->extent_root = root->fs_info->extent_root;
3613
3614 set_reloc_control(rc);
3615
3616 while (!list_empty(&reloc_roots)) {
3617 reloc_root = list_entry(reloc_roots.next,
3618 struct btrfs_root, root_list);
3619 list_del(&reloc_root->root_list);
3620
3621 if (btrfs_root_refs(&reloc_root->root_item) == 0) {
3622 list_add_tail(&reloc_root->root_list,
3623 &rc->reloc_roots);
3624 continue;
3625 }
3626
3627 fs_root = read_fs_root(root->fs_info,
3628 reloc_root->root_key.offset);
3629 BUG_ON(IS_ERR(fs_root));
3630
3631 __add_reloc_root(reloc_root);
3632 fs_root->reloc_root = reloc_root;
3633 }
3634
3635 trans = btrfs_start_transaction(rc->extent_root, 1);
3636 btrfs_commit_transaction(trans, rc->extent_root);
3637
3638 merge_reloc_roots(rc);
3639
3640 unset_reloc_control(rc);
3641
3642 trans = btrfs_start_transaction(rc->extent_root, 1);
3643 btrfs_commit_transaction(trans, rc->extent_root);
3644out:
3645 if (rc) {
3646 btrfs_stop_workers(&rc->workers);
3647 kfree(rc);
3648 }
3649 while (!list_empty(&reloc_roots)) {
3650 reloc_root = list_entry(reloc_roots.next,
3651 struct btrfs_root, root_list);
3652 list_del(&reloc_root->root_list);
3653 free_extent_buffer(reloc_root->node);
3654 free_extent_buffer(reloc_root->commit_root);
3655 kfree(reloc_root);
3656 }
3657 btrfs_free_path(path);
3658
3659 if (err == 0) {
3660 /* cleanup orphan inode in data relocation tree */
3661 fs_root = read_fs_root(root->fs_info,
3662 BTRFS_DATA_RELOC_TREE_OBJECTID);
3663 if (IS_ERR(fs_root))
3664 err = PTR_ERR(fs_root);
3665 }
3666 return err;
3667}
3668
3669/*
3670 * helper to add ordered checksum for data relocation.
3671 *
3672 * cloning checksum properly handles the nodatasum extents.
3673 * it also saves CPU time to re-calculate the checksum.
3674 */
3675int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
3676{
3677 struct btrfs_ordered_sum *sums;
3678 struct btrfs_sector_sum *sector_sum;
3679 struct btrfs_ordered_extent *ordered;
3680 struct btrfs_root *root = BTRFS_I(inode)->root;
3681 size_t offset;
3682 int ret;
3683 u64 disk_bytenr;
3684 LIST_HEAD(list);
3685
3686 ordered = btrfs_lookup_ordered_extent(inode, file_pos);
3687 BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
3688
3689 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
3690 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
3691 disk_bytenr + len - 1, &list);
3692
3693 while (!list_empty(&list)) {
3694 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
3695 list_del_init(&sums->list);
3696
3697 sector_sum = sums->sums;
3698 sums->bytenr = ordered->start;
3699
3700 offset = 0;
3701 while (offset < sums->len) {
3702 sector_sum->bytenr += ordered->start - disk_bytenr;
3703 sector_sum++;
3704 offset += root->sectorsize;
3705 }
3706
3707 btrfs_add_ordered_sum(inode, ordered, sums);
3708 }
3709 btrfs_put_ordered_extent(ordered);
3710 return 0;
3711}
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index b48650de4472..0ddc6d61c55a 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -111,6 +111,15 @@ out:
111 return ret; 111 return ret;
112} 112}
113 113
114int btrfs_set_root_node(struct btrfs_root_item *item,
115 struct extent_buffer *node)
116{
117 btrfs_set_root_bytenr(item, node->start);
118 btrfs_set_root_level(item, btrfs_header_level(node));
119 btrfs_set_root_generation(item, btrfs_header_generation(node));
120 return 0;
121}
122
114/* 123/*
115 * copy the data in 'item' into the btree 124 * copy the data in 'item' into the btree
116 */ 125 */
@@ -164,8 +173,7 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
164 * offset lower than the latest root. They need to be queued for deletion to 173 * offset lower than the latest root. They need to be queued for deletion to
165 * finish what was happening when we crashed. 174 * finish what was happening when we crashed.
166 */ 175 */
167int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, 176int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid)
168 struct btrfs_root *latest)
169{ 177{
170 struct btrfs_root *dead_root; 178 struct btrfs_root *dead_root;
171 struct btrfs_item *item; 179 struct btrfs_item *item;
@@ -227,10 +235,7 @@ again:
227 goto err; 235 goto err;
228 } 236 }
229 237
230 if (objectid == BTRFS_TREE_RELOC_OBJECTID) 238 ret = btrfs_add_dead_root(dead_root);
231 ret = btrfs_add_dead_reloc_root(dead_root);
232 else
233 ret = btrfs_add_dead_root(dead_root, latest);
234 if (ret) 239 if (ret)
235 goto err; 240 goto err;
236 goto again; 241 goto again;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2ff7cd2db25f..708ac06b953b 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -52,7 +52,6 @@
52#include "export.h" 52#include "export.h"
53#include "compression.h" 53#include "compression.h"
54 54
55
56static struct super_operations btrfs_super_ops; 55static struct super_operations btrfs_super_ops;
57 56
58static void btrfs_put_super(struct super_block *sb) 57static void btrfs_put_super(struct super_block *sb)
@@ -67,8 +66,8 @@ static void btrfs_put_super(struct super_block *sb)
67enum { 66enum {
68 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, 67 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
69 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, 68 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
70 Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog, 69 Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl,
71 Opt_ratio, Opt_flushoncommit, Opt_err, 70 Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_err,
72}; 71};
73 72
74static match_table_t tokens = { 73static match_table_t tokens = {
@@ -84,6 +83,8 @@ static match_table_t tokens = {
84 {Opt_thread_pool, "thread_pool=%d"}, 83 {Opt_thread_pool, "thread_pool=%d"},
85 {Opt_compress, "compress"}, 84 {Opt_compress, "compress"},
86 {Opt_ssd, "ssd"}, 85 {Opt_ssd, "ssd"},
86 {Opt_ssd_spread, "ssd_spread"},
87 {Opt_nossd, "nossd"},
87 {Opt_noacl, "noacl"}, 88 {Opt_noacl, "noacl"},
88 {Opt_notreelog, "notreelog"}, 89 {Opt_notreelog, "notreelog"},
89 {Opt_flushoncommit, "flushoncommit"}, 90 {Opt_flushoncommit, "flushoncommit"},
@@ -158,7 +159,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
158 */ 159 */
159 break; 160 break;
160 case Opt_nodatasum: 161 case Opt_nodatasum:
161 printk(KERN_INFO "btrfs: setting nodatacsum\n"); 162 printk(KERN_INFO "btrfs: setting nodatasum\n");
162 btrfs_set_opt(info->mount_opt, NODATASUM); 163 btrfs_set_opt(info->mount_opt, NODATASUM);
163 break; 164 break;
164 case Opt_nodatacow: 165 case Opt_nodatacow:
@@ -174,6 +175,19 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
174 printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); 175 printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
175 btrfs_set_opt(info->mount_opt, SSD); 176 btrfs_set_opt(info->mount_opt, SSD);
176 break; 177 break;
178 case Opt_ssd_spread:
179 printk(KERN_INFO "btrfs: use spread ssd "
180 "allocation scheme\n");
181 btrfs_set_opt(info->mount_opt, SSD);
182 btrfs_set_opt(info->mount_opt, SSD_SPREAD);
183 break;
184 case Opt_nossd:
185 printk(KERN_INFO "btrfs: not using ssd allocation "
186 "scheme\n");
187 btrfs_set_opt(info->mount_opt, NOSSD);
188 btrfs_clear_opt(info->mount_opt, SSD);
189 btrfs_clear_opt(info->mount_opt, SSD_SPREAD);
190 break;
177 case Opt_nobarrier: 191 case Opt_nobarrier:
178 printk(KERN_INFO "btrfs: turning off barriers\n"); 192 printk(KERN_INFO "btrfs: turning off barriers\n");
179 btrfs_set_opt(info->mount_opt, NOBARRIER); 193 btrfs_set_opt(info->mount_opt, NOBARRIER);
@@ -322,7 +336,7 @@ static int btrfs_fill_super(struct super_block *sb,
322 struct dentry *root_dentry; 336 struct dentry *root_dentry;
323 struct btrfs_super_block *disk_super; 337 struct btrfs_super_block *disk_super;
324 struct btrfs_root *tree_root; 338 struct btrfs_root *tree_root;
325 struct btrfs_inode *bi; 339 struct btrfs_key key;
326 int err; 340 int err;
327 341
328 sb->s_maxbytes = MAX_LFS_FILESIZE; 342 sb->s_maxbytes = MAX_LFS_FILESIZE;
@@ -341,23 +355,15 @@ static int btrfs_fill_super(struct super_block *sb,
341 } 355 }
342 sb->s_fs_info = tree_root; 356 sb->s_fs_info = tree_root;
343 disk_super = &tree_root->fs_info->super_copy; 357 disk_super = &tree_root->fs_info->super_copy;
344 inode = btrfs_iget_locked(sb, BTRFS_FIRST_FREE_OBJECTID,
345 tree_root->fs_info->fs_root);
346 bi = BTRFS_I(inode);
347 bi->location.objectid = inode->i_ino;
348 bi->location.offset = 0;
349 bi->root = tree_root->fs_info->fs_root;
350 358
351 btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); 359 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
352 360 key.type = BTRFS_INODE_ITEM_KEY;
353 if (!inode) { 361 key.offset = 0;
354 err = -ENOMEM; 362 inode = btrfs_iget(sb, &key, tree_root->fs_info->fs_root);
363 if (IS_ERR(inode)) {
364 err = PTR_ERR(inode);
355 goto fail_close; 365 goto fail_close;
356 } 366 }
357 if (inode->i_state & I_NEW) {
358 btrfs_read_locked_inode(inode);
359 unlock_new_inode(inode);
360 }
361 367
362 root_dentry = d_alloc_root(inode); 368 root_dentry = d_alloc_root(inode);
363 if (!root_dentry) { 369 if (!root_dentry) {
@@ -433,7 +439,11 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
433 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); 439 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
434 if (btrfs_test_opt(root, COMPRESS)) 440 if (btrfs_test_opt(root, COMPRESS))
435 seq_puts(seq, ",compress"); 441 seq_puts(seq, ",compress");
436 if (btrfs_test_opt(root, SSD)) 442 if (btrfs_test_opt(root, NOSSD))
443 seq_puts(seq, ",nossd");
444 if (btrfs_test_opt(root, SSD_SPREAD))
445 seq_puts(seq, ",ssd_spread");
446 else if (btrfs_test_opt(root, SSD))
437 seq_puts(seq, ",ssd"); 447 seq_puts(seq, ",ssd");
438 if (btrfs_test_opt(root, NOTREELOG)) 448 if (btrfs_test_opt(root, NOTREELOG))
439 seq_puts(seq, ",notreelog"); 449 seq_puts(seq, ",notreelog");
@@ -584,7 +594,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
584 if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) 594 if (btrfs_super_log_root(&root->fs_info->super_copy) != 0)
585 return -EINVAL; 595 return -EINVAL;
586 596
587 ret = btrfs_cleanup_reloc_trees(root); 597 /* recover relocation */
598 ret = btrfs_recover_relocation(root);
588 WARN_ON(ret); 599 WARN_ON(ret);
589 600
590 ret = btrfs_cleanup_fs_roots(root->fs_info); 601 ret = btrfs_cleanup_fs_roots(root->fs_info);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 01b143605ec1..2e177d7f4bb9 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -25,7 +25,6 @@
25#include "disk-io.h" 25#include "disk-io.h"
26#include "transaction.h" 26#include "transaction.h"
27#include "locking.h" 27#include "locking.h"
28#include "ref-cache.h"
29#include "tree-log.h" 28#include "tree-log.h"
30 29
31#define BTRFS_ROOT_TRANS_TAG 0 30#define BTRFS_ROOT_TRANS_TAG 0
@@ -94,45 +93,37 @@ static noinline int join_transaction(struct btrfs_root *root)
94 * to make sure the old root from before we joined the transaction is deleted 93 * to make sure the old root from before we joined the transaction is deleted
95 * when the transaction commits 94 * when the transaction commits
96 */ 95 */
97noinline int btrfs_record_root_in_trans(struct btrfs_root *root) 96static noinline int record_root_in_trans(struct btrfs_trans_handle *trans,
97 struct btrfs_root *root)
98{ 98{
99 struct btrfs_dirty_root *dirty; 99 if (root->ref_cows && root->last_trans < trans->transid) {
100 u64 running_trans_id = root->fs_info->running_transaction->transid;
101 if (root->ref_cows && root->last_trans < running_trans_id) {
102 WARN_ON(root == root->fs_info->extent_root); 100 WARN_ON(root == root->fs_info->extent_root);
103 if (root->root_item.refs != 0) { 101 WARN_ON(root->root_item.refs == 0);
104 radix_tree_tag_set(&root->fs_info->fs_roots_radix, 102 WARN_ON(root->commit_root != root->node);
105 (unsigned long)root->root_key.objectid, 103
106 BTRFS_ROOT_TRANS_TAG); 104 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
107 105 (unsigned long)root->root_key.objectid,
108 dirty = kmalloc(sizeof(*dirty), GFP_NOFS); 106 BTRFS_ROOT_TRANS_TAG);
109 BUG_ON(!dirty); 107 root->last_trans = trans->transid;
110 dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS); 108 btrfs_init_reloc_root(trans, root);
111 BUG_ON(!dirty->root); 109 }
112 dirty->latest_root = root; 110 return 0;
113 INIT_LIST_HEAD(&dirty->list); 111}
114
115 root->commit_root = btrfs_root_node(root);
116
117 memcpy(dirty->root, root, sizeof(*root));
118 spin_lock_init(&dirty->root->node_lock);
119 spin_lock_init(&dirty->root->list_lock);
120 mutex_init(&dirty->root->objectid_mutex);
121 mutex_init(&dirty->root->log_mutex);
122 INIT_LIST_HEAD(&dirty->root->dead_list);
123 dirty->root->node = root->commit_root;
124 dirty->root->commit_root = NULL;
125 112
126 spin_lock(&root->list_lock); 113int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
127 list_add(&dirty->root->dead_list, &root->dead_list); 114 struct btrfs_root *root)
128 spin_unlock(&root->list_lock); 115{
116 if (!root->ref_cows)
117 return 0;
129 118
130 root->dirty_root = dirty; 119 mutex_lock(&root->fs_info->trans_mutex);
131 } else { 120 if (root->last_trans == trans->transid) {
132 WARN_ON(1); 121 mutex_unlock(&root->fs_info->trans_mutex);
133 } 122 return 0;
134 root->last_trans = running_trans_id;
135 } 123 }
124
125 record_root_in_trans(trans, root);
126 mutex_unlock(&root->fs_info->trans_mutex);
136 return 0; 127 return 0;
137} 128}
138 129
@@ -181,7 +172,6 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
181 ret = join_transaction(root); 172 ret = join_transaction(root);
182 BUG_ON(ret); 173 BUG_ON(ret);
183 174
184 btrfs_record_root_in_trans(root);
185 h->transid = root->fs_info->running_transaction->transid; 175 h->transid = root->fs_info->running_transaction->transid;
186 h->transaction = root->fs_info->running_transaction; 176 h->transaction = root->fs_info->running_transaction;
187 h->blocks_reserved = num_blocks; 177 h->blocks_reserved = num_blocks;
@@ -192,6 +182,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
192 h->delayed_ref_updates = 0; 182 h->delayed_ref_updates = 0;
193 183
194 root->fs_info->running_transaction->use_count++; 184 root->fs_info->running_transaction->use_count++;
185 record_root_in_trans(h, root);
195 mutex_unlock(&root->fs_info->trans_mutex); 186 mutex_unlock(&root->fs_info->trans_mutex);
196 return h; 187 return h;
197} 188}
@@ -233,6 +224,7 @@ static noinline int wait_for_commit(struct btrfs_root *root,
233 return 0; 224 return 0;
234} 225}
235 226
227#if 0
236/* 228/*
237 * rate limit against the drop_snapshot code. This helps to slow down new 229 * rate limit against the drop_snapshot code. This helps to slow down new
238 * operations if the drop_snapshot code isn't able to keep up. 230 * operations if the drop_snapshot code isn't able to keep up.
@@ -273,6 +265,7 @@ harder:
273 goto harder; 265 goto harder;
274 } 266 }
275} 267}
268#endif
276 269
277void btrfs_throttle(struct btrfs_root *root) 270void btrfs_throttle(struct btrfs_root *root)
278{ 271{
@@ -280,7 +273,6 @@ void btrfs_throttle(struct btrfs_root *root)
280 if (!root->fs_info->open_ioctl_trans) 273 if (!root->fs_info->open_ioctl_trans)
281 wait_current_trans(root); 274 wait_current_trans(root);
282 mutex_unlock(&root->fs_info->trans_mutex); 275 mutex_unlock(&root->fs_info->trans_mutex);
283 throttle_on_drops(root);
284} 276}
285 277
286static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, 278static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
@@ -323,9 +315,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
323 memset(trans, 0, sizeof(*trans)); 315 memset(trans, 0, sizeof(*trans));
324 kmem_cache_free(btrfs_trans_handle_cachep, trans); 316 kmem_cache_free(btrfs_trans_handle_cachep, trans);
325 317
326 if (throttle)
327 throttle_on_drops(root);
328
329 return 0; 318 return 0;
330} 319}
331 320
@@ -462,12 +451,8 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
462 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 451 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
463 if (old_root_bytenr == root->node->start) 452 if (old_root_bytenr == root->node->start)
464 break; 453 break;
465 btrfs_set_root_bytenr(&root->root_item,
466 root->node->start);
467 btrfs_set_root_level(&root->root_item,
468 btrfs_header_level(root->node));
469 btrfs_set_root_generation(&root->root_item, trans->transid);
470 454
455 btrfs_set_root_node(&root->root_item, root->node);
471 ret = btrfs_update_root(trans, tree_root, 456 ret = btrfs_update_root(trans, tree_root,
472 &root->root_key, 457 &root->root_key,
473 &root->root_item); 458 &root->root_item);
@@ -477,14 +462,16 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
477 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 462 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
478 BUG_ON(ret); 463 BUG_ON(ret);
479 } 464 }
465 free_extent_buffer(root->commit_root);
466 root->commit_root = btrfs_root_node(root);
480 return 0; 467 return 0;
481} 468}
482 469
483/* 470/*
484 * update all the cowonly tree roots on disk 471 * update all the cowonly tree roots on disk
485 */ 472 */
486int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, 473static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
487 struct btrfs_root *root) 474 struct btrfs_root *root)
488{ 475{
489 struct btrfs_fs_info *fs_info = root->fs_info; 476 struct btrfs_fs_info *fs_info = root->fs_info;
490 struct list_head *next; 477 struct list_head *next;
@@ -520,118 +507,54 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
520 * a dirty root struct and adds it into the list of dead roots that need to 507 * a dirty root struct and adds it into the list of dead roots that need to
521 * be deleted 508 * be deleted
522 */ 509 */
523int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest) 510int btrfs_add_dead_root(struct btrfs_root *root)
524{ 511{
525 struct btrfs_dirty_root *dirty;
526
527 dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
528 if (!dirty)
529 return -ENOMEM;
530 dirty->root = root;
531 dirty->latest_root = latest;
532
533 mutex_lock(&root->fs_info->trans_mutex); 512 mutex_lock(&root->fs_info->trans_mutex);
534 list_add(&dirty->list, &latest->fs_info->dead_roots); 513 list_add(&root->root_list, &root->fs_info->dead_roots);
535 mutex_unlock(&root->fs_info->trans_mutex); 514 mutex_unlock(&root->fs_info->trans_mutex);
536 return 0; 515 return 0;
537} 516}
538 517
539/* 518/*
540 * at transaction commit time we need to schedule the old roots for 519 * update all the cowonly tree roots on disk
541 * deletion via btrfs_drop_snapshot. This runs through all the
542 * reference counted roots that were modified in the current
543 * transaction and puts them into the drop list
544 */ 520 */
545static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, 521static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
546 struct radix_tree_root *radix, 522 struct btrfs_root *root)
547 struct list_head *list)
548{ 523{
549 struct btrfs_dirty_root *dirty;
550 struct btrfs_root *gang[8]; 524 struct btrfs_root *gang[8];
551 struct btrfs_root *root; 525 struct btrfs_fs_info *fs_info = root->fs_info;
552 int i; 526 int i;
553 int ret; 527 int ret;
554 int err = 0; 528 int err = 0;
555 u32 refs;
556 529
557 while (1) { 530 while (1) {
558 ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0, 531 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
532 (void **)gang, 0,
559 ARRAY_SIZE(gang), 533 ARRAY_SIZE(gang),
560 BTRFS_ROOT_TRANS_TAG); 534 BTRFS_ROOT_TRANS_TAG);
561 if (ret == 0) 535 if (ret == 0)
562 break; 536 break;
563 for (i = 0; i < ret; i++) { 537 for (i = 0; i < ret; i++) {
564 root = gang[i]; 538 root = gang[i];
565 radix_tree_tag_clear(radix, 539 radix_tree_tag_clear(&fs_info->fs_roots_radix,
566 (unsigned long)root->root_key.objectid, 540 (unsigned long)root->root_key.objectid,
567 BTRFS_ROOT_TRANS_TAG); 541 BTRFS_ROOT_TRANS_TAG);
568
569 BUG_ON(!root->ref_tree);
570 dirty = root->dirty_root;
571 542
572 btrfs_free_log(trans, root); 543 btrfs_free_log(trans, root);
573 btrfs_free_reloc_root(trans, root); 544 btrfs_update_reloc_root(trans, root);
574
575 if (root->commit_root == root->node) {
576 WARN_ON(root->node->start !=
577 btrfs_root_bytenr(&root->root_item));
578
579 free_extent_buffer(root->commit_root);
580 root->commit_root = NULL;
581 root->dirty_root = NULL;
582
583 spin_lock(&root->list_lock);
584 list_del_init(&dirty->root->dead_list);
585 spin_unlock(&root->list_lock);
586 545
587 kfree(dirty->root); 546 if (root->commit_root == root->node)
588 kfree(dirty);
589
590 /* make sure to update the root on disk
591 * so we get any updates to the block used
592 * counts
593 */
594 err = btrfs_update_root(trans,
595 root->fs_info->tree_root,
596 &root->root_key,
597 &root->root_item);
598 continue; 547 continue;
599 }
600 548
601 memset(&root->root_item.drop_progress, 0, 549 free_extent_buffer(root->commit_root);
602 sizeof(struct btrfs_disk_key)); 550 root->commit_root = btrfs_root_node(root);
603 root->root_item.drop_level = 0; 551
604 root->commit_root = NULL; 552 btrfs_set_root_node(&root->root_item, root->node);
605 root->dirty_root = NULL; 553 err = btrfs_update_root(trans, fs_info->tree_root,
606 root->root_key.offset = root->fs_info->generation;
607 btrfs_set_root_bytenr(&root->root_item,
608 root->node->start);
609 btrfs_set_root_level(&root->root_item,
610 btrfs_header_level(root->node));
611 btrfs_set_root_generation(&root->root_item,
612 root->root_key.offset);
613
614 err = btrfs_insert_root(trans, root->fs_info->tree_root,
615 &root->root_key, 554 &root->root_key,
616 &root->root_item); 555 &root->root_item);
617 if (err) 556 if (err)
618 break; 557 break;
619
620 refs = btrfs_root_refs(&dirty->root->root_item);
621 btrfs_set_root_refs(&dirty->root->root_item, refs - 1);
622 err = btrfs_update_root(trans, root->fs_info->tree_root,
623 &dirty->root->root_key,
624 &dirty->root->root_item);
625
626 BUG_ON(err);
627 if (refs == 1) {
628 list_add(&dirty->list, list);
629 } else {
630 WARN_ON(1);
631 free_extent_buffer(dirty->root->node);
632 kfree(dirty->root);
633 kfree(dirty);
634 }
635 } 558 }
636 } 559 }
637 return err; 560 return err;
@@ -688,12 +611,8 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
688 TASK_UNINTERRUPTIBLE); 611 TASK_UNINTERRUPTIBLE);
689 mutex_unlock(&info->trans_mutex); 612 mutex_unlock(&info->trans_mutex);
690 613
691 atomic_dec(&info->throttles);
692 wake_up(&info->transaction_throttle);
693
694 schedule(); 614 schedule();
695 615
696 atomic_inc(&info->throttles);
697 mutex_lock(&info->trans_mutex); 616 mutex_lock(&info->trans_mutex);
698 finish_wait(&info->transaction_wait, &wait); 617 finish_wait(&info->transaction_wait, &wait);
699 } 618 }
@@ -705,111 +624,61 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
705 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on 624 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
706 * all of them 625 * all of them
707 */ 626 */
708static noinline int drop_dirty_roots(struct btrfs_root *tree_root, 627int btrfs_drop_dead_root(struct btrfs_root *root)
709 struct list_head *list)
710{ 628{
711 struct btrfs_dirty_root *dirty;
712 struct btrfs_trans_handle *trans; 629 struct btrfs_trans_handle *trans;
630 struct btrfs_root *tree_root = root->fs_info->tree_root;
713 unsigned long nr; 631 unsigned long nr;
714 u64 num_bytes; 632 int ret;
715 u64 bytes_used;
716 u64 max_useless;
717 int ret = 0;
718 int err;
719
720 while (!list_empty(list)) {
721 struct btrfs_root *root;
722
723 dirty = list_entry(list->prev, struct btrfs_dirty_root, list);
724 list_del_init(&dirty->list);
725
726 num_bytes = btrfs_root_used(&dirty->root->root_item);
727 root = dirty->latest_root;
728 atomic_inc(&root->fs_info->throttles);
729
730 while (1) {
731 /*
732 * we don't want to jump in and create a bunch of
733 * delayed refs if the transaction is starting to close
734 */
735 wait_transaction_pre_flush(tree_root->fs_info);
736 trans = btrfs_start_transaction(tree_root, 1);
737
738 /*
739 * we've joined a transaction, make sure it isn't
740 * closing right now
741 */
742 if (trans->transaction->delayed_refs.flushing) {
743 btrfs_end_transaction(trans, tree_root);
744 continue;
745 }
746
747 mutex_lock(&root->fs_info->drop_mutex);
748 ret = btrfs_drop_snapshot(trans, dirty->root);
749 if (ret != -EAGAIN)
750 break;
751 mutex_unlock(&root->fs_info->drop_mutex);
752 633
753 err = btrfs_update_root(trans, 634 while (1) {
754 tree_root, 635 /*
755 &dirty->root->root_key, 636 * we don't want to jump in and create a bunch of
756 &dirty->root->root_item); 637 * delayed refs if the transaction is starting to close
757 if (err) 638 */
758 ret = err; 639 wait_transaction_pre_flush(tree_root->fs_info);
759 nr = trans->blocks_used; 640 trans = btrfs_start_transaction(tree_root, 1);
760 ret = btrfs_end_transaction(trans, tree_root);
761 BUG_ON(ret);
762 641
763 btrfs_btree_balance_dirty(tree_root, nr); 642 /*
764 cond_resched(); 643 * we've joined a transaction, make sure it isn't
644 * closing right now
645 */
646 if (trans->transaction->delayed_refs.flushing) {
647 btrfs_end_transaction(trans, tree_root);
648 continue;
765 } 649 }
766 BUG_ON(ret);
767 atomic_dec(&root->fs_info->throttles);
768 wake_up(&root->fs_info->transaction_throttle);
769 650
770 num_bytes -= btrfs_root_used(&dirty->root->root_item); 651 ret = btrfs_drop_snapshot(trans, root);
771 bytes_used = btrfs_root_used(&root->root_item); 652 if (ret != -EAGAIN)
772 if (num_bytes) { 653 break;
773 mutex_lock(&root->fs_info->trans_mutex);
774 btrfs_record_root_in_trans(root);
775 mutex_unlock(&root->fs_info->trans_mutex);
776 btrfs_set_root_used(&root->root_item,
777 bytes_used - num_bytes);
778 }
779 654
780 ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); 655 ret = btrfs_update_root(trans, tree_root,
781 if (ret) { 656 &root->root_key,
782 BUG(); 657 &root->root_item);
658 if (ret)
783 break; 659 break;
784 }
785 mutex_unlock(&root->fs_info->drop_mutex);
786
787 spin_lock(&root->list_lock);
788 list_del_init(&dirty->root->dead_list);
789 if (!list_empty(&root->dead_list)) {
790 struct btrfs_root *oldest;
791 oldest = list_entry(root->dead_list.prev,
792 struct btrfs_root, dead_list);
793 max_useless = oldest->root_key.offset - 1;
794 } else {
795 max_useless = root->root_key.offset - 1;
796 }
797 spin_unlock(&root->list_lock);
798 660
799 nr = trans->blocks_used; 661 nr = trans->blocks_used;
800 ret = btrfs_end_transaction(trans, tree_root); 662 ret = btrfs_end_transaction(trans, tree_root);
801 BUG_ON(ret); 663 BUG_ON(ret);
802 664
803 ret = btrfs_remove_leaf_refs(root, max_useless, 0);
804 BUG_ON(ret);
805
806 free_extent_buffer(dirty->root->node);
807 kfree(dirty->root);
808 kfree(dirty);
809
810 btrfs_btree_balance_dirty(tree_root, nr); 665 btrfs_btree_balance_dirty(tree_root, nr);
811 cond_resched(); 666 cond_resched();
812 } 667 }
668 BUG_ON(ret);
669
670 ret = btrfs_del_root(trans, tree_root, &root->root_key);
671 BUG_ON(ret);
672
673 nr = trans->blocks_used;
674 ret = btrfs_end_transaction(trans, tree_root);
675 BUG_ON(ret);
676
677 free_extent_buffer(root->node);
678 free_extent_buffer(root->commit_root);
679 kfree(root);
680
681 btrfs_btree_balance_dirty(tree_root, nr);
813 return ret; 682 return ret;
814} 683}
815 684
@@ -839,24 +708,23 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
839 if (ret) 708 if (ret)
840 goto fail; 709 goto fail;
841 710
842 btrfs_record_root_in_trans(root); 711 record_root_in_trans(trans, root);
843 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 712 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
844 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 713 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
845 714
846 key.objectid = objectid; 715 key.objectid = objectid;
847 key.offset = trans->transid; 716 key.offset = 0;
848 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 717 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
849 718
850 old = btrfs_lock_root_node(root); 719 old = btrfs_lock_root_node(root);
851 btrfs_cow_block(trans, root, old, NULL, 0, &old); 720 btrfs_cow_block(trans, root, old, NULL, 0, &old);
721 btrfs_set_lock_blocking(old);
852 722
853 btrfs_copy_root(trans, root, old, &tmp, objectid); 723 btrfs_copy_root(trans, root, old, &tmp, objectid);
854 btrfs_tree_unlock(old); 724 btrfs_tree_unlock(old);
855 free_extent_buffer(old); 725 free_extent_buffer(old);
856 726
857 btrfs_set_root_bytenr(new_root_item, tmp->start); 727 btrfs_set_root_node(new_root_item, tmp);
858 btrfs_set_root_level(new_root_item, btrfs_header_level(tmp));
859 btrfs_set_root_generation(new_root_item, trans->transid);
860 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, 728 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
861 new_root_item); 729 new_root_item);
862 btrfs_tree_unlock(tmp); 730 btrfs_tree_unlock(tmp);
@@ -964,6 +832,24 @@ static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans,
964 return 0; 832 return 0;
965} 833}
966 834
835static void update_super_roots(struct btrfs_root *root)
836{
837 struct btrfs_root_item *root_item;
838 struct btrfs_super_block *super;
839
840 super = &root->fs_info->super_copy;
841
842 root_item = &root->fs_info->chunk_root->root_item;
843 super->chunk_root = root_item->bytenr;
844 super->chunk_root_generation = root_item->generation;
845 super->chunk_root_level = root_item->level;
846
847 root_item = &root->fs_info->tree_root->root_item;
848 super->root = root_item->bytenr;
849 super->generation = root_item->generation;
850 super->root_level = root_item->level;
851}
852
967int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 853int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
968 struct btrfs_root *root) 854 struct btrfs_root *root)
969{ 855{
@@ -971,8 +857,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
971 unsigned long timeout = 1; 857 unsigned long timeout = 1;
972 struct btrfs_transaction *cur_trans; 858 struct btrfs_transaction *cur_trans;
973 struct btrfs_transaction *prev_trans = NULL; 859 struct btrfs_transaction *prev_trans = NULL;
974 struct btrfs_root *chunk_root = root->fs_info->chunk_root;
975 struct list_head dirty_fs_roots;
976 struct extent_io_tree *pinned_copy; 860 struct extent_io_tree *pinned_copy;
977 DEFINE_WAIT(wait); 861 DEFINE_WAIT(wait);
978 int ret; 862 int ret;
@@ -999,7 +883,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
999 BUG_ON(ret); 883 BUG_ON(ret);
1000 884
1001 mutex_lock(&root->fs_info->trans_mutex); 885 mutex_lock(&root->fs_info->trans_mutex);
1002 INIT_LIST_HEAD(&dirty_fs_roots);
1003 if (cur_trans->in_commit) { 886 if (cur_trans->in_commit) {
1004 cur_trans->use_count++; 887 cur_trans->use_count++;
1005 mutex_unlock(&root->fs_info->trans_mutex); 888 mutex_unlock(&root->fs_info->trans_mutex);
@@ -1105,41 +988,36 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1105 * with the tree-log code. 988 * with the tree-log code.
1106 */ 989 */
1107 mutex_lock(&root->fs_info->tree_log_mutex); 990 mutex_lock(&root->fs_info->tree_log_mutex);
1108 /*
1109 * keep tree reloc code from adding new reloc trees
1110 */
1111 mutex_lock(&root->fs_info->tree_reloc_mutex);
1112
1113 991
1114 ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, 992 ret = commit_fs_roots(trans, root);
1115 &dirty_fs_roots);
1116 BUG_ON(ret); 993 BUG_ON(ret);
1117 994
1118 /* add_dirty_roots gets rid of all the tree log roots, it is now 995 /* commit_fs_roots gets rid of all the tree log roots, it is now
1119 * safe to free the root of tree log roots 996 * safe to free the root of tree log roots
1120 */ 997 */
1121 btrfs_free_log_root_tree(trans, root->fs_info); 998 btrfs_free_log_root_tree(trans, root->fs_info);
1122 999
1123 ret = btrfs_commit_tree_roots(trans, root); 1000 ret = commit_cowonly_roots(trans, root);
1124 BUG_ON(ret); 1001 BUG_ON(ret);
1125 1002
1126 cur_trans = root->fs_info->running_transaction; 1003 cur_trans = root->fs_info->running_transaction;
1127 spin_lock(&root->fs_info->new_trans_lock); 1004 spin_lock(&root->fs_info->new_trans_lock);
1128 root->fs_info->running_transaction = NULL; 1005 root->fs_info->running_transaction = NULL;
1129 spin_unlock(&root->fs_info->new_trans_lock); 1006 spin_unlock(&root->fs_info->new_trans_lock);
1130 btrfs_set_super_generation(&root->fs_info->super_copy, 1007
1131 cur_trans->transid); 1008 btrfs_set_root_node(&root->fs_info->tree_root->root_item,
1132 btrfs_set_super_root(&root->fs_info->super_copy, 1009 root->fs_info->tree_root->node);
1133 root->fs_info->tree_root->node->start); 1010 free_extent_buffer(root->fs_info->tree_root->commit_root);
1134 btrfs_set_super_root_level(&root->fs_info->super_copy, 1011 root->fs_info->tree_root->commit_root =
1135 btrfs_header_level(root->fs_info->tree_root->node)); 1012 btrfs_root_node(root->fs_info->tree_root);
1136 1013
1137 btrfs_set_super_chunk_root(&root->fs_info->super_copy, 1014 btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
1138 chunk_root->node->start); 1015 root->fs_info->chunk_root->node);
1139 btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, 1016 free_extent_buffer(root->fs_info->chunk_root->commit_root);
1140 btrfs_header_level(chunk_root->node)); 1017 root->fs_info->chunk_root->commit_root =
1141 btrfs_set_super_chunk_root_generation(&root->fs_info->super_copy, 1018 btrfs_root_node(root->fs_info->chunk_root);
1142 btrfs_header_generation(chunk_root->node)); 1019
1020 update_super_roots(root);
1143 1021
1144 if (!root->fs_info->log_root_recovering) { 1022 if (!root->fs_info->log_root_recovering) {
1145 btrfs_set_super_log_root(&root->fs_info->super_copy, 0); 1023 btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
@@ -1153,7 +1031,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1153 1031
1154 trans->transaction->blocked = 0; 1032 trans->transaction->blocked = 0;
1155 1033
1156 wake_up(&root->fs_info->transaction_throttle);
1157 wake_up(&root->fs_info->transaction_wait); 1034 wake_up(&root->fs_info->transaction_wait);
1158 1035
1159 mutex_unlock(&root->fs_info->trans_mutex); 1036 mutex_unlock(&root->fs_info->trans_mutex);
@@ -1170,9 +1047,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1170 btrfs_finish_extent_commit(trans, root, pinned_copy); 1047 btrfs_finish_extent_commit(trans, root, pinned_copy);
1171 kfree(pinned_copy); 1048 kfree(pinned_copy);
1172 1049
1173 btrfs_drop_dead_reloc_roots(root);
1174 mutex_unlock(&root->fs_info->tree_reloc_mutex);
1175
1176 /* do the directory inserts of any pending snapshot creations */ 1050 /* do the directory inserts of any pending snapshot creations */
1177 finish_pending_snapshots(trans, root->fs_info); 1051 finish_pending_snapshots(trans, root->fs_info);
1178 1052
@@ -1186,16 +1060,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1186 put_transaction(cur_trans); 1060 put_transaction(cur_trans);
1187 put_transaction(cur_trans); 1061 put_transaction(cur_trans);
1188 1062
1189 list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
1190 if (root->fs_info->closing)
1191 list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);
1192
1193 mutex_unlock(&root->fs_info->trans_mutex); 1063 mutex_unlock(&root->fs_info->trans_mutex);
1194 1064
1195 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1065 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1196
1197 if (root->fs_info->closing)
1198 drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots);
1199 return ret; 1066 return ret;
1200} 1067}
1201 1068
@@ -1204,16 +1071,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1204 */ 1071 */
1205int btrfs_clean_old_snapshots(struct btrfs_root *root) 1072int btrfs_clean_old_snapshots(struct btrfs_root *root)
1206{ 1073{
1207 struct list_head dirty_roots; 1074 LIST_HEAD(list);
1208 INIT_LIST_HEAD(&dirty_roots); 1075 struct btrfs_fs_info *fs_info = root->fs_info;
1209again: 1076
1210 mutex_lock(&root->fs_info->trans_mutex); 1077 mutex_lock(&fs_info->trans_mutex);
1211 list_splice_init(&root->fs_info->dead_roots, &dirty_roots); 1078 list_splice_init(&fs_info->dead_roots, &list);
1212 mutex_unlock(&root->fs_info->trans_mutex); 1079 mutex_unlock(&fs_info->trans_mutex);
1213 1080
1214 if (!list_empty(&dirty_roots)) { 1081 while (!list_empty(&list)) {
1215 drop_dirty_roots(root, &dirty_roots); 1082 root = list_entry(list.next, struct btrfs_root, root_list);
1216 goto again; 1083 list_del_init(&root->root_list);
1084 btrfs_drop_dead_root(root);
1217 } 1085 }
1218 return 0; 1086 return 0;
1219} 1087}
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 94f5bde2b58d..961c3ee5a2e1 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -62,12 +62,6 @@ struct btrfs_pending_snapshot {
62 struct list_head list; 62 struct list_head list;
63}; 63};
64 64
65struct btrfs_dirty_root {
66 struct list_head list;
67 struct btrfs_root *root;
68 struct btrfs_root *latest_root;
69};
70
71static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, 65static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans,
72 struct inode *inode) 66 struct inode *inode)
73{ 67{
@@ -100,7 +94,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
100int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, 94int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
101 struct btrfs_root *root); 95 struct btrfs_root *root);
102 96
103int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest); 97int btrfs_add_dead_root(struct btrfs_root *root);
98int btrfs_drop_dead_root(struct btrfs_root *root);
104int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); 99int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
105int btrfs_clean_old_snapshots(struct btrfs_root *root); 100int btrfs_clean_old_snapshots(struct btrfs_root *root);
106int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 101int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
@@ -108,7 +103,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
108int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 103int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
109 struct btrfs_root *root); 104 struct btrfs_root *root);
110void btrfs_throttle(struct btrfs_root *root); 105void btrfs_throttle(struct btrfs_root *root);
111int btrfs_record_root_in_trans(struct btrfs_root *root); 106int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
107 struct btrfs_root *root);
112int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 108int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
113 struct extent_io_tree *dirty_pages); 109 struct extent_io_tree *dirty_pages);
114#endif 110#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index db5e212e8445..c13922206d1b 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -430,18 +430,16 @@ no_copy:
430static noinline struct inode *read_one_inode(struct btrfs_root *root, 430static noinline struct inode *read_one_inode(struct btrfs_root *root,
431 u64 objectid) 431 u64 objectid)
432{ 432{
433 struct btrfs_key key;
433 struct inode *inode; 434 struct inode *inode;
434 inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
435 if (inode->i_state & I_NEW) {
436 BTRFS_I(inode)->root = root;
437 BTRFS_I(inode)->location.objectid = objectid;
438 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
439 BTRFS_I(inode)->location.offset = 0;
440 btrfs_read_locked_inode(inode);
441 unlock_new_inode(inode);
442 435
443 } 436 key.objectid = objectid;
444 if (is_bad_inode(inode)) { 437 key.type = BTRFS_INODE_ITEM_KEY;
438 key.offset = 0;
439 inode = btrfs_iget(root->fs_info->sb, &key, root);
440 if (IS_ERR(inode)) {
441 inode = NULL;
442 } else if (is_bad_inode(inode)) {
445 iput(inode); 443 iput(inode);
446 inode = NULL; 444 inode = NULL;
447 } 445 }
@@ -541,6 +539,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
541 539
542 if (found_type == BTRFS_FILE_EXTENT_REG || 540 if (found_type == BTRFS_FILE_EXTENT_REG ||
543 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 541 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
542 u64 offset;
544 unsigned long dest_offset; 543 unsigned long dest_offset;
545 struct btrfs_key ins; 544 struct btrfs_key ins;
546 545
@@ -555,6 +554,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
555 ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); 554 ins.objectid = btrfs_file_extent_disk_bytenr(eb, item);
556 ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); 555 ins.offset = btrfs_file_extent_disk_num_bytes(eb, item);
557 ins.type = BTRFS_EXTENT_ITEM_KEY; 556 ins.type = BTRFS_EXTENT_ITEM_KEY;
557 offset = key->offset - btrfs_file_extent_offset(eb, item);
558 558
559 if (ins.objectid > 0) { 559 if (ins.objectid > 0) {
560 u64 csum_start; 560 u64 csum_start;
@@ -569,19 +569,16 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
569 if (ret == 0) { 569 if (ret == 0) {
570 ret = btrfs_inc_extent_ref(trans, root, 570 ret = btrfs_inc_extent_ref(trans, root,
571 ins.objectid, ins.offset, 571 ins.objectid, ins.offset,
572 path->nodes[0]->start, 572 0, root->root_key.objectid,
573 root->root_key.objectid, 573 key->objectid, offset);
574 trans->transid, key->objectid);
575 } else { 574 } else {
576 /* 575 /*
577 * insert the extent pointer in the extent 576 * insert the extent pointer in the extent
578 * allocation tree 577 * allocation tree
579 */ 578 */
580 ret = btrfs_alloc_logged_extent(trans, root, 579 ret = btrfs_alloc_logged_file_extent(trans,
581 path->nodes[0]->start, 580 root, root->root_key.objectid,
582 root->root_key.objectid, 581 key->objectid, offset, &ins);
583 trans->transid, key->objectid,
584 &ins);
585 BUG_ON(ret); 582 BUG_ON(ret);
586 } 583 }
587 btrfs_release_path(root, path); 584 btrfs_release_path(root, path);
@@ -1706,9 +1703,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1706 btrfs_wait_tree_block_writeback(next); 1703 btrfs_wait_tree_block_writeback(next);
1707 btrfs_tree_unlock(next); 1704 btrfs_tree_unlock(next);
1708 1705
1709 ret = btrfs_drop_leaf_ref(trans, root, next);
1710 BUG_ON(ret);
1711
1712 WARN_ON(root_owner != 1706 WARN_ON(root_owner !=
1713 BTRFS_TREE_LOG_OBJECTID); 1707 BTRFS_TREE_LOG_OBJECTID);
1714 ret = btrfs_free_reserved_extent(root, 1708 ret = btrfs_free_reserved_extent(root,
@@ -1753,10 +1747,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1753 btrfs_wait_tree_block_writeback(next); 1747 btrfs_wait_tree_block_writeback(next);
1754 btrfs_tree_unlock(next); 1748 btrfs_tree_unlock(next);
1755 1749
1756 if (*level == 0) {
1757 ret = btrfs_drop_leaf_ref(trans, root, next);
1758 BUG_ON(ret);
1759 }
1760 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); 1750 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
1761 ret = btrfs_free_reserved_extent(root, bytenr, blocksize); 1751 ret = btrfs_free_reserved_extent(root, bytenr, blocksize);
1762 BUG_ON(ret); 1752 BUG_ON(ret);
@@ -1811,12 +1801,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
1811 btrfs_wait_tree_block_writeback(next); 1801 btrfs_wait_tree_block_writeback(next);
1812 btrfs_tree_unlock(next); 1802 btrfs_tree_unlock(next);
1813 1803
1814 if (*level == 0) {
1815 ret = btrfs_drop_leaf_ref(trans, root,
1816 next);
1817 BUG_ON(ret);
1818 }
1819
1820 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); 1804 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
1821 ret = btrfs_free_reserved_extent(root, 1805 ret = btrfs_free_reserved_extent(root,
1822 path->nodes[*level]->start, 1806 path->nodes[*level]->start,
@@ -1884,11 +1868,6 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
1884 btrfs_wait_tree_block_writeback(next); 1868 btrfs_wait_tree_block_writeback(next);
1885 btrfs_tree_unlock(next); 1869 btrfs_tree_unlock(next);
1886 1870
1887 if (orig_level == 0) {
1888 ret = btrfs_drop_leaf_ref(trans, log,
1889 next);
1890 BUG_ON(ret);
1891 }
1892 WARN_ON(log->root_key.objectid != 1871 WARN_ON(log->root_key.objectid !=
1893 BTRFS_TREE_LOG_OBJECTID); 1872 BTRFS_TREE_LOG_OBJECTID);
1894 ret = btrfs_free_reserved_extent(log, next->start, 1873 ret = btrfs_free_reserved_extent(log, next->start,
@@ -2027,9 +2006,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2027 ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); 2006 ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages);
2028 BUG_ON(ret); 2007 BUG_ON(ret);
2029 2008
2030 btrfs_set_root_bytenr(&log->root_item, log->node->start); 2009 btrfs_set_root_node(&log->root_item, log->node);
2031 btrfs_set_root_generation(&log->root_item, trans->transid);
2032 btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node));
2033 2010
2034 root->log_batch = 0; 2011 root->log_batch = 0;
2035 root->log_transid++; 2012 root->log_transid++;
@@ -2581,7 +2558,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2581 ins_keys, ins_sizes, nr); 2558 ins_keys, ins_sizes, nr);
2582 BUG_ON(ret); 2559 BUG_ON(ret);
2583 2560
2584 for (i = 0; i < nr; i++) { 2561 for (i = 0; i < nr; i++, dst_path->slots[0]++) {
2585 dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], 2562 dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0],
2586 dst_path->slots[0]); 2563 dst_path->slots[0]);
2587 2564
@@ -2617,36 +2594,31 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2617 found_type = btrfs_file_extent_type(src, extent); 2594 found_type = btrfs_file_extent_type(src, extent);
2618 if (found_type == BTRFS_FILE_EXTENT_REG || 2595 if (found_type == BTRFS_FILE_EXTENT_REG ||
2619 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 2596 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
2620 u64 ds = btrfs_file_extent_disk_bytenr(src, 2597 u64 ds, dl, cs, cl;
2621 extent); 2598 ds = btrfs_file_extent_disk_bytenr(src,
2622 u64 dl = btrfs_file_extent_disk_num_bytes(src, 2599 extent);
2623 extent); 2600 /* ds == 0 is a hole */
2624 u64 cs = btrfs_file_extent_offset(src, extent); 2601 if (ds == 0)
2625 u64 cl = btrfs_file_extent_num_bytes(src, 2602 continue;
2626 extent);; 2603
2604 dl = btrfs_file_extent_disk_num_bytes(src,
2605 extent);
2606 cs = btrfs_file_extent_offset(src, extent);
2607 cl = btrfs_file_extent_num_bytes(src,
2608 extent);;
2627 if (btrfs_file_extent_compression(src, 2609 if (btrfs_file_extent_compression(src,
2628 extent)) { 2610 extent)) {
2629 cs = 0; 2611 cs = 0;
2630 cl = dl; 2612 cl = dl;
2631 } 2613 }
2632 /* ds == 0 is a hole */ 2614
2633 if (ds != 0) { 2615 ret = btrfs_lookup_csums_range(
2634 ret = btrfs_inc_extent_ref(trans, log, 2616 log->fs_info->csum_root,
2635 ds, dl, 2617 ds + cs, ds + cs + cl - 1,
2636 dst_path->nodes[0]->start, 2618 &ordered_sums);
2637 BTRFS_TREE_LOG_OBJECTID, 2619 BUG_ON(ret);
2638 trans->transid,
2639 ins_keys[i].objectid);
2640 BUG_ON(ret);
2641 ret = btrfs_lookup_csums_range(
2642 log->fs_info->csum_root,
2643 ds + cs, ds + cs + cl - 1,
2644 &ordered_sums);
2645 BUG_ON(ret);
2646 }
2647 } 2620 }
2648 } 2621 }
2649 dst_path->slots[0]++;
2650 } 2622 }
2651 2623
2652 btrfs_mark_buffer_dirty(dst_path->nodes[0]); 2624 btrfs_mark_buffer_dirty(dst_path->nodes[0]);
@@ -3029,9 +3001,7 @@ again:
3029 BUG_ON(!wc.replay_dest); 3001 BUG_ON(!wc.replay_dest);
3030 3002
3031 wc.replay_dest->log_root = log; 3003 wc.replay_dest->log_root = log;
3032 mutex_lock(&fs_info->trans_mutex); 3004 btrfs_record_root_in_trans(trans, wc.replay_dest);
3033 btrfs_record_root_in_trans(wc.replay_dest);
3034 mutex_unlock(&fs_info->trans_mutex);
3035 ret = walk_log_tree(trans, log, &wc); 3005 ret = walk_log_tree(trans, log, &wc);
3036 BUG_ON(ret); 3006 BUG_ON(ret);
3037 3007
@@ -3049,6 +3019,7 @@ again:
3049 key.offset = found_key.offset - 1; 3019 key.offset = found_key.offset - 1;
3050 wc.replay_dest->log_root = NULL; 3020 wc.replay_dest->log_root = NULL;
3051 free_extent_buffer(log->node); 3021 free_extent_buffer(log->node);
3022 free_extent_buffer(log->commit_root);
3052 kfree(log); 3023 kfree(log);
3053 3024
3054 if (found_key.offset == 0) 3025 if (found_key.offset == 0)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a6d35b0054ca..3ab80e9cd767 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -161,8 +161,10 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
161 int again = 0; 161 int again = 0;
162 unsigned long num_run; 162 unsigned long num_run;
163 unsigned long num_sync_run; 163 unsigned long num_sync_run;
164 unsigned long batch_run = 0;
164 unsigned long limit; 165 unsigned long limit;
165 unsigned long last_waited = 0; 166 unsigned long last_waited = 0;
167 int force_reg = 0;
166 168
167 bdi = blk_get_backing_dev_info(device->bdev); 169 bdi = blk_get_backing_dev_info(device->bdev);
168 fs_info = device->dev_root->fs_info; 170 fs_info = device->dev_root->fs_info;
@@ -176,19 +178,22 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
176 178
177loop: 179loop:
178 spin_lock(&device->io_lock); 180 spin_lock(&device->io_lock);
179 num_run = 0;
180 181
181loop_lock: 182loop_lock:
183 num_run = 0;
182 184
183 /* take all the bios off the list at once and process them 185 /* take all the bios off the list at once and process them
184 * later on (without the lock held). But, remember the 186 * later on (without the lock held). But, remember the
185 * tail and other pointers so the bios can be properly reinserted 187 * tail and other pointers so the bios can be properly reinserted
186 * into the list if we hit congestion 188 * into the list if we hit congestion
187 */ 189 */
188 if (device->pending_sync_bios.head) 190 if (!force_reg && device->pending_sync_bios.head) {
189 pending_bios = &device->pending_sync_bios; 191 pending_bios = &device->pending_sync_bios;
190 else 192 force_reg = 1;
193 } else {
191 pending_bios = &device->pending_bios; 194 pending_bios = &device->pending_bios;
195 force_reg = 0;
196 }
192 197
193 pending = pending_bios->head; 198 pending = pending_bios->head;
194 tail = pending_bios->tail; 199 tail = pending_bios->tail;
@@ -228,10 +233,14 @@ loop_lock:
228 while (pending) { 233 while (pending) {
229 234
230 rmb(); 235 rmb();
231 if (pending_bios != &device->pending_sync_bios && 236 /* we want to work on both lists, but do more bios on the
232 device->pending_sync_bios.head && 237 * sync list than the regular list
233 num_run > 16) { 238 */
234 cond_resched(); 239 if ((num_run > 32 &&
240 pending_bios != &device->pending_sync_bios &&
241 device->pending_sync_bios.head) ||
242 (num_run > 64 && pending_bios == &device->pending_sync_bios &&
243 device->pending_bios.head)) {
235 spin_lock(&device->io_lock); 244 spin_lock(&device->io_lock);
236 requeue_list(pending_bios, pending, tail); 245 requeue_list(pending_bios, pending, tail);
237 goto loop_lock; 246 goto loop_lock;
@@ -249,6 +258,8 @@ loop_lock:
249 BUG_ON(atomic_read(&cur->bi_cnt) == 0); 258 BUG_ON(atomic_read(&cur->bi_cnt) == 0);
250 submit_bio(cur->bi_rw, cur); 259 submit_bio(cur->bi_rw, cur);
251 num_run++; 260 num_run++;
261 batch_run++;
262
252 if (bio_sync(cur)) 263 if (bio_sync(cur))
253 num_sync_run++; 264 num_sync_run++;
254 265
@@ -265,7 +276,7 @@ loop_lock:
265 * is now congested. Back off and let other work structs 276 * is now congested. Back off and let other work structs
266 * run instead 277 * run instead
267 */ 278 */
268 if (pending && bdi_write_congested(bdi) && num_run > 16 && 279 if (pending && bdi_write_congested(bdi) && batch_run > 32 &&
269 fs_info->fs_devices->open_devices > 1) { 280 fs_info->fs_devices->open_devices > 1) {
270 struct io_context *ioc; 281 struct io_context *ioc;
271 282
@@ -366,6 +377,7 @@ static noinline int device_list_add(const char *path,
366 memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); 377 memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
367 fs_devices->latest_devid = devid; 378 fs_devices->latest_devid = devid;
368 fs_devices->latest_trans = found_transid; 379 fs_devices->latest_trans = found_transid;
380 mutex_init(&fs_devices->device_list_mutex);
369 device = NULL; 381 device = NULL;
370 } else { 382 } else {
371 device = __find_device(&fs_devices->devices, devid, 383 device = __find_device(&fs_devices->devices, devid,
@@ -392,7 +404,11 @@ static noinline int device_list_add(const char *path,
392 return -ENOMEM; 404 return -ENOMEM;
393 } 405 }
394 INIT_LIST_HEAD(&device->dev_alloc_list); 406 INIT_LIST_HEAD(&device->dev_alloc_list);
407
408 mutex_lock(&fs_devices->device_list_mutex);
395 list_add(&device->dev_list, &fs_devices->devices); 409 list_add(&device->dev_list, &fs_devices->devices);
410 mutex_unlock(&fs_devices->device_list_mutex);
411
396 device->fs_devices = fs_devices; 412 device->fs_devices = fs_devices;
397 fs_devices->num_devices++; 413 fs_devices->num_devices++;
398 } 414 }
@@ -418,10 +434,12 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
418 INIT_LIST_HEAD(&fs_devices->devices); 434 INIT_LIST_HEAD(&fs_devices->devices);
419 INIT_LIST_HEAD(&fs_devices->alloc_list); 435 INIT_LIST_HEAD(&fs_devices->alloc_list);
420 INIT_LIST_HEAD(&fs_devices->list); 436 INIT_LIST_HEAD(&fs_devices->list);
437 mutex_init(&fs_devices->device_list_mutex);
421 fs_devices->latest_devid = orig->latest_devid; 438 fs_devices->latest_devid = orig->latest_devid;
422 fs_devices->latest_trans = orig->latest_trans; 439 fs_devices->latest_trans = orig->latest_trans;
423 memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); 440 memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));
424 441
442 mutex_lock(&orig->device_list_mutex);
425 list_for_each_entry(orig_dev, &orig->devices, dev_list) { 443 list_for_each_entry(orig_dev, &orig->devices, dev_list) {
426 device = kzalloc(sizeof(*device), GFP_NOFS); 444 device = kzalloc(sizeof(*device), GFP_NOFS);
427 if (!device) 445 if (!device)
@@ -443,8 +461,10 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
443 device->fs_devices = fs_devices; 461 device->fs_devices = fs_devices;
444 fs_devices->num_devices++; 462 fs_devices->num_devices++;
445 } 463 }
464 mutex_unlock(&orig->device_list_mutex);
446 return fs_devices; 465 return fs_devices;
447error: 466error:
467 mutex_unlock(&orig->device_list_mutex);
448 free_fs_devices(fs_devices); 468 free_fs_devices(fs_devices);
449 return ERR_PTR(-ENOMEM); 469 return ERR_PTR(-ENOMEM);
450} 470}
@@ -455,6 +475,7 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
455 475
456 mutex_lock(&uuid_mutex); 476 mutex_lock(&uuid_mutex);
457again: 477again:
478 mutex_lock(&fs_devices->device_list_mutex);
458 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { 479 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
459 if (device->in_fs_metadata) 480 if (device->in_fs_metadata)
460 continue; 481 continue;
@@ -474,6 +495,7 @@ again:
474 kfree(device->name); 495 kfree(device->name);
475 kfree(device); 496 kfree(device);
476 } 497 }
498 mutex_unlock(&fs_devices->device_list_mutex);
477 499
478 if (fs_devices->seed) { 500 if (fs_devices->seed) {
479 fs_devices = fs_devices->seed; 501 fs_devices = fs_devices->seed;
@@ -594,6 +616,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
594 device->in_fs_metadata = 0; 616 device->in_fs_metadata = 0;
595 device->mode = flags; 617 device->mode = flags;
596 618
619 if (!blk_queue_nonrot(bdev_get_queue(bdev)))
620 fs_devices->rotating = 1;
621
597 fs_devices->open_devices++; 622 fs_devices->open_devices++;
598 if (device->writeable) { 623 if (device->writeable) {
599 fs_devices->rw_devices++; 624 fs_devices->rw_devices++;
@@ -1121,12 +1146,14 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1121 1146
1122 device = NULL; 1147 device = NULL;
1123 devices = &root->fs_info->fs_devices->devices; 1148 devices = &root->fs_info->fs_devices->devices;
1149 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1124 list_for_each_entry(tmp, devices, dev_list) { 1150 list_for_each_entry(tmp, devices, dev_list) {
1125 if (tmp->in_fs_metadata && !tmp->bdev) { 1151 if (tmp->in_fs_metadata && !tmp->bdev) {
1126 device = tmp; 1152 device = tmp;
1127 break; 1153 break;
1128 } 1154 }
1129 } 1155 }
1156 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1130 bdev = NULL; 1157 bdev = NULL;
1131 bh = NULL; 1158 bh = NULL;
1132 disk_super = NULL; 1159 disk_super = NULL;
@@ -1181,7 +1208,16 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1181 goto error_brelse; 1208 goto error_brelse;
1182 1209
1183 device->in_fs_metadata = 0; 1210 device->in_fs_metadata = 0;
1211
1212 /*
1213 * the device list mutex makes sure that we don't change
1214 * the device list while someone else is writing out all
1215 * the device supers.
1216 */
1217 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1184 list_del_init(&device->dev_list); 1218 list_del_init(&device->dev_list);
1219 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1220
1185 device->fs_devices->num_devices--; 1221 device->fs_devices->num_devices--;
1186 1222
1187 next_device = list_entry(root->fs_info->fs_devices->devices.next, 1223 next_device = list_entry(root->fs_info->fs_devices->devices.next,
@@ -1275,6 +1311,7 @@ static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans,
1275 seed_devices->opened = 1; 1311 seed_devices->opened = 1;
1276 INIT_LIST_HEAD(&seed_devices->devices); 1312 INIT_LIST_HEAD(&seed_devices->devices);
1277 INIT_LIST_HEAD(&seed_devices->alloc_list); 1313 INIT_LIST_HEAD(&seed_devices->alloc_list);
1314 mutex_init(&seed_devices->device_list_mutex);
1278 list_splice_init(&fs_devices->devices, &seed_devices->devices); 1315 list_splice_init(&fs_devices->devices, &seed_devices->devices);
1279 list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); 1316 list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
1280 list_for_each_entry(device, &seed_devices->devices, dev_list) { 1317 list_for_each_entry(device, &seed_devices->devices, dev_list) {
@@ -1400,6 +1437,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1400 mutex_lock(&root->fs_info->volume_mutex); 1437 mutex_lock(&root->fs_info->volume_mutex);
1401 1438
1402 devices = &root->fs_info->fs_devices->devices; 1439 devices = &root->fs_info->fs_devices->devices;
1440 /*
1441 * we have the volume lock, so we don't need the extra
1442 * device list mutex while reading the list here.
1443 */
1403 list_for_each_entry(device, devices, dev_list) { 1444 list_for_each_entry(device, devices, dev_list) {
1404 if (device->bdev == bdev) { 1445 if (device->bdev == bdev) {
1405 ret = -EEXIST; 1446 ret = -EEXIST;
@@ -1454,6 +1495,12 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1454 } 1495 }
1455 1496
1456 device->fs_devices = root->fs_info->fs_devices; 1497 device->fs_devices = root->fs_info->fs_devices;
1498
1499 /*
1500 * we don't want write_supers to jump in here with our device
1501 * half setup
1502 */
1503 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1457 list_add(&device->dev_list, &root->fs_info->fs_devices->devices); 1504 list_add(&device->dev_list, &root->fs_info->fs_devices->devices);
1458 list_add(&device->dev_alloc_list, 1505 list_add(&device->dev_alloc_list,
1459 &root->fs_info->fs_devices->alloc_list); 1506 &root->fs_info->fs_devices->alloc_list);
@@ -1462,6 +1509,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1462 root->fs_info->fs_devices->rw_devices++; 1509 root->fs_info->fs_devices->rw_devices++;
1463 root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; 1510 root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
1464 1511
1512 if (!blk_queue_nonrot(bdev_get_queue(bdev)))
1513 root->fs_info->fs_devices->rotating = 1;
1514
1465 total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); 1515 total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
1466 btrfs_set_super_total_bytes(&root->fs_info->super_copy, 1516 btrfs_set_super_total_bytes(&root->fs_info->super_copy,
1467 total_bytes + device->total_bytes); 1517 total_bytes + device->total_bytes);
@@ -1469,6 +1519,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1469 total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); 1519 total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
1470 btrfs_set_super_num_devices(&root->fs_info->super_copy, 1520 btrfs_set_super_num_devices(&root->fs_info->super_copy,
1471 total_bytes + 1); 1521 total_bytes + 1);
1522 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1472 1523
1473 if (seeding_dev) { 1524 if (seeding_dev) {
1474 ret = init_first_rw_device(trans, root, device); 1525 ret = init_first_rw_device(trans, root, device);
@@ -1671,8 +1722,6 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
1671 int ret; 1722 int ret;
1672 int i; 1723 int i;
1673 1724
1674 printk(KERN_INFO "btrfs relocating chunk %llu\n",
1675 (unsigned long long)chunk_offset);
1676 root = root->fs_info->chunk_root; 1725 root = root->fs_info->chunk_root;
1677 extent_root = root->fs_info->extent_root; 1726 extent_root = root->fs_info->extent_root;
1678 em_tree = &root->fs_info->mapping_tree.map_tree; 1727 em_tree = &root->fs_info->mapping_tree.map_tree;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5c3ff6d02fd7..5139a833f721 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -96,7 +96,12 @@ struct btrfs_fs_devices {
96 u64 rw_devices; 96 u64 rw_devices;
97 u64 total_rw_bytes; 97 u64 total_rw_bytes;
98 struct block_device *latest_bdev; 98 struct block_device *latest_bdev;
99 /* all of the devices in the FS */ 99
100 /* all of the devices in the FS, protected by a mutex
101 * so we can safely walk it to write out the supers without
102 * worrying about add/remove by the multi-device code
103 */
104 struct mutex device_list_mutex;
100 struct list_head devices; 105 struct list_head devices;
101 106
102 /* devices not currently being allocated */ 107 /* devices not currently being allocated */
@@ -107,6 +112,11 @@ struct btrfs_fs_devices {
107 int seeding; 112 int seeding;
108 113
109 int opened; 114 int opened;
115
116 /* set when we find or add a device that doesn't have the
117 * nonrot flag set
118 */
119 int rotating;
110}; 120};
111 121
112struct btrfs_bio_stripe { 122struct btrfs_bio_stripe {