Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--  fs/btrfs/extent-tree.c  618
1 file changed, 308 insertions, 310 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9c01509dd8ab..32312e09f0f5 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -35,6 +35,7 @@
35#include "locking.h" 35#include "locking.h"
36#include "free-space-cache.h" 36#include "free-space-cache.h"
37#include "math.h" 37#include "math.h"
38#include "sysfs.h"
38 39
39#undef SCRAMBLE_DELAYED_REFS 40#undef SCRAMBLE_DELAYED_REFS
40 41
@@ -441,7 +442,8 @@ next:
441 if (ret) 442 if (ret)
442 break; 443 break;
443 444
444 if (need_resched()) { 445 if (need_resched() ||
446 rwsem_is_contended(&fs_info->extent_commit_sem)) {
445 caching_ctl->progress = last; 447 caching_ctl->progress = last;
446 btrfs_release_path(path); 448 btrfs_release_path(path);
447 up_read(&fs_info->extent_commit_sem); 449 up_read(&fs_info->extent_commit_sem);
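
The hunk above makes the caching thread back off not only on need_resched() but also when another task is queued on extent_commit_sem. As a rough userspace illustration (not the kernel code), the same "drop a shared lock when someone is waiting" shape can be sketched with a pthread rwlock and an explicit, hypothetical writer_waiting flag, since pthreads has no rwsem_is_contended() equivalent:

/* Sketch: a long-running reader that voluntarily drops a shared lock
 * when a writer signals it is waiting. "writer_waiting" is an invented
 * stand-in; pthreads cannot query rwlock contention directly. */
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>

static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
static atomic_int writer_waiting;   /* a writer sets this before blocking */

static void scan_items(int nr_items)
{
    pthread_rwlock_rdlock(&lock);
    for (int i = 0; i < nr_items; i++) {
        /* ... examine one item ... */
        if (atomic_load(&writer_waiting)) {
            /* record progress, let the writer in, then resume */
            pthread_rwlock_unlock(&lock);
            sched_yield();
            pthread_rwlock_rdlock(&lock);
        }
    }
    pthread_rwlock_unlock(&lock);
}

int main(void)
{
    scan_items(16);    /* the writer side is omitted in this sketch */
    return 0;
}
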
@@ -855,12 +857,14 @@ again:
855 btrfs_put_delayed_ref(&head->node); 857 btrfs_put_delayed_ref(&head->node);
856 goto search_again; 858 goto search_again;
857 } 859 }
860 spin_lock(&head->lock);
858 if (head->extent_op && head->extent_op->update_flags) 861 if (head->extent_op && head->extent_op->update_flags)
859 extent_flags |= head->extent_op->flags_to_set; 862 extent_flags |= head->extent_op->flags_to_set;
860 else 863 else
861 BUG_ON(num_refs == 0); 864 BUG_ON(num_refs == 0);
862 865
863 num_refs += head->node.ref_mod; 866 num_refs += head->node.ref_mod;
867 spin_unlock(&head->lock);
864 mutex_unlock(&head->mutex); 868 mutex_unlock(&head->mutex);
865 } 869 }
866 spin_unlock(&delayed_refs->lock); 870 spin_unlock(&delayed_refs->lock);
@@ -1070,11 +1074,11 @@ static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
1070 __le64 lenum; 1074 __le64 lenum;
1071 1075
1072 lenum = cpu_to_le64(root_objectid); 1076 lenum = cpu_to_le64(root_objectid);
1073 high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); 1077 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
1074 lenum = cpu_to_le64(owner); 1078 lenum = cpu_to_le64(owner);
1075 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 1079 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
1076 lenum = cpu_to_le64(offset); 1080 lenum = cpu_to_le64(offset);
1077 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 1081 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
1078 1082
1079 return ((u64)high_crc << 31) ^ (u64)low_crc; 1083 return ((u64)high_crc << 31) ^ (u64)low_crc;
1080} 1084}
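
hash_extent_data_ref() keeps its structure here and only switches to the btrfs_crc32c() wrapper: one CRC32C accumulator covers the root objectid, a second covers owner and offset, and the two are folded into a 64-bit key with a 31-bit shift. A standalone sketch of that layout, using a plain bitwise CRC32C as a stand-in for the kernel helper and assuming a little-endian host so the bytes match cpu_to_le64():

/* Sketch of the hash layout above: high_crc covers the root id, low_crc
 * covers owner and offset, and the two are folded into a u64. crc32c_sw()
 * is a simple software stand-in for the kernel's btrfs_crc32c(). */
#include <stdint.h>
#include <stdio.h>

static uint32_t crc32c_sw(uint32_t crc, const void *buf, size_t len)
{
    const uint8_t *p = buf;

    while (len--) {
        crc ^= *p++;
        for (int i = 0; i < 8; i++)
            crc = (crc >> 1) ^ (0x82f63b78 & -(crc & 1));
    }
    return crc;
}

static uint64_t hash_extent_data_ref(uint64_t root, uint64_t owner, uint64_t offset)
{
    uint32_t high_crc = ~0u, low_crc = ~0u;
    uint64_t le;

    /* the kernel hashes the on-disk little-endian representation */
    le = root;   high_crc = crc32c_sw(high_crc, &le, sizeof(le));
    le = owner;  low_crc  = crc32c_sw(low_crc, &le, sizeof(le));
    le = offset; low_crc  = crc32c_sw(low_crc, &le, sizeof(le));

    /* note the 31-bit shift, exactly as in the function above */
    return ((uint64_t)high_crc << 31) ^ (uint64_t)low_crc;
}

int main(void)
{
    printf("%016llx\n", (unsigned long long)hash_extent_data_ref(5, 257, 0));
    return 0;
}
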
@@ -2285,64 +2289,62 @@ static noinline struct btrfs_delayed_ref_node *
2285select_delayed_ref(struct btrfs_delayed_ref_head *head) 2289select_delayed_ref(struct btrfs_delayed_ref_head *head)
2286{ 2290{
2287 struct rb_node *node; 2291 struct rb_node *node;
2288 struct btrfs_delayed_ref_node *ref; 2292 struct btrfs_delayed_ref_node *ref, *last = NULL;;
2289 int action = BTRFS_ADD_DELAYED_REF; 2293
2290again:
2291 /* 2294 /*
2292 * select delayed ref of type BTRFS_ADD_DELAYED_REF first. 2295 * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
2293 * this prevents ref count from going down to zero when 2296 * this prevents ref count from going down to zero when
2294 * there still are pending delayed ref. 2297 * there still are pending delayed ref.
2295 */ 2298 */
2296 node = rb_prev(&head->node.rb_node); 2299 node = rb_first(&head->ref_root);
2297 while (1) { 2300 while (node) {
2298 if (!node)
2299 break;
2300 ref = rb_entry(node, struct btrfs_delayed_ref_node, 2301 ref = rb_entry(node, struct btrfs_delayed_ref_node,
2301 rb_node); 2302 rb_node);
2302 if (ref->bytenr != head->node.bytenr) 2303 if (ref->action == BTRFS_ADD_DELAYED_REF)
2303 break;
2304 if (ref->action == action)
2305 return ref; 2304 return ref;
2306 node = rb_prev(node); 2305 else if (last == NULL)
2307 } 2306 last = ref;
2308 if (action == BTRFS_ADD_DELAYED_REF) { 2307 node = rb_next(node);
2309 action = BTRFS_DROP_DELAYED_REF;
2310 goto again;
2311 } 2308 }
2312 return NULL; 2309 return last;
2313} 2310}
2314 2311
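
The rewritten select_delayed_ref() above scans the head's private ref_root tree and returns the first BTRFS_ADD_DELAYED_REF it meets, otherwise the first ref seen, so pending additions are applied before drops can take the count to zero. A minimal sketch of that preference, with a flat array standing in for the rbtree and local ADD/DROP constants:

/* Sketch: pick an ADD ref before any DROP ref, mirroring the
 * select_delayed_ref() rewrite above. Nothing here is kernel API. */
#include <stddef.h>
#include <stdio.h>

enum { ADD_REF, DROP_REF };

struct ref {
    int action;
    long long bytenr;
};

static struct ref *select_ref(struct ref *refs, size_t nr)
{
    struct ref *last = NULL;

    for (size_t i = 0; i < nr; i++) {
        if (refs[i].action == ADD_REF)
            return &refs[i];      /* additions win outright */
        if (!last)
            last = &refs[i];      /* remember the first fallback */
    }
    return last;                  /* NULL only if the set is empty */
}

int main(void)
{
    struct ref refs[] = {
        { DROP_REF, 4096 },
        { ADD_REF,  4096 },
    };
    struct ref *r = select_ref(refs, 2);

    printf("picked action %d\n", r->action);   /* prints 0 (ADD_REF) */
    return 0;
}
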
2315/* 2312/*
2316 * Returns 0 on success or if called with an already aborted transaction. 2313 * Returns 0 on success or if called with an already aborted transaction.
2317 * Returns -ENOMEM or -EIO on failure and will abort the transaction. 2314 * Returns -ENOMEM or -EIO on failure and will abort the transaction.
2318 */ 2315 */
2319static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, 2316static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2320 struct btrfs_root *root, 2317 struct btrfs_root *root,
2321 struct list_head *cluster) 2318 unsigned long nr)
2322{ 2319{
2323 struct btrfs_delayed_ref_root *delayed_refs; 2320 struct btrfs_delayed_ref_root *delayed_refs;
2324 struct btrfs_delayed_ref_node *ref; 2321 struct btrfs_delayed_ref_node *ref;
2325 struct btrfs_delayed_ref_head *locked_ref = NULL; 2322 struct btrfs_delayed_ref_head *locked_ref = NULL;
2326 struct btrfs_delayed_extent_op *extent_op; 2323 struct btrfs_delayed_extent_op *extent_op;
2327 struct btrfs_fs_info *fs_info = root->fs_info; 2324 struct btrfs_fs_info *fs_info = root->fs_info;
2325 ktime_t start = ktime_get();
2328 int ret; 2326 int ret;
2329 int count = 0; 2327 unsigned long count = 0;
2328 unsigned long actual_count = 0;
2330 int must_insert_reserved = 0; 2329 int must_insert_reserved = 0;
2331 2330
2332 delayed_refs = &trans->transaction->delayed_refs; 2331 delayed_refs = &trans->transaction->delayed_refs;
2333 while (1) { 2332 while (1) {
2334 if (!locked_ref) { 2333 if (!locked_ref) {
2335 /* pick a new head ref from the cluster list */ 2334 if (count >= nr)
2336 if (list_empty(cluster))
2337 break; 2335 break;
2338 2336
2339 locked_ref = list_entry(cluster->next, 2337 spin_lock(&delayed_refs->lock);
2340 struct btrfs_delayed_ref_head, cluster); 2338 locked_ref = btrfs_select_ref_head(trans);
2339 if (!locked_ref) {
2340 spin_unlock(&delayed_refs->lock);
2341 break;
2342 }
2341 2343
2342 /* grab the lock that says we are going to process 2344 /* grab the lock that says we are going to process
2343 * all the refs for this head */ 2345 * all the refs for this head */
2344 ret = btrfs_delayed_ref_lock(trans, locked_ref); 2346 ret = btrfs_delayed_ref_lock(trans, locked_ref);
2345 2347 spin_unlock(&delayed_refs->lock);
2346 /* 2348 /*
2347 * we may have dropped the spin lock to get the head 2349 * we may have dropped the spin lock to get the head
2348 * mutex lock, and that might have given someone else 2350 * mutex lock, and that might have given someone else
@@ -2363,6 +2365,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2363 * finish. If we merged anything we need to re-loop so we can 2365 * finish. If we merged anything we need to re-loop so we can
2364 * get a good ref. 2366 * get a good ref.
2365 */ 2367 */
2368 spin_lock(&locked_ref->lock);
2366 btrfs_merge_delayed_refs(trans, fs_info, delayed_refs, 2369 btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
2367 locked_ref); 2370 locked_ref);
2368 2371
@@ -2374,17 +2377,15 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2374 2377
2375 if (ref && ref->seq && 2378 if (ref && ref->seq &&
2376 btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) { 2379 btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
2377 /* 2380 spin_unlock(&locked_ref->lock);
2378 * there are still refs with lower seq numbers in the
2379 * process of being added. Don't run this ref yet.
2380 */
2381 list_del_init(&locked_ref->cluster);
2382 btrfs_delayed_ref_unlock(locked_ref); 2381 btrfs_delayed_ref_unlock(locked_ref);
2383 locked_ref = NULL; 2382 spin_lock(&delayed_refs->lock);
2383 locked_ref->processing = 0;
2384 delayed_refs->num_heads_ready++; 2384 delayed_refs->num_heads_ready++;
2385 spin_unlock(&delayed_refs->lock); 2385 spin_unlock(&delayed_refs->lock);
2386 locked_ref = NULL;
2386 cond_resched(); 2387 cond_resched();
2387 spin_lock(&delayed_refs->lock); 2388 count++;
2388 continue; 2389 continue;
2389 } 2390 }
2390 2391
@@ -2399,6 +2400,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2399 locked_ref->extent_op = NULL; 2400 locked_ref->extent_op = NULL;
2400 2401
2401 if (!ref) { 2402 if (!ref) {
2403
2404
2402 /* All delayed refs have been processed, Go ahead 2405 /* All delayed refs have been processed, Go ahead
2403 * and send the head node to run_one_delayed_ref, 2406 * and send the head node to run_one_delayed_ref,
2404 * so that any accounting fixes can happen 2407 * so that any accounting fixes can happen
@@ -2411,8 +2414,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2411 } 2414 }
2412 2415
2413 if (extent_op) { 2416 if (extent_op) {
2414 spin_unlock(&delayed_refs->lock); 2417 spin_unlock(&locked_ref->lock);
2415
2416 ret = run_delayed_extent_op(trans, root, 2418 ret = run_delayed_extent_op(trans, root,
2417 ref, extent_op); 2419 ref, extent_op);
2418 btrfs_free_delayed_extent_op(extent_op); 2420 btrfs_free_delayed_extent_op(extent_op);
@@ -2426,19 +2428,39 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2426 */ 2428 */
2427 if (must_insert_reserved) 2429 if (must_insert_reserved)
2428 locked_ref->must_insert_reserved = 1; 2430 locked_ref->must_insert_reserved = 1;
2431 locked_ref->processing = 0;
2429 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); 2432 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
2430 spin_lock(&delayed_refs->lock);
2431 btrfs_delayed_ref_unlock(locked_ref); 2433 btrfs_delayed_ref_unlock(locked_ref);
2432 return ret; 2434 return ret;
2433 } 2435 }
2436 continue;
2437 }
2434 2438
2435 goto next; 2439 /*
2440 * Need to drop our head ref lock and re-aqcuire the
2441 * delayed ref lock and then re-check to make sure
2442 * nobody got added.
2443 */
2444 spin_unlock(&locked_ref->lock);
2445 spin_lock(&delayed_refs->lock);
2446 spin_lock(&locked_ref->lock);
2447 if (rb_first(&locked_ref->ref_root)) {
2448 spin_unlock(&locked_ref->lock);
2449 spin_unlock(&delayed_refs->lock);
2450 continue;
2436 } 2451 }
2452 ref->in_tree = 0;
2453 delayed_refs->num_heads--;
2454 rb_erase(&locked_ref->href_node,
2455 &delayed_refs->href_root);
2456 spin_unlock(&delayed_refs->lock);
2457 } else {
2458 actual_count++;
2459 ref->in_tree = 0;
2460 rb_erase(&ref->rb_node, &locked_ref->ref_root);
2437 } 2461 }
2462 atomic_dec(&delayed_refs->num_entries);
2438 2463
2439 ref->in_tree = 0;
2440 rb_erase(&ref->rb_node, &delayed_refs->root);
2441 delayed_refs->num_entries--;
2442 if (!btrfs_delayed_ref_is_head(ref)) { 2464 if (!btrfs_delayed_ref_is_head(ref)) {
2443 /* 2465 /*
2444 * when we play the delayed ref, also correct the 2466 * when we play the delayed ref, also correct the
@@ -2455,20 +2477,18 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2455 default: 2477 default:
2456 WARN_ON(1); 2478 WARN_ON(1);
2457 } 2479 }
2458 } else {
2459 list_del_init(&locked_ref->cluster);
2460 } 2480 }
2461 spin_unlock(&delayed_refs->lock); 2481 spin_unlock(&locked_ref->lock);
2462 2482
2463 ret = run_one_delayed_ref(trans, root, ref, extent_op, 2483 ret = run_one_delayed_ref(trans, root, ref, extent_op,
2464 must_insert_reserved); 2484 must_insert_reserved);
2465 2485
2466 btrfs_free_delayed_extent_op(extent_op); 2486 btrfs_free_delayed_extent_op(extent_op);
2467 if (ret) { 2487 if (ret) {
2488 locked_ref->processing = 0;
2468 btrfs_delayed_ref_unlock(locked_ref); 2489 btrfs_delayed_ref_unlock(locked_ref);
2469 btrfs_put_delayed_ref(ref); 2490 btrfs_put_delayed_ref(ref);
2470 btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret); 2491 btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret);
2471 spin_lock(&delayed_refs->lock);
2472 return ret; 2492 return ret;
2473 } 2493 }
2474 2494
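
The hunks above stop holding delayed_refs->lock across the whole run: the global lock now only covers picking and erasing heads, while each head's new spinlock protects its own ref tree while it is processed. A small pthread sketch of that two-level pattern, with mutexes standing in for the kernel spinlocks and everything else invented for the example:

/* Sketch: a short global lock to dequeue a head, a per-head lock for
 * the longer work of running its refs. */
#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

struct head {
    pthread_mutex_t lock;
    int nr_refs;
    struct head *next;
};

static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;
static struct head *pending;

static struct head *select_head(void)
{
    struct head *h;

    pthread_mutex_lock(&global_lock);   /* short: just dequeue */
    h = pending;
    if (h)
        pending = h->next;
    pthread_mutex_unlock(&global_lock);
    return h;
}

static void run_head(struct head *h)
{
    pthread_mutex_lock(&h->lock);       /* long work under the per-head lock */
    while (h->nr_refs > 0)
        h->nr_refs--;                   /* ... run one delayed ref ... */
    pthread_mutex_unlock(&h->lock);
}

int main(void)
{
    struct head h = { .nr_refs = 3 };
    struct head *p;

    pthread_mutex_init(&h.lock, NULL);
    pending = &h;
    while ((p = select_head()) != NULL)
        run_head(p);
    printf("all heads processed\n");
    return 0;
}
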
@@ -2484,11 +2504,29 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2484 } 2504 }
2485 btrfs_put_delayed_ref(ref); 2505 btrfs_put_delayed_ref(ref);
2486 count++; 2506 count++;
2487next:
2488 cond_resched(); 2507 cond_resched();
2508 }
2509
2510 /*
2511 * We don't want to include ref heads since we can have empty ref heads
2512 * and those will drastically skew our runtime down since we just do
2513 * accounting, no actual extent tree updates.
2514 */
2515 if (actual_count > 0) {
2516 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
2517 u64 avg;
2518
2519 /*
2520 * We weigh the current average higher than our current runtime
2521 * to avoid large swings in the average.
2522 */
2489 spin_lock(&delayed_refs->lock); 2523 spin_lock(&delayed_refs->lock);
2524 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2525 avg = div64_u64(avg, 4);
2526 fs_info->avg_delayed_ref_runtime = avg;
2527 spin_unlock(&delayed_refs->lock);
2490 } 2528 }
2491 return count; 2529 return 0;
2492} 2530}
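
The accounting added at the end of __btrfs_run_delayed_refs() folds the elapsed time of a run into fs_info->avg_delayed_ref_runtime with a 3:1 weight toward the stored value, so one slow batch only moves the average a quarter of the way: starting from 100,000 ns, a 200,000 ns sample gives (3*100000 + 200000)/4 = 125,000 ns. A tiny sketch of that update, with the locking omitted:

/* Sketch of the 3:1 weighted average used above. avg_ns persists the way
 * fs_info->avg_delayed_ref_runtime does. */
#include <stdint.h>
#include <stdio.h>

static uint64_t avg_ns = 100000;    /* pretend the stored average is 100us */

static void update_avg(uint64_t runtime_ns)
{
    avg_ns = (avg_ns * 3 + runtime_ns) / 4;
}

int main(void)
{
    update_avg(200000);             /* one 200us run */
    printf("avg after one slow run: %llu ns\n", (unsigned long long)avg_ns);
    /* prints 125000: the old average still carries 3/4 of the weight */
    return 0;
}
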
2493 2531
2494#ifdef SCRAMBLE_DELAYED_REFS 2532#ifdef SCRAMBLE_DELAYED_REFS
@@ -2570,16 +2608,6 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
2570 return ret; 2608 return ret;
2571} 2609}
2572 2610
2573static int refs_newer(struct btrfs_delayed_ref_root *delayed_refs, int seq,
2574 int count)
2575{
2576 int val = atomic_read(&delayed_refs->ref_seq);
2577
2578 if (val < seq || val >= seq + count)
2579 return 1;
2580 return 0;
2581}
2582
2583static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) 2611static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
2584{ 2612{
2585 u64 num_bytes; 2613 u64 num_bytes;
@@ -2596,7 +2624,7 @@ static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
2596 return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root)); 2624 return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
2597} 2625}
2598 2626
2599int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, 2627int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
2600 struct btrfs_root *root) 2628 struct btrfs_root *root)
2601{ 2629{
2602 struct btrfs_block_rsv *global_rsv; 2630 struct btrfs_block_rsv *global_rsv;
@@ -2625,6 +2653,22 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
2625 return ret; 2653 return ret;
2626} 2654}
2627 2655
2656int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
2657 struct btrfs_root *root)
2658{
2659 struct btrfs_fs_info *fs_info = root->fs_info;
2660 u64 num_entries =
2661 atomic_read(&trans->transaction->delayed_refs.num_entries);
2662 u64 avg_runtime;
2663
2664 smp_mb();
2665 avg_runtime = fs_info->avg_delayed_ref_runtime;
2666 if (num_entries * avg_runtime >= NSEC_PER_SEC)
2667 return 1;
2668
2669 return btrfs_check_space_for_delayed_refs(trans, root);
2670}
2671
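
The new btrfs_should_throttle_delayed_refs() above asks callers to help once the queued entries, at the measured average cost, add up to roughly a second of work, and otherwise falls back to the existing space check. A hedged sketch of that predicate, with the space check reduced to a stub:

/* Sketch of the "about a second of backlog" test added above.
 * NSEC_PER_SEC matches the kernel constant; space_is_tight() is a
 * stand-in stub for btrfs_check_space_for_delayed_refs(). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

static bool space_is_tight(void)
{
    return false;
}

static bool should_throttle(uint64_t num_entries, uint64_t avg_runtime_ns)
{
    if (num_entries * avg_runtime_ns >= NSEC_PER_SEC)
        return true;    /* a full second of backlog: make callers help out */
    return space_is_tight();
}

int main(void)
{
    /* 20,000 queued refs at 60us each is ~1.2s of work -> throttle */
    printf("%d\n", should_throttle(20000, 60000));
    return 0;
}
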
2628/* 2672/*
2629 * this starts processing the delayed reference count updates and 2673 * this starts processing the delayed reference count updates and
2630 * extent insertions we have queued up so far. count can be 2674 * extent insertions we have queued up so far. count can be
@@ -2640,13 +2684,10 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2640{ 2684{
2641 struct rb_node *node; 2685 struct rb_node *node;
2642 struct btrfs_delayed_ref_root *delayed_refs; 2686 struct btrfs_delayed_ref_root *delayed_refs;
2643 struct btrfs_delayed_ref_node *ref; 2687 struct btrfs_delayed_ref_head *head;
2644 struct list_head cluster;
2645 int ret; 2688 int ret;
2646 u64 delayed_start;
2647 int run_all = count == (unsigned long)-1; 2689 int run_all = count == (unsigned long)-1;
2648 int run_most = 0; 2690 int run_most = 0;
2649 int loops;
2650 2691
2651 /* We'll clean this up in btrfs_cleanup_transaction */ 2692 /* We'll clean this up in btrfs_cleanup_transaction */
2652 if (trans->aborted) 2693 if (trans->aborted)
@@ -2658,130 +2699,40 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2658 btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); 2699 btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
2659 2700
2660 delayed_refs = &trans->transaction->delayed_refs; 2701 delayed_refs = &trans->transaction->delayed_refs;
2661 INIT_LIST_HEAD(&cluster);
2662 if (count == 0) { 2702 if (count == 0) {
2663 count = delayed_refs->num_entries * 2; 2703 count = atomic_read(&delayed_refs->num_entries) * 2;
2664 run_most = 1; 2704 run_most = 1;
2665 } 2705 }
2666 2706
2667 if (!run_all && !run_most) {
2668 int old;
2669 int seq = atomic_read(&delayed_refs->ref_seq);
2670
2671progress:
2672 old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
2673 if (old) {
2674 DEFINE_WAIT(__wait);
2675 if (delayed_refs->flushing ||
2676 !btrfs_should_throttle_delayed_refs(trans, root))
2677 return 0;
2678
2679 prepare_to_wait(&delayed_refs->wait, &__wait,
2680 TASK_UNINTERRUPTIBLE);
2681
2682 old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
2683 if (old) {
2684 schedule();
2685 finish_wait(&delayed_refs->wait, &__wait);
2686
2687 if (!refs_newer(delayed_refs, seq, 256))
2688 goto progress;
2689 else
2690 return 0;
2691 } else {
2692 finish_wait(&delayed_refs->wait, &__wait);
2693 goto again;
2694 }
2695 }
2696
2697 } else {
2698 atomic_inc(&delayed_refs->procs_running_refs);
2699 }
2700
2701again: 2707again:
2702 loops = 0;
2703 spin_lock(&delayed_refs->lock);
2704
2705#ifdef SCRAMBLE_DELAYED_REFS 2708#ifdef SCRAMBLE_DELAYED_REFS
2706 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); 2709 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2707#endif 2710#endif
2708 2711 ret = __btrfs_run_delayed_refs(trans, root, count);
2709 while (1) { 2712 if (ret < 0) {
2710 if (!(run_all || run_most) && 2713 btrfs_abort_transaction(trans, root, ret);
2711 !btrfs_should_throttle_delayed_refs(trans, root)) 2714 return ret;
2712 break;
2713
2714 /*
2715 * go find something we can process in the rbtree. We start at
2716 * the beginning of the tree, and then build a cluster
2717 * of refs to process starting at the first one we are able to
2718 * lock
2719 */
2720 delayed_start = delayed_refs->run_delayed_start;
2721 ret = btrfs_find_ref_cluster(trans, &cluster,
2722 delayed_refs->run_delayed_start);
2723 if (ret)
2724 break;
2725
2726 ret = run_clustered_refs(trans, root, &cluster);
2727 if (ret < 0) {
2728 btrfs_release_ref_cluster(&cluster);
2729 spin_unlock(&delayed_refs->lock);
2730 btrfs_abort_transaction(trans, root, ret);
2731 atomic_dec(&delayed_refs->procs_running_refs);
2732 wake_up(&delayed_refs->wait);
2733 return ret;
2734 }
2735
2736 atomic_add(ret, &delayed_refs->ref_seq);
2737
2738 count -= min_t(unsigned long, ret, count);
2739
2740 if (count == 0)
2741 break;
2742
2743 if (delayed_start >= delayed_refs->run_delayed_start) {
2744 if (loops == 0) {
2745 /*
2746 * btrfs_find_ref_cluster looped. let's do one
2747 * more cycle. if we don't run any delayed ref
2748 * during that cycle (because we can't because
2749 * all of them are blocked), bail out.
2750 */
2751 loops = 1;
2752 } else {
2753 /*
2754 * no runnable refs left, stop trying
2755 */
2756 BUG_ON(run_all);
2757 break;
2758 }
2759 }
2760 if (ret) {
2761 /* refs were run, let's reset staleness detection */
2762 loops = 0;
2763 }
2764 } 2715 }
2765 2716
2766 if (run_all) { 2717 if (run_all) {
2767 if (!list_empty(&trans->new_bgs)) { 2718 if (!list_empty(&trans->new_bgs))
2768 spin_unlock(&delayed_refs->lock);
2769 btrfs_create_pending_block_groups(trans, root); 2719 btrfs_create_pending_block_groups(trans, root);
2770 spin_lock(&delayed_refs->lock);
2771 }
2772 2720
2773 node = rb_first(&delayed_refs->root); 2721 spin_lock(&delayed_refs->lock);
2774 if (!node) 2722 node = rb_first(&delayed_refs->href_root);
2723 if (!node) {
2724 spin_unlock(&delayed_refs->lock);
2775 goto out; 2725 goto out;
2726 }
2776 count = (unsigned long)-1; 2727 count = (unsigned long)-1;
2777 2728
2778 while (node) { 2729 while (node) {
2779 ref = rb_entry(node, struct btrfs_delayed_ref_node, 2730 head = rb_entry(node, struct btrfs_delayed_ref_head,
2780 rb_node); 2731 href_node);
2781 if (btrfs_delayed_ref_is_head(ref)) { 2732 if (btrfs_delayed_ref_is_head(&head->node)) {
2782 struct btrfs_delayed_ref_head *head; 2733 struct btrfs_delayed_ref_node *ref;
2783 2734
2784 head = btrfs_delayed_node_to_head(ref); 2735 ref = &head->node;
2785 atomic_inc(&ref->refs); 2736 atomic_inc(&ref->refs);
2786 2737
2787 spin_unlock(&delayed_refs->lock); 2738 spin_unlock(&delayed_refs->lock);
@@ -2795,20 +2746,16 @@ again:
2795 btrfs_put_delayed_ref(ref); 2746 btrfs_put_delayed_ref(ref);
2796 cond_resched(); 2747 cond_resched();
2797 goto again; 2748 goto again;
2749 } else {
2750 WARN_ON(1);
2798 } 2751 }
2799 node = rb_next(node); 2752 node = rb_next(node);
2800 } 2753 }
2801 spin_unlock(&delayed_refs->lock); 2754 spin_unlock(&delayed_refs->lock);
2802 schedule_timeout(1); 2755 cond_resched();
2803 goto again; 2756 goto again;
2804 } 2757 }
2805out: 2758out:
2806 atomic_dec(&delayed_refs->procs_running_refs);
2807 smp_mb();
2808 if (waitqueue_active(&delayed_refs->wait))
2809 wake_up(&delayed_refs->wait);
2810
2811 spin_unlock(&delayed_refs->lock);
2812 assert_qgroups_uptodate(trans); 2759 assert_qgroups_uptodate(trans);
2813 return 0; 2760 return 0;
2814} 2761}
@@ -2850,12 +2797,13 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
2850 struct rb_node *node; 2797 struct rb_node *node;
2851 int ret = 0; 2798 int ret = 0;
2852 2799
2853 ret = -ENOENT;
2854 delayed_refs = &trans->transaction->delayed_refs; 2800 delayed_refs = &trans->transaction->delayed_refs;
2855 spin_lock(&delayed_refs->lock); 2801 spin_lock(&delayed_refs->lock);
2856 head = btrfs_find_delayed_ref_head(trans, bytenr); 2802 head = btrfs_find_delayed_ref_head(trans, bytenr);
2857 if (!head) 2803 if (!head) {
2858 goto out; 2804 spin_unlock(&delayed_refs->lock);
2805 return 0;
2806 }
2859 2807
2860 if (!mutex_trylock(&head->mutex)) { 2808 if (!mutex_trylock(&head->mutex)) {
2861 atomic_inc(&head->node.refs); 2809 atomic_inc(&head->node.refs);
@@ -2872,40 +2820,35 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
2872 btrfs_put_delayed_ref(&head->node); 2820 btrfs_put_delayed_ref(&head->node);
2873 return -EAGAIN; 2821 return -EAGAIN;
2874 } 2822 }
2823 spin_unlock(&delayed_refs->lock);
2875 2824
2876 node = rb_prev(&head->node.rb_node); 2825 spin_lock(&head->lock);
2877 if (!node) 2826 node = rb_first(&head->ref_root);
2878 goto out_unlock; 2827 while (node) {
2879 2828 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2880 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2829 node = rb_next(node);
2881
2882 if (ref->bytenr != bytenr)
2883 goto out_unlock;
2884
2885 ret = 1;
2886 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY)
2887 goto out_unlock;
2888 2830
2889 data_ref = btrfs_delayed_node_to_data_ref(ref); 2831 /* If it's a shared ref we know a cross reference exists */
2832 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
2833 ret = 1;
2834 break;
2835 }
2890 2836
2891 node = rb_prev(node); 2837 data_ref = btrfs_delayed_node_to_data_ref(ref);
2892 if (node) {
2893 int seq = ref->seq;
2894 2838
2895 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2839 /*
2896 if (ref->bytenr == bytenr && ref->seq == seq) 2840 * If our ref doesn't match the one we're currently looking at
2897 goto out_unlock; 2841 * then we have a cross reference.
2842 */
2843 if (data_ref->root != root->root_key.objectid ||
2844 data_ref->objectid != objectid ||
2845 data_ref->offset != offset) {
2846 ret = 1;
2847 break;
2848 }
2898 } 2849 }
2899 2850 spin_unlock(&head->lock);
2900 if (data_ref->root != root->root_key.objectid ||
2901 data_ref->objectid != objectid || data_ref->offset != offset)
2902 goto out_unlock;
2903
2904 ret = 0;
2905out_unlock:
2906 mutex_unlock(&head->mutex); 2851 mutex_unlock(&head->mutex);
2907out:
2908 spin_unlock(&delayed_refs->lock);
2909 return ret; 2852 return ret;
2910} 2853}
2911 2854
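
check_delayed_ref() now walks every ref attached to the head under head->lock: any shared (non-data) ref, or a data ref whose root, objectid or offset differ from the caller's, means a cross reference exists. The same decision, sketched over an array with simplified local types:

/* Sketch of the cross-reference scan above. SHARED_REF stands in for any
 * ref type other than BTRFS_EXTENT_DATA_REF_KEY; the structures are local
 * to this example. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

enum ref_type { DATA_REF, SHARED_REF };

struct dref {
    enum ref_type type;
    unsigned long long root, objectid, offset;
};

static bool cross_ref_exists(const struct dref *refs, size_t nr,
                             unsigned long long root,
                             unsigned long long objectid,
                             unsigned long long offset)
{
    for (size_t i = 0; i < nr; i++) {
        if (refs[i].type != DATA_REF)
            return true;    /* shared ref: someone else points here */
        if (refs[i].root != root || refs[i].objectid != objectid ||
            refs[i].offset != offset)
            return true;    /* data ref owned by someone else */
    }
    return false;           /* every ref matches the caller */
}

int main(void)
{
    struct dref refs[] = { { DATA_REF, 5, 257, 0 }, { DATA_REF, 7, 257, 0 } };

    printf("%d\n", cross_ref_exists(refs, 2, 5, 257, 0));    /* prints 1 */
    return 0;
}
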
@@ -3402,6 +3345,23 @@ int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
3402 return readonly; 3345 return readonly;
3403} 3346}
3404 3347
3348static const char *alloc_name(u64 flags)
3349{
3350 switch (flags) {
3351 case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
3352 return "mixed";
3353 case BTRFS_BLOCK_GROUP_METADATA:
3354 return "metadata";
3355 case BTRFS_BLOCK_GROUP_DATA:
3356 return "data";
3357 case BTRFS_BLOCK_GROUP_SYSTEM:
3358 return "system";
3359 default:
3360 WARN_ON(1);
3361 return "invalid-combination";
3362 };
3363}
3364
3405static int update_space_info(struct btrfs_fs_info *info, u64 flags, 3365static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3406 u64 total_bytes, u64 bytes_used, 3366 u64 total_bytes, u64 bytes_used,
3407 struct btrfs_space_info **space_info) 3367 struct btrfs_space_info **space_info)
@@ -3439,8 +3399,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3439 return ret; 3399 return ret;
3440 } 3400 }
3441 3401
3442 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) 3402 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
3443 INIT_LIST_HEAD(&found->block_groups[i]); 3403 INIT_LIST_HEAD(&found->block_groups[i]);
3404 kobject_init(&found->block_group_kobjs[i], &btrfs_raid_ktype);
3405 }
3444 init_rwsem(&found->groups_sem); 3406 init_rwsem(&found->groups_sem);
3445 spin_lock_init(&found->lock); 3407 spin_lock_init(&found->lock);
3446 found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; 3408 found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
@@ -3457,11 +3419,21 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3457 found->chunk_alloc = 0; 3419 found->chunk_alloc = 0;
3458 found->flush = 0; 3420 found->flush = 0;
3459 init_waitqueue_head(&found->wait); 3421 init_waitqueue_head(&found->wait);
3422
3423 ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
3424 info->space_info_kobj, "%s",
3425 alloc_name(found->flags));
3426 if (ret) {
3427 kfree(found);
3428 return ret;
3429 }
3430
3460 *space_info = found; 3431 *space_info = found;
3461 list_add_rcu(&found->list, &info->space_info); 3432 list_add_rcu(&found->list, &info->space_info);
3462 if (flags & BTRFS_BLOCK_GROUP_DATA) 3433 if (flags & BTRFS_BLOCK_GROUP_DATA)
3463 info->data_sinfo = found; 3434 info->data_sinfo = found;
3464 return 0; 3435
3436 return ret;
3465} 3437}
3466 3438
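
update_space_info() above also registers the space_info with sysfs via kobject_init_and_add(), named by alloc_name(), and frees the half-built structure when registration fails. Since the kobject API only exists in the kernel, the sketch below shows only the general construct/register/unwind shape in plain C, with a hypothetical register_object() in place of the sysfs call:

/* Sketch of the register-or-unwind flow added above. register_object()
 * is an invented stand-in for kobject_init_and_add(). */
#include <stdio.h>
#include <stdlib.h>

struct space_info {
    char name[16];
};

static int register_object(struct space_info *s)
{
    printf("registered %s\n", s->name);    /* a sysfs failure would return nonzero */
    return 0;
}

static struct space_info *create_space_info(const char *name)
{
    struct space_info *s = calloc(1, sizeof(*s));

    if (!s)
        return NULL;
    snprintf(s->name, sizeof(s->name), "%s", name);
    if (register_object(s)) {
        free(s);    /* same unwind as the kfree(found) above */
        return NULL;
    }
    return s;
}

int main(void)
{
    struct space_info *s = create_space_info("metadata");

    free(s);    /* in the kernel the release goes through kobject_put() */
    return 0;
}
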
3467static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 3439static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
@@ -4637,7 +4609,7 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
4637 u64 num_bytes) 4609 u64 num_bytes)
4638{ 4610{
4639 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; 4611 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
4640 if (global_rsv->full || global_rsv == block_rsv || 4612 if (global_rsv == block_rsv ||
4641 block_rsv->space_info != global_rsv->space_info) 4613 block_rsv->space_info != global_rsv->space_info)
4642 global_rsv = NULL; 4614 global_rsv = NULL;
4643 block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv, 4615 block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv,
@@ -5916,24 +5888,16 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
5916{ 5888{
5917 struct btrfs_delayed_ref_head *head; 5889 struct btrfs_delayed_ref_head *head;
5918 struct btrfs_delayed_ref_root *delayed_refs; 5890 struct btrfs_delayed_ref_root *delayed_refs;
5919 struct btrfs_delayed_ref_node *ref;
5920 struct rb_node *node;
5921 int ret = 0; 5891 int ret = 0;
5922 5892
5923 delayed_refs = &trans->transaction->delayed_refs; 5893 delayed_refs = &trans->transaction->delayed_refs;
5924 spin_lock(&delayed_refs->lock); 5894 spin_lock(&delayed_refs->lock);
5925 head = btrfs_find_delayed_ref_head(trans, bytenr); 5895 head = btrfs_find_delayed_ref_head(trans, bytenr);
5926 if (!head) 5896 if (!head)
5927 goto out; 5897 goto out_delayed_unlock;
5928 5898
5929 node = rb_prev(&head->node.rb_node); 5899 spin_lock(&head->lock);
5930 if (!node) 5900 if (rb_first(&head->ref_root))
5931 goto out;
5932
5933 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
5934
5935 /* there are still entries for this ref, we can't drop it */
5936 if (ref->bytenr == bytenr)
5937 goto out; 5901 goto out;
5938 5902
5939 if (head->extent_op) { 5903 if (head->extent_op) {
@@ -5955,19 +5919,19 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
5955 * ahead and process it. 5919 * ahead and process it.
5956 */ 5920 */
5957 head->node.in_tree = 0; 5921 head->node.in_tree = 0;
5958 rb_erase(&head->node.rb_node, &delayed_refs->root); 5922 rb_erase(&head->href_node, &delayed_refs->href_root);
5959 5923
5960 delayed_refs->num_entries--; 5924 atomic_dec(&delayed_refs->num_entries);
5961 5925
5962 /* 5926 /*
5963 * we don't take a ref on the node because we're removing it from the 5927 * we don't take a ref on the node because we're removing it from the
5964 * tree, so we just steal the ref the tree was holding. 5928 * tree, so we just steal the ref the tree was holding.
5965 */ 5929 */
5966 delayed_refs->num_heads--; 5930 delayed_refs->num_heads--;
5967 if (list_empty(&head->cluster)) 5931 if (head->processing == 0)
5968 delayed_refs->num_heads_ready--; 5932 delayed_refs->num_heads_ready--;
5969 5933 head->processing = 0;
5970 list_del_init(&head->cluster); 5934 spin_unlock(&head->lock);
5971 spin_unlock(&delayed_refs->lock); 5935 spin_unlock(&delayed_refs->lock);
5972 5936
5973 BUG_ON(head->extent_op); 5937 BUG_ON(head->extent_op);
@@ -5978,6 +5942,9 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
5978 btrfs_put_delayed_ref(&head->node); 5942 btrfs_put_delayed_ref(&head->node);
5979 return ret; 5943 return ret;
5980out: 5944out:
5945 spin_unlock(&head->lock);
5946
5947out_delayed_unlock:
5981 spin_unlock(&delayed_refs->lock); 5948 spin_unlock(&delayed_refs->lock);
5982 return 0; 5949 return 0;
5983} 5950}
@@ -6145,11 +6112,29 @@ int __get_raid_index(u64 flags)
6145 return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */ 6112 return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
6146} 6113}
6147 6114
6148static int get_block_group_index(struct btrfs_block_group_cache *cache) 6115int get_block_group_index(struct btrfs_block_group_cache *cache)
6149{ 6116{
6150 return __get_raid_index(cache->flags); 6117 return __get_raid_index(cache->flags);
6151} 6118}
6152 6119
6120static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
6121 [BTRFS_RAID_RAID10] = "raid10",
6122 [BTRFS_RAID_RAID1] = "raid1",
6123 [BTRFS_RAID_DUP] = "dup",
6124 [BTRFS_RAID_RAID0] = "raid0",
6125 [BTRFS_RAID_SINGLE] = "single",
6126 [BTRFS_RAID_RAID5] = "raid5",
6127 [BTRFS_RAID_RAID6] = "raid6",
6128};
6129
6130static const char *get_raid_name(enum btrfs_raid_types type)
6131{
6132 if (type >= BTRFS_NR_RAID_TYPES)
6133 return NULL;
6134
6135 return btrfs_raid_type_names[type];
6136}
6137
6153enum btrfs_loop_type { 6138enum btrfs_loop_type {
6154 LOOP_CACHING_NOWAIT = 0, 6139 LOOP_CACHING_NOWAIT = 0,
6155 LOOP_CACHING_WAIT = 1, 6140 LOOP_CACHING_WAIT = 1,
@@ -6177,7 +6162,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
6177 struct btrfs_root *root = orig_root->fs_info->extent_root; 6162 struct btrfs_root *root = orig_root->fs_info->extent_root;
6178 struct btrfs_free_cluster *last_ptr = NULL; 6163 struct btrfs_free_cluster *last_ptr = NULL;
6179 struct btrfs_block_group_cache *block_group = NULL; 6164 struct btrfs_block_group_cache *block_group = NULL;
6180 struct btrfs_block_group_cache *used_block_group;
6181 u64 search_start = 0; 6165 u64 search_start = 0;
6182 u64 max_extent_size = 0; 6166 u64 max_extent_size = 0;
6183 int empty_cluster = 2 * 1024 * 1024; 6167 int empty_cluster = 2 * 1024 * 1024;
@@ -6186,7 +6170,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
6186 int index = __get_raid_index(flags); 6170 int index = __get_raid_index(flags);
6187 int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ? 6171 int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
6188 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; 6172 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
6189 bool found_uncached_bg = false;
6190 bool failed_cluster_refill = false; 6173 bool failed_cluster_refill = false;
6191 bool failed_alloc = false; 6174 bool failed_alloc = false;
6192 bool use_cluster = true; 6175 bool use_cluster = true;
@@ -6239,7 +6222,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
6239 if (search_start == hint_byte) { 6222 if (search_start == hint_byte) {
6240 block_group = btrfs_lookup_block_group(root->fs_info, 6223 block_group = btrfs_lookup_block_group(root->fs_info,
6241 search_start); 6224 search_start);
6242 used_block_group = block_group;
6243 /* 6225 /*
6244 * we don't want to use the block group if it doesn't match our 6226 * we don't want to use the block group if it doesn't match our
6245 * allocation bits, or if its not cached. 6227 * allocation bits, or if its not cached.
@@ -6276,7 +6258,6 @@ search:
6276 u64 offset; 6258 u64 offset;
6277 int cached; 6259 int cached;
6278 6260
6279 used_block_group = block_group;
6280 btrfs_get_block_group(block_group); 6261 btrfs_get_block_group(block_group);
6281 search_start = block_group->key.objectid; 6262 search_start = block_group->key.objectid;
6282 6263
@@ -6304,7 +6285,6 @@ search:
6304have_block_group: 6285have_block_group:
6305 cached = block_group_cache_done(block_group); 6286 cached = block_group_cache_done(block_group);
6306 if (unlikely(!cached)) { 6287 if (unlikely(!cached)) {
6307 found_uncached_bg = true;
6308 ret = cache_block_group(block_group, 0); 6288 ret = cache_block_group(block_group, 0);
6309 BUG_ON(ret < 0); 6289 BUG_ON(ret < 0);
6310 ret = 0; 6290 ret = 0;
@@ -6320,6 +6300,7 @@ have_block_group:
6320 * lets look there 6300 * lets look there
6321 */ 6301 */
6322 if (last_ptr) { 6302 if (last_ptr) {
6303 struct btrfs_block_group_cache *used_block_group;
6323 unsigned long aligned_cluster; 6304 unsigned long aligned_cluster;
6324 /* 6305 /*
6325 * the refill lock keeps out other 6306 * the refill lock keeps out other
@@ -6330,10 +6311,8 @@ have_block_group:
6330 if (used_block_group != block_group && 6311 if (used_block_group != block_group &&
6331 (!used_block_group || 6312 (!used_block_group ||
6332 used_block_group->ro || 6313 used_block_group->ro ||
6333 !block_group_bits(used_block_group, flags))) { 6314 !block_group_bits(used_block_group, flags)))
6334 used_block_group = block_group;
6335 goto refill_cluster; 6315 goto refill_cluster;
6336 }
6337 6316
6338 if (used_block_group != block_group) 6317 if (used_block_group != block_group)
6339 btrfs_get_block_group(used_block_group); 6318 btrfs_get_block_group(used_block_group);
@@ -6347,17 +6326,19 @@ have_block_group:
6347 /* we have a block, we're done */ 6326 /* we have a block, we're done */
6348 spin_unlock(&last_ptr->refill_lock); 6327 spin_unlock(&last_ptr->refill_lock);
6349 trace_btrfs_reserve_extent_cluster(root, 6328 trace_btrfs_reserve_extent_cluster(root,
6350 block_group, search_start, num_bytes); 6329 used_block_group,
6330 search_start, num_bytes);
6331 if (used_block_group != block_group) {
6332 btrfs_put_block_group(block_group);
6333 block_group = used_block_group;
6334 }
6351 goto checks; 6335 goto checks;
6352 } 6336 }
6353 6337
6354 WARN_ON(last_ptr->block_group != used_block_group); 6338 WARN_ON(last_ptr->block_group != used_block_group);
6355 if (used_block_group != block_group) { 6339 if (used_block_group != block_group)
6356 btrfs_put_block_group(used_block_group); 6340 btrfs_put_block_group(used_block_group);
6357 used_block_group = block_group;
6358 }
6359refill_cluster: 6341refill_cluster:
6360 BUG_ON(used_block_group != block_group);
6361 /* If we are on LOOP_NO_EMPTY_SIZE, we can't 6342 /* If we are on LOOP_NO_EMPTY_SIZE, we can't
6362 * set up a new clusters, so lets just skip it 6343 * set up a new clusters, so lets just skip it
6363 * and let the allocator find whatever block 6344 * and let the allocator find whatever block
@@ -6476,25 +6457,25 @@ unclustered_alloc:
6476 goto loop; 6457 goto loop;
6477 } 6458 }
6478checks: 6459checks:
6479 search_start = stripe_align(root, used_block_group, 6460 search_start = stripe_align(root, block_group,
6480 offset, num_bytes); 6461 offset, num_bytes);
6481 6462
6482 /* move on to the next group */ 6463 /* move on to the next group */
6483 if (search_start + num_bytes > 6464 if (search_start + num_bytes >
6484 used_block_group->key.objectid + used_block_group->key.offset) { 6465 block_group->key.objectid + block_group->key.offset) {
6485 btrfs_add_free_space(used_block_group, offset, num_bytes); 6466 btrfs_add_free_space(block_group, offset, num_bytes);
6486 goto loop; 6467 goto loop;
6487 } 6468 }
6488 6469
6489 if (offset < search_start) 6470 if (offset < search_start)
6490 btrfs_add_free_space(used_block_group, offset, 6471 btrfs_add_free_space(block_group, offset,
6491 search_start - offset); 6472 search_start - offset);
6492 BUG_ON(offset > search_start); 6473 BUG_ON(offset > search_start);
6493 6474
6494 ret = btrfs_update_reserved_bytes(used_block_group, num_bytes, 6475 ret = btrfs_update_reserved_bytes(block_group, num_bytes,
6495 alloc_type); 6476 alloc_type);
6496 if (ret == -EAGAIN) { 6477 if (ret == -EAGAIN) {
6497 btrfs_add_free_space(used_block_group, offset, num_bytes); 6478 btrfs_add_free_space(block_group, offset, num_bytes);
6498 goto loop; 6479 goto loop;
6499 } 6480 }
6500 6481
@@ -6504,16 +6485,12 @@ checks:
6504 6485
6505 trace_btrfs_reserve_extent(orig_root, block_group, 6486 trace_btrfs_reserve_extent(orig_root, block_group,
6506 search_start, num_bytes); 6487 search_start, num_bytes);
6507 if (used_block_group != block_group)
6508 btrfs_put_block_group(used_block_group);
6509 btrfs_put_block_group(block_group); 6488 btrfs_put_block_group(block_group);
6510 break; 6489 break;
6511loop: 6490loop:
6512 failed_cluster_refill = false; 6491 failed_cluster_refill = false;
6513 failed_alloc = false; 6492 failed_alloc = false;
6514 BUG_ON(index != get_block_group_index(block_group)); 6493 BUG_ON(index != get_block_group_index(block_group));
6515 if (used_block_group != block_group)
6516 btrfs_put_block_group(used_block_group);
6517 btrfs_put_block_group(block_group); 6494 btrfs_put_block_group(block_group);
6518 } 6495 }
6519 up_read(&space_info->groups_sem); 6496 up_read(&space_info->groups_sem);
@@ -6584,12 +6561,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
6584 int index = 0; 6561 int index = 0;
6585 6562
6586 spin_lock(&info->lock); 6563 spin_lock(&info->lock);
6587 printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n", 6564 printk(KERN_INFO "BTRFS: space_info %llu has %llu free, is %sfull\n",
6588 info->flags, 6565 info->flags,
6589 info->total_bytes - info->bytes_used - info->bytes_pinned - 6566 info->total_bytes - info->bytes_used - info->bytes_pinned -
6590 info->bytes_reserved - info->bytes_readonly, 6567 info->bytes_reserved - info->bytes_readonly,
6591 (info->full) ? "" : "not "); 6568 (info->full) ? "" : "not ");
6592 printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, " 6569 printk(KERN_INFO "BTRFS: space_info total=%llu, used=%llu, pinned=%llu, "
6593 "reserved=%llu, may_use=%llu, readonly=%llu\n", 6570 "reserved=%llu, may_use=%llu, readonly=%llu\n",
6594 info->total_bytes, info->bytes_used, info->bytes_pinned, 6571 info->total_bytes, info->bytes_used, info->bytes_pinned,
6595 info->bytes_reserved, info->bytes_may_use, 6572 info->bytes_reserved, info->bytes_may_use,
@@ -6603,7 +6580,9 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
6603again: 6580again:
6604 list_for_each_entry(cache, &info->block_groups[index], list) { 6581 list_for_each_entry(cache, &info->block_groups[index], list) {
6605 spin_lock(&cache->lock); 6582 spin_lock(&cache->lock);
6606 printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n", 6583 printk(KERN_INFO "BTRFS: "
6584 "block group %llu has %llu bytes, "
6585 "%llu used %llu pinned %llu reserved %s\n",
6607 cache->key.objectid, cache->key.offset, 6586 cache->key.objectid, cache->key.offset,
6608 btrfs_block_group_used(&cache->item), cache->pinned, 6587 btrfs_block_group_used(&cache->item), cache->pinned,
6609 cache->reserved, cache->ro ? "[readonly]" : ""); 6588 cache->reserved, cache->ro ? "[readonly]" : "");
@@ -6966,7 +6945,7 @@ again:
6966 /*DEFAULT_RATELIMIT_BURST*/ 1); 6945 /*DEFAULT_RATELIMIT_BURST*/ 1);
6967 if (__ratelimit(&_rs)) 6946 if (__ratelimit(&_rs))
6968 WARN(1, KERN_DEBUG 6947 WARN(1, KERN_DEBUG
6969 "btrfs: block rsv returned %d\n", ret); 6948 "BTRFS: block rsv returned %d\n", ret);
6970 } 6949 }
6971try_reserve: 6950try_reserve:
6972 ret = reserve_metadata_bytes(root, block_rsv, blocksize, 6951 ret = reserve_metadata_bytes(root, block_rsv, blocksize,
@@ -7714,7 +7693,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
7714 7693
7715 btrfs_end_transaction_throttle(trans, tree_root); 7694 btrfs_end_transaction_throttle(trans, tree_root);
7716 if (!for_reloc && btrfs_need_cleaner_sleep(root)) { 7695 if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
7717 pr_debug("btrfs: drop snapshot early exit\n"); 7696 pr_debug("BTRFS: drop snapshot early exit\n");
7718 err = -EAGAIN; 7697 err = -EAGAIN;
7719 goto out_free; 7698 goto out_free;
7720 } 7699 }
@@ -7779,7 +7758,7 @@ out:
7779 */ 7758 */
7780 if (!for_reloc && root_dropped == false) 7759 if (!for_reloc && root_dropped == false)
7781 btrfs_add_dead_root(root); 7760 btrfs_add_dead_root(root);
7782 if (err) 7761 if (err && err != -EAGAIN)
7783 btrfs_std_error(root->fs_info, err); 7762 btrfs_std_error(root->fs_info, err);
7784 return err; 7763 return err;
7785} 7764}
@@ -8333,6 +8312,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
8333 release_global_block_rsv(info); 8312 release_global_block_rsv(info);
8334 8313
8335 while (!list_empty(&info->space_info)) { 8314 while (!list_empty(&info->space_info)) {
8315 int i;
8316
8336 space_info = list_entry(info->space_info.next, 8317 space_info = list_entry(info->space_info.next,
8337 struct btrfs_space_info, 8318 struct btrfs_space_info,
8338 list); 8319 list);
@@ -8343,9 +8324,17 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
8343 dump_space_info(space_info, 0, 0); 8324 dump_space_info(space_info, 0, 0);
8344 } 8325 }
8345 } 8326 }
8346 percpu_counter_destroy(&space_info->total_bytes_pinned);
8347 list_del(&space_info->list); 8327 list_del(&space_info->list);
8348 kfree(space_info); 8328 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
8329 struct kobject *kobj;
8330 kobj = &space_info->block_group_kobjs[i];
8331 if (kobj->parent) {
8332 kobject_del(kobj);
8333 kobject_put(kobj);
8334 }
8335 }
8336 kobject_del(&space_info->kobj);
8337 kobject_put(&space_info->kobj);
8349 } 8338 }
8350 return 0; 8339 return 0;
8351} 8340}
@@ -8356,10 +8345,57 @@ static void __link_block_group(struct btrfs_space_info *space_info,
8356 int index = get_block_group_index(cache); 8345 int index = get_block_group_index(cache);
8357 8346
8358 down_write(&space_info->groups_sem); 8347 down_write(&space_info->groups_sem);
8348 if (list_empty(&space_info->block_groups[index])) {
8349 struct kobject *kobj = &space_info->block_group_kobjs[index];
8350 int ret;
8351
8352 kobject_get(&space_info->kobj); /* put in release */
8353 ret = kobject_add(kobj, &space_info->kobj, "%s",
8354 get_raid_name(index));
8355 if (ret) {
8356 pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
8357 kobject_put(&space_info->kobj);
8358 }
8359 }
8359 list_add_tail(&cache->list, &space_info->block_groups[index]); 8360 list_add_tail(&cache->list, &space_info->block_groups[index]);
8360 up_write(&space_info->groups_sem); 8361 up_write(&space_info->groups_sem);
8361} 8362}
8362 8363
8364static struct btrfs_block_group_cache *
8365btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
8366{
8367 struct btrfs_block_group_cache *cache;
8368
8369 cache = kzalloc(sizeof(*cache), GFP_NOFS);
8370 if (!cache)
8371 return NULL;
8372
8373 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
8374 GFP_NOFS);
8375 if (!cache->free_space_ctl) {
8376 kfree(cache);
8377 return NULL;
8378 }
8379
8380 cache->key.objectid = start;
8381 cache->key.offset = size;
8382 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
8383
8384 cache->sectorsize = root->sectorsize;
8385 cache->fs_info = root->fs_info;
8386 cache->full_stripe_len = btrfs_full_stripe_len(root,
8387 &root->fs_info->mapping_tree,
8388 start);
8389 atomic_set(&cache->count, 1);
8390 spin_lock_init(&cache->lock);
8391 INIT_LIST_HEAD(&cache->list);
8392 INIT_LIST_HEAD(&cache->cluster_list);
8393 INIT_LIST_HEAD(&cache->new_bg_list);
8394 btrfs_init_free_space_ctl(cache);
8395
8396 return cache;
8397}
8398
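
btrfs_create_block_group_cache() above pulls the duplicated setup out of btrfs_read_block_groups() and btrfs_make_block_group(), including the unwind when the free_space_ctl allocation fails. A brief sketch of that two-stage constructor shape with placeholder types (nothing here is btrfs API):

/* Sketch: allocate the outer object, then its embedded control structure,
 * and release the first if the second allocation fails. */
#include <stdlib.h>

struct free_space_ctl { int dummy; };

struct block_group {
    unsigned long long start, size;
    struct free_space_ctl *ctl;
};

static struct block_group *create_block_group(unsigned long long start,
                                              unsigned long long size)
{
    struct block_group *bg = calloc(1, sizeof(*bg));

    if (!bg)
        return NULL;
    bg->ctl = calloc(1, sizeof(*bg->ctl));
    if (!bg->ctl) {
        free(bg);    /* don't leak the outer object */
        return NULL;
    }
    bg->start = start;
    bg->size = size;
    return bg;
}

int main(void)
{
    struct block_group *bg = create_block_group(1024 * 1024, 8 * 1024 * 1024);

    if (bg) {
        free(bg->ctl);
        free(bg);
    }
    return 0;
}
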
8363int btrfs_read_block_groups(struct btrfs_root *root) 8399int btrfs_read_block_groups(struct btrfs_root *root)
8364{ 8400{
8365 struct btrfs_path *path; 8401 struct btrfs_path *path;
@@ -8395,26 +8431,16 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8395 break; 8431 break;
8396 if (ret != 0) 8432 if (ret != 0)
8397 goto error; 8433 goto error;
8434
8398 leaf = path->nodes[0]; 8435 leaf = path->nodes[0];
8399 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 8436 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
8400 cache = kzalloc(sizeof(*cache), GFP_NOFS); 8437
8438 cache = btrfs_create_block_group_cache(root, found_key.objectid,
8439 found_key.offset);
8401 if (!cache) { 8440 if (!cache) {
8402 ret = -ENOMEM; 8441 ret = -ENOMEM;
8403 goto error; 8442 goto error;
8404 } 8443 }
8405 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
8406 GFP_NOFS);
8407 if (!cache->free_space_ctl) {
8408 kfree(cache);
8409 ret = -ENOMEM;
8410 goto error;
8411 }
8412
8413 atomic_set(&cache->count, 1);
8414 spin_lock_init(&cache->lock);
8415 cache->fs_info = info;
8416 INIT_LIST_HEAD(&cache->list);
8417 INIT_LIST_HEAD(&cache->cluster_list);
8418 8444
8419 if (need_clear) { 8445 if (need_clear) {
8420 /* 8446 /*
@@ -8435,16 +8461,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8435 read_extent_buffer(leaf, &cache->item, 8461 read_extent_buffer(leaf, &cache->item,
8436 btrfs_item_ptr_offset(leaf, path->slots[0]), 8462 btrfs_item_ptr_offset(leaf, path->slots[0]),
8437 sizeof(cache->item)); 8463 sizeof(cache->item));
8438 memcpy(&cache->key, &found_key, sizeof(found_key)); 8464 cache->flags = btrfs_block_group_flags(&cache->item);
8439 8465
8440 key.objectid = found_key.objectid + found_key.offset; 8466 key.objectid = found_key.objectid + found_key.offset;
8441 btrfs_release_path(path); 8467 btrfs_release_path(path);
8442 cache->flags = btrfs_block_group_flags(&cache->item);
8443 cache->sectorsize = root->sectorsize;
8444 cache->full_stripe_len = btrfs_full_stripe_len(root,
8445 &root->fs_info->mapping_tree,
8446 found_key.objectid);
8447 btrfs_init_free_space_ctl(cache);
8448 8468
8449 /* 8469 /*
8450 * We need to exclude the super stripes now so that the space 8470 * We need to exclude the super stripes now so that the space
@@ -8458,8 +8478,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8458 * case. 8478 * case.
8459 */ 8479 */
8460 free_excluded_extents(root, cache); 8480 free_excluded_extents(root, cache);
8461 kfree(cache->free_space_ctl); 8481 btrfs_put_block_group(cache);
8462 kfree(cache);
8463 goto error; 8482 goto error;
8464 } 8483 }
8465 8484
@@ -8590,38 +8609,15 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8590 8609
8591 root->fs_info->last_trans_log_full_commit = trans->transid; 8610 root->fs_info->last_trans_log_full_commit = trans->transid;
8592 8611
8593 cache = kzalloc(sizeof(*cache), GFP_NOFS); 8612 cache = btrfs_create_block_group_cache(root, chunk_offset, size);
8594 if (!cache) 8613 if (!cache)
8595 return -ENOMEM; 8614 return -ENOMEM;
8596 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
8597 GFP_NOFS);
8598 if (!cache->free_space_ctl) {
8599 kfree(cache);
8600 return -ENOMEM;
8601 }
8602
8603 cache->key.objectid = chunk_offset;
8604 cache->key.offset = size;
8605 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
8606 cache->sectorsize = root->sectorsize;
8607 cache->fs_info = root->fs_info;
8608 cache->full_stripe_len = btrfs_full_stripe_len(root,
8609 &root->fs_info->mapping_tree,
8610 chunk_offset);
8611
8612 atomic_set(&cache->count, 1);
8613 spin_lock_init(&cache->lock);
8614 INIT_LIST_HEAD(&cache->list);
8615 INIT_LIST_HEAD(&cache->cluster_list);
8616 INIT_LIST_HEAD(&cache->new_bg_list);
8617
8618 btrfs_init_free_space_ctl(cache);
8619 8615
8620 btrfs_set_block_group_used(&cache->item, bytes_used); 8616 btrfs_set_block_group_used(&cache->item, bytes_used);
8621 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 8617 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
8622 cache->flags = type;
8623 btrfs_set_block_group_flags(&cache->item, type); 8618 btrfs_set_block_group_flags(&cache->item, type);
8624 8619
8620 cache->flags = type;
8625 cache->last_byte_to_unpin = (u64)-1; 8621 cache->last_byte_to_unpin = (u64)-1;
8626 cache->cached = BTRFS_CACHE_FINISHED; 8622 cache->cached = BTRFS_CACHE_FINISHED;
8627 ret = exclude_super_stripes(root, cache); 8623 ret = exclude_super_stripes(root, cache);
@@ -8631,8 +8627,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8631 * case. 8627 * case.
8632 */ 8628 */
8633 free_excluded_extents(root, cache); 8629 free_excluded_extents(root, cache);
8634 kfree(cache->free_space_ctl); 8630 btrfs_put_block_group(cache);
8635 kfree(cache);
8636 return ret; 8631 return ret;
8637 } 8632 }
8638 8633
@@ -8796,8 +8791,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8796 * are still on the list after taking the semaphore 8791 * are still on the list after taking the semaphore
8797 */ 8792 */
8798 list_del_init(&block_group->list); 8793 list_del_init(&block_group->list);
8799 if (list_empty(&block_group->space_info->block_groups[index])) 8794 if (list_empty(&block_group->space_info->block_groups[index])) {
8795 kobject_del(&block_group->space_info->block_group_kobjs[index]);
8796 kobject_put(&block_group->space_info->block_group_kobjs[index]);
8800 clear_avail_alloc_bits(root->fs_info, block_group->flags); 8797 clear_avail_alloc_bits(root->fs_info, block_group->flags);
8798 }
8801 up_write(&block_group->space_info->groups_sem); 8799 up_write(&block_group->space_info->groups_sem);
8802 8800
8803 if (block_group->cached == BTRFS_CACHE_STARTED) 8801 if (block_group->cached == BTRFS_CACHE_STARTED)