Diffstat (limited to 'fs/btrfs/extent-tree.c')
 -rw-r--r--  fs/btrfs/extent-tree.c | 578
 1 file changed, 392 insertions, 186 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index cf54bdfee334..3e074dab2d57 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -31,6 +31,7 @@
 #include "print-tree.h"
 #include "transaction.h"
 #include "volumes.h"
+#include "raid56.h"
 #include "locking.h"
 #include "free-space-cache.h"
 #include "math.h"
@@ -72,8 +73,7 @@ enum {
 	RESERVE_ALLOC_NO_ACCOUNT = 2,
 };
 
-static int update_block_group(struct btrfs_trans_handle *trans,
-			      struct btrfs_root *root,
+static int update_block_group(struct btrfs_root *root,
 			      u64 bytenr, u64 num_bytes, int alloc);
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
@@ -103,6 +103,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
 			    int dump_block_groups);
 static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
 				       u64 num_bytes, int reserve);
+static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
+			       u64 num_bytes);
 
 static noinline int
 block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -162,6 +164,10 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
 	rb_link_node(&block_group->cache_node, parent, p);
 	rb_insert_color(&block_group->cache_node,
 			&info->block_group_cache_tree);
+
+	if (info->first_logical_byte > block_group->key.objectid)
+		info->first_logical_byte = block_group->key.objectid;
+
 	spin_unlock(&info->block_group_cache_lock);
 
 	return 0;
@@ -203,8 +209,11 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
 			break;
 		}
 	}
-	if (ret)
+	if (ret) {
 		btrfs_get_block_group(ret);
+		if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
+			info->first_logical_byte = ret->key.objectid;
+	}
 	spin_unlock(&info->block_group_cache_lock);
 
 	return ret;
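Note on the first_logical_byte hunks above (and in first_logical_byte() further down): fs_info keeps a cached copy of the lowest block group objectid so readers can skip a full rbtree walk, with (u64)-1 meaning the cache is cold. A minimal stand-alone sketch of that cached-minimum pattern; the struct and byte values are made up, only the update rule follows the hunks:

/* Illustrative userspace model; not kernel code. */
#include <stdint.h>
#include <stdio.h>

struct cache {
	uint64_t first_logical_byte;	/* UINT64_MAX == "unknown" */
};

static void add_block_group(struct cache *c, uint64_t objectid)
{
	/* done under block_group_cache_lock in the real code */
	if (c->first_logical_byte > objectid)
		c->first_logical_byte = objectid;
}

int main(void)
{
	struct cache c = { .first_logical_byte = UINT64_MAX };

	add_block_group(&c, 13631488);
	add_block_group(&c, 4194304);
	add_block_group(&c, 29360128);
	if (c.first_logical_byte < UINT64_MAX)	/* cache is warm */
		printf("first logical byte: %llu\n",
		       (unsigned long long)c.first_logical_byte);
	return 0;
}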
@@ -468,8 +477,6 @@ out:
 }
 
 static int cache_block_group(struct btrfs_block_group_cache *cache,
-			     struct btrfs_trans_handle *trans,
-			     struct btrfs_root *root,
 			     int load_cache_only)
 {
 	DEFINE_WAIT(wait);
@@ -527,12 +534,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 	cache->cached = BTRFS_CACHE_FAST;
 	spin_unlock(&cache->lock);
 
-	/*
-	 * We can't do the read from on-disk cache during a commit since we need
-	 * to have the normal tree locking. Also if we are currently trying to
-	 * allocate blocks for the tree root we can't do the fast caching since
-	 * we likely hold important locks.
-	 */
 	if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
 		ret = load_free_space_cache(fs_info, cache);
 
@@ -1852,6 +1853,8 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 		*actual_bytes = discarded_bytes;
 
 
+	if (ret == -EOPNOTSUPP)
+		ret = 0;
 	return ret;
 }
 
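The btrfs_discard_extent() change filters -EOPNOTSUPP: a device without discard support is not a failure for the caller, while real I/O errors still propagate. A hedged stand-alone illustration of the idiom; discard_range() is a made-up stand-in for the device call:

#include <errno.h>
#include <stdio.h>

static int discard_range(int device_supports_discard)
{
	return device_supports_discard ? 0 : -EOPNOTSUPP;
}

int main(void)
{
	int ret = discard_range(0);

	if (ret == -EOPNOTSUPP)		/* unsupported is not an error */
		ret = 0;
	printf("ret = %d\n", ret);	/* 0 */
	return 0;
}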
@@ -2143,7 +2146,6 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
 						      node->num_bytes);
 		}
 	}
-	mutex_unlock(&head->mutex);
 	return ret;
 }
 
@@ -2258,7 +2260,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 			 * process of being added. Don't run this ref yet.
 			 */
 			list_del_init(&locked_ref->cluster);
-			mutex_unlock(&locked_ref->mutex);
+			btrfs_delayed_ref_unlock(locked_ref);
 			locked_ref = NULL;
 			delayed_refs->num_heads_ready++;
 			spin_unlock(&delayed_refs->lock);
@@ -2285,7 +2287,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 		ref = &locked_ref->node;
 
 		if (extent_op && must_insert_reserved) {
-			kfree(extent_op);
+			btrfs_free_delayed_extent_op(extent_op);
 			extent_op = NULL;
 		}
 
@@ -2294,28 +2296,25 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 
 			ret = run_delayed_extent_op(trans, root,
 						    ref, extent_op);
-			kfree(extent_op);
+			btrfs_free_delayed_extent_op(extent_op);
 
 			if (ret) {
-				list_del_init(&locked_ref->cluster);
-				mutex_unlock(&locked_ref->mutex);
-
-				printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret);
+				printk(KERN_DEBUG
+				       "btrfs: run_delayed_extent_op "
+				       "returned %d\n", ret);
 				spin_lock(&delayed_refs->lock);
+				btrfs_delayed_ref_unlock(locked_ref);
 				return ret;
 			}
 
 			goto next;
 		}
-
-		list_del_init(&locked_ref->cluster);
-		locked_ref = NULL;
 	}
 
 	ref->in_tree = 0;
 	rb_erase(&ref->rb_node, &delayed_refs->root);
 	delayed_refs->num_entries--;
-	if (locked_ref) {
+	if (!btrfs_delayed_ref_is_head(ref)) {
 		/*
 		 * when we play the delayed ref, also correct the
 		 * ref_mod on head
@@ -2337,20 +2336,29 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 		ret = run_one_delayed_ref(trans, root, ref, extent_op,
 					  must_insert_reserved);
 
-		btrfs_put_delayed_ref(ref);
-		kfree(extent_op);
-		count++;
-
+		btrfs_free_delayed_extent_op(extent_op);
 		if (ret) {
-			if (locked_ref) {
-				list_del_init(&locked_ref->cluster);
-				mutex_unlock(&locked_ref->mutex);
-			}
-			printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret);
+			btrfs_delayed_ref_unlock(locked_ref);
+			btrfs_put_delayed_ref(ref);
+			printk(KERN_DEBUG
+			       "btrfs: run_one_delayed_ref returned %d\n", ret);
 			spin_lock(&delayed_refs->lock);
 			return ret;
 		}
 
+		/*
+		 * If this node is a head, that means all the refs in this head
+		 * have been dealt with, and we will pick the next head to deal
+		 * with, so we must unlock the head and drop it from the cluster
+		 * list before we release it.
+		 */
+		if (btrfs_delayed_ref_is_head(ref)) {
+			list_del_init(&locked_ref->cluster);
+			btrfs_delayed_ref_unlock(locked_ref);
+			locked_ref = NULL;
+		}
+		btrfs_put_delayed_ref(ref);
+		count++;
 next:
 		cond_resched();
 		spin_lock(&delayed_refs->lock);
@@ -2435,6 +2443,16 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
+static int refs_newer(struct btrfs_delayed_ref_root *delayed_refs, int seq,
+		      int count)
+{
+	int val = atomic_read(&delayed_refs->ref_seq);
+
+	if (val < seq || val >= seq + count)
+		return 1;
+	return 0;
+}
+
 /*
  * this starts processing the delayed reference count updates and
  * extent insertions we have queued up so far. count can be
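refs_newer() reports whether delayed-ref processing has advanced past a window of `count` entries beyond the sequence snapshot `seq`. A stand-alone check of that half-open-window predicate, stripped of the kernel atomics:

#include <stdio.h>

/* same logic as refs_newer(): 1 once val has left [seq, seq + count) */
static int refs_newer(int val, int seq, int count)
{
	if (val < seq || val >= seq + count)
		return 1;
	return 0;
}

int main(void)
{
	printf("%d\n", refs_newer(100, 100, 256));	/* 0: no progress yet */
	printf("%d\n", refs_newer(355, 100, 256));	/* 0: still inside */
	printf("%d\n", refs_newer(356, 100, 256));	/* 1: enough progress */
	printf("%d\n", refs_newer(99, 100, 256));	/* 1: behind the snapshot */
	return 0;
}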
@@ -2469,6 +2487,44 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 
 	delayed_refs = &trans->transaction->delayed_refs;
 	INIT_LIST_HEAD(&cluster);
+	if (count == 0) {
+		count = delayed_refs->num_entries * 2;
+		run_most = 1;
+	}
+
+	if (!run_all && !run_most) {
+		int old;
+		int seq = atomic_read(&delayed_refs->ref_seq);
+
+progress:
+		old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
+		if (old) {
+			DEFINE_WAIT(__wait);
+			if (delayed_refs->num_entries < 16348)
+				return 0;
+
+			prepare_to_wait(&delayed_refs->wait, &__wait,
+					TASK_UNINTERRUPTIBLE);
+
+			old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
+			if (old) {
+				schedule();
+				finish_wait(&delayed_refs->wait, &__wait);
+
+				if (!refs_newer(delayed_refs, seq, 256))
+					goto progress;
+				else
+					return 0;
+			} else {
+				finish_wait(&delayed_refs->wait, &__wait);
+				goto again;
+			}
+		}
+
+	} else {
+		atomic_inc(&delayed_refs->procs_running_refs);
+	}
+
 again:
 	loops = 0;
 	spin_lock(&delayed_refs->lock);
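The new block gates btrfs_run_delayed_refs() so only one process runs refs at a time: the first caller wins the 0 -> 1 cmpxchg on procs_running_refs, and later callers either bail out early (few entries queued) or sleep on the waitqueue until enough progress is visible via ref_seq. A minimal userspace sketch of the gate itself; GCC atomic builtins stand in for atomic_cmpxchg(), and the sleep/wake half is omitted:

#include <stdio.h>

static int procs_running_refs;

static int try_become_runner(void)
{
	int expected = 0;

	/* the 0 -> 1 transition succeeds for exactly one caller at a time */
	return __atomic_compare_exchange_n(&procs_running_refs, &expected, 1,
					   0, __ATOMIC_SEQ_CST,
					   __ATOMIC_SEQ_CST);
}

int main(void)
{
	printf("first caller runs refs:  %d\n", try_become_runner());	/* 1 */
	printf("second caller runs refs: %d\n", try_become_runner());	/* 0 */
	__atomic_store_n(&procs_running_refs, 0, __ATOMIC_SEQ_CST);
	printf("after release:           %d\n", try_become_runner());	/* 1 */
	return 0;
}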
@@ -2477,10 +2533,6 @@ again:
 	delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
 #endif
 
-	if (count == 0) {
-		count = delayed_refs->num_entries * 2;
-		run_most = 1;
-	}
 	while (1) {
 		if (!(run_all || run_most) &&
 		    delayed_refs->num_heads_ready < 64)
@@ -2500,11 +2552,15 @@ again:
 
 		ret = run_clustered_refs(trans, root, &cluster);
 		if (ret < 0) {
+			btrfs_release_ref_cluster(&cluster);
 			spin_unlock(&delayed_refs->lock);
 			btrfs_abort_transaction(trans, root, ret);
+			atomic_dec(&delayed_refs->procs_running_refs);
 			return ret;
 		}
 
+		atomic_add(ret, &delayed_refs->ref_seq);
+
 		count -= min_t(unsigned long, ret, count);
 
 		if (count == 0)
@@ -2573,6 +2629,11 @@ again:
 		goto again;
 	}
 out:
+	atomic_dec(&delayed_refs->procs_running_refs);
+	smp_mb();
+	if (waitqueue_active(&delayed_refs->wait))
+		wake_up(&delayed_refs->wait);
+
 	spin_unlock(&delayed_refs->lock);
 	assert_qgroups_uptodate(trans);
 	return 0;
@@ -2586,7 +2647,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_extent_op *extent_op;
 	int ret;
 
-	extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
+	extent_op = btrfs_alloc_delayed_extent_op();
 	if (!extent_op)
 		return -ENOMEM;
 
@@ -2598,7 +2659,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
 	ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
 					  num_bytes, extent_op);
 	if (ret)
-		kfree(extent_op);
+		btrfs_free_delayed_extent_op(extent_op);
 	return ret;
 }
 
@@ -3223,12 +3284,14 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 	u64 extra_flags = chunk_to_extended(flags) &
 				BTRFS_EXTENDED_PROFILE_MASK;
 
+	write_seqlock(&fs_info->profiles_lock);
 	if (flags & BTRFS_BLOCK_GROUP_DATA)
 		fs_info->avail_data_alloc_bits |= extra_flags;
 	if (flags & BTRFS_BLOCK_GROUP_METADATA)
 		fs_info->avail_metadata_alloc_bits |= extra_flags;
 	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
 		fs_info->avail_system_alloc_bits |= extra_flags;
+	write_sequnlock(&fs_info->profiles_lock);
 }
 
 /*
@@ -3276,6 +3339,7 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 	u64 num_devices = root->fs_info->fs_devices->rw_devices +
 		root->fs_info->fs_devices->missing_devices;
 	u64 target;
+	u64 tmp;
 
 	/*
 	 * see if restripe for this chunk_type is in progress, if so
@@ -3292,40 +3356,48 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 	}
 	spin_unlock(&root->fs_info->balance_lock);
 
+	/* First, mask out the RAID levels which aren't possible */
 	if (num_devices == 1)
-		flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
+		flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0 |
+			   BTRFS_BLOCK_GROUP_RAID5);
+	if (num_devices < 3)
+		flags &= ~BTRFS_BLOCK_GROUP_RAID6;
 	if (num_devices < 4)
 		flags &= ~BTRFS_BLOCK_GROUP_RAID10;
 
-	if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
-	    (flags & (BTRFS_BLOCK_GROUP_RAID1 |
-		      BTRFS_BLOCK_GROUP_RAID10))) {
-		flags &= ~BTRFS_BLOCK_GROUP_DUP;
-	}
-
-	if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
-	    (flags & BTRFS_BLOCK_GROUP_RAID10)) {
-		flags &= ~BTRFS_BLOCK_GROUP_RAID1;
-	}
+	tmp = flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID0 |
+		       BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID5 |
+		       BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_RAID10);
+	flags &= ~tmp;
 
-	if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
-	    ((flags & BTRFS_BLOCK_GROUP_RAID1) |
-	     (flags & BTRFS_BLOCK_GROUP_RAID10) |
-	     (flags & BTRFS_BLOCK_GROUP_DUP))) {
-		flags &= ~BTRFS_BLOCK_GROUP_RAID0;
-	}
+	if (tmp & BTRFS_BLOCK_GROUP_RAID6)
+		tmp = BTRFS_BLOCK_GROUP_RAID6;
+	else if (tmp & BTRFS_BLOCK_GROUP_RAID5)
+		tmp = BTRFS_BLOCK_GROUP_RAID5;
+	else if (tmp & BTRFS_BLOCK_GROUP_RAID10)
+		tmp = BTRFS_BLOCK_GROUP_RAID10;
+	else if (tmp & BTRFS_BLOCK_GROUP_RAID1)
+		tmp = BTRFS_BLOCK_GROUP_RAID1;
+	else if (tmp & BTRFS_BLOCK_GROUP_RAID0)
+		tmp = BTRFS_BLOCK_GROUP_RAID0;
 
-	return extended_to_chunk(flags);
+	return extended_to_chunk(flags | tmp);
 }
 
 static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
 {
-	if (flags & BTRFS_BLOCK_GROUP_DATA)
-		flags |= root->fs_info->avail_data_alloc_bits;
-	else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
-		flags |= root->fs_info->avail_system_alloc_bits;
-	else if (flags & BTRFS_BLOCK_GROUP_METADATA)
-		flags |= root->fs_info->avail_metadata_alloc_bits;
+	unsigned seq;
+
+	do {
+		seq = read_seqbegin(&root->fs_info->profiles_lock);
+
+		if (flags & BTRFS_BLOCK_GROUP_DATA)
+			flags |= root->fs_info->avail_data_alloc_bits;
+		else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+			flags |= root->fs_info->avail_system_alloc_bits;
+		else if (flags & BTRFS_BLOCK_GROUP_METADATA)
+			flags |= root->fs_info->avail_metadata_alloc_bits;
+	} while (read_seqretry(&root->fs_info->profiles_lock, seq));
 
 	return btrfs_reduce_alloc_profile(root, flags);
 }
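get_alloc_profile() now snapshots the avail_*_alloc_bits under the new profiles_lock seqlock: readers retry whenever the sequence count shows a writer ran concurrently (the write side is the set_avail_alloc_bits() hunk above). A single-threaded sketch of the retry shape; read_seqbegin_sim()/read_seqretry_sim() are toy stand-ins for the kernel primitives, not their implementation:

#include <stdio.h>

static unsigned sequence;			/* even = quiescent */
static unsigned long long avail_bits = 0x11;	/* fake avail_*_alloc_bits */

static unsigned read_seqbegin_sim(void)
{
	return sequence;
}

static int read_seqretry_sim(unsigned seq)
{
	return sequence != seq;		/* retry if a writer ran meanwhile */
}

int main(void)
{
	unsigned long long flags = 0;
	unsigned seq;

	do {
		seq = read_seqbegin_sim();
		flags = avail_bits;	/* consistent snapshot */
	} while (read_seqretry_sim(seq));

	printf("profile flags: 0x%llx\n", flags);
	return 0;
}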
@@ -3333,6 +3405,7 @@ static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
 u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
 {
 	u64 flags;
+	u64 ret;
 
 	if (data)
 		flags = BTRFS_BLOCK_GROUP_DATA;
@@ -3341,7 +3414,8 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
 	else
 		flags = BTRFS_BLOCK_GROUP_METADATA;
 
-	return get_alloc_profile(root, flags);
+	ret = get_alloc_profile(root, flags);
+	return ret;
 }
 
 /*
@@ -3357,7 +3431,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
 	int ret = 0, committed = 0, alloc_chunk = 1;
 
 	/* make sure bytes are sectorsize aligned */
-	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
+	bytes = ALIGN(bytes, root->sectorsize);
 
 	if (root == root->fs_info->tree_root ||
 	    BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) {
@@ -3452,7 +3526,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
 	struct btrfs_space_info *data_sinfo;
 
 	/* make sure bytes are sectorsize aligned */
-	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
+	bytes = ALIGN(bytes, root->sectorsize);
 
 	data_sinfo = root->fs_info->data_sinfo;
 	spin_lock(&data_sinfo->lock);
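Both hunks above swap the open-coded round-up for ALIGN(); for a power-of-two alignment the two expressions compute the same value, as this small userspace check shows (ALIGN_UP is a local macro mirroring the kernel's ALIGN):

#include <stdint.h>
#include <stdio.h>

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
	uint64_t sectorsize = 4096;
	uint64_t bytes;

	/* prints 4095 -> 4096, 4096 -> 4096, 4097 -> 8192 */
	for (bytes = 4095; bytes <= 4097; bytes++)
		printf("%llu -> %llu\n", (unsigned long long)bytes,
		       (unsigned long long)ALIGN_UP(bytes, sectorsize));
	return 0;
}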
@@ -3516,8 +3590,10 @@ static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type)
 {
 	u64 num_dev;
 
-	if (type & BTRFS_BLOCK_GROUP_RAID10 ||
-	    type & BTRFS_BLOCK_GROUP_RAID0)
+	if (type & (BTRFS_BLOCK_GROUP_RAID10 |
+		    BTRFS_BLOCK_GROUP_RAID0 |
+		    BTRFS_BLOCK_GROUP_RAID5 |
+		    BTRFS_BLOCK_GROUP_RAID6))
 		num_dev = root->fs_info->fs_devices->rw_devices;
 	else if (type & BTRFS_BLOCK_GROUP_RAID1)
 		num_dev = 2;
@@ -3564,6 +3640,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	int wait_for_alloc = 0;
 	int ret = 0;
 
+	/* Don't re-enter if we're already allocating a chunk */
+	if (trans->allocating_chunk)
+		return -ENOSPC;
+
 	space_info = __find_space_info(extent_root->fs_info, flags);
 	if (!space_info) {
 		ret = update_space_info(extent_root->fs_info, flags,
@@ -3606,6 +3686,8 @@ again:
 		goto again;
 	}
 
+	trans->allocating_chunk = true;
+
 	/*
 	 * If we have mixed data/metadata chunks we want to make sure we keep
 	 * allocating mixed chunks instead of individual chunks.
@@ -3632,19 +3714,20 @@ again:
 	check_system_chunk(trans, extent_root, flags);
 
 	ret = btrfs_alloc_chunk(trans, extent_root, flags);
-	if (ret < 0 && ret != -ENOSPC)
-		goto out;
+	trans->allocating_chunk = false;
 
 	spin_lock(&space_info->lock);
+	if (ret < 0 && ret != -ENOSPC)
+		goto out;
 	if (ret)
 		space_info->full = 1;
 	else
 		ret = 1;
 
 	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
+out:
 	space_info->chunk_alloc = 0;
 	spin_unlock(&space_info->lock);
-out:
 	mutex_unlock(&fs_info->chunk_mutex);
 	return ret;
 }
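The trans->allocating_chunk flag is a re-entrancy guard: allocating a chunk can itself need space and recurse into do_chunk_alloc(), and the flag turns that into a clean -ENOSPC rather than a deadlock on chunk_mutex. An illustrative (non-kernel) sketch of the guard; struct trans and do_chunk_alloc_sim() are made up:

#include <stdio.h>

#define SIM_ENOSPC 28	/* local stand-in for -ENOSPC */

struct trans {
	int allocating_chunk;
};

static int do_chunk_alloc_sim(struct trans *t, int depth)
{
	if (t->allocating_chunk)
		return -SIM_ENOSPC;	/* re-entered: fail fast, no deadlock */

	t->allocating_chunk = 1;
	if (depth == 0)			/* the allocation may itself need space */
		do_chunk_alloc_sim(t, 1);
	t->allocating_chunk = 0;
	return 0;
}

int main(void)
{
	struct trans t = { 0 };

	printf("outer allocation: %d\n", do_chunk_alloc_sim(&t, 0));	/* 0 */
	return 0;
}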
@@ -3653,13 +3736,31 @@ static int can_overcommit(struct btrfs_root *root,
 			  struct btrfs_space_info *space_info, u64 bytes,
 			  enum btrfs_reserve_flush_enum flush)
 {
+	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
 	u64 profile = btrfs_get_alloc_profile(root, 0);
+	u64 rsv_size = 0;
 	u64 avail;
 	u64 used;
+	u64 to_add;
 
 	used = space_info->bytes_used + space_info->bytes_reserved +
-		space_info->bytes_pinned + space_info->bytes_readonly +
-		space_info->bytes_may_use;
+		space_info->bytes_pinned + space_info->bytes_readonly;
+
+	spin_lock(&global_rsv->lock);
+	rsv_size = global_rsv->size;
+	spin_unlock(&global_rsv->lock);
+
+	/*
+	 * We only want to allow over committing if we have lots of actual space
+	 * free, but if we don't have enough space to handle the global reserve
+	 * space then we could end up having a real enospc problem when trying
+	 * to allocate a chunk or some other such important allocation.
+	 */
+	rsv_size <<= 1;
+	if (used + rsv_size >= space_info->total_bytes)
+		return 0;
+
+	used += space_info->bytes_may_use;
 
 	spin_lock(&root->fs_info->free_chunk_lock);
 	avail = root->fs_info->free_chunk_space;
@@ -3667,28 +3768,60 @@ static int can_overcommit(struct btrfs_root *root,
 
 	/*
 	 * If we have dup, raid1 or raid10 then only half of the free
-	 * space is actually useable.
+	 * space is actually useable.  For raid56, the space info used
+	 * doesn't include the parity drive, so we don't have to
+	 * change the math
 	 */
 	if (profile & (BTRFS_BLOCK_GROUP_DUP |
 		       BTRFS_BLOCK_GROUP_RAID1 |
 		       BTRFS_BLOCK_GROUP_RAID10))
 		avail >>= 1;
 
+	to_add = space_info->total_bytes;
+
 	/*
 	 * If we aren't flushing all things, let us overcommit up to
 	 * 1/2th of the space. If we can flush, don't let us overcommit
 	 * too much, let it overcommit up to 1/8 of the space.
 	 */
 	if (flush == BTRFS_RESERVE_FLUSH_ALL)
-		avail >>= 3;
+		to_add >>= 3;
 	else
-		avail >>= 1;
+		to_add >>= 1;
 
-	if (used + bytes < space_info->total_bytes + avail)
+	/*
+	 * Limit the overcommit to the amount of free space we could possibly
+	 * allocate for chunks.
+	 */
+	to_add = min(avail, to_add);
+
+	if (used + bytes < space_info->total_bytes + to_add)
 		return 1;
 	return 0;
 }
 
+void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
+				  unsigned long nr_pages)
+{
+	struct super_block *sb = root->fs_info->sb;
+	int started;
+
+	/* If we can not start writeback, just sync all the delalloc file. */
+	started = try_to_writeback_inodes_sb_nr(sb, nr_pages,
+						WB_REASON_FS_FREE_SPACE);
+	if (!started) {
+		/*
+		 * We needn't worry the filesystem going from r/w to r/o though
+		 * we don't acquire ->s_umount mutex, because the filesystem
+		 * should guarantee the delalloc inodes list be empty after
+		 * the filesystem is readonly(all dirty pages are written to
+		 * the disk).
+		 */
+		btrfs_start_delalloc_inodes(root, 0);
+		btrfs_wait_ordered_extents(root, 0);
+	}
+}
+
 /*
  * shrink metadata reservation for delalloc
 */
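The reworked can_overcommit() arithmetic: refuse outright when twice the global reserve no longer fits, scale total_bytes (1/8 with full flushing, 1/2 otherwise) instead of scaling the raw free chunk space, then clamp to what chunks could actually provide. A worked example with made-up figures that follows only the shape of those checks:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t total_bytes = 8ULL << 30;	/* 8 GiB space_info */
	uint64_t used = 6ULL << 30;		/* used+reserved+pinned+ro */
	uint64_t rsv_size = 512ULL << 20;	/* global block reserve */
	uint64_t avail = 2ULL << 30;		/* free chunk space */
	uint64_t bytes = 1ULL << 30;		/* requested reservation */
	uint64_t to_add = total_bytes >> 3;	/* FLUSH_ALL: 1/8 of total */

	if (used + (rsv_size << 1) >= total_bytes) {
		printf("no overcommit: global reserve would be at risk\n");
		return 0;
	}
	if (to_add > avail)			/* min(avail, to_add) */
		to_add = avail;
	printf("overcommit %s\n",
	       used + bytes < total_bytes + to_add ? "allowed" : "denied");
	return 0;
}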
@@ -3710,7 +3843,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 	space_info = block_rsv->space_info;
 
 	smp_mb();
-	delalloc_bytes = root->fs_info->delalloc_bytes;
+	delalloc_bytes = percpu_counter_sum_positive(
+						&root->fs_info->delalloc_bytes);
 	if (delalloc_bytes == 0) {
 		if (trans)
 			return;
@@ -3721,10 +3855,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 	while (delalloc_bytes && loops < 3) {
 		max_reclaim = min(delalloc_bytes, to_reclaim);
 		nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
-		try_to_writeback_inodes_sb_nr(root->fs_info->sb,
-					      nr_pages,
-					      WB_REASON_FS_FREE_SPACE);
-
+		btrfs_writeback_inodes_sb_nr(root, nr_pages);
 		/*
 		 * We need to wait for the async pages to actually start before
 		 * we do anything.
@@ -3752,7 +3883,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 			break;
 		}
 		smp_mb();
-		delalloc_bytes = root->fs_info->delalloc_bytes;
+		delalloc_bytes = percpu_counter_sum_positive(
+						&root->fs_info->delalloc_bytes);
 	}
 }
 
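delalloc_bytes is now a per-CPU counter, so the shrinker must fold in every CPU's delta and clamp at zero instead of reading one shared word; that is what percpu_counter_sum_positive() provides. A stand-alone model of the summation, not the kernel implementation:

#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4

struct pcpu_counter {
	int64_t count;			/* folded-in global part */
	int64_t pcpu[NR_CPUS];		/* per-CPU deltas */
};

static int64_t pcpu_sum_positive(const struct pcpu_counter *c)
{
	int64_t sum = c->count;
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		sum += c->pcpu[cpu];
	return sum > 0 ? sum : 0;	/* clamp like *_sum_positive() */
}

int main(void)
{
	struct pcpu_counter delalloc = {
		.count = 1 << 20,
		.pcpu = { 4096, -8192, 12288, 0 },
	};

	printf("delalloc bytes: %lld\n",
	       (long long)pcpu_sum_positive(&delalloc));	/* 1056768 */
	return 0;
}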
@@ -4016,6 +4148,15 @@ again:
 		goto again;
 
 out:
+	if (ret == -ENOSPC &&
+	    unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
+		struct btrfs_block_rsv *global_rsv =
+			&root->fs_info->global_block_rsv;
+
+		if (block_rsv != global_rsv &&
+		    !block_rsv_use_bytes(global_rsv, orig_bytes))
+			ret = 0;
+	}
 	if (flushing) {
 		spin_lock(&space_info->lock);
 		space_info->flush = 0;
@@ -4402,19 +4543,60 @@ void btrfs_orphan_release_metadata(struct inode *inode)
 	btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
 }
 
-int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
-				struct btrfs_pending_snapshot *pending)
+/*
+ * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
+ * root: the root of the parent directory
+ * rsv: block reservation
+ * items: the number of items that we need do reservation
+ * qgroup_reserved: used to return the reserved size in qgroup
+ *
+ * This function is used to reserve the space for snapshot/subvolume
+ * creation and deletion. Those operations are different with the
+ * common file/directory operations, they change two fs/file trees
+ * and root tree, the number of items that the qgroup reserves is
+ * different with the free space reservation. So we can not use
+ * the space reservation mechanism in start_transaction().
+ */
+int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
+				     struct btrfs_block_rsv *rsv,
+				     int items,
+				     u64 *qgroup_reserved)
 {
-	struct btrfs_root *root = pending->root;
-	struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
-	struct btrfs_block_rsv *dst_rsv = &pending->block_rsv;
-	/*
-	 * two for root back/forward refs, two for directory entries,
-	 * one for root of the snapshot and one for parent inode.
-	 */
-	u64 num_bytes = btrfs_calc_trans_metadata_size(root, 6);
-	dst_rsv->space_info = src_rsv->space_info;
-	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
+	u64 num_bytes;
+	int ret;
+
+	if (root->fs_info->quota_enabled) {
+		/* One for parent inode, two for dir entries */
+		num_bytes = 3 * root->leafsize;
+		ret = btrfs_qgroup_reserve(root, num_bytes);
+		if (ret)
+			return ret;
+	} else {
+		num_bytes = 0;
+	}
+
+	*qgroup_reserved = num_bytes;
+
+	num_bytes = btrfs_calc_trans_metadata_size(root, items);
+	rsv->space_info = __find_space_info(root->fs_info,
+					    BTRFS_BLOCK_GROUP_METADATA);
+	ret = btrfs_block_rsv_add(root, rsv, num_bytes,
+				  BTRFS_RESERVE_FLUSH_ALL);
+	if (ret) {
+		if (*qgroup_reserved)
+			btrfs_qgroup_free(root, *qgroup_reserved);
+	}
+
+	return ret;
+}
+
+void btrfs_subvolume_release_metadata(struct btrfs_root *root,
+				      struct btrfs_block_rsv *rsv,
+				      u64 qgroup_reserved)
+{
+	btrfs_block_rsv_release(root, rsv, (u64)-1);
+	if (qgroup_reserved)
+		btrfs_qgroup_free(root, qgroup_reserved);
 }
 
 /**
@@ -4522,6 +4704,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
 	int ret = 0;
 	bool delalloc_lock = true;
+	u64 to_free = 0;
+	unsigned dropped;
 
 	/* If we are a free space inode we need to not flush since we will be in
 	 * the middle of a transaction commit.  We also don't need the delalloc
@@ -4565,54 +4749,19 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	csum_bytes = BTRFS_I(inode)->csum_bytes;
 	spin_unlock(&BTRFS_I(inode)->lock);
 
-	if (root->fs_info->quota_enabled)
+	if (root->fs_info->quota_enabled) {
 		ret = btrfs_qgroup_reserve(root, num_bytes +
 					   nr_extents * root->leafsize);
+		if (ret)
+			goto out_fail;
+	}
 
-	/*
-	 * ret != 0 here means the qgroup reservation failed, we go straight to
-	 * the shared error handling then.
-	 */
-	if (ret == 0)
-		ret = reserve_metadata_bytes(root, block_rsv,
-					     to_reserve, flush);
-
-	if (ret) {
-		u64 to_free = 0;
-		unsigned dropped;
-
-		spin_lock(&BTRFS_I(inode)->lock);
-		dropped = drop_outstanding_extent(inode);
-		/*
-		 * If the inodes csum_bytes is the same as the original
-		 * csum_bytes then we know we haven't raced with any free()ers
-		 * so we can just reduce our inodes csum bytes and carry on.
-		 * Otherwise we have to do the normal free thing to account for
-		 * the case that the free side didn't free up its reserve
-		 * because of this outstanding reservation.
-		 */
-		if (BTRFS_I(inode)->csum_bytes == csum_bytes)
-			calc_csum_metadata_size(inode, num_bytes, 0);
-		else
-			to_free = calc_csum_metadata_size(inode, num_bytes, 0);
-		spin_unlock(&BTRFS_I(inode)->lock);
-		if (dropped)
-			to_free += btrfs_calc_trans_metadata_size(root, dropped);
-
-		if (to_free) {
-			btrfs_block_rsv_release(root, block_rsv, to_free);
-			trace_btrfs_space_reservation(root->fs_info,
-						      "delalloc",
-						      btrfs_ino(inode),
-						      to_free, 0);
-		}
-		if (root->fs_info->quota_enabled) {
+	ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
+	if (unlikely(ret)) {
+		if (root->fs_info->quota_enabled)
 			btrfs_qgroup_free(root, num_bytes +
 					  nr_extents * root->leafsize);
-		}
-		if (delalloc_lock)
-			mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
-		return ret;
+		goto out_fail;
 	}
 
 	spin_lock(&BTRFS_I(inode)->lock);
@@ -4633,6 +4782,34 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	block_rsv_add_bytes(block_rsv, to_reserve, 1);
 
 	return 0;
+
+out_fail:
+	spin_lock(&BTRFS_I(inode)->lock);
+	dropped = drop_outstanding_extent(inode);
+	/*
+	 * If the inodes csum_bytes is the same as the original
+	 * csum_bytes then we know we haven't raced with any free()ers
+	 * so we can just reduce our inodes csum bytes and carry on.
+	 * Otherwise we have to do the normal free thing to account for
+	 * the case that the free side didn't free up its reserve
+	 * because of this outstanding reservation.
+	 */
+	if (BTRFS_I(inode)->csum_bytes == csum_bytes)
+		calc_csum_metadata_size(inode, num_bytes, 0);
+	else
+		to_free = calc_csum_metadata_size(inode, num_bytes, 0);
+	spin_unlock(&BTRFS_I(inode)->lock);
+	if (dropped)
+		to_free += btrfs_calc_trans_metadata_size(root, dropped);
+
+	if (to_free) {
+		btrfs_block_rsv_release(root, block_rsv, to_free);
+		trace_btrfs_space_reservation(root->fs_info, "delalloc",
+					      btrfs_ino(inode), to_free, 0);
+	}
+	if (delalloc_lock)
+		mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
+	return ret;
 }
 
 /**
@@ -4654,7 +4831,8 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
 	spin_lock(&BTRFS_I(inode)->lock);
 	dropped = drop_outstanding_extent(inode);
 
-	to_free = calc_csum_metadata_size(inode, num_bytes, 0);
+	if (num_bytes)
+		to_free = calc_csum_metadata_size(inode, num_bytes, 0);
 	spin_unlock(&BTRFS_I(inode)->lock);
 	if (dropped > 0)
 		to_free += btrfs_calc_trans_metadata_size(root, dropped);
@@ -4721,8 +4899,7 @@ void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
 	btrfs_free_reserved_data_space(inode, num_bytes);
 }
 
-static int update_block_group(struct btrfs_trans_handle *trans,
-			      struct btrfs_root *root,
+static int update_block_group(struct btrfs_root *root,
 			      u64 bytenr, u64 num_bytes, int alloc)
 {
 	struct btrfs_block_group_cache *cache = NULL;
@@ -4759,7 +4936,7 @@ static int update_block_group(struct btrfs_root *root,
 	 * space back to the block group, otherwise we will leak space.
 	 */
 	if (!alloc && cache->cached == BTRFS_CACHE_NO)
-		cache_block_group(cache, trans, NULL, 1);
+		cache_block_group(cache, 1);
 
 	byte_in_group = bytenr - cache->key.objectid;
 	WARN_ON(byte_in_group > cache->key.offset);
@@ -4809,6 +4986,13 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
 	struct btrfs_block_group_cache *cache;
 	u64 bytenr;
 
+	spin_lock(&root->fs_info->block_group_cache_lock);
+	bytenr = root->fs_info->first_logical_byte;
+	spin_unlock(&root->fs_info->block_group_cache_lock);
+
+	if (bytenr < (u64)-1)
+		return bytenr;
+
 	cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
 	if (!cache)
 		return 0;
@@ -4859,8 +5043,7 @@ int btrfs_pin_extent(struct btrfs_root *root,
 /*
  * this function must be called within transaction
 */
-int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
-				    struct btrfs_root *root,
+int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
 				    u64 bytenr, u64 num_bytes)
 {
 	struct btrfs_block_group_cache *cache;
@@ -4874,7 +5057,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
 	 * to one because the slow code to read in the free extents does check
 	 * the pinned extents.
 	 */
-	cache_block_group(cache, trans, root, 1);
+	cache_block_group(cache, 1);
 
 	pin_down_extent(root, cache, bytenr, num_bytes, 0);
 
@@ -5271,7 +5454,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 			}
 		}
 
-		ret = update_block_group(trans, root, bytenr, num_bytes, 0);
+		ret = update_block_group(root, bytenr, num_bytes, 0);
 		if (ret) {
 			btrfs_abort_transaction(trans, extent_root, ret);
 			goto out;
@@ -5316,7 +5499,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 	if (head->extent_op) {
 		if (!head->must_insert_reserved)
 			goto out;
-		kfree(head->extent_op);
+		btrfs_free_delayed_extent_op(head->extent_op);
 		head->extent_op = NULL;
 	}
 
@@ -5439,10 +5622,11 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	return ret;
 }
 
-static u64 stripe_align(struct btrfs_root *root, u64 val)
+static u64 stripe_align(struct btrfs_root *root,
+			struct btrfs_block_group_cache *cache,
+			u64 val, u64 num_bytes)
 {
-	u64 mask = ((u64)root->stripesize - 1);
-	u64 ret = (val + mask) & ~mask;
+	u64 ret = ALIGN(val, root->stripesize);
 	return ret;
 }
 
@@ -5462,7 +5646,6 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
 				u64 num_bytes)
 {
 	struct btrfs_caching_control *caching_ctl;
-	DEFINE_WAIT(wait);
 
 	caching_ctl = get_caching_control(cache);
 	if (!caching_ctl)
@@ -5479,7 +5662,6 @@ static noinline int
 wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
 {
 	struct btrfs_caching_control *caching_ctl;
-	DEFINE_WAIT(wait);
 
 	caching_ctl = get_caching_control(cache);
 	if (!caching_ctl)
@@ -5493,20 +5675,20 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
 
 int __get_raid_index(u64 flags)
 {
-	int index;
-
 	if (flags & BTRFS_BLOCK_GROUP_RAID10)
-		index = 0;
+		return BTRFS_RAID_RAID10;
 	else if (flags & BTRFS_BLOCK_GROUP_RAID1)
-		index = 1;
+		return BTRFS_RAID_RAID1;
 	else if (flags & BTRFS_BLOCK_GROUP_DUP)
-		index = 2;
+		return BTRFS_RAID_DUP;
 	else if (flags & BTRFS_BLOCK_GROUP_RAID0)
-		index = 3;
-	else
-		index = 4;
+		return BTRFS_RAID_RAID0;
+	else if (flags & BTRFS_BLOCK_GROUP_RAID5)
+		return BTRFS_RAID_RAID5;
+	else if (flags & BTRFS_BLOCK_GROUP_RAID6)
+		return BTRFS_RAID_RAID6;
 
-	return index;
+	return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
 }
 
 static int get_block_group_index(struct btrfs_block_group_cache *cache)
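__get_raid_index() drops the magic numbers 0..4 for a named enum; the values below mirror the kernel's btrfs_raid_types of this era (RAID5/6 appended after SINGLE), though the exact definition here should be treated as illustrative rather than authoritative:

#include <stdio.h>

enum btrfs_raid_types {
	BTRFS_RAID_RAID10,	/* 0, the old magic numbers */
	BTRFS_RAID_RAID1,	/* 1 */
	BTRFS_RAID_DUP,		/* 2 */
	BTRFS_RAID_RAID0,	/* 3 */
	BTRFS_RAID_SINGLE,	/* 4 */
	BTRFS_RAID_RAID5,	/* new with the raid56 work */
	BTRFS_RAID_RAID6,
	BTRFS_NR_RAID_TYPES
};

static const char *raid_name(enum btrfs_raid_types type)
{
	static const char * const names[BTRFS_NR_RAID_TYPES] = {
		"raid10", "raid1", "dup", "raid0", "single", "raid5", "raid6",
	};
	return names[type];
}

int main(void)
{
	printf("index %d -> %s\n", BTRFS_RAID_RAID10,
	       raid_name(BTRFS_RAID_RAID10));
	printf("index %d -> %s\n", BTRFS_RAID_RAID6,
	       raid_name(BTRFS_RAID_RAID6));
	return 0;
}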
@@ -5649,6 +5831,8 @@ search:
 		if (!block_group_bits(block_group, data)) {
 		    u64 extra = BTRFS_BLOCK_GROUP_DUP |
 				BTRFS_BLOCK_GROUP_RAID1 |
+				BTRFS_BLOCK_GROUP_RAID5 |
+				BTRFS_BLOCK_GROUP_RAID6 |
 				BTRFS_BLOCK_GROUP_RAID10;
 
 			/*
@@ -5664,8 +5848,7 @@ have_block_group:
 		cached = block_group_cache_done(block_group);
 		if (unlikely(!cached)) {
 			found_uncached_bg = true;
-			ret = cache_block_group(block_group, trans,
-						orig_root, 0);
+			ret = cache_block_group(block_group, 0);
 			BUG_ON(ret < 0);
 			ret = 0;
 		}
@@ -5678,6 +5861,7 @@ have_block_group:
 		 * lets look there
 		 */
 		if (last_ptr) {
+			unsigned long aligned_cluster;
 			/*
 			 * the refill lock keeps out other
 			 * people trying to start a new cluster
@@ -5744,11 +5928,15 @@ refill_cluster:
 				goto unclustered_alloc;
 			}
 
+			aligned_cluster = max_t(unsigned long,
+						empty_cluster + empty_size,
+						block_group->full_stripe_len);
+
 			/* allocate a cluster in this block group */
 			ret = btrfs_find_space_cluster(trans, root,
 					       block_group, last_ptr,
 					       search_start, num_bytes,
-					       empty_cluster + empty_size);
+					       aligned_cluster);
 			if (ret == 0) {
 				/*
 				 * now pull our allocation out of this
@@ -5819,7 +6007,8 @@ unclustered_alloc:
 			goto loop;
 		}
checks:
-		search_start = stripe_align(root, offset);
+		search_start = stripe_align(root, used_block_group,
+					    offset, num_bytes);
 
 		/* move on to the next group */
 		if (search_start + num_bytes >
@@ -5970,7 +6159,7 @@ again:
 	if (ret == -ENOSPC) {
 		if (!final_tried) {
 			num_bytes = num_bytes >> 1;
-			num_bytes = num_bytes & ~(root->sectorsize - 1);
+			num_bytes = round_down(num_bytes, root->sectorsize);
 			num_bytes = max(num_bytes, min_alloc_size);
 			if (num_bytes == min_alloc_size)
 				final_tried = true;
@@ -6094,7 +6283,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(path->nodes[0]);
 	btrfs_free_path(path);
 
-	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
+	ret = update_block_group(root, ins->objectid, ins->offset, 1);
 	if (ret) { /* -ENOENT, logic error */
 		printk(KERN_ERR "btrfs update block group failed for %llu "
 		       "%llu\n", (unsigned long long)ins->objectid,
@@ -6158,7 +6347,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(leaf);
 	btrfs_free_path(path);
 
-	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
+	ret = update_block_group(root, ins->objectid, ins->offset, 1);
 	if (ret) { /* -ENOENT, logic error */
 		printk(KERN_ERR "btrfs update block group failed for %llu "
 		       "%llu\n", (unsigned long long)ins->objectid,
@@ -6201,7 +6390,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 	u64 num_bytes = ins->offset;
 
 	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
-	cache_block_group(block_group, trans, NULL, 0);
+	cache_block_group(block_group, 0);
 	caching_ctl = get_caching_control(block_group);
 
 	if (!caching_ctl) {
| @@ -6315,12 +6504,14 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
| 6315 | if (!ret) | 6504 | if (!ret) |
| 6316 | return block_rsv; | 6505 | return block_rsv; |
| 6317 | if (ret && !block_rsv->failfast) { | 6506 | if (ret && !block_rsv->failfast) { |
| 6318 | static DEFINE_RATELIMIT_STATE(_rs, | 6507 | if (btrfs_test_opt(root, ENOSPC_DEBUG)) { |
| 6319 | DEFAULT_RATELIMIT_INTERVAL, | 6508 | static DEFINE_RATELIMIT_STATE(_rs, |
| 6320 | /*DEFAULT_RATELIMIT_BURST*/ 2); | 6509 | DEFAULT_RATELIMIT_INTERVAL * 10, |
| 6321 | if (__ratelimit(&_rs)) | 6510 | /*DEFAULT_RATELIMIT_BURST*/ 1); |
| 6322 | WARN(1, KERN_DEBUG "btrfs: block rsv returned %d\n", | 6511 | if (__ratelimit(&_rs)) |
| 6323 | ret); | 6512 | WARN(1, KERN_DEBUG |
| 6513 | "btrfs: block rsv returned %d\n", ret); | ||
| 6514 | } | ||
| 6324 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, | 6515 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, |
| 6325 | BTRFS_RESERVE_NO_FLUSH); | 6516 | BTRFS_RESERVE_NO_FLUSH); |
| 6326 | if (!ret) { | 6517 | if (!ret) { |
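use_block_rsv()'s fallback warning is now printed only when the filesystem is mounted with ENOSPC_DEBUG, and the ratelimit is loosened from two messages per interval to one message per ten intervals. A toy userspace model of interval/burst ratelimiting makes those two knobs concrete; this is not the kernel's lib/ratelimit.c, and the 50-second window assumes DEFAULT_RATELIMIT_INTERVAL is roughly five seconds:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

/* Allow at most `burst` events per `interval` seconds. */
struct ratelimit_state {
	time_t begin;
	int interval;	/* seconds */
	int burst;
	int printed;
};

static bool ratelimit_ok(struct ratelimit_state *rs)
{
	time_t now = time(NULL);

	if (now - rs->begin >= rs->interval) {	/* new window opens */
		rs->begin = now;
		rs->printed = 0;
	}
	if (rs->printed < rs->burst) {
		rs->printed++;
		return true;
	}
	return false;	/* suppressed */
}

int main(void)
{
	/* Mirrors the patched tuning: interval * 10, burst 1. */
	struct ratelimit_state rs = { .interval = 50, .burst = 1 };

	for (int i = 0; i < 5; i++)
		if (ratelimit_ok(&rs))	/* only the first call passes */
			fprintf(stderr, "block rsv returned %d\n", -28);
	return 0;
}
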
| @@ -6386,7 +6577,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
| 6386 | 6577 | ||
| 6387 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | 6578 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { |
| 6388 | struct btrfs_delayed_extent_op *extent_op; | 6579 | struct btrfs_delayed_extent_op *extent_op; |
| 6389 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | 6580 | extent_op = btrfs_alloc_delayed_extent_op(); |
| 6390 | BUG_ON(!extent_op); /* -ENOMEM */ | 6581 | BUG_ON(!extent_op); /* -ENOMEM */ |
| 6391 | if (key) | 6582 | if (key) |
| 6392 | memcpy(&extent_op->key, key, sizeof(extent_op->key)); | 6583 | memcpy(&extent_op->key, key, sizeof(extent_op->key)); |
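Replacing the bare kmalloc() with btrfs_alloc_delayed_extent_op() funnels every delayed-extent-op allocation through one typed helper, so the allocation policy (for instance a dedicated slab cache later) can change in exactly one place without touching callers. A hedged userspace sketch of the pattern, using a simplified stand-in for the real struct:

#include <stdlib.h>

/* Simplified stand-in for struct btrfs_delayed_extent_op. */
struct delayed_extent_op {
	unsigned long flags_to_set;
	int update_key;
	int update_flags;
	int is_data;
};

/* Typed allocator in the spirit of btrfs_alloc_delayed_extent_op():
 * plain calloc() today, swappable for a pooled allocator tomorrow. */
static struct delayed_extent_op *alloc_delayed_extent_op(void)
{
	return calloc(1, sizeof(struct delayed_extent_op));
}

static void free_delayed_extent_op(struct delayed_extent_op *op)
{
	free(op);
}

int main(void)
{
	struct delayed_extent_op *op = alloc_delayed_extent_op();

	if (!op)
		return 1;	/* would be -ENOMEM in the kernel */
	op->update_key = 1;
	free_delayed_extent_op(op);
	return 0;
}
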
| @@ -7189,6 +7380,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | |||
| 7189 | root->fs_info->fs_devices->missing_devices; | 7380 | root->fs_info->fs_devices->missing_devices; |
| 7190 | 7381 | ||
| 7191 | stripped = BTRFS_BLOCK_GROUP_RAID0 | | 7382 | stripped = BTRFS_BLOCK_GROUP_RAID0 | |
| 7383 | BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | | ||
| 7192 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; | 7384 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; |
| 7193 | 7385 | ||
| 7194 | if (num_devices == 1) { | 7386 | if (num_devices == 1) { |
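update_block_group_flags() builds a mask of every profile that balance may need to convert away from; the hunk extends it with the new RAID5/6 bits so parity profiles are restriped like the mirrored and striped ones. A sketch of the mask test, using the profile bit positions ctree.h defines in this era (treat the exact values as assumptions):

#include <stdint.h>
#include <stdio.h>

#define BG_RAID0  (1ULL << 3)	/* bit layout as in ctree.h */
#define BG_RAID1  (1ULL << 4)
#define BG_RAID10 (1ULL << 6)
#define BG_RAID5  (1ULL << 7)
#define BG_RAID6  (1ULL << 8)

int main(void)
{
	const uint64_t stripped = BG_RAID0 | BG_RAID5 | BG_RAID6 |
				  BG_RAID1 | BG_RAID10;
	uint64_t flags = BG_RAID6;	/* e.g. a raid6 block group */

	/* With the hunk applied, raid5/6 now hit the conversion path. */
	printf("covered by stripped mask: %s\n",
	       (flags & stripped) ? "yes" : "no");
	return 0;
}
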
| @@ -7467,16 +7659,16 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
| 7467 | index = get_block_group_index(block_group); | 7659 | index = get_block_group_index(block_group); |
| 7468 | } | 7660 | } |
| 7469 | 7661 | ||
| 7470 | if (index == 0) { | 7662 | if (index == BTRFS_RAID_RAID10) { |
| 7471 | dev_min = 4; | 7663 | dev_min = 4; |
| 7472 | /* Divide by 2 */ | 7664 | /* Divide by 2 */ |
| 7473 | min_free >>= 1; | 7665 | min_free >>= 1; |
| 7474 | } else if (index == 1) { | 7666 | } else if (index == BTRFS_RAID_RAID1) { |
| 7475 | dev_min = 2; | 7667 | dev_min = 2; |
| 7476 | } else if (index == 2) { | 7668 | } else if (index == BTRFS_RAID_DUP) { |
| 7477 | /* Multiply by 2 */ | 7669 | /* Multiply by 2 */ |
| 7478 | min_free <<= 1; | 7670 | min_free <<= 1; |
| 7479 | } else if (index == 3) { | 7671 | } else if (index == BTRFS_RAID_RAID0) { |
| 7480 | dev_min = fs_devices->rw_devices; | 7672 | dev_min = fs_devices->rw_devices; |
| 7481 | do_div(min_free, dev_min); | 7673 | do_div(min_free, dev_min); |
| 7482 | } | 7674 | } |
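btrfs_can_relocate()'s magic indices 0-3 become the named BTRFS_RAID_* values; nothing changes at runtime, but it is now legible that each branch models a profile's redundancy when estimating whether the block group's data fits on the remaining devices. A self-contained sketch of that per-profile table (the enum order mirrors what the patch relies on, and the kernel's do_div() becomes plain division in userspace):

#include <stdint.h>
#include <stdio.h>

/* Mirrors the BTRFS_RAID_* ordering the patch depends on. */
enum raid_index {
	RAID_RAID10,	/* 0 */
	RAID_RAID1,	/* 1 */
	RAID_DUP,	/* 2 */
	RAID_RAID0,	/* 3 */
	RAID_SINGLE,	/* 4 */
};

int main(void)
{
	uint64_t min_free = 1024ULL * 1024 * 1024;	/* 1 GiB used */
	int rw_devices = 6;
	int dev_min = 1;
	enum raid_index index = RAID_RAID10;

	switch (index) {
	case RAID_RAID10:	/* striped mirrors: half per copy */
		dev_min = 4;
		min_free >>= 1;
		break;
	case RAID_RAID1:	/* two full copies */
		dev_min = 2;
		break;
	case RAID_DUP:		/* both copies on the same device */
		min_free <<= 1;
		break;
	case RAID_RAID0:	/* spread over all writable devices */
		dev_min = rw_devices;
		min_free /= dev_min;
		break;
	default:
		break;
	}
	printf("dev_min=%d min_free=%llu\n", dev_min,
	       (unsigned long long)min_free);
	return 0;
}
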
| @@ -7637,11 +7829,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
| 7637 | space_info = list_entry(info->space_info.next, | 7829 | space_info = list_entry(info->space_info.next, |
| 7638 | struct btrfs_space_info, | 7830 | struct btrfs_space_info, |
| 7639 | list); | 7831 | list); |
| 7640 | if (space_info->bytes_pinned > 0 || | 7832 | if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) { |
| 7641 | space_info->bytes_reserved > 0 || | 7833 | if (space_info->bytes_pinned > 0 || |
| 7642 | space_info->bytes_may_use > 0) { | 7834 | space_info->bytes_reserved > 0 || |
| 7643 | WARN_ON(1); | 7835 | space_info->bytes_may_use > 0) { |
| 7644 | dump_space_info(space_info, 0, 0); | 7836 | WARN_ON(1); |
| 7837 | dump_space_info(space_info, 0, 0); | ||
| 7838 | } | ||
| 7645 | } | 7839 | } |
| 7646 | list_del(&space_info->list); | 7840 | list_del(&space_info->list); |
| 7647 | kfree(space_info); | 7841 | kfree(space_info); |
| @@ -7740,7 +7934,9 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
| 7740 | btrfs_release_path(path); | 7934 | btrfs_release_path(path); |
| 7741 | cache->flags = btrfs_block_group_flags(&cache->item); | 7935 | cache->flags = btrfs_block_group_flags(&cache->item); |
| 7742 | cache->sectorsize = root->sectorsize; | 7936 | cache->sectorsize = root->sectorsize; |
| 7743 | 7937 | cache->full_stripe_len = btrfs_full_stripe_len(root, | |
| 7938 | &root->fs_info->mapping_tree, | ||
| 7939 | found_key.objectid); | ||
| 7744 | btrfs_init_free_space_ctl(cache); | 7940 | btrfs_init_free_space_ctl(cache); |
| 7745 | 7941 | ||
| 7746 | /* | 7942 | /* |
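Each block group now caches full_stripe_len when it is loaded (and, in a later hunk, when it is created). On parity profiles a full stripe touches every data stripe exactly once, so allocations sized and aligned to it spare the RAID5/6 code a parity read-modify-write. A hedged model of the arithmetic, assuming the 64 KiB per-device stripe length this era of btrfs uses; for unstriped profiles the real helper simply returns the sectorsize:

#include <stdint.h>
#include <stdio.h>

#define STRIPE_LEN (64 * 1024)	/* per-device stripe, assumed */

/* Full stripe = data stripes only; parity stripes hold no data. */
static uint64_t full_stripe_len(int num_stripes, int nparity)
{
	return (uint64_t)(num_stripes - nparity) * STRIPE_LEN;
}

int main(void)
{
	/* 6-device raid6: 4 data + 2 parity -> 256 KiB of data. */
	printf("raid6/6dev: %llu\n",
	       (unsigned long long)full_stripe_len(6, 2));
	/* 6-device raid5: 5 data + 1 parity -> 320 KiB of data. */
	printf("raid5/6dev: %llu\n",
	       (unsigned long long)full_stripe_len(6, 1));
	return 0;
}
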
| @@ -7794,6 +7990,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
| 7794 | if (!(get_alloc_profile(root, space_info->flags) & | 7990 | if (!(get_alloc_profile(root, space_info->flags) & |
| 7795 | (BTRFS_BLOCK_GROUP_RAID10 | | 7991 | (BTRFS_BLOCK_GROUP_RAID10 | |
| 7796 | BTRFS_BLOCK_GROUP_RAID1 | | 7992 | BTRFS_BLOCK_GROUP_RAID1 | |
| 7993 | BTRFS_BLOCK_GROUP_RAID5 | | ||
| 7994 | BTRFS_BLOCK_GROUP_RAID6 | | ||
| 7797 | BTRFS_BLOCK_GROUP_DUP))) | 7995 | BTRFS_BLOCK_GROUP_DUP))) |
| 7798 | continue; | 7996 | continue; |
| 7799 | /* | 7997 | /* |
| @@ -7869,6 +8067,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
| 7869 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; | 8067 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; |
| 7870 | cache->sectorsize = root->sectorsize; | 8068 | cache->sectorsize = root->sectorsize; |
| 7871 | cache->fs_info = root->fs_info; | 8069 | cache->fs_info = root->fs_info; |
| 8070 | cache->full_stripe_len = btrfs_full_stripe_len(root, | ||
| 8071 | &root->fs_info->mapping_tree, | ||
| 8072 | chunk_offset); | ||
| 7872 | 8073 | ||
| 7873 | atomic_set(&cache->count, 1); | 8074 | atomic_set(&cache->count, 1); |
| 7874 | spin_lock_init(&cache->lock); | 8075 | spin_lock_init(&cache->lock); |
| @@ -7918,12 +8119,14 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | |||
| 7918 | u64 extra_flags = chunk_to_extended(flags) & | 8119 | u64 extra_flags = chunk_to_extended(flags) & |
| 7919 | BTRFS_EXTENDED_PROFILE_MASK; | 8120 | BTRFS_EXTENDED_PROFILE_MASK; |
| 7920 | 8121 | ||
| 8122 | write_seqlock(&fs_info->profiles_lock); | ||
| 7921 | if (flags & BTRFS_BLOCK_GROUP_DATA) | 8123 | if (flags & BTRFS_BLOCK_GROUP_DATA) |
| 7922 | fs_info->avail_data_alloc_bits &= ~extra_flags; | 8124 | fs_info->avail_data_alloc_bits &= ~extra_flags; |
| 7923 | if (flags & BTRFS_BLOCK_GROUP_METADATA) | 8125 | if (flags & BTRFS_BLOCK_GROUP_METADATA) |
| 7924 | fs_info->avail_metadata_alloc_bits &= ~extra_flags; | 8126 | fs_info->avail_metadata_alloc_bits &= ~extra_flags; |
| 7925 | if (flags & BTRFS_BLOCK_GROUP_SYSTEM) | 8127 | if (flags & BTRFS_BLOCK_GROUP_SYSTEM) |
| 7926 | fs_info->avail_system_alloc_bits &= ~extra_flags; | 8128 | fs_info->avail_system_alloc_bits &= ~extra_flags; |
| 8129 | write_sequnlock(&fs_info->profiles_lock); | ||
| 7927 | } | 8130 | } |
| 7928 | 8131 | ||
| 7929 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 8132 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
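Wrapping the avail_*_alloc_bits updates in profiles_lock, a seqlock, lets readers take a consistent snapshot of all three fields without ever blocking this writer. A toy C11-atomics rendition of the seqcount protocol; the kernel's write_seqlock()/read_seqbegin()/read_seqretry() with its carefully placed barriers is the real mechanism, and this sketch assumes a single writer and seq_cst ordering for simplicity:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_uint seq;	/* odd while a write is in flight */
static uint64_t avail_data, avail_metadata, avail_system;

static void write_profiles(uint64_t d, uint64_t m, uint64_t s)
{
	atomic_fetch_add(&seq, 1);	/* seq goes odd: write begins */
	avail_data = d;
	avail_metadata = m;
	avail_system = s;
	atomic_fetch_add(&seq, 1);	/* seq goes even: write done */
}

static void read_profiles(uint64_t *d, uint64_t *m, uint64_t *s)
{
	unsigned int start;

	do {	/* retry if a write was in flight or landed meanwhile */
		start = atomic_load(&seq);
		*d = avail_data;
		*m = avail_metadata;
		*s = avail_system;
	} while ((start & 1) || start != atomic_load(&seq));
}

int main(void)
{
	uint64_t d, m, s;

	write_profiles(0x8, 0x10, 0x10);	/* example profile bits */
	read_profiles(&d, &m, &s);
	printf("data=%#llx metadata=%#llx system=%#llx\n",
	       (unsigned long long)d, (unsigned long long)m,
	       (unsigned long long)s);
	return 0;
}
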
| @@ -8022,6 +8225,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
| 8022 | spin_lock(&root->fs_info->block_group_cache_lock); | 8225 | spin_lock(&root->fs_info->block_group_cache_lock); |
| 8023 | rb_erase(&block_group->cache_node, | 8226 | rb_erase(&block_group->cache_node, |
| 8024 | &root->fs_info->block_group_cache_tree); | 8227 | &root->fs_info->block_group_cache_tree); |
| 8228 | |||
| 8229 | if (root->fs_info->first_logical_byte == block_group->key.objectid) | ||
| 8230 | root->fs_info->first_logical_byte = (u64)-1; | ||
| 8025 | spin_unlock(&root->fs_info->block_group_cache_lock); | 8231 | spin_unlock(&root->fs_info->block_group_cache_lock); |
| 8026 | 8232 | ||
| 8027 | down_write(&block_group->space_info->groups_sem); | 8233 | down_write(&block_group->space_info->groups_sem); |
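If the block group being removed holds the lowest logical byte, the cached first_logical_byte would go stale; rather than rescanning the rb-tree under block_group_cache_lock, the hunk poisons the cache with (u64)-1 and lets the next lookup repopulate it. A sketch of this invalidate-and-recompute-lazily pattern (the insert-side minimum update shown here is assumed to mirror the corresponding add path):

#include <stdint.h>
#include <stdio.h>

#define FIRST_BYTE_UNSET ((uint64_t)-1)

static uint64_t first_logical_byte = FIRST_BYTE_UNSET;

/* Insertion keeps the cached minimum with a cheap monotonic update. */
static void add_group(uint64_t objectid)
{
	if (first_logical_byte > objectid)
		first_logical_byte = objectid;
}

/* Removal does not search for the new minimum here; it only poisons
 * the cache when the removed group *was* the minimum. */
static void remove_group(uint64_t objectid)
{
	if (first_logical_byte == objectid)
		first_logical_byte = FIRST_BYTE_UNSET;
}

int main(void)
{
	add_group(1048576);
	add_group(4096);
	remove_group(4096);	/* minimum gone: cache is poisoned */
	printf("cached first byte: %#llx\n",
	       (unsigned long long)first_logical_byte);
	return 0;
}
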
| @@ -8144,7 +8350,7 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | |||
| 8144 | 8350 | ||
| 8145 | if (end - start >= range->minlen) { | 8351 | if (end - start >= range->minlen) { |
| 8146 | if (!block_group_cache_done(cache)) { | 8352 | if (!block_group_cache_done(cache)) { |
| 8147 | ret = cache_block_group(cache, NULL, root, 0); | 8353 | ret = cache_block_group(cache, 0); |
| 8148 | if (!ret) | 8354 | if (!ret) |
| 8149 | wait_block_group_cache_done(cache); | 8355 | wait_block_group_cache_done(cache); |
| 8150 | } | 8356 | } |
