Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 578
1 file changed, 392 insertions, 186 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index cf54bdfee334..3e074dab2d57 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -31,6 +31,7 @@ | |||
31 | #include "print-tree.h" | 31 | #include "print-tree.h" |
32 | #include "transaction.h" | 32 | #include "transaction.h" |
33 | #include "volumes.h" | 33 | #include "volumes.h" |
34 | #include "raid56.h" | ||
34 | #include "locking.h" | 35 | #include "locking.h" |
35 | #include "free-space-cache.h" | 36 | #include "free-space-cache.h" |
36 | #include "math.h" | 37 | #include "math.h" |
@@ -72,8 +73,7 @@ enum { | |||
72 | RESERVE_ALLOC_NO_ACCOUNT = 2, | 73 | RESERVE_ALLOC_NO_ACCOUNT = 2, |
73 | }; | 74 | }; |
74 | 75 | ||
75 | static int update_block_group(struct btrfs_trans_handle *trans, | 76 | static int update_block_group(struct btrfs_root *root, |
76 | struct btrfs_root *root, | ||
77 | u64 bytenr, u64 num_bytes, int alloc); | 77 | u64 bytenr, u64 num_bytes, int alloc); |
78 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 78 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
79 | struct btrfs_root *root, | 79 | struct btrfs_root *root, |
@@ -103,6 +103,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
103 | int dump_block_groups); | 103 | int dump_block_groups); |
104 | static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, | 104 | static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, |
105 | u64 num_bytes, int reserve); | 105 | u64 num_bytes, int reserve); |
106 | static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, | ||
107 | u64 num_bytes); | ||
106 | 108 | ||
107 | static noinline int | 109 | static noinline int |
108 | block_group_cache_done(struct btrfs_block_group_cache *cache) | 110 | block_group_cache_done(struct btrfs_block_group_cache *cache) |
@@ -162,6 +164,10 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info, | |||
162 | rb_link_node(&block_group->cache_node, parent, p); | 164 | rb_link_node(&block_group->cache_node, parent, p); |
163 | rb_insert_color(&block_group->cache_node, | 165 | rb_insert_color(&block_group->cache_node, |
164 | &info->block_group_cache_tree); | 166 | &info->block_group_cache_tree); |
167 | |||
168 | if (info->first_logical_byte > block_group->key.objectid) | ||
169 | info->first_logical_byte = block_group->key.objectid; | ||
170 | |||
165 | spin_unlock(&info->block_group_cache_lock); | 171 | spin_unlock(&info->block_group_cache_lock); |
166 | 172 | ||
167 | return 0; | 173 | return 0; |
@@ -203,8 +209,11 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, | |||
203 | break; | 209 | break; |
204 | } | 210 | } |
205 | } | 211 | } |
206 | if (ret) | 212 | if (ret) { |
207 | btrfs_get_block_group(ret); | 213 | btrfs_get_block_group(ret); |
214 | if (bytenr == 0 && info->first_logical_byte > ret->key.objectid) | ||
215 | info->first_logical_byte = ret->key.objectid; | ||
216 | } | ||
208 | spin_unlock(&info->block_group_cache_lock); | 217 | spin_unlock(&info->block_group_cache_lock); |
209 | 218 | ||
210 | return ret; | 219 | return ret; |
@@ -468,8 +477,6 @@ out: | |||
468 | } | 477 | } |
469 | 478 | ||
470 | static int cache_block_group(struct btrfs_block_group_cache *cache, | 479 | static int cache_block_group(struct btrfs_block_group_cache *cache, |
471 | struct btrfs_trans_handle *trans, | ||
472 | struct btrfs_root *root, | ||
473 | int load_cache_only) | 480 | int load_cache_only) |
474 | { | 481 | { |
475 | DEFINE_WAIT(wait); | 482 | DEFINE_WAIT(wait); |
@@ -527,12 +534,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
527 | cache->cached = BTRFS_CACHE_FAST; | 534 | cache->cached = BTRFS_CACHE_FAST; |
528 | spin_unlock(&cache->lock); | 535 | spin_unlock(&cache->lock); |
529 | 536 | ||
530 | /* | ||
531 | * We can't do the read from on-disk cache during a commit since we need | ||
532 | * to have the normal tree locking. Also if we are currently trying to | ||
533 | * allocate blocks for the tree root we can't do the fast caching since | ||
534 | * we likely hold important locks. | ||
535 | */ | ||
536 | if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) { | 537 | if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) { |
537 | ret = load_free_space_cache(fs_info, cache); | 538 | ret = load_free_space_cache(fs_info, cache); |
538 | 539 | ||
@@ -1852,6 +1853,8 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
1852 | *actual_bytes = discarded_bytes; | 1853 | *actual_bytes = discarded_bytes; |
1853 | 1854 | ||
1854 | 1855 | ||
1856 | if (ret == -EOPNOTSUPP) | ||
1857 | ret = 0; | ||
1855 | return ret; | 1858 | return ret; |
1856 | } | 1859 | } |
1857 | 1860 | ||
@@ -2143,7 +2146,6 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | |||
2143 | node->num_bytes); | 2146 | node->num_bytes); |
2144 | } | 2147 | } |
2145 | } | 2148 | } |
2146 | mutex_unlock(&head->mutex); | ||
2147 | return ret; | 2149 | return ret; |
2148 | } | 2150 | } |
2149 | 2151 | ||
@@ -2258,7 +2260,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2258 | * process of being added. Don't run this ref yet. | 2260 | * process of being added. Don't run this ref yet. |
2259 | */ | 2261 | */ |
2260 | list_del_init(&locked_ref->cluster); | 2262 | list_del_init(&locked_ref->cluster); |
2261 | mutex_unlock(&locked_ref->mutex); | 2263 | btrfs_delayed_ref_unlock(locked_ref); |
2262 | locked_ref = NULL; | 2264 | locked_ref = NULL; |
2263 | delayed_refs->num_heads_ready++; | 2265 | delayed_refs->num_heads_ready++; |
2264 | spin_unlock(&delayed_refs->lock); | 2266 | spin_unlock(&delayed_refs->lock); |
@@ -2285,7 +2287,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2285 | ref = &locked_ref->node; | 2287 | ref = &locked_ref->node; |
2286 | 2288 | ||
2287 | if (extent_op && must_insert_reserved) { | 2289 | if (extent_op && must_insert_reserved) { |
2288 | kfree(extent_op); | 2290 | btrfs_free_delayed_extent_op(extent_op); |
2289 | extent_op = NULL; | 2291 | extent_op = NULL; |
2290 | } | 2292 | } |
2291 | 2293 | ||
@@ -2294,28 +2296,25 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2294 | 2296 | ||
2295 | ret = run_delayed_extent_op(trans, root, | 2297 | ret = run_delayed_extent_op(trans, root, |
2296 | ref, extent_op); | 2298 | ref, extent_op); |
2297 | kfree(extent_op); | 2299 | btrfs_free_delayed_extent_op(extent_op); |
2298 | 2300 | ||
2299 | if (ret) { | 2301 | if (ret) { |
2300 | list_del_init(&locked_ref->cluster); | 2302 | printk(KERN_DEBUG |
2301 | mutex_unlock(&locked_ref->mutex); | 2303 | "btrfs: run_delayed_extent_op " |
2302 | 2304 | "returned %d\n", ret); | |
2303 | printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret); | ||
2304 | spin_lock(&delayed_refs->lock); | 2305 | spin_lock(&delayed_refs->lock); |
2306 | btrfs_delayed_ref_unlock(locked_ref); | ||
2305 | return ret; | 2307 | return ret; |
2306 | } | 2308 | } |
2307 | 2309 | ||
2308 | goto next; | 2310 | goto next; |
2309 | } | 2311 | } |
2310 | |||
2311 | list_del_init(&locked_ref->cluster); | ||
2312 | locked_ref = NULL; | ||
2313 | } | 2312 | } |
2314 | 2313 | ||
2315 | ref->in_tree = 0; | 2314 | ref->in_tree = 0; |
2316 | rb_erase(&ref->rb_node, &delayed_refs->root); | 2315 | rb_erase(&ref->rb_node, &delayed_refs->root); |
2317 | delayed_refs->num_entries--; | 2316 | delayed_refs->num_entries--; |
2318 | if (locked_ref) { | 2317 | if (!btrfs_delayed_ref_is_head(ref)) { |
2319 | /* | 2318 | /* |
2320 | * when we play the delayed ref, also correct the | 2319 | * when we play the delayed ref, also correct the |
2321 | * ref_mod on head | 2320 | * ref_mod on head |
@@ -2337,20 +2336,29 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2337 | ret = run_one_delayed_ref(trans, root, ref, extent_op, | 2336 | ret = run_one_delayed_ref(trans, root, ref, extent_op, |
2338 | must_insert_reserved); | 2337 | must_insert_reserved); |
2339 | 2338 | ||
2340 | btrfs_put_delayed_ref(ref); | 2339 | btrfs_free_delayed_extent_op(extent_op); |
2341 | kfree(extent_op); | ||
2342 | count++; | ||
2343 | |||
2344 | if (ret) { | 2340 | if (ret) { |
2345 | if (locked_ref) { | 2341 | btrfs_delayed_ref_unlock(locked_ref); |
2346 | list_del_init(&locked_ref->cluster); | 2342 | btrfs_put_delayed_ref(ref); |
2347 | mutex_unlock(&locked_ref->mutex); | 2343 | printk(KERN_DEBUG |
2348 | } | 2344 | "btrfs: run_one_delayed_ref returned %d\n", ret); |
2349 | printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret); | ||
2350 | spin_lock(&delayed_refs->lock); | 2345 | spin_lock(&delayed_refs->lock); |
2351 | return ret; | 2346 | return ret; |
2352 | } | 2347 | } |
2353 | 2348 | ||
2349 | /* | ||
2350 | * If this node is a head, that means all the refs in this head | ||
2351 | * have been dealt with, and we will pick the next head to deal | ||
2352 | * with, so we must unlock the head and drop it from the cluster | ||
2353 | * list before we release it. | ||
2354 | */ | ||
2355 | if (btrfs_delayed_ref_is_head(ref)) { | ||
2356 | list_del_init(&locked_ref->cluster); | ||
2357 | btrfs_delayed_ref_unlock(locked_ref); | ||
2358 | locked_ref = NULL; | ||
2359 | } | ||
2360 | btrfs_put_delayed_ref(ref); | ||
2361 | count++; | ||
2354 | next: | 2362 | next: |
2355 | cond_resched(); | 2363 | cond_resched(); |
2356 | spin_lock(&delayed_refs->lock); | 2364 | spin_lock(&delayed_refs->lock); |
@@ -2435,6 +2443,16 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, | |||
2435 | return ret; | 2443 | return ret; |
2436 | } | 2444 | } |
2437 | 2445 | ||
2446 | static int refs_newer(struct btrfs_delayed_ref_root *delayed_refs, int seq, | ||
2447 | int count) | ||
2448 | { | ||
2449 | int val = atomic_read(&delayed_refs->ref_seq); | ||
2450 | |||
2451 | if (val < seq || val >= seq + count) | ||
2452 | return 1; | ||
2453 | return 0; | ||
2454 | } | ||
2455 | |||
2438 | /* | 2456 | /* |
2439 | * this starts processing the delayed reference count updates and | 2457 | * this starts processing the delayed reference count updates and |
2440 | * extent insertions we have queued up so far. count can be | 2458 | * extent insertions we have queued up so far. count can be |
@@ -2469,6 +2487,44 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
2469 | 2487 | ||
2470 | delayed_refs = &trans->transaction->delayed_refs; | 2488 | delayed_refs = &trans->transaction->delayed_refs; |
2471 | INIT_LIST_HEAD(&cluster); | 2489 | INIT_LIST_HEAD(&cluster); |
2490 | if (count == 0) { | ||
2491 | count = delayed_refs->num_entries * 2; | ||
2492 | run_most = 1; | ||
2493 | } | ||
2494 | |||
2495 | if (!run_all && !run_most) { | ||
2496 | int old; | ||
2497 | int seq = atomic_read(&delayed_refs->ref_seq); | ||
2498 | |||
2499 | progress: | ||
2500 | old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1); | ||
2501 | if (old) { | ||
2502 | DEFINE_WAIT(__wait); | ||
2503 | if (delayed_refs->num_entries < 16348) | ||
2504 | return 0; | ||
2505 | |||
2506 | prepare_to_wait(&delayed_refs->wait, &__wait, | ||
2507 | TASK_UNINTERRUPTIBLE); | ||
2508 | |||
2509 | old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1); | ||
2510 | if (old) { | ||
2511 | schedule(); | ||
2512 | finish_wait(&delayed_refs->wait, &__wait); | ||
2513 | |||
2514 | if (!refs_newer(delayed_refs, seq, 256)) | ||
2515 | goto progress; | ||
2516 | else | ||
2517 | return 0; | ||
2518 | } else { | ||
2519 | finish_wait(&delayed_refs->wait, &__wait); | ||
2520 | goto again; | ||
2521 | } | ||
2522 | } | ||
2523 | |||
2524 | } else { | ||
2525 | atomic_inc(&delayed_refs->procs_running_refs); | ||
2526 | } | ||
2527 | |||
2472 | again: | 2528 | again: |
2473 | loops = 0; | 2529 | loops = 0; |
2474 | spin_lock(&delayed_refs->lock); | 2530 | spin_lock(&delayed_refs->lock); |
@@ -2477,10 +2533,6 @@ again: | |||
2477 | delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); | 2533 | delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); |
2478 | #endif | 2534 | #endif |
2479 | 2535 | ||
2480 | if (count == 0) { | ||
2481 | count = delayed_refs->num_entries * 2; | ||
2482 | run_most = 1; | ||
2483 | } | ||
2484 | while (1) { | 2536 | while (1) { |
2485 | if (!(run_all || run_most) && | 2537 | if (!(run_all || run_most) && |
2486 | delayed_refs->num_heads_ready < 64) | 2538 | delayed_refs->num_heads_ready < 64) |
@@ -2500,11 +2552,15 @@ again: | |||
2500 | 2552 | ||
2501 | ret = run_clustered_refs(trans, root, &cluster); | 2553 | ret = run_clustered_refs(trans, root, &cluster); |
2502 | if (ret < 0) { | 2554 | if (ret < 0) { |
2555 | btrfs_release_ref_cluster(&cluster); | ||
2503 | spin_unlock(&delayed_refs->lock); | 2556 | spin_unlock(&delayed_refs->lock); |
2504 | btrfs_abort_transaction(trans, root, ret); | 2557 | btrfs_abort_transaction(trans, root, ret); |
2558 | atomic_dec(&delayed_refs->procs_running_refs); | ||
2505 | return ret; | 2559 | return ret; |
2506 | } | 2560 | } |
2507 | 2561 | ||
2562 | atomic_add(ret, &delayed_refs->ref_seq); | ||
2563 | |||
2508 | count -= min_t(unsigned long, ret, count); | 2564 | count -= min_t(unsigned long, ret, count); |
2509 | 2565 | ||
2510 | if (count == 0) | 2566 | if (count == 0) |
@@ -2573,6 +2629,11 @@ again: | |||
2573 | goto again; | 2629 | goto again; |
2574 | } | 2630 | } |
2575 | out: | 2631 | out: |
2632 | atomic_dec(&delayed_refs->procs_running_refs); | ||
2633 | smp_mb(); | ||
2634 | if (waitqueue_active(&delayed_refs->wait)) | ||
2635 | wake_up(&delayed_refs->wait); | ||
2636 | |||
2576 | spin_unlock(&delayed_refs->lock); | 2637 | spin_unlock(&delayed_refs->lock); |
2577 | assert_qgroups_uptodate(trans); | 2638 | assert_qgroups_uptodate(trans); |
2578 | return 0; | 2639 | return 0; |
@@ -2586,7 +2647,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, | |||
2586 | struct btrfs_delayed_extent_op *extent_op; | 2647 | struct btrfs_delayed_extent_op *extent_op; |
2587 | int ret; | 2648 | int ret; |
2588 | 2649 | ||
2589 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | 2650 | extent_op = btrfs_alloc_delayed_extent_op(); |
2590 | if (!extent_op) | 2651 | if (!extent_op) |
2591 | return -ENOMEM; | 2652 | return -ENOMEM; |
2592 | 2653 | ||
@@ -2598,7 +2659,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, | |||
2598 | ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr, | 2659 | ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr, |
2599 | num_bytes, extent_op); | 2660 | num_bytes, extent_op); |
2600 | if (ret) | 2661 | if (ret) |
2601 | kfree(extent_op); | 2662 | btrfs_free_delayed_extent_op(extent_op); |
2602 | return ret; | 2663 | return ret; |
2603 | } | 2664 | } |
2604 | 2665 | ||
@@ -3223,12 +3284,14 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | |||
3223 | u64 extra_flags = chunk_to_extended(flags) & | 3284 | u64 extra_flags = chunk_to_extended(flags) & |
3224 | BTRFS_EXTENDED_PROFILE_MASK; | 3285 | BTRFS_EXTENDED_PROFILE_MASK; |
3225 | 3286 | ||
3287 | write_seqlock(&fs_info->profiles_lock); | ||
3226 | if (flags & BTRFS_BLOCK_GROUP_DATA) | 3288 | if (flags & BTRFS_BLOCK_GROUP_DATA) |
3227 | fs_info->avail_data_alloc_bits |= extra_flags; | 3289 | fs_info->avail_data_alloc_bits |= extra_flags; |
3228 | if (flags & BTRFS_BLOCK_GROUP_METADATA) | 3290 | if (flags & BTRFS_BLOCK_GROUP_METADATA) |
3229 | fs_info->avail_metadata_alloc_bits |= extra_flags; | 3291 | fs_info->avail_metadata_alloc_bits |= extra_flags; |
3230 | if (flags & BTRFS_BLOCK_GROUP_SYSTEM) | 3292 | if (flags & BTRFS_BLOCK_GROUP_SYSTEM) |
3231 | fs_info->avail_system_alloc_bits |= extra_flags; | 3293 | fs_info->avail_system_alloc_bits |= extra_flags; |
3294 | write_sequnlock(&fs_info->profiles_lock); | ||
3232 | } | 3295 | } |
3233 | 3296 | ||
3234 | /* | 3297 | /* |
@@ -3276,6 +3339,7 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | |||
3276 | u64 num_devices = root->fs_info->fs_devices->rw_devices + | 3339 | u64 num_devices = root->fs_info->fs_devices->rw_devices + |
3277 | root->fs_info->fs_devices->missing_devices; | 3340 | root->fs_info->fs_devices->missing_devices; |
3278 | u64 target; | 3341 | u64 target; |
3342 | u64 tmp; | ||
3279 | 3343 | ||
3280 | /* | 3344 | /* |
3281 | * see if restripe for this chunk_type is in progress, if so | 3345 | * see if restripe for this chunk_type is in progress, if so |
@@ -3292,40 +3356,48 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | |||
3292 | } | 3356 | } |
3293 | spin_unlock(&root->fs_info->balance_lock); | 3357 | spin_unlock(&root->fs_info->balance_lock); |
3294 | 3358 | ||
3359 | /* First, mask out the RAID levels which aren't possible */ | ||
3295 | if (num_devices == 1) | 3360 | if (num_devices == 1) |
3296 | flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); | 3361 | flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0 | |
3362 | BTRFS_BLOCK_GROUP_RAID5); | ||
3363 | if (num_devices < 3) | ||
3364 | flags &= ~BTRFS_BLOCK_GROUP_RAID6; | ||
3297 | if (num_devices < 4) | 3365 | if (num_devices < 4) |
3298 | flags &= ~BTRFS_BLOCK_GROUP_RAID10; | 3366 | flags &= ~BTRFS_BLOCK_GROUP_RAID10; |
3299 | 3367 | ||
3300 | if ((flags & BTRFS_BLOCK_GROUP_DUP) && | 3368 | tmp = flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID0 | |
3301 | (flags & (BTRFS_BLOCK_GROUP_RAID1 | | 3369 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID5 | |
3302 | BTRFS_BLOCK_GROUP_RAID10))) { | 3370 | BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_RAID10); |
3303 | flags &= ~BTRFS_BLOCK_GROUP_DUP; | 3371 | flags &= ~tmp; |
3304 | } | ||
3305 | |||
3306 | if ((flags & BTRFS_BLOCK_GROUP_RAID1) && | ||
3307 | (flags & BTRFS_BLOCK_GROUP_RAID10)) { | ||
3308 | flags &= ~BTRFS_BLOCK_GROUP_RAID1; | ||
3309 | } | ||
3310 | 3372 | ||
3311 | if ((flags & BTRFS_BLOCK_GROUP_RAID0) && | 3373 | if (tmp & BTRFS_BLOCK_GROUP_RAID6) |
3312 | ((flags & BTRFS_BLOCK_GROUP_RAID1) | | 3374 | tmp = BTRFS_BLOCK_GROUP_RAID6; |
3313 | (flags & BTRFS_BLOCK_GROUP_RAID10) | | 3375 | else if (tmp & BTRFS_BLOCK_GROUP_RAID5) |
3314 | (flags & BTRFS_BLOCK_GROUP_DUP))) { | 3376 | tmp = BTRFS_BLOCK_GROUP_RAID5; |
3315 | flags &= ~BTRFS_BLOCK_GROUP_RAID0; | 3377 | else if (tmp & BTRFS_BLOCK_GROUP_RAID10) |
3316 | } | 3378 | tmp = BTRFS_BLOCK_GROUP_RAID10; |
3379 | else if (tmp & BTRFS_BLOCK_GROUP_RAID1) | ||
3380 | tmp = BTRFS_BLOCK_GROUP_RAID1; | ||
3381 | else if (tmp & BTRFS_BLOCK_GROUP_RAID0) | ||
3382 | tmp = BTRFS_BLOCK_GROUP_RAID0; | ||
3317 | 3383 | ||
3318 | return extended_to_chunk(flags); | 3384 | return extended_to_chunk(flags | tmp); |
3319 | } | 3385 | } |
3320 | 3386 | ||
3321 | static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) | 3387 | static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) |
3322 | { | 3388 | { |
3323 | if (flags & BTRFS_BLOCK_GROUP_DATA) | 3389 | unsigned seq; |
3324 | flags |= root->fs_info->avail_data_alloc_bits; | 3390 | |
3325 | else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) | 3391 | do { |
3326 | flags |= root->fs_info->avail_system_alloc_bits; | 3392 | seq = read_seqbegin(&root->fs_info->profiles_lock); |
3327 | else if (flags & BTRFS_BLOCK_GROUP_METADATA) | 3393 | |
3328 | flags |= root->fs_info->avail_metadata_alloc_bits; | 3394 | if (flags & BTRFS_BLOCK_GROUP_DATA) |
3395 | flags |= root->fs_info->avail_data_alloc_bits; | ||
3396 | else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) | ||
3397 | flags |= root->fs_info->avail_system_alloc_bits; | ||
3398 | else if (flags & BTRFS_BLOCK_GROUP_METADATA) | ||
3399 | flags |= root->fs_info->avail_metadata_alloc_bits; | ||
3400 | } while (read_seqretry(&root->fs_info->profiles_lock, seq)); | ||
3329 | 3401 | ||
3330 | return btrfs_reduce_alloc_profile(root, flags); | 3402 | return btrfs_reduce_alloc_profile(root, flags); |
3331 | } | 3403 | } |
@@ -3333,6 +3405,7 @@ static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) | |||
3333 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) | 3405 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) |
3334 | { | 3406 | { |
3335 | u64 flags; | 3407 | u64 flags; |
3408 | u64 ret; | ||
3336 | 3409 | ||
3337 | if (data) | 3410 | if (data) |
3338 | flags = BTRFS_BLOCK_GROUP_DATA; | 3411 | flags = BTRFS_BLOCK_GROUP_DATA; |
@@ -3341,7 +3414,8 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) | |||
3341 | else | 3414 | else |
3342 | flags = BTRFS_BLOCK_GROUP_METADATA; | 3415 | flags = BTRFS_BLOCK_GROUP_METADATA; |
3343 | 3416 | ||
3344 | return get_alloc_profile(root, flags); | 3417 | ret = get_alloc_profile(root, flags); |
3418 | return ret; | ||
3345 | } | 3419 | } |
3346 | 3420 | ||
3347 | /* | 3421 | /* |
@@ -3357,7 +3431,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) | |||
3357 | int ret = 0, committed = 0, alloc_chunk = 1; | 3431 | int ret = 0, committed = 0, alloc_chunk = 1; |
3358 | 3432 | ||
3359 | /* make sure bytes are sectorsize aligned */ | 3433 | /* make sure bytes are sectorsize aligned */ |
3360 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 3434 | bytes = ALIGN(bytes, root->sectorsize); |
3361 | 3435 | ||
3362 | if (root == root->fs_info->tree_root || | 3436 | if (root == root->fs_info->tree_root || |
3363 | BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) { | 3437 | BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) { |
@@ -3452,7 +3526,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) | |||
3452 | struct btrfs_space_info *data_sinfo; | 3526 | struct btrfs_space_info *data_sinfo; |
3453 | 3527 | ||
3454 | /* make sure bytes are sectorsize aligned */ | 3528 | /* make sure bytes are sectorsize aligned */ |
3455 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 3529 | bytes = ALIGN(bytes, root->sectorsize); |
3456 | 3530 | ||
3457 | data_sinfo = root->fs_info->data_sinfo; | 3531 | data_sinfo = root->fs_info->data_sinfo; |
3458 | spin_lock(&data_sinfo->lock); | 3532 | spin_lock(&data_sinfo->lock); |
@@ -3516,8 +3590,10 @@ static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type) | |||
3516 | { | 3590 | { |
3517 | u64 num_dev; | 3591 | u64 num_dev; |
3518 | 3592 | ||
3519 | if (type & BTRFS_BLOCK_GROUP_RAID10 || | 3593 | if (type & (BTRFS_BLOCK_GROUP_RAID10 | |
3520 | type & BTRFS_BLOCK_GROUP_RAID0) | 3594 | BTRFS_BLOCK_GROUP_RAID0 | |
3595 | BTRFS_BLOCK_GROUP_RAID5 | | ||
3596 | BTRFS_BLOCK_GROUP_RAID6)) | ||
3521 | num_dev = root->fs_info->fs_devices->rw_devices; | 3597 | num_dev = root->fs_info->fs_devices->rw_devices; |
3522 | else if (type & BTRFS_BLOCK_GROUP_RAID1) | 3598 | else if (type & BTRFS_BLOCK_GROUP_RAID1) |
3523 | num_dev = 2; | 3599 | num_dev = 2; |
@@ -3564,6 +3640,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3564 | int wait_for_alloc = 0; | 3640 | int wait_for_alloc = 0; |
3565 | int ret = 0; | 3641 | int ret = 0; |
3566 | 3642 | ||
3643 | /* Don't re-enter if we're already allocating a chunk */ | ||
3644 | if (trans->allocating_chunk) | ||
3645 | return -ENOSPC; | ||
3646 | |||
3567 | space_info = __find_space_info(extent_root->fs_info, flags); | 3647 | space_info = __find_space_info(extent_root->fs_info, flags); |
3568 | if (!space_info) { | 3648 | if (!space_info) { |
3569 | ret = update_space_info(extent_root->fs_info, flags, | 3649 | ret = update_space_info(extent_root->fs_info, flags, |
@@ -3606,6 +3686,8 @@ again: | |||
3606 | goto again; | 3686 | goto again; |
3607 | } | 3687 | } |
3608 | 3688 | ||
3689 | trans->allocating_chunk = true; | ||
3690 | |||
3609 | /* | 3691 | /* |
3610 | * If we have mixed data/metadata chunks we want to make sure we keep | 3692 | * If we have mixed data/metadata chunks we want to make sure we keep |
3611 | * allocating mixed chunks instead of individual chunks. | 3693 | * allocating mixed chunks instead of individual chunks. |
@@ -3632,19 +3714,20 @@ again: | |||
3632 | check_system_chunk(trans, extent_root, flags); | 3714 | check_system_chunk(trans, extent_root, flags); |
3633 | 3715 | ||
3634 | ret = btrfs_alloc_chunk(trans, extent_root, flags); | 3716 | ret = btrfs_alloc_chunk(trans, extent_root, flags); |
3635 | if (ret < 0 && ret != -ENOSPC) | 3717 | trans->allocating_chunk = false; |
3636 | goto out; | ||
3637 | 3718 | ||
3638 | spin_lock(&space_info->lock); | 3719 | spin_lock(&space_info->lock); |
3720 | if (ret < 0 && ret != -ENOSPC) | ||
3721 | goto out; | ||
3639 | if (ret) | 3722 | if (ret) |
3640 | space_info->full = 1; | 3723 | space_info->full = 1; |
3641 | else | 3724 | else |
3642 | ret = 1; | 3725 | ret = 1; |
3643 | 3726 | ||
3644 | space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; | 3727 | space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; |
3728 | out: | ||
3645 | space_info->chunk_alloc = 0; | 3729 | space_info->chunk_alloc = 0; |
3646 | spin_unlock(&space_info->lock); | 3730 | spin_unlock(&space_info->lock); |
3647 | out: | ||
3648 | mutex_unlock(&fs_info->chunk_mutex); | 3731 | mutex_unlock(&fs_info->chunk_mutex); |
3649 | return ret; | 3732 | return ret; |
3650 | } | 3733 | } |
@@ -3653,13 +3736,31 @@ static int can_overcommit(struct btrfs_root *root, | |||
3653 | struct btrfs_space_info *space_info, u64 bytes, | 3736 | struct btrfs_space_info *space_info, u64 bytes, |
3654 | enum btrfs_reserve_flush_enum flush) | 3737 | enum btrfs_reserve_flush_enum flush) |
3655 | { | 3738 | { |
3739 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; | ||
3656 | u64 profile = btrfs_get_alloc_profile(root, 0); | 3740 | u64 profile = btrfs_get_alloc_profile(root, 0); |
3741 | u64 rsv_size = 0; | ||
3657 | u64 avail; | 3742 | u64 avail; |
3658 | u64 used; | 3743 | u64 used; |
3744 | u64 to_add; | ||
3659 | 3745 | ||
3660 | used = space_info->bytes_used + space_info->bytes_reserved + | 3746 | used = space_info->bytes_used + space_info->bytes_reserved + |
3661 | space_info->bytes_pinned + space_info->bytes_readonly + | 3747 | space_info->bytes_pinned + space_info->bytes_readonly; |
3662 | space_info->bytes_may_use; | 3748 | |
3749 | spin_lock(&global_rsv->lock); | ||
3750 | rsv_size = global_rsv->size; | ||
3751 | spin_unlock(&global_rsv->lock); | ||
3752 | |||
3753 | /* | ||
3754 | * We only want to allow over committing if we have lots of actual space | ||
3755 | * free, but if we don't have enough space to handle the global reserve | ||
3756 | * space then we could end up having a real enospc problem when trying | ||
3757 | * to allocate a chunk or some other such important allocation. | ||
3758 | */ | ||
3759 | rsv_size <<= 1; | ||
3760 | if (used + rsv_size >= space_info->total_bytes) | ||
3761 | return 0; | ||
3762 | |||
3763 | used += space_info->bytes_may_use; | ||
3663 | 3764 | ||
3664 | spin_lock(&root->fs_info->free_chunk_lock); | 3765 | spin_lock(&root->fs_info->free_chunk_lock); |
3665 | avail = root->fs_info->free_chunk_space; | 3766 | avail = root->fs_info->free_chunk_space; |
@@ -3667,28 +3768,60 @@ static int can_overcommit(struct btrfs_root *root, | |||
3667 | 3768 | ||
3668 | /* | 3769 | /* |
3669 | * If we have dup, raid1 or raid10 then only half of the free | 3770 | * If we have dup, raid1 or raid10 then only half of the free |
3670 | * space is actually useable. | 3771 | * space is actually useable. For raid56, the space info used |
3772 | * doesn't include the parity drive, so we don't have to | ||
3773 | * change the math | ||
3671 | */ | 3774 | */ |
3672 | if (profile & (BTRFS_BLOCK_GROUP_DUP | | 3775 | if (profile & (BTRFS_BLOCK_GROUP_DUP | |
3673 | BTRFS_BLOCK_GROUP_RAID1 | | 3776 | BTRFS_BLOCK_GROUP_RAID1 | |
3674 | BTRFS_BLOCK_GROUP_RAID10)) | 3777 | BTRFS_BLOCK_GROUP_RAID10)) |
3675 | avail >>= 1; | 3778 | avail >>= 1; |
3676 | 3779 | ||
3780 | to_add = space_info->total_bytes; | ||
3781 | |||
3677 | /* | 3782 | /* |
3678 | * If we aren't flushing all things, let us overcommit up to | 3783 | * If we aren't flushing all things, let us overcommit up to |
3679 | * 1/2th of the space. If we can flush, don't let us overcommit | 3784 | * 1/2th of the space. If we can flush, don't let us overcommit |
3680 | * too much, let it overcommit up to 1/8 of the space. | 3785 | * too much, let it overcommit up to 1/8 of the space. |
3681 | */ | 3786 | */ |
3682 | if (flush == BTRFS_RESERVE_FLUSH_ALL) | 3787 | if (flush == BTRFS_RESERVE_FLUSH_ALL) |
3683 | avail >>= 3; | 3788 | to_add >>= 3; |
3684 | else | 3789 | else |
3685 | avail >>= 1; | 3790 | to_add >>= 1; |
3686 | 3791 | ||
3687 | if (used + bytes < space_info->total_bytes + avail) | 3792 | /* |
3793 | * Limit the overcommit to the amount of free space we could possibly | ||
3794 | * allocate for chunks. | ||
3795 | */ | ||
3796 | to_add = min(avail, to_add); | ||
3797 | |||
3798 | if (used + bytes < space_info->total_bytes + to_add) | ||
3688 | return 1; | 3799 | return 1; |
3689 | return 0; | 3800 | return 0; |
3690 | } | 3801 | } |
3691 | 3802 | ||
3803 | void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, | ||
3804 | unsigned long nr_pages) | ||
3805 | { | ||
3806 | struct super_block *sb = root->fs_info->sb; | ||
3807 | int started; | ||
3808 | |||
3809 | /* If we can not start writeback, just sync all the delalloc file. */ | ||
3810 | started = try_to_writeback_inodes_sb_nr(sb, nr_pages, | ||
3811 | WB_REASON_FS_FREE_SPACE); | ||
3812 | if (!started) { | ||
3813 | /* | ||
3814 | * We needn't worry the filesystem going from r/w to r/o though | ||
3815 | * we don't acquire ->s_umount mutex, because the filesystem | ||
3816 | * should guarantee the delalloc inodes list be empty after | ||
3817 | * the filesystem is readonly(all dirty pages are written to | ||
3818 | * the disk). | ||
3819 | */ | ||
3820 | btrfs_start_delalloc_inodes(root, 0); | ||
3821 | btrfs_wait_ordered_extents(root, 0); | ||
3822 | } | ||
3823 | } | ||
3824 | |||
3692 | /* | 3825 | /* |
3693 | * shrink metadata reservation for delalloc | 3826 | * shrink metadata reservation for delalloc |
3694 | */ | 3827 | */ |
@@ -3710,7 +3843,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
3710 | space_info = block_rsv->space_info; | 3843 | space_info = block_rsv->space_info; |
3711 | 3844 | ||
3712 | smp_mb(); | 3845 | smp_mb(); |
3713 | delalloc_bytes = root->fs_info->delalloc_bytes; | 3846 | delalloc_bytes = percpu_counter_sum_positive( |
3847 | &root->fs_info->delalloc_bytes); | ||
3714 | if (delalloc_bytes == 0) { | 3848 | if (delalloc_bytes == 0) { |
3715 | if (trans) | 3849 | if (trans) |
3716 | return; | 3850 | return; |
@@ -3721,10 +3855,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
3721 | while (delalloc_bytes && loops < 3) { | 3855 | while (delalloc_bytes && loops < 3) { |
3722 | max_reclaim = min(delalloc_bytes, to_reclaim); | 3856 | max_reclaim = min(delalloc_bytes, to_reclaim); |
3723 | nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; | 3857 | nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; |
3724 | try_to_writeback_inodes_sb_nr(root->fs_info->sb, | 3858 | btrfs_writeback_inodes_sb_nr(root, nr_pages); |
3725 | nr_pages, | ||
3726 | WB_REASON_FS_FREE_SPACE); | ||
3727 | |||
3728 | /* | 3859 | /* |
3729 | * We need to wait for the async pages to actually start before | 3860 | * We need to wait for the async pages to actually start before |
3730 | * we do anything. | 3861 | * we do anything. |
@@ -3752,7 +3883,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
3752 | break; | 3883 | break; |
3753 | } | 3884 | } |
3754 | smp_mb(); | 3885 | smp_mb(); |
3755 | delalloc_bytes = root->fs_info->delalloc_bytes; | 3886 | delalloc_bytes = percpu_counter_sum_positive( |
3887 | &root->fs_info->delalloc_bytes); | ||
3756 | } | 3888 | } |
3757 | } | 3889 | } |
3758 | 3890 | ||
@@ -4016,6 +4148,15 @@ again: | |||
4016 | goto again; | 4148 | goto again; |
4017 | 4149 | ||
4018 | out: | 4150 | out: |
4151 | if (ret == -ENOSPC && | ||
4152 | unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { | ||
4153 | struct btrfs_block_rsv *global_rsv = | ||
4154 | &root->fs_info->global_block_rsv; | ||
4155 | |||
4156 | if (block_rsv != global_rsv && | ||
4157 | !block_rsv_use_bytes(global_rsv, orig_bytes)) | ||
4158 | ret = 0; | ||
4159 | } | ||
4019 | if (flushing) { | 4160 | if (flushing) { |
4020 | spin_lock(&space_info->lock); | 4161 | spin_lock(&space_info->lock); |
4021 | space_info->flush = 0; | 4162 | space_info->flush = 0; |
@@ -4402,19 +4543,60 @@ void btrfs_orphan_release_metadata(struct inode *inode) | |||
4402 | btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); | 4543 | btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); |
4403 | } | 4544 | } |
4404 | 4545 | ||
4405 | int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | 4546 | /* |
4406 | struct btrfs_pending_snapshot *pending) | 4547 | * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation |
4548 | * root: the root of the parent directory | ||
4549 | * rsv: block reservation | ||
4550 | * items: the number of items that we need do reservation | ||
4551 | * qgroup_reserved: used to return the reserved size in qgroup | ||
4552 | * | ||
4553 | * This function is used to reserve the space for snapshot/subvolume | ||
4554 | * creation and deletion. Those operations are different with the | ||
4555 | * common file/directory operations, they change two fs/file trees | ||
4556 | * and root tree, the number of items that the qgroup reserves is | ||
4557 | * different with the free space reservation. So we can not use | ||
4558 | * the space reservation mechanism in start_transaction(). | ||
4559 | */ | ||
4560 | int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, | ||
4561 | struct btrfs_block_rsv *rsv, | ||
4562 | int items, | ||
4563 | u64 *qgroup_reserved) | ||
4407 | { | 4564 | { |
4408 | struct btrfs_root *root = pending->root; | 4565 | u64 num_bytes; |
4409 | struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); | 4566 | int ret; |
4410 | struct btrfs_block_rsv *dst_rsv = &pending->block_rsv; | 4567 | |
4411 | /* | 4568 | if (root->fs_info->quota_enabled) { |
4412 | * two for root back/forward refs, two for directory entries, | 4569 | /* One for parent inode, two for dir entries */ |
4413 | * one for root of the snapshot and one for parent inode. | 4570 | num_bytes = 3 * root->leafsize; |
4414 | */ | 4571 | ret = btrfs_qgroup_reserve(root, num_bytes); |
4415 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 6); | 4572 | if (ret) |
4416 | dst_rsv->space_info = src_rsv->space_info; | 4573 | return ret; |
4417 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | 4574 | } else { |
4575 | num_bytes = 0; | ||
4576 | } | ||
4577 | |||
4578 | *qgroup_reserved = num_bytes; | ||
4579 | |||
4580 | num_bytes = btrfs_calc_trans_metadata_size(root, items); | ||
4581 | rsv->space_info = __find_space_info(root->fs_info, | ||
4582 | BTRFS_BLOCK_GROUP_METADATA); | ||
4583 | ret = btrfs_block_rsv_add(root, rsv, num_bytes, | ||
4584 | BTRFS_RESERVE_FLUSH_ALL); | ||
4585 | if (ret) { | ||
4586 | if (*qgroup_reserved) | ||
4587 | btrfs_qgroup_free(root, *qgroup_reserved); | ||
4588 | } | ||
4589 | |||
4590 | return ret; | ||
4591 | } | ||
4592 | |||
4593 | void btrfs_subvolume_release_metadata(struct btrfs_root *root, | ||
4594 | struct btrfs_block_rsv *rsv, | ||
4595 | u64 qgroup_reserved) | ||
4596 | { | ||
4597 | btrfs_block_rsv_release(root, rsv, (u64)-1); | ||
4598 | if (qgroup_reserved) | ||
4599 | btrfs_qgroup_free(root, qgroup_reserved); | ||
4418 | } | 4600 | } |
4419 | 4601 | ||
4420 | /** | 4602 | /** |
@@ -4522,6 +4704,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4522 | enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; | 4704 | enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; |
4523 | int ret = 0; | 4705 | int ret = 0; |
4524 | bool delalloc_lock = true; | 4706 | bool delalloc_lock = true; |
4707 | u64 to_free = 0; | ||
4708 | unsigned dropped; | ||
4525 | 4709 | ||
4526 | /* If we are a free space inode we need to not flush since we will be in | 4710 | /* If we are a free space inode we need to not flush since we will be in |
4527 | * the middle of a transaction commit. We also don't need the delalloc | 4711 | * the middle of a transaction commit. We also don't need the delalloc |
@@ -4565,54 +4749,19 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4565 | csum_bytes = BTRFS_I(inode)->csum_bytes; | 4749 | csum_bytes = BTRFS_I(inode)->csum_bytes; |
4566 | spin_unlock(&BTRFS_I(inode)->lock); | 4750 | spin_unlock(&BTRFS_I(inode)->lock); |
4567 | 4751 | ||
4568 | if (root->fs_info->quota_enabled) | 4752 | if (root->fs_info->quota_enabled) { |
4569 | ret = btrfs_qgroup_reserve(root, num_bytes + | 4753 | ret = btrfs_qgroup_reserve(root, num_bytes + |
4570 | nr_extents * root->leafsize); | 4754 | nr_extents * root->leafsize); |
4755 | if (ret) | ||
4756 | goto out_fail; | ||
4757 | } | ||
4571 | 4758 | ||
4572 | /* | 4759 | ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); |
4573 | * ret != 0 here means the qgroup reservation failed, we go straight to | 4760 | if (unlikely(ret)) { |
4574 | * the shared error handling then. | 4761 | if (root->fs_info->quota_enabled) |
4575 | */ | ||
4576 | if (ret == 0) | ||
4577 | ret = reserve_metadata_bytes(root, block_rsv, | ||
4578 | to_reserve, flush); | ||
4579 | |||
4580 | if (ret) { | ||
4581 | u64 to_free = 0; | ||
4582 | unsigned dropped; | ||
4583 | |||
4584 | spin_lock(&BTRFS_I(inode)->lock); | ||
4585 | dropped = drop_outstanding_extent(inode); | ||
4586 | /* | ||
4587 | * If the inodes csum_bytes is the same as the original | ||
4588 | * csum_bytes then we know we haven't raced with any free()ers | ||
4589 | * so we can just reduce our inodes csum bytes and carry on. | ||
4590 | * Otherwise we have to do the normal free thing to account for | ||
4591 | * the case that the free side didn't free up its reserve | ||
4592 | * because of this outstanding reservation. | ||
4593 | */ | ||
4594 | if (BTRFS_I(inode)->csum_bytes == csum_bytes) | ||
4595 | calc_csum_metadata_size(inode, num_bytes, 0); | ||
4596 | else | ||
4597 | to_free = calc_csum_metadata_size(inode, num_bytes, 0); | ||
4598 | spin_unlock(&BTRFS_I(inode)->lock); | ||
4599 | if (dropped) | ||
4600 | to_free += btrfs_calc_trans_metadata_size(root, dropped); | ||
4601 | |||
4602 | if (to_free) { | ||
4603 | btrfs_block_rsv_release(root, block_rsv, to_free); | ||
4604 | trace_btrfs_space_reservation(root->fs_info, | ||
4605 | "delalloc", | ||
4606 | btrfs_ino(inode), | ||
4607 | to_free, 0); | ||
4608 | } | ||
4609 | if (root->fs_info->quota_enabled) { | ||
4610 | btrfs_qgroup_free(root, num_bytes + | 4762 | btrfs_qgroup_free(root, num_bytes + |
4611 | nr_extents * root->leafsize); | 4763 | nr_extents * root->leafsize); |
4612 | } | 4764 | goto out_fail; |
4613 | if (delalloc_lock) | ||
4614 | mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); | ||
4615 | return ret; | ||
4616 | } | 4765 | } |
4617 | 4766 | ||
4618 | spin_lock(&BTRFS_I(inode)->lock); | 4767 | spin_lock(&BTRFS_I(inode)->lock); |
@@ -4633,6 +4782,34 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4633 | block_rsv_add_bytes(block_rsv, to_reserve, 1); | 4782 | block_rsv_add_bytes(block_rsv, to_reserve, 1); |
4634 | 4783 | ||
4635 | return 0; | 4784 | return 0; |
4785 | |||
4786 | out_fail: | ||
4787 | spin_lock(&BTRFS_I(inode)->lock); | ||
4788 | dropped = drop_outstanding_extent(inode); | ||
4789 | /* | ||
4790 | * If the inodes csum_bytes is the same as the original | ||
4791 | * csum_bytes then we know we haven't raced with any free()ers | ||
4792 | * so we can just reduce our inodes csum bytes and carry on. | ||
4793 | * Otherwise we have to do the normal free thing to account for | ||
4794 | * the case that the free side didn't free up its reserve | ||
4795 | * because of this outstanding reservation. | ||
4796 | */ | ||
4797 | if (BTRFS_I(inode)->csum_bytes == csum_bytes) | ||
4798 | calc_csum_metadata_size(inode, num_bytes, 0); | ||
4799 | else | ||
4800 | to_free = calc_csum_metadata_size(inode, num_bytes, 0); | ||
4801 | spin_unlock(&BTRFS_I(inode)->lock); | ||
4802 | if (dropped) | ||
4803 | to_free += btrfs_calc_trans_metadata_size(root, dropped); | ||
4804 | |||
4805 | if (to_free) { | ||
4806 | btrfs_block_rsv_release(root, block_rsv, to_free); | ||
4807 | trace_btrfs_space_reservation(root->fs_info, "delalloc", | ||
4808 | btrfs_ino(inode), to_free, 0); | ||
4809 | } | ||
4810 | if (delalloc_lock) | ||
4811 | mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); | ||
4812 | return ret; | ||
4636 | } | 4813 | } |
4637 | 4814 | ||
4638 | /** | 4815 | /** |
@@ -4654,7 +4831,8 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | |||
4654 | spin_lock(&BTRFS_I(inode)->lock); | 4831 | spin_lock(&BTRFS_I(inode)->lock); |
4655 | dropped = drop_outstanding_extent(inode); | 4832 | dropped = drop_outstanding_extent(inode); |
4656 | 4833 | ||
4657 | to_free = calc_csum_metadata_size(inode, num_bytes, 0); | 4834 | if (num_bytes) |
4835 | to_free = calc_csum_metadata_size(inode, num_bytes, 0); | ||
4658 | spin_unlock(&BTRFS_I(inode)->lock); | 4836 | spin_unlock(&BTRFS_I(inode)->lock); |
4659 | if (dropped > 0) | 4837 | if (dropped > 0) |
4660 | to_free += btrfs_calc_trans_metadata_size(root, dropped); | 4838 | to_free += btrfs_calc_trans_metadata_size(root, dropped); |
@@ -4721,8 +4899,7 @@ void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes) | |||
4721 | btrfs_free_reserved_data_space(inode, num_bytes); | 4899 | btrfs_free_reserved_data_space(inode, num_bytes); |
4722 | } | 4900 | } |
4723 | 4901 | ||
4724 | static int update_block_group(struct btrfs_trans_handle *trans, | 4902 | static int update_block_group(struct btrfs_root *root, |
4725 | struct btrfs_root *root, | ||
4726 | u64 bytenr, u64 num_bytes, int alloc) | 4903 | u64 bytenr, u64 num_bytes, int alloc) |
4727 | { | 4904 | { |
4728 | struct btrfs_block_group_cache *cache = NULL; | 4905 | struct btrfs_block_group_cache *cache = NULL; |
@@ -4759,7 +4936,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
4759 | * space back to the block group, otherwise we will leak space. | 4936 | * space back to the block group, otherwise we will leak space. |
4760 | */ | 4937 | */ |
4761 | if (!alloc && cache->cached == BTRFS_CACHE_NO) | 4938 | if (!alloc && cache->cached == BTRFS_CACHE_NO) |
4762 | cache_block_group(cache, trans, NULL, 1); | 4939 | cache_block_group(cache, 1); |
4763 | 4940 | ||
4764 | byte_in_group = bytenr - cache->key.objectid; | 4941 | byte_in_group = bytenr - cache->key.objectid; |
4765 | WARN_ON(byte_in_group > cache->key.offset); | 4942 | WARN_ON(byte_in_group > cache->key.offset); |
@@ -4809,6 +4986,13 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) | |||
4809 | struct btrfs_block_group_cache *cache; | 4986 | struct btrfs_block_group_cache *cache; |
4810 | u64 bytenr; | 4987 | u64 bytenr; |
4811 | 4988 | ||
4989 | spin_lock(&root->fs_info->block_group_cache_lock); | ||
4990 | bytenr = root->fs_info->first_logical_byte; | ||
4991 | spin_unlock(&root->fs_info->block_group_cache_lock); | ||
4992 | |||
4993 | if (bytenr < (u64)-1) | ||
4994 | return bytenr; | ||
4995 | |||
4812 | cache = btrfs_lookup_first_block_group(root->fs_info, search_start); | 4996 | cache = btrfs_lookup_first_block_group(root->fs_info, search_start); |
4813 | if (!cache) | 4997 | if (!cache) |
4814 | return 0; | 4998 | return 0; |
@@ -4859,8 +5043,7 @@ int btrfs_pin_extent(struct btrfs_root *root, | |||
4859 | /* | 5043 | /* |
4860 | * this function must be called within transaction | 5044 | * this function must be called within transaction |
4861 | */ | 5045 | */ |
4862 | int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, | 5046 | int btrfs_pin_extent_for_log_replay(struct btrfs_root *root, |
4863 | struct btrfs_root *root, | ||
4864 | u64 bytenr, u64 num_bytes) | 5047 | u64 bytenr, u64 num_bytes) |
4865 | { | 5048 | { |
4866 | struct btrfs_block_group_cache *cache; | 5049 | struct btrfs_block_group_cache *cache; |
@@ -4874,7 +5057,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, | |||
4874 | * to one because the slow code to read in the free extents does check | 5057 | * to one because the slow code to read in the free extents does check |
4875 | * the pinned extents. | 5058 | * the pinned extents. |
4876 | */ | 5059 | */ |
4877 | cache_block_group(cache, trans, root, 1); | 5060 | cache_block_group(cache, 1); |
4878 | 5061 | ||
4879 | pin_down_extent(root, cache, bytenr, num_bytes, 0); | 5062 | pin_down_extent(root, cache, bytenr, num_bytes, 0); |
4880 | 5063 | ||
@@ -5271,7 +5454,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5271 | } | 5454 | } |
5272 | } | 5455 | } |
5273 | 5456 | ||
5274 | ret = update_block_group(trans, root, bytenr, num_bytes, 0); | 5457 | ret = update_block_group(root, bytenr, num_bytes, 0); |
5275 | if (ret) { | 5458 | if (ret) { |
5276 | btrfs_abort_transaction(trans, extent_root, ret); | 5459 | btrfs_abort_transaction(trans, extent_root, ret); |
5277 | goto out; | 5460 | goto out; |
@@ -5316,7 +5499,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
5316 | if (head->extent_op) { | 5499 | if (head->extent_op) { |
5317 | if (!head->must_insert_reserved) | 5500 | if (!head->must_insert_reserved) |
5318 | goto out; | 5501 | goto out; |
5319 | kfree(head->extent_op); | 5502 | btrfs_free_delayed_extent_op(head->extent_op); |
5320 | head->extent_op = NULL; | 5503 | head->extent_op = NULL; |
5321 | } | 5504 | } |
5322 | 5505 | ||
@@ -5439,10 +5622,11 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
5439 | return ret; | 5622 | return ret; |
5440 | } | 5623 | } |
5441 | 5624 | ||
5442 | static u64 stripe_align(struct btrfs_root *root, u64 val) | 5625 | static u64 stripe_align(struct btrfs_root *root, |
5626 | struct btrfs_block_group_cache *cache, | ||
5627 | u64 val, u64 num_bytes) | ||
5443 | { | 5628 | { |
5444 | u64 mask = ((u64)root->stripesize - 1); | 5629 | u64 ret = ALIGN(val, root->stripesize); |
5445 | u64 ret = (val + mask) & ~mask; | ||
5446 | return ret; | 5630 | return ret; |
5447 | } | 5631 | } |
5448 | 5632 | ||
@@ -5462,7 +5646,6 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, | |||
5462 | u64 num_bytes) | 5646 | u64 num_bytes) |
5463 | { | 5647 | { |
5464 | struct btrfs_caching_control *caching_ctl; | 5648 | struct btrfs_caching_control *caching_ctl; |
5465 | DEFINE_WAIT(wait); | ||
5466 | 5649 | ||
5467 | caching_ctl = get_caching_control(cache); | 5650 | caching_ctl = get_caching_control(cache); |
5468 | if (!caching_ctl) | 5651 | if (!caching_ctl) |
@@ -5479,7 +5662,6 @@ static noinline int | |||
5479 | wait_block_group_cache_done(struct btrfs_block_group_cache *cache) | 5662 | wait_block_group_cache_done(struct btrfs_block_group_cache *cache) |
5480 | { | 5663 | { |
5481 | struct btrfs_caching_control *caching_ctl; | 5664 | struct btrfs_caching_control *caching_ctl; |
5482 | DEFINE_WAIT(wait); | ||
5483 | 5665 | ||
5484 | caching_ctl = get_caching_control(cache); | 5666 | caching_ctl = get_caching_control(cache); |
5485 | if (!caching_ctl) | 5667 | if (!caching_ctl) |
@@ -5493,20 +5675,20 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache) | |||
5493 | 5675 | ||
5494 | int __get_raid_index(u64 flags) | 5676 | int __get_raid_index(u64 flags) |
5495 | { | 5677 | { |
5496 | int index; | ||
5497 | |||
5498 | if (flags & BTRFS_BLOCK_GROUP_RAID10) | 5678 | if (flags & BTRFS_BLOCK_GROUP_RAID10) |
5499 | index = 0; | 5679 | return BTRFS_RAID_RAID10; |
5500 | else if (flags & BTRFS_BLOCK_GROUP_RAID1) | 5680 | else if (flags & BTRFS_BLOCK_GROUP_RAID1) |
5501 | index = 1; | 5681 | return BTRFS_RAID_RAID1; |
5502 | else if (flags & BTRFS_BLOCK_GROUP_DUP) | 5682 | else if (flags & BTRFS_BLOCK_GROUP_DUP) |
5503 | index = 2; | 5683 | return BTRFS_RAID_DUP; |
5504 | else if (flags & BTRFS_BLOCK_GROUP_RAID0) | 5684 | else if (flags & BTRFS_BLOCK_GROUP_RAID0) |
5505 | index = 3; | 5685 | return BTRFS_RAID_RAID0; |
5506 | else | 5686 | else if (flags & BTRFS_BLOCK_GROUP_RAID5) |
5507 | index = 4; | 5687 | return BTRFS_RAID_RAID5; |
5688 | else if (flags & BTRFS_BLOCK_GROUP_RAID6) | ||
5689 | return BTRFS_RAID_RAID6; | ||
5508 | 5690 | ||
5509 | return index; | 5691 | return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */ |
5510 | } | 5692 | } |
5511 | 5693 | ||
5512 | static int get_block_group_index(struct btrfs_block_group_cache *cache) | 5694 | static int get_block_group_index(struct btrfs_block_group_cache *cache) |
@@ -5649,6 +5831,8 @@ search: | |||
5649 | if (!block_group_bits(block_group, data)) { | 5831 | if (!block_group_bits(block_group, data)) { |
5650 | u64 extra = BTRFS_BLOCK_GROUP_DUP | | 5832 | u64 extra = BTRFS_BLOCK_GROUP_DUP | |
5651 | BTRFS_BLOCK_GROUP_RAID1 | | 5833 | BTRFS_BLOCK_GROUP_RAID1 | |
5834 | BTRFS_BLOCK_GROUP_RAID5 | | ||
5835 | BTRFS_BLOCK_GROUP_RAID6 | | ||
5652 | BTRFS_BLOCK_GROUP_RAID10; | 5836 | BTRFS_BLOCK_GROUP_RAID10; |
5653 | 5837 | ||
5654 | /* | 5838 | /* |
@@ -5664,8 +5848,7 @@ have_block_group: | |||
5664 | cached = block_group_cache_done(block_group); | 5848 | cached = block_group_cache_done(block_group); |
5665 | if (unlikely(!cached)) { | 5849 | if (unlikely(!cached)) { |
5666 | found_uncached_bg = true; | 5850 | found_uncached_bg = true; |
5667 | ret = cache_block_group(block_group, trans, | 5851 | ret = cache_block_group(block_group, 0); |
5668 | orig_root, 0); | ||
5669 | BUG_ON(ret < 0); | 5852 | BUG_ON(ret < 0); |
5670 | ret = 0; | 5853 | ret = 0; |
5671 | } | 5854 | } |
@@ -5678,6 +5861,7 @@ have_block_group: | |||
5678 | * lets look there | 5861 | * lets look there |
5679 | */ | 5862 | */ |
5680 | if (last_ptr) { | 5863 | if (last_ptr) { |
5864 | unsigned long aligned_cluster; | ||
5681 | /* | 5865 | /* |
5682 | * the refill lock keeps out other | 5866 | * the refill lock keeps out other |
5683 | * people trying to start a new cluster | 5867 | * people trying to start a new cluster |
@@ -5744,11 +5928,15 @@ refill_cluster: | |||
5744 | goto unclustered_alloc; | 5928 | goto unclustered_alloc; |
5745 | } | 5929 | } |
5746 | 5930 | ||
5931 | aligned_cluster = max_t(unsigned long, | ||
5932 | empty_cluster + empty_size, | ||
5933 | block_group->full_stripe_len); | ||
5934 | |||
5747 | /* allocate a cluster in this block group */ | 5935 | /* allocate a cluster in this block group */ |
5748 | ret = btrfs_find_space_cluster(trans, root, | 5936 | ret = btrfs_find_space_cluster(trans, root, |
5749 | block_group, last_ptr, | 5937 | block_group, last_ptr, |
5750 | search_start, num_bytes, | 5938 | search_start, num_bytes, |
5751 | empty_cluster + empty_size); | 5939 | aligned_cluster); |
5752 | if (ret == 0) { | 5940 | if (ret == 0) { |
5753 | /* | 5941 | /* |
5754 | * now pull our allocation out of this | 5942 | * now pull our allocation out of this |
@@ -5819,7 +6007,8 @@ unclustered_alloc: | |||
5819 | goto loop; | 6007 | goto loop; |
5820 | } | 6008 | } |
5821 | checks: | 6009 | checks: |
5822 | search_start = stripe_align(root, offset); | 6010 | search_start = stripe_align(root, used_block_group, |
6011 | offset, num_bytes); | ||
5823 | 6012 | ||
5824 | /* move on to the next group */ | 6013 | /* move on to the next group */ |
5825 | if (search_start + num_bytes > | 6014 | if (search_start + num_bytes > |
@@ -5970,7 +6159,7 @@ again: | |||
5970 | if (ret == -ENOSPC) { | 6159 | if (ret == -ENOSPC) { |
5971 | if (!final_tried) { | 6160 | if (!final_tried) { |
5972 | num_bytes = num_bytes >> 1; | 6161 | num_bytes = num_bytes >> 1; |
5973 | num_bytes = num_bytes & ~(root->sectorsize - 1); | 6162 | num_bytes = round_down(num_bytes, root->sectorsize); |
5974 | num_bytes = max(num_bytes, min_alloc_size); | 6163 | num_bytes = max(num_bytes, min_alloc_size); |
5975 | if (num_bytes == min_alloc_size) | 6164 | if (num_bytes == min_alloc_size) |
5976 | final_tried = true; | 6165 | final_tried = true; |
@@ -6094,7 +6283,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
6094 | btrfs_mark_buffer_dirty(path->nodes[0]); | 6283 | btrfs_mark_buffer_dirty(path->nodes[0]); |
6095 | btrfs_free_path(path); | 6284 | btrfs_free_path(path); |
6096 | 6285 | ||
6097 | ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); | 6286 | ret = update_block_group(root, ins->objectid, ins->offset, 1); |
6098 | if (ret) { /* -ENOENT, logic error */ | 6287 | if (ret) { /* -ENOENT, logic error */ |
6099 | printk(KERN_ERR "btrfs update block group failed for %llu " | 6288 | printk(KERN_ERR "btrfs update block group failed for %llu " |
6100 | "%llu\n", (unsigned long long)ins->objectid, | 6289 | "%llu\n", (unsigned long long)ins->objectid, |
@@ -6158,7 +6347,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
6158 | btrfs_mark_buffer_dirty(leaf); | 6347 | btrfs_mark_buffer_dirty(leaf); |
6159 | btrfs_free_path(path); | 6348 | btrfs_free_path(path); |
6160 | 6349 | ||
6161 | ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); | 6350 | ret = update_block_group(root, ins->objectid, ins->offset, 1); |
6162 | if (ret) { /* -ENOENT, logic error */ | 6351 | if (ret) { /* -ENOENT, logic error */ |
6163 | printk(KERN_ERR "btrfs update block group failed for %llu " | 6352 | printk(KERN_ERR "btrfs update block group failed for %llu " |
6164 | "%llu\n", (unsigned long long)ins->objectid, | 6353 | "%llu\n", (unsigned long long)ins->objectid, |
@@ -6201,7 +6390,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
6201 | u64 num_bytes = ins->offset; | 6390 | u64 num_bytes = ins->offset; |
6202 | 6391 | ||
6203 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | 6392 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); |
6204 | cache_block_group(block_group, trans, NULL, 0); | 6393 | cache_block_group(block_group, 0); |
6205 | caching_ctl = get_caching_control(block_group); | 6394 | caching_ctl = get_caching_control(block_group); |
6206 | 6395 | ||
6207 | if (!caching_ctl) { | 6396 | if (!caching_ctl) { |
@@ -6315,12 +6504,14 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
6315 | if (!ret) | 6504 | if (!ret) |
6316 | return block_rsv; | 6505 | return block_rsv; |
6317 | if (ret && !block_rsv->failfast) { | 6506 | if (ret && !block_rsv->failfast) { |
6318 | static DEFINE_RATELIMIT_STATE(_rs, | 6507 | if (btrfs_test_opt(root, ENOSPC_DEBUG)) { |
6319 | DEFAULT_RATELIMIT_INTERVAL, | 6508 | static DEFINE_RATELIMIT_STATE(_rs, |
6320 | /*DEFAULT_RATELIMIT_BURST*/ 2); | 6509 | DEFAULT_RATELIMIT_INTERVAL * 10, |
6321 | if (__ratelimit(&_rs)) | 6510 | /*DEFAULT_RATELIMIT_BURST*/ 1); |
6322 | WARN(1, KERN_DEBUG "btrfs: block rsv returned %d\n", | 6511 | if (__ratelimit(&_rs)) |
6323 | ret); | 6512 | WARN(1, KERN_DEBUG |
6513 | "btrfs: block rsv returned %d\n", ret); | ||
6514 | } | ||
6324 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, | 6515 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, |
6325 | BTRFS_RESERVE_NO_FLUSH); | 6516 | BTRFS_RESERVE_NO_FLUSH); |
6326 | if (!ret) { | 6517 | if (!ret) { |
@@ -6386,7 +6577,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
6386 | 6577 | ||
6387 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | 6578 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { |
6388 | struct btrfs_delayed_extent_op *extent_op; | 6579 | struct btrfs_delayed_extent_op *extent_op; |
6389 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | 6580 | extent_op = btrfs_alloc_delayed_extent_op(); |
6390 | BUG_ON(!extent_op); /* -ENOMEM */ | 6581 | BUG_ON(!extent_op); /* -ENOMEM */ |
6391 | if (key) | 6582 | if (key) |
6392 | memcpy(&extent_op->key, key, sizeof(extent_op->key)); | 6583 | memcpy(&extent_op->key, key, sizeof(extent_op->key)); |
@@ -7189,6 +7380,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | |||
7189 | root->fs_info->fs_devices->missing_devices; | 7380 | root->fs_info->fs_devices->missing_devices; |
7190 | 7381 | ||
7191 | stripped = BTRFS_BLOCK_GROUP_RAID0 | | 7382 | stripped = BTRFS_BLOCK_GROUP_RAID0 | |
7383 | BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | | ||
7192 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; | 7384 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; |
7193 | 7385 | ||
7194 | if (num_devices == 1) { | 7386 | if (num_devices == 1) { |
@@ -7467,16 +7659,16 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
7467 | index = get_block_group_index(block_group); | 7659 | index = get_block_group_index(block_group); |
7468 | } | 7660 | } |
7469 | 7661 | ||
7470 | if (index == 0) { | 7662 | if (index == BTRFS_RAID_RAID10) { |
7471 | dev_min = 4; | 7663 | dev_min = 4; |
7472 | /* Divide by 2 */ | 7664 | /* Divide by 2 */ |
7473 | min_free >>= 1; | 7665 | min_free >>= 1; |
7474 | } else if (index == 1) { | 7666 | } else if (index == BTRFS_RAID_RAID1) { |
7475 | dev_min = 2; | 7667 | dev_min = 2; |
7476 | } else if (index == 2) { | 7668 | } else if (index == BTRFS_RAID_DUP) { |
7477 | /* Multiply by 2 */ | 7669 | /* Multiply by 2 */ |
7478 | min_free <<= 1; | 7670 | min_free <<= 1; |
7479 | } else if (index == 3) { | 7671 | } else if (index == BTRFS_RAID_RAID0) { |
7480 | dev_min = fs_devices->rw_devices; | 7672 | dev_min = fs_devices->rw_devices; |
7481 | do_div(min_free, dev_min); | 7673 | do_div(min_free, dev_min); |
7482 | } | 7674 | } |
@@ -7637,11 +7829,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
7637 | space_info = list_entry(info->space_info.next, | 7829 | space_info = list_entry(info->space_info.next, |
7638 | struct btrfs_space_info, | 7830 | struct btrfs_space_info, |
7639 | list); | 7831 | list); |
7640 | if (space_info->bytes_pinned > 0 || | 7832 | if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) { |
7641 | space_info->bytes_reserved > 0 || | 7833 | if (space_info->bytes_pinned > 0 || |
7642 | space_info->bytes_may_use > 0) { | 7834 | space_info->bytes_reserved > 0 || |
7643 | WARN_ON(1); | 7835 | space_info->bytes_may_use > 0) { |
7644 | dump_space_info(space_info, 0, 0); | 7836 | WARN_ON(1); |
7837 | dump_space_info(space_info, 0, 0); | ||
7838 | } | ||
7645 | } | 7839 | } |
7646 | list_del(&space_info->list); | 7840 | list_del(&space_info->list); |
7647 | kfree(space_info); | 7841 | kfree(space_info); |
@@ -7740,7 +7934,9 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7740 | btrfs_release_path(path); | 7934 | btrfs_release_path(path); |
7741 | cache->flags = btrfs_block_group_flags(&cache->item); | 7935 | cache->flags = btrfs_block_group_flags(&cache->item); |
7742 | cache->sectorsize = root->sectorsize; | 7936 | cache->sectorsize = root->sectorsize; |
7743 | 7937 | cache->full_stripe_len = btrfs_full_stripe_len(root, | |
7938 | &root->fs_info->mapping_tree, | ||
7939 | found_key.objectid); | ||
7744 | btrfs_init_free_space_ctl(cache); | 7940 | btrfs_init_free_space_ctl(cache); |
7745 | 7941 | ||
7746 | /* | 7942 | /* |
@@ -7794,6 +7990,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7794 | if (!(get_alloc_profile(root, space_info->flags) & | 7990 | if (!(get_alloc_profile(root, space_info->flags) & |
7795 | (BTRFS_BLOCK_GROUP_RAID10 | | 7991 | (BTRFS_BLOCK_GROUP_RAID10 | |
7796 | BTRFS_BLOCK_GROUP_RAID1 | | 7992 | BTRFS_BLOCK_GROUP_RAID1 | |
7993 | BTRFS_BLOCK_GROUP_RAID5 | | ||
7994 | BTRFS_BLOCK_GROUP_RAID6 | | ||
7797 | BTRFS_BLOCK_GROUP_DUP))) | 7995 | BTRFS_BLOCK_GROUP_DUP))) |
7798 | continue; | 7996 | continue; |
7799 | /* | 7997 | /* |
@@ -7869,6 +8067,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7869 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; | 8067 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; |
7870 | cache->sectorsize = root->sectorsize; | 8068 | cache->sectorsize = root->sectorsize; |
7871 | cache->fs_info = root->fs_info; | 8069 | cache->fs_info = root->fs_info; |
8070 | cache->full_stripe_len = btrfs_full_stripe_len(root, | ||
8071 | &root->fs_info->mapping_tree, | ||
8072 | chunk_offset); | ||
7872 | 8073 | ||
7873 | atomic_set(&cache->count, 1); | 8074 | atomic_set(&cache->count, 1); |
7874 | spin_lock_init(&cache->lock); | 8075 | spin_lock_init(&cache->lock); |
@@ -7918,12 +8119,14 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | |||
7918 | u64 extra_flags = chunk_to_extended(flags) & | 8119 | u64 extra_flags = chunk_to_extended(flags) & |
7919 | BTRFS_EXTENDED_PROFILE_MASK; | 8120 | BTRFS_EXTENDED_PROFILE_MASK; |
7920 | 8121 | ||
8122 | write_seqlock(&fs_info->profiles_lock); | ||
7921 | if (flags & BTRFS_BLOCK_GROUP_DATA) | 8123 | if (flags & BTRFS_BLOCK_GROUP_DATA) |
7922 | fs_info->avail_data_alloc_bits &= ~extra_flags; | 8124 | fs_info->avail_data_alloc_bits &= ~extra_flags; |
7923 | if (flags & BTRFS_BLOCK_GROUP_METADATA) | 8125 | if (flags & BTRFS_BLOCK_GROUP_METADATA) |
7924 | fs_info->avail_metadata_alloc_bits &= ~extra_flags; | 8126 | fs_info->avail_metadata_alloc_bits &= ~extra_flags; |
7925 | if (flags & BTRFS_BLOCK_GROUP_SYSTEM) | 8127 | if (flags & BTRFS_BLOCK_GROUP_SYSTEM) |
7926 | fs_info->avail_system_alloc_bits &= ~extra_flags; | 8128 | fs_info->avail_system_alloc_bits &= ~extra_flags; |
8129 | write_sequnlock(&fs_info->profiles_lock); | ||
7927 | } | 8130 | } |
7928 | 8131 | ||
7929 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 8132 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
@@ -8022,6 +8225,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
8022 | spin_lock(&root->fs_info->block_group_cache_lock); | 8225 | spin_lock(&root->fs_info->block_group_cache_lock); |
8023 | rb_erase(&block_group->cache_node, | 8226 | rb_erase(&block_group->cache_node, |
8024 | &root->fs_info->block_group_cache_tree); | 8227 | &root->fs_info->block_group_cache_tree); |
8228 | |||
8229 | if (root->fs_info->first_logical_byte == block_group->key.objectid) | ||
8230 | root->fs_info->first_logical_byte = (u64)-1; | ||
8025 | spin_unlock(&root->fs_info->block_group_cache_lock); | 8231 | spin_unlock(&root->fs_info->block_group_cache_lock); |
8026 | 8232 | ||
8027 | down_write(&block_group->space_info->groups_sem); | 8233 | down_write(&block_group->space_info->groups_sem); |
@@ -8144,7 +8350,7 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | |||
8144 | 8350 | ||
8145 | if (end - start >= range->minlen) { | 8351 | if (end - start >= range->minlen) { |
8146 | if (!block_group_cache_done(cache)) { | 8352 | if (!block_group_cache_done(cache)) { |
8147 | ret = cache_block_group(cache, NULL, root, 0); | 8353 | ret = cache_block_group(cache, 0); |
8148 | if (!ret) | 8354 | if (!ret) |
8149 | wait_block_group_cache_done(cache); | 8355 | wait_block_group_cache_done(cache); |
8150 | } | 8356 | } |