diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 156 |
1 files changed, 122 insertions, 34 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5cd44e239595..b3ecca447ddf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include "print-tree.h" | 31 | #include "print-tree.h" |
32 | #include "transaction.h" | 32 | #include "transaction.h" |
33 | #include "volumes.h" | 33 | #include "volumes.h" |
34 | #include "raid56.h" | ||
34 | #include "locking.h" | 35 | #include "locking.h" |
35 | #include "free-space-cache.h" | 36 | #include "free-space-cache.h" |
36 | #include "math.h" | 37 | #include "math.h" |
@@ -1852,6 +1853,8 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
1852 | *actual_bytes = discarded_bytes; | 1853 | *actual_bytes = discarded_bytes; |
1853 | 1854 | ||
1854 | 1855 | ||
1856 | if (ret == -EOPNOTSUPP) | ||
1857 | ret = 0; | ||
1855 | return ret; | 1858 | return ret; |
1856 | } | 1859 | } |
1857 | 1860 | ||
@@ -2440,6 +2443,16 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, | |||
2440 | return ret; | 2443 | return ret; |
2441 | } | 2444 | } |
2442 | 2445 | ||
2446 | static int refs_newer(struct btrfs_delayed_ref_root *delayed_refs, int seq, | ||
2447 | int count) | ||
2448 | { | ||
2449 | int val = atomic_read(&delayed_refs->ref_seq); | ||
2450 | |||
2451 | if (val < seq || val >= seq + count) | ||
2452 | return 1; | ||
2453 | return 0; | ||
2454 | } | ||
2455 | |||
2443 | /* | 2456 | /* |
2444 | * this starts processing the delayed reference count updates and | 2457 | * this starts processing the delayed reference count updates and |
2445 | * extent insertions we have queued up so far. count can be | 2458 | * extent insertions we have queued up so far. count can be |
@@ -2474,6 +2487,44 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
2474 | 2487 | ||
2475 | delayed_refs = &trans->transaction->delayed_refs; | 2488 | delayed_refs = &trans->transaction->delayed_refs; |
2476 | INIT_LIST_HEAD(&cluster); | 2489 | INIT_LIST_HEAD(&cluster); |
2490 | if (count == 0) { | ||
2491 | count = delayed_refs->num_entries * 2; | ||
2492 | run_most = 1; | ||
2493 | } | ||
2494 | |||
2495 | if (!run_all && !run_most) { | ||
2496 | int old; | ||
2497 | int seq = atomic_read(&delayed_refs->ref_seq); | ||
2498 | |||
2499 | progress: | ||
2500 | old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1); | ||
2501 | if (old) { | ||
2502 | DEFINE_WAIT(__wait); | ||
2503 | if (delayed_refs->num_entries < 16348) | ||
2504 | return 0; | ||
2505 | |||
2506 | prepare_to_wait(&delayed_refs->wait, &__wait, | ||
2507 | TASK_UNINTERRUPTIBLE); | ||
2508 | |||
2509 | old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1); | ||
2510 | if (old) { | ||
2511 | schedule(); | ||
2512 | finish_wait(&delayed_refs->wait, &__wait); | ||
2513 | |||
2514 | if (!refs_newer(delayed_refs, seq, 256)) | ||
2515 | goto progress; | ||
2516 | else | ||
2517 | return 0; | ||
2518 | } else { | ||
2519 | finish_wait(&delayed_refs->wait, &__wait); | ||
2520 | goto again; | ||
2521 | } | ||
2522 | } | ||
2523 | |||
2524 | } else { | ||
2525 | atomic_inc(&delayed_refs->procs_running_refs); | ||
2526 | } | ||
2527 | |||
2477 | again: | 2528 | again: |
2478 | loops = 0; | 2529 | loops = 0; |
2479 | spin_lock(&delayed_refs->lock); | 2530 | spin_lock(&delayed_refs->lock); |
@@ -2482,10 +2533,6 @@ again: | |||
2482 | delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); | 2533 | delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); |
2483 | #endif | 2534 | #endif |
2484 | 2535 | ||
2485 | if (count == 0) { | ||
2486 | count = delayed_refs->num_entries * 2; | ||
2487 | run_most = 1; | ||
2488 | } | ||
2489 | while (1) { | 2536 | while (1) { |
2490 | if (!(run_all || run_most) && | 2537 | if (!(run_all || run_most) && |
2491 | delayed_refs->num_heads_ready < 64) | 2538 | delayed_refs->num_heads_ready < 64) |
@@ -2508,9 +2555,12 @@ again: | |||
2508 | btrfs_release_ref_cluster(&cluster); | 2555 | btrfs_release_ref_cluster(&cluster); |
2509 | spin_unlock(&delayed_refs->lock); | 2556 | spin_unlock(&delayed_refs->lock); |
2510 | btrfs_abort_transaction(trans, root, ret); | 2557 | btrfs_abort_transaction(trans, root, ret); |
2558 | atomic_dec(&delayed_refs->procs_running_refs); | ||
2511 | return ret; | 2559 | return ret; |
2512 | } | 2560 | } |
2513 | 2561 | ||
2562 | atomic_add(ret, &delayed_refs->ref_seq); | ||
2563 | |||
2514 | count -= min_t(unsigned long, ret, count); | 2564 | count -= min_t(unsigned long, ret, count); |
2515 | 2565 | ||
2516 | if (count == 0) | 2566 | if (count == 0) |
@@ -2579,6 +2629,11 @@ again: | |||
2579 | goto again; | 2629 | goto again; |
2580 | } | 2630 | } |
2581 | out: | 2631 | out: |
2632 | atomic_dec(&delayed_refs->procs_running_refs); | ||
2633 | smp_mb(); | ||
2634 | if (waitqueue_active(&delayed_refs->wait)) | ||
2635 | wake_up(&delayed_refs->wait); | ||
2636 | |||
2582 | spin_unlock(&delayed_refs->lock); | 2637 | spin_unlock(&delayed_refs->lock); |
2583 | assert_qgroups_uptodate(trans); | 2638 | assert_qgroups_uptodate(trans); |
2584 | return 0; | 2639 | return 0; |
@@ -3284,6 +3339,7 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | |||
3284 | u64 num_devices = root->fs_info->fs_devices->rw_devices + | 3339 | u64 num_devices = root->fs_info->fs_devices->rw_devices + |
3285 | root->fs_info->fs_devices->missing_devices; | 3340 | root->fs_info->fs_devices->missing_devices; |
3286 | u64 target; | 3341 | u64 target; |
3342 | u64 tmp; | ||
3287 | 3343 | ||
3288 | /* | 3344 | /* |
3289 | * see if restripe for this chunk_type is in progress, if so | 3345 | * see if restripe for this chunk_type is in progress, if so |
@@ -3300,30 +3356,32 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | |||
3300 | } | 3356 | } |
3301 | spin_unlock(&root->fs_info->balance_lock); | 3357 | spin_unlock(&root->fs_info->balance_lock); |
3302 | 3358 | ||
3359 | /* First, mask out the RAID levels which aren't possible */ | ||
3303 | if (num_devices == 1) | 3360 | if (num_devices == 1) |
3304 | flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); | 3361 | flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0 | |
3362 | BTRFS_BLOCK_GROUP_RAID5); | ||
3363 | if (num_devices < 3) | ||
3364 | flags &= ~BTRFS_BLOCK_GROUP_RAID6; | ||
3305 | if (num_devices < 4) | 3365 | if (num_devices < 4) |
3306 | flags &= ~BTRFS_BLOCK_GROUP_RAID10; | 3366 | flags &= ~BTRFS_BLOCK_GROUP_RAID10; |
3307 | 3367 | ||
3308 | if ((flags & BTRFS_BLOCK_GROUP_DUP) && | 3368 | tmp = flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID0 | |
3309 | (flags & (BTRFS_BLOCK_GROUP_RAID1 | | 3369 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID5 | |
3310 | BTRFS_BLOCK_GROUP_RAID10))) { | 3370 | BTRFS_BLOCK_GROUP_RAID6 | BTRFS_BLOCK_GROUP_RAID10); |
3311 | flags &= ~BTRFS_BLOCK_GROUP_DUP; | 3371 | flags &= ~tmp; |
3312 | } | ||
3313 | 3372 | ||
3314 | if ((flags & BTRFS_BLOCK_GROUP_RAID1) && | 3373 | if (tmp & BTRFS_BLOCK_GROUP_RAID6) |
3315 | (flags & BTRFS_BLOCK_GROUP_RAID10)) { | 3374 | tmp = BTRFS_BLOCK_GROUP_RAID6; |
3316 | flags &= ~BTRFS_BLOCK_GROUP_RAID1; | 3375 | else if (tmp & BTRFS_BLOCK_GROUP_RAID5) |
3317 | } | 3376 | tmp = BTRFS_BLOCK_GROUP_RAID5; |
3318 | 3377 | else if (tmp & BTRFS_BLOCK_GROUP_RAID10) | |
3319 | if ((flags & BTRFS_BLOCK_GROUP_RAID0) && | 3378 | tmp = BTRFS_BLOCK_GROUP_RAID10; |
3320 | ((flags & BTRFS_BLOCK_GROUP_RAID1) | | 3379 | else if (tmp & BTRFS_BLOCK_GROUP_RAID1) |
3321 | (flags & BTRFS_BLOCK_GROUP_RAID10) | | 3380 | tmp = BTRFS_BLOCK_GROUP_RAID1; |
3322 | (flags & BTRFS_BLOCK_GROUP_DUP))) { | 3381 | else if (tmp & BTRFS_BLOCK_GROUP_RAID0) |
3323 | flags &= ~BTRFS_BLOCK_GROUP_RAID0; | 3382 | tmp = BTRFS_BLOCK_GROUP_RAID0; |
3324 | } | ||
3325 | 3383 | ||
3326 | return extended_to_chunk(flags); | 3384 | return extended_to_chunk(flags | tmp); |
3327 | } | 3385 | } |
3328 | 3386 | ||
3329 | static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) | 3387 | static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) |
@@ -3347,6 +3405,7 @@ static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) | |||
3347 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) | 3405 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) |
3348 | { | 3406 | { |
3349 | u64 flags; | 3407 | u64 flags; |
3408 | u64 ret; | ||
3350 | 3409 | ||
3351 | if (data) | 3410 | if (data) |
3352 | flags = BTRFS_BLOCK_GROUP_DATA; | 3411 | flags = BTRFS_BLOCK_GROUP_DATA; |
@@ -3355,7 +3414,8 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) | |||
3355 | else | 3414 | else |
3356 | flags = BTRFS_BLOCK_GROUP_METADATA; | 3415 | flags = BTRFS_BLOCK_GROUP_METADATA; |
3357 | 3416 | ||
3358 | return get_alloc_profile(root, flags); | 3417 | ret = get_alloc_profile(root, flags); |
3418 | return ret; | ||
3359 | } | 3419 | } |
3360 | 3420 | ||
3361 | /* | 3421 | /* |
@@ -3530,8 +3590,10 @@ static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type) | |||
3530 | { | 3590 | { |
3531 | u64 num_dev; | 3591 | u64 num_dev; |
3532 | 3592 | ||
3533 | if (type & BTRFS_BLOCK_GROUP_RAID10 || | 3593 | if (type & (BTRFS_BLOCK_GROUP_RAID10 | |
3534 | type & BTRFS_BLOCK_GROUP_RAID0) | 3594 | BTRFS_BLOCK_GROUP_RAID0 | |
3595 | BTRFS_BLOCK_GROUP_RAID5 | | ||
3596 | BTRFS_BLOCK_GROUP_RAID6)) | ||
3535 | num_dev = root->fs_info->fs_devices->rw_devices; | 3597 | num_dev = root->fs_info->fs_devices->rw_devices; |
3536 | else if (type & BTRFS_BLOCK_GROUP_RAID1) | 3598 | else if (type & BTRFS_BLOCK_GROUP_RAID1) |
3537 | num_dev = 2; | 3599 | num_dev = 2; |
@@ -3706,7 +3768,9 @@ static int can_overcommit(struct btrfs_root *root, | |||
3706 | 3768 | ||
3707 | /* | 3769 | /* |
3708 | * If we have dup, raid1 or raid10 then only half of the free | 3770 | * If we have dup, raid1 or raid10 then only half of the free |
3709 | * space is actually useable. | 3771 | * space is actually useable. For raid56, the space info used |
3772 | * doesn't include the parity drive, so we don't have to | ||
3773 | * change the math | ||
3710 | */ | 3774 | */ |
3711 | if (profile & (BTRFS_BLOCK_GROUP_DUP | | 3775 | if (profile & (BTRFS_BLOCK_GROUP_DUP | |
3712 | BTRFS_BLOCK_GROUP_RAID1 | | 3776 | BTRFS_BLOCK_GROUP_RAID1 | |
@@ -5539,10 +5603,14 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
5539 | return ret; | 5603 | return ret; |
5540 | } | 5604 | } |
5541 | 5605 | ||
5542 | static u64 stripe_align(struct btrfs_root *root, u64 val) | 5606 | static u64 stripe_align(struct btrfs_root *root, |
5607 | struct btrfs_block_group_cache *cache, | ||
5608 | u64 val, u64 num_bytes) | ||
5543 | { | 5609 | { |
5544 | u64 mask = ((u64)root->stripesize - 1); | 5610 | u64 mask; |
5545 | u64 ret = (val + mask) & ~mask; | 5611 | u64 ret; |
5612 | mask = ((u64)root->stripesize - 1); | ||
5613 | ret = (val + mask) & ~mask; | ||
5546 | return ret; | 5614 | return ret; |
5547 | } | 5615 | } |
5548 | 5616 | ||
@@ -5599,8 +5667,12 @@ int __get_raid_index(u64 flags) | |||
5599 | return BTRFS_RAID_DUP; | 5667 | return BTRFS_RAID_DUP; |
5600 | else if (flags & BTRFS_BLOCK_GROUP_RAID0) | 5668 | else if (flags & BTRFS_BLOCK_GROUP_RAID0) |
5601 | return BTRFS_RAID_RAID0; | 5669 | return BTRFS_RAID_RAID0; |
5602 | else | 5670 | else if (flags & BTRFS_BLOCK_GROUP_RAID5) |
5603 | return BTRFS_RAID_SINGLE; | 5671 | return BTRFS_RAID_RAID5; |
5672 | else if (flags & BTRFS_BLOCK_GROUP_RAID6) | ||
5673 | return BTRFS_RAID_RAID6; | ||
5674 | |||
5675 | return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */ | ||
5604 | } | 5676 | } |
5605 | 5677 | ||
5606 | static int get_block_group_index(struct btrfs_block_group_cache *cache) | 5678 | static int get_block_group_index(struct btrfs_block_group_cache *cache) |
@@ -5743,6 +5815,8 @@ search: | |||
5743 | if (!block_group_bits(block_group, data)) { | 5815 | if (!block_group_bits(block_group, data)) { |
5744 | u64 extra = BTRFS_BLOCK_GROUP_DUP | | 5816 | u64 extra = BTRFS_BLOCK_GROUP_DUP | |
5745 | BTRFS_BLOCK_GROUP_RAID1 | | 5817 | BTRFS_BLOCK_GROUP_RAID1 | |
5818 | BTRFS_BLOCK_GROUP_RAID5 | | ||
5819 | BTRFS_BLOCK_GROUP_RAID6 | | ||
5746 | BTRFS_BLOCK_GROUP_RAID10; | 5820 | BTRFS_BLOCK_GROUP_RAID10; |
5747 | 5821 | ||
5748 | /* | 5822 | /* |
@@ -5771,6 +5845,7 @@ have_block_group: | |||
5771 | * lets look there | 5845 | * lets look there |
5772 | */ | 5846 | */ |
5773 | if (last_ptr) { | 5847 | if (last_ptr) { |
5848 | unsigned long aligned_cluster; | ||
5774 | /* | 5849 | /* |
5775 | * the refill lock keeps out other | 5850 | * the refill lock keeps out other |
5776 | * people trying to start a new cluster | 5851 | * people trying to start a new cluster |
@@ -5837,11 +5912,15 @@ refill_cluster: | |||
5837 | goto unclustered_alloc; | 5912 | goto unclustered_alloc; |
5838 | } | 5913 | } |
5839 | 5914 | ||
5915 | aligned_cluster = max_t(unsigned long, | ||
5916 | empty_cluster + empty_size, | ||
5917 | block_group->full_stripe_len); | ||
5918 | |||
5840 | /* allocate a cluster in this block group */ | 5919 | /* allocate a cluster in this block group */ |
5841 | ret = btrfs_find_space_cluster(trans, root, | 5920 | ret = btrfs_find_space_cluster(trans, root, |
5842 | block_group, last_ptr, | 5921 | block_group, last_ptr, |
5843 | search_start, num_bytes, | 5922 | search_start, num_bytes, |
5844 | empty_cluster + empty_size); | 5923 | aligned_cluster); |
5845 | if (ret == 0) { | 5924 | if (ret == 0) { |
5846 | /* | 5925 | /* |
5847 | * now pull our allocation out of this | 5926 | * now pull our allocation out of this |
@@ -5912,7 +5991,8 @@ unclustered_alloc: | |||
5912 | goto loop; | 5991 | goto loop; |
5913 | } | 5992 | } |
5914 | checks: | 5993 | checks: |
5915 | search_start = stripe_align(root, offset); | 5994 | search_start = stripe_align(root, used_block_group, |
5995 | offset, num_bytes); | ||
5916 | 5996 | ||
5917 | /* move on to the next group */ | 5997 | /* move on to the next group */ |
5918 | if (search_start + num_bytes > | 5998 | if (search_start + num_bytes > |
@@ -7284,6 +7364,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | |||
7284 | root->fs_info->fs_devices->missing_devices; | 7364 | root->fs_info->fs_devices->missing_devices; |
7285 | 7365 | ||
7286 | stripped = BTRFS_BLOCK_GROUP_RAID0 | | 7366 | stripped = BTRFS_BLOCK_GROUP_RAID0 | |
7367 | BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | | ||
7287 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; | 7368 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; |
7288 | 7369 | ||
7289 | if (num_devices == 1) { | 7370 | if (num_devices == 1) { |
@@ -7837,7 +7918,9 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7837 | btrfs_release_path(path); | 7918 | btrfs_release_path(path); |
7838 | cache->flags = btrfs_block_group_flags(&cache->item); | 7919 | cache->flags = btrfs_block_group_flags(&cache->item); |
7839 | cache->sectorsize = root->sectorsize; | 7920 | cache->sectorsize = root->sectorsize; |
7840 | 7921 | cache->full_stripe_len = btrfs_full_stripe_len(root, | |
7922 | &root->fs_info->mapping_tree, | ||
7923 | found_key.objectid); | ||
7841 | btrfs_init_free_space_ctl(cache); | 7924 | btrfs_init_free_space_ctl(cache); |
7842 | 7925 | ||
7843 | /* | 7926 | /* |
@@ -7891,6 +7974,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7891 | if (!(get_alloc_profile(root, space_info->flags) & | 7974 | if (!(get_alloc_profile(root, space_info->flags) & |
7892 | (BTRFS_BLOCK_GROUP_RAID10 | | 7975 | (BTRFS_BLOCK_GROUP_RAID10 | |
7893 | BTRFS_BLOCK_GROUP_RAID1 | | 7976 | BTRFS_BLOCK_GROUP_RAID1 | |
7977 | BTRFS_BLOCK_GROUP_RAID5 | | ||
7978 | BTRFS_BLOCK_GROUP_RAID6 | | ||
7894 | BTRFS_BLOCK_GROUP_DUP))) | 7979 | BTRFS_BLOCK_GROUP_DUP))) |
7895 | continue; | 7980 | continue; |
7896 | /* | 7981 | /* |
@@ -7966,6 +8051,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7966 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; | 8051 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; |
7967 | cache->sectorsize = root->sectorsize; | 8052 | cache->sectorsize = root->sectorsize; |
7968 | cache->fs_info = root->fs_info; | 8053 | cache->fs_info = root->fs_info; |
8054 | cache->full_stripe_len = btrfs_full_stripe_len(root, | ||
8055 | &root->fs_info->mapping_tree, | ||
8056 | chunk_offset); | ||
7969 | 8057 | ||
7970 | atomic_set(&cache->count, 1); | 8058 | atomic_set(&cache->count, 1); |
7971 | spin_lock_init(&cache->lock); | 8059 | spin_lock_init(&cache->lock); |