author     Josef Bacik <josef@toxicpanda.com>       2019-06-20 15:38:04 -0400
committer  David Sterba <dsterba@suse.com>          2019-09-09 08:59:09 -0400
commit     07730d87ac7872b54efa02da5d20b42fd6bb165a
tree       bc1f9b69ef68cf9fc6eb689820e2d30460c483b3
parent     606d1bf10d7ebafdee26e8896b467b885c5233ec
btrfs: migrate the chunk allocation code
This feels more at home in block-group.c than in extent-tree.c.
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ refresh ]
Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r--   fs/btrfs/block-group.c    | 246
-rw-r--r--   fs/btrfs/block-group.h    |  21
-rw-r--r--   fs/btrfs/ctree.h          |  24
-rw-r--r--   fs/btrfs/delalloc-space.c |   1
-rw-r--r--   fs/btrfs/extent-tree.c    | 244
5 files changed, 268 insertions(+), 268 deletions(-)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index eebef70725c6..8f702cf4c0db 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -13,6 +13,7 @@
 #include "sysfs.h"
 #include "tree-log.h"
 #include "delalloc-space.h"
+#include "math.h"
 
 void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
 {
@@ -2694,3 +2695,248 @@ void btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
 	spin_unlock(&cache->lock);
 	spin_unlock(&space_info->lock);
 }
+
+static void force_metadata_allocation(struct btrfs_fs_info *info)
+{
+	struct list_head *head = &info->space_info;
+	struct btrfs_space_info *found;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(found, head, list) {
+		if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
+			found->force_alloc = CHUNK_ALLOC_FORCE;
+	}
+	rcu_read_unlock();
+}
+
+static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
+			      struct btrfs_space_info *sinfo, int force)
+{
+	u64 bytes_used = btrfs_space_info_used(sinfo, false);
+	u64 thresh;
+
+	if (force == CHUNK_ALLOC_FORCE)
+		return 1;
+
+	/*
+	 * in limited mode, we want to have some free space up to
+	 * about 1% of the FS size.
+	 */
+	if (force == CHUNK_ALLOC_LIMITED) {
+		thresh = btrfs_super_total_bytes(fs_info->super_copy);
+		thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
+
+		if (sinfo->total_bytes - bytes_used < thresh)
+			return 1;
+	}
+
+	if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8))
+		return 0;
+	return 1;
+}
+
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
+{
+	u64 alloc_flags = btrfs_get_alloc_profile(trans->fs_info, type);
+
+	return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+}
+
+/*
+ * If force is CHUNK_ALLOC_FORCE:
+ *    - return 1 if it successfully allocates a chunk,
+ *    - return errors including -ENOSPC otherwise.
+ * If force is NOT CHUNK_ALLOC_FORCE:
+ *    - return 0 if it doesn't need to allocate a new chunk,
+ *    - return 1 if it successfully allocates a chunk,
+ *    - return errors including -ENOSPC otherwise.
+ */
+int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
+		      enum btrfs_chunk_alloc_enum force)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct btrfs_space_info *space_info;
+	bool wait_for_alloc = false;
+	bool should_alloc = false;
+	int ret = 0;
+
+	/* Don't re-enter if we're already allocating a chunk */
+	if (trans->allocating_chunk)
+		return -ENOSPC;
+
+	space_info = btrfs_find_space_info(fs_info, flags);
+	ASSERT(space_info);
+
+	do {
+		spin_lock(&space_info->lock);
+		if (force < space_info->force_alloc)
+			force = space_info->force_alloc;
+		should_alloc = should_alloc_chunk(fs_info, space_info, force);
+		if (space_info->full) {
+			/* No more free physical space */
+			if (should_alloc)
+				ret = -ENOSPC;
+			else
+				ret = 0;
+			spin_unlock(&space_info->lock);
+			return ret;
+		} else if (!should_alloc) {
+			spin_unlock(&space_info->lock);
+			return 0;
+		} else if (space_info->chunk_alloc) {
+			/*
+			 * Someone is already allocating, so we need to block
+			 * until this someone is finished and then loop to
+			 * recheck if we should continue with our allocation
+			 * attempt.
+			 */
+			wait_for_alloc = true;
+			spin_unlock(&space_info->lock);
+			mutex_lock(&fs_info->chunk_mutex);
+			mutex_unlock(&fs_info->chunk_mutex);
+		} else {
+			/* Proceed with allocation */
+			space_info->chunk_alloc = 1;
+			wait_for_alloc = false;
+			spin_unlock(&space_info->lock);
+		}
+
+		cond_resched();
+	} while (wait_for_alloc);
+
+	mutex_lock(&fs_info->chunk_mutex);
+	trans->allocating_chunk = true;
+
+	/*
+	 * If we have mixed data/metadata chunks we want to make sure we keep
+	 * allocating mixed chunks instead of individual chunks.
+	 */
+	if (btrfs_mixed_space_info(space_info))
+		flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
+
+	/*
+	 * if we're doing a data chunk, go ahead and make sure that
+	 * we keep a reasonable number of metadata chunks allocated in the
+	 * FS as well.
+	 */
+	if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
+		fs_info->data_chunk_allocations++;
+		if (!(fs_info->data_chunk_allocations %
+		      fs_info->metadata_ratio))
+			force_metadata_allocation(fs_info);
+	}
+
+	/*
+	 * Check if we have enough space in SYSTEM chunk because we may need
+	 * to update devices.
+	 */
+	check_system_chunk(trans, flags);
+
+	ret = btrfs_alloc_chunk(trans, flags);
+	trans->allocating_chunk = false;
+
+	spin_lock(&space_info->lock);
+	if (ret < 0) {
+		if (ret == -ENOSPC)
+			space_info->full = 1;
+		else
+			goto out;
+	} else {
+		ret = 1;
+		space_info->max_extent_size = 0;
+	}
+
+	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
+out:
+	space_info->chunk_alloc = 0;
+	spin_unlock(&space_info->lock);
+	mutex_unlock(&fs_info->chunk_mutex);
+	/*
+	 * When we allocate a new chunk we reserve space in the chunk block
+	 * reserve to make sure we can COW nodes/leafs in the chunk tree or
+	 * add new nodes/leafs to it if we end up needing to do it when
+	 * inserting the chunk item and updating device items as part of the
+	 * second phase of chunk allocation, performed by
+	 * btrfs_finish_chunk_alloc(). So make sure we don't accumulate a
+	 * large number of new block groups to create in our transaction
+	 * handle's new_bgs list to avoid exhausting the chunk block reserve
+	 * in extreme cases - like having a single transaction create many new
+	 * block groups when starting to write out the free space caches of all
+	 * the block groups that were made dirty during the lifetime of the
+	 * transaction.
+	 */
+	if (trans->chunk_bytes_reserved >= (u64)SZ_2M)
+		btrfs_create_pending_block_groups(trans);
+
+	return ret;
+}
+
+static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
+{
+	u64 num_dev;
+
+	num_dev = btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max;
+	if (!num_dev)
+		num_dev = fs_info->fs_devices->rw_devices;
+
+	return num_dev;
+}
+
+/*
+ * If @is_allocation is true, reserve space in the system space info necessary
+ * for allocating a chunk, otherwise if it's false, reserve space necessary for
+ * removing a chunk.
+ */
+void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct btrfs_space_info *info;
+	u64 left;
+	u64 thresh;
+	int ret = 0;
+	u64 num_devs;
+
+	/*
+	 * Needed because we can end up allocating a system chunk and for an
+	 * atomic and race free space reservation in the chunk block reserve.
+	 */
+	lockdep_assert_held(&fs_info->chunk_mutex);
+
+	info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
+	spin_lock(&info->lock);
+	left = info->total_bytes - btrfs_space_info_used(info, true);
+	spin_unlock(&info->lock);
+
+	num_devs = get_profile_num_devs(fs_info, type);
+
+	/* num_devs device items to update and 1 chunk item to add or remove */
+	thresh = btrfs_calc_trunc_metadata_size(fs_info, num_devs) +
+		btrfs_calc_trans_metadata_size(fs_info, 1);
+
+	if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
+		btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
+			   left, thresh, type);
+		btrfs_dump_space_info(fs_info, info, 0, 0);
+	}
+
+	if (left < thresh) {
+		u64 flags = btrfs_system_alloc_profile(fs_info);
+
+		/*
+		 * Ignore failure to create system chunk. We might end up not
+		 * needing it, as we might not need to COW all nodes/leafs from
+		 * the paths we visit in the chunk tree (they were already COWed
+		 * or created in the current transaction for example).
+		 */
+		ret = btrfs_alloc_chunk(trans, flags);
+	}
+
+	if (!ret) {
+		ret = btrfs_block_rsv_add(fs_info->chunk_root,
+					  &fs_info->chunk_block_rsv,
+					  thresh, BTRFS_RESERVE_NO_FLUSH);
+		if (!ret)
+			trans->chunk_bytes_reserved += thresh;
+	}
+}
+
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 886bfa88ae06..de90f7311574 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -10,6 +10,23 @@ enum btrfs_disk_cache_state {
 	BTRFS_DC_SETUP,
 };
 
+/*
+ * Control flags for do_chunk_alloc's force field CHUNK_ALLOC_NO_FORCE means to
+ * only allocate a chunk if we really need one.
+ *
+ * CHUNK_ALLOC_LIMITED means to only try and allocate one if we have very few
+ * chunks already allocated. This is used as part of the clustering code to
+ * help make sure we have a good pool of storage to cluster in, without filling
+ * the FS with empty chunks
+ *
+ * CHUNK_ALLOC_FORCE means it must try to allocate one
+ */
+enum btrfs_chunk_alloc_enum {
+	CHUNK_ALLOC_NO_FORCE,
+	CHUNK_ALLOC_LIMITED,
+	CHUNK_ALLOC_FORCE,
+};
+
 struct btrfs_caching_control {
 	struct list_head list;
 	struct mutex mutex;
@@ -198,6 +215,10 @@ int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
 			     u64 ram_bytes, u64 num_bytes, int delalloc);
 void btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
 			       u64 num_bytes, int delalloc);
+int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
+		      enum btrfs_chunk_alloc_enum force);
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type);
+void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
 
 static inline int btrfs_block_group_cache_done(
 		struct btrfs_block_group_cache *cache)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6b17573c2fe6..fe25b7211f2d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2556,28 +2556,6 @@ enum btrfs_flush_state {
 	COMMIT_TRANS = 9,
 };
 
-/*
- * control flags for do_chunk_alloc's force field
- * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
- * if we really need one.
- *
- * CHUNK_ALLOC_LIMITED means to only try and allocate one
- * if we have very few chunks already allocated. This is
- * used as part of the clustering code to help make sure
- * we have a good pool of storage to cluster in, without
- * filling the FS with empty chunks
- *
- * CHUNK_ALLOC_FORCE means it must try to allocate one
- *
- */
-enum btrfs_chunk_alloc_enum {
-	CHUNK_ALLOC_NO_FORCE,
-	CHUNK_ALLOC_LIMITED,
-	CHUNK_ALLOC_FORCE,
-};
-
-int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
-		      enum btrfs_chunk_alloc_enum force);
 int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
				     struct btrfs_block_rsv *rsv,
				     int nitems, bool use_global_rsv);
@@ -2593,7 +2571,6 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
				    u64 start, u64 end);
 int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
			 u64 num_bytes, u64 *actual_bytes);
-int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type);
 int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);
 
 int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
@@ -2602,7 +2579,6 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
 int btrfs_start_write_no_snapshotting(struct btrfs_root *root);
 void btrfs_end_write_no_snapshotting(struct btrfs_root *root);
 void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
-void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
 
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
index 17f7c0d38768..d2dfc201b2e1 100644
--- a/fs/btrfs/delalloc-space.c
+++ b/fs/btrfs/delalloc-space.c
@@ -7,6 +7,7 @@
 #include "space-info.h"
 #include "transaction.h"
 #include "qgroup.h"
+#include "block-group.h"
 
 int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
 {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 03c0210840a2..9dd8b08e4615 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2661,243 +2661,6 @@ u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
 	return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
 }
 
-static void force_metadata_allocation(struct btrfs_fs_info *info)
-{
-	struct list_head *head = &info->space_info;
-	struct btrfs_space_info *found;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(found, head, list) {
-		if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
-			found->force_alloc = CHUNK_ALLOC_FORCE;
-	}
-	rcu_read_unlock();
-}
-
-static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
-			      struct btrfs_space_info *sinfo, int force)
-{
-	u64 bytes_used = btrfs_space_info_used(sinfo, false);
-	u64 thresh;
-
-	if (force == CHUNK_ALLOC_FORCE)
-		return 1;
-
-	/*
-	 * in limited mode, we want to have some free space up to
-	 * about 1% of the FS size.
-	 */
-	if (force == CHUNK_ALLOC_LIMITED) {
-		thresh = btrfs_super_total_bytes(fs_info->super_copy);
-		thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
-
-		if (sinfo->total_bytes - bytes_used < thresh)
-			return 1;
-	}
-
-	if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8))
-		return 0;
-	return 1;
-}
-
-static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
-{
-	u64 num_dev;
-
-	num_dev = btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max;
-	if (!num_dev)
-		num_dev = fs_info->fs_devices->rw_devices;
-
-	return num_dev;
-}
-
-/*
- * If @is_allocation is true, reserve space in the system space info necessary
- * for allocating a chunk, otherwise if it's false, reserve space necessary for
- * removing a chunk.
- */
-void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
-{
-	struct btrfs_fs_info *fs_info = trans->fs_info;
-	struct btrfs_space_info *info;
-	u64 left;
-	u64 thresh;
-	int ret = 0;
-	u64 num_devs;
-
-	/*
-	 * Needed because we can end up allocating a system chunk and for an
-	 * atomic and race free space reservation in the chunk block reserve.
-	 */
-	lockdep_assert_held(&fs_info->chunk_mutex);
-
-	info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
-	spin_lock(&info->lock);
-	left = info->total_bytes - btrfs_space_info_used(info, true);
-	spin_unlock(&info->lock);
-
-	num_devs = get_profile_num_devs(fs_info, type);
-
-	/* num_devs device items to update and 1 chunk item to add or remove */
-	thresh = btrfs_calc_trunc_metadata_size(fs_info, num_devs) +
-		btrfs_calc_trans_metadata_size(fs_info, 1);
-
-	if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
-		btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
-			   left, thresh, type);
-		btrfs_dump_space_info(fs_info, info, 0, 0);
-	}
-
-	if (left < thresh) {
-		u64 flags = btrfs_system_alloc_profile(fs_info);
-
-		/*
-		 * Ignore failure to create system chunk. We might end up not
-		 * needing it, as we might not need to COW all nodes/leafs from
-		 * the paths we visit in the chunk tree (they were already COWed
-		 * or created in the current transaction for example).
-		 */
-		ret = btrfs_alloc_chunk(trans, flags);
-	}
-
-	if (!ret) {
-		ret = btrfs_block_rsv_add(fs_info->chunk_root,
-					  &fs_info->chunk_block_rsv,
-					  thresh, BTRFS_RESERVE_NO_FLUSH);
-		if (!ret)
-			trans->chunk_bytes_reserved += thresh;
-	}
-}
-
-/*
- * If force is CHUNK_ALLOC_FORCE:
- *    - return 1 if it successfully allocates a chunk,
- *    - return errors including -ENOSPC otherwise.
- * If force is NOT CHUNK_ALLOC_FORCE:
- *    - return 0 if it doesn't need to allocate a new chunk,
- *    - return 1 if it successfully allocates a chunk,
- *    - return errors including -ENOSPC otherwise.
- */
-int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
-		      enum btrfs_chunk_alloc_enum force)
-{
-	struct btrfs_fs_info *fs_info = trans->fs_info;
-	struct btrfs_space_info *space_info;
-	bool wait_for_alloc = false;
-	bool should_alloc = false;
-	int ret = 0;
-
-	/* Don't re-enter if we're already allocating a chunk */
-	if (trans->allocating_chunk)
-		return -ENOSPC;
-
-	space_info = btrfs_find_space_info(fs_info, flags);
-	ASSERT(space_info);
-
-	do {
-		spin_lock(&space_info->lock);
-		if (force < space_info->force_alloc)
-			force = space_info->force_alloc;
-		should_alloc = should_alloc_chunk(fs_info, space_info, force);
-		if (space_info->full) {
-			/* No more free physical space */
-			if (should_alloc)
-				ret = -ENOSPC;
-			else
-				ret = 0;
-			spin_unlock(&space_info->lock);
-			return ret;
-		} else if (!should_alloc) {
-			spin_unlock(&space_info->lock);
-			return 0;
-		} else if (space_info->chunk_alloc) {
-			/*
-			 * Someone is already allocating, so we need to block
-			 * until this someone is finished and then loop to
-			 * recheck if we should continue with our allocation
-			 * attempt.
-			 */
-			wait_for_alloc = true;
-			spin_unlock(&space_info->lock);
-			mutex_lock(&fs_info->chunk_mutex);
-			mutex_unlock(&fs_info->chunk_mutex);
-		} else {
-			/* Proceed with allocation */
-			space_info->chunk_alloc = 1;
-			wait_for_alloc = false;
-			spin_unlock(&space_info->lock);
-		}
-
-		cond_resched();
-	} while (wait_for_alloc);
-
-	mutex_lock(&fs_info->chunk_mutex);
-	trans->allocating_chunk = true;
-
-	/*
-	 * If we have mixed data/metadata chunks we want to make sure we keep
-	 * allocating mixed chunks instead of individual chunks.
-	 */
-	if (btrfs_mixed_space_info(space_info))
-		flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
-
-	/*
-	 * if we're doing a data chunk, go ahead and make sure that
-	 * we keep a reasonable number of metadata chunks allocated in the
-	 * FS as well.
-	 */
-	if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
-		fs_info->data_chunk_allocations++;
-		if (!(fs_info->data_chunk_allocations %
-		      fs_info->metadata_ratio))
-			force_metadata_allocation(fs_info);
-	}
-
-	/*
-	 * Check if we have enough space in SYSTEM chunk because we may need
-	 * to update devices.
-	 */
-	check_system_chunk(trans, flags);
-
-	ret = btrfs_alloc_chunk(trans, flags);
-	trans->allocating_chunk = false;
-
-	spin_lock(&space_info->lock);
-	if (ret < 0) {
-		if (ret == -ENOSPC)
-			space_info->full = 1;
-		else
-			goto out;
-	} else {
-		ret = 1;
-		space_info->max_extent_size = 0;
-	}
-
-	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
-out:
-	space_info->chunk_alloc = 0;
-	spin_unlock(&space_info->lock);
-	mutex_unlock(&fs_info->chunk_mutex);
-	/*
-	 * When we allocate a new chunk we reserve space in the chunk block
-	 * reserve to make sure we can COW nodes/leafs in the chunk tree or
-	 * add new nodes/leafs to it if we end up needing to do it when
-	 * inserting the chunk item and updating device items as part of the
-	 * second phase of chunk allocation, performed by
-	 * btrfs_finish_chunk_alloc(). So make sure we don't accumulate a
-	 * large number of new block groups to create in our transaction
-	 * handle's new_bgs list to avoid exhausting the chunk block reserve
-	 * in extreme cases - like having a single transaction create many new
-	 * block groups when starting to write out the free space caches of all
-	 * the block groups that were made dirty during the lifetime of the
-	 * transaction.
-	 */
-	if (trans->chunk_bytes_reserved >= (u64)SZ_2M)
-		btrfs_create_pending_block_groups(trans);
-
-	return ret;
-}
-
 static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
 {
 	struct btrfs_block_group_cache *cache;
@@ -5837,13 +5600,6 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
-int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
-{
-	u64 alloc_flags = get_alloc_profile(trans->fs_info, type);
-
-	return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
-}
-
 /*
  * helper to account the unused space of all the readonly block group in the
  * space_info. takes mirrors into account.
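
Side note (not part of the patch): the comment above btrfs_chunk_alloc() documents its return convention, and btrfs_force_chunk_alloc() is just a thin wrapper around it. A minimal, hypothetical caller following that convention might look like the sketch below; the function name example_alloc_data_chunk is made up for illustration only.

/*
 * Illustrative sketch only -- not part of this commit.  It shows how a
 * caller could use the relocated btrfs_chunk_alloc() interface following
 * the return convention documented above: 1 on successful allocation,
 * 0 when no new chunk is needed, negative errno (e.g. -ENOSPC) on error.
 */
static int example_alloc_data_chunk(struct btrfs_trans_handle *trans)
{
	u64 alloc_flags = btrfs_get_alloc_profile(trans->fs_info,
						  BTRFS_BLOCK_GROUP_DATA);
	int ret;

	/* CHUNK_ALLOC_FORCE skips the should_alloc_chunk() heuristics */
	ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
	if (ret < 0)
		return ret;	/* allocation failed, e.g. -ENOSPC */

	return 0;
}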