Diffstat (limited to 'fs/btrfs/block-group.c')
 fs/btrfs/block-group.c | 246
 1 file changed, 246 insertions(+), 0 deletions(-)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index eebef70725c6..8f702cf4c0db 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -13,6 +13,7 @@
13#include "sysfs.h" 13#include "sysfs.h"
14#include "tree-log.h" 14#include "tree-log.h"
15#include "delalloc-space.h" 15#include "delalloc-space.h"
16#include "math.h"
16 17
17void btrfs_get_block_group(struct btrfs_block_group_cache *cache) 18void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
18{ 19{
@@ -2694,3 +2695,248 @@ void btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
2694 spin_unlock(&cache->lock); 2695 spin_unlock(&cache->lock);
2695 spin_unlock(&space_info->lock); 2696 spin_unlock(&space_info->lock);
2696} 2697}
2698
2699static void force_metadata_allocation(struct btrfs_fs_info *info)
2700{
2701 struct list_head *head = &info->space_info;
2702 struct btrfs_space_info *found;
2703
2704 rcu_read_lock();
2705 list_for_each_entry_rcu(found, head, list) {
2706 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
2707 found->force_alloc = CHUNK_ALLOC_FORCE;
2708 }
2709 rcu_read_unlock();
2710}
2711
2712static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
2713 struct btrfs_space_info *sinfo, int force)
2714{
2715 u64 bytes_used = btrfs_space_info_used(sinfo, false);
2716 u64 thresh;
2717
2718 if (force == CHUNK_ALLOC_FORCE)
2719 return 1;
2720
+	/*
+	 * in limited mode, we want to have some free space up to
+	 * about 1% of the FS size.
+	 */
+	if (force == CHUNK_ALLOC_LIMITED) {
+		thresh = btrfs_super_total_bytes(fs_info->super_copy);
+		thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
+
+		if (sinfo->total_bytes - bytes_used < thresh)
+			return 1;
+	}
+
+	if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8))
+		return 0;
+	return 1;
+}
+
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
+{
+	u64 alloc_flags = btrfs_get_alloc_profile(trans->fs_info, type);
+
+	return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+}
+
+/*
+ * If force is CHUNK_ALLOC_FORCE:
+ *    - return 1 if it successfully allocates a chunk,
+ *    - return errors including -ENOSPC otherwise.
+ * If force is NOT CHUNK_ALLOC_FORCE:
+ *    - return 0 if it doesn't need to allocate a new chunk,
+ *    - return 1 if it successfully allocates a chunk,
+ *    - return errors including -ENOSPC otherwise.
+ */
+int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
+		      enum btrfs_chunk_alloc_enum force)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct btrfs_space_info *space_info;
+	bool wait_for_alloc = false;
+	bool should_alloc = false;
+	int ret = 0;
+
+	/* Don't re-enter if we're already allocating a chunk */
+	if (trans->allocating_chunk)
+		return -ENOSPC;
+
+	space_info = btrfs_find_space_info(fs_info, flags);
+	ASSERT(space_info);
+
+	do {
+		spin_lock(&space_info->lock);
+		if (force < space_info->force_alloc)
+			force = space_info->force_alloc;
+		should_alloc = should_alloc_chunk(fs_info, space_info, force);
+		if (space_info->full) {
+			/* No more free physical space */
+			if (should_alloc)
+				ret = -ENOSPC;
+			else
+				ret = 0;
+			spin_unlock(&space_info->lock);
+			return ret;
+		} else if (!should_alloc) {
+			spin_unlock(&space_info->lock);
+			return 0;
+		} else if (space_info->chunk_alloc) {
+			/*
+			 * Someone is already allocating, so we need to block
+			 * until this someone is finished and then loop to
+			 * recheck if we should continue with our allocation
+			 * attempt.
+			 */
+			wait_for_alloc = true;
+			spin_unlock(&space_info->lock);
+			mutex_lock(&fs_info->chunk_mutex);
+			mutex_unlock(&fs_info->chunk_mutex);
+		} else {
+			/* Proceed with allocation */
+			space_info->chunk_alloc = 1;
+			wait_for_alloc = false;
+			spin_unlock(&space_info->lock);
+		}
+
+		cond_resched();
+	} while (wait_for_alloc);
+
+	mutex_lock(&fs_info->chunk_mutex);
+	trans->allocating_chunk = true;
+
+	/*
+	 * If we have mixed data/metadata chunks we want to make sure we keep
+	 * allocating mixed chunks instead of individual chunks.
+	 */
+	if (btrfs_mixed_space_info(space_info))
+		flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
+
+	/*
+	 * if we're doing a data chunk, go ahead and make sure that
+	 * we keep a reasonable number of metadata chunks allocated in the
+	 * FS as well.
+	 */
+	if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
+		fs_info->data_chunk_allocations++;
+		if (!(fs_info->data_chunk_allocations %
+		      fs_info->metadata_ratio))
+			force_metadata_allocation(fs_info);
+	}
+
+	/*
+	 * Check if we have enough space in SYSTEM chunk because we may need
+	 * to update devices.
+	 */
+	check_system_chunk(trans, flags);
+
+	ret = btrfs_alloc_chunk(trans, flags);
+	trans->allocating_chunk = false;
+
+	spin_lock(&space_info->lock);
+	if (ret < 0) {
+		if (ret == -ENOSPC)
+			space_info->full = 1;
+		else
+			goto out;
+	} else {
+		ret = 1;
+		space_info->max_extent_size = 0;
+	}
+
+	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
+out:
+	space_info->chunk_alloc = 0;
+	spin_unlock(&space_info->lock);
+	mutex_unlock(&fs_info->chunk_mutex);
+	/*
+	 * When we allocate a new chunk we reserve space in the chunk block
+	 * reserve to make sure we can COW nodes/leaves in the chunk tree or
+	 * add new nodes/leaves to it if we end up needing to do it when
+	 * inserting the chunk item and updating device items as part of the
+	 * second phase of chunk allocation, performed by
+	 * btrfs_finish_chunk_alloc(). So make sure we don't accumulate a
+	 * large number of new block groups to create in our transaction
+	 * handle's new_bgs list to avoid exhausting the chunk block reserve
+	 * in extreme cases - like having a single transaction create many new
+	 * block groups when starting to write out the free space caches of all
+	 * the block groups that were made dirty during the lifetime of the
+	 * transaction.
+	 */
+	if (trans->chunk_bytes_reserved >= (u64)SZ_2M)
+		btrfs_create_pending_block_groups(trans);
+
+	return ret;
+}
+
+static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
+{
+	u64 num_dev;
+
+	num_dev = btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max;
+	if (!num_dev)
+		num_dev = fs_info->fs_devices->rw_devices;
+
+	return num_dev;
+}
+
2885/*
2886 * If @is_allocation is true, reserve space in the system space info necessary
2887 * for allocating a chunk, otherwise if it's false, reserve space necessary for
2888 * removing a chunk.
2889 */
+void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct btrfs_space_info *info;
+	u64 left;
+	u64 thresh;
+	int ret = 0;
+	u64 num_devs;
+
+	/*
+	 * Needed because we can end up allocating a system chunk and need an
+	 * atomic and race free space reservation in the chunk block reserve.
+	 */
+	lockdep_assert_held(&fs_info->chunk_mutex);
+
+	info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
+	spin_lock(&info->lock);
+	left = info->total_bytes - btrfs_space_info_used(info, true);
+	spin_unlock(&info->lock);
+
+	num_devs = get_profile_num_devs(fs_info, type);
+
+	/* num_devs device items to update and 1 chunk item to add or remove */
+	thresh = btrfs_calc_trunc_metadata_size(fs_info, num_devs) +
+		btrfs_calc_trans_metadata_size(fs_info, 1);
+
+	if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
+		btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
+			   left, thresh, type);
+		btrfs_dump_space_info(fs_info, info, 0, 0);
+	}
+
+	if (left < thresh) {
+		u64 flags = btrfs_system_alloc_profile(fs_info);
+
+		/*
+		 * Ignore failure to create the system chunk. We might end up
+		 * not needing it, as we might not need to COW all nodes/leaves
+		 * from the paths we visit in the chunk tree (they were already
+		 * COWed or created in the current transaction, for example).
+		 */
+		ret = btrfs_alloc_chunk(trans, flags);
+	}
+
+	if (!ret) {
+		ret = btrfs_block_rsv_add(fs_info->chunk_root,
+					  &fs_info->chunk_block_rsv,
+					  thresh, BTRFS_RESERVE_NO_FLUSH);
+		if (!ret)
+			trans->chunk_bytes_reserved += thresh;
+	}
+}
+
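
The comment above btrfs_chunk_alloc() documents a three-way return convention
(0 when no allocation is needed, 1 when a chunk was allocated, a negative errno
such as -ENOSPC on failure). As a rough illustration only, here is a minimal
caller sketch showing how that convention maps onto an ordinary success/failure
result; the function example_prealloc_metadata_chunk() and the exact header
choices are assumptions made for illustration and are not part of this patch.

/* Hypothetical caller sketch, not part of the patch above. */
#include "ctree.h"
#include "block-group.h"	/* assumed to declare btrfs_chunk_alloc() */

static int example_prealloc_metadata_chunk(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	u64 alloc_flags = btrfs_get_alloc_profile(fs_info,
						  BTRFS_BLOCK_GROUP_METADATA);
	int ret;

	/*
	 * CHUNK_ALLOC_NO_FORCE lets should_alloc_chunk() decide whether an
	 * allocation is actually worthwhile. Per the convention above, 0 means
	 * no chunk was needed, 1 means a new chunk was allocated and a
	 * negative value (e.g. -ENOSPC) is an error.
	 */
	ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_NO_FORCE);
	if (ret < 0)
		return ret;

	/* Both 0 and 1 mean the caller has the space it asked about. */
	return 0;
}

A caller that must get a new chunk regardless of the heuristics would instead
use btrfs_force_chunk_alloc(trans, BTRFS_BLOCK_GROUP_METADATA), which simply
wraps btrfs_chunk_alloc() with CHUNK_ALLOC_FORCE, as shown in the patch.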