diff options
Diffstat (limited to 'fs/btrfs/free-space-cache.c')
-rw-r--r-- | fs/btrfs/free-space-cache.c | 117 |
1 file changed, 110 insertions, 7 deletions
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 33848196550e..030847bf7cec 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -27,10 +27,17 @@ | |||
27 | #include "disk-io.h" | 27 | #include "disk-io.h" |
28 | #include "extent_io.h" | 28 | #include "extent_io.h" |
29 | #include "inode-map.h" | 29 | #include "inode-map.h" |
30 | #include "volumes.h" | ||
30 | 31 | ||
31 | #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) | 32 | #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) |
32 | #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) | 33 | #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) |
33 | 34 | ||
35 | struct btrfs_trim_range { | ||
36 | u64 start; | ||
37 | u64 bytes; | ||
38 | struct list_head list; | ||
39 | }; | ||
40 | |||
34 | static int link_free_space(struct btrfs_free_space_ctl *ctl, | 41 | static int link_free_space(struct btrfs_free_space_ctl *ctl, |
35 | struct btrfs_free_space *info); | 42 | struct btrfs_free_space *info); |
36 | static void unlink_free_space(struct btrfs_free_space_ctl *ctl, | 43 | static void unlink_free_space(struct btrfs_free_space_ctl *ctl, |
@@ -881,6 +888,7 @@ int write_cache_extent_entries(struct io_ctl *io_ctl, | |||
881 | int ret; | 888 | int ret; |
882 | struct btrfs_free_cluster *cluster = NULL; | 889 | struct btrfs_free_cluster *cluster = NULL; |
883 | struct rb_node *node = rb_first(&ctl->free_space_offset); | 890 | struct rb_node *node = rb_first(&ctl->free_space_offset); |
891 | struct btrfs_trim_range *trim_entry; | ||
884 | 892 | ||
885 | /* Get the cluster for this block_group if it exists */ | 893 | /* Get the cluster for this block_group if it exists */ |
886 | if (block_group && !list_empty(&block_group->cluster_list)) { | 894 | if (block_group && !list_empty(&block_group->cluster_list)) { |
@@ -916,6 +924,21 @@ int write_cache_extent_entries(struct io_ctl *io_ctl, | |||
916 | cluster = NULL; | 924 | cluster = NULL; |
917 | } | 925 | } |
918 | } | 926 | } |
927 | |||
928 | /* | ||
929 | * Make sure we don't miss any range that was removed from our rbtree | ||
930 | * because trimming is running. Otherwise after a umount+mount (or crash | ||
931 | * after committing the transaction) we would leak free space and get | ||
932 | * an inconsistent free space cache report from fsck. | ||
933 | */ | ||
934 | list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) { | ||
935 | ret = io_ctl_add_entry(io_ctl, trim_entry->start, | ||
936 | trim_entry->bytes, NULL); | ||
937 | if (ret) | ||
938 | goto fail; | ||
939 | *entries += 1; | ||
940 | } | ||
941 | |||
919 | return 0; | 942 | return 0; |
920 | fail: | 943 | fail: |
921 | return -ENOSPC; | 944 | return -ENOSPC; |
@@ -1135,12 +1158,15 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
1135 | 1158 | ||
1136 | io_ctl_set_generation(&io_ctl, trans->transid); | 1159 | io_ctl_set_generation(&io_ctl, trans->transid); |
1137 | 1160 | ||
1161 | mutex_lock(&ctl->cache_writeout_mutex); | ||
1138 | /* Write out the extent entries in the free space cache */ | 1162 | /* Write out the extent entries in the free space cache */ |
1139 | ret = write_cache_extent_entries(&io_ctl, ctl, | 1163 | ret = write_cache_extent_entries(&io_ctl, ctl, |
1140 | block_group, &entries, &bitmaps, | 1164 | block_group, &entries, &bitmaps, |
1141 | &bitmap_list); | 1165 | &bitmap_list); |
1142 | if (ret) | 1166 | if (ret) { |
1167 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
1143 | goto out_nospc; | 1168 | goto out_nospc; |
1169 | } | ||
1144 | 1170 | ||
1145 | /* | 1171 | /* |
1146 | * Some spaces that are freed in the current transaction are pinned, | 1172 | * Some spaces that are freed in the current transaction are pinned, |
@@ -1148,11 +1174,18 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
1148 | * committed, we shouldn't lose them. | 1174 | * committed, we shouldn't lose them. |
1149 | */ | 1175 | */ |
1150 | ret = write_pinned_extent_entries(root, block_group, &io_ctl, &entries); | 1176 | ret = write_pinned_extent_entries(root, block_group, &io_ctl, &entries); |
1151 | if (ret) | 1177 | if (ret) { |
1178 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
1152 | goto out_nospc; | 1179 | goto out_nospc; |
1180 | } | ||
1153 | 1181 | ||
1154 | /* At last, we write out all the bitmaps. */ | 1182 | /* |
1183 | * At last, we write out all the bitmaps and keep cache_writeout_mutex | ||
1184 | * locked while doing it because a concurrent trim can be manipulating | ||
1185 | * or freeing the bitmap. | ||
1186 | */ | ||
1155 | ret = write_bitmap_entries(&io_ctl, &bitmap_list); | 1187 | ret = write_bitmap_entries(&io_ctl, &bitmap_list); |
1188 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
1156 | if (ret) | 1189 | if (ret) |
1157 | goto out_nospc; | 1190 | goto out_nospc; |
1158 | 1191 | ||
@@ -2295,6 +2328,8 @@ void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) | |||
2295 | ctl->start = block_group->key.objectid; | 2328 | ctl->start = block_group->key.objectid; |
2296 | ctl->private = block_group; | 2329 | ctl->private = block_group; |
2297 | ctl->op = &free_space_op; | 2330 | ctl->op = &free_space_op; |
2331 | INIT_LIST_HEAD(&ctl->trimming_ranges); | ||
2332 | mutex_init(&ctl->cache_writeout_mutex); | ||
2298 | 2333 | ||
2299 | /* | 2334 | /* |
2300 | * we only want to have 32k of ram per block group for keeping | 2335 | * we only want to have 32k of ram per block group for keeping |
@@ -2911,10 +2946,12 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) | |||
2911 | 2946 | ||
2912 | static int do_trimming(struct btrfs_block_group_cache *block_group, | 2947 | static int do_trimming(struct btrfs_block_group_cache *block_group, |
2913 | u64 *total_trimmed, u64 start, u64 bytes, | 2948 | u64 *total_trimmed, u64 start, u64 bytes, |
2914 | u64 reserved_start, u64 reserved_bytes) | 2949 | u64 reserved_start, u64 reserved_bytes, |
2950 | struct btrfs_trim_range *trim_entry) | ||
2915 | { | 2951 | { |
2916 | struct btrfs_space_info *space_info = block_group->space_info; | 2952 | struct btrfs_space_info *space_info = block_group->space_info; |
2917 | struct btrfs_fs_info *fs_info = block_group->fs_info; | 2953 | struct btrfs_fs_info *fs_info = block_group->fs_info; |
2954 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | ||
2918 | int ret; | 2955 | int ret; |
2919 | int update = 0; | 2956 | int update = 0; |
2920 | u64 trimmed = 0; | 2957 | u64 trimmed = 0; |
@@ -2934,7 +2971,10 @@ static int do_trimming(struct btrfs_block_group_cache *block_group, | |||
2934 | if (!ret) | 2971 | if (!ret) |
2935 | *total_trimmed += trimmed; | 2972 | *total_trimmed += trimmed; |
2936 | 2973 | ||
2974 | mutex_lock(&ctl->cache_writeout_mutex); | ||
2937 | btrfs_add_free_space(block_group, reserved_start, reserved_bytes); | 2975 | btrfs_add_free_space(block_group, reserved_start, reserved_bytes); |
2976 | list_del(&trim_entry->list); | ||
2977 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
2938 | 2978 | ||
2939 | if (update) { | 2979 | if (update) { |
2940 | spin_lock(&space_info->lock); | 2980 | spin_lock(&space_info->lock); |
@@ -2962,16 +3002,21 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, | |||
2962 | u64 bytes; | 3002 | u64 bytes; |
2963 | 3003 | ||
2964 | while (start < end) { | 3004 | while (start < end) { |
3005 | struct btrfs_trim_range trim_entry; | ||
3006 | |||
3007 | mutex_lock(&ctl->cache_writeout_mutex); | ||
2965 | spin_lock(&ctl->tree_lock); | 3008 | spin_lock(&ctl->tree_lock); |
2966 | 3009 | ||
2967 | if (ctl->free_space < minlen) { | 3010 | if (ctl->free_space < minlen) { |
2968 | spin_unlock(&ctl->tree_lock); | 3011 | spin_unlock(&ctl->tree_lock); |
3012 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
2969 | break; | 3013 | break; |
2970 | } | 3014 | } |
2971 | 3015 | ||
2972 | entry = tree_search_offset(ctl, start, 0, 1); | 3016 | entry = tree_search_offset(ctl, start, 0, 1); |
2973 | if (!entry) { | 3017 | if (!entry) { |
2974 | spin_unlock(&ctl->tree_lock); | 3018 | spin_unlock(&ctl->tree_lock); |
3019 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
2975 | break; | 3020 | break; |
2976 | } | 3021 | } |
2977 | 3022 | ||
@@ -2980,6 +3025,7 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, | |||
2980 | node = rb_next(&entry->offset_index); | 3025 | node = rb_next(&entry->offset_index); |
2981 | if (!node) { | 3026 | if (!node) { |
2982 | spin_unlock(&ctl->tree_lock); | 3027 | spin_unlock(&ctl->tree_lock); |
3028 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
2983 | goto out; | 3029 | goto out; |
2984 | } | 3030 | } |
2985 | entry = rb_entry(node, struct btrfs_free_space, | 3031 | entry = rb_entry(node, struct btrfs_free_space, |
@@ -2988,6 +3034,7 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, | |||
2988 | 3034 | ||
2989 | if (entry->offset >= end) { | 3035 | if (entry->offset >= end) { |
2990 | spin_unlock(&ctl->tree_lock); | 3036 | spin_unlock(&ctl->tree_lock); |
3037 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
2991 | break; | 3038 | break; |
2992 | } | 3039 | } |
2993 | 3040 | ||
@@ -2997,6 +3044,7 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, | |||
2997 | bytes = min(extent_start + extent_bytes, end) - start; | 3044 | bytes = min(extent_start + extent_bytes, end) - start; |
2998 | if (bytes < minlen) { | 3045 | if (bytes < minlen) { |
2999 | spin_unlock(&ctl->tree_lock); | 3046 | spin_unlock(&ctl->tree_lock); |
3047 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
3000 | goto next; | 3048 | goto next; |
3001 | } | 3049 | } |
3002 | 3050 | ||
@@ -3004,9 +3052,13 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, | |||
3004 | kmem_cache_free(btrfs_free_space_cachep, entry); | 3052 | kmem_cache_free(btrfs_free_space_cachep, entry); |
3005 | 3053 | ||
3006 | spin_unlock(&ctl->tree_lock); | 3054 | spin_unlock(&ctl->tree_lock); |
3055 | trim_entry.start = extent_start; | ||
3056 | trim_entry.bytes = extent_bytes; | ||
3057 | list_add_tail(&trim_entry.list, &ctl->trimming_ranges); | ||
3058 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
3007 | 3059 | ||
3008 | ret = do_trimming(block_group, total_trimmed, start, bytes, | 3060 | ret = do_trimming(block_group, total_trimmed, start, bytes, |
3009 | extent_start, extent_bytes); | 3061 | extent_start, extent_bytes, &trim_entry); |
3010 | if (ret) | 3062 | if (ret) |
3011 | break; | 3063 | break; |
3012 | next: | 3064 | next: |
@@ -3035,17 +3087,21 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group, | |||
3035 | 3087 | ||
3036 | while (offset < end) { | 3088 | while (offset < end) { |
3037 | bool next_bitmap = false; | 3089 | bool next_bitmap = false; |
3090 | struct btrfs_trim_range trim_entry; | ||
3038 | 3091 | ||
3092 | mutex_lock(&ctl->cache_writeout_mutex); | ||
3039 | spin_lock(&ctl->tree_lock); | 3093 | spin_lock(&ctl->tree_lock); |
3040 | 3094 | ||
3041 | if (ctl->free_space < minlen) { | 3095 | if (ctl->free_space < minlen) { |
3042 | spin_unlock(&ctl->tree_lock); | 3096 | spin_unlock(&ctl->tree_lock); |
3097 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
3043 | break; | 3098 | break; |
3044 | } | 3099 | } |
3045 | 3100 | ||
3046 | entry = tree_search_offset(ctl, offset, 1, 0); | 3101 | entry = tree_search_offset(ctl, offset, 1, 0); |
3047 | if (!entry) { | 3102 | if (!entry) { |
3048 | spin_unlock(&ctl->tree_lock); | 3103 | spin_unlock(&ctl->tree_lock); |
3104 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
3049 | next_bitmap = true; | 3105 | next_bitmap = true; |
3050 | goto next; | 3106 | goto next; |
3051 | } | 3107 | } |
@@ -3054,6 +3110,7 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group, | |||
3054 | ret2 = search_bitmap(ctl, entry, &start, &bytes); | 3110 | ret2 = search_bitmap(ctl, entry, &start, &bytes); |
3055 | if (ret2 || start >= end) { | 3111 | if (ret2 || start >= end) { |
3056 | spin_unlock(&ctl->tree_lock); | 3112 | spin_unlock(&ctl->tree_lock); |
3113 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
3057 | next_bitmap = true; | 3114 | next_bitmap = true; |
3058 | goto next; | 3115 | goto next; |
3059 | } | 3116 | } |
@@ -3061,6 +3118,7 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group, | |||
3061 | bytes = min(bytes, end - start); | 3118 | bytes = min(bytes, end - start); |
3062 | if (bytes < minlen) { | 3119 | if (bytes < minlen) { |
3063 | spin_unlock(&ctl->tree_lock); | 3120 | spin_unlock(&ctl->tree_lock); |
3121 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
3064 | goto next; | 3122 | goto next; |
3065 | } | 3123 | } |
3066 | 3124 | ||
@@ -3069,9 +3127,13 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group, | |||
3069 | free_bitmap(ctl, entry); | 3127 | free_bitmap(ctl, entry); |
3070 | 3128 | ||
3071 | spin_unlock(&ctl->tree_lock); | 3129 | spin_unlock(&ctl->tree_lock); |
3130 | trim_entry.start = start; | ||
3131 | trim_entry.bytes = bytes; | ||
3132 | list_add_tail(&trim_entry.list, &ctl->trimming_ranges); | ||
3133 | mutex_unlock(&ctl->cache_writeout_mutex); | ||
3072 | 3134 | ||
3073 | ret = do_trimming(block_group, total_trimmed, start, bytes, | 3135 | ret = do_trimming(block_group, total_trimmed, start, bytes, |
3074 | start, bytes); | 3136 | start, bytes, &trim_entry); |
3075 | if (ret) | 3137 | if (ret) |
3076 | break; | 3138 | break; |
3077 | next: | 3139 | next: |
@@ -3101,11 +3163,52 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, | |||
3101 | 3163 | ||
3102 | *trimmed = 0; | 3164 | *trimmed = 0; |
3103 | 3165 | ||
3166 | spin_lock(&block_group->lock); | ||
3167 | if (block_group->removed) { | ||
3168 | spin_unlock(&block_group->lock); | ||
3169 | return 0; | ||
3170 | } | ||
3171 | atomic_inc(&block_group->trimming); | ||
3172 | spin_unlock(&block_group->lock); | ||
3173 | |||
3104 | ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); | 3174 | ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); |
3105 | if (ret) | 3175 | if (ret) |
3106 | return ret; | 3176 | goto out; |
3107 | 3177 | ||
3108 | ret = trim_bitmaps(block_group, trimmed, start, end, minlen); | 3178 | ret = trim_bitmaps(block_group, trimmed, start, end, minlen); |
3179 | out: | ||
3180 | spin_lock(&block_group->lock); | ||
3181 | if (atomic_dec_and_test(&block_group->trimming) && | ||
3182 | block_group->removed) { | ||
3183 | struct extent_map_tree *em_tree; | ||
3184 | struct extent_map *em; | ||
3185 | |||
3186 | spin_unlock(&block_group->lock); | ||
3187 | |||
3188 | em_tree = &block_group->fs_info->mapping_tree.map_tree; | ||
3189 | write_lock(&em_tree->lock); | ||
3190 | em = lookup_extent_mapping(em_tree, block_group->key.objectid, | ||
3191 | 1); | ||
3192 | BUG_ON(!em); /* logic error, can't happen */ | ||
3193 | remove_extent_mapping(em_tree, em); | ||
3194 | write_unlock(&em_tree->lock); | ||
3195 | |||
3196 | lock_chunks(block_group->fs_info->chunk_root); | ||
3197 | list_del_init(&em->list); | ||
3198 | unlock_chunks(block_group->fs_info->chunk_root); | ||
3199 | |||
3200 | /* once for us and once for the tree */ | ||
3201 | free_extent_map(em); | ||
3202 | free_extent_map(em); | ||
3203 | |||
3204 | /* | ||
3205 | * We've left one free space entry and other tasks trimming | ||
3206 | * this block group have left 1 entry each one. Free them. | ||
3207 | */ | ||
3208 | __btrfs_remove_free_space_cache(block_group->free_space_ctl); | ||
3209 | } else { | ||
3210 | spin_unlock(&block_group->lock); | ||
3211 | } | ||
3109 | 3212 | ||
3110 | return ret; | 3213 | return ret; |
3111 | } | 3214 | } |