diff options
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- | fs/btrfs/volumes.c | 235 |
1 files changed, 139 insertions, 96 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index dd13eb81ee40..309a57b9fc85 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -33,17 +33,6 @@ | |||
33 | #include "volumes.h" | 33 | #include "volumes.h" |
34 | #include "async-thread.h" | 34 | #include "async-thread.h" |
35 | 35 | ||
36 | struct map_lookup { | ||
37 | u64 type; | ||
38 | int io_align; | ||
39 | int io_width; | ||
40 | int stripe_len; | ||
41 | int sector_size; | ||
42 | int num_stripes; | ||
43 | int sub_stripes; | ||
44 | struct btrfs_bio_stripe stripes[]; | ||
45 | }; | ||
46 | |||
47 | static int init_first_rw_device(struct btrfs_trans_handle *trans, | 36 | static int init_first_rw_device(struct btrfs_trans_handle *trans, |
48 | struct btrfs_root *root, | 37 | struct btrfs_root *root, |
49 | struct btrfs_device *device); | 38 | struct btrfs_device *device); |
@@ -162,7 +151,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
162 | struct bio *cur; | 151 | struct bio *cur; |
163 | int again = 0; | 152 | int again = 0; |
164 | unsigned long num_run; | 153 | unsigned long num_run; |
165 | unsigned long num_sync_run; | ||
166 | unsigned long batch_run = 0; | 154 | unsigned long batch_run = 0; |
167 | unsigned long limit; | 155 | unsigned long limit; |
168 | unsigned long last_waited = 0; | 156 | unsigned long last_waited = 0; |
@@ -173,11 +161,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
173 | limit = btrfs_async_submit_limit(fs_info); | 161 | limit = btrfs_async_submit_limit(fs_info); |
174 | limit = limit * 2 / 3; | 162 | limit = limit * 2 / 3; |
175 | 163 | ||
176 | /* we want to make sure that every time we switch from the sync | ||
177 | * list to the normal list, we unplug | ||
178 | */ | ||
179 | num_sync_run = 0; | ||
180 | |||
181 | loop: | 164 | loop: |
182 | spin_lock(&device->io_lock); | 165 | spin_lock(&device->io_lock); |
183 | 166 | ||
@@ -223,15 +206,6 @@ loop_lock: | |||
223 | 206 | ||
224 | spin_unlock(&device->io_lock); | 207 | spin_unlock(&device->io_lock); |
225 | 208 | ||
226 | /* | ||
227 | * if we're doing the regular priority list, make sure we unplug | ||
228 | * for any high prio bios we've sent down | ||
229 | */ | ||
230 | if (pending_bios == &device->pending_bios && num_sync_run > 0) { | ||
231 | num_sync_run = 0; | ||
232 | blk_run_backing_dev(bdi, NULL); | ||
233 | } | ||
234 | |||
235 | while (pending) { | 209 | while (pending) { |
236 | 210 | ||
237 | rmb(); | 211 | rmb(); |
@@ -259,19 +233,11 @@ loop_lock: | |||
259 | 233 | ||
260 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); | 234 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); |
261 | 235 | ||
262 | if (cur->bi_rw & REQ_SYNC) | ||
263 | num_sync_run++; | ||
264 | |||
265 | submit_bio(cur->bi_rw, cur); | 236 | submit_bio(cur->bi_rw, cur); |
266 | num_run++; | 237 | num_run++; |
267 | batch_run++; | 238 | batch_run++; |
268 | if (need_resched()) { | 239 | if (need_resched()) |
269 | if (num_sync_run) { | ||
270 | blk_run_backing_dev(bdi, NULL); | ||
271 | num_sync_run = 0; | ||
272 | } | ||
273 | cond_resched(); | 240 | cond_resched(); |
274 | } | ||
275 | 241 | ||
276 | /* | 242 | /* |
277 | * we made progress, there is more work to do and the bdi | 243 | * we made progress, there is more work to do and the bdi |
@@ -304,13 +270,8 @@ loop_lock: | |||
304 | * against it before looping | 270 | * against it before looping |
305 | */ | 271 | */ |
306 | last_waited = ioc->last_waited; | 272 | last_waited = ioc->last_waited; |
307 | if (need_resched()) { | 273 | if (need_resched()) |
308 | if (num_sync_run) { | ||
309 | blk_run_backing_dev(bdi, NULL); | ||
310 | num_sync_run = 0; | ||
311 | } | ||
312 | cond_resched(); | 274 | cond_resched(); |
313 | } | ||
314 | continue; | 275 | continue; |
315 | } | 276 | } |
316 | spin_lock(&device->io_lock); | 277 | spin_lock(&device->io_lock); |
@@ -323,22 +284,6 @@ loop_lock: | |||
323 | } | 284 | } |
324 | } | 285 | } |
325 | 286 | ||
326 | if (num_sync_run) { | ||
327 | num_sync_run = 0; | ||
328 | blk_run_backing_dev(bdi, NULL); | ||
329 | } | ||
330 | /* | ||
331 | * IO has already been through a long path to get here. Checksumming, | ||
332 | * async helper threads, perhaps compression. We've done a pretty | ||
333 | * good job of collecting a batch of IO and should just unplug | ||
334 | * the device right away. | ||
335 | * | ||
336 | * This will help anyone who is waiting on the IO, they might have | ||
337 | * already unplugged, but managed to do so before the bio they | ||
338 | * cared about found its way down here. | ||
339 | */ | ||
340 | blk_run_backing_dev(bdi, NULL); | ||
341 | |||
342 | cond_resched(); | 287 | cond_resched(); |
343 | if (again) | 288 | if (again) |
344 | goto loop; | 289 | goto loop; |
@@ -1923,6 +1868,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
1923 | 1868 | ||
1924 | BUG_ON(ret); | 1869 | BUG_ON(ret); |
1925 | 1870 | ||
1871 | trace_btrfs_chunk_free(root, map, chunk_offset, em->len); | ||
1872 | |||
1926 | if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { | 1873 | if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { |
1927 | ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); | 1874 | ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); |
1928 | BUG_ON(ret); | 1875 | BUG_ON(ret); |
@@ -2650,6 +2597,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
2650 | *num_bytes = chunk_bytes_by_type(type, calc_size, | 2597 | *num_bytes = chunk_bytes_by_type(type, calc_size, |
2651 | map->num_stripes, sub_stripes); | 2598 | map->num_stripes, sub_stripes); |
2652 | 2599 | ||
2600 | trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes); | ||
2601 | |||
2653 | em = alloc_extent_map(GFP_NOFS); | 2602 | em = alloc_extent_map(GFP_NOFS); |
2654 | if (!em) { | 2603 | if (!em) { |
2655 | ret = -ENOMEM; | 2604 | ret = -ENOMEM; |
@@ -2758,6 +2707,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | |||
2758 | item_size); | 2707 | item_size); |
2759 | BUG_ON(ret); | 2708 | BUG_ON(ret); |
2760 | } | 2709 | } |
2710 | |||
2761 | kfree(chunk); | 2711 | kfree(chunk); |
2762 | return 0; | 2712 | return 0; |
2763 | } | 2713 | } |
@@ -2955,14 +2905,17 @@ static int find_live_mirror(struct map_lookup *map, int first, int num, | |||
2955 | static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | 2905 | static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, |
2956 | u64 logical, u64 *length, | 2906 | u64 logical, u64 *length, |
2957 | struct btrfs_multi_bio **multi_ret, | 2907 | struct btrfs_multi_bio **multi_ret, |
2958 | int mirror_num, struct page *unplug_page) | 2908 | int mirror_num) |
2959 | { | 2909 | { |
2960 | struct extent_map *em; | 2910 | struct extent_map *em; |
2961 | struct map_lookup *map; | 2911 | struct map_lookup *map; |
2962 | struct extent_map_tree *em_tree = &map_tree->map_tree; | 2912 | struct extent_map_tree *em_tree = &map_tree->map_tree; |
2963 | u64 offset; | 2913 | u64 offset; |
2964 | u64 stripe_offset; | 2914 | u64 stripe_offset; |
2915 | u64 stripe_end_offset; | ||
2965 | u64 stripe_nr; | 2916 | u64 stripe_nr; |
2917 | u64 stripe_nr_orig; | ||
2918 | u64 stripe_nr_end; | ||
2966 | int stripes_allocated = 8; | 2919 | int stripes_allocated = 8; |
2967 | int stripes_required = 1; | 2920 | int stripes_required = 1; |
2968 | int stripe_index; | 2921 | int stripe_index; |
@@ -2971,7 +2924,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
2971 | int max_errors = 0; | 2924 | int max_errors = 0; |
2972 | struct btrfs_multi_bio *multi = NULL; | 2925 | struct btrfs_multi_bio *multi = NULL; |
2973 | 2926 | ||
2974 | if (multi_ret && !(rw & REQ_WRITE)) | 2927 | if (multi_ret && !(rw & (REQ_WRITE | REQ_DISCARD))) |
2975 | stripes_allocated = 1; | 2928 | stripes_allocated = 1; |
2976 | again: | 2929 | again: |
2977 | if (multi_ret) { | 2930 | if (multi_ret) { |
@@ -2987,11 +2940,6 @@ again: | |||
2987 | em = lookup_extent_mapping(em_tree, logical, *length); | 2940 | em = lookup_extent_mapping(em_tree, logical, *length); |
2988 | read_unlock(&em_tree->lock); | 2941 | read_unlock(&em_tree->lock); |
2989 | 2942 | ||
2990 | if (!em && unplug_page) { | ||
2991 | kfree(multi); | ||
2992 | return 0; | ||
2993 | } | ||
2994 | |||
2995 | if (!em) { | 2943 | if (!em) { |
2996 | printk(KERN_CRIT "unable to find logical %llu len %llu\n", | 2944 | printk(KERN_CRIT "unable to find logical %llu len %llu\n", |
2997 | (unsigned long long)logical, | 2945 | (unsigned long long)logical, |
@@ -3017,7 +2965,15 @@ again: | |||
3017 | max_errors = 1; | 2965 | max_errors = 1; |
3018 | } | 2966 | } |
3019 | } | 2967 | } |
3020 | if (multi_ret && (rw & REQ_WRITE) && | 2968 | if (rw & REQ_DISCARD) { |
2969 | if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | | ||
2970 | BTRFS_BLOCK_GROUP_RAID1 | | ||
2971 | BTRFS_BLOCK_GROUP_DUP | | ||
2972 | BTRFS_BLOCK_GROUP_RAID10)) { | ||
2973 | stripes_required = map->num_stripes; | ||
2974 | } | ||
2975 | } | ||
2976 | if (multi_ret && (rw & (REQ_WRITE | REQ_DISCARD)) && | ||
3021 | stripes_allocated < stripes_required) { | 2977 | stripes_allocated < stripes_required) { |
3022 | stripes_allocated = map->num_stripes; | 2978 | stripes_allocated = map->num_stripes; |
3023 | free_extent_map(em); | 2979 | free_extent_map(em); |
@@ -3037,23 +2993,37 @@ again: | |||
3037 | /* stripe_offset is the offset of this block in its stripe*/ | 2993 | /* stripe_offset is the offset of this block in its stripe*/ |
3038 | stripe_offset = offset - stripe_offset; | 2994 | stripe_offset = offset - stripe_offset; |
3039 | 2995 | ||
3040 | if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | | 2996 | if (rw & REQ_DISCARD) |
3041 | BTRFS_BLOCK_GROUP_RAID10 | | 2997 | *length = min_t(u64, em->len - offset, *length); |
3042 | BTRFS_BLOCK_GROUP_DUP)) { | 2998 | else if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | |
2999 | BTRFS_BLOCK_GROUP_RAID1 | | ||
3000 | BTRFS_BLOCK_GROUP_RAID10 | | ||
3001 | BTRFS_BLOCK_GROUP_DUP)) { | ||
3043 | /* we limit the length of each bio to what fits in a stripe */ | 3002 | /* we limit the length of each bio to what fits in a stripe */ |
3044 | *length = min_t(u64, em->len - offset, | 3003 | *length = min_t(u64, em->len - offset, |
3045 | map->stripe_len - stripe_offset); | 3004 | map->stripe_len - stripe_offset); |
3046 | } else { | 3005 | } else { |
3047 | *length = em->len - offset; | 3006 | *length = em->len - offset; |
3048 | } | 3007 | } |
3049 | 3008 | ||
3050 | if (!multi_ret && !unplug_page) | 3009 | if (!multi_ret) |
3051 | goto out; | 3010 | goto out; |
3052 | 3011 | ||
3053 | num_stripes = 1; | 3012 | num_stripes = 1; |
3054 | stripe_index = 0; | 3013 | stripe_index = 0; |
3055 | if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | 3014 | stripe_nr_orig = stripe_nr; |
3056 | if (unplug_page || (rw & REQ_WRITE)) | 3015 | stripe_nr_end = (offset + *length + map->stripe_len - 1) & |
3016 | (~(map->stripe_len - 1)); | ||
3017 | do_div(stripe_nr_end, map->stripe_len); | ||
3018 | stripe_end_offset = stripe_nr_end * map->stripe_len - | ||
3019 | (offset + *length); | ||
3020 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | ||
3021 | if (rw & REQ_DISCARD) | ||
3022 | num_stripes = min_t(u64, map->num_stripes, | ||
3023 | stripe_nr_end - stripe_nr_orig); | ||
3024 | stripe_index = do_div(stripe_nr, map->num_stripes); | ||
3025 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | ||
3026 | if (rw & (REQ_WRITE | REQ_DISCARD)) | ||
3057 | num_stripes = map->num_stripes; | 3027 | num_stripes = map->num_stripes; |
3058 | else if (mirror_num) | 3028 | else if (mirror_num) |
3059 | stripe_index = mirror_num - 1; | 3029 | stripe_index = mirror_num - 1; |
@@ -3064,7 +3034,7 @@ again: | |||
3064 | } | 3034 | } |
3065 | 3035 | ||
3066 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { | 3036 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { |
3067 | if (rw & REQ_WRITE) | 3037 | if (rw & (REQ_WRITE | REQ_DISCARD)) |
3068 | num_stripes = map->num_stripes; | 3038 | num_stripes = map->num_stripes; |
3069 | else if (mirror_num) | 3039 | else if (mirror_num) |
3070 | stripe_index = mirror_num - 1; | 3040 | stripe_index = mirror_num - 1; |
@@ -3075,8 +3045,12 @@ again: | |||
3075 | stripe_index = do_div(stripe_nr, factor); | 3045 | stripe_index = do_div(stripe_nr, factor); |
3076 | stripe_index *= map->sub_stripes; | 3046 | stripe_index *= map->sub_stripes; |
3077 | 3047 | ||
3078 | if (unplug_page || (rw & REQ_WRITE)) | 3048 | if (rw & REQ_WRITE) |
3079 | num_stripes = map->sub_stripes; | 3049 | num_stripes = map->sub_stripes; |
3050 | else if (rw & REQ_DISCARD) | ||
3051 | num_stripes = min_t(u64, map->sub_stripes * | ||
3052 | (stripe_nr_end - stripe_nr_orig), | ||
3053 | map->num_stripes); | ||
3080 | else if (mirror_num) | 3054 | else if (mirror_num) |
3081 | stripe_index += mirror_num - 1; | 3055 | stripe_index += mirror_num - 1; |
3082 | else { | 3056 | else { |
@@ -3094,24 +3068,101 @@ again: | |||
3094 | } | 3068 | } |
3095 | BUG_ON(stripe_index >= map->num_stripes); | 3069 | BUG_ON(stripe_index >= map->num_stripes); |
3096 | 3070 | ||
3097 | for (i = 0; i < num_stripes; i++) { | 3071 | if (rw & REQ_DISCARD) { |
3098 | if (unplug_page) { | 3072 | for (i = 0; i < num_stripes; i++) { |
3099 | struct btrfs_device *device; | ||
3100 | struct backing_dev_info *bdi; | ||
3101 | |||
3102 | device = map->stripes[stripe_index].dev; | ||
3103 | if (device->bdev) { | ||
3104 | bdi = blk_get_backing_dev_info(device->bdev); | ||
3105 | if (bdi->unplug_io_fn) | ||
3106 | bdi->unplug_io_fn(bdi, unplug_page); | ||
3107 | } | ||
3108 | } else { | ||
3109 | multi->stripes[i].physical = | 3073 | multi->stripes[i].physical = |
3110 | map->stripes[stripe_index].physical + | 3074 | map->stripes[stripe_index].physical + |
3111 | stripe_offset + stripe_nr * map->stripe_len; | 3075 | stripe_offset + stripe_nr * map->stripe_len; |
3112 | multi->stripes[i].dev = map->stripes[stripe_index].dev; | 3076 | multi->stripes[i].dev = map->stripes[stripe_index].dev; |
3077 | |||
3078 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | ||
3079 | u64 stripes; | ||
3080 | u32 last_stripe = 0; | ||
3081 | int j; | ||
3082 | |||
3083 | div_u64_rem(stripe_nr_end - 1, | ||
3084 | map->num_stripes, | ||
3085 | &last_stripe); | ||
3086 | |||
3087 | for (j = 0; j < map->num_stripes; j++) { | ||
3088 | u32 test; | ||
3089 | |||
3090 | div_u64_rem(stripe_nr_end - 1 - j, | ||
3091 | map->num_stripes, &test); | ||
3092 | if (test == stripe_index) | ||
3093 | break; | ||
3094 | } | ||
3095 | stripes = stripe_nr_end - 1 - j; | ||
3096 | do_div(stripes, map->num_stripes); | ||
3097 | multi->stripes[i].length = map->stripe_len * | ||
3098 | (stripes - stripe_nr + 1); | ||
3099 | |||
3100 | if (i == 0) { | ||
3101 | multi->stripes[i].length -= | ||
3102 | stripe_offset; | ||
3103 | stripe_offset = 0; | ||
3104 | } | ||
3105 | if (stripe_index == last_stripe) | ||
3106 | multi->stripes[i].length -= | ||
3107 | stripe_end_offset; | ||
3108 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { | ||
3109 | u64 stripes; | ||
3110 | int j; | ||
3111 | int factor = map->num_stripes / | ||
3112 | map->sub_stripes; | ||
3113 | u32 last_stripe = 0; | ||
3114 | |||
3115 | div_u64_rem(stripe_nr_end - 1, | ||
3116 | factor, &last_stripe); | ||
3117 | last_stripe *= map->sub_stripes; | ||
3118 | |||
3119 | for (j = 0; j < factor; j++) { | ||
3120 | u32 test; | ||
3121 | |||
3122 | div_u64_rem(stripe_nr_end - 1 - j, | ||
3123 | factor, &test); | ||
3124 | |||
3125 | if (test == | ||
3126 | stripe_index / map->sub_stripes) | ||
3127 | break; | ||
3128 | } | ||
3129 | stripes = stripe_nr_end - 1 - j; | ||
3130 | do_div(stripes, factor); | ||
3131 | multi->stripes[i].length = map->stripe_len * | ||
3132 | (stripes - stripe_nr + 1); | ||
3133 | |||
3134 | if (i < map->sub_stripes) { | ||
3135 | multi->stripes[i].length -= | ||
3136 | stripe_offset; | ||
3137 | if (i == map->sub_stripes - 1) | ||
3138 | stripe_offset = 0; | ||
3139 | } | ||
3140 | if (stripe_index >= last_stripe && | ||
3141 | stripe_index <= (last_stripe + | ||
3142 | map->sub_stripes - 1)) { | ||
3143 | multi->stripes[i].length -= | ||
3144 | stripe_end_offset; | ||
3145 | } | ||
3146 | } else | ||
3147 | multi->stripes[i].length = *length; | ||
3148 | |||
3149 | stripe_index++; | ||
3150 | if (stripe_index == map->num_stripes) { | ||
3151 | /* This could only happen for RAID0/10 */ | ||
3152 | stripe_index = 0; | ||
3153 | stripe_nr++; | ||
3154 | } | ||
3155 | } | ||
3156 | } else { | ||
3157 | for (i = 0; i < num_stripes; i++) { | ||
3158 | multi->stripes[i].physical = | ||
3159 | map->stripes[stripe_index].physical + | ||
3160 | stripe_offset + | ||
3161 | stripe_nr * map->stripe_len; | ||
3162 | multi->stripes[i].dev = | ||
3163 | map->stripes[stripe_index].dev; | ||
3164 | stripe_index++; | ||
3113 | } | 3165 | } |
3114 | stripe_index++; | ||
3115 | } | 3166 | } |
3116 | if (multi_ret) { | 3167 | if (multi_ret) { |
3117 | *multi_ret = multi; | 3168 | *multi_ret = multi; |
@@ -3128,7 +3179,7 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
3128 | struct btrfs_multi_bio **multi_ret, int mirror_num) | 3179 | struct btrfs_multi_bio **multi_ret, int mirror_num) |
3129 | { | 3180 | { |
3130 | return __btrfs_map_block(map_tree, rw, logical, length, multi_ret, | 3181 | return __btrfs_map_block(map_tree, rw, logical, length, multi_ret, |
3131 | mirror_num, NULL); | 3182 | mirror_num); |
3132 | } | 3183 | } |
3133 | 3184 | ||
3134 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | 3185 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, |
@@ -3196,14 +3247,6 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
3196 | return 0; | 3247 | return 0; |
3197 | } | 3248 | } |
3198 | 3249 | ||
3199 | int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree, | ||
3200 | u64 logical, struct page *page) | ||
3201 | { | ||
3202 | u64 length = PAGE_CACHE_SIZE; | ||
3203 | return __btrfs_map_block(map_tree, READ, logical, &length, | ||
3204 | NULL, 0, page); | ||
3205 | } | ||
3206 | |||
3207 | static void end_bio_multi_stripe(struct bio *bio, int err) | 3250 | static void end_bio_multi_stripe(struct bio *bio, int err) |
3208 | { | 3251 | { |
3209 | struct btrfs_multi_bio *multi = bio->bi_private; | 3252 | struct btrfs_multi_bio *multi = bio->bi_private; |