Diffstat (limited to 'fs/btrfs/volumes.c')
 fs/btrfs/volumes.c | 235 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 139 insertions(+), 96 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index dd13eb81ee40..309a57b9fc85 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -33,17 +33,6 @@
 #include "volumes.h"
 #include "async-thread.h"
 
-struct map_lookup {
-	u64 type;
-	int io_align;
-	int io_width;
-	int stripe_len;
-	int sector_size;
-	int num_stripes;
-	int sub_stripes;
-	struct btrfs_bio_stripe stripes[];
-};
-
 static int init_first_rw_device(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root,
 				struct btrfs_device *device);
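The struct removed here is presumably relocated to a shared header by this series (the new tracepoints further down take a struct map_lookup argument, so the definition has to be visible outside volumes.c). Its stripes[] flexible array member means the structure is sized at allocation time. A hedged sketch of how such a mapping is typically allocated; btrfs keeps a map_lookup_size() macro to this effect, but treat the exact form below as illustrative rather than the file's verbatim code:

/* hedged sketch: size of a map_lookup with n stripes, via the
 * flexible array member (illustrative, not the file's verbatim macro)
 */
#define map_lookup_size(n) (sizeof(struct map_lookup) + \
			    (sizeof(struct btrfs_bio_stripe) * (n)))

static struct map_lookup *alloc_map_lookup(int num_stripes)
{
	struct map_lookup *map;

	map = kzalloc(map_lookup_size(num_stripes), GFP_NOFS);
	if (map)
		map->num_stripes = num_stripes;
	return map;
}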
@@ -162,7 +151,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
 	struct bio *cur;
 	int again = 0;
 	unsigned long num_run;
-	unsigned long num_sync_run;
 	unsigned long batch_run = 0;
 	unsigned long limit;
 	unsigned long last_waited = 0;
@@ -173,11 +161,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
 	limit = btrfs_async_submit_limit(fs_info);
 	limit = limit * 2 / 3;
 
-	/* we want to make sure that every time we switch from the sync
-	 * list to the normal list, we unplug
-	 */
-	num_sync_run = 0;
-
 loop:
 	spin_lock(&device->io_lock);
 
@@ -223,15 +206,6 @@ loop_lock:
 
 	spin_unlock(&device->io_lock);
 
-	/*
-	 * if we're doing the regular priority list, make sure we unplug
-	 * for any high prio bios we've sent down
-	 */
-	if (pending_bios == &device->pending_bios && num_sync_run > 0) {
-		num_sync_run = 0;
-		blk_run_backing_dev(bdi, NULL);
-	}
-
 	while (pending) {
 
 		rmb();
@@ -259,19 +233,11 @@ loop_lock:
 
 		BUG_ON(atomic_read(&cur->bi_cnt) == 0);
 
-		if (cur->bi_rw & REQ_SYNC)
-			num_sync_run++;
-
 		submit_bio(cur->bi_rw, cur);
 		num_run++;
 		batch_run++;
-		if (need_resched()) {
-			if (num_sync_run) {
-				blk_run_backing_dev(bdi, NULL);
-				num_sync_run = 0;
-			}
+		if (need_resched())
 			cond_resched();
-		}
 
 		/*
 		 * we made progress, there is more work to do and the bdi
@@ -304,13 +270,8 @@ loop_lock:
 				 * against it before looping
 				 */
 				last_waited = ioc->last_waited;
-				if (need_resched()) {
-					if (num_sync_run) {
-						blk_run_backing_dev(bdi, NULL);
-						num_sync_run = 0;
-					}
+				if (need_resched())
 					cond_resched();
-				}
 				continue;
 			}
 			spin_lock(&device->io_lock);
@@ -323,22 +284,6 @@ loop_lock:
 		}
 	}
 
-	if (num_sync_run) {
-		num_sync_run = 0;
-		blk_run_backing_dev(bdi, NULL);
-	}
-	/*
-	 * IO has already been through a long path to get here. Checksumming,
-	 * async helper threads, perhaps compression. We've done a pretty
-	 * good job of collecting a batch of IO and should just unplug
-	 * the device right away.
-	 *
-	 * This will help anyone who is waiting on the IO, they might have
-	 * already unplugged, but managed to do so before the bio they
-	 * cared about found its way down here.
-	 */
-	blk_run_backing_dev(bdi, NULL);
-
 	cond_resched();
 	if (again)
 		goto loop;
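These hunks strip run_scheduled_bios() of its manual unplug bookkeeping: num_sync_run counted REQ_SYNC bios so the function knew when to kick the backing device with blk_run_backing_dev(). That explicit-unplug model was retired from the block layer in favor of on-stack plugging, where the submitter opens a plug, queues its batch, and the block layer flushes the batch when the plug is released (or on context switch). A hedged sketch of the replacement pattern, not code from this patch:

#include <linux/blkdev.h>

/* hedged sketch: batching a set of bios under an on-stack plug,
 * the API that supersedes explicit blk_run_backing_dev() unplugs
 */
static void submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);		/* begin batching on this task */
	for (i = 0; i < nr; i++)
		submit_bio(bios[i]->bi_rw, bios[i]);
	blk_finish_plug(&plug);		/* flush the batch to the queue */
}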
@@ -1923,6 +1868,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
 
 	BUG_ON(ret);
 
+	trace_btrfs_chunk_free(root, map, chunk_offset, em->len);
+
 	if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
 		ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
 		BUG_ON(ret);
@@ -2650,6 +2597,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 	*num_bytes = chunk_bytes_by_type(type, calc_size,
 					 map->num_stripes, sub_stripes);
 
+	trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes);
+
 	em = alloc_extent_map(GFP_NOFS);
 	if (!em) {
 		ret = -ENOMEM;
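Together with the btrfs_chunk_free call above, this gives chunk allocation and release a tracepoint pair that can be watched live once enabled under /sys/kernel/debug/tracing/events/btrfs/. Assuming the two events are declared in include/trace/events/btrfs.h as a shared event class (as the tracepoint series does), the definition plausibly looks like the sketch below; the field names are illustrative, not copied from the actual header:

/* hedged sketch of the event pair; fields are illustrative */
DECLARE_EVENT_CLASS(btrfs__chunk,

	TP_PROTO(struct btrfs_root *root, struct map_lookup *map,
		 u64 offset, u64 size),

	TP_ARGS(root, map, offset, size),

	TP_STRUCT__entry(
		__field(	u64,	root_objectid	)
		__field(	u64,	offset		)
		__field(	u64,	size		)
		__field(	u64,	type		)
	),

	TP_fast_assign(
		__entry->root_objectid	= root->root_key.objectid;
		__entry->offset		= offset;
		__entry->size		= size;
		__entry->type		= map->type;
	),

	TP_printk("root=%llu offset=%llu size=%llu type=%llu",
		  __entry->root_objectid, __entry->offset,
		  __entry->size, __entry->type)
);

DEFINE_EVENT(btrfs__chunk, btrfs_chunk_alloc,
	TP_PROTO(struct btrfs_root *root, struct map_lookup *map,
		 u64 offset, u64 size),
	TP_ARGS(root, map, offset, size)
);

DEFINE_EVENT(btrfs__chunk, btrfs_chunk_free,
	TP_PROTO(struct btrfs_root *root, struct map_lookup *map,
		 u64 offset, u64 size),
	TP_ARGS(root, map, offset, size)
);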
@@ -2758,6 +2707,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
 					     item_size);
 		BUG_ON(ret);
 	}
+
 	kfree(chunk);
 	return 0;
 }
@@ -2955,14 +2905,17 @@ static int find_live_mirror(struct map_lookup *map, int first, int num,
 static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 			     u64 logical, u64 *length,
 			     struct btrfs_multi_bio **multi_ret,
-			     int mirror_num, struct page *unplug_page)
+			     int mirror_num)
 {
 	struct extent_map *em;
 	struct map_lookup *map;
 	struct extent_map_tree *em_tree = &map_tree->map_tree;
 	u64 offset;
 	u64 stripe_offset;
+	u64 stripe_end_offset;
 	u64 stripe_nr;
+	u64 stripe_nr_orig;
+	u64 stripe_nr_end;
 	int stripes_allocated = 8;
 	int stripes_required = 1;
 	int stripe_index;
@@ -2971,7 +2924,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 	int max_errors = 0;
 	struct btrfs_multi_bio *multi = NULL;
 
-	if (multi_ret && !(rw & REQ_WRITE))
+	if (multi_ret && !(rw & (REQ_WRITE | REQ_DISCARD)))
 		stripes_allocated = 1;
 again:
 	if (multi_ret) {
@@ -2987,11 +2940,6 @@ again:
 	em = lookup_extent_mapping(em_tree, logical, *length);
 	read_unlock(&em_tree->lock);
 
-	if (!em && unplug_page) {
-		kfree(multi);
-		return 0;
-	}
-
 	if (!em) {
 		printk(KERN_CRIT "unable to find logical %llu len %llu\n",
 		       (unsigned long long)logical,
@@ -3017,7 +2965,15 @@ again:
 			max_errors = 1;
 		}
 	}
-	if (multi_ret && (rw & REQ_WRITE) &&
+	if (rw & REQ_DISCARD) {
+		if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+				 BTRFS_BLOCK_GROUP_RAID1 |
+				 BTRFS_BLOCK_GROUP_DUP |
+				 BTRFS_BLOCK_GROUP_RAID10)) {
+			stripes_required = map->num_stripes;
+		}
+	}
+	if (multi_ret && (rw & (REQ_WRITE | REQ_DISCARD)) &&
 	    stripes_allocated < stripes_required) {
 		stripes_allocated = map->num_stripes;
 		free_extent_map(em);
@@ -3037,23 +2993,37 @@ again:
 	/* stripe_offset is the offset of this block in its stripe*/
 	stripe_offset = offset - stripe_offset;
 
-	if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
-			 BTRFS_BLOCK_GROUP_RAID10 |
-			 BTRFS_BLOCK_GROUP_DUP)) {
+	if (rw & REQ_DISCARD)
+		*length = min_t(u64, em->len - offset, *length);
+	else if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+			      BTRFS_BLOCK_GROUP_RAID1 |
+			      BTRFS_BLOCK_GROUP_RAID10 |
+			      BTRFS_BLOCK_GROUP_DUP)) {
 		/* we limit the length of each bio to what fits in a stripe */
 		*length = min_t(u64, em->len - offset,
 				map->stripe_len - stripe_offset);
 	} else {
 		*length = em->len - offset;
 	}
 
-	if (!multi_ret && !unplug_page)
+	if (!multi_ret)
 		goto out;
 
 	num_stripes = 1;
 	stripe_index = 0;
-	if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
-		if (unplug_page || (rw & REQ_WRITE))
+	stripe_nr_orig = stripe_nr;
+	stripe_nr_end = (offset + *length + map->stripe_len - 1) &
+			(~(map->stripe_len - 1));
+	do_div(stripe_nr_end, map->stripe_len);
+	stripe_end_offset = stripe_nr_end * map->stripe_len -
+			    (offset + *length);
+	if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
+		if (rw & REQ_DISCARD)
+			num_stripes = min_t(u64, map->num_stripes,
+					    stripe_nr_end - stripe_nr_orig);
+		stripe_index = do_div(stripe_nr, map->num_stripes);
+	} else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
+		if (rw & (REQ_WRITE | REQ_DISCARD))
 			num_stripes = map->num_stripes;
 		else if (mirror_num)
 			stripe_index = mirror_num - 1;
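The new arithmetic above establishes the discard's stripe window: stripe_nr_end is the exclusive last logical stripe after rounding the range end up to a stripe boundary, and stripe_end_offset records how far that rounding overshot the real end. A small userspace walk-through with illustrative numbers (do_div replaced by plain division):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* illustrative: 64 KiB stripes, a 200 KiB discard starting
	 * 16 KiB into the chunk
	 */
	uint64_t stripe_len = 65536;
	uint64_t offset = 16384, length = 204800;

	/* first stripe covered, and the offset into it */
	uint64_t stripe_nr = offset / stripe_len;		  /* 0 */
	uint64_t stripe_offset = offset - stripe_nr * stripe_len; /* 16384 */

	/* round the end up to a stripe boundary, then count stripes */
	uint64_t stripe_nr_end = (offset + length + stripe_len - 1)
				 & ~(stripe_len - 1);
	stripe_nr_end /= stripe_len;				  /* 4 */

	/* slack between the rounded-up end and the real end */
	uint64_t stripe_end_offset = stripe_nr_end * stripe_len
				     - (offset + length);	  /* 40960 */

	printf("stripes [%llu, %llu), head skip %llu, tail slack %llu\n",
	       (unsigned long long)stripe_nr,
	       (unsigned long long)stripe_nr_end,
	       (unsigned long long)stripe_offset,
	       (unsigned long long)stripe_end_offset);
	return 0;
}

So this discard spans logical stripes 0 through 3, starts 16 KiB into stripe 0, and stops 40 KiB short of the end of stripe 3.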
@@ -3064,7 +3034,7 @@ again:
 		}
 
 	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
-		if (rw & REQ_WRITE)
+		if (rw & (REQ_WRITE | REQ_DISCARD))
 			num_stripes = map->num_stripes;
 		else if (mirror_num)
 			stripe_index = mirror_num - 1;
@@ -3075,8 +3045,12 @@ again:
 		stripe_index = do_div(stripe_nr, factor);
 		stripe_index *= map->sub_stripes;
 
-		if (unplug_page || (rw & REQ_WRITE))
+		if (rw & REQ_WRITE)
 			num_stripes = map->sub_stripes;
+		else if (rw & REQ_DISCARD)
+			num_stripes = min_t(u64, map->sub_stripes *
+					    (stripe_nr_end - stripe_nr_orig),
+					    map->num_stripes);
 		else if (mirror_num)
 			stripe_index += mirror_num - 1;
 		else {
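For a RAID10 discard, every logical stripe in the window is mirrored sub_stripes times, so the ideal target count is the product of the two, clamped at the device count since the rotation can wrap around the array. A tiny illustrative instance with a made-up layout:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* illustrative RAID10: 4 devices = 2 stripe groups x 2 mirrors */
	uint64_t num_devices = 4, sub_stripes = 2;
	uint64_t stripe_nr_orig = 5, stripe_nr_end = 8;	/* 3 stripes */

	uint64_t want = sub_stripes * (stripe_nr_end - stripe_nr_orig);
	uint64_t num_stripes = want < num_devices ? want : num_devices;

	/* 3 stripes x 2 mirrors = 6 wanted, clamped to the 4 devices */
	printf("num_stripes = %llu\n", (unsigned long long)num_stripes);
	return 0;
}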
@@ -3094,24 +3068,101 @@ again:
 	}
 	BUG_ON(stripe_index >= map->num_stripes);
 
-	for (i = 0; i < num_stripes; i++) {
-		if (unplug_page) {
-			struct btrfs_device *device;
-			struct backing_dev_info *bdi;
-
-			device = map->stripes[stripe_index].dev;
-			if (device->bdev) {
-				bdi = blk_get_backing_dev_info(device->bdev);
-				if (bdi->unplug_io_fn)
-					bdi->unplug_io_fn(bdi, unplug_page);
-			}
-		} else {
+	if (rw & REQ_DISCARD) {
+		for (i = 0; i < num_stripes; i++) {
 			multi->stripes[i].physical =
 				map->stripes[stripe_index].physical +
 				stripe_offset + stripe_nr * map->stripe_len;
 			multi->stripes[i].dev = map->stripes[stripe_index].dev;
+
+			if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
+				u64 stripes;
+				u32 last_stripe = 0;
+				int j;
+
+				div_u64_rem(stripe_nr_end - 1,
+					    map->num_stripes,
+					    &last_stripe);
+
+				for (j = 0; j < map->num_stripes; j++) {
+					u32 test;
+
+					div_u64_rem(stripe_nr_end - 1 - j,
+						    map->num_stripes, &test);
+					if (test == stripe_index)
+						break;
+				}
+				stripes = stripe_nr_end - 1 - j;
+				do_div(stripes, map->num_stripes);
+				multi->stripes[i].length = map->stripe_len *
+					(stripes - stripe_nr + 1);
+
+				if (i == 0) {
+					multi->stripes[i].length -=
+						stripe_offset;
+					stripe_offset = 0;
+				}
+				if (stripe_index == last_stripe)
+					multi->stripes[i].length -=
+						stripe_end_offset;
+			} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+				u64 stripes;
+				int j;
+				int factor = map->num_stripes /
+					     map->sub_stripes;
+				u32 last_stripe = 0;
+
+				div_u64_rem(stripe_nr_end - 1,
+					    factor, &last_stripe);
+				last_stripe *= map->sub_stripes;
+
+				for (j = 0; j < factor; j++) {
+					u32 test;
+
+					div_u64_rem(stripe_nr_end - 1 - j,
+						    factor, &test);
+
+					if (test ==
+					    stripe_index / map->sub_stripes)
+						break;
+				}
+				stripes = stripe_nr_end - 1 - j;
+				do_div(stripes, factor);
+				multi->stripes[i].length = map->stripe_len *
+					(stripes - stripe_nr + 1);
+
+				if (i < map->sub_stripes) {
+					multi->stripes[i].length -=
+						stripe_offset;
+					if (i == map->sub_stripes - 1)
+						stripe_offset = 0;
+				}
+				if (stripe_index >= last_stripe &&
+				    stripe_index <= (last_stripe +
+						     map->sub_stripes - 1)) {
+					multi->stripes[i].length -=
+						stripe_end_offset;
+				}
+			} else
+				multi->stripes[i].length = *length;
+
+			stripe_index++;
+			if (stripe_index == map->num_stripes) {
+				/* This could only happen for RAID0/10 */
+				stripe_index = 0;
+				stripe_nr++;
+			}
+		}
+	} else {
+		for (i = 0; i < num_stripes; i++) {
+			multi->stripes[i].physical =
+				map->stripes[stripe_index].physical +
+				stripe_offset +
+				stripe_nr * map->stripe_len;
+			multi->stripes[i].dev =
+				map->stripes[stripe_index].dev;
+			stripe_index++;
 		}
-		stripe_index++;
 	}
 	if (multi_ret) {
 		*multi_ret = multi;
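The discard branch above hands each stripe its own byte length rather than one shared *length: for RAID0/10 it finds the last round of the stripe rotation that touched this device (the div_u64_rem loop), converts rounds into bytes, then trims the first stripe by stripe_offset and whichever device holds the range's end by stripe_end_offset. A hedged userspace condensation of the RAID0 case for a single device, with div_u64_rem replaced by the % operator; note that stripe_nr here is already the round number of the first covered stripe, as in the kernel code after do_div:

#include <stdint.h>

/* hedged sketch: discard byte length landing on one RAID0 member */
static uint64_t raid0_discard_len(int stripe_index, int num_stripes,
				  uint64_t stripe_nr,	      /* first round */
				  uint64_t stripe_nr_end,     /* exclusive */
				  uint64_t stripe_len,
				  uint64_t stripe_offset,     /* head skip */
				  uint64_t stripe_end_offset, /* tail slack */
				  int is_first_stripe)
{
	uint32_t last_stripe = (stripe_nr_end - 1) % num_stripes;
	uint64_t rounds, len;
	int j;

	/* walk back from the last logical stripe to one on this device */
	for (j = 0; j < num_stripes; j++)
		if ((stripe_nr_end - 1 - j) % num_stripes ==
		    (uint32_t)stripe_index)
			break;

	/* rounds of the rotation this device takes part in */
	rounds = (stripe_nr_end - 1 - j) / num_stripes;
	len = stripe_len * (rounds - stripe_nr + 1);

	if (is_first_stripe)			/* range starts mid-stripe */
		len -= stripe_offset;
	if ((uint32_t)stripe_index == last_stripe) /* range ends mid-stripe */
		len -= stripe_end_offset;
	return len;
}

The RAID10 branch is the same computation with factor = num_stripes / sub_stripes standing in for num_stripes, applied once per mirror group.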
@@ -3128,7 +3179,7 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 		    struct btrfs_multi_bio **multi_ret, int mirror_num)
 {
 	return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
-				 mirror_num, NULL);
+				 mirror_num);
 }
 
 int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
@@ -3196,14 +3247,6 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
 	return 0;
 }
 
-int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
-		      u64 logical, struct page *page)
-{
-	u64 length = PAGE_CACHE_SIZE;
-	return __btrfs_map_block(map_tree, READ, logical, &length,
-				 NULL, 0, page);
-}
-
 static void end_bio_multi_stripe(struct bio *bio, int err)
 {
 	struct btrfs_multi_bio *multi = bio->bi_private;