diff options
author | NeilBrown <neilb@suse.de> | 2012-03-18 21:46:39 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2012-03-18 21:46:39 -0400 |
commit | ba13da47ffa202784355561f72160a41350e95cc (patch) | |
tree | 9b60f27ab89e4036df65d0dc3bf52b7420f83a50 | |
parent | dafb20fa34320a472deb7442f25a0c086e0feb33 (diff) |
md: add proper merge_bvec handling to RAID0 and Linear.
These personalities currently set a max request size of one page
when any member device has a merge_bvec_fn because they don't
bother to call that function.
This causes extra work in splitting and combining requests.
So make the extra effort to call the merge_bvec_fn when it exists
so that we end up with larger requests out the bottom.
Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- | drivers/md/linear.c | 30 | ||||
-rw-r--r-- | drivers/md/raid0.c | 154 | ||||
-rw-r--r-- | drivers/md/raid0.h | 11 |
3 files changed, 107 insertions, 88 deletions
diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 67940741b19d..b0fcc7d02adb 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c | |||
@@ -68,10 +68,19 @@ static int linear_mergeable_bvec(struct request_queue *q, | |||
68 | struct dev_info *dev0; | 68 | struct dev_info *dev0; |
69 | unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9; | 69 | unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9; |
70 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); | 70 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); |
71 | int maxbytes = biovec->bv_len; | ||
72 | struct request_queue *subq; | ||
71 | 73 | ||
72 | rcu_read_lock(); | 74 | rcu_read_lock(); |
73 | dev0 = which_dev(mddev, sector); | 75 | dev0 = which_dev(mddev, sector); |
74 | maxsectors = dev0->end_sector - sector; | 76 | maxsectors = dev0->end_sector - sector; |
77 | subq = bdev_get_queue(dev0->rdev->bdev); | ||
78 | if (subq->merge_bvec_fn) { | ||
79 | bvm->bi_bdev = dev0->rdev->bdev; | ||
80 | bvm->bi_sector -= dev0->end_sector - dev0->rdev->sectors; | ||
81 | maxbytes = min(maxbytes, subq->merge_bvec_fn(subq, bvm, | ||
82 | biovec)); | ||
83 | } | ||
75 | rcu_read_unlock(); | 84 | rcu_read_unlock(); |
76 | 85 | ||
77 | if (maxsectors < bio_sectors) | 86 | if (maxsectors < bio_sectors) |
@@ -80,12 +89,12 @@ static int linear_mergeable_bvec(struct request_queue *q, | |||
80 | maxsectors -= bio_sectors; | 89 | maxsectors -= bio_sectors; |
81 | 90 | ||
82 | if (maxsectors <= (PAGE_SIZE >> 9 ) && bio_sectors == 0) | 91 | if (maxsectors <= (PAGE_SIZE >> 9 ) && bio_sectors == 0) |
83 | return biovec->bv_len; | 92 | return maxbytes; |
84 | /* The bytes available at this offset could be really big, | 93 | |
85 | * so we cap at 2^31 to avoid overflow */ | 94 | if (maxsectors > (maxbytes >> 9)) |
86 | if (maxsectors > (1 << (31-9))) | 95 | return maxbytes; |
87 | return 1<<31; | 96 | else |
88 | return maxsectors << 9; | 97 | return maxsectors << 9; |
89 | } | 98 | } |
90 | 99 | ||
91 | static int linear_congested(void *data, int bits) | 100 | static int linear_congested(void *data, int bits) |
@@ -158,15 +167,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) | |||
158 | 167 | ||
159 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 168 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
160 | rdev->data_offset << 9); | 169 | rdev->data_offset << 9); |
161 | /* as we don't honour merge_bvec_fn, we must never risk | ||
162 | * violating it, so limit max_segments to 1 lying within | ||
163 | * a single page. | ||
164 | */ | ||
165 | if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { | ||
166 | blk_queue_max_segments(mddev->queue, 1); | ||
167 | blk_queue_segment_boundary(mddev->queue, | ||
168 | PAGE_CACHE_SIZE - 1); | ||
169 | } | ||
170 | 170 | ||
171 | conf->array_sectors += rdev->sectors; | 171 | conf->array_sectors += rdev->sectors; |
172 | cnt++; | 172 | cnt++; |
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 7ef5cbf31bb1..6f31f5596e01 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
@@ -188,16 +188,10 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) | |||
188 | 188 | ||
189 | disk_stack_limits(mddev->gendisk, rdev1->bdev, | 189 | disk_stack_limits(mddev->gendisk, rdev1->bdev, |
190 | rdev1->data_offset << 9); | 190 | rdev1->data_offset << 9); |
191 | /* as we don't honour merge_bvec_fn, we must never risk | ||
192 | * violating it, so limit ->max_segments to 1, lying within | ||
193 | * a single page. | ||
194 | */ | ||
195 | 191 | ||
196 | if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) { | 192 | if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) |
197 | blk_queue_max_segments(mddev->queue, 1); | 193 | conf->has_merge_bvec = 1; |
198 | blk_queue_segment_boundary(mddev->queue, | 194 | |
199 | PAGE_CACHE_SIZE - 1); | ||
200 | } | ||
201 | if (!smallest || (rdev1->sectors < smallest->sectors)) | 195 | if (!smallest || (rdev1->sectors < smallest->sectors)) |
202 | smallest = rdev1; | 196 | smallest = rdev1; |
203 | cnt++; | 197 | cnt++; |
@@ -290,8 +284,64 @@ abort: | |||
290 | return err; | 284 | return err; |
291 | } | 285 | } |
292 | 286 | ||
287 | /* Find the zone which holds a particular offset | ||
288 | * Update *sectorp to be an offset in that zone | ||
289 | */ | ||
290 | static struct strip_zone *find_zone(struct r0conf *conf, | ||
291 | sector_t *sectorp) | ||
292 | { | ||
293 | int i; | ||
294 | struct strip_zone *z = conf->strip_zone; | ||
295 | sector_t sector = *sectorp; | ||
296 | |||
297 | for (i = 0; i < conf->nr_strip_zones; i++) | ||
298 | if (sector < z[i].zone_end) { | ||
299 | if (i) | ||
300 | *sectorp = sector - z[i-1].zone_end; | ||
301 | return z + i; | ||
302 | } | ||
303 | BUG(); | ||
304 | } | ||
305 | |||
306 | /* | ||
307 | * remaps the bio to the target device. we separate two flows. | ||
308 | * power 2 flow and a general flow for the sake of performance | ||
309 | */ | ||
310 | static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone, | ||
311 | sector_t sector, sector_t *sector_offset) | ||
312 | { | ||
313 | unsigned int sect_in_chunk; | ||
314 | sector_t chunk; | ||
315 | struct r0conf *conf = mddev->private; | ||
316 | int raid_disks = conf->strip_zone[0].nb_dev; | ||
317 | unsigned int chunk_sects = mddev->chunk_sectors; | ||
318 | |||
319 | if (is_power_of_2(chunk_sects)) { | ||
320 | int chunksect_bits = ffz(~chunk_sects); | ||
321 | /* find the sector offset inside the chunk */ | ||
322 | sect_in_chunk = sector & (chunk_sects - 1); | ||
323 | sector >>= chunksect_bits; | ||
324 | /* chunk in zone */ | ||
325 | chunk = *sector_offset; | ||
326 | /* quotient is the chunk in real device*/ | ||
327 | sector_div(chunk, zone->nb_dev << chunksect_bits); | ||
328 | } else{ | ||
329 | sect_in_chunk = sector_div(sector, chunk_sects); | ||
330 | chunk = *sector_offset; | ||
331 | sector_div(chunk, chunk_sects * zone->nb_dev); | ||
332 | } | ||
333 | /* | ||
334 | * position the bio over the real device | ||
335 | * real sector = chunk in device + starting of zone | ||
336 | * + the position in the chunk | ||
337 | */ | ||
338 | *sector_offset = (chunk * chunk_sects) + sect_in_chunk; | ||
339 | return conf->devlist[(zone - conf->strip_zone)*raid_disks | ||
340 | + sector_div(sector, zone->nb_dev)]; | ||
341 | } | ||
342 | |||
293 | /** | 343 | /** |
294 | * raid0_mergeable_bvec -- tell bio layer if a two requests can be merged | 344 | * raid0_mergeable_bvec -- tell bio layer if two requests can be merged |
295 | * @q: request queue | 345 | * @q: request queue |
296 | * @bvm: properties of new bio | 346 | * @bvm: properties of new bio |
297 | * @biovec: the request that could be merged to it. | 347 | * @biovec: the request that could be merged to it. |
@@ -303,10 +353,15 @@ static int raid0_mergeable_bvec(struct request_queue *q, | |||
303 | struct bio_vec *biovec) | 353 | struct bio_vec *biovec) |
304 | { | 354 | { |
305 | struct mddev *mddev = q->queuedata; | 355 | struct mddev *mddev = q->queuedata; |
356 | struct r0conf *conf = mddev->private; | ||
306 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); | 357 | sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); |
358 | sector_t sector_offset = sector; | ||
307 | int max; | 359 | int max; |
308 | unsigned int chunk_sectors = mddev->chunk_sectors; | 360 | unsigned int chunk_sectors = mddev->chunk_sectors; |
309 | unsigned int bio_sectors = bvm->bi_size >> 9; | 361 | unsigned int bio_sectors = bvm->bi_size >> 9; |
362 | struct strip_zone *zone; | ||
363 | struct md_rdev *rdev; | ||
364 | struct request_queue *subq; | ||
310 | 365 | ||
311 | if (is_power_of_2(chunk_sectors)) | 366 | if (is_power_of_2(chunk_sectors)) |
312 | max = (chunk_sectors - ((sector & (chunk_sectors-1)) | 367 | max = (chunk_sectors - ((sector & (chunk_sectors-1)) |
@@ -314,10 +369,27 @@ static int raid0_mergeable_bvec(struct request_queue *q, | |||
314 | else | 369 | else |
315 | max = (chunk_sectors - (sector_div(sector, chunk_sectors) | 370 | max = (chunk_sectors - (sector_div(sector, chunk_sectors) |
316 | + bio_sectors)) << 9; | 371 | + bio_sectors)) << 9; |
317 | if (max < 0) max = 0; /* bio_add cannot handle a negative return */ | 372 | if (max < 0) |
373 | max = 0; /* bio_add cannot handle a negative return */ | ||
318 | if (max <= biovec->bv_len && bio_sectors == 0) | 374 | if (max <= biovec->bv_len && bio_sectors == 0) |
319 | return biovec->bv_len; | 375 | return biovec->bv_len; |
320 | else | 376 | if (max < biovec->bv_len) |
377 | /* too small already, no need to check further */ | ||
378 | return max; | ||
379 | if (!conf->has_merge_bvec) | ||
380 | return max; | ||
381 | |||
382 | /* May need to check subordinate device */ | ||
383 | sector = sector_offset; | ||
384 | zone = find_zone(mddev->private, §or_offset); | ||
385 | rdev = map_sector(mddev, zone, sector, §or_offset); | ||
386 | subq = bdev_get_queue(rdev->bdev); | ||
387 | if (subq->merge_bvec_fn) { | ||
388 | bvm->bi_bdev = rdev->bdev; | ||
389 | bvm->bi_sector = sector_offset + zone->dev_start + | ||
390 | rdev->data_offset; | ||
391 | return min(max, subq->merge_bvec_fn(subq, bvm, biovec)); | ||
392 | } else | ||
321 | return max; | 393 | return max; |
322 | } | 394 | } |
323 | 395 | ||
@@ -397,62 +469,6 @@ static int raid0_stop(struct mddev *mddev) | |||
397 | return 0; | 469 | return 0; |
398 | } | 470 | } |
399 | 471 | ||
400 | /* Find the zone which holds a particular offset | ||
401 | * Update *sectorp to be an offset in that zone | ||
402 | */ | ||
403 | static struct strip_zone *find_zone(struct r0conf *conf, | ||
404 | sector_t *sectorp) | ||
405 | { | ||
406 | int i; | ||
407 | struct strip_zone *z = conf->strip_zone; | ||
408 | sector_t sector = *sectorp; | ||
409 | |||
410 | for (i = 0; i < conf->nr_strip_zones; i++) | ||
411 | if (sector < z[i].zone_end) { | ||
412 | if (i) | ||
413 | *sectorp = sector - z[i-1].zone_end; | ||
414 | return z + i; | ||
415 | } | ||
416 | BUG(); | ||
417 | } | ||
418 | |||
419 | /* | ||
420 | * remaps the bio to the target device. we separate two flows. | ||
421 | * power 2 flow and a general flow for the sake of performance | ||
422 | */ | ||
423 | static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone, | ||
424 | sector_t sector, sector_t *sector_offset) | ||
425 | { | ||
426 | unsigned int sect_in_chunk; | ||
427 | sector_t chunk; | ||
428 | struct r0conf *conf = mddev->private; | ||
429 | int raid_disks = conf->strip_zone[0].nb_dev; | ||
430 | unsigned int chunk_sects = mddev->chunk_sectors; | ||
431 | |||
432 | if (is_power_of_2(chunk_sects)) { | ||
433 | int chunksect_bits = ffz(~chunk_sects); | ||
434 | /* find the sector offset inside the chunk */ | ||
435 | sect_in_chunk = sector & (chunk_sects - 1); | ||
436 | sector >>= chunksect_bits; | ||
437 | /* chunk in zone */ | ||
438 | chunk = *sector_offset; | ||
439 | /* quotient is the chunk in real device*/ | ||
440 | sector_div(chunk, zone->nb_dev << chunksect_bits); | ||
441 | } else{ | ||
442 | sect_in_chunk = sector_div(sector, chunk_sects); | ||
443 | chunk = *sector_offset; | ||
444 | sector_div(chunk, chunk_sects * zone->nb_dev); | ||
445 | } | ||
446 | /* | ||
447 | * position the bio over the real device | ||
448 | * real sector = chunk in device + starting of zone | ||
449 | * + the position in the chunk | ||
450 | */ | ||
451 | *sector_offset = (chunk * chunk_sects) + sect_in_chunk; | ||
452 | return conf->devlist[(zone - conf->strip_zone)*raid_disks | ||
453 | + sector_div(sector, zone->nb_dev)]; | ||
454 | } | ||
455 | |||
456 | /* | 472 | /* |
457 | * Is io distribute over 1 or more chunks ? | 473 | * Is io distribute over 1 or more chunks ? |
458 | */ | 474 | */ |
@@ -505,7 +521,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) | |||
505 | } | 521 | } |
506 | 522 | ||
507 | sector_offset = bio->bi_sector; | 523 | sector_offset = bio->bi_sector; |
508 | zone = find_zone(mddev->private, §or_offset); | 524 | zone = find_zone(mddev->private, §or_offset); |
509 | tmp_dev = map_sector(mddev, zone, bio->bi_sector, | 525 | tmp_dev = map_sector(mddev, zone, bio->bi_sector, |
510 | §or_offset); | 526 | §or_offset); |
511 | bio->bi_bdev = tmp_dev->bdev; | 527 | bio->bi_bdev = tmp_dev->bdev; |
diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h index 0884bba8df4c..05539d9c97f0 100644 --- a/drivers/md/raid0.h +++ b/drivers/md/raid0.h | |||
@@ -4,13 +4,16 @@ | |||
4 | struct strip_zone { | 4 | struct strip_zone { |
5 | sector_t zone_end; /* Start of the next zone (in sectors) */ | 5 | sector_t zone_end; /* Start of the next zone (in sectors) */ |
6 | sector_t dev_start; /* Zone offset in real dev (in sectors) */ | 6 | sector_t dev_start; /* Zone offset in real dev (in sectors) */ |
7 | int nb_dev; /* # of devices attached to the zone */ | 7 | int nb_dev; /* # of devices attached to the zone */ |
8 | }; | 8 | }; |
9 | 9 | ||
10 | struct r0conf { | 10 | struct r0conf { |
11 | struct strip_zone *strip_zone; | 11 | struct strip_zone *strip_zone; |
12 | struct md_rdev **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ | 12 | struct md_rdev **devlist; /* lists of rdevs, pointed to |
13 | int nr_strip_zones; | 13 | * by strip_zone->dev */ |
14 | int nr_strip_zones; | ||
15 | int has_merge_bvec; /* at least one member has | ||
16 | * a merge_bvec_fn */ | ||
14 | }; | 17 | }; |
15 | 18 | ||
16 | #endif | 19 | #endif |