author     NeilBrown <neilb@suse.de>    2012-03-18 21:46:39 -0400
committer  NeilBrown <neilb@suse.de>    2012-03-18 21:46:39 -0400
commit     ba13da47ffa202784355561f72160a41350e95cc (patch)
tree       9b60f27ab89e4036df65d0dc3bf52b7420f83a50
parent     dafb20fa34320a472deb7442f25a0c086e0feb33 (diff)
md: add proper merge_bvec handling to RAID0 and Linear.
These personalities currently set a max request size of one page when any
member device has a merge_bvec_fn, because they don't bother to call that
function. This causes extra work in splitting and combining requests. So
make the extra effort to call the merge_bvec_fn when it exists, so that we
end up with larger requests out the bottom.

Signed-off-by: NeilBrown <neilb@suse.de>
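The pattern is the same in both personalities: retarget the probe described by bvm at the member device that would service it, then cap the allowed size by whatever that member's own merge_bvec_fn will accept. What follows is a minimal sketch of that idea only, assuming the 3.3-era block API (struct bvec_merge_data and queue->merge_bvec_fn, since removed); pick_member() and remap_to_member() are hypothetical stand-ins for the personality's own lookup and translation, which_dev() in linear and find_zone()/map_sector() in raid0:

#include <linux/blkdev.h>
#include <linux/kernel.h>

/* Hedged sketch, not the patch itself: consult a member device's
 * merge_bvec_fn after re-expressing the bio coordinates in that
 * device's address space.  pick_member() and remap_to_member() are
 * hypothetical placeholders.
 */
static int stacked_mergeable_bvec(struct request_queue *q,
                                  struct bvec_merge_data *bvm,
                                  struct bio_vec *biovec)
{
        struct block_device *member = pick_member(q, bvm->bi_sector);
        struct request_queue *subq = bdev_get_queue(member);
        int max = biovec->bv_len;       /* default: accept the whole vec */

        if (subq->merge_bvec_fn) {
                /* retarget the probe at the member device */
                bvm->bi_bdev = member;
                bvm->bi_sector = remap_to_member(bvm->bi_sector);
                /* the member may shrink, but never grow, the answer */
                max = min(max, subq->merge_bvec_fn(subq, bvm, biovec));
        }
        return max;
}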
-rw-r--r--  drivers/md/linear.c   30
-rw-r--r--  drivers/md/raid0.c   154
-rw-r--r--  drivers/md/raid0.h    11
3 files changed, 107 insertions(+), 88 deletions(-)
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 67940741b19d..b0fcc7d02adb 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -68,10 +68,19 @@ static int linear_mergeable_bvec(struct request_queue *q,
 	struct dev_info *dev0;
 	unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9;
 	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
+	int maxbytes = biovec->bv_len;
+	struct request_queue *subq;
 
 	rcu_read_lock();
 	dev0 = which_dev(mddev, sector);
 	maxsectors = dev0->end_sector - sector;
+	subq = bdev_get_queue(dev0->rdev->bdev);
+	if (subq->merge_bvec_fn) {
+		bvm->bi_bdev = dev0->rdev->bdev;
+		bvm->bi_sector -= dev0->end_sector - dev0->rdev->sectors;
+		maxbytes = min(maxbytes, subq->merge_bvec_fn(subq, bvm,
+							     biovec));
+	}
 	rcu_read_unlock();
 
 	if (maxsectors < bio_sectors)
@@ -80,12 +89,12 @@ static int linear_mergeable_bvec(struct request_queue *q,
 		maxsectors -= bio_sectors;
 
 	if (maxsectors <= (PAGE_SIZE >> 9 ) && bio_sectors == 0)
-		return biovec->bv_len;
-	/* The bytes available at this offset could be really big,
-	 * so we cap at 2^31 to avoid overflow */
-	if (maxsectors > (1 << (31-9)))
-		return 1<<31;
+		return maxbytes;
+
+	if (maxsectors > (maxbytes >> 9))
+		return maxbytes;
+	else
 		return maxsectors << 9;
 }
 
 static int linear_congested(void *data, int bits)
@@ -158,15 +167,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
 
 		disk_stack_limits(mddev->gendisk, rdev->bdev,
 				  rdev->data_offset << 9);
-		/* as we don't honour merge_bvec_fn, we must never risk
-		 * violating it, so limit max_segments to 1 lying within
-		 * a single page.
-		 */
-		if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
-			blk_queue_max_segments(mddev->queue, 1);
-			blk_queue_segment_boundary(mddev->queue,
-						   PAGE_CACHE_SIZE - 1);
-		}
 
 		conf->array_sectors += rdev->sectors;
 		cnt++;
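The one subtle line in the linear hunk above is the bi_sector adjustment: a member whose range ends at dev0->end_sector and is dev0->rdev->sectors long starts at end_sector - sectors, so subtracting that difference converts an array-relative sector into a member-relative one. A tiny userspace check of that arithmetic (all numbers invented for illustration):

#include <assert.h>

typedef unsigned long long sector_t;

int main(void)
{
        sector_t end_sector = 3000;     /* member's range ends here (assumed) */
        sector_t dev_sectors = 1000;    /* member's length (assumed) */
        sector_t bi_sector = 2500;      /* array-relative request sector */

        /* same arithmetic as the hunk above:
         * member start = 3000 - 1000 = 2000, so 2500 maps to 500 */
        bi_sector -= end_sector - dev_sectors;
        assert(bi_sector == 500);
        return 0;
}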
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 7ef5cbf31bb1..6f31f5596e01 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -188,16 +188,10 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
 
 		disk_stack_limits(mddev->gendisk, rdev1->bdev,
 				  rdev1->data_offset << 9);
-		/* as we don't honour merge_bvec_fn, we must never risk
-		 * violating it, so limit ->max_segments to 1, lying within
-		 * a single page.
-		 */
 
-		if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) {
-			blk_queue_max_segments(mddev->queue, 1);
-			blk_queue_segment_boundary(mddev->queue,
-						   PAGE_CACHE_SIZE - 1);
-		}
+		if (rdev1->bdev->bd_disk->queue->merge_bvec_fn)
+			conf->has_merge_bvec = 1;
+
 		if (!smallest || (rdev1->sectors < smallest->sectors))
 			smallest = rdev1;
 		cnt++;
@@ -290,8 +284,64 @@ abort:
 	return err;
 }
 
+/* Find the zone which holds a particular offset
+ * Update *sectorp to be an offset in that zone
+ */
+static struct strip_zone *find_zone(struct r0conf *conf,
+				    sector_t *sectorp)
+{
+	int i;
+	struct strip_zone *z = conf->strip_zone;
+	sector_t sector = *sectorp;
+
+	for (i = 0; i < conf->nr_strip_zones; i++)
+		if (sector < z[i].zone_end) {
+			if (i)
+				*sectorp = sector - z[i-1].zone_end;
+			return z + i;
+		}
+	BUG();
+}
+
+/*
+ * remaps the bio to the target device. we separate two flows.
+ * power 2 flow and a general flow for the sake of perfromance
+*/
+static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone,
+				sector_t sector, sector_t *sector_offset)
+{
+	unsigned int sect_in_chunk;
+	sector_t chunk;
+	struct r0conf *conf = mddev->private;
+	int raid_disks = conf->strip_zone[0].nb_dev;
+	unsigned int chunk_sects = mddev->chunk_sectors;
+
+	if (is_power_of_2(chunk_sects)) {
+		int chunksect_bits = ffz(~chunk_sects);
+		/* find the sector offset inside the chunk */
+		sect_in_chunk = sector & (chunk_sects - 1);
+		sector >>= chunksect_bits;
+		/* chunk in zone */
+		chunk = *sector_offset;
+		/* quotient is the chunk in real device*/
+		sector_div(chunk, zone->nb_dev << chunksect_bits);
+	} else{
+		sect_in_chunk = sector_div(sector, chunk_sects);
+		chunk = *sector_offset;
+		sector_div(chunk, chunk_sects * zone->nb_dev);
+	}
+	/*
+	 * position the bio over the real device
+	 * real sector = chunk in device + starting of zone
+	 *	+ the position in the chunk
+	 */
+	*sector_offset = (chunk * chunk_sects) + sect_in_chunk;
+	return conf->devlist[(zone - conf->strip_zone)*raid_disks
+			     + sector_div(sector, zone->nb_dev)];
+}
+
 /**
- * raid0_mergeable_bvec -- tell bio layer if a two requests can be merged
+ * raid0_mergeable_bvec -- tell bio layer if two requests can be merged
  * @q: request queue
  * @bvm: properties of new bio
  * @biovec: the request that could be merged to it.
@@ -303,10 +353,15 @@ static int raid0_mergeable_bvec(struct request_queue *q,
 			       struct bio_vec *biovec)
 {
 	struct mddev *mddev = q->queuedata;
+	struct r0conf *conf = mddev->private;
 	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
+	sector_t sector_offset = sector;
 	int max;
 	unsigned int chunk_sectors = mddev->chunk_sectors;
 	unsigned int bio_sectors = bvm->bi_size >> 9;
+	struct strip_zone *zone;
+	struct md_rdev *rdev;
+	struct request_queue *subq;
 
 	if (is_power_of_2(chunk_sectors))
 		max = (chunk_sectors - ((sector & (chunk_sectors-1))
@@ -314,10 +369,27 @@ static int raid0_mergeable_bvec(struct request_queue *q,
 	else
 		max = (chunk_sectors - (sector_div(sector, chunk_sectors)
 						+ bio_sectors)) << 9;
-	if (max < 0) max = 0; /* bio_add cannot handle a negative return */
+	if (max < 0)
+		max = 0; /* bio_add cannot handle a negative return */
 	if (max <= biovec->bv_len && bio_sectors == 0)
 		return biovec->bv_len;
-	else
+	if (max < biovec->bv_len)
+		/* too small already, no need to check further */
+		return max;
+	if (!conf->has_merge_bvec)
+		return max;
+
+	/* May need to check subordinate device */
+	sector = sector_offset;
+	zone = find_zone(mddev->private, &sector_offset);
+	rdev = map_sector(mddev, zone, sector, &sector_offset);
+	subq = bdev_get_queue(rdev->bdev);
+	if (subq->merge_bvec_fn) {
+		bvm->bi_bdev = rdev->bdev;
+		bvm->bi_sector = sector_offset + zone->dev_start +
+			rdev->data_offset;
+		return min(max, subq->merge_bvec_fn(subq, bvm, biovec));
+	} else
 		return max;
 }
 
@@ -397,62 +469,6 @@ static int raid0_stop(struct mddev *mddev)
 	return 0;
 }
 
-/* Find the zone which holds a particular offset
- * Update *sectorp to be an offset in that zone
- */
-static struct strip_zone *find_zone(struct r0conf *conf,
-				    sector_t *sectorp)
-{
-	int i;
-	struct strip_zone *z = conf->strip_zone;
-	sector_t sector = *sectorp;
-
-	for (i = 0; i < conf->nr_strip_zones; i++)
-		if (sector < z[i].zone_end) {
-			if (i)
-				*sectorp = sector - z[i-1].zone_end;
-			return z + i;
-		}
-	BUG();
-}
-
-/*
- * remaps the bio to the target device. we separate two flows.
- * power 2 flow and a general flow for the sake of perfromance
-*/
-static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone,
-				sector_t sector, sector_t *sector_offset)
-{
-	unsigned int sect_in_chunk;
-	sector_t chunk;
-	struct r0conf *conf = mddev->private;
-	int raid_disks = conf->strip_zone[0].nb_dev;
-	unsigned int chunk_sects = mddev->chunk_sectors;
-
-	if (is_power_of_2(chunk_sects)) {
-		int chunksect_bits = ffz(~chunk_sects);
-		/* find the sector offset inside the chunk */
-		sect_in_chunk = sector & (chunk_sects - 1);
-		sector >>= chunksect_bits;
-		/* chunk in zone */
-		chunk = *sector_offset;
-		/* quotient is the chunk in real device*/
-		sector_div(chunk, zone->nb_dev << chunksect_bits);
-	} else{
-		sect_in_chunk = sector_div(sector, chunk_sects);
-		chunk = *sector_offset;
-		sector_div(chunk, chunk_sects * zone->nb_dev);
-	}
-	/*
-	 * position the bio over the real device
-	 * real sector = chunk in device + starting of zone
-	 *	+ the position in the chunk
-	 */
-	*sector_offset = (chunk * chunk_sects) + sect_in_chunk;
-	return conf->devlist[(zone - conf->strip_zone)*raid_disks
-			     + sector_div(sector, zone->nb_dev)];
-}
-
 /*
  * Is io distribute over 1 or more chunks ?
 */
@@ -505,7 +521,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
 	}
 
 	sector_offset = bio->bi_sector;
-	zone =  find_zone(mddev->private, &sector_offset);
+	zone = find_zone(mddev->private, &sector_offset);
 	tmp_dev = map_sector(mddev, zone, bio->bi_sector,
 			     &sector_offset);
 	bio->bi_bdev = tmp_dev->bdev;
diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h
index 0884bba8df4c..05539d9c97f0 100644
--- a/drivers/md/raid0.h
+++ b/drivers/md/raid0.h
@@ -4,13 +4,16 @@
 struct strip_zone {
 	sector_t zone_end;	/* Start of the next zone (in sectors) */
 	sector_t dev_start;	/* Zone offset in real dev (in sectors) */
 	int	 nb_dev;	/* # of devices attached to the zone */
 };
 
 struct r0conf {
 	struct strip_zone	*strip_zone;
-	struct md_rdev		**devlist; /* lists of rdevs, pointed to by strip_zone->dev */
+	struct md_rdev		**devlist; /* lists of rdevs, pointed to
+					    * by strip_zone->dev */
 	int			nr_strip_zones;
+	int			has_merge_bvec;	/* at least one member has
+						 * a merge_bvec_fn */
 };
 
 #endif
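For reference, the mapping that find_zone() and map_sector() implement reduces, within one zone, to ordinary striping arithmetic: the chunk index on a member is the zone offset divided by chunk_sects * nb_dev, the member index is (offset / chunk_sects) mod nb_dev, and the final member sector is that chunk times chunk_sects plus the offset within the chunk (the callers then add dev_start and data_offset). A small userspace model of the general, non-power-of-2 flow, with an invented 2-disk, 128-sector-chunk layout:

#include <assert.h>
#include <stdio.h>

typedef unsigned long long sector_t;

/* Userspace model of the general flow in map_sector(), for a single
 * zone starting at array sector 0 (so the zone offset and the absolute
 * sector coincide).  Layout numbers are invented for illustration.
 */
static void map(sector_t sector, unsigned chunk_sects, int nb_dev,
                int *disk, sector_t *dev_sector)
{
        sector_t sect_in_chunk = sector % chunk_sects;     /* offset in chunk */
        sector_t chunk = sector / (chunk_sects * nb_dev);  /* chunk on member */

        *disk = (sector / chunk_sects) % nb_dev;           /* which member */
        *dev_sector = chunk * chunk_sects + sect_in_chunk; /* sector on it */
}

int main(void)
{
        int disk;
        sector_t dev_sector;

        /* 2 disks, 128-sector (64KiB) chunks: array sector 300 lies in
         * stripe chunk 2 (sectors 256..383), i.e. chunk 1 of disk 0,
         * so it lands at member sector 128 + (300 - 256) = 172.
         */
        map(300, 128, 2, &disk, &dev_sector);
        assert(disk == 0 && dev_sector == 172);
        printf("array sector 300 -> disk %d, sector %llu\n", disk, dev_sector);
        return 0;
}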