aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Neil Brown <neilb@suse.de> 2007-02-08 17:20:29 -0500
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-02-09 12:25:46 -0500
commit 387bb17374c5fa057462d00d4ba941d49f45de4d (patch)
tree 85c68bbd2f077724563cfb26a3357d6679f12104
parent 6649a3863232eb2e2f15ea6c622bd8ceacf96d76 (diff)
[PATCH] md: fix various bugs with aligned reads in RAID5
It is possible for raid5 to be sent a bio that is too big for an underlying device. So if it is a READ that we pass straight down to a device, it will fail and confuse RAID5. So in 'chunk_aligned_read' we check that the bio fits within the parameters for the target device and if it doesn't fit, fall back on reading through the stripe cache and making lots of one-page requests. Note that this is the earliest time we can check against the device because earlier we don't have a lock on the device, so it could change underneath us. Also, the code for handling a retry through the cache when a read fails has not been tested and was badly broken. This patch fixes that code. Signed-off-by: Neil Brown <neilb@suse.de> Cc: "Kai" <epimetreus@fastmail.fm> Cc: <stable@suse.de> Cc: <org@suse.de> Cc: Jens Axboe <jens.axboe@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- block/ll_rw_blk.c 2
-rw-r--r-- drivers/md/raid5.c 42
2 files changed, 40 insertions, 4 deletions
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index fb6789725e1b..38c293b987b7 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -1264,7 +1264,7 @@ new_hw_segment:
1264 bio->bi_hw_segments = nr_hw_segs; 1264 bio->bi_hw_segments = nr_hw_segs;
1265 bio->bi_flags |= (1 << BIO_SEG_VALID); 1265 bio->bi_flags |= (1 << BIO_SEG_VALID);
1266} 1266}
1267 1267EXPORT_SYMBOL(blk_recount_segments);
1268 1268
1269static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio, 1269static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio,
1270 struct bio *nxt) 1270 struct bio *nxt)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 467c16982d02..11c3d7bfa797 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2620,7 +2620,7 @@ static struct bio *remove_bio_from_retry(raid5_conf_t *conf)
2620 } 2620 }
2621 bi = conf->retry_read_aligned_list; 2621 bi = conf->retry_read_aligned_list;
2622 if(bi) { 2622 if(bi) {
2623 conf->retry_read_aligned = bi->bi_next; 2623 conf->retry_read_aligned_list = bi->bi_next;
2624 bi->bi_next = NULL; 2624 bi->bi_next = NULL;
2625 bi->bi_phys_segments = 1; /* biased count of active stripes */ 2625 bi->bi_phys_segments = 1; /* biased count of active stripes */
2626 bi->bi_hw_segments = 0; /* count of processed stripes */ 2626 bi->bi_hw_segments = 0; /* count of processed stripes */
@@ -2669,6 +2669,27 @@ static int raid5_align_endio(struct bio *bi, unsigned int bytes, int error)
2669 return 0; 2669 return 0;
2670} 2670}
2671 2671
2672static int bio_fits_rdev(struct bio *bi)
2673{
2674 request_queue_t *q = bdev_get_queue(bi->bi_bdev);
2675
2676 if ((bi->bi_size>>9) > q->max_sectors)
2677 return 0;
2678 blk_recount_segments(q, bi);
2679 if (bi->bi_phys_segments > q->max_phys_segments ||
2680 bi->bi_hw_segments > q->max_hw_segments)
2681 return 0;
2682
2683 if (q->merge_bvec_fn)
2684 /* it's too hard to apply the merge_bvec_fn at this stage,
2685 * just just give up
2686 */
2687 return 0;
2688
2689 return 1;
2690}
2691
2692
2672static int chunk_aligned_read(request_queue_t *q, struct bio * raid_bio) 2693static int chunk_aligned_read(request_queue_t *q, struct bio * raid_bio)
2673{ 2694{
2674 mddev_t *mddev = q->queuedata; 2695 mddev_t *mddev = q->queuedata;
@@ -2715,6 +2736,13 @@ static int chunk_aligned_read(request_queue_t *q, struct bio * raid_bio)
2715 align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); 2736 align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
2716 align_bi->bi_sector += rdev->data_offset; 2737 align_bi->bi_sector += rdev->data_offset;
2717 2738
2739 if (!bio_fits_rdev(align_bi)) {
2740 /* too big in some way */
2741 bio_put(align_bi);
2742 rdev_dec_pending(rdev, mddev);
2743 return 0;
2744 }
2745
2718 spin_lock_irq(&conf->device_lock); 2746 spin_lock_irq(&conf->device_lock);
2719 wait_event_lock_irq(conf->wait_for_stripe, 2747 wait_event_lock_irq(conf->wait_for_stripe,
2720 conf->quiesce == 0, 2748 conf->quiesce == 0,
@@ -3107,7 +3135,9 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
3107 last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9); 3135 last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9);
3108 3136
3109 for (; logical_sector < last_sector; 3137 for (; logical_sector < last_sector;
3110 logical_sector += STRIPE_SECTORS, scnt++) { 3138 logical_sector += STRIPE_SECTORS,
3139 sector += STRIPE_SECTORS,
3140 scnt++) {
3111 3141
3112 if (scnt < raid_bio->bi_hw_segments) 3142 if (scnt < raid_bio->bi_hw_segments)
3113 /* already done this stripe */ 3143 /* already done this stripe */
@@ -3123,7 +3153,13 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
3123 } 3153 }
3124 3154
3125 set_bit(R5_ReadError, &sh->dev[dd_idx].flags); 3155 set_bit(R5_ReadError, &sh->dev[dd_idx].flags);
3126 add_stripe_bio(sh, raid_bio, dd_idx, 0); 3156 if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) {
3157 release_stripe(sh);
3158 raid_bio->bi_hw_segments = scnt;
3159 conf->retry_read_aligned = raid_bio;
3160 return handled;
3161 }
3162
3127 handle_stripe(sh, NULL); 3163 handle_stripe(sh, NULL);
3128 release_stripe(sh); 3164 release_stripe(sh);
3129 handled++; 3165 handled++;