diff options
author | Neil Brown <neilb@suse.de> | 2007-02-08 17:20:29 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-02-09 12:25:46 -0500 |
commit | 387bb17374c5fa057462d00d4ba941d49f45de4d (patch) | |
tree | 85c68bbd2f077724563cfb26a3357d6679f12104 | |
parent | 6649a3863232eb2e2f15ea6c622bd8ceacf96d76 (diff) |
[PATCH] md: fix various bugs with aligned reads in RAID5
It is possible for raid5 to be sent a bio that is too big for an underlying
device. So if it is a READ that we pass straight down to a device, it will
fail and confuse RAID5.
So in 'chunk_aligned_read' we check that the bio fits within the parameters
for the target device and if it doesn't fit, fall back on reading through
the stripe cache and making lots of one-page requests.
Note that this is the earliest time we can check against the device because
earlier we don't have a lock on the device, so it could change underneath
us.
Also, the code for handling a retry through the cache when a read fails had
not been tested and was badly broken. This patch fixes that code.
Signed-off-by: Neil Brown <neilb@suse.de>
Cc: "Kai" <epimetreus@fastmail.fm>
Cc: <stable@suse.de>
Cc: <org@suse.de>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | block/ll_rw_blk.c | 2 | ||||
-rw-r--r-- | drivers/md/raid5.c | 42 |
2 files changed, 40 insertions, 4 deletions
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index fb6789725e1b..38c293b987b7 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c | |||
@@ -1264,7 +1264,7 @@ new_hw_segment: | |||
1264 | bio->bi_hw_segments = nr_hw_segs; | 1264 | bio->bi_hw_segments = nr_hw_segs; |
1265 | bio->bi_flags |= (1 << BIO_SEG_VALID); | 1265 | bio->bi_flags |= (1 << BIO_SEG_VALID); |
1266 | } | 1266 | } |
1267 | 1267 | EXPORT_SYMBOL(blk_recount_segments); | |
1268 | 1268 | ||
1269 | static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio, | 1269 | static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio, |
1270 | struct bio *nxt) | 1270 | struct bio *nxt) |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 467c16982d02..11c3d7bfa797 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -2620,7 +2620,7 @@ static struct bio *remove_bio_from_retry(raid5_conf_t *conf) | |||
2620 | } | 2620 | } |
2621 | bi = conf->retry_read_aligned_list; | 2621 | bi = conf->retry_read_aligned_list; |
2622 | if(bi) { | 2622 | if(bi) { |
2623 | conf->retry_read_aligned = bi->bi_next; | 2623 | conf->retry_read_aligned_list = bi->bi_next; |
2624 | bi->bi_next = NULL; | 2624 | bi->bi_next = NULL; |
2625 | bi->bi_phys_segments = 1; /* biased count of active stripes */ | 2625 | bi->bi_phys_segments = 1; /* biased count of active stripes */ |
2626 | bi->bi_hw_segments = 0; /* count of processed stripes */ | 2626 | bi->bi_hw_segments = 0; /* count of processed stripes */ |
@@ -2669,6 +2669,27 @@ static int raid5_align_endio(struct bio *bi, unsigned int bytes, int error) | |||
2669 | return 0; | 2669 | return 0; |
2670 | } | 2670 | } |
2671 | 2671 | ||
2672 | static int bio_fits_rdev(struct bio *bi) | ||
2673 | { | ||
2674 | request_queue_t *q = bdev_get_queue(bi->bi_bdev); | ||
2675 | |||
2676 | if ((bi->bi_size>>9) > q->max_sectors) | ||
2677 | return 0; | ||
2678 | blk_recount_segments(q, bi); | ||
2679 | if (bi->bi_phys_segments > q->max_phys_segments || | ||
2680 | bi->bi_hw_segments > q->max_hw_segments) | ||
2681 | return 0; | ||
2682 | |||
2683 | if (q->merge_bvec_fn) | ||
2684 | /* it's too hard to apply the merge_bvec_fn at this stage, | ||
2685 | * just give up | |
2686 | */ | ||
2687 | return 0; | ||
2688 | |||
2689 | return 1; | ||
2690 | } | ||
2691 | |||
2692 | |||
2672 | static int chunk_aligned_read(request_queue_t *q, struct bio * raid_bio) | 2693 | static int chunk_aligned_read(request_queue_t *q, struct bio * raid_bio) |
2673 | { | 2694 | { |
2674 | mddev_t *mddev = q->queuedata; | 2695 | mddev_t *mddev = q->queuedata; |
@@ -2715,6 +2736,13 @@ static int chunk_aligned_read(request_queue_t *q, struct bio * raid_bio) | |||
2715 | align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); | 2736 | align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); |
2716 | align_bi->bi_sector += rdev->data_offset; | 2737 | align_bi->bi_sector += rdev->data_offset; |
2717 | 2738 | ||
2739 | if (!bio_fits_rdev(align_bi)) { | ||
2740 | /* too big in some way */ | ||
2741 | bio_put(align_bi); | ||
2742 | rdev_dec_pending(rdev, mddev); | ||
2743 | return 0; | ||
2744 | } | ||
2745 | |||
2718 | spin_lock_irq(&conf->device_lock); | 2746 | spin_lock_irq(&conf->device_lock); |
2719 | wait_event_lock_irq(conf->wait_for_stripe, | 2747 | wait_event_lock_irq(conf->wait_for_stripe, |
2720 | conf->quiesce == 0, | 2748 | conf->quiesce == 0, |
@@ -3107,7 +3135,9 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) | |||
3107 | last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9); | 3135 | last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9); |
3108 | 3136 | ||
3109 | for (; logical_sector < last_sector; | 3137 | for (; logical_sector < last_sector; |
3110 | logical_sector += STRIPE_SECTORS, scnt++) { | 3138 | logical_sector += STRIPE_SECTORS, |
3139 | sector += STRIPE_SECTORS, | ||
3140 | scnt++) { | ||
3111 | 3141 | ||
3112 | if (scnt < raid_bio->bi_hw_segments) | 3142 | if (scnt < raid_bio->bi_hw_segments) |
3113 | /* already done this stripe */ | 3143 | /* already done this stripe */ |
@@ -3123,7 +3153,13 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) | |||
3123 | } | 3153 | } |
3124 | 3154 | ||
3125 | set_bit(R5_ReadError, &sh->dev[dd_idx].flags); | 3155 | set_bit(R5_ReadError, &sh->dev[dd_idx].flags); |
3126 | add_stripe_bio(sh, raid_bio, dd_idx, 0); | 3156 | if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) { |
3157 | release_stripe(sh); | ||
3158 | raid_bio->bi_hw_segments = scnt; | ||
3159 | conf->retry_read_aligned = raid_bio; | ||
3160 | return handled; | ||
3161 | } | ||
3162 | |||
3127 | handle_stripe(sh, NULL); | 3163 | handle_stripe(sh, NULL); |
3128 | release_stripe(sh); | 3164 | release_stripe(sh); |
3129 | handled++; | 3165 | handled++; |