aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/md.c
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.de> 2011-07-27 21:31:48 -0400
committer: NeilBrown <neilb@suse.de> 2011-07-27 21:31:48 -0400
commit: d2eb35acfdccbe2a3622ed6cc441a5482148423b (patch)
tree: 77600cab29fc9e1fd39d612773086a456fc32d88 /drivers/md/md.c
parent: 9f2f3830789a4c9c1af2d1437d407c43e05136e6 (diff)
md/raid1: avoid reading from known bad blocks.
Now that we have a bad block list, we should not read from those blocks.
There are several main parts to this:
  1/ read_balance needs to check for bad blocks, and return not only the chosen device, but also how many good blocks are available there.
  2/ fix_read_error needs to avoid trying to read from bad blocks.
  3/ read submission must be ready to issue multiple reads to different devices as different bad blocks on different devices could mean that a single large read cannot be served by any one device, but can still be served by the array. This requires keeping count of the number of outstanding requests per bio. This count is stored in 'bi_phys_segments'.
  4/ retrying a read needs to also be ready to submit a smaller read and queue another request for the rest.

This does not yet handle bad blocks when reading to perform resync, recovery, or check.

'md_trim_bio' will also be used for RAID10, so put it in md.c and export it.

Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- drivers/md/md.c 49
1 files changed, 49 insertions, 0 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 7ae3c5a18001..48217e8aa0eb 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -215,6 +215,55 @@ struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
215} 215}
216EXPORT_SYMBOL_GPL(bio_clone_mddev); 216EXPORT_SYMBOL_GPL(bio_clone_mddev);
217 217
218void md_trim_bio(struct bio *bio, int offset, int size)
219{
220 /* 'bio' is a cloned bio which we need to trim, in place, so that it
221 * covers only 'size' 512-byte sectors starting 'offset' sectors into its
222 * current range. This requires adjusting bi_sector, bi_size, and bi_io_vec
223 */
224 int i;
225 struct bio_vec *bvec;
226 int sofar = 0; /* bytes accounted for so far by the final clipping loop */
227
228 size <<= 9; /* sectors -> bytes, to match the units of bi_size */
229 if (offset == 0 && size == bio->bi_size)
230 return; /* bio already covers exactly the requested range */
231
232 bio->bi_sector += offset; /* offset is still in sectors here */
233 bio->bi_size = size;
234 offset <<= 9; /* from here on offset is a byte count */
235 clear_bit(BIO_SEG_VALID, &bio->bi_flags); /* NOTE(review): presumably so the segment count gets recomputed - confirm */
236
237 while (bio->bi_idx < bio->bi_vcnt &&
238 bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
239 /* this whole bio_vec lies before the new start: skip over it */
240 offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
241 bio->bi_idx++;
242 }
243 if (bio->bi_idx < bio->bi_vcnt) { /* drop the leftover offset from the front of the first kept bio_vec */
244 bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
245 bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
246 }
247 /* avoid any complications with bi_idx being non-zero: slide the kept bio_vecs to the start of bi_io_vec and reset bi_idx */
248 if (bio->bi_idx) {
249 memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
250 (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
251 bio->bi_vcnt -= bio->bi_idx;
252 bio->bi_idx = 0;
253 }
254 /* Make sure vcnt and last bv are not too big: clip the bio_vec that crosses 'size' bytes and end the vector at the first empty one */
255 bio_for_each_segment(bvec, bio, i) {
256 if (sofar + bvec->bv_len > size)
257 bvec->bv_len = size - sofar; /* keep only the bytes still needed to reach 'size' */
258 if (bvec->bv_len == 0) {
259 bio->bi_vcnt = i; /* this and all later bio_vecs are beyond the trimmed range */
260 break;
261 }
262 sofar += bvec->bv_len;
263 }
264}
265EXPORT_SYMBOL_GPL(md_trim_bio);
266
218/* 267/*
219 * We have a system wide 'event count' that is incremented 268 * We have a system wide 'event count' that is incremented
220 * on any 'interesting' event, and readers of /proc/mdstat 269 * on any 'interesting' event, and readers of /proc/mdstat