diff options
author | NeilBrown <neilb@suse.de> | 2011-12-22 18:17:52 -0500 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2011-12-22 18:17:52 -0500 |
commit | 671488cc25f7c194c7c7a9f258bab1df17a6ff69 (patch) | |
tree | 00bec63c7789dbf7439418ee903490d6ee076a70 /drivers/md | |
parent | 2d78f8c451785f030ac1676a18691896b59c69d8 (diff) |
md/raid5: allow each slot to have an extra replacement device
Just enhance data structures to record a second device per slot to be
used as a 'replacement' device, replacing the original.
We also have a second bio in each slot in each stripe_head. This will
only be used when writing to the array - we need to write to both the
original and the replacement at the same time, so we will need two bios.
For now, only try using the replacement drive for aligned reads.
In this case, we prefer the replacement if it is not faulty and has been
recovered far enough (its recovery_offset reaches the end of the read);
otherwise we use the original.
This includes a small enhancement. Previously we would only do
aligned reads if the target device was fully recovered (In_sync). Now we
also do them if it has recovered far enough to cover the whole read.
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/raid5.c | 15 | ||||
-rw-r--r-- | drivers/md/raid5.h | 57 |
2 files changed, 46 insertions, 26 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 6b9fc58e8f2d..94bc35ba4c81 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -3594,6 +3594,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) | |||
3594 | int dd_idx; | 3594 | int dd_idx; |
3595 | struct bio* align_bi; | 3595 | struct bio* align_bi; |
3596 | struct md_rdev *rdev; | 3596 | struct md_rdev *rdev; |
3597 | sector_t end_sector; | ||
3597 | 3598 | ||
3598 | if (!in_chunk_boundary(mddev, raid_bio)) { | 3599 | if (!in_chunk_boundary(mddev, raid_bio)) { |
3599 | pr_debug("chunk_aligned_read : non aligned\n"); | 3600 | pr_debug("chunk_aligned_read : non aligned\n"); |
@@ -3618,9 +3619,19 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) | |||
3618 | 0, | 3619 | 0, |
3619 | &dd_idx, NULL); | 3620 | &dd_idx, NULL); |
3620 | 3621 | ||
3622 | end_sector = align_bi->bi_sector + (align_bi->bi_size >> 9); | ||
3621 | rcu_read_lock(); | 3623 | rcu_read_lock(); |
3622 | rdev = rcu_dereference(conf->disks[dd_idx].rdev); | 3624 | rdev = rcu_dereference(conf->disks[dd_idx].replacement); |
3623 | if (rdev && test_bit(In_sync, &rdev->flags)) { | 3625 | if (!rdev || test_bit(Faulty, &rdev->flags) || |
3626 | rdev->recovery_offset < end_sector) { | ||
3627 | rdev = rcu_dereference(conf->disks[dd_idx].rdev); | ||
3628 | if (rdev && | ||
3629 | (test_bit(Faulty, &rdev->flags) || | ||
3630 | !(test_bit(In_sync, &rdev->flags) || | ||
3631 | rdev->recovery_offset >= end_sector))) | ||
3632 | rdev = NULL; | ||
3633 | } | ||
3634 | if (rdev) { | ||
3624 | sector_t first_bad; | 3635 | sector_t first_bad; |
3625 | int bad_sectors; | 3636 | int bad_sectors; |
3626 | 3637 | ||
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index e10c5531f9c5..43106f01862d 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
@@ -226,8 +226,11 @@ struct stripe_head { | |||
226 | #endif | 226 | #endif |
227 | } ops; | 227 | } ops; |
228 | struct r5dev { | 228 | struct r5dev { |
229 | struct bio req; | 229 | /* rreq and rvec are used for the replacement device when |
230 | struct bio_vec vec; | 230 | * writing data to both devices. |
231 | */ | ||
232 | struct bio req, rreq; | ||
233 | struct bio_vec vec, rvec; | ||
231 | struct page *page; | 234 | struct page *page; |
232 | struct bio *toread, *read, *towrite, *written; | 235 | struct bio *toread, *read, *towrite, *written; |
233 | sector_t sector; /* sector of this page */ | 236 | sector_t sector; /* sector of this page */ |
@@ -252,29 +255,35 @@ struct stripe_head_state { | |||
252 | int handle_bad_blocks; | 255 | int handle_bad_blocks; |
253 | }; | 256 | }; |
254 | 257 | ||
255 | /* Flags */ | 258 | /* Flags for struct r5dev.flags */ |
256 | #define R5_UPTODATE 0 /* page contains current data */ | 259 | enum r5dev_flags { |
257 | #define R5_LOCKED 1 /* IO has been submitted on "req" */ | 260 | R5_UPTODATE, /* page contains current data */ |
258 | #define R5_OVERWRITE 2 /* towrite covers whole page */ | 261 | R5_LOCKED, /* IO has been submitted on "req" */ |
262 | R5_OVERWRITE, /* towrite covers whole page */ | ||
259 | /* and some that are internal to handle_stripe */ | 263 | /* and some that are internal to handle_stripe */ |
260 | #define R5_Insync 3 /* rdev && rdev->in_sync at start */ | 264 | R5_Insync, /* rdev && rdev->in_sync at start */ |
261 | #define R5_Wantread 4 /* want to schedule a read */ | 265 | R5_Wantread, /* want to schedule a read */ |
262 | #define R5_Wantwrite 5 | 266 | R5_Wantwrite, |
263 | #define R5_Overlap 7 /* There is a pending overlapping request on this block */ | 267 | R5_Overlap, /* There is a pending overlapping request |
264 | #define R5_ReadError 8 /* seen a read error here recently */ | 268 | * on this block */ |
265 | #define R5_ReWrite 9 /* have tried to over-write the readerror */ | 269 | R5_ReadError, /* seen a read error here recently */ |
270 | R5_ReWrite, /* have tried to over-write the readerror */ | ||
266 | 271 | ||
267 | #define R5_Expanded 10 /* This block now has post-expand data */ | 272 | R5_Expanded, /* This block now has post-expand data */ |
268 | #define R5_Wantcompute 11 /* compute_block in progress treat as | 273 | R5_Wantcompute, /* compute_block in progress treat as |
269 | * uptodate | 274 | * uptodate |
270 | */ | 275 | */ |
271 | #define R5_Wantfill 12 /* dev->toread contains a bio that needs | 276 | R5_Wantfill, /* dev->toread contains a bio that needs |
272 | * filling | 277 | * filling |
273 | */ | 278 | */ |
274 | #define R5_Wantdrain 13 /* dev->towrite needs to be drained */ | 279 | R5_Wantdrain, /* dev->towrite needs to be drained */ |
275 | #define R5_WantFUA 14 /* Write should be FUA */ | 280 | R5_WantFUA, /* Write should be FUA */ |
276 | #define R5_WriteError 15 /* got a write error - need to record it */ | 281 | R5_WriteError, /* got a write error - need to record it */ |
277 | #define R5_MadeGood 16 /* A bad block has been fixed by writing to it*/ | 282 | R5_MadeGood, /* A bad block has been fixed by writing to it */ |
283 | R5_ReadRepl, /* Will/did read from replacement rather than orig */ | ||
284 | R5_MadeGoodRepl,/* A bad block on the replacement device has been | ||
285 | * fixed by writing to it */ | ||
286 | }; | ||
278 | /* | 287 | /* |
279 | * Write method | 288 | * Write method |
280 | */ | 289 | */ |
@@ -344,7 +353,7 @@ enum { | |||
344 | 353 | ||
345 | 354 | ||
346 | struct disk_info { | 355 | struct disk_info { |
347 | struct md_rdev *rdev; | 356 | struct md_rdev *rdev, *replacement; |
348 | }; | 357 | }; |
349 | 358 | ||
350 | struct r5conf { | 359 | struct r5conf { |