about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/md
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.de> 2011-12-22 18:17:52 -0500
committer: NeilBrown <neilb@suse.de> 2011-12-22 18:17:52 -0500
commit: 671488cc25f7c194c7c7a9f258bab1df17a6ff69 (patch)
tree: 00bec63c7789dbf7439418ee903490d6ee076a70 /drivers/md
parent: 2d78f8c451785f030ac1676a18691896b59c69d8 (diff)
md/raid5: allow each slot to have an extra replacement device
Just enhance data structures to record a second device per slot to be used as a 'replacement' device, replacing the original. We also have a second bio in each slot in each stripe_head. This will only be used when writing to the array - we need to write to both the original and the replacement at the same time, so will need two bios.

For now, only try using the replacement drive for aligned-reads. In this case, we prefer the replacement if it has been recovered far enough, otherwise use the original.

This includes a small enhancement. Previously we would only do aligned reads if the target device was fully recovered. Now we also do them if it has recovered far enough.

Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/raid5.c 15
-rw-r--r-- drivers/md/raid5.h 57
2 files changed, 46 insertions, 26 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 6b9fc58e8f2d..94bc35ba4c81 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3594,6 +3594,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
3594 int dd_idx; 3594 int dd_idx;
3595 struct bio* align_bi; 3595 struct bio* align_bi;
3596 struct md_rdev *rdev; 3596 struct md_rdev *rdev;
3597 sector_t end_sector;
3597 3598
3598 if (!in_chunk_boundary(mddev, raid_bio)) { 3599 if (!in_chunk_boundary(mddev, raid_bio)) {
3599 pr_debug("chunk_aligned_read : non aligned\n"); 3600 pr_debug("chunk_aligned_read : non aligned\n");
@@ -3618,9 +3619,19 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
3618 0, 3619 0,
3619 &dd_idx, NULL); 3620 &dd_idx, NULL);
3620 3621
3622 end_sector = align_bi->bi_sector + (align_bi->bi_size >> 9);
3621 rcu_read_lock(); 3623 rcu_read_lock();
3622 rdev = rcu_dereference(conf->disks[dd_idx].rdev); 3624 rdev = rcu_dereference(conf->disks[dd_idx].replacement);
3623 if (rdev && test_bit(In_sync, &rdev->flags)) { 3625 if (!rdev || test_bit(Faulty, &rdev->flags) ||
3626 rdev->recovery_offset < end_sector) {
3627 rdev = rcu_dereference(conf->disks[dd_idx].rdev);
3628 if (rdev &&
3629 (test_bit(Faulty, &rdev->flags) ||
3630 !(test_bit(In_sync, &rdev->flags) ||
3631 rdev->recovery_offset >= end_sector)))
3632 rdev = NULL;
3633 }
3634 if (rdev) {
3624 sector_t first_bad; 3635 sector_t first_bad;
3625 int bad_sectors; 3636 int bad_sectors;
3626 3637
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index e10c5531f9c5..43106f01862d 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -226,8 +226,11 @@ struct stripe_head {
226 #endif 226 #endif
227 } ops; 227 } ops;
228 struct r5dev { 228 struct r5dev {
229 struct bio req; 229 /* rreq and rvec are used for the replacement device when
230 struct bio_vec vec; 230 * writing data to both devices.
231 */
232 struct bio req, rreq;
233 struct bio_vec vec, rvec;
231 struct page *page; 234 struct page *page;
232 struct bio *toread, *read, *towrite, *written; 235 struct bio *toread, *read, *towrite, *written;
233 sector_t sector; /* sector of this page */ 236 sector_t sector; /* sector of this page */
@@ -252,29 +255,35 @@ struct stripe_head_state {
252 int handle_bad_blocks; 255 int handle_bad_blocks;
253}; 256};
254 257
255/* Flags */ 258/* Flags for struct r5dev.flags */
256#define R5_UPTODATE 0 /* page contains current data */ 259enum r5dev_flags {
257#define R5_LOCKED 1 /* IO has been submitted on "req" */ 260 R5_UPTODATE, /* page contains current data */
258#define R5_OVERWRITE 2 /* towrite covers whole page */ 261 R5_LOCKED, /* IO has been submitted on "req" */
262 R5_OVERWRITE, /* towrite covers whole page */
259/* and some that are internal to handle_stripe */ 263/* and some that are internal to handle_stripe */
260#define R5_Insync 3 /* rdev && rdev->in_sync at start */ 264 R5_Insync, /* rdev && rdev->in_sync at start */
261#define R5_Wantread 4 /* want to schedule a read */ 265 R5_Wantread, /* want to schedule a read */
262#define R5_Wantwrite 5 266 R5_Wantwrite,
263#define R5_Overlap 7 /* There is a pending overlapping request on this block */ 267 R5_Overlap, /* There is a pending overlapping request
264#define R5_ReadError 8 /* seen a read error here recently */ 268 * on this block */
265#define R5_ReWrite 9 /* have tried to over-write the readerror */ 269 R5_ReadError, /* seen a read error here recently */
270 R5_ReWrite, /* have tried to over-write the readerror */
266 271
267#define R5_Expanded 10 /* This block now has post-expand data */ 272 R5_Expanded, /* This block now has post-expand data */
268#define R5_Wantcompute 11 /* compute_block in progress treat as 273 R5_Wantcompute, /* compute_block in progress treat as
269 * uptodate 274 * uptodate
270 */ 275 */
271#define R5_Wantfill 12 /* dev->toread contains a bio that needs 276 R5_Wantfill, /* dev->toread contains a bio that needs
272 * filling 277 * filling
273 */ 278 */
274#define R5_Wantdrain 13 /* dev->towrite needs to be drained */ 279 R5_Wantdrain, /* dev->towrite needs to be drained */
275#define R5_WantFUA 14 /* Write should be FUA */ 280 R5_WantFUA, /* Write should be FUA */
276#define R5_WriteError 15 /* got a write error - need to record it */ 281 R5_WriteError, /* got a write error - need to record it */
277#define R5_MadeGood 16 /* A bad block has been fixed by writing to it*/ 282 R5_MadeGood, /* A bad block has been fixed by writing to it */
283 R5_ReadRepl, /* Will/did read from replacement rather than orig */
284 R5_MadeGoodRepl,/* A bad block on the replacement device has been
285 * fixed by writing to it */
286};
278/* 287/*
279 * Write method 288 * Write method
280 */ 289 */
@@ -344,7 +353,7 @@ enum {
344 353
345 354
346struct disk_info { 355struct disk_info {
347 struct md_rdev *rdev; 356 struct md_rdev *rdev, *replacement;
348}; 357};
349 358
350struct r5conf { 359struct r5conf {