diff options
| -rw-r--r-- | drivers/md/raid10.c | 48 | ||||
| -rw-r--r-- | drivers/md/raid10.h | 61 |
2 files changed, 78 insertions, 31 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index f5088dda4dca..9722065022fa 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
| @@ -73,7 +73,8 @@ static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) | |||
| 73 | struct r10conf *conf = data; | 73 | struct r10conf *conf = data; |
| 74 | int size = offsetof(struct r10bio, devs[conf->copies]); | 74 | int size = offsetof(struct r10bio, devs[conf->copies]); |
| 75 | 75 | ||
| 76 | /* allocate a r10bio with room for raid_disks entries in the bios array */ | 76 | /* allocate a r10bio with room for raid_disks entries in the |
| 77 | * bios array */ | ||
| 77 | return kzalloc(size, gfp_flags); | 78 | return kzalloc(size, gfp_flags); |
| 78 | } | 79 | } |
| 79 | 80 | ||
| @@ -123,12 +124,19 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) | |||
| 123 | if (!bio) | 124 | if (!bio) |
| 124 | goto out_free_bio; | 125 | goto out_free_bio; |
| 125 | r10_bio->devs[j].bio = bio; | 126 | r10_bio->devs[j].bio = bio; |
| 127 | if (!conf->have_replacement) | ||
| 128 | continue; | ||
| 129 | bio = bio_kmalloc(gfp_flags, RESYNC_PAGES); | ||
| 130 | if (!bio) | ||
| 131 | goto out_free_bio; | ||
| 132 | r10_bio->devs[j].repl_bio = bio; | ||
| 126 | } | 133 | } |
| 127 | /* | 134 | /* |
| 128 | * Allocate RESYNC_PAGES data pages and attach them | 135 | * Allocate RESYNC_PAGES data pages and attach them |
| 129 | * where needed. | 136 | * where needed. |
| 130 | */ | 137 | */ |
| 131 | for (j = 0 ; j < nalloc; j++) { | 138 | for (j = 0 ; j < nalloc; j++) { |
| 139 | struct bio *rbio = r10_bio->devs[j].repl_bio; | ||
| 132 | bio = r10_bio->devs[j].bio; | 140 | bio = r10_bio->devs[j].bio; |
| 133 | for (i = 0; i < RESYNC_PAGES; i++) { | 141 | for (i = 0; i < RESYNC_PAGES; i++) { |
| 134 | if (j == 1 && !test_bit(MD_RECOVERY_SYNC, | 142 | if (j == 1 && !test_bit(MD_RECOVERY_SYNC, |
| @@ -143,6 +151,8 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) | |||
| 143 | goto out_free_pages; | 151 | goto out_free_pages; |
| 144 | 152 | ||
| 145 | bio->bi_io_vec[i].bv_page = page; | 153 | bio->bi_io_vec[i].bv_page = page; |
| 154 | if (rbio) | ||
| 155 | rbio->bi_io_vec[i].bv_page = page; | ||
| 146 | } | 156 | } |
| 147 | } | 157 | } |
| 148 | 158 | ||
| @@ -156,8 +166,11 @@ out_free_pages: | |||
| 156 | safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page); | 166 | safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page); |
| 157 | j = -1; | 167 | j = -1; |
| 158 | out_free_bio: | 168 | out_free_bio: |
| 159 | while ( ++j < nalloc ) | 169 | while (++j < nalloc) { |
| 160 | bio_put(r10_bio->devs[j].bio); | 170 | bio_put(r10_bio->devs[j].bio); |
| 171 | if (r10_bio->devs[j].repl_bio) | ||
| 172 | bio_put(r10_bio->devs[j].repl_bio); | ||
| 173 | } | ||
| 161 | r10bio_pool_free(r10_bio, conf); | 174 | r10bio_pool_free(r10_bio, conf); |
| 162 | return NULL; | 175 | return NULL; |
| 163 | } | 176 | } |
| @@ -178,6 +191,9 @@ static void r10buf_pool_free(void *__r10_bio, void *data) | |||
| 178 | } | 191 | } |
| 179 | bio_put(bio); | 192 | bio_put(bio); |
| 180 | } | 193 | } |
| 194 | bio = r10bio->devs[j].repl_bio; | ||
| 195 | if (bio) | ||
| 196 | bio_put(bio); | ||
| 181 | } | 197 | } |
| 182 | r10bio_pool_free(r10bio, conf); | 198 | r10bio_pool_free(r10bio, conf); |
| 183 | } | 199 | } |
| @@ -191,6 +207,10 @@ static void put_all_bios(struct r10conf *conf, struct r10bio *r10_bio) | |||
| 191 | if (!BIO_SPECIAL(*bio)) | 207 | if (!BIO_SPECIAL(*bio)) |
| 192 | bio_put(*bio); | 208 | bio_put(*bio); |
| 193 | *bio = NULL; | 209 | *bio = NULL; |
| 210 | bio = &r10_bio->devs[i].repl_bio; | ||
| 211 | if (r10_bio->read_slot < 0 && !BIO_SPECIAL(*bio)) | ||
| 212 | bio_put(*bio); | ||
| 213 | *bio = NULL; | ||
| 194 | } | 214 | } |
| 195 | } | 215 | } |
| 196 | 216 | ||
| @@ -275,19 +295,27 @@ static inline void update_head_pos(int slot, struct r10bio *r10_bio) | |||
| 275 | * Find the disk number which triggered given bio | 295 | * Find the disk number which triggered given bio |
| 276 | */ | 296 | */ |
| 277 | static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio, | 297 | static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio, |
| 278 | struct bio *bio, int *slotp) | 298 | struct bio *bio, int *slotp, int *replp) |
| 279 | { | 299 | { |
| 280 | int slot; | 300 | int slot; |
| 301 | int repl = 0; | ||
| 281 | 302 | ||
| 282 | for (slot = 0; slot < conf->copies; slot++) | 303 | for (slot = 0; slot < conf->copies; slot++) { |
| 283 | if (r10_bio->devs[slot].bio == bio) | 304 | if (r10_bio->devs[slot].bio == bio) |
| 284 | break; | 305 | break; |
| 306 | if (r10_bio->devs[slot].repl_bio == bio) { | ||
| 307 | repl = 1; | ||
| 308 | break; | ||
| 309 | } | ||
| 310 | } | ||
| 285 | 311 | ||
| 286 | BUG_ON(slot == conf->copies); | 312 | BUG_ON(slot == conf->copies); |
| 287 | update_head_pos(slot, r10_bio); | 313 | update_head_pos(slot, r10_bio); |
| 288 | 314 | ||
| 289 | if (slotp) | 315 | if (slotp) |
| 290 | *slotp = slot; | 316 | *slotp = slot; |
| 317 | if (replp) | ||
| 318 | *replp = repl; | ||
| 291 | return r10_bio->devs[slot].devnum; | 319 | return r10_bio->devs[slot].devnum; |
| 292 | } | 320 | } |
| 293 | 321 | ||
| @@ -368,7 +396,7 @@ static void raid10_end_write_request(struct bio *bio, int error) | |||
| 368 | struct r10conf *conf = r10_bio->mddev->private; | 396 | struct r10conf *conf = r10_bio->mddev->private; |
| 369 | int slot; | 397 | int slot; |
| 370 | 398 | ||
| 371 | dev = find_bio_disk(conf, r10_bio, bio, &slot); | 399 | dev = find_bio_disk(conf, r10_bio, bio, &slot, NULL); |
| 372 | 400 | ||
| 373 | /* | 401 | /* |
| 374 | * this branch is our 'one mirror IO has finished' event handler: | 402 | * this branch is our 'one mirror IO has finished' event handler: |
| @@ -1025,6 +1053,7 @@ read_again: | |||
| 1025 | */ | 1053 | */ |
| 1026 | plugged = mddev_check_plugged(mddev); | 1054 | plugged = mddev_check_plugged(mddev); |
| 1027 | 1055 | ||
| 1056 | r10_bio->read_slot = -1; /* make sure repl_bio gets freed */ | ||
| 1028 | raid10_find_phys(conf, r10_bio); | 1057 | raid10_find_phys(conf, r10_bio); |
| 1029 | retry_write: | 1058 | retry_write: |
| 1030 | blocked_rdev = NULL; | 1059 | blocked_rdev = NULL; |
| @@ -1431,7 +1460,7 @@ static void end_sync_read(struct bio *bio, int error) | |||
| 1431 | struct r10conf *conf = r10_bio->mddev->private; | 1460 | struct r10conf *conf = r10_bio->mddev->private; |
| 1432 | int d; | 1461 | int d; |
| 1433 | 1462 | ||
| 1434 | d = find_bio_disk(conf, r10_bio, bio, NULL); | 1463 | d = find_bio_disk(conf, r10_bio, bio, NULL, NULL); |
| 1435 | 1464 | ||
| 1436 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) | 1465 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) |
| 1437 | set_bit(R10BIO_Uptodate, &r10_bio->state); | 1466 | set_bit(R10BIO_Uptodate, &r10_bio->state); |
| @@ -1493,7 +1522,7 @@ static void end_sync_write(struct bio *bio, int error) | |||
| 1493 | int bad_sectors; | 1522 | int bad_sectors; |
| 1494 | int slot; | 1523 | int slot; |
| 1495 | 1524 | ||
| 1496 | d = find_bio_disk(conf, r10_bio, bio, &slot); | 1525 | d = find_bio_disk(conf, r10_bio, bio, &slot, NULL); |
| 1497 | 1526 | ||
| 1498 | if (!uptodate) { | 1527 | if (!uptodate) { |
| 1499 | set_bit(WriteErrorSeen, &conf->mirrors[d].rdev->flags); | 1528 | set_bit(WriteErrorSeen, &conf->mirrors[d].rdev->flags); |
| @@ -2271,9 +2300,14 @@ static void raid10d(struct mddev *mddev) | |||
| 2271 | static int init_resync(struct r10conf *conf) | 2300 | static int init_resync(struct r10conf *conf) |
| 2272 | { | 2301 | { |
| 2273 | int buffs; | 2302 | int buffs; |
| 2303 | int i; | ||
| 2274 | 2304 | ||
| 2275 | buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE; | 2305 | buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE; |
| 2276 | BUG_ON(conf->r10buf_pool); | 2306 | BUG_ON(conf->r10buf_pool); |
| 2307 | conf->have_replacement = 0; | ||
| 2308 | for (i = 0; i < conf->raid_disks; i++) | ||
| 2309 | if (conf->mirrors[i].replacement) | ||
| 2310 | conf->have_replacement = 1; | ||
| 2277 | conf->r10buf_pool = mempool_create(buffs, r10buf_pool_alloc, r10buf_pool_free, conf); | 2311 | conf->r10buf_pool = mempool_create(buffs, r10buf_pool_alloc, r10buf_pool_free, conf); |
| 2278 | if (!conf->r10buf_pool) | 2312 | if (!conf->r10buf_pool) |
| 2279 | return -ENOMEM; | 2313 | return -ENOMEM; |
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 7facfdf841f4..7c615613c381 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | #define _RAID10_H | 2 | #define _RAID10_H |
| 3 | 3 | ||
| 4 | struct mirror_info { | 4 | struct mirror_info { |
| 5 | struct md_rdev *rdev; | 5 | struct md_rdev *rdev, *replacement; |
| 6 | sector_t head_position; | 6 | sector_t head_position; |
| 7 | int recovery_disabled; /* matches | 7 | int recovery_disabled; /* matches |
| 8 | * mddev->recovery_disabled | 8 | * mddev->recovery_disabled |
| @@ -18,12 +18,13 @@ struct r10conf { | |||
| 18 | spinlock_t device_lock; | 18 | spinlock_t device_lock; |
| 19 | 19 | ||
| 20 | /* geometry */ | 20 | /* geometry */ |
| 21 | int near_copies; /* number of copies laid out raid0 style */ | 21 | int near_copies; /* number of copies laid out |
| 22 | * raid0 style */ | ||
| 22 | int far_copies; /* number of copies laid out | 23 | int far_copies; /* number of copies laid out |
| 23 | * at large strides across drives | 24 | * at large strides across drives |
| 24 | */ | 25 | */ |
| 25 | int far_offset; /* far_copies are offset by 1 stripe | 26 | int far_offset; /* far_copies are offset by 1 |
| 26 | * instead of many | 27 | * stripe instead of many |
| 27 | */ | 28 | */ |
| 28 | int copies; /* near_copies * far_copies. | 29 | int copies; /* near_copies * far_copies. |
| 29 | * must be <= raid_disks | 30 | * must be <= raid_disks |
| @@ -34,10 +35,11 @@ struct r10conf { | |||
| 34 | * 1 stripe. | 35 | * 1 stripe. |
| 35 | */ | 36 | */ |
| 36 | 37 | ||
| 37 | sector_t dev_sectors; /* temp copy of mddev->dev_sectors */ | 38 | sector_t dev_sectors; /* temp copy of |
| 39 | * mddev->dev_sectors */ | ||
| 38 | 40 | ||
| 39 | int chunk_shift; /* shift from chunks to sectors */ | 41 | int chunk_shift; /* shift from chunks to sectors */ |
| 40 | sector_t chunk_mask; | 42 | sector_t chunk_mask; |
| 41 | 43 | ||
| 42 | struct list_head retry_list; | 44 | struct list_head retry_list; |
| 43 | /* queue pending writes and submit them on unplug */ | 45 | /* queue pending writes and submit them on unplug */ |
| @@ -45,20 +47,22 @@ struct r10conf { | |||
| 45 | int pending_count; | 47 | int pending_count; |
| 46 | 48 | ||
| 47 | spinlock_t resync_lock; | 49 | spinlock_t resync_lock; |
| 48 | int nr_pending; | 50 | int nr_pending; |
| 49 | int nr_waiting; | 51 | int nr_waiting; |
| 50 | int nr_queued; | 52 | int nr_queued; |
| 51 | int barrier; | 53 | int barrier; |
| 52 | sector_t next_resync; | 54 | sector_t next_resync; |
| 53 | int fullsync; /* set to 1 if a full sync is needed, | 55 | int fullsync; /* set to 1 if a full sync is needed, |
| 54 | * (fresh device added). | 56 | * (fresh device added). |
| 55 | * Cleared when a sync completes. | 57 | * Cleared when a sync completes. |
| 56 | */ | 58 | */ |
| 57 | 59 | int have_replacement; /* There is at least one | |
| 60 | * replacement device. | ||
| 61 | */ | ||
| 58 | wait_queue_head_t wait_barrier; | 62 | wait_queue_head_t wait_barrier; |
| 59 | 63 | ||
| 60 | mempool_t *r10bio_pool; | 64 | mempool_t *r10bio_pool; |
| 61 | mempool_t *r10buf_pool; | 65 | mempool_t *r10buf_pool; |
| 62 | struct page *tmppage; | 66 | struct page *tmppage; |
| 63 | 67 | ||
| 64 | /* When taking over an array from a different personality, we store | 68 | /* When taking over an array from a different personality, we store |
| @@ -98,11 +102,18 @@ struct r10bio { | |||
| 98 | * When resyncing we also use one for each copy. | 102 | * When resyncing we also use one for each copy. |
| 99 | * When reconstructing, we use 2 bios, one for read, one for write. | 103 | * When reconstructing, we use 2 bios, one for read, one for write. |
| 100 | * We choose the number when they are allocated. | 104 | * We choose the number when they are allocated. |
| 105 | * We sometimes need an extra bio to write to the replacement. | ||
| 101 | */ | 106 | */ |
| 102 | struct { | 107 | struct { |
| 103 | struct bio *bio; | 108 | struct bio *bio; |
| 104 | sector_t addr; | 109 | union { |
| 105 | int devnum; | 110 | struct bio *repl_bio; /* used for resync and |
| 111 | * writes */ | ||
| 112 | struct md_rdev *rdev; /* used for reads | ||
| 113 | * (read_slot >= 0) */ | ||
| 114 | }; | ||
| 115 | sector_t addr; | ||
| 116 | int devnum; | ||
| 106 | } devs[0]; | 117 | } devs[0]; |
| 107 | }; | 118 | }; |
| 108 | 119 | ||
| @@ -121,17 +132,19 @@ struct r10bio { | |||
| 121 | #define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) | 132 | #define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) |
| 122 | 133 | ||
| 123 | /* bits for r10bio.state */ | 134 | /* bits for r10bio.state */ |
| 124 | #define R10BIO_Uptodate 0 | 135 | enum r10bio_state { |
| 125 | #define R10BIO_IsSync 1 | 136 | R10BIO_Uptodate, |
| 126 | #define R10BIO_IsRecover 2 | 137 | R10BIO_IsSync, |
| 127 | #define R10BIO_Degraded 3 | 138 | R10BIO_IsRecover, |
| 139 | R10BIO_Degraded, | ||
| 128 | /* Set ReadError on bios that experience a read error | 140 | /* Set ReadError on bios that experience a read error |
| 129 | * so that raid10d knows what to do with them. | 141 | * so that raid10d knows what to do with them. |
| 130 | */ | 142 | */ |
| 131 | #define R10BIO_ReadError 4 | 143 | R10BIO_ReadError, |
| 132 | /* If a write for this request means we can clear some | 144 | /* If a write for this request means we can clear some |
| 133 | * known-bad-block records, we set this flag. | 145 | * known-bad-block records, we set this flag. |
| 134 | */ | 146 | */ |
| 135 | #define R10BIO_MadeGood 5 | 147 | R10BIO_MadeGood, |
| 136 | #define R10BIO_WriteError 6 | 148 | R10BIO_WriteError, |
| 149 | }; | ||
| 137 | #endif | 150 | #endif |
