diff options
author | NeilBrown <neilb@suse.de> | 2011-12-22 18:17:54 -0500 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2011-12-22 18:17:54 -0500 |
commit | 69335ef3bc5b766f34db2d688be1d35313138bca (patch) | |
tree | dcd87ffc4c97540d374a20de7380368e3679ac3b | |
parent | 3a6de2924af602f9c1b5a5154438c37f2d712dfa (diff) |
md/raid10: prepare data structures for handling replacement.
Allow each slot in the RAID10 to have 2 devices, the want_replacement
and the replacement.
Also allow an r10bio to have 2 bios, and for resync/recovery allocate the
second bio if there are any replacement devices.
Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- | drivers/md/raid10.c | 48 | ||||
-rw-r--r-- | drivers/md/raid10.h | 61 |
2 files changed, 78 insertions, 31 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index f5088dda4dca..9722065022fa 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -73,7 +73,8 @@ static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) | |||
73 | struct r10conf *conf = data; | 73 | struct r10conf *conf = data; |
74 | int size = offsetof(struct r10bio, devs[conf->copies]); | 74 | int size = offsetof(struct r10bio, devs[conf->copies]); |
75 | 75 | ||
76 | /* allocate a r10bio with room for raid_disks entries in the bios array */ | 76 | /* allocate a r10bio with room for raid_disks entries in the |
77 | * bios array */ | ||
77 | return kzalloc(size, gfp_flags); | 78 | return kzalloc(size, gfp_flags); |
78 | } | 79 | } |
79 | 80 | ||
@@ -123,12 +124,19 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) | |||
123 | if (!bio) | 124 | if (!bio) |
124 | goto out_free_bio; | 125 | goto out_free_bio; |
125 | r10_bio->devs[j].bio = bio; | 126 | r10_bio->devs[j].bio = bio; |
127 | if (!conf->have_replacement) | ||
128 | continue; | ||
129 | bio = bio_kmalloc(gfp_flags, RESYNC_PAGES); | ||
130 | if (!bio) | ||
131 | goto out_free_bio; | ||
132 | r10_bio->devs[j].repl_bio = bio; | ||
126 | } | 133 | } |
127 | /* | 134 | /* |
128 | * Allocate RESYNC_PAGES data pages and attach them | 135 | * Allocate RESYNC_PAGES data pages and attach them |
129 | * where needed. | 136 | * where needed. |
130 | */ | 137 | */ |
131 | for (j = 0 ; j < nalloc; j++) { | 138 | for (j = 0 ; j < nalloc; j++) { |
139 | struct bio *rbio = r10_bio->devs[j].repl_bio; | ||
132 | bio = r10_bio->devs[j].bio; | 140 | bio = r10_bio->devs[j].bio; |
133 | for (i = 0; i < RESYNC_PAGES; i++) { | 141 | for (i = 0; i < RESYNC_PAGES; i++) { |
134 | if (j == 1 && !test_bit(MD_RECOVERY_SYNC, | 142 | if (j == 1 && !test_bit(MD_RECOVERY_SYNC, |
@@ -143,6 +151,8 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) | |||
143 | goto out_free_pages; | 151 | goto out_free_pages; |
144 | 152 | ||
145 | bio->bi_io_vec[i].bv_page = page; | 153 | bio->bi_io_vec[i].bv_page = page; |
154 | if (rbio) | ||
155 | rbio->bi_io_vec[i].bv_page = page; | ||
146 | } | 156 | } |
147 | } | 157 | } |
148 | 158 | ||
@@ -156,8 +166,11 @@ out_free_pages: | |||
156 | safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page); | 166 | safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page); |
157 | j = -1; | 167 | j = -1; |
158 | out_free_bio: | 168 | out_free_bio: |
159 | while ( ++j < nalloc ) | 169 | while (++j < nalloc) { |
160 | bio_put(r10_bio->devs[j].bio); | 170 | bio_put(r10_bio->devs[j].bio); |
171 | if (r10_bio->devs[j].repl_bio) | ||
172 | bio_put(r10_bio->devs[j].repl_bio); | ||
173 | } | ||
161 | r10bio_pool_free(r10_bio, conf); | 174 | r10bio_pool_free(r10_bio, conf); |
162 | return NULL; | 175 | return NULL; |
163 | } | 176 | } |
@@ -178,6 +191,9 @@ static void r10buf_pool_free(void *__r10_bio, void *data) | |||
178 | } | 191 | } |
179 | bio_put(bio); | 192 | bio_put(bio); |
180 | } | 193 | } |
194 | bio = r10bio->devs[j].repl_bio; | ||
195 | if (bio) | ||
196 | bio_put(bio); | ||
181 | } | 197 | } |
182 | r10bio_pool_free(r10bio, conf); | 198 | r10bio_pool_free(r10bio, conf); |
183 | } | 199 | } |
@@ -191,6 +207,10 @@ static void put_all_bios(struct r10conf *conf, struct r10bio *r10_bio) | |||
191 | if (!BIO_SPECIAL(*bio)) | 207 | if (!BIO_SPECIAL(*bio)) |
192 | bio_put(*bio); | 208 | bio_put(*bio); |
193 | *bio = NULL; | 209 | *bio = NULL; |
210 | bio = &r10_bio->devs[i].repl_bio; | ||
211 | if (r10_bio->read_slot < 0 && !BIO_SPECIAL(*bio)) | ||
212 | bio_put(*bio); | ||
213 | *bio = NULL; | ||
194 | } | 214 | } |
195 | } | 215 | } |
196 | 216 | ||
@@ -275,19 +295,27 @@ static inline void update_head_pos(int slot, struct r10bio *r10_bio) | |||
275 | * Find the disk number which triggered given bio | 295 | * Find the disk number which triggered given bio |
276 | */ | 296 | */ |
277 | static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio, | 297 | static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio, |
278 | struct bio *bio, int *slotp) | 298 | struct bio *bio, int *slotp, int *replp) |
279 | { | 299 | { |
280 | int slot; | 300 | int slot; |
301 | int repl = 0; | ||
281 | 302 | ||
282 | for (slot = 0; slot < conf->copies; slot++) | 303 | for (slot = 0; slot < conf->copies; slot++) { |
283 | if (r10_bio->devs[slot].bio == bio) | 304 | if (r10_bio->devs[slot].bio == bio) |
284 | break; | 305 | break; |
306 | if (r10_bio->devs[slot].repl_bio == bio) { | ||
307 | repl = 1; | ||
308 | break; | ||
309 | } | ||
310 | } | ||
285 | 311 | ||
286 | BUG_ON(slot == conf->copies); | 312 | BUG_ON(slot == conf->copies); |
287 | update_head_pos(slot, r10_bio); | 313 | update_head_pos(slot, r10_bio); |
288 | 314 | ||
289 | if (slotp) | 315 | if (slotp) |
290 | *slotp = slot; | 316 | *slotp = slot; |
317 | if (replp) | ||
318 | *replp = repl; | ||
291 | return r10_bio->devs[slot].devnum; | 319 | return r10_bio->devs[slot].devnum; |
292 | } | 320 | } |
293 | 321 | ||
@@ -368,7 +396,7 @@ static void raid10_end_write_request(struct bio *bio, int error) | |||
368 | struct r10conf *conf = r10_bio->mddev->private; | 396 | struct r10conf *conf = r10_bio->mddev->private; |
369 | int slot; | 397 | int slot; |
370 | 398 | ||
371 | dev = find_bio_disk(conf, r10_bio, bio, &slot); | 399 | dev = find_bio_disk(conf, r10_bio, bio, &slot, NULL); |
372 | 400 | ||
373 | /* | 401 | /* |
374 | * this branch is our 'one mirror IO has finished' event handler: | 402 | * this branch is our 'one mirror IO has finished' event handler: |
@@ -1025,6 +1053,7 @@ read_again: | |||
1025 | */ | 1053 | */ |
1026 | plugged = mddev_check_plugged(mddev); | 1054 | plugged = mddev_check_plugged(mddev); |
1027 | 1055 | ||
1056 | r10_bio->read_slot = -1; /* make sure repl_bio gets freed */ | ||
1028 | raid10_find_phys(conf, r10_bio); | 1057 | raid10_find_phys(conf, r10_bio); |
1029 | retry_write: | 1058 | retry_write: |
1030 | blocked_rdev = NULL; | 1059 | blocked_rdev = NULL; |
@@ -1431,7 +1460,7 @@ static void end_sync_read(struct bio *bio, int error) | |||
1431 | struct r10conf *conf = r10_bio->mddev->private; | 1460 | struct r10conf *conf = r10_bio->mddev->private; |
1432 | int d; | 1461 | int d; |
1433 | 1462 | ||
1434 | d = find_bio_disk(conf, r10_bio, bio, NULL); | 1463 | d = find_bio_disk(conf, r10_bio, bio, NULL, NULL); |
1435 | 1464 | ||
1436 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) | 1465 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) |
1437 | set_bit(R10BIO_Uptodate, &r10_bio->state); | 1466 | set_bit(R10BIO_Uptodate, &r10_bio->state); |
@@ -1493,7 +1522,7 @@ static void end_sync_write(struct bio *bio, int error) | |||
1493 | int bad_sectors; | 1522 | int bad_sectors; |
1494 | int slot; | 1523 | int slot; |
1495 | 1524 | ||
1496 | d = find_bio_disk(conf, r10_bio, bio, &slot); | 1525 | d = find_bio_disk(conf, r10_bio, bio, &slot, NULL); |
1497 | 1526 | ||
1498 | if (!uptodate) { | 1527 | if (!uptodate) { |
1499 | set_bit(WriteErrorSeen, &conf->mirrors[d].rdev->flags); | 1528 | set_bit(WriteErrorSeen, &conf->mirrors[d].rdev->flags); |
@@ -2271,9 +2300,14 @@ static void raid10d(struct mddev *mddev) | |||
2271 | static int init_resync(struct r10conf *conf) | 2300 | static int init_resync(struct r10conf *conf) |
2272 | { | 2301 | { |
2273 | int buffs; | 2302 | int buffs; |
2303 | int i; | ||
2274 | 2304 | ||
2275 | buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE; | 2305 | buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE; |
2276 | BUG_ON(conf->r10buf_pool); | 2306 | BUG_ON(conf->r10buf_pool); |
2307 | conf->have_replacement = 0; | ||
2308 | for (i = 0; i < conf->raid_disks; i++) | ||
2309 | if (conf->mirrors[i].replacement) | ||
2310 | conf->have_replacement = 1; | ||
2277 | conf->r10buf_pool = mempool_create(buffs, r10buf_pool_alloc, r10buf_pool_free, conf); | 2311 | conf->r10buf_pool = mempool_create(buffs, r10buf_pool_alloc, r10buf_pool_free, conf); |
2278 | if (!conf->r10buf_pool) | 2312 | if (!conf->r10buf_pool) |
2279 | return -ENOMEM; | 2313 | return -ENOMEM; |
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 7facfdf841f4..7c615613c381 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h | |||
@@ -2,7 +2,7 @@ | |||
2 | #define _RAID10_H | 2 | #define _RAID10_H |
3 | 3 | ||
4 | struct mirror_info { | 4 | struct mirror_info { |
5 | struct md_rdev *rdev; | 5 | struct md_rdev *rdev, *replacement; |
6 | sector_t head_position; | 6 | sector_t head_position; |
7 | int recovery_disabled; /* matches | 7 | int recovery_disabled; /* matches |
8 | * mddev->recovery_disabled | 8 | * mddev->recovery_disabled |
@@ -18,12 +18,13 @@ struct r10conf { | |||
18 | spinlock_t device_lock; | 18 | spinlock_t device_lock; |
19 | 19 | ||
20 | /* geometry */ | 20 | /* geometry */ |
21 | int near_copies; /* number of copies laid out raid0 style */ | 21 | int near_copies; /* number of copies laid out |
22 | * raid0 style */ | ||
22 | int far_copies; /* number of copies laid out | 23 | int far_copies; /* number of copies laid out |
23 | * at large strides across drives | 24 | * at large strides across drives |
24 | */ | 25 | */ |
25 | int far_offset; /* far_copies are offset by 1 stripe | 26 | int far_offset; /* far_copies are offset by 1 |
26 | * instead of many | 27 | * stripe instead of many |
27 | */ | 28 | */ |
28 | int copies; /* near_copies * far_copies. | 29 | int copies; /* near_copies * far_copies. |
29 | * must be <= raid_disks | 30 | * must be <= raid_disks |
@@ -34,10 +35,11 @@ struct r10conf { | |||
34 | * 1 stripe. | 35 | * 1 stripe. |
35 | */ | 36 | */ |
36 | 37 | ||
37 | sector_t dev_sectors; /* temp copy of mddev->dev_sectors */ | 38 | sector_t dev_sectors; /* temp copy of |
39 | * mddev->dev_sectors */ | ||
38 | 40 | ||
39 | int chunk_shift; /* shift from chunks to sectors */ | 41 | int chunk_shift; /* shift from chunks to sectors */ |
40 | sector_t chunk_mask; | 42 | sector_t chunk_mask; |
41 | 43 | ||
42 | struct list_head retry_list; | 44 | struct list_head retry_list; |
43 | /* queue pending writes and submit them on unplug */ | 45 | /* queue pending writes and submit them on unplug */ |
@@ -45,20 +47,22 @@ struct r10conf { | |||
45 | int pending_count; | 47 | int pending_count; |
46 | 48 | ||
47 | spinlock_t resync_lock; | 49 | spinlock_t resync_lock; |
48 | int nr_pending; | 50 | int nr_pending; |
49 | int nr_waiting; | 51 | int nr_waiting; |
50 | int nr_queued; | 52 | int nr_queued; |
51 | int barrier; | 53 | int barrier; |
52 | sector_t next_resync; | 54 | sector_t next_resync; |
53 | int fullsync; /* set to 1 if a full sync is needed, | 55 | int fullsync; /* set to 1 if a full sync is needed, |
54 | * (fresh device added). | 56 | * (fresh device added). |
55 | * Cleared when a sync completes. | 57 | * Cleared when a sync completes. |
56 | */ | 58 | */ |
57 | 59 | int have_replacement; /* There is at least one | |
60 | * replacement device. | ||
61 | */ | ||
58 | wait_queue_head_t wait_barrier; | 62 | wait_queue_head_t wait_barrier; |
59 | 63 | ||
60 | mempool_t *r10bio_pool; | 64 | mempool_t *r10bio_pool; |
61 | mempool_t *r10buf_pool; | 65 | mempool_t *r10buf_pool; |
62 | struct page *tmppage; | 66 | struct page *tmppage; |
63 | 67 | ||
64 | /* When taking over an array from a different personality, we store | 68 | /* When taking over an array from a different personality, we store |
@@ -98,11 +102,18 @@ struct r10bio { | |||
98 | * When resyncing we also use one for each copy. | 102 | * When resyncing we also use one for each copy. |
99 | * When reconstructing, we use 2 bios, one for read, one for write. | 103 | * When reconstructing, we use 2 bios, one for read, one for write. |
100 | * We choose the number when they are allocated. | 104 | * We choose the number when they are allocated. |
105 | * We sometimes need an extra bio to write to the replacement. | ||
101 | */ | 106 | */ |
102 | struct { | 107 | struct { |
103 | struct bio *bio; | 108 | struct bio *bio; |
104 | sector_t addr; | 109 | union { |
105 | int devnum; | 110 | struct bio *repl_bio; /* used for resync and |
111 | * writes */ | ||
112 | struct md_rdev *rdev; /* used for reads | ||
113 | * (read_slot >= 0) */ | ||
114 | }; | ||
115 | sector_t addr; | ||
116 | int devnum; | ||
106 | } devs[0]; | 117 | } devs[0]; |
107 | }; | 118 | }; |
108 | 119 | ||
@@ -121,17 +132,19 @@ struct r10bio { | |||
121 | #define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) | 132 | #define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) |
122 | 133 | ||
123 | /* bits for r10bio.state */ | 134 | /* bits for r10bio.state */ |
124 | #define R10BIO_Uptodate 0 | 135 | enum r10bio_state { |
125 | #define R10BIO_IsSync 1 | 136 | R10BIO_Uptodate, |
126 | #define R10BIO_IsRecover 2 | 137 | R10BIO_IsSync, |
127 | #define R10BIO_Degraded 3 | 138 | R10BIO_IsRecover, |
139 | R10BIO_Degraded, | ||
128 | /* Set ReadError on bios that experience a read error | 140 | /* Set ReadError on bios that experience a read error |
129 | * so that raid10d knows what to do with them. | 141 | * so that raid10d knows what to do with them. |
130 | */ | 142 | */ |
131 | #define R10BIO_ReadError 4 | 143 | R10BIO_ReadError, |
132 | /* If a write for this request means we can clear some | 144 | /* If a write for this request means we can clear some |
133 | * known-bad-block records, we set this flag. | 145 | * known-bad-block records, we set this flag. |
134 | */ | 146 | */ |
135 | #define R10BIO_MadeGood 5 | 147 | R10BIO_MadeGood, |
136 | #define R10BIO_WriteError 6 | 148 | R10BIO_WriteError, |
149 | }; | ||
137 | #endif | 150 | #endif |