aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2011-12-22 18:17:54 -0500
committerNeilBrown <neilb@suse.de>2011-12-22 18:17:54 -0500
commit69335ef3bc5b766f34db2d688be1d35313138bca (patch)
treedcd87ffc4c97540d374a20de7380368e3679ac3b /drivers/md
parent3a6de2924af602f9c1b5a5154438c37f2d712dfa (diff)
md/raid10: prepare data structures for handling replacement.
Allow each slot in the RAID10 to have 2 devices, the want_replacement and the replacement. Also an r10bio to have 2 bios, and for resync/recovery allocate the second bio if there are any replacement devices. Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/raid10.c48
-rw-r--r--drivers/md/raid10.h61
2 files changed, 78 insertions, 31 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f5088dda4dca..9722065022fa 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -73,7 +73,8 @@ static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
73 struct r10conf *conf = data; 73 struct r10conf *conf = data;
74 int size = offsetof(struct r10bio, devs[conf->copies]); 74 int size = offsetof(struct r10bio, devs[conf->copies]);
75 75
76 /* allocate a r10bio with room for raid_disks entries in the bios array */ 76 /* allocate a r10bio with room for raid_disks entries in the
77 * bios array */
77 return kzalloc(size, gfp_flags); 78 return kzalloc(size, gfp_flags);
78} 79}
79 80
@@ -123,12 +124,19 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
123 if (!bio) 124 if (!bio)
124 goto out_free_bio; 125 goto out_free_bio;
125 r10_bio->devs[j].bio = bio; 126 r10_bio->devs[j].bio = bio;
127 if (!conf->have_replacement)
128 continue;
129 bio = bio_kmalloc(gfp_flags, RESYNC_PAGES);
130 if (!bio)
131 goto out_free_bio;
132 r10_bio->devs[j].repl_bio = bio;
126 } 133 }
127 /* 134 /*
128 * Allocate RESYNC_PAGES data pages and attach them 135 * Allocate RESYNC_PAGES data pages and attach them
129 * where needed. 136 * where needed.
130 */ 137 */
131 for (j = 0 ; j < nalloc; j++) { 138 for (j = 0 ; j < nalloc; j++) {
139 struct bio *rbio = r10_bio->devs[j].repl_bio;
132 bio = r10_bio->devs[j].bio; 140 bio = r10_bio->devs[j].bio;
133 for (i = 0; i < RESYNC_PAGES; i++) { 141 for (i = 0; i < RESYNC_PAGES; i++) {
134 if (j == 1 && !test_bit(MD_RECOVERY_SYNC, 142 if (j == 1 && !test_bit(MD_RECOVERY_SYNC,
@@ -143,6 +151,8 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
143 goto out_free_pages; 151 goto out_free_pages;
144 152
145 bio->bi_io_vec[i].bv_page = page; 153 bio->bi_io_vec[i].bv_page = page;
154 if (rbio)
155 rbio->bi_io_vec[i].bv_page = page;
146 } 156 }
147 } 157 }
148 158
@@ -156,8 +166,11 @@ out_free_pages:
156 safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page); 166 safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page);
157 j = -1; 167 j = -1;
158out_free_bio: 168out_free_bio:
159 while ( ++j < nalloc ) 169 while (++j < nalloc) {
160 bio_put(r10_bio->devs[j].bio); 170 bio_put(r10_bio->devs[j].bio);
171 if (r10_bio->devs[j].repl_bio)
172 bio_put(r10_bio->devs[j].repl_bio);
173 }
161 r10bio_pool_free(r10_bio, conf); 174 r10bio_pool_free(r10_bio, conf);
162 return NULL; 175 return NULL;
163} 176}
@@ -178,6 +191,9 @@ static void r10buf_pool_free(void *__r10_bio, void *data)
178 } 191 }
179 bio_put(bio); 192 bio_put(bio);
180 } 193 }
194 bio = r10bio->devs[j].repl_bio;
195 if (bio)
196 bio_put(bio);
181 } 197 }
182 r10bio_pool_free(r10bio, conf); 198 r10bio_pool_free(r10bio, conf);
183} 199}
@@ -191,6 +207,10 @@ static void put_all_bios(struct r10conf *conf, struct r10bio *r10_bio)
191 if (!BIO_SPECIAL(*bio)) 207 if (!BIO_SPECIAL(*bio))
192 bio_put(*bio); 208 bio_put(*bio);
193 *bio = NULL; 209 *bio = NULL;
210 bio = &r10_bio->devs[i].repl_bio;
211 if (r10_bio->read_slot < 0 && !BIO_SPECIAL(*bio))
212 bio_put(*bio);
213 *bio = NULL;
194 } 214 }
195} 215}
196 216
@@ -275,19 +295,27 @@ static inline void update_head_pos(int slot, struct r10bio *r10_bio)
275 * Find the disk number which triggered given bio 295 * Find the disk number which triggered given bio
276 */ 296 */
277static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio, 297static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
278 struct bio *bio, int *slotp) 298 struct bio *bio, int *slotp, int *replp)
279{ 299{
280 int slot; 300 int slot;
301 int repl = 0;
281 302
282 for (slot = 0; slot < conf->copies; slot++) 303 for (slot = 0; slot < conf->copies; slot++) {
283 if (r10_bio->devs[slot].bio == bio) 304 if (r10_bio->devs[slot].bio == bio)
284 break; 305 break;
306 if (r10_bio->devs[slot].repl_bio == bio) {
307 repl = 1;
308 break;
309 }
310 }
285 311
286 BUG_ON(slot == conf->copies); 312 BUG_ON(slot == conf->copies);
287 update_head_pos(slot, r10_bio); 313 update_head_pos(slot, r10_bio);
288 314
289 if (slotp) 315 if (slotp)
290 *slotp = slot; 316 *slotp = slot;
317 if (replp)
318 *replp = repl;
291 return r10_bio->devs[slot].devnum; 319 return r10_bio->devs[slot].devnum;
292} 320}
293 321
@@ -368,7 +396,7 @@ static void raid10_end_write_request(struct bio *bio, int error)
368 struct r10conf *conf = r10_bio->mddev->private; 396 struct r10conf *conf = r10_bio->mddev->private;
369 int slot; 397 int slot;
370 398
371 dev = find_bio_disk(conf, r10_bio, bio, &slot); 399 dev = find_bio_disk(conf, r10_bio, bio, &slot, NULL);
372 400
373 /* 401 /*
374 * this branch is our 'one mirror IO has finished' event handler: 402 * this branch is our 'one mirror IO has finished' event handler:
@@ -1025,6 +1053,7 @@ read_again:
1025 */ 1053 */
1026 plugged = mddev_check_plugged(mddev); 1054 plugged = mddev_check_plugged(mddev);
1027 1055
1056 r10_bio->read_slot = -1; /* make sure repl_bio gets freed */
1028 raid10_find_phys(conf, r10_bio); 1057 raid10_find_phys(conf, r10_bio);
1029retry_write: 1058retry_write:
1030 blocked_rdev = NULL; 1059 blocked_rdev = NULL;
@@ -1431,7 +1460,7 @@ static void end_sync_read(struct bio *bio, int error)
1431 struct r10conf *conf = r10_bio->mddev->private; 1460 struct r10conf *conf = r10_bio->mddev->private;
1432 int d; 1461 int d;
1433 1462
1434 d = find_bio_disk(conf, r10_bio, bio, NULL); 1463 d = find_bio_disk(conf, r10_bio, bio, NULL, NULL);
1435 1464
1436 if (test_bit(BIO_UPTODATE, &bio->bi_flags)) 1465 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
1437 set_bit(R10BIO_Uptodate, &r10_bio->state); 1466 set_bit(R10BIO_Uptodate, &r10_bio->state);
@@ -1493,7 +1522,7 @@ static void end_sync_write(struct bio *bio, int error)
1493 int bad_sectors; 1522 int bad_sectors;
1494 int slot; 1523 int slot;
1495 1524
1496 d = find_bio_disk(conf, r10_bio, bio, &slot); 1525 d = find_bio_disk(conf, r10_bio, bio, &slot, NULL);
1497 1526
1498 if (!uptodate) { 1527 if (!uptodate) {
1499 set_bit(WriteErrorSeen, &conf->mirrors[d].rdev->flags); 1528 set_bit(WriteErrorSeen, &conf->mirrors[d].rdev->flags);
@@ -2271,9 +2300,14 @@ static void raid10d(struct mddev *mddev)
2271static int init_resync(struct r10conf *conf) 2300static int init_resync(struct r10conf *conf)
2272{ 2301{
2273 int buffs; 2302 int buffs;
2303 int i;
2274 2304
2275 buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE; 2305 buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
2276 BUG_ON(conf->r10buf_pool); 2306 BUG_ON(conf->r10buf_pool);
2307 conf->have_replacement = 0;
2308 for (i = 0; i < conf->raid_disks; i++)
2309 if (conf->mirrors[i].replacement)
2310 conf->have_replacement = 1;
2277 conf->r10buf_pool = mempool_create(buffs, r10buf_pool_alloc, r10buf_pool_free, conf); 2311 conf->r10buf_pool = mempool_create(buffs, r10buf_pool_alloc, r10buf_pool_free, conf);
2278 if (!conf->r10buf_pool) 2312 if (!conf->r10buf_pool)
2279 return -ENOMEM; 2313 return -ENOMEM;
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 7facfdf841f4..7c615613c381 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -2,7 +2,7 @@
2#define _RAID10_H 2#define _RAID10_H
3 3
4struct mirror_info { 4struct mirror_info {
5 struct md_rdev *rdev; 5 struct md_rdev *rdev, *replacement;
6 sector_t head_position; 6 sector_t head_position;
7 int recovery_disabled; /* matches 7 int recovery_disabled; /* matches
8 * mddev->recovery_disabled 8 * mddev->recovery_disabled
@@ -18,12 +18,13 @@ struct r10conf {
18 spinlock_t device_lock; 18 spinlock_t device_lock;
19 19
20 /* geometry */ 20 /* geometry */
21 int near_copies; /* number of copies laid out raid0 style */ 21 int near_copies; /* number of copies laid out
22 * raid0 style */
22 int far_copies; /* number of copies laid out 23 int far_copies; /* number of copies laid out
23 * at large strides across drives 24 * at large strides across drives
24 */ 25 */
25 int far_offset; /* far_copies are offset by 1 stripe 26 int far_offset; /* far_copies are offset by 1
26 * instead of many 27 * stripe instead of many
27 */ 28 */
28 int copies; /* near_copies * far_copies. 29 int copies; /* near_copies * far_copies.
29 * must be <= raid_disks 30 * must be <= raid_disks
@@ -34,10 +35,11 @@ struct r10conf {
34 * 1 stripe. 35 * 1 stripe.
35 */ 36 */
36 37
37 sector_t dev_sectors; /* temp copy of mddev->dev_sectors */ 38 sector_t dev_sectors; /* temp copy of
39 * mddev->dev_sectors */
38 40
39 int chunk_shift; /* shift from chunks to sectors */ 41 int chunk_shift; /* shift from chunks to sectors */
40 sector_t chunk_mask; 42 sector_t chunk_mask;
41 43
42 struct list_head retry_list; 44 struct list_head retry_list;
43 /* queue pending writes and submit them on unplug */ 45 /* queue pending writes and submit them on unplug */
@@ -45,20 +47,22 @@ struct r10conf {
45 int pending_count; 47 int pending_count;
46 48
47 spinlock_t resync_lock; 49 spinlock_t resync_lock;
48 int nr_pending; 50 int nr_pending;
49 int nr_waiting; 51 int nr_waiting;
50 int nr_queued; 52 int nr_queued;
51 int barrier; 53 int barrier;
52 sector_t next_resync; 54 sector_t next_resync;
53 int fullsync; /* set to 1 if a full sync is needed, 55 int fullsync; /* set to 1 if a full sync is needed,
54 * (fresh device added). 56 * (fresh device added).
55 * Cleared when a sync completes. 57 * Cleared when a sync completes.
56 */ 58 */
57 59 int have_replacement; /* There is at least one
60 * replacement device.
61 */
58 wait_queue_head_t wait_barrier; 62 wait_queue_head_t wait_barrier;
59 63
60 mempool_t *r10bio_pool; 64 mempool_t *r10bio_pool;
61 mempool_t *r10buf_pool; 65 mempool_t *r10buf_pool;
62 struct page *tmppage; 66 struct page *tmppage;
63 67
64 /* When taking over an array from a different personality, we store 68 /* When taking over an array from a different personality, we store
@@ -98,11 +102,18 @@ struct r10bio {
98 * When resyncing we also use one for each copy. 102 * When resyncing we also use one for each copy.
99 * When reconstructing, we use 2 bios, one for read, one for write. 103 * When reconstructing, we use 2 bios, one for read, one for write.
100 * We choose the number when they are allocated. 104 * We choose the number when they are allocated.
105 * We sometimes need an extra bio to write to the replacement.
101 */ 106 */
102 struct { 107 struct {
103 struct bio *bio; 108 struct bio *bio;
104 sector_t addr; 109 union {
105 int devnum; 110 struct bio *repl_bio; /* used for resync and
111 * writes */
112 struct md_rdev *rdev; /* used for reads
113 * (read_slot >= 0) */
114 };
115 sector_t addr;
116 int devnum;
106 } devs[0]; 117 } devs[0];
107}; 118};
108 119
@@ -121,17 +132,19 @@ struct r10bio {
121#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) 132#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
122 133
123/* bits for r10bio.state */ 134/* bits for r10bio.state */
124#define R10BIO_Uptodate 0 135enum r10bio_state {
125#define R10BIO_IsSync 1 136 R10BIO_Uptodate,
126#define R10BIO_IsRecover 2 137 R10BIO_IsSync,
127#define R10BIO_Degraded 3 138 R10BIO_IsRecover,
139 R10BIO_Degraded,
128/* Set ReadError on bios that experience a read error 140/* Set ReadError on bios that experience a read error
129 * so that raid10d knows what to do with them. 141 * so that raid10d knows what to do with them.
130 */ 142 */
131#define R10BIO_ReadError 4 143 R10BIO_ReadError,
132/* If a write for this request means we can clear some 144/* If a write for this request means we can clear some
133 * known-bad-block records, we set this flag. 145 * known-bad-block records, we set this flag.
134 */ 146 */
135#define R10BIO_MadeGood 5 147 R10BIO_MadeGood,
136#define R10BIO_WriteError 6 148 R10BIO_WriteError,
149};
137#endif 150#endif