aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2011-12-22 18:17:56 -0500
committerNeilBrown <neilb@suse.de>2011-12-22 18:17:56 -0500
commit8f19ccb2fd70deb1f278be5e75076074cfddee46 (patch)
tree59419e814d4d02dfdefaf6f5ef152c3468c890e8
parent301946364e0aa67c4cfaec82e94c389993c9f7c6 (diff)
md/raid1: Allocate spare to store replacement devices and their bios.
In RAID1, a replacement is much like a normal device, so we just double the size of the relevant arrays and look at all possible devices for reads and writes. This means that the array looks like it is now double the size in some way - we need to be careful about that. In particular, when checking if the array is still degraded while creating a recovery request we need to only consider the first 'half' - i.e. the real (non-replacement) devices. Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r--drivers/md/raid1.c64
-rw-r--r--drivers/md/raid1.h7
2 files changed, 40 insertions, 31 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index ab8113cff7c9..e7768e37f36a 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -135,7 +135,7 @@ out_free_pages:
135 put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page); 135 put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page);
136 j = -1; 136 j = -1;
137out_free_bio: 137out_free_bio:
138 while ( ++j < pi->raid_disks ) 138 while (++j < pi->raid_disks)
139 bio_put(r1_bio->bios[j]); 139 bio_put(r1_bio->bios[j]);
140 r1bio_pool_free(r1_bio, data); 140 r1bio_pool_free(r1_bio, data);
141 return NULL; 141 return NULL;
@@ -164,7 +164,7 @@ static void put_all_bios(struct r1conf *conf, struct r1bio *r1_bio)
164{ 164{
165 int i; 165 int i;
166 166
167 for (i = 0; i < conf->raid_disks; i++) { 167 for (i = 0; i < conf->raid_disks * 2; i++) {
168 struct bio **bio = r1_bio->bios + i; 168 struct bio **bio = r1_bio->bios + i;
169 if (!BIO_SPECIAL(*bio)) 169 if (!BIO_SPECIAL(*bio))
170 bio_put(*bio); 170 bio_put(*bio);
@@ -185,7 +185,7 @@ static void put_buf(struct r1bio *r1_bio)
185 struct r1conf *conf = r1_bio->mddev->private; 185 struct r1conf *conf = r1_bio->mddev->private;
186 int i; 186 int i;
187 187
188 for (i=0; i<conf->raid_disks; i++) { 188 for (i = 0; i < conf->raid_disks * 2; i++) {
189 struct bio *bio = r1_bio->bios[i]; 189 struct bio *bio = r1_bio->bios[i];
190 if (bio->bi_end_io) 190 if (bio->bi_end_io)
191 rdev_dec_pending(conf->mirrors[i].rdev, r1_bio->mddev); 191 rdev_dec_pending(conf->mirrors[i].rdev, r1_bio->mddev);
@@ -280,11 +280,11 @@ static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio)
280 struct r1conf *conf = r1_bio->mddev->private; 280 struct r1conf *conf = r1_bio->mddev->private;
281 int raid_disks = conf->raid_disks; 281 int raid_disks = conf->raid_disks;
282 282
283 for (mirror = 0; mirror < raid_disks; mirror++) 283 for (mirror = 0; mirror < raid_disks * 2; mirror++)
284 if (r1_bio->bios[mirror] == bio) 284 if (r1_bio->bios[mirror] == bio)
285 break; 285 break;
286 286
287 BUG_ON(mirror == raid_disks); 287 BUG_ON(mirror == raid_disks * 2);
288 update_head_pos(mirror, r1_bio); 288 update_head_pos(mirror, r1_bio);
289 289
290 return mirror; 290 return mirror;
@@ -506,7 +506,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
506 start_disk = conf->last_used; 506 start_disk = conf->last_used;
507 } 507 }
508 508
509 for (i = 0 ; i < conf->raid_disks ; i++) { 509 for (i = 0 ; i < conf->raid_disks * 2 ; i++) {
510 sector_t dist; 510 sector_t dist;
511 sector_t first_bad; 511 sector_t first_bad;
512 int bad_sectors; 512 int bad_sectors;
@@ -975,7 +975,7 @@ read_again:
975 */ 975 */
976 plugged = mddev_check_plugged(mddev); 976 plugged = mddev_check_plugged(mddev);
977 977
978 disks = conf->raid_disks; 978 disks = conf->raid_disks * 2;
979 retry_write: 979 retry_write:
980 blocked_rdev = NULL; 980 blocked_rdev = NULL;
981 rcu_read_lock(); 981 rcu_read_lock();
@@ -989,7 +989,8 @@ read_again:
989 } 989 }
990 r1_bio->bios[i] = NULL; 990 r1_bio->bios[i] = NULL;
991 if (!rdev || test_bit(Faulty, &rdev->flags)) { 991 if (!rdev || test_bit(Faulty, &rdev->flags)) {
992 set_bit(R1BIO_Degraded, &r1_bio->state); 992 if (i < conf->raid_disks)
993 set_bit(R1BIO_Degraded, &r1_bio->state);
993 continue; 994 continue;
994 } 995 }
995 996
@@ -1493,7 +1494,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
1493 } 1494 }
1494 } 1495 }
1495 d++; 1496 d++;
1496 if (d == conf->raid_disks) 1497 if (d == conf->raid_disks * 2)
1497 d = 0; 1498 d = 0;
1498 } while (!success && d != r1_bio->read_disk); 1499 } while (!success && d != r1_bio->read_disk);
1499 1500
@@ -1510,7 +1511,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
1510 mdname(mddev), 1511 mdname(mddev),
1511 bdevname(bio->bi_bdev, b), 1512 bdevname(bio->bi_bdev, b),
1512 (unsigned long long)r1_bio->sector); 1513 (unsigned long long)r1_bio->sector);
1513 for (d = 0; d < conf->raid_disks; d++) { 1514 for (d = 0; d < conf->raid_disks * 2; d++) {
1514 rdev = conf->mirrors[d].rdev; 1515 rdev = conf->mirrors[d].rdev;
1515 if (!rdev || test_bit(Faulty, &rdev->flags)) 1516 if (!rdev || test_bit(Faulty, &rdev->flags))
1516 continue; 1517 continue;
@@ -1536,7 +1537,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
1536 /* write it back and re-read */ 1537 /* write it back and re-read */
1537 while (d != r1_bio->read_disk) { 1538 while (d != r1_bio->read_disk) {
1538 if (d == 0) 1539 if (d == 0)
1539 d = conf->raid_disks; 1540 d = conf->raid_disks * 2;
1540 d--; 1541 d--;
1541 if (r1_bio->bios[d]->bi_end_io != end_sync_read) 1542 if (r1_bio->bios[d]->bi_end_io != end_sync_read)
1542 continue; 1543 continue;
@@ -1551,7 +1552,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
1551 d = start; 1552 d = start;
1552 while (d != r1_bio->read_disk) { 1553 while (d != r1_bio->read_disk) {
1553 if (d == 0) 1554 if (d == 0)
1554 d = conf->raid_disks; 1555 d = conf->raid_disks * 2;
1555 d--; 1556 d--;
1556 if (r1_bio->bios[d]->bi_end_io != end_sync_read) 1557 if (r1_bio->bios[d]->bi_end_io != end_sync_read)
1557 continue; 1558 continue;
@@ -1584,7 +1585,7 @@ static int process_checks(struct r1bio *r1_bio)
1584 int primary; 1585 int primary;
1585 int i; 1586 int i;
1586 1587
1587 for (primary = 0; primary < conf->raid_disks; primary++) 1588 for (primary = 0; primary < conf->raid_disks * 2; primary++)
1588 if (r1_bio->bios[primary]->bi_end_io == end_sync_read && 1589 if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
1589 test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) { 1590 test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
1590 r1_bio->bios[primary]->bi_end_io = NULL; 1591 r1_bio->bios[primary]->bi_end_io = NULL;
@@ -1592,7 +1593,7 @@ static int process_checks(struct r1bio *r1_bio)
1592 break; 1593 break;
1593 } 1594 }
1594 r1_bio->read_disk = primary; 1595 r1_bio->read_disk = primary;
1595 for (i = 0; i < conf->raid_disks; i++) { 1596 for (i = 0; i < conf->raid_disks * 2; i++) {
1596 int j; 1597 int j;
1597 int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9); 1598 int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
1598 struct bio *pbio = r1_bio->bios[primary]; 1599 struct bio *pbio = r1_bio->bios[primary];
@@ -1656,7 +1657,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
1656{ 1657{
1657 struct r1conf *conf = mddev->private; 1658 struct r1conf *conf = mddev->private;
1658 int i; 1659 int i;
1659 int disks = conf->raid_disks; 1660 int disks = conf->raid_disks * 2;
1660 struct bio *bio, *wbio; 1661 struct bio *bio, *wbio;
1661 1662
1662 bio = r1_bio->bios[r1_bio->read_disk]; 1663 bio = r1_bio->bios[r1_bio->read_disk];
@@ -1737,7 +1738,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
1737 success = 1; 1738 success = 1;
1738 else { 1739 else {
1739 d++; 1740 d++;
1740 if (d == conf->raid_disks) 1741 if (d == conf->raid_disks * 2)
1741 d = 0; 1742 d = 0;
1742 } 1743 }
1743 } while (!success && d != read_disk); 1744 } while (!success && d != read_disk);
@@ -1753,7 +1754,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
1753 start = d; 1754 start = d;
1754 while (d != read_disk) { 1755 while (d != read_disk) {
1755 if (d==0) 1756 if (d==0)
1756 d = conf->raid_disks; 1757 d = conf->raid_disks * 2;
1757 d--; 1758 d--;
1758 rdev = conf->mirrors[d].rdev; 1759 rdev = conf->mirrors[d].rdev;
1759 if (rdev && 1760 if (rdev &&
@@ -1765,7 +1766,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
1765 while (d != read_disk) { 1766 while (d != read_disk) {
1766 char b[BDEVNAME_SIZE]; 1767 char b[BDEVNAME_SIZE];
1767 if (d==0) 1768 if (d==0)
1768 d = conf->raid_disks; 1769 d = conf->raid_disks * 2;
1769 d--; 1770 d--;
1770 rdev = conf->mirrors[d].rdev; 1771 rdev = conf->mirrors[d].rdev;
1771 if (rdev && 1772 if (rdev &&
@@ -1887,7 +1888,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
1887{ 1888{
1888 int m; 1889 int m;
1889 int s = r1_bio->sectors; 1890 int s = r1_bio->sectors;
1890 for (m = 0; m < conf->raid_disks ; m++) { 1891 for (m = 0; m < conf->raid_disks * 2 ; m++) {
1891 struct md_rdev *rdev = conf->mirrors[m].rdev; 1892 struct md_rdev *rdev = conf->mirrors[m].rdev;
1892 struct bio *bio = r1_bio->bios[m]; 1893 struct bio *bio = r1_bio->bios[m];
1893 if (bio->bi_end_io == NULL) 1894 if (bio->bi_end_io == NULL)
@@ -1909,7 +1910,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
1909static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio) 1910static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
1910{ 1911{
1911 int m; 1912 int m;
1912 for (m = 0; m < conf->raid_disks ; m++) 1913 for (m = 0; m < conf->raid_disks * 2 ; m++)
1913 if (r1_bio->bios[m] == IO_MADE_GOOD) { 1914 if (r1_bio->bios[m] == IO_MADE_GOOD) {
1914 struct md_rdev *rdev = conf->mirrors[m].rdev; 1915 struct md_rdev *rdev = conf->mirrors[m].rdev;
1915 rdev_clear_badblocks(rdev, 1916 rdev_clear_badblocks(rdev,
@@ -2184,7 +2185,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
2184 r1_bio->state = 0; 2185 r1_bio->state = 0;
2185 set_bit(R1BIO_IsSync, &r1_bio->state); 2186 set_bit(R1BIO_IsSync, &r1_bio->state);
2186 2187
2187 for (i=0; i < conf->raid_disks; i++) { 2188 for (i = 0; i < conf->raid_disks * 2; i++) {
2188 struct md_rdev *rdev; 2189 struct md_rdev *rdev;
2189 bio = r1_bio->bios[i]; 2190 bio = r1_bio->bios[i];
2190 2191
@@ -2203,7 +2204,8 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
2203 rdev = rcu_dereference(conf->mirrors[i].rdev); 2204 rdev = rcu_dereference(conf->mirrors[i].rdev);
2204 if (rdev == NULL || 2205 if (rdev == NULL ||
2205 test_bit(Faulty, &rdev->flags)) { 2206 test_bit(Faulty, &rdev->flags)) {
2206 still_degraded = 1; 2207 if (i < conf->raid_disks)
2208 still_degraded = 1;
2207 } else if (!test_bit(In_sync, &rdev->flags)) { 2209 } else if (!test_bit(In_sync, &rdev->flags)) {
2208 bio->bi_rw = WRITE; 2210 bio->bi_rw = WRITE;
2209 bio->bi_end_io = end_sync_write; 2211 bio->bi_end_io = end_sync_write;
@@ -2254,7 +2256,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
2254 * need to mark them bad on all write targets 2256 * need to mark them bad on all write targets
2255 */ 2257 */
2256 int ok = 1; 2258 int ok = 1;
2257 for (i = 0 ; i < conf->raid_disks ; i++) 2259 for (i = 0 ; i < conf->raid_disks * 2 ; i++)
2258 if (r1_bio->bios[i]->bi_end_io == end_sync_write) { 2260 if (r1_bio->bios[i]->bi_end_io == end_sync_write) {
2259 struct md_rdev *rdev = 2261 struct md_rdev *rdev =
2260 rcu_dereference(conf->mirrors[i].rdev); 2262 rcu_dereference(conf->mirrors[i].rdev);
@@ -2323,7 +2325,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
2323 len = sync_blocks<<9; 2325 len = sync_blocks<<9;
2324 } 2326 }
2325 2327
2326 for (i=0 ; i < conf->raid_disks; i++) { 2328 for (i = 0 ; i < conf->raid_disks * 2; i++) {
2327 bio = r1_bio->bios[i]; 2329 bio = r1_bio->bios[i];
2328 if (bio->bi_end_io) { 2330 if (bio->bi_end_io) {
2329 page = bio->bi_io_vec[bio->bi_vcnt].bv_page; 2331 page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
@@ -2356,7 +2358,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
2356 */ 2358 */
2357 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { 2359 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
2358 atomic_set(&r1_bio->remaining, read_targets); 2360 atomic_set(&r1_bio->remaining, read_targets);
2359 for (i=0; i<conf->raid_disks; i++) { 2361 for (i = 0; i < conf->raid_disks * 2; i++) {
2360 bio = r1_bio->bios[i]; 2362 bio = r1_bio->bios[i];
2361 if (bio->bi_end_io == end_sync_read) { 2363 if (bio->bi_end_io == end_sync_read) {
2362 md_sync_acct(bio->bi_bdev, nr_sectors); 2364 md_sync_acct(bio->bi_bdev, nr_sectors);
@@ -2393,7 +2395,8 @@ static struct r1conf *setup_conf(struct mddev *mddev)
2393 if (!conf) 2395 if (!conf)
2394 goto abort; 2396 goto abort;
2395 2397
2396 conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, 2398 conf->mirrors = kzalloc(sizeof(struct mirror_info)
2399 * mddev->raid_disks * 2,
2397 GFP_KERNEL); 2400 GFP_KERNEL);
2398 if (!conf->mirrors) 2401 if (!conf->mirrors)
2399 goto abort; 2402 goto abort;
@@ -2405,7 +2408,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
2405 conf->poolinfo = kzalloc(sizeof(*conf->poolinfo), GFP_KERNEL); 2408 conf->poolinfo = kzalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
2406 if (!conf->poolinfo) 2409 if (!conf->poolinfo)
2407 goto abort; 2410 goto abort;
2408 conf->poolinfo->raid_disks = mddev->raid_disks; 2411 conf->poolinfo->raid_disks = mddev->raid_disks * 2;
2409 conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc, 2412 conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
2410 r1bio_pool_free, 2413 r1bio_pool_free,
2411 conf->poolinfo); 2414 conf->poolinfo);
@@ -2438,7 +2441,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
2438 conf->recovery_disabled = mddev->recovery_disabled - 1; 2441 conf->recovery_disabled = mddev->recovery_disabled - 1;
2439 2442
2440 conf->last_used = -1; 2443 conf->last_used = -1;
2441 for (i = 0; i < conf->raid_disks; i++) { 2444 for (i = 0; i < conf->raid_disks * 2; i++) {
2442 2445
2443 disk = conf->mirrors + i; 2446 disk = conf->mirrors + i;
2444 2447
@@ -2665,7 +2668,7 @@ static int raid1_reshape(struct mddev *mddev)
2665 if (!newpoolinfo) 2668 if (!newpoolinfo)
2666 return -ENOMEM; 2669 return -ENOMEM;
2667 newpoolinfo->mddev = mddev; 2670 newpoolinfo->mddev = mddev;
2668 newpoolinfo->raid_disks = raid_disks; 2671 newpoolinfo->raid_disks = raid_disks * 2;
2669 2672
2670 newpool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc, 2673 newpool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
2671 r1bio_pool_free, newpoolinfo); 2674 r1bio_pool_free, newpoolinfo);
@@ -2673,7 +2676,8 @@ static int raid1_reshape(struct mddev *mddev)
2673 kfree(newpoolinfo); 2676 kfree(newpoolinfo);
2674 return -ENOMEM; 2677 return -ENOMEM;
2675 } 2678 }
2676 newmirrors = kzalloc(sizeof(struct mirror_info) * raid_disks, GFP_KERNEL); 2679 newmirrors = kzalloc(sizeof(struct mirror_info) * raid_disks * 2,
2680 GFP_KERNEL);
2677 if (!newmirrors) { 2681 if (!newmirrors) {
2678 kfree(newpoolinfo); 2682 kfree(newpoolinfo);
2679 mempool_destroy(newpool); 2683 mempool_destroy(newpool);
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index c732b6cce935..80ded139314c 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -12,6 +12,9 @@ struct mirror_info {
12 * pool was allocated for, so they know how much to allocate and free. 12 * pool was allocated for, so they know how much to allocate and free.
13 * mddev->raid_disks cannot be used, as it can change while a pool is active 13 * mddev->raid_disks cannot be used, as it can change while a pool is active
14 * These two datums are stored in a kmalloced struct. 14 * These two datums are stored in a kmalloced struct.
15 * The 'raid_disks' here is twice the raid_disks in r1conf.
16 * This allows space so that each 'real' device can have a replacement in the
17 * second half of the array.
15 */ 18 */
16 19
17struct pool_info { 20struct pool_info {
@@ -21,7 +24,9 @@ struct pool_info {
21 24
22struct r1conf { 25struct r1conf {
23 struct mddev *mddev; 26 struct mddev *mddev;
24 struct mirror_info *mirrors; 27 struct mirror_info *mirrors; /* twice 'raid_disks' to
28 * allow for replacements.
29 */
25 int raid_disks; 30 int raid_disks;
26 31
27 /* When choose the best device for a read (read_balance()) 32 /* When choose the best device for a read (read_balance())