diff options
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- | drivers/md/raid10.c | 300 |
1 files changed, 198 insertions, 102 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e2766d8251a1..03724992cdf2 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/seq_file.h> | 24 | #include <linux/seq_file.h> |
25 | #include "md.h" | 25 | #include "md.h" |
26 | #include "raid10.h" | 26 | #include "raid10.h" |
27 | #include "raid0.h" | ||
27 | #include "bitmap.h" | 28 | #include "bitmap.h" |
28 | 29 | ||
29 | /* | 30 | /* |
@@ -255,7 +256,7 @@ static inline void update_head_pos(int slot, r10bio_t *r10_bio) | |||
255 | static void raid10_end_read_request(struct bio *bio, int error) | 256 | static void raid10_end_read_request(struct bio *bio, int error) |
256 | { | 257 | { |
257 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | 258 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); |
258 | r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); | 259 | r10bio_t *r10_bio = bio->bi_private; |
259 | int slot, dev; | 260 | int slot, dev; |
260 | conf_t *conf = r10_bio->mddev->private; | 261 | conf_t *conf = r10_bio->mddev->private; |
261 | 262 | ||
@@ -285,7 +286,8 @@ static void raid10_end_read_request(struct bio *bio, int error) | |||
285 | */ | 286 | */ |
286 | char b[BDEVNAME_SIZE]; | 287 | char b[BDEVNAME_SIZE]; |
287 | if (printk_ratelimit()) | 288 | if (printk_ratelimit()) |
288 | printk(KERN_ERR "raid10: %s: rescheduling sector %llu\n", | 289 | printk(KERN_ERR "md/raid10:%s: %s: rescheduling sector %llu\n", |
290 | mdname(conf->mddev), | ||
289 | bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector); | 291 | bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector); |
290 | reschedule_retry(r10_bio); | 292 | reschedule_retry(r10_bio); |
291 | } | 293 | } |
@@ -296,7 +298,7 @@ static void raid10_end_read_request(struct bio *bio, int error) | |||
296 | static void raid10_end_write_request(struct bio *bio, int error) | 298 | static void raid10_end_write_request(struct bio *bio, int error) |
297 | { | 299 | { |
298 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | 300 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); |
299 | r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); | 301 | r10bio_t *r10_bio = bio->bi_private; |
300 | int slot, dev; | 302 | int slot, dev; |
301 | conf_t *conf = r10_bio->mddev->private; | 303 | conf_t *conf = r10_bio->mddev->private; |
302 | 304 | ||
@@ -494,7 +496,7 @@ static int raid10_mergeable_bvec(struct request_queue *q, | |||
494 | */ | 496 | */ |
495 | static int read_balance(conf_t *conf, r10bio_t *r10_bio) | 497 | static int read_balance(conf_t *conf, r10bio_t *r10_bio) |
496 | { | 498 | { |
497 | const unsigned long this_sector = r10_bio->sector; | 499 | const sector_t this_sector = r10_bio->sector; |
498 | int disk, slot, nslot; | 500 | int disk, slot, nslot; |
499 | const int sectors = r10_bio->sectors; | 501 | const int sectors = r10_bio->sectors; |
500 | sector_t new_distance, current_distance; | 502 | sector_t new_distance, current_distance; |
@@ -601,7 +603,7 @@ static void unplug_slaves(mddev_t *mddev) | |||
601 | int i; | 603 | int i; |
602 | 604 | ||
603 | rcu_read_lock(); | 605 | rcu_read_lock(); |
604 | for (i=0; i<mddev->raid_disks; i++) { | 606 | for (i=0; i < conf->raid_disks; i++) { |
605 | mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); | 607 | mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); |
606 | if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) { | 608 | if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) { |
607 | struct request_queue *r_queue = bdev_get_queue(rdev->bdev); | 609 | struct request_queue *r_queue = bdev_get_queue(rdev->bdev); |
@@ -635,7 +637,7 @@ static int raid10_congested(void *data, int bits) | |||
635 | if (mddev_congested(mddev, bits)) | 637 | if (mddev_congested(mddev, bits)) |
636 | return 1; | 638 | return 1; |
637 | rcu_read_lock(); | 639 | rcu_read_lock(); |
638 | for (i = 0; i < mddev->raid_disks && ret == 0; i++) { | 640 | for (i = 0; i < conf->raid_disks && ret == 0; i++) { |
639 | mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); | 641 | mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); |
640 | if (rdev && !test_bit(Faulty, &rdev->flags)) { | 642 | if (rdev && !test_bit(Faulty, &rdev->flags)) { |
641 | struct request_queue *q = bdev_get_queue(rdev->bdev); | 643 | struct request_queue *q = bdev_get_queue(rdev->bdev); |
@@ -788,14 +790,12 @@ static void unfreeze_array(conf_t *conf) | |||
788 | spin_unlock_irq(&conf->resync_lock); | 790 | spin_unlock_irq(&conf->resync_lock); |
789 | } | 791 | } |
790 | 792 | ||
791 | static int make_request(struct request_queue *q, struct bio * bio) | 793 | static int make_request(mddev_t *mddev, struct bio * bio) |
792 | { | 794 | { |
793 | mddev_t *mddev = q->queuedata; | ||
794 | conf_t *conf = mddev->private; | 795 | conf_t *conf = mddev->private; |
795 | mirror_info_t *mirror; | 796 | mirror_info_t *mirror; |
796 | r10bio_t *r10_bio; | 797 | r10bio_t *r10_bio; |
797 | struct bio *read_bio; | 798 | struct bio *read_bio; |
798 | int cpu; | ||
799 | int i; | 799 | int i; |
800 | int chunk_sects = conf->chunk_mask + 1; | 800 | int chunk_sects = conf->chunk_mask + 1; |
801 | const int rw = bio_data_dir(bio); | 801 | const int rw = bio_data_dir(bio); |
@@ -825,16 +825,16 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
825 | */ | 825 | */ |
826 | bp = bio_split(bio, | 826 | bp = bio_split(bio, |
827 | chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); | 827 | chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); |
828 | if (make_request(q, &bp->bio1)) | 828 | if (make_request(mddev, &bp->bio1)) |
829 | generic_make_request(&bp->bio1); | 829 | generic_make_request(&bp->bio1); |
830 | if (make_request(q, &bp->bio2)) | 830 | if (make_request(mddev, &bp->bio2)) |
831 | generic_make_request(&bp->bio2); | 831 | generic_make_request(&bp->bio2); |
832 | 832 | ||
833 | bio_pair_release(bp); | 833 | bio_pair_release(bp); |
834 | return 0; | 834 | return 0; |
835 | bad_map: | 835 | bad_map: |
836 | printk("raid10_make_request bug: can't convert block across chunks" | 836 | printk("md/raid10:%s: make_request bug: can't convert block across chunks" |
837 | " or bigger than %dk %llu %d\n", chunk_sects/2, | 837 | " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2, |
838 | (unsigned long long)bio->bi_sector, bio->bi_size >> 10); | 838 | (unsigned long long)bio->bi_sector, bio->bi_size >> 10); |
839 | 839 | ||
840 | bio_io_error(bio); | 840 | bio_io_error(bio); |
@@ -850,12 +850,6 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
850 | */ | 850 | */ |
851 | wait_barrier(conf); | 851 | wait_barrier(conf); |
852 | 852 | ||
853 | cpu = part_stat_lock(); | ||
854 | part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); | ||
855 | part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], | ||
856 | bio_sectors(bio)); | ||
857 | part_stat_unlock(); | ||
858 | |||
859 | r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); | 853 | r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); |
860 | 854 | ||
861 | r10_bio->master_bio = bio; | 855 | r10_bio->master_bio = bio; |
@@ -1039,9 +1033,10 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1039 | } | 1033 | } |
1040 | set_bit(Faulty, &rdev->flags); | 1034 | set_bit(Faulty, &rdev->flags); |
1041 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 1035 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
1042 | printk(KERN_ALERT "raid10: Disk failure on %s, disabling device.\n" | 1036 | printk(KERN_ALERT "md/raid10:%s: Disk failure on %s, disabling device.\n" |
1043 | "raid10: Operation continuing on %d devices.\n", | 1037 | KERN_ALERT "md/raid10:%s: Operation continuing on %d devices.\n", |
1044 | bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded); | 1038 | mdname(mddev), bdevname(rdev->bdev, b), |
1039 | mdname(mddev), conf->raid_disks - mddev->degraded); | ||
1045 | } | 1040 | } |
1046 | 1041 | ||
1047 | static void print_conf(conf_t *conf) | 1042 | static void print_conf(conf_t *conf) |
@@ -1049,19 +1044,19 @@ static void print_conf(conf_t *conf) | |||
1049 | int i; | 1044 | int i; |
1050 | mirror_info_t *tmp; | 1045 | mirror_info_t *tmp; |
1051 | 1046 | ||
1052 | printk("RAID10 conf printout:\n"); | 1047 | printk(KERN_DEBUG "RAID10 conf printout:\n"); |
1053 | if (!conf) { | 1048 | if (!conf) { |
1054 | printk("(!conf)\n"); | 1049 | printk(KERN_DEBUG "(!conf)\n"); |
1055 | return; | 1050 | return; |
1056 | } | 1051 | } |
1057 | printk(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded, | 1052 | printk(KERN_DEBUG " --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded, |
1058 | conf->raid_disks); | 1053 | conf->raid_disks); |
1059 | 1054 | ||
1060 | for (i = 0; i < conf->raid_disks; i++) { | 1055 | for (i = 0; i < conf->raid_disks; i++) { |
1061 | char b[BDEVNAME_SIZE]; | 1056 | char b[BDEVNAME_SIZE]; |
1062 | tmp = conf->mirrors + i; | 1057 | tmp = conf->mirrors + i; |
1063 | if (tmp->rdev) | 1058 | if (tmp->rdev) |
1064 | printk(" disk %d, wo:%d, o:%d, dev:%s\n", | 1059 | printk(KERN_DEBUG " disk %d, wo:%d, o:%d, dev:%s\n", |
1065 | i, !test_bit(In_sync, &tmp->rdev->flags), | 1060 | i, !test_bit(In_sync, &tmp->rdev->flags), |
1066 | !test_bit(Faulty, &tmp->rdev->flags), | 1061 | !test_bit(Faulty, &tmp->rdev->flags), |
1067 | bdevname(tmp->rdev->bdev,b)); | 1062 | bdevname(tmp->rdev->bdev,b)); |
@@ -1132,7 +1127,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1132 | int mirror; | 1127 | int mirror; |
1133 | mirror_info_t *p; | 1128 | mirror_info_t *p; |
1134 | int first = 0; | 1129 | int first = 0; |
1135 | int last = mddev->raid_disks - 1; | 1130 | int last = conf->raid_disks - 1; |
1136 | 1131 | ||
1137 | if (mddev->recovery_cp < MaxSector) | 1132 | if (mddev->recovery_cp < MaxSector) |
1138 | /* only hot-add to in-sync arrays, as recovery is | 1133 | /* only hot-add to in-sync arrays, as recovery is |
@@ -1224,7 +1219,7 @@ abort: | |||
1224 | 1219 | ||
1225 | static void end_sync_read(struct bio *bio, int error) | 1220 | static void end_sync_read(struct bio *bio, int error) |
1226 | { | 1221 | { |
1227 | r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); | 1222 | r10bio_t *r10_bio = bio->bi_private; |
1228 | conf_t *conf = r10_bio->mddev->private; | 1223 | conf_t *conf = r10_bio->mddev->private; |
1229 | int i,d; | 1224 | int i,d; |
1230 | 1225 | ||
@@ -1261,7 +1256,7 @@ static void end_sync_read(struct bio *bio, int error) | |||
1261 | static void end_sync_write(struct bio *bio, int error) | 1256 | static void end_sync_write(struct bio *bio, int error) |
1262 | { | 1257 | { |
1263 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | 1258 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); |
1264 | r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); | 1259 | r10bio_t *r10_bio = bio->bi_private; |
1265 | mddev_t *mddev = r10_bio->mddev; | 1260 | mddev_t *mddev = r10_bio->mddev; |
1266 | conf_t *conf = mddev->private; | 1261 | conf_t *conf = mddev->private; |
1267 | int i,d; | 1262 | int i,d; |
@@ -1510,13 +1505,14 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) | |||
1510 | if (cur_read_error_count > max_read_errors) { | 1505 | if (cur_read_error_count > max_read_errors) { |
1511 | rcu_read_unlock(); | 1506 | rcu_read_unlock(); |
1512 | printk(KERN_NOTICE | 1507 | printk(KERN_NOTICE |
1513 | "raid10: %s: Raid device exceeded " | 1508 | "md/raid10:%s: %s: Raid device exceeded " |
1514 | "read_error threshold " | 1509 | "read_error threshold " |
1515 | "[cur %d:max %d]\n", | 1510 | "[cur %d:max %d]\n", |
1511 | mdname(mddev), | ||
1516 | b, cur_read_error_count, max_read_errors); | 1512 | b, cur_read_error_count, max_read_errors); |
1517 | printk(KERN_NOTICE | 1513 | printk(KERN_NOTICE |
1518 | "raid10: %s: Failing raid " | 1514 | "md/raid10:%s: %s: Failing raid " |
1519 | "device\n", b); | 1515 | "device\n", mdname(mddev), b); |
1520 | md_error(mddev, conf->mirrors[d].rdev); | 1516 | md_error(mddev, conf->mirrors[d].rdev); |
1521 | return; | 1517 | return; |
1522 | } | 1518 | } |
@@ -1586,15 +1582,16 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) | |||
1586 | == 0) { | 1582 | == 0) { |
1587 | /* Well, this device is dead */ | 1583 | /* Well, this device is dead */ |
1588 | printk(KERN_NOTICE | 1584 | printk(KERN_NOTICE |
1589 | "raid10:%s: read correction " | 1585 | "md/raid10:%s: read correction " |
1590 | "write failed" | 1586 | "write failed" |
1591 | " (%d sectors at %llu on %s)\n", | 1587 | " (%d sectors at %llu on %s)\n", |
1592 | mdname(mddev), s, | 1588 | mdname(mddev), s, |
1593 | (unsigned long long)(sect+ | 1589 | (unsigned long long)(sect+ |
1594 | rdev->data_offset), | 1590 | rdev->data_offset), |
1595 | bdevname(rdev->bdev, b)); | 1591 | bdevname(rdev->bdev, b)); |
1596 | printk(KERN_NOTICE "raid10:%s: failing " | 1592 | printk(KERN_NOTICE "md/raid10:%s: %s: failing " |
1597 | "drive\n", | 1593 | "drive\n", |
1594 | mdname(mddev), | ||
1598 | bdevname(rdev->bdev, b)); | 1595 | bdevname(rdev->bdev, b)); |
1599 | md_error(mddev, rdev); | 1596 | md_error(mddev, rdev); |
1600 | } | 1597 | } |
@@ -1622,20 +1619,21 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) | |||
1622 | READ) == 0) { | 1619 | READ) == 0) { |
1623 | /* Well, this device is dead */ | 1620 | /* Well, this device is dead */ |
1624 | printk(KERN_NOTICE | 1621 | printk(KERN_NOTICE |
1625 | "raid10:%s: unable to read back " | 1622 | "md/raid10:%s: unable to read back " |
1626 | "corrected sectors" | 1623 | "corrected sectors" |
1627 | " (%d sectors at %llu on %s)\n", | 1624 | " (%d sectors at %llu on %s)\n", |
1628 | mdname(mddev), s, | 1625 | mdname(mddev), s, |
1629 | (unsigned long long)(sect+ | 1626 | (unsigned long long)(sect+ |
1630 | rdev->data_offset), | 1627 | rdev->data_offset), |
1631 | bdevname(rdev->bdev, b)); | 1628 | bdevname(rdev->bdev, b)); |
1632 | printk(KERN_NOTICE "raid10:%s: failing drive\n", | 1629 | printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n", |
1630 | mdname(mddev), | ||
1633 | bdevname(rdev->bdev, b)); | 1631 | bdevname(rdev->bdev, b)); |
1634 | 1632 | ||
1635 | md_error(mddev, rdev); | 1633 | md_error(mddev, rdev); |
1636 | } else { | 1634 | } else { |
1637 | printk(KERN_INFO | 1635 | printk(KERN_INFO |
1638 | "raid10:%s: read error corrected" | 1636 | "md/raid10:%s: read error corrected" |
1639 | " (%d sectors at %llu on %s)\n", | 1637 | " (%d sectors at %llu on %s)\n", |
1640 | mdname(mddev), s, | 1638 | mdname(mddev), s, |
1641 | (unsigned long long)(sect+ | 1639 | (unsigned long long)(sect+ |
@@ -1710,8 +1708,9 @@ static void raid10d(mddev_t *mddev) | |||
1710 | mddev->ro ? IO_BLOCKED : NULL; | 1708 | mddev->ro ? IO_BLOCKED : NULL; |
1711 | mirror = read_balance(conf, r10_bio); | 1709 | mirror = read_balance(conf, r10_bio); |
1712 | if (mirror == -1) { | 1710 | if (mirror == -1) { |
1713 | printk(KERN_ALERT "raid10: %s: unrecoverable I/O" | 1711 | printk(KERN_ALERT "md/raid10:%s: %s: unrecoverable I/O" |
1714 | " read error for block %llu\n", | 1712 | " read error for block %llu\n", |
1713 | mdname(mddev), | ||
1715 | bdevname(bio->bi_bdev,b), | 1714 | bdevname(bio->bi_bdev,b), |
1716 | (unsigned long long)r10_bio->sector); | 1715 | (unsigned long long)r10_bio->sector); |
1717 | raid_end_bio_io(r10_bio); | 1716 | raid_end_bio_io(r10_bio); |
@@ -1721,8 +1720,9 @@ static void raid10d(mddev_t *mddev) | |||
1721 | bio_put(bio); | 1720 | bio_put(bio); |
1722 | rdev = conf->mirrors[mirror].rdev; | 1721 | rdev = conf->mirrors[mirror].rdev; |
1723 | if (printk_ratelimit()) | 1722 | if (printk_ratelimit()) |
1724 | printk(KERN_ERR "raid10: %s: redirecting sector %llu to" | 1723 | printk(KERN_ERR "md/raid10:%s: %s: redirecting sector %llu to" |
1725 | " another mirror\n", | 1724 | " another mirror\n", |
1725 | mdname(mddev), | ||
1726 | bdevname(rdev->bdev,b), | 1726 | bdevname(rdev->bdev,b), |
1727 | (unsigned long long)r10_bio->sector); | 1727 | (unsigned long long)r10_bio->sector); |
1728 | bio = bio_clone(r10_bio->master_bio, GFP_NOIO); | 1728 | bio = bio_clone(r10_bio->master_bio, GFP_NOIO); |
@@ -1980,7 +1980,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1980 | r10_bio = rb2; | 1980 | r10_bio = rb2; |
1981 | if (!test_and_set_bit(MD_RECOVERY_INTR, | 1981 | if (!test_and_set_bit(MD_RECOVERY_INTR, |
1982 | &mddev->recovery)) | 1982 | &mddev->recovery)) |
1983 | printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n", | 1983 | printk(KERN_INFO "md/raid10:%s: insufficient " |
1984 | "working devices for recovery.\n", | ||
1984 | mdname(mddev)); | 1985 | mdname(mddev)); |
1985 | break; | 1986 | break; |
1986 | } | 1987 | } |
@@ -2140,9 +2141,9 @@ raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks) | |||
2140 | conf_t *conf = mddev->private; | 2141 | conf_t *conf = mddev->private; |
2141 | 2142 | ||
2142 | if (!raid_disks) | 2143 | if (!raid_disks) |
2143 | raid_disks = mddev->raid_disks; | 2144 | raid_disks = conf->raid_disks; |
2144 | if (!sectors) | 2145 | if (!sectors) |
2145 | sectors = mddev->dev_sectors; | 2146 | sectors = conf->dev_sectors; |
2146 | 2147 | ||
2147 | size = sectors >> conf->chunk_shift; | 2148 | size = sectors >> conf->chunk_shift; |
2148 | sector_div(size, conf->far_copies); | 2149 | sector_div(size, conf->far_copies); |
@@ -2152,62 +2153,61 @@ raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks) | |||
2152 | return size << conf->chunk_shift; | 2153 | return size << conf->chunk_shift; |
2153 | } | 2154 | } |
2154 | 2155 | ||
2155 | static int run(mddev_t *mddev) | 2156 | |
2157 | static conf_t *setup_conf(mddev_t *mddev) | ||
2156 | { | 2158 | { |
2157 | conf_t *conf; | 2159 | conf_t *conf = NULL; |
2158 | int i, disk_idx, chunk_size; | ||
2159 | mirror_info_t *disk; | ||
2160 | mdk_rdev_t *rdev; | ||
2161 | int nc, fc, fo; | 2160 | int nc, fc, fo; |
2162 | sector_t stride, size; | 2161 | sector_t stride, size; |
2162 | int err = -EINVAL; | ||
2163 | 2163 | ||
2164 | if (mddev->chunk_sectors < (PAGE_SIZE >> 9) || | 2164 | if (mddev->chunk_sectors < (PAGE_SIZE >> 9) || |
2165 | !is_power_of_2(mddev->chunk_sectors)) { | 2165 | !is_power_of_2(mddev->chunk_sectors)) { |
2166 | printk(KERN_ERR "md/raid10: chunk size must be " | 2166 | printk(KERN_ERR "md/raid10:%s: chunk size must be " |
2167 | "at least PAGE_SIZE(%ld) and be a power of 2.\n", PAGE_SIZE); | 2167 | "at least PAGE_SIZE(%ld) and be a power of 2.\n", |
2168 | return -EINVAL; | 2168 | mdname(mddev), PAGE_SIZE); |
2169 | goto out; | ||
2169 | } | 2170 | } |
2170 | 2171 | ||
2171 | nc = mddev->layout & 255; | 2172 | nc = mddev->layout & 255; |
2172 | fc = (mddev->layout >> 8) & 255; | 2173 | fc = (mddev->layout >> 8) & 255; |
2173 | fo = mddev->layout & (1<<16); | 2174 | fo = mddev->layout & (1<<16); |
2175 | |||
2174 | if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks || | 2176 | if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks || |
2175 | (mddev->layout >> 17)) { | 2177 | (mddev->layout >> 17)) { |
2176 | printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n", | 2178 | printk(KERN_ERR "md/raid10:%s: unsupported raid10 layout: 0x%8x\n", |
2177 | mdname(mddev), mddev->layout); | 2179 | mdname(mddev), mddev->layout); |
2178 | goto out; | 2180 | goto out; |
2179 | } | 2181 | } |
2180 | /* | 2182 | |
2181 | * copy the already verified devices into our private RAID10 | 2183 | err = -ENOMEM; |
2182 | * bookkeeping area. [whatever we allocate in run(), | ||
2183 | * should be freed in stop()] | ||
2184 | */ | ||
2185 | conf = kzalloc(sizeof(conf_t), GFP_KERNEL); | 2184 | conf = kzalloc(sizeof(conf_t), GFP_KERNEL); |
2186 | mddev->private = conf; | 2185 | if (!conf) |
2187 | if (!conf) { | ||
2188 | printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", | ||
2189 | mdname(mddev)); | ||
2190 | goto out; | 2186 | goto out; |
2191 | } | 2187 | |
2192 | conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, | 2188 | conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, |
2193 | GFP_KERNEL); | 2189 | GFP_KERNEL); |
2194 | if (!conf->mirrors) { | 2190 | if (!conf->mirrors) |
2195 | printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", | 2191 | goto out; |
2196 | mdname(mddev)); | ||
2197 | goto out_free_conf; | ||
2198 | } | ||
2199 | 2192 | ||
2200 | conf->tmppage = alloc_page(GFP_KERNEL); | 2193 | conf->tmppage = alloc_page(GFP_KERNEL); |
2201 | if (!conf->tmppage) | 2194 | if (!conf->tmppage) |
2202 | goto out_free_conf; | 2195 | goto out; |
2196 | |||
2203 | 2197 | ||
2204 | conf->raid_disks = mddev->raid_disks; | 2198 | conf->raid_disks = mddev->raid_disks; |
2205 | conf->near_copies = nc; | 2199 | conf->near_copies = nc; |
2206 | conf->far_copies = fc; | 2200 | conf->far_copies = fc; |
2207 | conf->copies = nc*fc; | 2201 | conf->copies = nc*fc; |
2208 | conf->far_offset = fo; | 2202 | conf->far_offset = fo; |
2209 | conf->chunk_mask = mddev->chunk_sectors - 1; | 2203 | conf->chunk_mask = mddev->new_chunk_sectors - 1; |
2210 | conf->chunk_shift = ffz(~mddev->chunk_sectors); | 2204 | conf->chunk_shift = ffz(~mddev->new_chunk_sectors); |
2205 | |||
2206 | conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc, | ||
2207 | r10bio_pool_free, conf); | ||
2208 | if (!conf->r10bio_pool) | ||
2209 | goto out; | ||
2210 | |||
2211 | size = mddev->dev_sectors >> conf->chunk_shift; | 2211 | size = mddev->dev_sectors >> conf->chunk_shift; |
2212 | sector_div(size, fc); | 2212 | sector_div(size, fc); |
2213 | size = size * conf->raid_disks; | 2213 | size = size * conf->raid_disks; |
@@ -2221,7 +2221,8 @@ static int run(mddev_t *mddev) | |||
2221 | */ | 2221 | */ |
2222 | stride += conf->raid_disks - 1; | 2222 | stride += conf->raid_disks - 1; |
2223 | sector_div(stride, conf->raid_disks); | 2223 | sector_div(stride, conf->raid_disks); |
2224 | mddev->dev_sectors = stride << conf->chunk_shift; | 2224 | |
2225 | conf->dev_sectors = stride << conf->chunk_shift; | ||
2225 | 2226 | ||
2226 | if (fo) | 2227 | if (fo) |
2227 | stride = 1; | 2228 | stride = 1; |
@@ -2229,18 +2230,63 @@ static int run(mddev_t *mddev) | |||
2229 | sector_div(stride, fc); | 2230 | sector_div(stride, fc); |
2230 | conf->stride = stride << conf->chunk_shift; | 2231 | conf->stride = stride << conf->chunk_shift; |
2231 | 2232 | ||
2232 | conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc, | ||
2233 | r10bio_pool_free, conf); | ||
2234 | if (!conf->r10bio_pool) { | ||
2235 | printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", | ||
2236 | mdname(mddev)); | ||
2237 | goto out_free_conf; | ||
2238 | } | ||
2239 | 2233 | ||
2240 | conf->mddev = mddev; | ||
2241 | spin_lock_init(&conf->device_lock); | 2234 | spin_lock_init(&conf->device_lock); |
2235 | INIT_LIST_HEAD(&conf->retry_list); | ||
2236 | |||
2237 | spin_lock_init(&conf->resync_lock); | ||
2238 | init_waitqueue_head(&conf->wait_barrier); | ||
2239 | |||
2240 | conf->thread = md_register_thread(raid10d, mddev, NULL); | ||
2241 | if (!conf->thread) | ||
2242 | goto out; | ||
2243 | |||
2244 | conf->scale_disks = 0; | ||
2245 | conf->mddev = mddev; | ||
2246 | return conf; | ||
2247 | |||
2248 | out: | ||
2249 | printk(KERN_ERR "md/raid10:%s: couldn't allocate memory.\n", | ||
2250 | mdname(mddev)); | ||
2251 | if (conf) { | ||
2252 | if (conf->r10bio_pool) | ||
2253 | mempool_destroy(conf->r10bio_pool); | ||
2254 | kfree(conf->mirrors); | ||
2255 | safe_put_page(conf->tmppage); | ||
2256 | kfree(conf); | ||
2257 | } | ||
2258 | return ERR_PTR(err); | ||
2259 | } | ||
2260 | |||
2261 | static int run(mddev_t *mddev) | ||
2262 | { | ||
2263 | conf_t *conf; | ||
2264 | int i, disk_idx, chunk_size; | ||
2265 | mirror_info_t *disk; | ||
2266 | mdk_rdev_t *rdev; | ||
2267 | sector_t size; | ||
2268 | |||
2269 | /* | ||
2270 | * copy the already verified devices into our private RAID10 | ||
2271 | * bookkeeping area. [whatever we allocate in run(), | ||
2272 | * should be freed in stop()] | ||
2273 | */ | ||
2274 | |||
2275 | if (mddev->private == NULL) { | ||
2276 | conf = setup_conf(mddev); | ||
2277 | if (IS_ERR(conf)) | ||
2278 | return PTR_ERR(conf); | ||
2279 | mddev->private = conf; | ||
2280 | } | ||
2281 | conf = mddev->private; | ||
2282 | if (!conf) | ||
2283 | goto out; | ||
2284 | |||
2242 | mddev->queue->queue_lock = &conf->device_lock; | 2285 | mddev->queue->queue_lock = &conf->device_lock; |
2243 | 2286 | ||
2287 | mddev->thread = conf->thread; | ||
2288 | conf->thread = NULL; | ||
2289 | |||
2244 | chunk_size = mddev->chunk_sectors << 9; | 2290 | chunk_size = mddev->chunk_sectors << 9; |
2245 | blk_queue_io_min(mddev->queue, chunk_size); | 2291 | blk_queue_io_min(mddev->queue, chunk_size); |
2246 | if (conf->raid_disks % conf->near_copies) | 2292 | if (conf->raid_disks % conf->near_copies) |
@@ -2251,9 +2297,14 @@ static int run(mddev_t *mddev) | |||
2251 | 2297 | ||
2252 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 2298 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
2253 | disk_idx = rdev->raid_disk; | 2299 | disk_idx = rdev->raid_disk; |
2254 | if (disk_idx >= mddev->raid_disks | 2300 | if (disk_idx >= conf->raid_disks |
2255 | || disk_idx < 0) | 2301 | || disk_idx < 0) |
2256 | continue; | 2302 | continue; |
2303 | if (conf->scale_disks) { | ||
2304 | disk_idx *= conf->scale_disks; | ||
2305 | rdev->raid_disk = disk_idx; | ||
2306 | /* MOVE 'rd%d' link !! */ | ||
2307 | } | ||
2257 | disk = conf->mirrors + disk_idx; | 2308 | disk = conf->mirrors + disk_idx; |
2258 | 2309 | ||
2259 | disk->rdev = rdev; | 2310 | disk->rdev = rdev; |
@@ -2271,14 +2322,9 @@ static int run(mddev_t *mddev) | |||
2271 | 2322 | ||
2272 | disk->head_position = 0; | 2323 | disk->head_position = 0; |
2273 | } | 2324 | } |
2274 | INIT_LIST_HEAD(&conf->retry_list); | ||
2275 | |||
2276 | spin_lock_init(&conf->resync_lock); | ||
2277 | init_waitqueue_head(&conf->wait_barrier); | ||
2278 | |||
2279 | /* need to check that every block has at least one working mirror */ | 2325 | /* need to check that every block has at least one working mirror */ |
2280 | if (!enough(conf)) { | 2326 | if (!enough(conf)) { |
2281 | printk(KERN_ERR "raid10: not enough operational mirrors for %s\n", | 2327 | printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n", |
2282 | mdname(mddev)); | 2328 | mdname(mddev)); |
2283 | goto out_free_conf; | 2329 | goto out_free_conf; |
2284 | } | 2330 | } |
@@ -2297,28 +2343,21 @@ static int run(mddev_t *mddev) | |||
2297 | } | 2343 | } |
2298 | } | 2344 | } |
2299 | 2345 | ||
2300 | |||
2301 | mddev->thread = md_register_thread(raid10d, mddev, NULL); | ||
2302 | if (!mddev->thread) { | ||
2303 | printk(KERN_ERR | ||
2304 | "raid10: couldn't allocate thread for %s\n", | ||
2305 | mdname(mddev)); | ||
2306 | goto out_free_conf; | ||
2307 | } | ||
2308 | |||
2309 | if (mddev->recovery_cp != MaxSector) | 2346 | if (mddev->recovery_cp != MaxSector) |
2310 | printk(KERN_NOTICE "raid10: %s is not clean" | 2347 | printk(KERN_NOTICE "md/raid10:%s: not clean" |
2311 | " -- starting background reconstruction\n", | 2348 | " -- starting background reconstruction\n", |
2312 | mdname(mddev)); | 2349 | mdname(mddev)); |
2313 | printk(KERN_INFO | 2350 | printk(KERN_INFO |
2314 | "raid10: raid set %s active with %d out of %d devices\n", | 2351 | "md/raid10:%s: active with %d out of %d devices\n", |
2315 | mdname(mddev), mddev->raid_disks - mddev->degraded, | 2352 | mdname(mddev), conf->raid_disks - mddev->degraded, |
2316 | mddev->raid_disks); | 2353 | conf->raid_disks); |
2317 | /* | 2354 | /* |
2318 | * Ok, everything is just fine now | 2355 | * Ok, everything is just fine now |
2319 | */ | 2356 | */ |
2320 | md_set_array_sectors(mddev, raid10_size(mddev, 0, 0)); | 2357 | mddev->dev_sectors = conf->dev_sectors; |
2321 | mddev->resync_max_sectors = raid10_size(mddev, 0, 0); | 2358 | size = raid10_size(mddev, 0, 0); |
2359 | md_set_array_sectors(mddev, size); | ||
2360 | mddev->resync_max_sectors = size; | ||
2322 | 2361 | ||
2323 | mddev->queue->unplug_fn = raid10_unplug; | 2362 | mddev->queue->unplug_fn = raid10_unplug; |
2324 | mddev->queue->backing_dev_info.congested_fn = raid10_congested; | 2363 | mddev->queue->backing_dev_info.congested_fn = raid10_congested; |
@@ -2336,7 +2375,7 @@ static int run(mddev_t *mddev) | |||
2336 | mddev->queue->backing_dev_info.ra_pages = 2* stripe; | 2375 | mddev->queue->backing_dev_info.ra_pages = 2* stripe; |
2337 | } | 2376 | } |
2338 | 2377 | ||
2339 | if (conf->near_copies < mddev->raid_disks) | 2378 | if (conf->near_copies < conf->raid_disks) |
2340 | blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); | 2379 | blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); |
2341 | md_integrity_register(mddev); | 2380 | md_integrity_register(mddev); |
2342 | return 0; | 2381 | return 0; |
@@ -2348,6 +2387,7 @@ out_free_conf: | |||
2348 | kfree(conf->mirrors); | 2387 | kfree(conf->mirrors); |
2349 | kfree(conf); | 2388 | kfree(conf); |
2350 | mddev->private = NULL; | 2389 | mddev->private = NULL; |
2390 | md_unregister_thread(mddev->thread); | ||
2351 | out: | 2391 | out: |
2352 | return -EIO; | 2392 | return -EIO; |
2353 | } | 2393 | } |
@@ -2384,6 +2424,61 @@ static void raid10_quiesce(mddev_t *mddev, int state) | |||
2384 | } | 2424 | } |
2385 | } | 2425 | } |
2386 | 2426 | ||
2427 | static void *raid10_takeover_raid0(mddev_t *mddev) | ||
2428 | { | ||
2429 | mdk_rdev_t *rdev; | ||
2430 | conf_t *conf; | ||
2431 | |||
2432 | if (mddev->degraded > 0) { | ||
2433 | printk(KERN_ERR "md/raid10:%s: Error: degraded raid0!\n", | ||
2434 | mdname(mddev)); | ||
2435 | return ERR_PTR(-EINVAL); | ||
2436 | } | ||
2437 | |||
2438 | /* Update slot numbers to obtain | ||
2439 | * degraded raid10 with missing mirrors | ||
2440 | */ | ||
2441 | list_for_each_entry(rdev, &mddev->disks, same_set) { | ||
2442 | rdev->raid_disk *= 2; | ||
2443 | } | ||
2444 | |||
2445 | /* Set new parameters */ | ||
2446 | mddev->new_level = 10; | ||
2447 | /* new layout: far_copies = 1, near_copies = 2 */ | ||
2448 | mddev->new_layout = (1<<8) + 2; | ||
2449 | mddev->new_chunk_sectors = mddev->chunk_sectors; | ||
2450 | mddev->delta_disks = mddev->raid_disks; | ||
2451 | mddev->degraded = mddev->raid_disks; | ||
2452 | mddev->raid_disks *= 2; | ||
2453 | /* make sure it will be not marked as dirty */ | ||
2454 | mddev->recovery_cp = MaxSector; | ||
2455 | |||
2456 | conf = setup_conf(mddev); | ||
2457 | conf->scale_disks = 2; | ||
2458 | return conf; | ||
2459 | } | ||
2460 | |||
2461 | static void *raid10_takeover(mddev_t *mddev) | ||
2462 | { | ||
2463 | struct raid0_private_data *raid0_priv; | ||
2464 | |||
2465 | /* raid10 can take over: | ||
2466 | * raid0 - providing it has only two drives | ||
2467 | */ | ||
2468 | if (mddev->level == 0) { | ||
2469 | /* for raid0 takeover only one zone is supported */ | ||
2470 | raid0_priv = mddev->private; | ||
2471 | if (raid0_priv->nr_strip_zones > 1) { | ||
2472 | printk(KERN_ERR "md/raid10:%s: cannot takeover raid 0" | ||
2473 | " with more than one zone.\n", | ||
2474 | mdname(mddev)); | ||
2475 | return ERR_PTR(-EINVAL); | ||
2476 | } | ||
2477 | return raid10_takeover_raid0(mddev); | ||
2478 | } | ||
2479 | return ERR_PTR(-EINVAL); | ||
2480 | } | ||
2481 | |||
2387 | static struct mdk_personality raid10_personality = | 2482 | static struct mdk_personality raid10_personality = |
2388 | { | 2483 | { |
2389 | .name = "raid10", | 2484 | .name = "raid10", |
@@ -2400,6 +2495,7 @@ static struct mdk_personality raid10_personality = | |||
2400 | .sync_request = sync_request, | 2495 | .sync_request = sync_request, |
2401 | .quiesce = raid10_quiesce, | 2496 | .quiesce = raid10_quiesce, |
2402 | .size = raid10_size, | 2497 | .size = raid10_size, |
2498 | .takeover = raid10_takeover, | ||
2403 | }; | 2499 | }; |
2404 | 2500 | ||
2405 | static int __init raid_init(void) | 2501 | static int __init raid_init(void) |