aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid10.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r--drivers/md/raid10.c300
1 files changed, 198 insertions, 102 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index e2766d8251a1..03724992cdf2 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -24,6 +24,7 @@
24#include <linux/seq_file.h> 24#include <linux/seq_file.h>
25#include "md.h" 25#include "md.h"
26#include "raid10.h" 26#include "raid10.h"
27#include "raid0.h"
27#include "bitmap.h" 28#include "bitmap.h"
28 29
29/* 30/*
@@ -255,7 +256,7 @@ static inline void update_head_pos(int slot, r10bio_t *r10_bio)
255static void raid10_end_read_request(struct bio *bio, int error) 256static void raid10_end_read_request(struct bio *bio, int error)
256{ 257{
257 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 258 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
258 r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); 259 r10bio_t *r10_bio = bio->bi_private;
259 int slot, dev; 260 int slot, dev;
260 conf_t *conf = r10_bio->mddev->private; 261 conf_t *conf = r10_bio->mddev->private;
261 262
@@ -285,7 +286,8 @@ static void raid10_end_read_request(struct bio *bio, int error)
285 */ 286 */
286 char b[BDEVNAME_SIZE]; 287 char b[BDEVNAME_SIZE];
287 if (printk_ratelimit()) 288 if (printk_ratelimit())
288 printk(KERN_ERR "raid10: %s: rescheduling sector %llu\n", 289 printk(KERN_ERR "md/raid10:%s: %s: rescheduling sector %llu\n",
290 mdname(conf->mddev),
289 bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector); 291 bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector);
290 reschedule_retry(r10_bio); 292 reschedule_retry(r10_bio);
291 } 293 }
@@ -296,7 +298,7 @@ static void raid10_end_read_request(struct bio *bio, int error)
296static void raid10_end_write_request(struct bio *bio, int error) 298static void raid10_end_write_request(struct bio *bio, int error)
297{ 299{
298 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 300 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
299 r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); 301 r10bio_t *r10_bio = bio->bi_private;
300 int slot, dev; 302 int slot, dev;
301 conf_t *conf = r10_bio->mddev->private; 303 conf_t *conf = r10_bio->mddev->private;
302 304
@@ -494,7 +496,7 @@ static int raid10_mergeable_bvec(struct request_queue *q,
494 */ 496 */
495static int read_balance(conf_t *conf, r10bio_t *r10_bio) 497static int read_balance(conf_t *conf, r10bio_t *r10_bio)
496{ 498{
497 const unsigned long this_sector = r10_bio->sector; 499 const sector_t this_sector = r10_bio->sector;
498 int disk, slot, nslot; 500 int disk, slot, nslot;
499 const int sectors = r10_bio->sectors; 501 const int sectors = r10_bio->sectors;
500 sector_t new_distance, current_distance; 502 sector_t new_distance, current_distance;
@@ -601,7 +603,7 @@ static void unplug_slaves(mddev_t *mddev)
601 int i; 603 int i;
602 604
603 rcu_read_lock(); 605 rcu_read_lock();
604 for (i=0; i<mddev->raid_disks; i++) { 606 for (i=0; i < conf->raid_disks; i++) {
605 mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); 607 mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
606 if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) { 608 if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
607 struct request_queue *r_queue = bdev_get_queue(rdev->bdev); 609 struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
@@ -635,7 +637,7 @@ static int raid10_congested(void *data, int bits)
635 if (mddev_congested(mddev, bits)) 637 if (mddev_congested(mddev, bits))
636 return 1; 638 return 1;
637 rcu_read_lock(); 639 rcu_read_lock();
638 for (i = 0; i < mddev->raid_disks && ret == 0; i++) { 640 for (i = 0; i < conf->raid_disks && ret == 0; i++) {
639 mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); 641 mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
640 if (rdev && !test_bit(Faulty, &rdev->flags)) { 642 if (rdev && !test_bit(Faulty, &rdev->flags)) {
641 struct request_queue *q = bdev_get_queue(rdev->bdev); 643 struct request_queue *q = bdev_get_queue(rdev->bdev);
@@ -788,14 +790,12 @@ static void unfreeze_array(conf_t *conf)
788 spin_unlock_irq(&conf->resync_lock); 790 spin_unlock_irq(&conf->resync_lock);
789} 791}
790 792
791static int make_request(struct request_queue *q, struct bio * bio) 793static int make_request(mddev_t *mddev, struct bio * bio)
792{ 794{
793 mddev_t *mddev = q->queuedata;
794 conf_t *conf = mddev->private; 795 conf_t *conf = mddev->private;
795 mirror_info_t *mirror; 796 mirror_info_t *mirror;
796 r10bio_t *r10_bio; 797 r10bio_t *r10_bio;
797 struct bio *read_bio; 798 struct bio *read_bio;
798 int cpu;
799 int i; 799 int i;
800 int chunk_sects = conf->chunk_mask + 1; 800 int chunk_sects = conf->chunk_mask + 1;
801 const int rw = bio_data_dir(bio); 801 const int rw = bio_data_dir(bio);
@@ -825,16 +825,16 @@ static int make_request(struct request_queue *q, struct bio * bio)
825 */ 825 */
826 bp = bio_split(bio, 826 bp = bio_split(bio,
827 chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); 827 chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
828 if (make_request(q, &bp->bio1)) 828 if (make_request(mddev, &bp->bio1))
829 generic_make_request(&bp->bio1); 829 generic_make_request(&bp->bio1);
830 if (make_request(q, &bp->bio2)) 830 if (make_request(mddev, &bp->bio2))
831 generic_make_request(&bp->bio2); 831 generic_make_request(&bp->bio2);
832 832
833 bio_pair_release(bp); 833 bio_pair_release(bp);
834 return 0; 834 return 0;
835 bad_map: 835 bad_map:
836 printk("raid10_make_request bug: can't convert block across chunks" 836 printk("md/raid10:%s: make_request bug: can't convert block across chunks"
837 " or bigger than %dk %llu %d\n", chunk_sects/2, 837 " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
838 (unsigned long long)bio->bi_sector, bio->bi_size >> 10); 838 (unsigned long long)bio->bi_sector, bio->bi_size >> 10);
839 839
840 bio_io_error(bio); 840 bio_io_error(bio);
@@ -850,12 +850,6 @@ static int make_request(struct request_queue *q, struct bio * bio)
850 */ 850 */
851 wait_barrier(conf); 851 wait_barrier(conf);
852 852
853 cpu = part_stat_lock();
854 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
855 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
856 bio_sectors(bio));
857 part_stat_unlock();
858
859 r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); 853 r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
860 854
861 r10_bio->master_bio = bio; 855 r10_bio->master_bio = bio;
@@ -1039,9 +1033,10 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
1039 } 1033 }
1040 set_bit(Faulty, &rdev->flags); 1034 set_bit(Faulty, &rdev->flags);
1041 set_bit(MD_CHANGE_DEVS, &mddev->flags); 1035 set_bit(MD_CHANGE_DEVS, &mddev->flags);
1042 printk(KERN_ALERT "raid10: Disk failure on %s, disabling device.\n" 1036 printk(KERN_ALERT "md/raid10:%s: Disk failure on %s, disabling device.\n"
1043 "raid10: Operation continuing on %d devices.\n", 1037 KERN_ALERT "md/raid10:%s: Operation continuing on %d devices.\n",
1044 bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded); 1038 mdname(mddev), bdevname(rdev->bdev, b),
1039 mdname(mddev), conf->raid_disks - mddev->degraded);
1045} 1040}
1046 1041
1047static void print_conf(conf_t *conf) 1042static void print_conf(conf_t *conf)
@@ -1049,19 +1044,19 @@ static void print_conf(conf_t *conf)
1049 int i; 1044 int i;
1050 mirror_info_t *tmp; 1045 mirror_info_t *tmp;
1051 1046
1052 printk("RAID10 conf printout:\n"); 1047 printk(KERN_DEBUG "RAID10 conf printout:\n");
1053 if (!conf) { 1048 if (!conf) {
1054 printk("(!conf)\n"); 1049 printk(KERN_DEBUG "(!conf)\n");
1055 return; 1050 return;
1056 } 1051 }
1057 printk(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded, 1052 printk(KERN_DEBUG " --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
1058 conf->raid_disks); 1053 conf->raid_disks);
1059 1054
1060 for (i = 0; i < conf->raid_disks; i++) { 1055 for (i = 0; i < conf->raid_disks; i++) {
1061 char b[BDEVNAME_SIZE]; 1056 char b[BDEVNAME_SIZE];
1062 tmp = conf->mirrors + i; 1057 tmp = conf->mirrors + i;
1063 if (tmp->rdev) 1058 if (tmp->rdev)
1064 printk(" disk %d, wo:%d, o:%d, dev:%s\n", 1059 printk(KERN_DEBUG " disk %d, wo:%d, o:%d, dev:%s\n",
1065 i, !test_bit(In_sync, &tmp->rdev->flags), 1060 i, !test_bit(In_sync, &tmp->rdev->flags),
1066 !test_bit(Faulty, &tmp->rdev->flags), 1061 !test_bit(Faulty, &tmp->rdev->flags),
1067 bdevname(tmp->rdev->bdev,b)); 1062 bdevname(tmp->rdev->bdev,b));
@@ -1132,7 +1127,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
1132 int mirror; 1127 int mirror;
1133 mirror_info_t *p; 1128 mirror_info_t *p;
1134 int first = 0; 1129 int first = 0;
1135 int last = mddev->raid_disks - 1; 1130 int last = conf->raid_disks - 1;
1136 1131
1137 if (mddev->recovery_cp < MaxSector) 1132 if (mddev->recovery_cp < MaxSector)
1138 /* only hot-add to in-sync arrays, as recovery is 1133 /* only hot-add to in-sync arrays, as recovery is
@@ -1224,7 +1219,7 @@ abort:
1224 1219
1225static void end_sync_read(struct bio *bio, int error) 1220static void end_sync_read(struct bio *bio, int error)
1226{ 1221{
1227 r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); 1222 r10bio_t *r10_bio = bio->bi_private;
1228 conf_t *conf = r10_bio->mddev->private; 1223 conf_t *conf = r10_bio->mddev->private;
1229 int i,d; 1224 int i,d;
1230 1225
@@ -1261,7 +1256,7 @@ static void end_sync_read(struct bio *bio, int error)
1261static void end_sync_write(struct bio *bio, int error) 1256static void end_sync_write(struct bio *bio, int error)
1262{ 1257{
1263 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 1258 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1264 r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); 1259 r10bio_t *r10_bio = bio->bi_private;
1265 mddev_t *mddev = r10_bio->mddev; 1260 mddev_t *mddev = r10_bio->mddev;
1266 conf_t *conf = mddev->private; 1261 conf_t *conf = mddev->private;
1267 int i,d; 1262 int i,d;
@@ -1510,13 +1505,14 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1510 if (cur_read_error_count > max_read_errors) { 1505 if (cur_read_error_count > max_read_errors) {
1511 rcu_read_unlock(); 1506 rcu_read_unlock();
1512 printk(KERN_NOTICE 1507 printk(KERN_NOTICE
1513 "raid10: %s: Raid device exceeded " 1508 "md/raid10:%s: %s: Raid device exceeded "
1514 "read_error threshold " 1509 "read_error threshold "
1515 "[cur %d:max %d]\n", 1510 "[cur %d:max %d]\n",
1511 mdname(mddev),
1516 b, cur_read_error_count, max_read_errors); 1512 b, cur_read_error_count, max_read_errors);
1517 printk(KERN_NOTICE 1513 printk(KERN_NOTICE
1518 "raid10: %s: Failing raid " 1514 "md/raid10:%s: %s: Failing raid "
1519 "device\n", b); 1515 "device\n", mdname(mddev), b);
1520 md_error(mddev, conf->mirrors[d].rdev); 1516 md_error(mddev, conf->mirrors[d].rdev);
1521 return; 1517 return;
1522 } 1518 }
@@ -1586,15 +1582,16 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1586 == 0) { 1582 == 0) {
1587 /* Well, this device is dead */ 1583 /* Well, this device is dead */
1588 printk(KERN_NOTICE 1584 printk(KERN_NOTICE
1589 "raid10:%s: read correction " 1585 "md/raid10:%s: read correction "
1590 "write failed" 1586 "write failed"
1591 " (%d sectors at %llu on %s)\n", 1587 " (%d sectors at %llu on %s)\n",
1592 mdname(mddev), s, 1588 mdname(mddev), s,
1593 (unsigned long long)(sect+ 1589 (unsigned long long)(sect+
1594 rdev->data_offset), 1590 rdev->data_offset),
1595 bdevname(rdev->bdev, b)); 1591 bdevname(rdev->bdev, b));
1596 printk(KERN_NOTICE "raid10:%s: failing " 1592 printk(KERN_NOTICE "md/raid10:%s: %s: failing "
1597 "drive\n", 1593 "drive\n",
1594 mdname(mddev),
1598 bdevname(rdev->bdev, b)); 1595 bdevname(rdev->bdev, b));
1599 md_error(mddev, rdev); 1596 md_error(mddev, rdev);
1600 } 1597 }
@@ -1622,20 +1619,21 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1622 READ) == 0) { 1619 READ) == 0) {
1623 /* Well, this device is dead */ 1620 /* Well, this device is dead */
1624 printk(KERN_NOTICE 1621 printk(KERN_NOTICE
1625 "raid10:%s: unable to read back " 1622 "md/raid10:%s: unable to read back "
1626 "corrected sectors" 1623 "corrected sectors"
1627 " (%d sectors at %llu on %s)\n", 1624 " (%d sectors at %llu on %s)\n",
1628 mdname(mddev), s, 1625 mdname(mddev), s,
1629 (unsigned long long)(sect+ 1626 (unsigned long long)(sect+
1630 rdev->data_offset), 1627 rdev->data_offset),
1631 bdevname(rdev->bdev, b)); 1628 bdevname(rdev->bdev, b));
1632 printk(KERN_NOTICE "raid10:%s: failing drive\n", 1629 printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n",
1630 mdname(mddev),
1633 bdevname(rdev->bdev, b)); 1631 bdevname(rdev->bdev, b));
1634 1632
1635 md_error(mddev, rdev); 1633 md_error(mddev, rdev);
1636 } else { 1634 } else {
1637 printk(KERN_INFO 1635 printk(KERN_INFO
1638 "raid10:%s: read error corrected" 1636 "md/raid10:%s: read error corrected"
1639 " (%d sectors at %llu on %s)\n", 1637 " (%d sectors at %llu on %s)\n",
1640 mdname(mddev), s, 1638 mdname(mddev), s,
1641 (unsigned long long)(sect+ 1639 (unsigned long long)(sect+
@@ -1710,8 +1708,9 @@ static void raid10d(mddev_t *mddev)
1710 mddev->ro ? IO_BLOCKED : NULL; 1708 mddev->ro ? IO_BLOCKED : NULL;
1711 mirror = read_balance(conf, r10_bio); 1709 mirror = read_balance(conf, r10_bio);
1712 if (mirror == -1) { 1710 if (mirror == -1) {
1713 printk(KERN_ALERT "raid10: %s: unrecoverable I/O" 1711 printk(KERN_ALERT "md/raid10:%s: %s: unrecoverable I/O"
1714 " read error for block %llu\n", 1712 " read error for block %llu\n",
1713 mdname(mddev),
1715 bdevname(bio->bi_bdev,b), 1714 bdevname(bio->bi_bdev,b),
1716 (unsigned long long)r10_bio->sector); 1715 (unsigned long long)r10_bio->sector);
1717 raid_end_bio_io(r10_bio); 1716 raid_end_bio_io(r10_bio);
@@ -1721,8 +1720,9 @@ static void raid10d(mddev_t *mddev)
1721 bio_put(bio); 1720 bio_put(bio);
1722 rdev = conf->mirrors[mirror].rdev; 1721 rdev = conf->mirrors[mirror].rdev;
1723 if (printk_ratelimit()) 1722 if (printk_ratelimit())
1724 printk(KERN_ERR "raid10: %s: redirecting sector %llu to" 1723 printk(KERN_ERR "md/raid10:%s: %s: redirecting sector %llu to"
1725 " another mirror\n", 1724 " another mirror\n",
1725 mdname(mddev),
1726 bdevname(rdev->bdev,b), 1726 bdevname(rdev->bdev,b),
1727 (unsigned long long)r10_bio->sector); 1727 (unsigned long long)r10_bio->sector);
1728 bio = bio_clone(r10_bio->master_bio, GFP_NOIO); 1728 bio = bio_clone(r10_bio->master_bio, GFP_NOIO);
@@ -1980,7 +1980,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1980 r10_bio = rb2; 1980 r10_bio = rb2;
1981 if (!test_and_set_bit(MD_RECOVERY_INTR, 1981 if (!test_and_set_bit(MD_RECOVERY_INTR,
1982 &mddev->recovery)) 1982 &mddev->recovery))
1983 printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n", 1983 printk(KERN_INFO "md/raid10:%s: insufficient "
1984 "working devices for recovery.\n",
1984 mdname(mddev)); 1985 mdname(mddev));
1985 break; 1986 break;
1986 } 1987 }
@@ -2140,9 +2141,9 @@ raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks)
2140 conf_t *conf = mddev->private; 2141 conf_t *conf = mddev->private;
2141 2142
2142 if (!raid_disks) 2143 if (!raid_disks)
2143 raid_disks = mddev->raid_disks; 2144 raid_disks = conf->raid_disks;
2144 if (!sectors) 2145 if (!sectors)
2145 sectors = mddev->dev_sectors; 2146 sectors = conf->dev_sectors;
2146 2147
2147 size = sectors >> conf->chunk_shift; 2148 size = sectors >> conf->chunk_shift;
2148 sector_div(size, conf->far_copies); 2149 sector_div(size, conf->far_copies);
@@ -2152,62 +2153,61 @@ raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks)
2152 return size << conf->chunk_shift; 2153 return size << conf->chunk_shift;
2153} 2154}
2154 2155
2155static int run(mddev_t *mddev) 2156
2157static conf_t *setup_conf(mddev_t *mddev)
2156{ 2158{
2157 conf_t *conf; 2159 conf_t *conf = NULL;
2158 int i, disk_idx, chunk_size;
2159 mirror_info_t *disk;
2160 mdk_rdev_t *rdev;
2161 int nc, fc, fo; 2160 int nc, fc, fo;
2162 sector_t stride, size; 2161 sector_t stride, size;
2162 int err = -EINVAL;
2163 2163
2164 if (mddev->chunk_sectors < (PAGE_SIZE >> 9) || 2164 if (mddev->chunk_sectors < (PAGE_SIZE >> 9) ||
2165 !is_power_of_2(mddev->chunk_sectors)) { 2165 !is_power_of_2(mddev->chunk_sectors)) {
2166 printk(KERN_ERR "md/raid10: chunk size must be " 2166 printk(KERN_ERR "md/raid10:%s: chunk size must be "
2167 "at least PAGE_SIZE(%ld) and be a power of 2.\n", PAGE_SIZE); 2167 "at least PAGE_SIZE(%ld) and be a power of 2.\n",
2168 return -EINVAL; 2168 mdname(mddev), PAGE_SIZE);
2169 goto out;
2169 } 2170 }
2170 2171
2171 nc = mddev->layout & 255; 2172 nc = mddev->layout & 255;
2172 fc = (mddev->layout >> 8) & 255; 2173 fc = (mddev->layout >> 8) & 255;
2173 fo = mddev->layout & (1<<16); 2174 fo = mddev->layout & (1<<16);
2175
2174 if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks || 2176 if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks ||
2175 (mddev->layout >> 17)) { 2177 (mddev->layout >> 17)) {
2176 printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n", 2178 printk(KERN_ERR "md/raid10:%s: unsupported raid10 layout: 0x%8x\n",
2177 mdname(mddev), mddev->layout); 2179 mdname(mddev), mddev->layout);
2178 goto out; 2180 goto out;
2179 } 2181 }
2180 /* 2182
2181 * copy the already verified devices into our private RAID10 2183 err = -ENOMEM;
2182 * bookkeeping area. [whatever we allocate in run(),
2183 * should be freed in stop()]
2184 */
2185 conf = kzalloc(sizeof(conf_t), GFP_KERNEL); 2184 conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
2186 mddev->private = conf; 2185 if (!conf)
2187 if (!conf) {
2188 printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
2189 mdname(mddev));
2190 goto out; 2186 goto out;
2191 } 2187
2192 conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, 2188 conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks,
2193 GFP_KERNEL); 2189 GFP_KERNEL);
2194 if (!conf->mirrors) { 2190 if (!conf->mirrors)
2195 printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", 2191 goto out;
2196 mdname(mddev));
2197 goto out_free_conf;
2198 }
2199 2192
2200 conf->tmppage = alloc_page(GFP_KERNEL); 2193 conf->tmppage = alloc_page(GFP_KERNEL);
2201 if (!conf->tmppage) 2194 if (!conf->tmppage)
2202 goto out_free_conf; 2195 goto out;
2196
2203 2197
2204 conf->raid_disks = mddev->raid_disks; 2198 conf->raid_disks = mddev->raid_disks;
2205 conf->near_copies = nc; 2199 conf->near_copies = nc;
2206 conf->far_copies = fc; 2200 conf->far_copies = fc;
2207 conf->copies = nc*fc; 2201 conf->copies = nc*fc;
2208 conf->far_offset = fo; 2202 conf->far_offset = fo;
2209 conf->chunk_mask = mddev->chunk_sectors - 1; 2203 conf->chunk_mask = mddev->new_chunk_sectors - 1;
2210 conf->chunk_shift = ffz(~mddev->chunk_sectors); 2204 conf->chunk_shift = ffz(~mddev->new_chunk_sectors);
2205
2206 conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc,
2207 r10bio_pool_free, conf);
2208 if (!conf->r10bio_pool)
2209 goto out;
2210
2211 size = mddev->dev_sectors >> conf->chunk_shift; 2211 size = mddev->dev_sectors >> conf->chunk_shift;
2212 sector_div(size, fc); 2212 sector_div(size, fc);
2213 size = size * conf->raid_disks; 2213 size = size * conf->raid_disks;
@@ -2221,7 +2221,8 @@ static int run(mddev_t *mddev)
2221 */ 2221 */
2222 stride += conf->raid_disks - 1; 2222 stride += conf->raid_disks - 1;
2223 sector_div(stride, conf->raid_disks); 2223 sector_div(stride, conf->raid_disks);
2224 mddev->dev_sectors = stride << conf->chunk_shift; 2224
2225 conf->dev_sectors = stride << conf->chunk_shift;
2225 2226
2226 if (fo) 2227 if (fo)
2227 stride = 1; 2228 stride = 1;
@@ -2229,18 +2230,63 @@ static int run(mddev_t *mddev)
2229 sector_div(stride, fc); 2230 sector_div(stride, fc);
2230 conf->stride = stride << conf->chunk_shift; 2231 conf->stride = stride << conf->chunk_shift;
2231 2232
2232 conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc,
2233 r10bio_pool_free, conf);
2234 if (!conf->r10bio_pool) {
2235 printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
2236 mdname(mddev));
2237 goto out_free_conf;
2238 }
2239 2233
2240 conf->mddev = mddev;
2241 spin_lock_init(&conf->device_lock); 2234 spin_lock_init(&conf->device_lock);
2235 INIT_LIST_HEAD(&conf->retry_list);
2236
2237 spin_lock_init(&conf->resync_lock);
2238 init_waitqueue_head(&conf->wait_barrier);
2239
2240 conf->thread = md_register_thread(raid10d, mddev, NULL);
2241 if (!conf->thread)
2242 goto out;
2243
2244 conf->scale_disks = 0;
2245 conf->mddev = mddev;
2246 return conf;
2247
2248 out:
2249 printk(KERN_ERR "md/raid10:%s: couldn't allocate memory.\n",
2250 mdname(mddev));
2251 if (conf) {
2252 if (conf->r10bio_pool)
2253 mempool_destroy(conf->r10bio_pool);
2254 kfree(conf->mirrors);
2255 safe_put_page(conf->tmppage);
2256 kfree(conf);
2257 }
2258 return ERR_PTR(err);
2259}
2260
2261static int run(mddev_t *mddev)
2262{
2263 conf_t *conf;
2264 int i, disk_idx, chunk_size;
2265 mirror_info_t *disk;
2266 mdk_rdev_t *rdev;
2267 sector_t size;
2268
2269 /*
2270 * copy the already verified devices into our private RAID10
2271 * bookkeeping area. [whatever we allocate in run(),
2272 * should be freed in stop()]
2273 */
2274
2275 if (mddev->private == NULL) {
2276 conf = setup_conf(mddev);
2277 if (IS_ERR(conf))
2278 return PTR_ERR(conf);
2279 mddev->private = conf;
2280 }
2281 conf = mddev->private;
2282 if (!conf)
2283 goto out;
2284
2242 mddev->queue->queue_lock = &conf->device_lock; 2285 mddev->queue->queue_lock = &conf->device_lock;
2243 2286
2287 mddev->thread = conf->thread;
2288 conf->thread = NULL;
2289
2244 chunk_size = mddev->chunk_sectors << 9; 2290 chunk_size = mddev->chunk_sectors << 9;
2245 blk_queue_io_min(mddev->queue, chunk_size); 2291 blk_queue_io_min(mddev->queue, chunk_size);
2246 if (conf->raid_disks % conf->near_copies) 2292 if (conf->raid_disks % conf->near_copies)
@@ -2251,9 +2297,14 @@ static int run(mddev_t *mddev)
2251 2297
2252 list_for_each_entry(rdev, &mddev->disks, same_set) { 2298 list_for_each_entry(rdev, &mddev->disks, same_set) {
2253 disk_idx = rdev->raid_disk; 2299 disk_idx = rdev->raid_disk;
2254 if (disk_idx >= mddev->raid_disks 2300 if (disk_idx >= conf->raid_disks
2255 || disk_idx < 0) 2301 || disk_idx < 0)
2256 continue; 2302 continue;
2303 if (conf->scale_disks) {
2304 disk_idx *= conf->scale_disks;
2305 rdev->raid_disk = disk_idx;
2306 /* MOVE 'rd%d' link !! */
2307 }
2257 disk = conf->mirrors + disk_idx; 2308 disk = conf->mirrors + disk_idx;
2258 2309
2259 disk->rdev = rdev; 2310 disk->rdev = rdev;
@@ -2271,14 +2322,9 @@ static int run(mddev_t *mddev)
2271 2322
2272 disk->head_position = 0; 2323 disk->head_position = 0;
2273 } 2324 }
2274 INIT_LIST_HEAD(&conf->retry_list);
2275
2276 spin_lock_init(&conf->resync_lock);
2277 init_waitqueue_head(&conf->wait_barrier);
2278
2279 /* need to check that every block has at least one working mirror */ 2325 /* need to check that every block has at least one working mirror */
2280 if (!enough(conf)) { 2326 if (!enough(conf)) {
2281 printk(KERN_ERR "raid10: not enough operational mirrors for %s\n", 2327 printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n",
2282 mdname(mddev)); 2328 mdname(mddev));
2283 goto out_free_conf; 2329 goto out_free_conf;
2284 } 2330 }
@@ -2297,28 +2343,21 @@ static int run(mddev_t *mddev)
2297 } 2343 }
2298 } 2344 }
2299 2345
2300
2301 mddev->thread = md_register_thread(raid10d, mddev, NULL);
2302 if (!mddev->thread) {
2303 printk(KERN_ERR
2304 "raid10: couldn't allocate thread for %s\n",
2305 mdname(mddev));
2306 goto out_free_conf;
2307 }
2308
2309 if (mddev->recovery_cp != MaxSector) 2346 if (mddev->recovery_cp != MaxSector)
2310 printk(KERN_NOTICE "raid10: %s is not clean" 2347 printk(KERN_NOTICE "md/raid10:%s: not clean"
2311 " -- starting background reconstruction\n", 2348 " -- starting background reconstruction\n",
2312 mdname(mddev)); 2349 mdname(mddev));
2313 printk(KERN_INFO 2350 printk(KERN_INFO
2314 "raid10: raid set %s active with %d out of %d devices\n", 2351 "md/raid10:%s: active with %d out of %d devices\n",
2315 mdname(mddev), mddev->raid_disks - mddev->degraded, 2352 mdname(mddev), conf->raid_disks - mddev->degraded,
2316 mddev->raid_disks); 2353 conf->raid_disks);
2317 /* 2354 /*
2318 * Ok, everything is just fine now 2355 * Ok, everything is just fine now
2319 */ 2356 */
2320 md_set_array_sectors(mddev, raid10_size(mddev, 0, 0)); 2357 mddev->dev_sectors = conf->dev_sectors;
2321 mddev->resync_max_sectors = raid10_size(mddev, 0, 0); 2358 size = raid10_size(mddev, 0, 0);
2359 md_set_array_sectors(mddev, size);
2360 mddev->resync_max_sectors = size;
2322 2361
2323 mddev->queue->unplug_fn = raid10_unplug; 2362 mddev->queue->unplug_fn = raid10_unplug;
2324 mddev->queue->backing_dev_info.congested_fn = raid10_congested; 2363 mddev->queue->backing_dev_info.congested_fn = raid10_congested;
@@ -2336,7 +2375,7 @@ static int run(mddev_t *mddev)
2336 mddev->queue->backing_dev_info.ra_pages = 2* stripe; 2375 mddev->queue->backing_dev_info.ra_pages = 2* stripe;
2337 } 2376 }
2338 2377
2339 if (conf->near_copies < mddev->raid_disks) 2378 if (conf->near_copies < conf->raid_disks)
2340 blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); 2379 blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec);
2341 md_integrity_register(mddev); 2380 md_integrity_register(mddev);
2342 return 0; 2381 return 0;
@@ -2348,6 +2387,7 @@ out_free_conf:
2348 kfree(conf->mirrors); 2387 kfree(conf->mirrors);
2349 kfree(conf); 2388 kfree(conf);
2350 mddev->private = NULL; 2389 mddev->private = NULL;
2390 md_unregister_thread(mddev->thread);
2351out: 2391out:
2352 return -EIO; 2392 return -EIO;
2353} 2393}
@@ -2384,6 +2424,61 @@ static void raid10_quiesce(mddev_t *mddev, int state)
2384 } 2424 }
2385} 2425}
2386 2426
2427static void *raid10_takeover_raid0(mddev_t *mddev)
2428{
2429 mdk_rdev_t *rdev;
2430 conf_t *conf;
2431
2432 if (mddev->degraded > 0) {
2433 printk(KERN_ERR "md/raid10:%s: Error: degraded raid0!\n",
2434 mdname(mddev));
2435 return ERR_PTR(-EINVAL);
2436 }
2437
2438 /* Update slot numbers to obtain
2439 * degraded raid10 with missing mirrors
2440 */
2441 list_for_each_entry(rdev, &mddev->disks, same_set) {
2442 rdev->raid_disk *= 2;
2443 }
2444
2445 /* Set new parameters */
2446 mddev->new_level = 10;
2447 /* new layout: far_copies = 1, near_copies = 2 */
2448 mddev->new_layout = (1<<8) + 2;
2449 mddev->new_chunk_sectors = mddev->chunk_sectors;
2450 mddev->delta_disks = mddev->raid_disks;
2451 mddev->degraded = mddev->raid_disks;
2452 mddev->raid_disks *= 2;
2453 /* make sure it will be not marked as dirty */
2454 mddev->recovery_cp = MaxSector;
2455
2456 conf = setup_conf(mddev);
2457 conf->scale_disks = 2;
2458 return conf;
2459}
2460
2461static void *raid10_takeover(mddev_t *mddev)
2462{
2463 struct raid0_private_data *raid0_priv;
2464
2465 /* raid10 can take over:
2466 * raid0 - providing it has only two drives
2467 */
2468 if (mddev->level == 0) {
2469 /* for raid0 takeover only one zone is supported */
2470 raid0_priv = mddev->private;
2471 if (raid0_priv->nr_strip_zones > 1) {
2472 printk(KERN_ERR "md/raid10:%s: cannot takeover raid 0"
2473 " with more than one zone.\n",
2474 mdname(mddev));
2475 return ERR_PTR(-EINVAL);
2476 }
2477 return raid10_takeover_raid0(mddev);
2478 }
2479 return ERR_PTR(-EINVAL);
2480}
2481
2387static struct mdk_personality raid10_personality = 2482static struct mdk_personality raid10_personality =
2388{ 2483{
2389 .name = "raid10", 2484 .name = "raid10",
@@ -2400,6 +2495,7 @@ static struct mdk_personality raid10_personality =
2400 .sync_request = sync_request, 2495 .sync_request = sync_request,
2401 .quiesce = raid10_quiesce, 2496 .quiesce = raid10_quiesce,
2402 .size = raid10_size, 2497 .size = raid10_size,
2498 .takeover = raid10_takeover,
2403}; 2499};
2404 2500
2405static int __init raid_init(void) 2501static int __init raid_init(void)