Diffstat (limited to 'drivers/md/raid1.c')

 -rw-r--r--  drivers/md/raid1.c | 164

1 file changed, 121 insertions(+), 43 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index cacd008d6864..197f62681db5 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -46,6 +46,20 @@
  */
 #define NR_RAID1_BIOS 256
 
+/* when we get a read error on a read-only array, we redirect to another
+ * device without failing the first device, or trying to over-write to
+ * correct the read error. To keep track of bad blocks on a per-bio
+ * level, we store IO_BLOCKED in the appropriate 'bios' pointer
+ */
+#define IO_BLOCKED ((struct bio *)1)
+/* When we successfully write to a known bad-block, we need to remove the
+ * bad-block marking which must be done from process context. So we record
+ * the success by setting devs[n].bio to IO_MADE_GOOD
+ */
+#define IO_MADE_GOOD ((struct bio *)2)
+
+#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
+
 /* When there are this many requests queue to be written by
  * the raid1 thread, we become 'congested' to provide back-pressure
  * for writeback.
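The sentinels above work because no real struct bio can ever live at kernel address 1 or 2, so a small integer cast to a pointer is a safe out-of-band tag for a slot in r1_bio->bios[], and BIO_SPECIAL() screens such tags (and NULL, which is also <= 2) out before anything dereferences the slot. A minimal user-space sketch of the same sentinel-pointer idiom — slot_state() and the array below are illustrative, not kernel API:

    #include <stdio.h>

    struct bio;  /* opaque stand-in for the kernel's struct bio */

    /* Addresses 1 and 2 are never valid pointers, so they can tag slots. */
    #define IO_BLOCKED   ((struct bio *)1)
    #define IO_MADE_GOOD ((struct bio *)2)
    #define BIO_SPECIAL(bio) ((unsigned long)(bio) <= 2)

    static const char *slot_state(struct bio *b)
    {
            if (b == NULL)
                    return "no IO for this device";
            if (b == IO_BLOCKED)
                    return "bad region: do not retry on this device";
            if (b == IO_MADE_GOOD)
                    return "write over a known bad block succeeded";
            return "real in-flight bio";
    }

    int main(void)
    {
            struct bio *slots[] = { NULL, IO_BLOCKED, IO_MADE_GOOD };

            for (int i = 0; i < 3; i++)
                    printf("slot %d: %s\n", i, slot_state(slots[i]));
            return 0;
    }

The same trick appears in raid10.c; its only requirement is that the tag values stay below the lowest mappable kernel address.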
@@ -483,12 +497,14 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 	const sector_t this_sector = r1_bio->sector;
 	int sectors;
 	int best_good_sectors;
-	int start_disk;
-	int best_disk;
-	int i;
+	int best_disk, best_dist_disk, best_pending_disk;
+	int has_nonrot_disk;
+	int disk;
 	sector_t best_dist;
+	unsigned int min_pending;
 	struct md_rdev *rdev;
 	int choose_first;
+	int choose_next_idle;
 
 	rcu_read_lock();
 	/*
@@ -499,26 +515,26 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
  retry:
 	sectors = r1_bio->sectors;
 	best_disk = -1;
+	best_dist_disk = -1;
 	best_dist = MaxSector;
+	best_pending_disk = -1;
+	min_pending = UINT_MAX;
 	best_good_sectors = 0;
+	has_nonrot_disk = 0;
+	choose_next_idle = 0;
 
 	if (conf->mddev->recovery_cp < MaxSector &&
-	    (this_sector + sectors >= conf->next_resync)) {
+	    (this_sector + sectors >= conf->next_resync))
 		choose_first = 1;
-		start_disk = 0;
-	} else {
+	else
 		choose_first = 0;
-		start_disk = conf->last_used;
-	}
 
-	for (i = 0 ; i < conf->raid_disks * 2 ; i++) {
+	for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
 		sector_t dist;
 		sector_t first_bad;
 		int bad_sectors;
-
-		int disk = start_disk + i;
-		if (disk >= conf->raid_disks * 2)
-			disk -= conf->raid_disks * 2;
+		unsigned int pending;
+		bool nonrot;
 
 		rdev = rcu_dereference(conf->mirrors[disk].rdev);
 		if (r1_bio->bios[disk] == IO_BLOCKED
@@ -577,22 +593,77 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 		} else
 			best_good_sectors = sectors;
 
+		nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev));
+		has_nonrot_disk |= nonrot;
+		pending = atomic_read(&rdev->nr_pending);
 		dist = abs(this_sector - conf->mirrors[disk].head_position);
-		if (choose_first
-		    /* Don't change to another disk for sequential reads */
-		    || conf->next_seq_sect == this_sector
-		    || dist == 0
-		    /* If device is idle, use it */
-		    || atomic_read(&rdev->nr_pending) == 0) {
+		if (choose_first) {
 			best_disk = disk;
 			break;
 		}
+		/* Don't change to another disk for sequential reads */
+		if (conf->mirrors[disk].next_seq_sect == this_sector
+		    || dist == 0) {
+			int opt_iosize = bdev_io_opt(rdev->bdev) >> 9;
+			struct raid1_info *mirror = &conf->mirrors[disk];
+
+			best_disk = disk;
+			/*
+			 * If buffered sequential IO size exceeds optimal
+			 * iosize, check if there is idle disk. If yes, choose
+			 * the idle disk. read_balance could already choose an
+			 * idle disk before noticing it's a sequential IO in
+			 * this disk. This doesn't matter because this disk
+			 * will idle, next time it will be utilized after the
+			 * first disk has IO size exceeds optimal iosize. In
+			 * this way, iosize of the first disk will be optimal
+			 * iosize at least. iosize of the second disk might be
+			 * small, but not a big deal since when the second disk
+			 * starts IO, the first disk is likely still busy.
+			 */
+			if (nonrot && opt_iosize > 0 &&
+			    mirror->seq_start != MaxSector &&
+			    mirror->next_seq_sect > opt_iosize &&
+			    mirror->next_seq_sect - opt_iosize >=
+			    mirror->seq_start) {
+				choose_next_idle = 1;
+				continue;
+			}
+			break;
+		}
+		/* If device is idle, use it */
+		if (pending == 0) {
+			best_disk = disk;
+			break;
+		}
+
+		if (choose_next_idle)
+			continue;
+
+		if (min_pending > pending) {
+			min_pending = pending;
+			best_pending_disk = disk;
+		}
+
 		if (dist < best_dist) {
 			best_dist = dist;
-			best_disk = disk;
+			best_dist_disk = disk;
 		}
 	}
 
+	/*
+	 * If all disks are rotational, choose the closest disk. If any disk is
+	 * non-rotational, choose the disk with less pending request even the
+	 * disk is rotational, which might/might not be optimal for raids with
+	 * mixed ratation/non-rotational disks depending on workload.
+	 */
+	if (best_disk == -1) {
+		if (has_nonrot_disk)
+			best_disk = best_pending_disk;
+		else
+			best_disk = best_dist_disk;
+	}
+
 	if (best_disk >= 0) {
 		rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
 		if (!rdev)
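Taken together, the read_balance() hunks above replace the old round-robin start (conf->last_used) with a full scan: choose_first, a sequential hit, or a fully idle disk still exits the loop early, but otherwise the loop now records two fallback candidates — the disk with the fewest pending requests and the disk with the shortest seek distance — and the best_disk == -1 block picks between them depending on whether any member is non-rotational. A stripped-down sketch of that fallback policy, with a hypothetical mirror_view struct standing in for what the kernel reads from md_rdev and raid1_info under rcu_read_lock():

    #include <stdbool.h>
    #include <limits.h>

    /* Hypothetical flattened view of one mirror. */
    struct mirror_view {
            bool nonrot;                  /* blk_queue_nonrot() */
            unsigned int nr_pending;      /* atomic_read(&rdev->nr_pending) */
            unsigned long long dist;      /* |this_sector - head_position| */
    };

    /* What the tail of read_balance() does once no early exit
     * (choose_first, sequential hit, idle disk) has fired. */
    static int pick_fallback(const struct mirror_view *m, int n)
    {
            int best_pending_disk = -1, best_dist_disk = -1;
            unsigned int min_pending = UINT_MAX;
            unsigned long long best_dist = ULLONG_MAX;
            bool has_nonrot_disk = false;

            for (int disk = 0; disk < n; disk++) {
                    has_nonrot_disk |= m[disk].nonrot;
                    if (m[disk].nr_pending < min_pending) {
                            min_pending = m[disk].nr_pending;
                            best_pending_disk = disk;
                    }
                    if (m[disk].dist < best_dist) {
                            best_dist = m[disk].dist;
                            best_dist_disk = disk;
                    }
            }
            /* Any SSD in the set: queue depth beats seek distance. */
            return has_nonrot_disk ? best_pending_disk : best_dist_disk;
    }

The asymmetry is deliberate: seek distance means nothing on flash, so as soon as one member is non-rotational the pending-request count becomes the congestion signal for the whole set, even for its rotational members — which, as the in-code comment concedes, may or may not be optimal for mixed arrays.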
@@ -606,8 +677,11 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 			goto retry;
 		}
 		sectors = best_good_sectors;
-		conf->next_seq_sect = this_sector + sectors;
-		conf->last_used = best_disk;
+
+		if (conf->mirrors[best_disk].next_seq_sect != this_sector)
+			conf->mirrors[best_disk].seq_start = this_sector;
+
+		conf->mirrors[best_disk].next_seq_sect = this_sector + sectors;
 	}
 	rcu_read_unlock();
 	*max_sectors = sectors;
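This hunk moves the sequential-read bookkeeping from the r1conf into the mirror that actually serviced the read: conf->next_seq_sect becomes conf->mirrors[...].next_seq_sect, conf->last_used disappears, and a new seq_start field (presumably added to struct raid1_info in the companion raid1.h change, which is not part of this diff) records where the current sequential run began. A sketch of the bookkeeping under that assumption:

    typedef unsigned long long sector_t;    /* assumed 64-bit, as in the kernel */
    #define MaxSector (~(sector_t)0)

    /* Assumed shape of the two per-mirror fields this patch relies on. */
    struct seq_track {
            sector_t seq_start;      /* first sector of the current sequential run */
            sector_t next_seq_sect;  /* sector right after the last read */
    };

    static void init_track(struct seq_track *t)
    {
            t->seq_start = MaxSector;       /* as in setup_conf(): no run yet */
            t->next_seq_sect = 0;
    }

    /* Mirrors the update at the end of read_balance(): a read that does not
     * continue the previous one starts a new run. */
    static void note_read(struct seq_track *t, sector_t sector, sector_t sectors)
    {
            if (t->next_seq_sect != sector)
                    t->seq_start = sector;          /* run broken: restart */
            t->next_seq_sect = sector + sectors;    /* where a sequel would land */
    }

The run length so far is next_seq_sect - seq_start, which is exactly the quantity the opt_iosize test in the earlier hunk compares against the device's optimal IO size before handing a long sequential stream over to an idle SSD.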
@@ -873,7 +947,7 @@ do_sync_io:
 static void make_request(struct mddev *mddev, struct bio * bio)
 {
 	struct r1conf *conf = mddev->private;
-	struct mirror_info *mirror;
+	struct raid1_info *mirror;
 	struct r1bio *r1_bio;
 	struct bio *read_bio;
 	int i, disks;
@@ -1364,7 +1438,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	struct r1conf *conf = mddev->private;
 	int err = -EEXIST;
 	int mirror = 0;
-	struct mirror_info *p;
+	struct raid1_info *p;
 	int first = 0;
 	int last = conf->raid_disks - 1;
 	struct request_queue *q = bdev_get_queue(rdev->bdev);
@@ -1433,7 +1507,7 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 	struct r1conf *conf = mddev->private;
 	int err = 0;
 	int number = rdev->raid_disk;
-	struct mirror_info *p = conf->mirrors+ number;
+	struct raid1_info *p = conf->mirrors + number;
 
 	if (rdev != p->rdev)
 		p = conf->mirrors + conf->raid_disks + number;
@@ -2371,6 +2445,18 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
 			bio->bi_rw = READ;
 			bio->bi_end_io = end_sync_read;
 			read_targets++;
+		} else if (!test_bit(WriteErrorSeen, &rdev->flags) &&
+			   test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
+			   !test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
+			/*
+			 * The device is suitable for reading (InSync),
+			 * but has bad block(s) here. Let's try to correct them,
+			 * if we are doing resync or repair. Otherwise, leave
+			 * this device alone for this sync request.
+			 */
+			bio->bi_rw = WRITE;
+			bio->bi_end_io = end_sync_write;
+			write_targets++;
 		}
 	}
 	if (bio->bi_end_io) {
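The new else-if arm handles a device that is in sync but has bad blocks at the current position: it cannot serve reads here, yet during an actual resync or repair pass it may be written, and a successful write lets the bad-block record be cleared later through the IO_MADE_GOOD machinery. The test_bit() flags are the kernel's own; folding them into a predicate (the function name is illustrative) makes the decision easier to read:

    #include <stdbool.h>

    /* Should sync_request() turn an in-sync device with bad blocks at the
     * current position into a write target instead of skipping it? */
    static bool try_to_fix_bad_blocks(bool write_error_seen,
                                      bool recovery_sync, bool recovery_check)
    {
            return !write_error_seen    /* writes to the device still work */
                && recovery_sync        /* we are resyncing or repairing... */
                && !recovery_check;     /* ...not doing a read-only check */
    }

MD_RECOVERY_CHECK distinguishes a read-only "check" pass, which must not write, from "resync"/"repair" passes, which may.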
@@ -2428,7 +2514,10 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
 		/* There is nowhere to write, so all non-sync
 		 * drives must be failed - so we are finished
 		 */
-		sector_t rv = max_sector - sector_nr;
+		sector_t rv;
+		if (min_bad > 0)
+			max_sector = sector_nr + min_bad;
+		rv = max_sector - sector_nr;
 		*skipped = 1;
 		put_buf(r1_bio);
 		return rv;
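Previously, "nowhere to write" skipped all the way to max_sector in one jump. With bad-block tracking, min_bad (computed earlier in sync_request(), outside this hunk, as the smallest extent of bad sectors found at sector_nr) caps the skip so only the unrepairable region is passed over. A small sketch of the clamped computation — skip_len() is a hypothetical stand-in:

    typedef unsigned long long sector_t;

    /* Sketch of the clamped skip when no sync write target exists. */
    static sector_t skip_len(sector_t sector_nr, sector_t max_sector, int min_bad)
    {
            if (min_bad > 0)                        /* bad block(s) start right here */
                    max_sector = sector_nr + min_bad;  /* only skip past them */
            return max_sector - sector_nr;          /* == min_bad when clamped */
    }

For instance, skip_len(1000, 5000, 8) returns 8 rather than 4000, so the sync cursor advances to sector 1008 and the rest of the array still gets resynced.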
@@ -2521,7 +2610,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 {
 	struct r1conf *conf;
 	int i;
-	struct mirror_info *disk;
+	struct raid1_info *disk;
 	struct md_rdev *rdev;
 	int err = -ENOMEM;
 
@@ -2529,7 +2618,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 	if (!conf)
 		goto abort;
 
-	conf->mirrors = kzalloc(sizeof(struct mirror_info)
+	conf->mirrors = kzalloc(sizeof(struct raid1_info)
 				* mddev->raid_disks * 2,
 				 GFP_KERNEL);
 	if (!conf->mirrors)
@@ -2572,6 +2661,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 			mddev->merge_check_needed = 1;
 
 		disk->head_position = 0;
+		disk->seq_start = MaxSector;
 	}
 	conf->raid_disks = mddev->raid_disks;
 	conf->mddev = mddev;
@@ -2585,7 +2675,6 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 	conf->recovery_disabled = mddev->recovery_disabled - 1;
 
 	err = -EIO;
-	conf->last_used = -1;
 	for (i = 0; i < conf->raid_disks * 2; i++) {
 
 		disk = conf->mirrors + i;
@@ -2611,19 +2700,9 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 			if (disk->rdev &&
 			    (disk->rdev->saved_raid_disk < 0))
 				conf->fullsync = 1;
-		} else if (conf->last_used < 0)
-			/*
-			 * The first working device is used as a
-			 * starting point to read balancing.
-			 */
-			conf->last_used = i;
+		}
 	}
 
-	if (conf->last_used < 0) {
-		printk(KERN_ERR "md/raid1:%s: no operational mirrors\n",
-		       mdname(mddev));
-		goto abort;
-	}
 	err = -ENOMEM;
 	conf->thread = md_register_thread(raid1d, mddev, "raid1");
 	if (!conf->thread) {
@@ -2798,7 +2877,7 @@ static int raid1_reshape(struct mddev *mddev)
 	 */
 	mempool_t *newpool, *oldpool;
 	struct pool_info *newpoolinfo;
-	struct mirror_info *newmirrors;
+	struct raid1_info *newmirrors;
 	struct r1conf *conf = mddev->private;
 	int cnt, raid_disks;
 	unsigned long flags;
@@ -2841,7 +2920,7 @@ static int raid1_reshape(struct mddev *mddev)
 		kfree(newpoolinfo);
 		return -ENOMEM;
 	}
-	newmirrors = kzalloc(sizeof(struct mirror_info) * raid_disks * 2,
+	newmirrors = kzalloc(sizeof(struct raid1_info) * raid_disks * 2,
 			     GFP_KERNEL);
 	if (!newmirrors) {
 		kfree(newpoolinfo);
@@ -2880,7 +2959,6 @@ static int raid1_reshape(struct mddev *mddev)
 	conf->raid_disks = mddev->raid_disks = raid_disks;
 	mddev->delta_disks = 0;
 
-	conf->last_used = 0; /* just make sure it is in-range */
 	lower_barrier(conf);
 
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
