Diffstat (limited to 'drivers/md/raid10.c')

 drivers/md/raid10.c | 544 +++++++++++++++++++++++++++++++++++-----------
 1 files changed, 420 insertions(+), 124 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 713dc9c2c730..9e658e519a27 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -18,7 +18,9 @@
  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include "dm-bio-list.h"
 #include <linux/raid/raid10.h>
+#include <linux/raid/bitmap.h>
 
 /*
  * RAID10 provides a combination of RAID0 and RAID1 functionality.
@@ -47,6 +49,9 @@
 
 static void unplug_slaves(mddev_t *mddev);
 
+static void allow_barrier(conf_t *conf);
+static void lower_barrier(conf_t *conf);
+
 static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
 	conf_t *conf = data;
@@ -54,10 +59,8 @@ static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
 	int size = offsetof(struct r10bio_s, devs[conf->copies]);
 
 	/* allocate a r10bio with room for raid_disks entries in the bios array */
-	r10_bio = kmalloc(size, gfp_flags);
-	if (r10_bio)
-		memset(r10_bio, 0, size);
-	else
+	r10_bio = kzalloc(size, gfp_flags);
+	if (!r10_bio)
 		unplug_slaves(conf->mddev);
 
 	return r10_bio;
@@ -129,10 +132,10 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
 
 out_free_pages:
 	for ( ; i > 0 ; i--)
-		__free_page(bio->bi_io_vec[i-1].bv_page);
+		safe_put_page(bio->bi_io_vec[i-1].bv_page);
 	while (j--)
 		for (i = 0; i < RESYNC_PAGES ; i++)
-			__free_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page);
+			safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page);
 	j = -1;
out_free_bio:
 	while ( ++j < nalloc )
@@ -152,7 +155,7 @@ static void r10buf_pool_free(void *__r10_bio, void *data)
 		struct bio *bio = r10bio->devs[j].bio;
 		if (bio) {
 			for (i = 0; i < RESYNC_PAGES; i++) {
-				__free_page(bio->bi_io_vec[i].bv_page);
+				safe_put_page(bio->bi_io_vec[i].bv_page);
 				bio->bi_io_vec[i].bv_page = NULL;
 			}
 			bio_put(bio);
@@ -167,7 +170,7 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio)
 
 	for (i = 0; i < conf->copies; i++) {
 		struct bio **bio = & r10_bio->devs[i].bio;
-		if (*bio)
+		if (*bio && *bio != IO_BLOCKED)
 			bio_put(*bio);
 		*bio = NULL;
 	}
@@ -175,20 +178,13 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio)
 
 static inline void free_r10bio(r10bio_t *r10_bio)
 {
-	unsigned long flags;
-
 	conf_t *conf = mddev_to_conf(r10_bio->mddev);
 
 	/*
 	 * Wake up any possible resync thread that waits for the device
 	 * to go idle.
 	 */
-	spin_lock_irqsave(&conf->resync_lock, flags);
-	if (!--conf->nr_pending) {
-		wake_up(&conf->wait_idle);
-		wake_up(&conf->wait_resume);
-	}
-	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	allow_barrier(conf);
 
 	put_all_bios(conf, r10_bio);
 	mempool_free(r10_bio, conf->r10bio_pool);
@@ -197,22 +193,10 @@ static inline void free_r10bio(r10bio_t *r10_bio)
 static inline void put_buf(r10bio_t *r10_bio)
 {
 	conf_t *conf = mddev_to_conf(r10_bio->mddev);
-	unsigned long flags;
 
 	mempool_free(r10_bio, conf->r10buf_pool);
 
-	spin_lock_irqsave(&conf->resync_lock, flags);
-	if (!conf->barrier)
-		BUG();
-	--conf->barrier;
-	wake_up(&conf->wait_resume);
-	wake_up(&conf->wait_idle);
-
-	if (!--conf->nr_pending) {
-		wake_up(&conf->wait_idle);
-		wake_up(&conf->wait_resume);
-	}
-	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	lower_barrier(conf);
 }
 
 static void reschedule_retry(r10bio_t *r10_bio)
@@ -223,6 +207,7 @@ static void reschedule_retry(r10bio_t *r10_bio)
 
 	spin_lock_irqsave(&conf->device_lock, flags);
 	list_add(&r10_bio->retry_list, &conf->retry_list);
+	conf->nr_queued ++;
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 
 	md_wakeup_thread(mddev->thread);
@@ -268,9 +253,9 @@ static int raid10_end_read_request(struct bio *bio, unsigned int bytes_done, int
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
-	if (!uptodate)
-		md_error(r10_bio->mddev, conf->mirrors[dev].rdev);
-	else
+	update_head_pos(slot, r10_bio);
+
+	if (uptodate) {
 		/*
 		 * Set R10BIO_Uptodate in our master bio, so that
 		 * we will return a good error code to the higher
@@ -281,15 +266,8 @@ static int raid10_end_read_request(struct bio *bio, unsigned int bytes_done, int
 		 * wait for the 'master' bio.
 		 */
 		set_bit(R10BIO_Uptodate, &r10_bio->state);
-
-	update_head_pos(slot, r10_bio);
-
-	/*
-	 * we have only one bio on the read side
-	 */
-	if (uptodate)
 		raid_end_bio_io(r10_bio);
-	else {
+	} else {
 		/*
 		 * oops, read error:
 		 */
@@ -322,9 +300,11 @@ static int raid10_end_write_request(struct bio *bio, unsigned int bytes_done, in
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
-	if (!uptodate)
+	if (!uptodate) {
 		md_error(r10_bio->mddev, conf->mirrors[dev].rdev);
-	else
+		/* an I/O failed, we can't clear the bitmap */
+		set_bit(R10BIO_Degraded, &r10_bio->state);
+	} else
 		/*
 		 * Set R10BIO_Uptodate in our master bio, so that
 		 * we will return a good error code to the higher
@@ -344,6 +324,11 @@ static int raid10_end_write_request(struct bio *bio, unsigned int bytes_done, in
 	 * already.
 	 */
 	if (atomic_dec_and_test(&r10_bio->remaining)) {
+		/* clear the bitmap if all writes complete successfully */
+		bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
+				r10_bio->sectors,
+				!test_bit(R10BIO_Degraded, &r10_bio->state),
+				0);
 		md_write_end(r10_bio->mddev);
 		raid_end_bio_io(r10_bio);
 	}
@@ -502,8 +487,9 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
 	rcu_read_lock();
 	/*
 	 * Check if we can balance. We can balance on the whole
-	 * device if no resync is going on, or below the resync window.
-	 * We take the first readable disk when above the resync window.
+	 * device if no resync is going on (recovery is ok), or below
+	 * the resync window. We take the first readable disk when
+	 * above the resync window.
 	 */
 	if (conf->mddev->recovery_cp < MaxSector
 	    && (this_sector + sectors >= conf->next_resync)) {
@@ -512,6 +498,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
 	disk = r10_bio->devs[slot].devnum;
 
 	while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
+	       r10_bio->devs[slot].bio == IO_BLOCKED ||
 	       !test_bit(In_sync, &rdev->flags)) {
 		slot++;
 		if (slot == conf->copies) {
@@ -529,6 +516,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
 	slot = 0;
 	disk = r10_bio->devs[slot].devnum;
 	while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
+	       r10_bio->devs[slot].bio == IO_BLOCKED ||
 	       !test_bit(In_sync, &rdev->flags)) {
 		slot ++;
 		if (slot == conf->copies) {
@@ -549,6 +537,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
 
 
 		if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL ||
+		    r10_bio->devs[nslot].bio == IO_BLOCKED ||
 		    !test_bit(In_sync, &rdev->flags))
 			continue;
 
@@ -607,7 +596,10 @@ static void unplug_slaves(mddev_t *mddev)
 
 static void raid10_unplug(request_queue_t *q)
 {
+	mddev_t *mddev = q->queuedata;
+
 	unplug_slaves(q->queuedata);
+	md_wakeup_thread(mddev->thread);
 }
 
 static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk,
@@ -640,27 +632,107 @@ static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk,
 	return ret;
 }
 
-/*
- * Throttle resync depth, so that we can both get proper overlapping of
- * requests, but are still able to handle normal requests quickly.
+/* Barriers....
+ * Sometimes we need to suspend IO while we do something else,
+ * either some resync/recovery, or reconfigure the array.
+ * To do this we raise a 'barrier'.
+ * The 'barrier' is a counter that can be raised multiple times
+ * to count how many activities are happening which preclude
+ * normal IO.
+ * We can only raise the barrier if there is no pending IO.
+ * i.e. if nr_pending == 0.
+ * We choose only to raise the barrier if no-one is waiting for the
+ * barrier to go down.  This means that as soon as an IO request
+ * is ready, no other operations which require a barrier will start
+ * until the IO request has had a chance.
+ *
+ * So: regular IO calls 'wait_barrier'.  When that returns there
+ *    is no background IO happening.  It must arrange to call
+ *    allow_barrier when it has finished its IO.
+ * background IO calls must call raise_barrier.  Once that returns
+ *    there is no normal IO happening.  It must arrange to call
+ *    lower_barrier when the particular background IO completes.
  */
 #define RESYNC_DEPTH 32
 
-static void device_barrier(conf_t *conf, sector_t sect)
+static void raise_barrier(conf_t *conf, int force)
+{
+	BUG_ON(force && !conf->barrier);
+	spin_lock_irq(&conf->resync_lock);
+
+	/* Wait until no block IO is waiting (unless 'force') */
+	wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
+			    conf->resync_lock,
+			    raid10_unplug(conf->mddev->queue));
+
+	/* block any new IO from starting */
+	conf->barrier++;
+
+	/* Now wait for all pending IO to complete */
+	wait_event_lock_irq(conf->wait_barrier,
+			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+			    conf->resync_lock,
+			    raid10_unplug(conf->mddev->queue));
+
+	spin_unlock_irq(&conf->resync_lock);
+}
+
+static void lower_barrier(conf_t *conf)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&conf->resync_lock, flags);
+	conf->barrier--;
+	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	wake_up(&conf->wait_barrier);
+}
+
+static void wait_barrier(conf_t *conf)
 {
 	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume),
-			    conf->resync_lock, unplug_slaves(conf->mddev));
-
-	if (!conf->barrier++) {
-		wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
-				    conf->resync_lock, unplug_slaves(conf->mddev));
-		if (conf->nr_pending)
-			BUG();
+	if (conf->barrier) {
+		conf->nr_waiting++;
+		wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
+				    conf->resync_lock,
+				    raid10_unplug(conf->mddev->queue));
+		conf->nr_waiting--;
 	}
-	wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock, unplug_slaves(conf->mddev));
-	conf->next_resync = sect;
+	conf->nr_pending++;
+	spin_unlock_irq(&conf->resync_lock);
+}
+
+static void allow_barrier(conf_t *conf)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&conf->resync_lock, flags);
+	conf->nr_pending--;
+	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	wake_up(&conf->wait_barrier);
+}
+
+static void freeze_array(conf_t *conf)
+{
+	/* stop syncio and normal IO and wait for everything to
+	 * go quiet.
+	 * We increment barrier and nr_waiting, and then
+	 * wait until barrier+nr_pending match nr_queued+2
+	 */
+	spin_lock_irq(&conf->resync_lock);
+	conf->barrier++;
+	conf->nr_waiting++;
+	wait_event_lock_irq(conf->wait_barrier,
+			    conf->barrier+conf->nr_pending == conf->nr_queued+2,
+			    conf->resync_lock,
+			    raid10_unplug(conf->mddev->queue));
+	spin_unlock_irq(&conf->resync_lock);
+}
+
+static void unfreeze_array(conf_t *conf)
+{
+	/* reverse the effect of the freeze */
+	spin_lock_irq(&conf->resync_lock);
+	conf->barrier--;
+	conf->nr_waiting--;
+	wake_up(&conf->wait_barrier);
 	spin_unlock_irq(&conf->resync_lock);
 }
 
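The barrier scheme above is easier to see in isolation. What follows is a minimal userspace model of the same counting logic, included here purely as an illustration: struct conf_model and the model_* names are invented, pthread primitives stand in for resync_lock/wait_barrier, and the RESYNC_DEPTH throttle and the queue unplugging done while waiting are omitted.

#include <pthread.h>

struct conf_model {
	pthread_mutex_t lock;	/* stands in for conf->resync_lock */
	pthread_cond_t wait;	/* stands in for conf->wait_barrier */
	int barrier;		/* active background (resync/recovery) ops */
	int nr_pending;		/* normal IO in flight */
	int nr_waiting;		/* normal IO parked behind a barrier */
};

/* Background IO entry point: exclude normal IO. */
static void model_raise_barrier(struct conf_model *c)
{
	pthread_mutex_lock(&c->lock);
	while (c->nr_waiting)		/* let queued normal IO go first */
		pthread_cond_wait(&c->wait, &c->lock);
	c->barrier++;			/* new normal IO now blocks */
	while (c->nr_pending)		/* drain IO already in flight */
		pthread_cond_wait(&c->wait, &c->lock);
	pthread_mutex_unlock(&c->lock);
}

static void model_lower_barrier(struct conf_model *c)
{
	pthread_mutex_lock(&c->lock);
	c->barrier--;
	pthread_cond_broadcast(&c->wait);
	pthread_mutex_unlock(&c->lock);
}

/* Normal IO entry point: wait out any barrier, then count as pending. */
static void model_wait_barrier(struct conf_model *c)
{
	pthread_mutex_lock(&c->lock);
	c->nr_waiting++;
	while (c->barrier)
		pthread_cond_wait(&c->wait, &c->lock);
	c->nr_waiting--;
	c->nr_pending++;
	pthread_mutex_unlock(&c->lock);
}

static void model_allow_barrier(struct conf_model *c)
{
	pthread_mutex_lock(&c->lock);
	c->nr_pending--;
	pthread_cond_broadcast(&c->wait);	/* may unblock a raiser */
	pthread_mutex_unlock(&c->lock);
}

On the freeze_array() condition barrier+nr_pending == nr_queued+2: the +2 appears to cover the barrier freeze_array itself just raised plus the one failed request its caller is still holding, so every other pending request must either complete or park itself on the retry queue (nr_queued) before the array counts as quiet.
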
@@ -674,6 +746,8 @@ static int make_request(request_queue_t *q, struct bio * bio)
 	int i;
 	int chunk_sects = conf->chunk_mask + 1;
 	const int rw = bio_data_dir(bio);
+	struct bio_list bl;
+	unsigned long flags;
 
 	if (unlikely(bio_barrier(bio))) {
 		bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
@@ -719,10 +793,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
 	 * thread has put up a bar for new requests.
 	 * Continue immediately if no resync is active currently.
 	 */
-	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
-	conf->nr_pending++;
-	spin_unlock_irq(&conf->resync_lock);
+	wait_barrier(conf);
 
 	disk_stat_inc(mddev->gendisk, ios[rw]);
 	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
@@ -734,6 +805,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
 
 	r10_bio->mddev = mddev;
 	r10_bio->sector = bio->bi_sector;
+	r10_bio->state = 0;
 
 	if (rw == READ) {
 		/*
@@ -778,13 +850,16 @@ static int make_request(request_queue_t *q, struct bio * bio)
 		    !test_bit(Faulty, &rdev->flags)) {
 			atomic_inc(&rdev->nr_pending);
 			r10_bio->devs[i].bio = bio;
-		} else
+		} else {
 			r10_bio->devs[i].bio = NULL;
+			set_bit(R10BIO_Degraded, &r10_bio->state);
+		}
 	}
 	rcu_read_unlock();
 
-	atomic_set(&r10_bio->remaining, 1);
+	atomic_set(&r10_bio->remaining, 0);
 
+	bio_list_init(&bl);
 	for (i = 0; i < conf->copies; i++) {
 		struct bio *mbio;
 		int d = r10_bio->devs[i].devnum;
@@ -802,13 +877,14 @@ static int make_request(request_queue_t *q, struct bio * bio)
 		mbio->bi_private = r10_bio;
 
 		atomic_inc(&r10_bio->remaining);
-		generic_make_request(mbio);
+		bio_list_add(&bl, mbio);
 	}
 
-	if (atomic_dec_and_test(&r10_bio->remaining)) {
-		md_write_end(mddev);
-		raid_end_bio_io(r10_bio);
-	}
+	bitmap_startwrite(mddev->bitmap, bio->bi_sector, r10_bio->sectors, 0);
+	spin_lock_irqsave(&conf->device_lock, flags);
+	bio_list_merge(&conf->pending_bio_list, &bl);
+	blk_plug_device(mddev->queue);
+	spin_unlock_irqrestore(&conf->device_lock, flags);
 
 	return 0;
 }
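The point of the restructuring above is ordering: make_request now marks the affected bitmap chunks dirty (bitmap_startwrite) and parks the mirrored writes on conf->pending_bio_list instead of issuing them; raid10d later flushes the bitmap to disk (bitmap_unplug) before submitting them, so data can never reach a member device ahead of the bitmap bits that cover it. A rough userspace model of that producer/flush split, with invented names and the bio payload elided:

#include <pthread.h>
#include <stddef.h>

struct model_bio {
	struct model_bio *bi_next;
	/* payload elided */
};

struct model_pending {
	pthread_mutex_t lock;		/* stands in for device_lock */
	struct model_bio *head, *tail;
};

/* make_request side: park a write instead of submitting it. */
static void model_queue_write(struct model_pending *p, struct model_bio *b)
{
	pthread_mutex_lock(&p->lock);
	b->bi_next = NULL;
	if (p->tail)
		p->tail->bi_next = b;
	else
		p->head = b;
	p->tail = b;
	pthread_mutex_unlock(&p->lock);
}

/* daemon side: take everything, flush the bitmap, then submit. */
static void model_flush_writes(struct model_pending *p,
			       void (*flush_bitmap)(void),
			       void (*submit)(struct model_bio *))
{
	struct model_bio *bio;

	pthread_mutex_lock(&p->lock);
	bio = p->head;
	p->head = p->tail = NULL;
	pthread_mutex_unlock(&p->lock);

	flush_bitmap();			/* bitmap bits hit disk first */
	while (bio) {
		struct model_bio *next = bio->bi_next;
		bio->bi_next = NULL;
		submit(bio);
		bio = next;
	}
}
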
@@ -897,13 +973,8 @@ static void print_conf(conf_t *conf)
 
 static void close_sync(conf_t *conf)
 {
-	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_resume, !conf->barrier,
-			    conf->resync_lock, unplug_slaves(conf->mddev));
-	spin_unlock_irq(&conf->resync_lock);
-
-	if (conf->barrier) BUG();
-	if (waitqueue_active(&conf->wait_idle)) BUG();
+	wait_barrier(conf);
+	allow_barrier(conf);
 
 	mempool_destroy(conf->r10buf_pool);
 	conf->r10buf_pool = NULL;
@@ -971,7 +1042,12 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 	if (!enough(conf))
 		return 0;
 
-	for (mirror=0; mirror < mddev->raid_disks; mirror++)
+	if (rdev->saved_raid_disk >= 0 &&
+	    conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
+		mirror = rdev->saved_raid_disk;
+	else
+		mirror = 0;
+	for ( ; mirror < mddev->raid_disks; mirror++)
 		if ( !(p=conf->mirrors+mirror)->rdev) {
 
 			blk_queue_stack_limits(mddev->queue,
@@ -987,6 +1063,8 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 			p->head_position = 0;
 			rdev->raid_disk = mirror;
 			found = 1;
+			if (rdev->saved_raid_disk != mirror)
+				conf->fullsync = 1;
 			rcu_assign_pointer(p->rdev, rdev);
 			break;
 		}
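The two hunks above make re-added devices cheap: a returning device gets its old slot (saved_raid_disk) when that slot is still free, and only a device landing somewhere unexpected forces conf->fullsync. The slot choice on its own, as a hedged standalone sketch with invented names and a bare pointer array standing in for conf->mirrors:

static int pick_mirror_slot(int saved_raid_disk, void * const *mirrors,
			    int raid_disks)
{
	int mirror = 0;

	/* prefer the slot this device occupied before it was removed */
	if (saved_raid_disk >= 0 && saved_raid_disk < raid_disks &&
	    mirrors[saved_raid_disk] == NULL)
		mirror = saved_raid_disk;
	for ( ; mirror < raid_disks; mirror++)
		if (mirrors[mirror] == NULL)
			return mirror;	/* first free slot from start point */
	return -1;			/* no free slot */
}
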
@@ -1027,7 +1105,6 @@ abort:
 
 static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
 {
-	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private);
 	conf_t *conf = mddev_to_conf(r10_bio->mddev);
 	int i,d;
@@ -1042,9 +1119,16 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
 		BUG();
 	update_head_pos(i, r10_bio);
 	d = r10_bio->devs[i].devnum;
-	if (!uptodate)
-		md_error(r10_bio->mddev,
-			 conf->mirrors[d].rdev);
+
+	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+		set_bit(R10BIO_Uptodate, &r10_bio->state);
+	else {
+		atomic_add(r10_bio->sectors,
+			   &conf->mirrors[d].rdev->corrected_errors);
+		if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
+			md_error(r10_bio->mddev,
+				 conf->mirrors[d].rdev);
+	}
 
 	/* for reconstruct, we always reschedule after a read.
 	 * for resync, only after all reads
@@ -1132,23 +1216,32 @@ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio)
 	fbio = r10_bio->devs[i].bio;
 
 	/* now find blocks with errors */
-	for (i=first+1 ; i < conf->copies ; i++) {
-		int vcnt, j, d;
+	for (i=0 ; i < conf->copies ; i++) {
+		int j, d;
+		int vcnt = r10_bio->sectors >> (PAGE_SHIFT-9);
 
-		if (!test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags))
-			continue;
-		/* We know that the bi_io_vec layout is the same for
-		 * both 'first' and 'i', so we just compare them.
-		 * All vec entries are PAGE_SIZE;
-		 */
 		tbio = r10_bio->devs[i].bio;
-		vcnt = r10_bio->sectors >> (PAGE_SHIFT-9);
-		for (j = 0; j < vcnt; j++)
-			if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
-				   page_address(tbio->bi_io_vec[j].bv_page),
-				   PAGE_SIZE))
-				break;
-		if (j == vcnt)
+
+		if (tbio->bi_end_io != end_sync_read)
+			continue;
+		if (i == first)
+			continue;
+		if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) {
+			/* We know that the bi_io_vec layout is the same for
+			 * both 'first' and 'i', so we just compare them.
+			 * All vec entries are PAGE_SIZE;
+			 */
+			for (j = 0; j < vcnt; j++)
+				if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
+					   page_address(tbio->bi_io_vec[j].bv_page),
+					   PAGE_SIZE))
+					break;
+			if (j == vcnt)
+				continue;
+			mddev->resync_mismatches += r10_bio->sectors;
+		}
+		if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
+			/* Don't fix anything. */
 			continue;
 		/* Ok, we need to write this bio
 		 * First we need to fixup bv_offset, bv_len and
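The rewritten loop compares every page of each copy that was actually read (tbio->bi_end_io == end_sync_read) against the first good copy, bumps resync_mismatches on any difference, and when MD_RECOVERY_CHECK is set stops there instead of rewriting. The comparison reduced to its essence, with invented names and a fixed 4 KiB page standing in for PAGE_SIZE:

#include <string.h>

#define MODEL_PAGE_SIZE 4096	/* stands in for PAGE_SIZE */

/* Return the index of the first differing page, or -1 if all match. */
static int model_first_mismatch(const unsigned char *fbuf,
				const unsigned char *tbuf, int vcnt)
{
	int j;

	for (j = 0; j < vcnt; j++)
		if (memcmp(fbuf + (size_t)j * MODEL_PAGE_SIZE,
			   tbuf + (size_t)j * MODEL_PAGE_SIZE,
			   MODEL_PAGE_SIZE))
			return j;
	return -1;
}
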
@@ -1227,7 +1320,10 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)
 
 	atomic_inc(&conf->mirrors[d].rdev->nr_pending);
 	md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
-	generic_make_request(wbio);
+	if (test_bit(R10BIO_Uptodate, &r10_bio->state))
+		generic_make_request(wbio);
+	else
+		bio_endio(wbio, wbio->bi_size, -EIO);
 }
 
 
@@ -1254,10 +1350,31 @@ static void raid10d(mddev_t *mddev)
 	for (;;) {
 		char b[BDEVNAME_SIZE];
 		spin_lock_irqsave(&conf->device_lock, flags);
+
+		if (conf->pending_bio_list.head) {
+			bio = bio_list_get(&conf->pending_bio_list);
+			blk_remove_plug(mddev->queue);
+			spin_unlock_irqrestore(&conf->device_lock, flags);
+			/* flush any pending bitmap writes to disk before proceeding w/ I/O */
+			if (bitmap_unplug(mddev->bitmap) != 0)
+				printk("%s: bitmap file write failed!\n", mdname(mddev));
+
+			while (bio) { /* submit pending writes */
+				struct bio *next = bio->bi_next;
+				bio->bi_next = NULL;
+				generic_make_request(bio);
+				bio = next;
+			}
+			unplug = 1;
+
+			continue;
+		}
+
 		if (list_empty(head))
 			break;
 		r10_bio = list_entry(head->prev, r10bio_t, retry_list);
 		list_del(head->prev);
+		conf->nr_queued--;
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 
 		mddev = r10_bio->mddev;
@@ -1270,8 +1387,96 @@ static void raid10d(mddev_t *mddev)
 			unplug = 1;
 		} else {
 			int mirror;
+			/* we got a read error. Maybe the drive is bad.  Maybe just
+			 * the block and we can fix it.
+			 * We freeze all other IO, and try reading the block from
+			 * other devices.  When we find one, we re-write it
+			 * and re-read to check that the read error is fixed.
+			 * This is all done synchronously while the array is
+			 * frozen.
+			 */
+			int sect = 0; /* Offset from r10_bio->sector */
+			int sectors = r10_bio->sectors;
+			freeze_array(conf);
+			if (mddev->ro == 0) while(sectors) {
+				int s = sectors;
+				int sl = r10_bio->read_slot;
+				int success = 0;
+
+				if (s > (PAGE_SIZE>>9))
+					s = PAGE_SIZE >> 9;
+
+				do {
+					int d = r10_bio->devs[sl].devnum;
+					rdev = conf->mirrors[d].rdev;
+					if (rdev &&
+					    test_bit(In_sync, &rdev->flags) &&
+					    sync_page_io(rdev->bdev,
+							 r10_bio->devs[sl].addr +
+							 sect + rdev->data_offset,
+							 s<<9,
+							 conf->tmppage, READ))
+						success = 1;
+					else {
+						sl++;
+						if (sl == conf->copies)
+							sl = 0;
+					}
+				} while (!success && sl != r10_bio->read_slot);
+
+				if (success) {
+					int start = sl;
+					/* write it back and re-read */
+					while (sl != r10_bio->read_slot) {
+						int d;
+						if (sl==0)
+							sl = conf->copies;
+						sl--;
+						d = r10_bio->devs[sl].devnum;
+						rdev = conf->mirrors[d].rdev;
+						atomic_add(s, &rdev->corrected_errors);
+						if (rdev &&
+						    test_bit(In_sync, &rdev->flags)) {
+							if (sync_page_io(rdev->bdev,
+									 r10_bio->devs[sl].addr +
+									 sect + rdev->data_offset,
+									 s<<9, conf->tmppage, WRITE) == 0)
+								/* Well, this device is dead */
+								md_error(mddev, rdev);
+						}
+					}
+					sl = start;
+					while (sl != r10_bio->read_slot) {
+						int d;
+						if (sl==0)
+							sl = conf->copies;
+						sl--;
+						d = r10_bio->devs[sl].devnum;
+						rdev = conf->mirrors[d].rdev;
+						if (rdev &&
+						    test_bit(In_sync, &rdev->flags)) {
+							if (sync_page_io(rdev->bdev,
+									 r10_bio->devs[sl].addr +
+									 sect + rdev->data_offset,
+									 s<<9, conf->tmppage, READ) == 0)
+								/* Well, this device is dead */
+								md_error(mddev, rdev);
+						}
+					}
+				} else {
+					/* Cannot read from anywhere -- bye bye array */
+					md_error(mddev, conf->mirrors[r10_bio->devs[r10_bio->read_slot].devnum].rdev);
+					break;
+				}
+				sectors -= s;
+				sect += s;
+			}
+
+			unfreeze_array(conf);
+
 			bio = r10_bio->devs[r10_bio->read_slot].bio;
-			r10_bio->devs[r10_bio->read_slot].bio = NULL;
+			r10_bio->devs[r10_bio->read_slot].bio =
+				mddev->ro ? IO_BLOCKED : NULL;
 			bio_put(bio);
 			mirror = read_balance(conf, r10_bio);
 			if (mirror == -1) {
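The repair loop above processes the failed request in PAGE_SIZE chunks: a forward ring-walk over the copies finds one that still reads cleanly, then two backward walks write the good data back and re-read it on every other copy, including the slot that originally failed, which is exactly what repairs the bad block. The two walk shapes in isolation, with invented names and the actual IO abstracted into callbacks:

/* Forward search starting at (and retrying) the slot that failed. */
static int model_find_readable(int read_slot, int copies,
			       int (*try_read)(int slot, void *ctx), void *ctx)
{
	int sl = read_slot;

	do {
		if (try_read(sl, ctx))
			return sl;	/* this copy delivered good data */
		if (++sl == copies)
			sl = 0;		/* wrap around the copy ring */
	} while (sl != read_slot);
	return -1;			/* nothing readable */
}

/* Backward walk from 'start' (the good copy, excluded) down to and
 * including read_slot, mirroring the "if (sl==0) sl = conf->copies;
 * sl--;" loops used above for write-back and verification. */
static void model_visit_others(int read_slot, int copies, int start,
			       void (*visit)(int slot, void *ctx), void *ctx)
{
	int sl = start;

	while (sl != read_slot) {
		if (sl == 0)
			sl = copies;
		sl--;
		visit(sl, ctx);
	}
}
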
@@ -1360,6 +1565,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	sector_t max_sector, nr_sectors;
 	int disk;
 	int i;
+	int max_sync;
+	int sync_blocks;
 
 	sector_t sectors_skipped = 0;
 	int chunks_skipped = 0;
@@ -1373,6 +1580,29 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
 		max_sector = mddev->resync_max_sectors;
 	if (sector_nr >= max_sector) {
+		/* If we aborted, we need to abort the
+		 * sync on the 'current' bitmap chunks (there can
+		 * be several when recovering multiple devices),
+		 * as we may have started syncing it but not finished.
+		 * We can find the current address in
+		 * mddev->curr_resync, but for recovery,
+		 * we need to convert that to several
+		 * virtual addresses.
+		 */
+		if (mddev->curr_resync < max_sector) { /* aborted */
+			if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
+				bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
+						&sync_blocks, 1);
+			else for (i=0; i<conf->raid_disks; i++) {
+				sector_t sect =
+					raid10_find_virt(conf, mddev->curr_resync, i);
+				bitmap_end_sync(mddev->bitmap, sect,
+						&sync_blocks, 1);
+			}
+		} else /* completed sync */
+			conf->fullsync = 0;
+
+		bitmap_close_sync(mddev->bitmap);
 		close_sync(conf);
 		*skipped = 1;
 		return sectors_skipped;
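The abort branch differs between the two modes because the resync position is a virtual array address during a plain resync but effectively a per-device address during recovery, so recovery has to translate it once per disk before the in-progress bitmap chunks can be closed. Just that branch, sketched with stand-ins (find_virt for raid10_find_virt, end_sync for bitmap_end_sync):

static void model_end_aborted_sync(int is_resync, long long curr_resync,
				   int raid_disks,
				   long long (*find_virt)(long long sect,
							  int disk),
				   void (*end_sync)(long long sect))
{
	int i;

	if (is_resync)
		end_sync(curr_resync);		 /* one address */
	else
		for (i = 0; i < raid_disks; i++) /* one per disk */
			end_sync(find_virt(curr_resync, i));
}
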
@@ -1395,9 +1625,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	 * If there is non-resync activity waiting for us then
 	 * put in a delay to throttle resync.
 	 */
-	if (!go_faster && waitqueue_active(&conf->wait_resume))
+	if (!go_faster && conf->nr_waiting)
 		msleep_interruptible(1000);
-	device_barrier(conf, sector_nr + RESYNC_SECTORS);
 
 	/* Again, very different code for resync and recovery.
 	 * Both must result in an r10bio with a list of bios that
@@ -1414,6 +1643,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	 * end_sync_write if we will want to write.
 	 */
 
+	max_sync = RESYNC_PAGES << (PAGE_SHIFT-9);
 	if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
 		/* recovery... the complicated one */
 		int i, j, k;
@@ -1422,14 +1652,29 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		for (i=0 ; i<conf->raid_disks; i++)
 			if (conf->mirrors[i].rdev &&
 			    !test_bit(In_sync, &conf->mirrors[i].rdev->flags)) {
+				int still_degraded = 0;
 				/* want to reconstruct this device */
 				r10bio_t *rb2 = r10_bio;
+				sector_t sect = raid10_find_virt(conf, sector_nr, i);
+				int must_sync;
+				/* Unless we are doing a full sync, we only need
+				 * to recover the block if it is set in the bitmap
+				 */
+				must_sync = bitmap_start_sync(mddev->bitmap, sect,
+							      &sync_blocks, 1);
+				if (sync_blocks < max_sync)
+					max_sync = sync_blocks;
+				if (!must_sync &&
+				    !conf->fullsync) {
+					/* yep, skip the sync_blocks here, but don't assume
+					 * that there will never be anything to do here
+					 */
+					chunks_skipped = -1;
+					continue;
+				}
 
 				r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
-				spin_lock_irq(&conf->resync_lock);
-				conf->nr_pending++;
-				if (rb2) conf->barrier++;
-				spin_unlock_irq(&conf->resync_lock);
+				raise_barrier(conf, rb2 != NULL);
 				atomic_set(&r10_bio->remaining, 0);
 
 				r10_bio->master_bio = (struct bio*)rb2;
@@ -1437,8 +1682,23 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 				atomic_inc(&rb2->remaining);
 				r10_bio->mddev = mddev;
 				set_bit(R10BIO_IsRecover, &r10_bio->state);
-				r10_bio->sector = raid10_find_virt(conf, sector_nr, i);
+				r10_bio->sector = sect;
+
 				raid10_find_phys(conf, r10_bio);
+				/* Need to check if this section will still be
+				 * degraded
+				 */
+				for (j=0; j<conf->copies;j++) {
+					int d = r10_bio->devs[j].devnum;
+					if (conf->mirrors[d].rdev == NULL ||
+					    test_bit(Faulty, &conf->mirrors[d].rdev->flags)) {
+						still_degraded = 1;
+						break;
+					}
+				}
+				must_sync = bitmap_start_sync(mddev->bitmap, sect,
+							      &sync_blocks, still_degraded);
+
 				for (j=0; j<conf->copies;j++) {
 					int d = r10_bio->devs[j].devnum;
 					if (conf->mirrors[d].rdev &&
@@ -1498,14 +1758,22 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	} else {
 		/* resync. Schedule a read for every block at this virt offset */
 		int count = 0;
-		r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
 
-		spin_lock_irq(&conf->resync_lock);
-		conf->nr_pending++;
-		spin_unlock_irq(&conf->resync_lock);
+		if (!bitmap_start_sync(mddev->bitmap, sector_nr,
+				       &sync_blocks, mddev->degraded) &&
+		    !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+			/* We can skip this block */
+			*skipped = 1;
+			return sync_blocks + sectors_skipped;
+		}
+		if (sync_blocks < max_sync)
+			max_sync = sync_blocks;
+		r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
 
 		r10_bio->mddev = mddev;
 		atomic_set(&r10_bio->remaining, 0);
+		raise_barrier(conf, 0);
+		conf->next_resync = sector_nr;
 
 		r10_bio->master_bio = NULL;
 		r10_bio->sector = sector_nr;
@@ -1558,6 +1826,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	}
 
 	nr_sectors = 0;
+	if (sector_nr + max_sync < max_sector)
+		max_sector = sector_nr + max_sync;
 	do {
 		struct page *page;
 		int len = PAGE_SIZE;
@@ -1632,11 +1902,11 @@ static int run(mddev_t *mddev)
 	int nc, fc;
 	sector_t stride, size;
 
-	if (mddev->level != 10) {
-		printk(KERN_ERR "raid10: %s: raid level not set correctly... (%d)\n",
-		       mdname(mddev), mddev->level);
-		goto out;
+	if (mddev->chunk_size == 0) {
+		printk(KERN_ERR "md/raid10: non-zero chunk size required.\n");
+		return -EINVAL;
 	}
+
 	nc = mddev->layout & 255;
 	fc = (mddev->layout >> 8) & 255;
 	if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks ||
@@ -1650,22 +1920,24 @@ static int run(mddev_t *mddev)
 	 * bookkeeping area. [whatever we allocate in run(),
 	 * should be freed in stop()]
 	 */
-	conf = kmalloc(sizeof(conf_t), GFP_KERNEL);
+	conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
 	mddev->private = conf;
 	if (!conf) {
 		printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
 		       mdname(mddev));
 		goto out;
 	}
-	memset(conf, 0, sizeof(*conf));
-	conf->mirrors = kmalloc(sizeof(struct mirror_info)*mddev->raid_disks,
+	conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks,
 				GFP_KERNEL);
 	if (!conf->mirrors) {
 		printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
 		       mdname(mddev));
 		goto out_free_conf;
 	}
-	memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks);
+
+	conf->tmppage = alloc_page(GFP_KERNEL);
+	if (!conf->tmppage)
+		goto out_free_conf;
 
 	conf->near_copies = nc;
 	conf->far_copies = fc;
@@ -1713,8 +1985,7 @@ static int run(mddev_t *mddev)
 	INIT_LIST_HEAD(&conf->retry_list);
 
 	spin_lock_init(&conf->resync_lock);
-	init_waitqueue_head(&conf->wait_idle);
-	init_waitqueue_head(&conf->wait_resume);
+	init_waitqueue_head(&conf->wait_barrier);
 
 	/* need to check that every block has at least one working mirror */
 	if (!enough(conf)) {
@@ -1763,7 +2034,7 @@ static int run(mddev_t *mddev)
 	 * maybe...
 	 */
 	{
-		int stripe = conf->raid_disks * mddev->chunk_size / PAGE_CACHE_SIZE;
+		int stripe = conf->raid_disks * mddev->chunk_size / PAGE_SIZE;
 		stripe /= conf->near_copies;
 		if (mddev->queue->backing_dev_info.ra_pages < 2* stripe)
 			mddev->queue->backing_dev_info.ra_pages = 2* stripe;
@@ -1776,6 +2047,7 @@ static int run(mddev_t *mddev)
out_free_conf:
 	if (conf->r10bio_pool)
 		mempool_destroy(conf->r10bio_pool);
+	safe_put_page(conf->tmppage);
 	kfree(conf->mirrors);
 	kfree(conf);
 	mddev->private = NULL;
@@ -1798,10 +2070,31 @@ static int stop(mddev_t *mddev)
 	return 0;
 }
 
+static void raid10_quiesce(mddev_t *mddev, int state)
+{
+	conf_t *conf = mddev_to_conf(mddev);
+
+	switch(state) {
+	case 1:
+		raise_barrier(conf, 0);
+		break;
+	case 0:
+		lower_barrier(conf);
+		break;
+	}
+	if (mddev->thread) {
+		if (mddev->bitmap)
+			mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ;
+		else
+			mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
+		md_wakeup_thread(mddev->thread);
+	}
+}
 
-static mdk_personality_t raid10_personality =
+static struct mdk_personality raid10_personality =
 {
 	.name		= "raid10",
+	.level		= 10,
 	.owner		= THIS_MODULE,
 	.make_request	= make_request,
 	.run		= run,
@@ -1812,19 +2105,22 @@ static mdk_personality_t raid10_personality =
 	.hot_remove_disk= raid10_remove_disk,
 	.spare_active	= raid10_spare_active,
 	.sync_request	= sync_request,
+	.quiesce	= raid10_quiesce,
 };
 
 static int __init raid_init(void)
 {
-	return register_md_personality(RAID10, &raid10_personality);
+	return register_md_personality(&raid10_personality);
 }
 
 static void raid_exit(void)
 {
-	unregister_md_personality(RAID10);
+	unregister_md_personality(&raid10_personality);
 }
 
 module_init(raid_init);
 module_exit(raid_exit);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("md-personality-9"); /* RAID10 */
+MODULE_ALIAS("md-raid10");
+MODULE_ALIAS("md-level-10");