author     NeilBrown <neilb@suse.de>                2006-01-06 03:20:12 -0500
committer  Linus Torvalds <torvalds@g5.osdl.org>   2006-01-06 11:34:01 -0500
commit     17999be4aa408e7ff3b9d32c735649676567a3cd
tree       14f9fd4ef0299f16274ce2980baef7b96f111bea
parent     ac81b2ee45eb811fdb0aa1cfb71d468d944d00ce
[PATCH] md: improve raid1 "IO Barrier" concept
raid1 needs to put up a barrier to new requests while it does resync or other background recovery.  The code for this is currently open-coded, slightly obscured by its use of two waitqueues, and not documented.  This patch gathers all the related code into 4 functions, and includes a comment which (hopefully) explains what is happening.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--   drivers/md/raid1.c            167
-rw-r--r--   include/linux/raid/raid1.h      4
2 files changed, 91 insertions(+), 80 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 229d7b204297..f5204149ab65 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -51,6 +51,8 @@ static mdk_personality_t raid1_personality;
 
 static void unplug_slaves(mddev_t *mddev);
 
+static void allow_barrier(conf_t *conf);
+static void lower_barrier(conf_t *conf);
 
 static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
@@ -160,20 +162,13 @@ static void put_all_bios(conf_t *conf, r1bio_t *r1_bio)
 
 static inline void free_r1bio(r1bio_t *r1_bio)
 {
-	unsigned long flags;
-
 	conf_t *conf = mddev_to_conf(r1_bio->mddev);
 
 	/*
 	 * Wake up any possible resync thread that waits for the device
 	 * to go idle.
 	 */
-	spin_lock_irqsave(&conf->resync_lock, flags);
-	if (!--conf->nr_pending) {
-		wake_up(&conf->wait_idle);
-		wake_up(&conf->wait_resume);
-	}
-	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	allow_barrier(conf);
 
 	put_all_bios(conf, r1_bio);
 	mempool_free(r1_bio, conf->r1bio_pool);
@@ -182,22 +177,10 @@ static inline void free_r1bio(r1bio_t *r1_bio)
 static inline void put_buf(r1bio_t *r1_bio)
 {
 	conf_t *conf = mddev_to_conf(r1_bio->mddev);
-	unsigned long flags;
 
 	mempool_free(r1_bio, conf->r1buf_pool);
 
-	spin_lock_irqsave(&conf->resync_lock, flags);
-	if (!conf->barrier)
-		BUG();
-	--conf->barrier;
-	wake_up(&conf->wait_resume);
-	wake_up(&conf->wait_idle);
-
-	if (!--conf->nr_pending) {
-		wake_up(&conf->wait_idle);
-		wake_up(&conf->wait_resume);
-	}
-	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	lower_barrier(conf);
 }
 
 static void reschedule_retry(r1bio_t *r1_bio)
@@ -210,6 +193,7 @@ static void reschedule_retry(r1bio_t *r1_bio)
 	list_add(&r1_bio->retry_list, &conf->retry_list);
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 
+	wake_up(&conf->wait_barrier);
 	md_wakeup_thread(mddev->thread);
 }
 
@@ -593,30 +577,83 @@ static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk,
 	return ret;
 }
 
-/*
- * Throttle resync depth, so that we can both get proper overlapping of
- * requests, but are still able to handle normal requests quickly.
+/* Barriers....
+ * Sometimes we need to suspend IO while we do something else,
+ * either some resync/recovery, or reconfigure the array.
+ * To do this we raise a 'barrier'.
+ * The 'barrier' is a counter that can be raised multiple times
+ * to count how many activities are happening which preclude
+ * normal IO.
+ * We can only raise the barrier if there is no pending IO.
+ * i.e. if nr_pending == 0.
+ * We choose only to raise the barrier if no-one is waiting for the
+ * barrier to go down.  This means that as soon as an IO request
+ * is ready, no other operations which require a barrier will start
+ * until the IO request has had a chance.
+ *
+ * So: regular IO calls 'wait_barrier'.  When that returns there
+ *    is no background IO happening.  It must arrange to call
+ *    allow_barrier when it has finished its IO.
+ * background IO calls must call raise_barrier.  Once that returns
+ *    there is no normal IO happening.  It must arrange to call
+ *    lower_barrier when the particular background IO completes.
  */
 #define RESYNC_DEPTH	32
 
-static void device_barrier(conf_t *conf, sector_t sect)
+static void raise_barrier(conf_t *conf)
 {
 	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume),
-			    conf->resync_lock, raid1_unplug(conf->mddev->queue));
-
-	if (!conf->barrier++) {
-		wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
-				    conf->resync_lock, raid1_unplug(conf->mddev->queue));
-		if (conf->nr_pending)
-			BUG();
+
+	/* Wait until no block IO is waiting */
+	wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
+			    conf->resync_lock,
+			    raid1_unplug(conf->mddev->queue));
+
+	/* block any new IO from starting */
+	conf->barrier++;
+
+	/* Now wait for all pending IO to complete */
+	wait_event_lock_irq(conf->wait_barrier,
+			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+			    conf->resync_lock,
+			    raid1_unplug(conf->mddev->queue));
+
+	spin_unlock_irq(&conf->resync_lock);
+}
+
+static void lower_barrier(conf_t *conf)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&conf->resync_lock, flags);
+	conf->barrier--;
+	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	wake_up(&conf->wait_barrier);
+}
+
+static void wait_barrier(conf_t *conf)
+{
+	spin_lock_irq(&conf->resync_lock);
+	if (conf->barrier) {
+		conf->nr_waiting++;
+		wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
+				    conf->resync_lock,
+				    raid1_unplug(conf->mddev->queue));
+		conf->nr_waiting--;
 	}
-	wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock, raid1_unplug(conf->mddev->queue));
-	conf->next_resync = sect;
+	conf->nr_pending++;
 	spin_unlock_irq(&conf->resync_lock);
 }
 
+static void allow_barrier(conf_t *conf)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&conf->resync_lock, flags);
+	conf->nr_pending--;
+	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	wake_up(&conf->wait_barrier);
+}
+
+
 /* duplicate the data pages for behind I/O */
 static struct page **alloc_behind_pages(struct bio *bio)
 {
@@ -678,10 +715,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
 	 */
 	md_write_start(mddev, bio); /* wait on superblock update early */
 
-	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
-	conf->nr_pending++;
-	spin_unlock_irq(&conf->resync_lock);
+	wait_barrier(conf);
 
 	disk_stat_inc(mddev->gendisk, ios[rw]);
 	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
@@ -909,13 +943,8 @@ static void print_conf(conf_t *conf)
 
 static void close_sync(conf_t *conf)
 {
-	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_resume, !conf->barrier,
-			    conf->resync_lock, raid1_unplug(conf->mddev->queue));
-	spin_unlock_irq(&conf->resync_lock);
-
-	if (conf->barrier) BUG();
-	if (waitqueue_active(&conf->wait_idle)) BUG();
+	wait_barrier(conf);
+	allow_barrier(conf);
 
 	mempool_destroy(conf->r1buf_pool);
 	conf->r1buf_pool = NULL;
@@ -1317,12 +1346,16 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		return sync_blocks;
 	}
 	/*
-	 * If there is non-resync activity waiting for us then
-	 * put in a delay to throttle resync.
+	 * If there is non-resync activity waiting for a turn,
+	 * and resync is going fast enough,
+	 * then let it through before starting on this new sync request.
 	 */
-	if (!go_faster && waitqueue_active(&conf->wait_resume))
+	if (!go_faster && conf->nr_waiting)
 		msleep_interruptible(1000);
-	device_barrier(conf, sector_nr + RESYNC_SECTORS);
+
+	raise_barrier(conf);
+
+	conf->next_resync = sector_nr;
 
 	/*
 	 * If reconstructing, and >1 working disc,
@@ -1355,10 +1388,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 
 	r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
 
-	spin_lock_irq(&conf->resync_lock);
-	conf->nr_pending++;
-	spin_unlock_irq(&conf->resync_lock);
-
 	r1_bio->mddev = mddev;
 	r1_bio->sector = sector_nr;
 	r1_bio->state = 0;
@@ -1542,8 +1571,7 @@ static int run(mddev_t *mddev)
 		mddev->recovery_cp = MaxSector;
 
 	spin_lock_init(&conf->resync_lock);
-	init_waitqueue_head(&conf->wait_idle);
-	init_waitqueue_head(&conf->wait_resume);
+	init_waitqueue_head(&conf->wait_barrier);
 
 	bio_list_init(&conf->pending_bio_list);
 	bio_list_init(&conf->flushing_bio_list);
@@ -1714,11 +1742,7 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
 	}
 	memset(newmirrors, 0, sizeof(struct mirror_info)*raid_disks);
 
-	spin_lock_irq(&conf->resync_lock);
-	conf->barrier++;
-	wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
-			    conf->resync_lock, raid1_unplug(mddev->queue));
-	spin_unlock_irq(&conf->resync_lock);
+	raise_barrier(conf);
 
 	/* ok, everything is stopped */
 	oldpool = conf->r1bio_pool;
@@ -1738,12 +1762,7 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
 	conf->raid_disks = mddev->raid_disks = raid_disks;
 
 	conf->last_used = 0; /* just make sure it is in-range */
-	spin_lock_irq(&conf->resync_lock);
-	conf->barrier--;
-	spin_unlock_irq(&conf->resync_lock);
-	wake_up(&conf->wait_resume);
-	wake_up(&conf->wait_idle);
-
+	lower_barrier(conf);
 
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
@@ -1758,18 +1777,10 @@ static void raid1_quiesce(mddev_t *mddev, int state)
 
 	switch(state) {
 	case 1:
-		spin_lock_irq(&conf->resync_lock);
-		conf->barrier++;
-		wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
-				    conf->resync_lock, raid1_unplug(mddev->queue));
-		spin_unlock_irq(&conf->resync_lock);
+		raise_barrier(conf);
 		break;
 	case 0:
-		spin_lock_irq(&conf->resync_lock);
-		conf->barrier--;
-		spin_unlock_irq(&conf->resync_lock);
-		wake_up(&conf->wait_resume);
-		wake_up(&conf->wait_idle);
+		lower_barrier(conf);
 		break;
 	}
 	if (mddev->thread) {
diff --git a/include/linux/raid/raid1.h b/include/linux/raid/raid1.h
index 292b98f2b408..c55674252533 100644
--- a/include/linux/raid/raid1.h
+++ b/include/linux/raid/raid1.h
@@ -45,6 +45,7 @@ struct r1_private_data_s {
 
 	spinlock_t		resync_lock;
 	int			nr_pending;
+	int			nr_waiting;
 	int			barrier;
 	sector_t		next_resync;
 	int			fullsync;  /* set to 1 if a full sync is needed,
@@ -52,8 +53,7 @@ struct r1_private_data_s {
 					    * Cleared when a sync completes.
 					    */
 
-	wait_queue_head_t	wait_idle;
-	wait_queue_head_t	wait_resume;
+	wait_queue_head_t	wait_barrier;
 
 	struct pool_info	*poolinfo;
 
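
For readers who want to see the new locking protocol in isolation, below is a minimal user-space sketch of the four functions described in the comment block the patch adds.  It substitutes a pthread mutex and condition variable for resync_lock, wait_event_lock_irq() and wake_up(), and omits the RESYNC_DEPTH throttle and queue unplugging; the struct barrier_state type and the pairing comments are hypothetical illustrations, not kernel code.

/* barrier_sketch.c — user-space analogue of the raid1 barrier scheme. */
#include <pthread.h>

struct barrier_state {
	pthread_mutex_t lock;          /* stands in for conf->resync_lock */
	pthread_cond_t  wait_barrier;  /* stands in for conf->wait_barrier */
	int nr_pending;                /* regular IO currently in flight */
	int nr_waiting;                /* regular IO queued behind a barrier */
	int barrier;                   /* count of active background operations */
};

static struct barrier_state conf = {
	.lock         = PTHREAD_MUTEX_INITIALIZER,
	.wait_barrier = PTHREAD_COND_INITIALIZER,
};

/* Background IO (resync/recovery/reshape) brackets its work with
 * raise_barrier()/lower_barrier(). */
static void raise_barrier(struct barrier_state *c)
{
	pthread_mutex_lock(&c->lock);
	/* Let any regular IO that is already queued go first. */
	while (c->nr_waiting)
		pthread_cond_wait(&c->wait_barrier, &c->lock);
	/* Block any new regular IO from starting. */
	c->barrier++;
	/* Now wait for all in-flight regular IO to drain. */
	while (c->nr_pending)
		pthread_cond_wait(&c->wait_barrier, &c->lock);
	pthread_mutex_unlock(&c->lock);
}

static void lower_barrier(struct barrier_state *c)
{
	pthread_mutex_lock(&c->lock);
	c->barrier--;
	pthread_mutex_unlock(&c->lock);
	pthread_cond_broadcast(&c->wait_barrier);
}

/* Regular IO brackets each request with wait_barrier()/allow_barrier(). */
static void wait_barrier(struct barrier_state *c)
{
	pthread_mutex_lock(&c->lock);
	if (c->barrier) {
		c->nr_waiting++;
		while (c->barrier)
			pthread_cond_wait(&c->wait_barrier, &c->lock);
		c->nr_waiting--;
	}
	c->nr_pending++;
	pthread_mutex_unlock(&c->lock);
	/* Wake a raise_barrier() caller so it can re-check nr_waiting. */
	pthread_cond_broadcast(&c->wait_barrier);
}

static void allow_barrier(struct barrier_state *c)
{
	pthread_mutex_lock(&c->lock);
	c->nr_pending--;
	pthread_mutex_unlock(&c->lock);
	/* Wake a raise_barrier() caller waiting for nr_pending to reach zero. */
	pthread_cond_broadcast(&c->wait_barrier);
}

/* Typical pairing, mirroring the patch:
 *   background thread:  raise_barrier(&conf); ... resync ...; lower_barrier(&conf);
 *   request thread:     wait_barrier(&conf);  ... submit IO ...; allow_barrier(&conf);
 */

The design choice the patch documents is visible here: raise_barrier() refuses to start while nr_waiting is non-zero, so queued regular IO always gets a turn before another background operation begins, and regular IO only counts itself in nr_pending once no barrier is up.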