author     NeilBrown <neilb@suse.de>               2006-01-06 03:20:12 -0500
committer  Linus Torvalds <torvalds@g5.osdl.org>   2006-01-06 11:34:01 -0500
commit     17999be4aa408e7ff3b9d32c735649676567a3cd (patch)
tree       14f9fd4ef0299f16274ce2980baef7b96f111bea
parent     ac81b2ee45eb811fdb0aa1cfb71d468d944d00ce (diff)
[PATCH] md: improve raid1 "IO Barrier" concept
raid1 needs to put up a barrier to new requests while it does resync or other
background recovery. The code for this is currently open-coded, slightly
obscured by its use of two waitqueues, and not documented.
This patch gathers all the related code into 4 functions, and includes a
comment which (hopefully) explains what is happening.
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--  drivers/md/raid1.c          | 167
-rw-r--r--  include/linux/raid/raid1.h  |   4
2 files changed, 91 insertions, 80 deletions
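The four functions this patch introduces implement a simple counting barrier: background work (resync, recovery, reshape, quiesce) raises the barrier and waits for in-flight IO to drain, while regular IO waits for the barrier to drop and then counts itself as pending. The sketch below is a minimal userspace analogue of that pattern, assuming pthread primitives in place of the kernel's spinlock, wait queue and queue-unplug callback; the struct and function names are illustrative, and the RESYNC_DEPTH throttle is omitted.

/*
 * Minimal userspace analogue of the raid1 barrier pattern (illustrative
 * names only; RESYNC_DEPTH throttling and queue unplugging omitted).
 */
#include <pthread.h>

struct barrier_ctx {
        pthread_mutex_t lock;        /* stands in for conf->resync_lock */
        pthread_cond_t  wait;        /* stands in for conf->wait_barrier */
        int barrier;                 /* active background operations */
        int nr_pending;              /* in-flight regular IO */
        int nr_waiting;              /* regular IO blocked on the barrier */
};

/* Background work: block new regular IO, then wait for pending IO to drain. */
static void raise_barrier(struct barrier_ctx *c)
{
        pthread_mutex_lock(&c->lock);
        while (c->nr_waiting)        /* give queued regular IO its turn first */
                pthread_cond_wait(&c->wait, &c->lock);
        c->barrier++;                /* block any new IO from starting */
        while (c->nr_pending)        /* now wait for pending IO to complete */
                pthread_cond_wait(&c->wait, &c->lock);
        pthread_mutex_unlock(&c->lock);
}

static void lower_barrier(struct barrier_ctx *c)
{
        pthread_mutex_lock(&c->lock);
        c->barrier--;
        pthread_mutex_unlock(&c->lock);
        pthread_cond_broadcast(&c->wait);
}

/* Regular IO: wait until no barrier is up, then register as pending. */
static void wait_barrier(struct barrier_ctx *c)
{
        pthread_mutex_lock(&c->lock);
        if (c->barrier) {
                c->nr_waiting++;
                while (c->barrier)
                        pthread_cond_wait(&c->wait, &c->lock);
                c->nr_waiting--;
        }
        c->nr_pending++;
        pthread_mutex_unlock(&c->lock);
}

static void allow_barrier(struct barrier_ctx *c)
{
        pthread_mutex_lock(&c->lock);
        c->nr_pending--;
        pthread_mutex_unlock(&c->lock);
        pthread_cond_broadcast(&c->wait);
}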
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 229d7b204297..f5204149ab65 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -51,6 +51,8 @@ static mdk_personality_t raid1_personality;
 
 static void unplug_slaves(mddev_t *mddev);
 
+static void allow_barrier(conf_t *conf);
+static void lower_barrier(conf_t *conf);
 
 static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
@@ -160,20 +162,13 @@ static void put_all_bios(conf_t *conf, r1bio_t *r1_bio)
 
 static inline void free_r1bio(r1bio_t *r1_bio)
 {
-        unsigned long flags;
-
         conf_t *conf = mddev_to_conf(r1_bio->mddev);
 
         /*
          * Wake up any possible resync thread that waits for the device
          * to go idle.
          */
-        spin_lock_irqsave(&conf->resync_lock, flags);
-        if (!--conf->nr_pending) {
-                wake_up(&conf->wait_idle);
-                wake_up(&conf->wait_resume);
-        }
-        spin_unlock_irqrestore(&conf->resync_lock, flags);
+        allow_barrier(conf);
 
         put_all_bios(conf, r1_bio);
         mempool_free(r1_bio, conf->r1bio_pool);
@@ -182,22 +177,10 @@ static inline void free_r1bio(r1bio_t *r1_bio)
 static inline void put_buf(r1bio_t *r1_bio)
 {
         conf_t *conf = mddev_to_conf(r1_bio->mddev);
-        unsigned long flags;
 
         mempool_free(r1_bio, conf->r1buf_pool);
 
-        spin_lock_irqsave(&conf->resync_lock, flags);
-        if (!conf->barrier)
-                BUG();
-        --conf->barrier;
-        wake_up(&conf->wait_resume);
-        wake_up(&conf->wait_idle);
-
-        if (!--conf->nr_pending) {
-                wake_up(&conf->wait_idle);
-                wake_up(&conf->wait_resume);
-        }
-        spin_unlock_irqrestore(&conf->resync_lock, flags);
+        lower_barrier(conf);
 }
 
 static void reschedule_retry(r1bio_t *r1_bio)
@@ -210,6 +193,7 @@ static void reschedule_retry(r1bio_t *r1_bio)
         list_add(&r1_bio->retry_list, &conf->retry_list);
         spin_unlock_irqrestore(&conf->device_lock, flags);
 
+        wake_up(&conf->wait_barrier);
         md_wakeup_thread(mddev->thread);
 }
 
@@ -593,30 +577,83 @@ static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk,
         return ret;
 }
 
-/*
- * Throttle resync depth, so that we can both get proper overlapping of
- * requests, but are still able to handle normal requests quickly.
+/* Barriers....
+ * Sometimes we need to suspend IO while we do something else,
+ * either some resync/recovery, or reconfigure the array.
+ * To do this we raise a 'barrier'.
+ * The 'barrier' is a counter that can be raised multiple times
+ * to count how many activities are happening which preclude
+ * normal IO.
+ * We can only raise the barrier if there is no pending IO.
+ * i.e. if nr_pending == 0.
+ * We choose only to raise the barrier if no-one is waiting for the
+ * barrier to go down.  This means that as soon as an IO request
+ * is ready, no other operations which require a barrier will start
+ * until the IO request has had a chance.
+ *
+ * So: regular IO calls 'wait_barrier'.  When that returns there
+ *    is no background IO happening.  It must arrange to call
+ *    allow_barrier when it has finished its IO.
+ * background IO calls must call raise_barrier.  Once that returns
+ *    there is no normal IO happening.  It must arrange to call
+ *    lower_barrier when the particular background IO completes.
  */
 #define RESYNC_DEPTH 32
 
-static void device_barrier(conf_t *conf, sector_t sect)
+static void raise_barrier(conf_t *conf)
 {
         spin_lock_irq(&conf->resync_lock);
-        wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume),
-                            conf->resync_lock, raid1_unplug(conf->mddev->queue));
-
-        if (!conf->barrier++) {
-                wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
-                                    conf->resync_lock, raid1_unplug(conf->mddev->queue));
-                if (conf->nr_pending)
-                        BUG();
+
+        /* Wait until no block IO is waiting */
+        wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
+                            conf->resync_lock,
+                            raid1_unplug(conf->mddev->queue));
+
+        /* block any new IO from starting */
+        conf->barrier++;
+
+        /* Now wait for all pending IO to complete */
+        wait_event_lock_irq(conf->wait_barrier,
+                            !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+                            conf->resync_lock,
+                            raid1_unplug(conf->mddev->queue));
+
+        spin_unlock_irq(&conf->resync_lock);
+}
+
+static void lower_barrier(conf_t *conf)
+{
+        unsigned long flags;
+        spin_lock_irqsave(&conf->resync_lock, flags);
+        conf->barrier--;
+        spin_unlock_irqrestore(&conf->resync_lock, flags);
+        wake_up(&conf->wait_barrier);
+}
+
+static void wait_barrier(conf_t *conf)
+{
+        spin_lock_irq(&conf->resync_lock);
+        if (conf->barrier) {
+                conf->nr_waiting++;
+                wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
+                                    conf->resync_lock,
+                                    raid1_unplug(conf->mddev->queue));
+                conf->nr_waiting--;
         }
-        wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH,
-                            conf->resync_lock, raid1_unplug(conf->mddev->queue));
-        conf->next_resync = sect;
+        conf->nr_pending++;
         spin_unlock_irq(&conf->resync_lock);
 }
 
+static void allow_barrier(conf_t *conf)
+{
+        unsigned long flags;
+        spin_lock_irqsave(&conf->resync_lock, flags);
+        conf->nr_pending--;
+        spin_unlock_irqrestore(&conf->resync_lock, flags);
+        wake_up(&conf->wait_barrier);
+}
+
+
 /* duplicate the data pages for behind I/O */
 static struct page **alloc_behind_pages(struct bio *bio)
 {
@@ -678,10 +715,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
          */
         md_write_start(mddev, bio); /* wait on superblock update early */
 
-        spin_lock_irq(&conf->resync_lock);
-        wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
-        conf->nr_pending++;
-        spin_unlock_irq(&conf->resync_lock);
+        wait_barrier(conf);
 
         disk_stat_inc(mddev->gendisk, ios[rw]);
         disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
@@ -909,13 +943,8 @@ static void print_conf(conf_t *conf)
 
 static void close_sync(conf_t *conf)
 {
-        spin_lock_irq(&conf->resync_lock);
-        wait_event_lock_irq(conf->wait_resume, !conf->barrier,
-                            conf->resync_lock, raid1_unplug(conf->mddev->queue));
-        spin_unlock_irq(&conf->resync_lock);
-
-        if (conf->barrier) BUG();
-        if (waitqueue_active(&conf->wait_idle)) BUG();
+        wait_barrier(conf);
+        allow_barrier(conf);
 
         mempool_destroy(conf->r1buf_pool);
         conf->r1buf_pool = NULL;
@@ -1317,12 +1346,16 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
                 return sync_blocks;
         }
         /*
-         * If there is non-resync activity waiting for us then
-         * put in a delay to throttle resync.
+         * If there is non-resync activity waiting for a turn,
+         * and resync is going fast enough,
+         * then let it through before starting on this new sync request.
          */
-        if (!go_faster && waitqueue_active(&conf->wait_resume))
+        if (!go_faster && conf->nr_waiting)
                 msleep_interruptible(1000);
-        device_barrier(conf, sector_nr + RESYNC_SECTORS);
+
+        raise_barrier(conf);
+
+        conf->next_resync = sector_nr;
 
         /*
          * If reconstructing, and >1 working disc,
@@ -1355,10 +1388,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 
         r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
 
-        spin_lock_irq(&conf->resync_lock);
-        conf->nr_pending++;
-        spin_unlock_irq(&conf->resync_lock);
-
         r1_bio->mddev = mddev;
         r1_bio->sector = sector_nr;
         r1_bio->state = 0;
@@ -1542,8 +1571,7 @@ static int run(mddev_t *mddev)
                 mddev->recovery_cp = MaxSector;
 
         spin_lock_init(&conf->resync_lock);
-        init_waitqueue_head(&conf->wait_idle);
-        init_waitqueue_head(&conf->wait_resume);
+        init_waitqueue_head(&conf->wait_barrier);
 
         bio_list_init(&conf->pending_bio_list);
         bio_list_init(&conf->flushing_bio_list);
@@ -1714,11 +1742,7 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
         }
         memset(newmirrors, 0, sizeof(struct mirror_info)*raid_disks);
 
-        spin_lock_irq(&conf->resync_lock);
-        conf->barrier++;
-        wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
-                            conf->resync_lock, raid1_unplug(mddev->queue));
-        spin_unlock_irq(&conf->resync_lock);
+        raise_barrier(conf);
 
         /* ok, everything is stopped */
         oldpool = conf->r1bio_pool;
@@ -1738,12 +1762,7 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
         conf->raid_disks = mddev->raid_disks = raid_disks;
 
         conf->last_used = 0; /* just make sure it is in-range */
-        spin_lock_irq(&conf->resync_lock);
-        conf->barrier--;
-        spin_unlock_irq(&conf->resync_lock);
-        wake_up(&conf->wait_resume);
-        wake_up(&conf->wait_idle);
-
+        lower_barrier(conf);
 
         set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
         md_wakeup_thread(mddev->thread);
@@ -1758,18 +1777,10 @@ static void raid1_quiesce(mddev_t *mddev, int state)
 
         switch(state) {
         case 1:
-                spin_lock_irq(&conf->resync_lock);
-                conf->barrier++;
-                wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
-                                    conf->resync_lock, raid1_unplug(mddev->queue));
-                spin_unlock_irq(&conf->resync_lock);
+                raise_barrier(conf);
                 break;
         case 0:
-                spin_lock_irq(&conf->resync_lock);
-                conf->barrier--;
-                spin_unlock_irq(&conf->resync_lock);
-                wake_up(&conf->wait_resume);
-                wake_up(&conf->wait_idle);
+                lower_barrier(conf);
                 break;
         }
         if (mddev->thread) {
diff --git a/include/linux/raid/raid1.h b/include/linux/raid/raid1.h
index 292b98f2b408..c55674252533 100644
--- a/include/linux/raid/raid1.h
+++ b/include/linux/raid/raid1.h
@@ -45,6 +45,7 @@ struct r1_private_data_s {
 
         spinlock_t        resync_lock;
         int               nr_pending;
+        int               nr_waiting;
         int               barrier;
         sector_t          next_resync;
         int               fullsync; /* set to 1 if a full sync is needed,
@@ -52,8 +53,7 @@ struct r1_private_data_s {
                                      * Cleared when a sync completes.
                                      */
 
-        wait_queue_head_t wait_idle;
-        wait_queue_head_t wait_resume;
+        wait_queue_head_t wait_barrier;
 
         struct pool_info  *poolinfo;
 
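For orientation, the way callers pair these operations in the hunks above (make_request/free_r1bio for regular IO; sync_request, raid1_reshape and raid1_quiesce for background work) can be sketched against the illustrative userspace analogue given earlier. This is only a hedged sketch built on that assumed barrier_ctx helper, not kernel code.

/*
 * Pairing sketch (assumes the barrier_ctx helpers sketched earlier;
 * illustrative only, not kernel code).
 */
#include <pthread.h>

static struct barrier_ctx ctx = {
        .lock = PTHREAD_MUTEX_INITIALIZER,
        .wait = PTHREAD_COND_INITIALIZER,
};

/* Regular IO path, cf. make_request() .. free_r1bio() in the patch. */
static void *regular_io(void *unused)
{
        wait_barrier(&ctx);     /* returns only when no barrier is raised */
        /* ... service the read/write ... */
        allow_barrier(&ctx);    /* drop our pending count and wake waiters */
        return NULL;
}

/* Background path, cf. sync_request(), raid1_reshape(), raid1_quiesce(). */
static void *background_work(void *unused)
{
        raise_barrier(&ctx);    /* no regular IO is in flight after this */
        /* ... one resync / reshape / quiesce step ... */
        lower_barrier(&ctx);    /* let regular IO resume */
        return NULL;
}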