author     NeilBrown <neilb@suse.de>                 2006-01-06 03:20:13 -0500
committer  Linus Torvalds <torvalds@g5.osdl.org>     2006-01-06 11:34:02 -0500
commit     0a27ec96b6fb1abf867e36d7b0b681d67588767a
tree       4db5d9b8ef02b417fc2077d65311e2ea7cda5ce0
parent     17999be4aa408e7ff3b9d32c735649676567a3cd
[PATCH] md: improve raid10 "IO Barrier" concept
raid10 needs to put up a barrier to new requests while it does resync or other
background recovery. The code for this is currently open-coded, slightly
obscured by its use of two waitqueues, and not documented.
This patch gathers all the related code into 4 functions, and includes a
comment which (hopefully) explains what is happening.
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--  drivers/md/raid10.c           | 135
-rw-r--r--  include/linux/raid/raid10.h   |   4
2 files changed, 81 insertions, 58 deletions
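Before the diff itself, a rough orientation sketch of the scheme the four new functions implement. This is not the kernel code: the struct name barrier_conf is made up for illustration, a pthread mutex stands in for conf->resync_lock, and a condition variable stands in for the wait_barrier waitqueue (the real code uses wait_event_lock_irq() and wake_up()).

/* Hypothetical userspace analogue of the four raid10 barrier helpers. */
#include <pthread.h>

#define RESYNC_DEPTH 32

struct barrier_conf {
	pthread_mutex_t resync_lock;
	pthread_cond_t  wait_barrier;
	int nr_pending;		/* regular IO requests in flight */
	int nr_waiting;		/* regular IO blocked behind a barrier */
	int barrier;		/* resync/recovery passes in progress */
};

/* Background IO: refuse to start while regular IO is waiting, block new
 * regular IO, then wait for the IO already in flight to drain. */
static void raise_barrier(struct barrier_conf *conf)
{
	pthread_mutex_lock(&conf->resync_lock);
	while (conf->nr_waiting)	/* wait until no block IO is waiting */
		pthread_cond_wait(&conf->wait_barrier, &conf->resync_lock);
	conf->barrier++;		/* block any new IO from starting */
	/* now wait for all pending IO to complete */
	while (conf->nr_pending || conf->barrier >= RESYNC_DEPTH)
		pthread_cond_wait(&conf->wait_barrier, &conf->resync_lock);
	pthread_mutex_unlock(&conf->resync_lock);
}

static void lower_barrier(struct barrier_conf *conf)
{
	pthread_mutex_lock(&conf->resync_lock);
	conf->barrier--;
	pthread_mutex_unlock(&conf->resync_lock);
	pthread_cond_broadcast(&conf->wait_barrier);
}

/* Regular IO: wait for any barrier to drop, then count ourselves as
 * pending so that no new barrier can be raised over us. */
static void wait_barrier(struct barrier_conf *conf)
{
	pthread_mutex_lock(&conf->resync_lock);
	if (conf->barrier) {
		conf->nr_waiting++;
		while (conf->barrier)
			pthread_cond_wait(&conf->wait_barrier, &conf->resync_lock);
		conf->nr_waiting--;
	}
	conf->nr_pending++;
	pthread_mutex_unlock(&conf->resync_lock);
}

static void allow_barrier(struct barrier_conf *conf)
{
	pthread_mutex_lock(&conf->resync_lock);
	conf->nr_pending--;
	pthread_mutex_unlock(&conf->resync_lock);
	pthread_cond_broadcast(&conf->wait_barrier);
}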
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 713dc9c2c730..50bd7b152f28 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -47,6 +47,9 @@
 
 static void unplug_slaves(mddev_t *mddev);
 
+static void allow_barrier(conf_t *conf);
+static void lower_barrier(conf_t *conf);
+
 static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
 	conf_t *conf = data;
@@ -175,20 +178,13 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio)
 
 static inline void free_r10bio(r10bio_t *r10_bio)
 {
-	unsigned long flags;
-
 	conf_t *conf = mddev_to_conf(r10_bio->mddev);
 
 	/*
 	 * Wake up any possible resync thread that waits for the device
 	 * to go idle.
 	 */
-	spin_lock_irqsave(&conf->resync_lock, flags);
-	if (!--conf->nr_pending) {
-		wake_up(&conf->wait_idle);
-		wake_up(&conf->wait_resume);
-	}
-	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	allow_barrier(conf);
 
 	put_all_bios(conf, r10_bio);
 	mempool_free(r10_bio, conf->r10bio_pool);
@@ -197,22 +193,10 @@ static inline void free_r10bio(r10bio_t *r10_bio)
 static inline void put_buf(r10bio_t *r10_bio)
 {
 	conf_t *conf = mddev_to_conf(r10_bio->mddev);
-	unsigned long flags;
 
 	mempool_free(r10_bio, conf->r10buf_pool);
 
-	spin_lock_irqsave(&conf->resync_lock, flags);
-	if (!conf->barrier)
-		BUG();
-	--conf->barrier;
-	wake_up(&conf->wait_resume);
-	wake_up(&conf->wait_idle);
-
-	if (!--conf->nr_pending) {
-		wake_up(&conf->wait_idle);
-		wake_up(&conf->wait_resume);
-	}
-	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	lower_barrier(conf);
 }
 
 static void reschedule_retry(r10bio_t *r10_bio)
@@ -640,30 +624,82 @@ static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk,
 	return ret;
 }
 
-/*
- * Throttle resync depth, so that we can both get proper overlapping of
- * requests, but are still able to handle normal requests quickly.
+/* Barriers....
+ * Sometimes we need to suspend IO while we do something else,
+ * either some resync/recovery, or reconfigure the array.
+ * To do this we raise a 'barrier'.
+ * The 'barrier' is a counter that can be raised multiple times
+ * to count how many activities are happening which preclude
+ * normal IO.
+ * We can only raise the barrier if there is no pending IO.
+ * i.e. if nr_pending == 0.
+ * We choose only to raise the barrier if no-one is waiting for the
+ * barrier to go down. This means that as soon as an IO request
+ * is ready, no other operations which require a barrier will start
+ * until the IO request has had a chance.
+ *
+ * So: regular IO calls 'wait_barrier'. When that returns there
+ *    is no backgroup IO happening,  It must arrange to call
+ *    allow_barrier when it has finished its IO.
+ * backgroup IO calls must call raise_barrier. Once that returns
+ *    there is no normal IO happeing. It must arrange to call
+ *    lower_barrier when the particular background IO completes.
  */
 #define RESYNC_DEPTH 32
 
-static void device_barrier(conf_t *conf, sector_t sect)
+static void raise_barrier(conf_t *conf)
 {
 	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume),
-			    conf->resync_lock, unplug_slaves(conf->mddev));
 
-	if (!conf->barrier++) {
-		wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
-				    conf->resync_lock, unplug_slaves(conf->mddev));
-		if (conf->nr_pending)
-			BUG();
+	/* Wait until no block IO is waiting */
+	wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
+			    conf->resync_lock,
+			    raid10_unplug(conf->mddev->queue));
+
+	/* block any new IO from starting */
+	conf->barrier++;
+
+	/* No wait for all pending IO to complete */
+	wait_event_lock_irq(conf->wait_barrier,
+			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+			    conf->resync_lock,
+			    raid10_unplug(conf->mddev->queue));
+
+	spin_unlock_irq(&conf->resync_lock);
+}
+
+static void lower_barrier(conf_t *conf)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&conf->resync_lock, flags);
+	conf->barrier--;
+	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	wake_up(&conf->wait_barrier);
+}
+
+static void wait_barrier(conf_t *conf)
+{
+	spin_lock_irq(&conf->resync_lock);
+	if (conf->barrier) {
+		conf->nr_waiting++;
+		wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
+				    conf->resync_lock,
+				    raid10_unplug(conf->mddev->queue));
+		conf->nr_waiting--;
 	}
-	wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock, unplug_slaves(conf->mddev));
-	conf->next_resync = sect;
+	conf->nr_pending++;
 	spin_unlock_irq(&conf->resync_lock);
 }
 
+static void allow_barrier(conf_t *conf)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&conf->resync_lock, flags);
+	conf->nr_pending--;
+	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	wake_up(&conf->wait_barrier);
+}
+
 static int make_request(request_queue_t *q, struct bio * bio)
 {
 	mddev_t *mddev = q->queuedata;
@@ -719,10 +755,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
 	 * thread has put up a bar for new requests.
 	 * Continue immediately if no resync is active currently.
 	 */
-	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
-	conf->nr_pending++;
-	spin_unlock_irq(&conf->resync_lock);
+	wait_barrier(conf);
 
 	disk_stat_inc(mddev->gendisk, ios[rw]);
 	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
@@ -897,13 +930,8 @@ static void print_conf(conf_t *conf)
 
 static void close_sync(conf_t *conf)
 {
-	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_resume, !conf->barrier,
-			    conf->resync_lock, unplug_slaves(conf->mddev));
-	spin_unlock_irq(&conf->resync_lock);
-
-	if (conf->barrier) BUG();
-	if (waitqueue_active(&conf->wait_idle)) BUG();
+	wait_barrier(conf);
+	allow_barrier(conf);
 
 	mempool_destroy(conf->r10buf_pool);
 	conf->r10buf_pool = NULL;
@@ -1395,9 +1423,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	 * If there is non-resync activity waiting for us then
 	 * put in a delay to throttle resync.
 	 */
-	if (!go_faster && waitqueue_active(&conf->wait_resume))
+	if (!go_faster && conf->nr_waiting)
 		msleep_interruptible(1000);
-	device_barrier(conf, sector_nr + RESYNC_SECTORS);
+	raise_barrier(conf);
+	conf->next_resync = sector_nr;
 
 	/* Again, very different code for resync and recovery.
 	 * Both must result in an r10bio with a list of bios that
@@ -1427,7 +1456,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 
 		r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
 		spin_lock_irq(&conf->resync_lock);
-		conf->nr_pending++;
 		if (rb2) conf->barrier++;
 		spin_unlock_irq(&conf->resync_lock);
 		atomic_set(&r10_bio->remaining, 0);
@@ -1500,10 +1528,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		int count = 0;
 		r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
 
-		spin_lock_irq(&conf->resync_lock);
-		conf->nr_pending++;
-		spin_unlock_irq(&conf->resync_lock);
-
 		r10_bio->mddev = mddev;
 		atomic_set(&r10_bio->remaining, 0);
 
@@ -1713,8 +1737,7 @@ static int run(mddev_t *mddev)
 	INIT_LIST_HEAD(&conf->retry_list);
 
 	spin_lock_init(&conf->resync_lock);
-	init_waitqueue_head(&conf->wait_idle);
-	init_waitqueue_head(&conf->wait_resume);
+	init_waitqueue_head(&conf->wait_barrier);
 
 	/* need to check that every block has at least one working mirror */
 	if (!enough(conf)) {
diff --git a/include/linux/raid/raid10.h b/include/linux/raid/raid10.h
index 60708789c8f9..08317b77802b 100644
--- a/include/linux/raid/raid10.h
+++ b/include/linux/raid/raid10.h
@@ -39,11 +39,11 @@ struct r10_private_data_s {
 
 	spinlock_t		resync_lock;
 	int			nr_pending;
+	int			nr_waiting;
 	int			barrier;
 	sector_t		next_resync;
 
-	wait_queue_head_t	wait_idle;
-	wait_queue_head_t	wait_resume;
+	wait_queue_head_t	wait_barrier;
 
 	mempool_t *r10bio_pool;
 	mempool_t *r10buf_pool;
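The calling convention described by the new comment block, shown with the same hypothetical userspace analogue sketched above (regular_io and resync_pass are made-up names, not functions in the driver): regular IO brackets each request with wait_barrier()/allow_barrier(), while each resync or recovery pass brackets its work with raise_barrier()/lower_barrier().

/* Usage sketch for the analogue above (illustrative only). */
static struct barrier_conf conf = {
	.resync_lock  = PTHREAD_MUTEX_INITIALIZER,
	.wait_barrier = PTHREAD_COND_INITIALIZER,
};

static void regular_io(void)
{
	wait_barrier(&conf);	/* returns only when no resync/recovery is active */
	/* ... issue and complete the request ... */
	allow_barrier(&conf);	/* may let a waiting resync pass proceed */
}

static void resync_pass(void)
{
	raise_barrier(&conf);	/* returns only when no regular IO is in flight */
	/* ... do one chunk of resync or recovery ... */
	lower_barrier(&conf);	/* lets blocked regular IO resume */
}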