author     NeilBrown <neilb@suse.de>               2006-01-06 03:20:13 -0500
committer  Linus Torvalds <torvalds@g5.osdl.org>   2006-01-06 11:34:02 -0500
commit     0a27ec96b6fb1abf867e36d7b0b681d67588767a (patch)
tree       4db5d9b8ef02b417fc2077d65311e2ea7cda5ce0
parent     17999be4aa408e7ff3b9d32c735649676567a3cd (diff)
[PATCH] md: improve raid10 "IO Barrier" concept
raid10 needs to put up a barrier to new requests while it does resync or
other background recovery.  The code for this is currently open-coded,
slightly obscure by its use of two waitqueues, and not documented.

This patch gathers all the related code into 4 functions, and includes a
comment which (hopefully) explains what is happening.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--  drivers/md/raid10.c          | 135
-rw-r--r--  include/linux/raid/raid10.h  |   4
2 files changed, 81 insertions, 58 deletions
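Before reading the diff, the barrier scheme the commit message describes can be followed in a minimal
user-space sketch. This is not the kernel code: a pthread mutex and condition variable stand in for
conf->resync_lock and the wait_barrier waitqueue, the driver in main() is purely illustrative, and only
the four function names, the three counters and RESYNC_DEPTH are taken from the patch.

/* Minimal user-space sketch of the raid10 barrier scheme (illustrative only;
 * pthreads replace the kernel's resync_lock/waitqueue primitives). */
#include <pthread.h>
#include <stdio.h>

#define RESYNC_DEPTH 32

static pthread_mutex_t resync_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wait_barrier_q = PTHREAD_COND_INITIALIZER;
static int nr_pending;   /* regular IO requests in flight */
static int nr_waiting;   /* regular IO requests blocked behind a barrier */
static int barrier;      /* number of active resync/recovery passes */

/* Background (resync/recovery) side. */
static void raise_barrier(void)
{
        pthread_mutex_lock(&resync_lock);
        while (nr_waiting)                 /* let queued regular IO go first */
                pthread_cond_wait(&wait_barrier_q, &resync_lock);
        barrier++;                         /* block any new regular IO */
        while (nr_pending || barrier >= RESYNC_DEPTH)   /* wait for in-flight IO */
                pthread_cond_wait(&wait_barrier_q, &resync_lock);
        pthread_mutex_unlock(&resync_lock);
}

static void lower_barrier(void)
{
        pthread_mutex_lock(&resync_lock);
        barrier--;
        pthread_mutex_unlock(&resync_lock);
        pthread_cond_broadcast(&wait_barrier_q);
}

/* Regular IO side. */
static void wait_barrier(void)
{
        pthread_mutex_lock(&resync_lock);
        if (barrier) {
                nr_waiting++;
                while (barrier)
                        pthread_cond_wait(&wait_barrier_q, &resync_lock);
                nr_waiting--;
        }
        nr_pending++;
        pthread_mutex_unlock(&resync_lock);
}

static void allow_barrier(void)
{
        pthread_mutex_lock(&resync_lock);
        nr_pending--;
        pthread_mutex_unlock(&resync_lock);
        pthread_cond_broadcast(&wait_barrier_q);
}

int main(void)
{
        wait_barrier();        /* a regular IO request enters */
        allow_barrier();       /* ... and completes */
        raise_barrier();       /* a resync pass may now start */
        lower_barrier();       /* resync pass done */
        printf("barrier=%d nr_pending=%d\n", barrier, nr_pending);
        return 0;
}

The property the patch's new comment block emphasises holds in the sketch too: a barrier is only raised
when no regular request is already queued behind an existing one, so queued IO always gets a chance to
run between background passes.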
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 713dc9c2c730..50bd7b152f28 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -47,6 +47,9 @@
 
 static void unplug_slaves(mddev_t *mddev);
 
+static void allow_barrier(conf_t *conf);
+static void lower_barrier(conf_t *conf);
+
 static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
         conf_t *conf = data;
@@ -175,20 +178,13 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio)
 
 static inline void free_r10bio(r10bio_t *r10_bio)
 {
-        unsigned long flags;
-
         conf_t *conf = mddev_to_conf(r10_bio->mddev);
 
         /*
          * Wake up any possible resync thread that waits for the device
          * to go idle.
          */
-        spin_lock_irqsave(&conf->resync_lock, flags);
-        if (!--conf->nr_pending) {
-                wake_up(&conf->wait_idle);
-                wake_up(&conf->wait_resume);
-        }
-        spin_unlock_irqrestore(&conf->resync_lock, flags);
+        allow_barrier(conf);
 
         put_all_bios(conf, r10_bio);
         mempool_free(r10_bio, conf->r10bio_pool);
@@ -197,22 +193,10 @@ static inline void free_r10bio(r10bio_t *r10_bio)
 static inline void put_buf(r10bio_t *r10_bio)
 {
         conf_t *conf = mddev_to_conf(r10_bio->mddev);
-        unsigned long flags;
 
         mempool_free(r10_bio, conf->r10buf_pool);
 
-        spin_lock_irqsave(&conf->resync_lock, flags);
-        if (!conf->barrier)
-                BUG();
-        --conf->barrier;
-        wake_up(&conf->wait_resume);
-        wake_up(&conf->wait_idle);
-
-        if (!--conf->nr_pending) {
-                wake_up(&conf->wait_idle);
-                wake_up(&conf->wait_resume);
-        }
-        spin_unlock_irqrestore(&conf->resync_lock, flags);
+        lower_barrier(conf);
 }
 
 static void reschedule_retry(r10bio_t *r10_bio)
@@ -640,30 +624,82 @@ static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk,
         return ret;
 }
 
-/*
- * Throttle resync depth, so that we can both get proper overlapping of
- * requests, but are still able to handle normal requests quickly.
+/* Barriers....
+ * Sometimes we need to suspend IO while we do something else,
+ * either some resync/recovery, or reconfigure the array.
+ * To do this we raise a 'barrier'.
+ * The 'barrier' is a counter that can be raised multiple times
+ * to count how many activities are happening which preclude
+ * normal IO.
+ * We can only raise the barrier if there is no pending IO.
+ * i.e. if nr_pending == 0.
+ * We choose only to raise the barrier if no-one is waiting for the
+ * barrier to go down.  This means that as soon as an IO request
+ * is ready, no other operations which require a barrier will start
+ * until the IO request has had a chance.
+ *
+ * So: regular IO calls 'wait_barrier'.  When that returns there
+ *    is no backgroup IO happening, It must arrange to call
+ *    allow_barrier when it has finished its IO.
+ * backgroup IO calls must call raise_barrier.  Once that returns
+ *    there is no normal IO happeing.  It must arrange to call
+ *    lower_barrier when the particular background IO completes.
  */
 #define RESYNC_DEPTH 32
 
-static void device_barrier(conf_t *conf, sector_t sect)
+static void raise_barrier(conf_t *conf)
 {
         spin_lock_irq(&conf->resync_lock);
-        wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume),
-                            conf->resync_lock, unplug_slaves(conf->mddev));
-
-        if (!conf->barrier++) {
-                wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
-                                    conf->resync_lock, unplug_slaves(conf->mddev));
-                if (conf->nr_pending)
-                        BUG();
+
+        /* Wait until no block IO is waiting */
+        wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
+                            conf->resync_lock,
+                            raid10_unplug(conf->mddev->queue));
+
+        /* block any new IO from starting */
+        conf->barrier++;
+
+        /* No wait for all pending IO to complete */
+        wait_event_lock_irq(conf->wait_barrier,
+                            !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+                            conf->resync_lock,
+                            raid10_unplug(conf->mddev->queue));
+
+        spin_unlock_irq(&conf->resync_lock);
+}
+
+static void lower_barrier(conf_t *conf)
+{
+        unsigned long flags;
+        spin_lock_irqsave(&conf->resync_lock, flags);
+        conf->barrier--;
+        spin_unlock_irqrestore(&conf->resync_lock, flags);
+        wake_up(&conf->wait_barrier);
+}
+
+static void wait_barrier(conf_t *conf)
+{
+        spin_lock_irq(&conf->resync_lock);
+        if (conf->barrier) {
+                conf->nr_waiting++;
+                wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
+                                    conf->resync_lock,
+                                    raid10_unplug(conf->mddev->queue));
+                conf->nr_waiting--;
         }
-        wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH,
-                            conf->resync_lock, unplug_slaves(conf->mddev));
-        conf->next_resync = sect;
+        conf->nr_pending++;
         spin_unlock_irq(&conf->resync_lock);
 }
 
+static void allow_barrier(conf_t *conf)
+{
+        unsigned long flags;
+        spin_lock_irqsave(&conf->resync_lock, flags);
+        conf->nr_pending--;
+        spin_unlock_irqrestore(&conf->resync_lock, flags);
+        wake_up(&conf->wait_barrier);
+}
+
 static int make_request(request_queue_t *q, struct bio * bio)
 {
         mddev_t *mddev = q->queuedata;
@@ -719,10 +755,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
          * thread has put up a bar for new requests.
          * Continue immediately if no resync is active currently.
          */
-        spin_lock_irq(&conf->resync_lock);
-        wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
-        conf->nr_pending++;
-        spin_unlock_irq(&conf->resync_lock);
+        wait_barrier(conf);
 
         disk_stat_inc(mddev->gendisk, ios[rw]);
         disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
@@ -897,13 +930,8 @@ static void print_conf(conf_t *conf)
 
 static void close_sync(conf_t *conf)
 {
-        spin_lock_irq(&conf->resync_lock);
-        wait_event_lock_irq(conf->wait_resume, !conf->barrier,
-                            conf->resync_lock, unplug_slaves(conf->mddev));
-        spin_unlock_irq(&conf->resync_lock);
-
-        if (conf->barrier) BUG();
-        if (waitqueue_active(&conf->wait_idle)) BUG();
+        wait_barrier(conf);
+        allow_barrier(conf);
 
         mempool_destroy(conf->r10buf_pool);
         conf->r10buf_pool = NULL;
@@ -1395,9 +1423,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
          * If there is non-resync activity waiting for us then
          * put in a delay to throttle resync.
          */
-        if (!go_faster && waitqueue_active(&conf->wait_resume))
+        if (!go_faster && conf->nr_waiting)
                 msleep_interruptible(1000);
-        device_barrier(conf, sector_nr + RESYNC_SECTORS);
+        raise_barrier(conf);
+        conf->next_resync = sector_nr;
 
         /* Again, very different code for resync and recovery.
          * Both must result in an r10bio with a list of bios that
@@ -1427,7 +1456,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 
                                 r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
                                 spin_lock_irq(&conf->resync_lock);
-                                conf->nr_pending++;
                                 if (rb2) conf->barrier++;
                                 spin_unlock_irq(&conf->resync_lock);
                                 atomic_set(&r10_bio->remaining, 0);
@@ -1500,10 +1528,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
                 int count = 0;
                 r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
 
-                spin_lock_irq(&conf->resync_lock);
-                conf->nr_pending++;
-                spin_unlock_irq(&conf->resync_lock);
-
                 r10_bio->mddev = mddev;
                 atomic_set(&r10_bio->remaining, 0);
 
@@ -1713,8 +1737,7 @@ static int run(mddev_t *mddev)
         INIT_LIST_HEAD(&conf->retry_list);
 
         spin_lock_init(&conf->resync_lock);
-        init_waitqueue_head(&conf->wait_idle);
-        init_waitqueue_head(&conf->wait_resume);
+        init_waitqueue_head(&conf->wait_barrier);
 
         /* need to check that every block has at least one working mirror */
         if (!enough(conf)) {
diff --git a/include/linux/raid/raid10.h b/include/linux/raid/raid10.h
index 60708789c8f9..08317b77802b 100644
--- a/include/linux/raid/raid10.h
+++ b/include/linux/raid/raid10.h
@@ -39,11 +39,11 @@ struct r10_private_data_s {
 
         spinlock_t              resync_lock;
         int                     nr_pending;
+        int                     nr_waiting;
         int                     barrier;
         sector_t                next_resync;
 
-        wait_queue_head_t       wait_idle;
-        wait_queue_head_t       wait_resume;
+        wait_queue_head_t       wait_barrier;
 
         mempool_t               *r10bio_pool;
         mempool_t               *r10buf_pool;