aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLukas Czerner <lczerner@redhat.com>2012-11-30 05:42:40 -0500
committerJens Axboe <axboe@kernel.dk>2012-11-30 05:47:57 -0500
commiteed8c02e680c04cd737e0a9cef74e68d8eb0cefa (patch)
tree8bd2bd10b0c02bb8a579ca3fd4f1482e5335c747
parentd33b98fc82b0908e91fb05ae081acaed7323f9d2 (diff)
wait: add wait_event_lock_irq() interface
New wait_event{_interruptible}_lock_irq{_cmd} macros added. This commit moves the private wait_event_lock_irq() macro from MD to the regular wait includes, introduces the new macro wait_event_lock_irq_cmd() instead of the old method of omitting the cmd parameter, which is ugly, and makes use of the new macros in MD. It also introduces the _interruptible_ variant. The new interface is useful when one has a special lock to protect data structures used in the condition, or when one also needs to invoke "cmd" before putting the task to sleep. All new macros are expected to be called with the lock taken. The lock is released before sleep and is reacquired afterwards. We will leave the macro with the lock held. Note to DM: IMO this should also fix a theoretical race on the waitqueue while using wait_event_lock_irq() and wait_event() simultaneously, because of the lack of locking around current state setting and wait queue removal. Signed-off-by: Lukas Czerner <lczerner@redhat.com> Cc: Neil Brown <neilb@suse.de> Cc: David Howells <dhowells@redhat.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--drivers/md/md.c2
-rw-r--r--drivers/md/md.h26
-rw-r--r--drivers/md/raid1.c15
-rw-r--r--drivers/md/raid10.c15
-rw-r--r--drivers/md/raid5.c12
-rw-r--r--include/linux/wait.h164
6 files changed, 184 insertions, 50 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9ab768acfb62..7e513a38cec7 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -452,7 +452,7 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
452 spin_lock_irq(&mddev->write_lock); 452 spin_lock_irq(&mddev->write_lock);
453 wait_event_lock_irq(mddev->sb_wait, 453 wait_event_lock_irq(mddev->sb_wait,
454 !mddev->flush_bio, 454 !mddev->flush_bio,
455 mddev->write_lock, /*nothing*/); 455 mddev->write_lock);
456 mddev->flush_bio = bio; 456 mddev->flush_bio = bio;
457 spin_unlock_irq(&mddev->write_lock); 457 spin_unlock_irq(&mddev->write_lock);
458 458
diff --git a/drivers/md/md.h b/drivers/md/md.h
index af443ab868db..1e2fc3d9c74c 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -551,32 +551,6 @@ struct md_thread {
551 551
552#define THREAD_WAKEUP 0 552#define THREAD_WAKEUP 0
553 553
554#define __wait_event_lock_irq(wq, condition, lock, cmd) \
555do { \
556 wait_queue_t __wait; \
557 init_waitqueue_entry(&__wait, current); \
558 \
559 add_wait_queue(&wq, &__wait); \
560 for (;;) { \
561 set_current_state(TASK_UNINTERRUPTIBLE); \
562 if (condition) \
563 break; \
564 spin_unlock_irq(&lock); \
565 cmd; \
566 schedule(); \
567 spin_lock_irq(&lock); \
568 } \
569 current->state = TASK_RUNNING; \
570 remove_wait_queue(&wq, &__wait); \
571} while (0)
572
573#define wait_event_lock_irq(wq, condition, lock, cmd) \
574do { \
575 if (condition) \
576 break; \
577 __wait_event_lock_irq(wq, condition, lock, cmd); \
578} while (0)
579
580static inline void safe_put_page(struct page *p) 554static inline void safe_put_page(struct page *p)
581{ 555{
582 if (p) put_page(p); 556 if (p) put_page(p);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 8034fbd6190c..534dd74a2da0 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -822,7 +822,7 @@ static void raise_barrier(struct r1conf *conf)
822 822
823 /* Wait until no block IO is waiting */ 823 /* Wait until no block IO is waiting */
824 wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting, 824 wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
825 conf->resync_lock, ); 825 conf->resync_lock);
826 826
827 /* block any new IO from starting */ 827 /* block any new IO from starting */
828 conf->barrier++; 828 conf->barrier++;
@@ -830,7 +830,7 @@ static void raise_barrier(struct r1conf *conf)
830 /* Now wait for all pending IO to complete */ 830 /* Now wait for all pending IO to complete */
831 wait_event_lock_irq(conf->wait_barrier, 831 wait_event_lock_irq(conf->wait_barrier,
832 !conf->nr_pending && conf->barrier < RESYNC_DEPTH, 832 !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
833 conf->resync_lock, ); 833 conf->resync_lock);
834 834
835 spin_unlock_irq(&conf->resync_lock); 835 spin_unlock_irq(&conf->resync_lock);
836} 836}
@@ -864,8 +864,7 @@ static void wait_barrier(struct r1conf *conf)
864 (conf->nr_pending && 864 (conf->nr_pending &&
865 current->bio_list && 865 current->bio_list &&
866 !bio_list_empty(current->bio_list)), 866 !bio_list_empty(current->bio_list)),
867 conf->resync_lock, 867 conf->resync_lock);
868 );
869 conf->nr_waiting--; 868 conf->nr_waiting--;
870 } 869 }
871 conf->nr_pending++; 870 conf->nr_pending++;
@@ -898,10 +897,10 @@ static void freeze_array(struct r1conf *conf)
898 spin_lock_irq(&conf->resync_lock); 897 spin_lock_irq(&conf->resync_lock);
899 conf->barrier++; 898 conf->barrier++;
900 conf->nr_waiting++; 899 conf->nr_waiting++;
901 wait_event_lock_irq(conf->wait_barrier, 900 wait_event_lock_irq_cmd(conf->wait_barrier,
902 conf->nr_pending == conf->nr_queued+1, 901 conf->nr_pending == conf->nr_queued+1,
903 conf->resync_lock, 902 conf->resync_lock,
904 flush_pending_writes(conf)); 903 flush_pending_writes(conf));
905 spin_unlock_irq(&conf->resync_lock); 904 spin_unlock_irq(&conf->resync_lock);
906} 905}
907static void unfreeze_array(struct r1conf *conf) 906static void unfreeze_array(struct r1conf *conf)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 906ccbd0f7dc..9a08f621b27d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -952,7 +952,7 @@ static void raise_barrier(struct r10conf *conf, int force)
952 952
953 /* Wait until no block IO is waiting (unless 'force') */ 953 /* Wait until no block IO is waiting (unless 'force') */
954 wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting, 954 wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
955 conf->resync_lock, ); 955 conf->resync_lock);
956 956
957 /* block any new IO from starting */ 957 /* block any new IO from starting */
958 conf->barrier++; 958 conf->barrier++;
@@ -960,7 +960,7 @@ static void raise_barrier(struct r10conf *conf, int force)
960 /* Now wait for all pending IO to complete */ 960 /* Now wait for all pending IO to complete */
961 wait_event_lock_irq(conf->wait_barrier, 961 wait_event_lock_irq(conf->wait_barrier,
962 !conf->nr_pending && conf->barrier < RESYNC_DEPTH, 962 !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
963 conf->resync_lock, ); 963 conf->resync_lock);
964 964
965 spin_unlock_irq(&conf->resync_lock); 965 spin_unlock_irq(&conf->resync_lock);
966} 966}
@@ -993,8 +993,7 @@ static void wait_barrier(struct r10conf *conf)
993 (conf->nr_pending && 993 (conf->nr_pending &&
994 current->bio_list && 994 current->bio_list &&
995 !bio_list_empty(current->bio_list)), 995 !bio_list_empty(current->bio_list)),
996 conf->resync_lock, 996 conf->resync_lock);
997 );
998 conf->nr_waiting--; 997 conf->nr_waiting--;
999 } 998 }
1000 conf->nr_pending++; 999 conf->nr_pending++;
@@ -1027,10 +1026,10 @@ static void freeze_array(struct r10conf *conf)
1027 spin_lock_irq(&conf->resync_lock); 1026 spin_lock_irq(&conf->resync_lock);
1028 conf->barrier++; 1027 conf->barrier++;
1029 conf->nr_waiting++; 1028 conf->nr_waiting++;
1030 wait_event_lock_irq(conf->wait_barrier, 1029 wait_event_lock_irq_cmd(conf->wait_barrier,
1031 conf->nr_pending == conf->nr_queued+1, 1030 conf->nr_pending == conf->nr_queued+1,
1032 conf->resync_lock, 1031 conf->resync_lock,
1033 flush_pending_writes(conf)); 1032 flush_pending_writes(conf));
1034 1033
1035 spin_unlock_irq(&conf->resync_lock); 1034 spin_unlock_irq(&conf->resync_lock);
1036} 1035}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index c5439dce0295..2bf617d6f4fd 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -466,7 +466,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
466 do { 466 do {
467 wait_event_lock_irq(conf->wait_for_stripe, 467 wait_event_lock_irq(conf->wait_for_stripe,
468 conf->quiesce == 0 || noquiesce, 468 conf->quiesce == 0 || noquiesce,
469 conf->device_lock, /* nothing */); 469 conf->device_lock);
470 sh = __find_stripe(conf, sector, conf->generation - previous); 470 sh = __find_stripe(conf, sector, conf->generation - previous);
471 if (!sh) { 471 if (!sh) {
472 if (!conf->inactive_blocked) 472 if (!conf->inactive_blocked)
@@ -480,8 +480,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
480 (atomic_read(&conf->active_stripes) 480 (atomic_read(&conf->active_stripes)
481 < (conf->max_nr_stripes *3/4) 481 < (conf->max_nr_stripes *3/4)
482 || !conf->inactive_blocked), 482 || !conf->inactive_blocked),
483 conf->device_lock, 483 conf->device_lock);
484 );
485 conf->inactive_blocked = 0; 484 conf->inactive_blocked = 0;
486 } else 485 } else
487 init_stripe(sh, sector, previous); 486 init_stripe(sh, sector, previous);
@@ -1646,8 +1645,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
1646 spin_lock_irq(&conf->device_lock); 1645 spin_lock_irq(&conf->device_lock);
1647 wait_event_lock_irq(conf->wait_for_stripe, 1646 wait_event_lock_irq(conf->wait_for_stripe,
1648 !list_empty(&conf->inactive_list), 1647 !list_empty(&conf->inactive_list),
1649 conf->device_lock, 1648 conf->device_lock);
1650 );
1651 osh = get_free_stripe(conf); 1649 osh = get_free_stripe(conf);
1652 spin_unlock_irq(&conf->device_lock); 1650 spin_unlock_irq(&conf->device_lock);
1653 atomic_set(&nsh->count, 1); 1651 atomic_set(&nsh->count, 1);
@@ -4000,7 +3998,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
4000 spin_lock_irq(&conf->device_lock); 3998 spin_lock_irq(&conf->device_lock);
4001 wait_event_lock_irq(conf->wait_for_stripe, 3999 wait_event_lock_irq(conf->wait_for_stripe,
4002 conf->quiesce == 0, 4000 conf->quiesce == 0,
4003 conf->device_lock, /* nothing */); 4001 conf->device_lock);
4004 atomic_inc(&conf->active_aligned_reads); 4002 atomic_inc(&conf->active_aligned_reads);
4005 spin_unlock_irq(&conf->device_lock); 4003 spin_unlock_irq(&conf->device_lock);
4006 4004
@@ -6088,7 +6086,7 @@ static void raid5_quiesce(struct mddev *mddev, int state)
6088 wait_event_lock_irq(conf->wait_for_stripe, 6086 wait_event_lock_irq(conf->wait_for_stripe,
6089 atomic_read(&conf->active_stripes) == 0 && 6087 atomic_read(&conf->active_stripes) == 0 &&
6090 atomic_read(&conf->active_aligned_reads) == 0, 6088 atomic_read(&conf->active_aligned_reads) == 0,
6091 conf->device_lock, /* nothing */); 6089 conf->device_lock);
6092 conf->quiesce = 1; 6090 conf->quiesce = 1;
6093 spin_unlock_irq(&conf->device_lock); 6091 spin_unlock_irq(&conf->device_lock);
6094 /* allow reshape to continue */ 6092 /* allow reshape to continue */
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 168dfe122dd3..7cb64d4b499d 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -550,6 +550,170 @@ do { \
550 __ret; \ 550 __ret; \
551}) 551})
552 552
553
554#define __wait_event_lock_irq(wq, condition, lock, cmd) \
555do { \
556 DEFINE_WAIT(__wait); \
557 \
558 for (;;) { \
559 prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
560 if (condition) \
561 break; \
562 spin_unlock_irq(&lock); \
563 cmd; \
564 schedule(); \
565 spin_lock_irq(&lock); \
566 } \
567 finish_wait(&wq, &__wait); \
568} while (0)
569
570/**
571 * wait_event_lock_irq_cmd - sleep until a condition gets true. The
572 * condition is checked under the lock. This
573 * is expected to be called with the lock
574 * taken.
575 * @wq: the waitqueue to wait on
576 * @condition: a C expression for the event to wait for
577 * @lock: a locked spinlock_t, which will be released before cmd
578 * and schedule() and reacquired afterwards.
579 * @cmd: a command which is invoked outside the critical section before
580 * sleep
581 *
582 * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
583 * @condition evaluates to true. The @condition is checked each time
584 * the waitqueue @wq is woken up.
585 *
586 * wake_up() has to be called after changing any variable that could
587 * change the result of the wait condition.
588 *
589 * This is supposed to be called while holding the lock. The lock is
590 * dropped before invoking the cmd and going to sleep and is reacquired
591 * afterwards.
592 */
593#define wait_event_lock_irq_cmd(wq, condition, lock, cmd) \
594do { \
595 if (condition) \
596 break; \
597 __wait_event_lock_irq(wq, condition, lock, cmd); \
598} while (0)
599
600/**
601 * wait_event_lock_irq - sleep until a condition gets true. The
602 * condition is checked under the lock. This
603 * is expected to be called with the lock
604 * taken.
605 * @wq: the waitqueue to wait on
606 * @condition: a C expression for the event to wait for
607 * @lock: a locked spinlock_t, which will be released before schedule()
608 * and reacquired afterwards.
609 *
610 * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
611 * @condition evaluates to true. The @condition is checked each time
612 * the waitqueue @wq is woken up.
613 *
614 * wake_up() has to be called after changing any variable that could
615 * change the result of the wait condition.
616 *
617 * This is supposed to be called while holding the lock. The lock is
618 * dropped before going to sleep and is reacquired afterwards.
619 */
620#define wait_event_lock_irq(wq, condition, lock) \
621do { \
622 if (condition) \
623 break; \
624 __wait_event_lock_irq(wq, condition, lock, ); \
625} while (0)
626
627
628#define __wait_event_interruptible_lock_irq(wq, condition, \
629 lock, ret, cmd) \
630do { \
631 DEFINE_WAIT(__wait); \
632 \
633 for (;;) { \
634 prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \
635 if (condition) \
636 break; \
637 if (signal_pending(current)) { \
638 ret = -ERESTARTSYS; \
639 break; \
640 } \
641 spin_unlock_irq(&lock); \
642 cmd; \
643 schedule(); \
644 spin_lock_irq(&lock); \
645 } \
646 finish_wait(&wq, &__wait); \
647} while (0)
648
649/**
650 * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true.
651 * The condition is checked under the lock. This is expected to
652 * be called with the lock taken.
653 * @wq: the waitqueue to wait on
654 * @condition: a C expression for the event to wait for
655 * @lock: a locked spinlock_t, which will be released before cmd and
656 * schedule() and reacquired afterwards.
657 * @cmd: a command which is invoked outside the critical section before
658 * sleep
659 *
660 * The process is put to sleep (TASK_INTERRUPTIBLE) until the
661 * @condition evaluates to true or a signal is received. The @condition is
662 * checked each time the waitqueue @wq is woken up.
663 *
664 * wake_up() has to be called after changing any variable that could
665 * change the result of the wait condition.
666 *
667 * This is supposed to be called while holding the lock. The lock is
668 * dropped before invoking the cmd and going to sleep and is reacquired
669 * afterwards.
670 *
671 * The macro will return -ERESTARTSYS if it was interrupted by a signal
672 * and 0 if @condition evaluated to true.
673 */
674#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd) \
675({ \
676 int __ret = 0; \
677 \
678 if (!(condition)) \
679 __wait_event_interruptible_lock_irq(wq, condition, \
680 lock, __ret, cmd); \
681 __ret; \
682})
683
684/**
685 * wait_event_interruptible_lock_irq - sleep until a condition gets true.
686 * The condition is checked under the lock. This is expected
687 * to be called with the lock taken.
688 * @wq: the waitqueue to wait on
689 * @condition: a C expression for the event to wait for
690 * @lock: a locked spinlock_t, which will be released before schedule()
691 * and reacquired afterwards.
692 *
693 * The process is put to sleep (TASK_INTERRUPTIBLE) until the
694 * @condition evaluates to true or signal is received. The @condition is
695 * checked each time the waitqueue @wq is woken up.
696 *
697 * wake_up() has to be called after changing any variable that could
698 * change the result of the wait condition.
699 *
700 * This is supposed to be called while holding the lock. The lock is
701 * dropped before going to sleep and is reacquired afterwards.
702 *
703 * The macro will return -ERESTARTSYS if it was interrupted by a signal
704 * and 0 if @condition evaluated to true.
705 */
706#define wait_event_interruptible_lock_irq(wq, condition, lock) \
707({ \
708 int __ret = 0; \
709 \
710 if (!(condition)) \
711 __wait_event_interruptible_lock_irq(wq, condition, \
712 lock, __ret, ); \
713 __ret; \
714})
715
716
553/* 717/*
554 * These are the old interfaces to sleep waiting for an event. 718 * These are the old interfaces to sleep waiting for an event.
555 * They are racy. DO NOT use them, use the wait_event* interfaces above. 719 * They are racy. DO NOT use them, use the wait_event* interfaces above.