author    Manfred Spraul <manfred@colorfullife.com>    2013-07-08 19:01:23 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2013-07-09 13:33:28 -0400
commit    1a82e9e1d0f1b45f47a97c9e2349020536ff8987 (patch)
tree      956810c32e02f6ae1527db015c6ae622800bd720 /ipc
parent    f5c936c0f267ec58641451cf8b8d39b4c207ee4d (diff)
ipc/sem: separate wait-for-zero and alter tasks into separate queues
Introduce separate queues for operations that do not modify the
semaphore values.

Advantages:

- Simpler logic in check_restart().
- Faster update_queue(): right now, all wait-for-zero operations are
  always tested, even if the semaphore value is not 0.
- wait-for-zero operations regain priority, as in linux <= 3.0.9.

Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Davidlohr Bueso <davidlohr.bueso@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
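As background for the terminology used throughout the patch: a semop() call whose sem_op is 0 is a "wait-for-zero" operation, which blocks until the semaphore reaches zero without modifying it; any non-zero sem_op alters the value. A minimal userspace sketch (not part of the patch; error handling omitted) exercising both kinds:

#include <stdio.h>
#include <sys/ipc.h>
#include <sys/sem.h>

int main(void)
{
	/* private array with one semaphore, initial value 0 */
	int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
	struct sembuf inc  = { .sem_num = 0, .sem_op = 1, .sem_flg = 0 };
	struct sembuf zero = { .sem_num = 0, .sem_op = 0, .sem_flg = IPC_NOWAIT };

	semop(id, &inc, 1);		/* "alter": modifies semval (0 -> 1) */
	if (semop(id, &zero, 1) == -1)	/* "wait-for-zero": semval is 1, would block */
		perror("wait-for-zero");

	semctl(id, 0, IPC_RMID);
	return 0;
}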
Diffstat (limited to 'ipc')
-rw-r--r--  ipc/sem.c | 211
1 file changed, 151 insertions(+), 60 deletions(-)
diff --git a/ipc/sem.c b/ipc/sem.c
index 8498b67a3b62..4d7f88cefada 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -95,7 +95,10 @@ struct sem {
 	int	semval;		/* current value */
 	int	sempid;		/* pid of last operation */
 	spinlock_t	lock;	/* spinlock for fine-grained semtimedop */
-	struct list_head sem_pending; /* pending single-sop operations */
+	struct list_head pending_alter; /* pending single-sop operations */
+					/* that alter the semaphore */
+	struct list_head pending_const; /* pending single-sop operations */
+					/* that do not alter the semaphore */
 } ____cacheline_aligned_in_smp;
 
 /* One queue for each sleeping process in the system. */
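Which of the two new lists a single-sop operation lands on follows directly from its sem_op. A hypothetical predicate (name invented for illustration, not in the patch) capturing the rule:

#include <stdbool.h>
#include <sys/sem.h>	/* struct sembuf */

/* Hypothetical helper, not in the patch: a single-sop operation belongs
 * on pending_alter iff it changes semval; sem_op == 0 is a wait-for-zero
 * and belongs on pending_const. */
static inline bool sop_alters(const struct sembuf *sop)
{
	return sop->sem_op != 0;
}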
@@ -152,7 +155,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
 /*
  * linked list protection:
  *	sem_undo.id_next,
- *	sem_array.sem_pending{,last},
+ *	sem_array.pending{_alter,_const},
  *	sem_array.sem_undo: sem_lock() for read/write
  *	sem_undo.proc_next: only "current" is allowed to read/write that field.
  *
@@ -337,7 +340,7 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
  * Without the check/retry algorithm a lockless wakeup is possible:
  * - queue.status is initialized to -EINTR before blocking.
  * - wakeup is performed by
- *	* unlinking the queue entry from sma->sem_pending
+ *	* unlinking the queue entry from the pending list
  *	* setting queue.status to IN_WAKEUP
  *	  This is the notification for the blocked thread that a
  *	  result value is imminent.
@@ -418,12 +421,14 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	sma->sem_base = (struct sem *) &sma[1];
 
 	for (i = 0; i < nsems; i++) {
-		INIT_LIST_HEAD(&sma->sem_base[i].sem_pending);
+		INIT_LIST_HEAD(&sma->sem_base[i].pending_alter);
+		INIT_LIST_HEAD(&sma->sem_base[i].pending_const);
 		spin_lock_init(&sma->sem_base[i].lock);
 	}
 
 	sma->complex_count = 0;
-	INIT_LIST_HEAD(&sma->sem_pending);
+	INIT_LIST_HEAD(&sma->pending_alter);
+	INIT_LIST_HEAD(&sma->pending_const);
 	INIT_LIST_HEAD(&sma->list_id);
 	sma->sem_nsems = nsems;
 	sma->sem_ctime = get_seconds();
@@ -609,60 +614,132 @@ static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
  * update_queue is O(N^2) when it restarts scanning the whole queue of
  * waiting operations. Therefore this function checks if the restart is
  * really necessary. It is called after a previously waiting operation
- * was completed.
+ * modified the array.
+ * Note that wait-for-zero operations are handled without restart.
  */
 static int check_restart(struct sem_array *sma, struct sem_queue *q)
 {
-	struct sem *curr;
-	struct sem_queue *h;
-
-	/* if the operation didn't modify the array, then no restart */
-	if (q->alter == 0)
-		return 0;
-
-	/* pending complex operations are too difficult to analyse */
-	if (sma->complex_count)
+	/* pending complex alter operations are too difficult to analyse */
+	if (!list_empty(&sma->pending_alter))
 		return 1;
 
 	/* we were a sleeping complex operation. Too difficult */
 	if (q->nsops > 1)
 		return 1;
 
-	curr = sma->sem_base + q->sops[0].sem_num;
-
-	/* No-one waits on this queue */
-	if (list_empty(&curr->sem_pending))
-		return 0;
-
-	/* the new semaphore value */
-	if (curr->semval) {
-		/* It is impossible that someone waits for the new value:
-		 * - q is a previously sleeping simple operation that
-		 *   altered the array. It must be a decrement, because
-		 *   simple increments never sleep.
-		 * - The value is not 0, thus wait-for-zero won't proceed.
-		 * - If there are older (higher priority) decrements
-		 *   in the queue, then they have observed the original
-		 *   semval value and couldn't proceed. The operation
-		 *   decremented to value - thus they won't proceed either.
-		 */
-		BUG_ON(q->sops[0].sem_op >= 0);
-		return 0;
-	}
-	/*
-	 * semval is 0. Check if there are wait-for-zero semops.
-	 * They must be the first entries in the per-semaphore queue
-	 */
-	h = list_first_entry(&curr->sem_pending, struct sem_queue, list);
-	BUG_ON(h->nsops != 1);
-	BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num);
-
-	/* Yes, there is a wait-for-zero semop. Restart */
-	if (h->sops[0].sem_op == 0)
-		return 1;
-
-	/* Again - no-one is waiting for the new value. */
-	return 0;
+	/* It is impossible that someone waits for the new value:
+	 * - complex operations always restart.
+	 * - wait-for-zero are handled separately.
+	 * - q is a previously sleeping simple operation that
+	 *   altered the array. It must be a decrement, because
+	 *   simple increments never sleep.
+	 * - If there are older (higher priority) decrements
+	 *   in the queue, then they have observed the original
+	 *   semval value and couldn't proceed. The operation
+	 *   decremented to value - thus they won't proceed either.
+	 */
+	return 0;
+}
+
+/**
+ * wake_const_ops(sma, semnum, pt) - wake up non-alter tasks
+ * @sma: semaphore array.
+ * @semnum: semaphore that was modified.
+ * @pt: list head for the tasks that must be woken up.
+ *
+ * wake_const_ops must be called after a semaphore in a semaphore array
+ * was set to 0. If complex const operations are pending, wake_const_ops must
+ * be called with semnum = -1, as well as with the number of each modified
+ * semaphore.
+ * The tasks that must be woken up are added to @pt. The return code
+ * is stored in q->pid.
+ * The function returns 1 if at least one operation was completed successfully.
+ */
+static int wake_const_ops(struct sem_array *sma, int semnum,
+				struct list_head *pt)
+{
+	struct sem_queue *q;
+	struct list_head *walk;
+	struct list_head *pending_list;
+	int semop_completed = 0;
+
+	if (semnum == -1)
+		pending_list = &sma->pending_const;
+	else
+		pending_list = &sma->sem_base[semnum].pending_const;
+
+	walk = pending_list->next;
+	while (walk != pending_list) {
+		int error;
+
+		q = container_of(walk, struct sem_queue, list);
+		walk = walk->next;
+
+		error = try_atomic_semop(sma, q->sops, q->nsops,
+					 q->undo, q->pid);
+
+		if (error <= 0) {
+			/* operation completed, remove from queue & wakeup */
+
+			unlink_queue(sma, q);
+
+			wake_up_sem_queue_prepare(pt, q, error);
+			if (error == 0)
+				semop_completed = 1;
+		}
+	}
+	return semop_completed;
+}
+
+/**
+ * do_smart_wakeup_zero(sma, sops, nsops, pt) - wake up all wait-for-zero tasks
+ * @sma: semaphore array
+ * @sops: operations that were performed
+ * @nsops: number of operations
+ * @pt: list head of the tasks that must be woken up.
+ *
+ * do_smart_wakeup_zero() checks all required queues for wait-for-zero
+ * operations, based on the actual changes that were performed on the
+ * semaphore array.
+ * The function returns 1 if at least one operation was completed successfully.
+ */
+static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
+					int nsops, struct list_head *pt)
+{
+	int i;
+	int semop_completed = 0;
+	int got_zero = 0;
+
+	/* first: the per-semaphore queues, if known */
+	if (sops) {
+		for (i = 0; i < nsops; i++) {
+			int num = sops[i].sem_num;
+
+			if (sma->sem_base[num].semval == 0) {
+				got_zero = 1;
+				semop_completed |= wake_const_ops(sma, num, pt);
+			}
+		}
+	} else {
+		/*
+		 * No sops means modified semaphores not known.
+		 * Assume all were changed.
+		 */
+		for (i = 0; i < sma->sem_nsems; i++) {
+			if (sma->sem_base[i].semval == 0) {
+				got_zero = 1;
+				semop_completed |= wake_const_ops(sma, i, pt);
+			}
+		}
+	}
+	/*
+	 * If one of the modified semaphores got 0,
+	 * then check the global queue, too.
+	 */
+	if (got_zero)
+		semop_completed |= wake_const_ops(sma, -1, pt);
+
+	return semop_completed;
 }
 
 
@@ -678,6 +755,8 @@ static int check_restart(struct sem_array *sma, struct sem_queue *q)
  * semaphore.
  * The tasks that must be woken up are added to @pt. The return code
  * is stored in q->pid.
+ * The function internally checks if const operations can now succeed.
+ *
  * The function return 1 if at least one semop was completed successfully.
  */
 static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt)
@@ -688,9 +767,9 @@ static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt)
 	int semop_completed = 0;
 
 	if (semnum == -1)
-		pending_list = &sma->sem_pending;
+		pending_list = &sma->pending_alter;
 	else
-		pending_list = &sma->sem_base[semnum].sem_pending;
+		pending_list = &sma->sem_base[semnum].pending_alter;
 
 again:
 	walk = pending_list->next;
@@ -702,13 +781,12 @@ again:
 
 		/* If we are scanning the single sop, per-semaphore list of
 		 * one semaphore and that semaphore is 0, then it is not
-		 * necessary to scan the "alter" entries: simple increments
+		 * necessary to scan further: simple increments
 		 * that affect only one entry succeed immediately and cannot
 		 * be in the per semaphore pending queue, and decrements
 		 * cannot be successful if the value is already 0.
 		 */
-		if (semnum != -1 && sma->sem_base[semnum].semval == 0 &&
-				q->alter)
+		if (semnum != -1 && sma->sem_base[semnum].semval == 0)
 			break;
 
 		error = try_atomic_semop(sma, q->sops, q->nsops,
@@ -724,6 +802,7 @@ again:
 			restart = 0;
 		} else {
 			semop_completed = 1;
+			do_smart_wakeup_zero(sma, q->sops, q->nsops, pt);
 			restart = check_restart(sma, q);
 		}
 
@@ -742,8 +821,8 @@ again:
  * @otime: force setting otime
  * @pt: list head of the tasks that must be woken up.
  *
- * do_smart_update() does the required called to update_queue, based on the
- * actual changes that were performed on the semaphore array.
+ * do_smart_update() does the required calls to update_queue and wakeup_zero,
+ * based on the actual changes that were performed on the semaphore array.
  * Note that the function does not do the actual wake-up: the caller is
  * responsible for calling wake_up_sem_queue_do(@pt).
  * It is safe to perform this call after dropping all locks.
@@ -754,6 +833,8 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
 	int i;
 	int progress;
 
+	otime |= do_smart_wakeup_zero(sma, sops, nsops, pt);
+
 	progress = 1;
retry_global:
 	if (sma->complex_count) {
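Hoisting this call to the top of do_smart_update() is what restores wait-for-zero priority: pending_const waiters see the new semaphore values before any alter operation is retried. A comment-only sketch of the resulting order (simplified, assuming the surrounding locking; not the literal function body):

/* Simplified control flow of do_smart_update() after this patch. */
static void do_smart_update_order(struct sem_array *sma, struct sembuf *sops,
				  int nsops, int otime, struct list_head *pt)
{
	/* 1. wait-for-zero waiters are served first ... */
	otime |= do_smart_wakeup_zero(sma, sops, nsops, pt);

	/* 2. ... then the alter queues are processed; each completed
	 *    alter op calls do_smart_wakeup_zero() again from
	 *    update_queue(), so a decrement that reaches 0 immediately
	 *    wakes the zero-waiters. */
}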
@@ -813,14 +894,14 @@ static int count_semncnt (struct sem_array * sma, ushort semnum)
 	struct sem_queue * q;
 
 	semncnt = 0;
-	list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) {
+	list_for_each_entry(q, &sma->sem_base[semnum].pending_alter, list) {
 		struct sembuf * sops = q->sops;
 		BUG_ON(sops->sem_num != semnum);
 		if ((sops->sem_op < 0) && !(sops->sem_flg & IPC_NOWAIT))
 			semncnt++;
 	}
 
-	list_for_each_entry(q, &sma->sem_pending, list) {
+	list_for_each_entry(q, &sma->pending_alter, list) {
 		struct sembuf * sops = q->sops;
 		int nsops = q->nsops;
 		int i;
@@ -839,14 +920,14 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
 	struct sem_queue * q;
 
 	semzcnt = 0;
-	list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) {
+	list_for_each_entry(q, &sma->sem_base[semnum].pending_const, list) {
 		struct sembuf * sops = q->sops;
 		BUG_ON(sops->sem_num != semnum);
 		if ((sops->sem_op == 0) && !(sops->sem_flg & IPC_NOWAIT))
 			semzcnt++;
 	}
 
-	list_for_each_entry(q, &sma->sem_pending, list) {
+	list_for_each_entry(q, &sma->pending_const, list) {
 		struct sembuf * sops = q->sops;
 		int nsops = q->nsops;
 		int i;
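count_semncnt() and count_semzcnt() back semctl(GETNCNT) and semctl(GETZCNT); after the split they scan the alter and const queues respectively. For reference, the userspace view (a sketch, assuming a valid semaphore id):

#include <stdio.h>
#include <sys/ipc.h>
#include <sys/sem.h>

/* GETNCNT/GETZCNT report how many tasks sit on the alter and
 * wait-for-zero queues, respectively - the counts computed by
 * count_semncnt()/count_semzcnt() above. */
void report_waiters(int id)
{
	printf("blocked decrements:  %d\n", semctl(id, 0, GETNCNT));
	printf("waiting for zero:    %d\n", semctl(id, 0, GETZCNT));
}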
@@ -884,13 +965,22 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 
 	/* Wake up all pending processes and let them fail with EIDRM. */
 	INIT_LIST_HEAD(&tasks);
-	list_for_each_entry_safe(q, tq, &sma->sem_pending, list) {
+	list_for_each_entry_safe(q, tq, &sma->pending_const, list) {
+		unlink_queue(sma, q);
+		wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
+	}
+
+	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
 		unlink_queue(sma, q);
 		wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
 	}
 	for (i = 0; i < sma->sem_nsems; i++) {
 		struct sem *sem = sma->sem_base + i;
-		list_for_each_entry_safe(q, tq, &sem->sem_pending, list) {
+		list_for_each_entry_safe(q, tq, &sem->pending_const, list) {
+			unlink_queue(sma, q);
+			wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
+		}
+		list_for_each_entry_safe(q, tq, &sem->pending_alter, list) {
 			unlink_queue(sma, q);
 			wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
 		}
@@ -1658,14 +1748,15 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 		curr = &sma->sem_base[sops->sem_num];
 
 		if (alter)
-			list_add_tail(&queue.list, &curr->sem_pending);
+			list_add_tail(&queue.list, &curr->pending_alter);
 		else
-			list_add(&queue.list, &curr->sem_pending);
+			list_add_tail(&queue.list, &curr->pending_const);
 	} else {
 		if (alter)
-			list_add_tail(&queue.list, &sma->sem_pending);
+			list_add_tail(&queue.list, &sma->pending_alter);
 		else
-			list_add(&queue.list, &sma->sem_pending);
+			list_add_tail(&queue.list, &sma->pending_const);
+
 		sma->complex_count++;
 	}
 
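End to end, the new path can be exercised from userspace: a wait-for-zero sleeper parks on pending_const and is picked up by do_smart_wakeup_zero() once a decrement drives the value to zero. A small demonstration program (not from the patch; error handling omitted, and the sleep() is a crude way to let the child block first):

#include <stdio.h>
#include <unistd.h>
#include <sys/ipc.h>
#include <sys/sem.h>
#include <sys/wait.h>

union semun { int val; };	/* Linux: the caller must define this */

int main(void)
{
	int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
	struct sembuf op;

	/* start at 1 so the wait-for-zero below has to sleep */
	semctl(id, 0, SETVAL, (union semun){ .val = 1 });

	if (fork() == 0) {
		op = (struct sembuf){ .sem_num = 0, .sem_op = 0, .sem_flg = 0 };
		semop(id, &op, 1);	/* sleeps on sem->pending_const */
		printf("child: semaphore reached zero\n");
		_exit(0);
	}

	sleep(1);
	op = (struct sembuf){ .sem_num = 0, .sem_op = -1, .sem_flg = 0 };
	semop(id, &op, 1);	/* alter op drives semval to 0 -> wakeup */

	wait(NULL);
	semctl(id, 0, IPC_RMID);
	return 0;
}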