Diffstat (limited to 'ipc/sem.c')
-rw-r--r-- | ipc/sem.c | 512 |
1 file changed, 236 insertions, 276 deletions
@@ -11,6 +11,7 @@ | |||
11 | * (c) 2001 Red Hat Inc | 11 | * (c) 2001 Red Hat Inc |
12 | * Lockless wakeup | 12 | * Lockless wakeup |
13 | * (c) 2003 Manfred Spraul <manfred@colorfullife.com> | 13 | * (c) 2003 Manfred Spraul <manfred@colorfullife.com> |
14 | * (c) 2016 Davidlohr Bueso <dave@stgolabs.net> | ||
14 | * Further wakeup optimizations, documentation | 15 | * Further wakeup optimizations, documentation |
15 | * (c) 2010 Manfred Spraul <manfred@colorfullife.com> | 16 | * (c) 2010 Manfred Spraul <manfred@colorfullife.com> |
16 | * | 17 | * |
@@ -53,15 +54,11 @@ | |||
53 | * Semaphores are actively given to waiting tasks (necessary for FIFO). | 54 | * Semaphores are actively given to waiting tasks (necessary for FIFO). |
54 | * (see update_queue()) | 55 | * (see update_queue()) |
55 | * - To improve the scalability, the actual wake-up calls are performed after | 56 | * - To improve the scalability, the actual wake-up calls are performed after |
56 | * dropping all locks. (see wake_up_sem_queue_prepare(), | 57 | * dropping all locks. (see wake_up_sem_queue_prepare()) |
57 | * wake_up_sem_queue_do()) | ||
58 | * - All work is done by the waker; the woken up task does not have to do | 58 | * - All work is done by the waker; the woken up task does not have to do |
59 | * anything - not even acquiring a lock or dropping a refcount. | 59 | * anything - not even acquiring a lock or dropping a refcount. |
60 | * - A woken up task may not even touch the semaphore array anymore; it may | 60 | * - A woken up task may not even touch the semaphore array anymore; it may |
61 | * have been destroyed already by a semctl(RMID). | 61 | * have been destroyed already by a semctl(RMID). |
62 | * - The synchronizations between wake-ups due to a timeout/signal and a | ||
63 | * wake-up due to a completed semaphore operation is achieved by using an | ||
64 | * intermediate state (IN_WAKEUP). | ||
65 | * - UNDO values are stored in an array (one per process and per | 62 | * - UNDO values are stored in an array (one per process and per |
66 | * semaphore array, lazily allocated). For backwards compatibility, multiple | 63 | * semaphore array, lazily allocated). For backwards compatibility, multiple |
67 | * modes for the UNDO variables are supported (per process, per thread) | 64 | * modes for the UNDO variables are supported (per process, per thread) |
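
The redesign leans on the kernel's generic lockless wake-queue API from include/linux/sched/wake_q.h. As a rough sketch (not taken from this patch; lock and task are stand-ins), the waker-side pattern it adopts is:

    DEFINE_WAKE_Q(wake_q);

    spin_lock(&lock);
    /* ... decide which tasks may proceed ... */
    wake_q_add(&wake_q, task);   /* queue the task; takes a task reference */
    spin_unlock(&lock);

    wake_up_q(&wake_q);          /* perform the wakeups with no locks held */
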
@@ -118,7 +115,8 @@ struct sem_queue { | |||
118 | struct sembuf *sops; /* array of pending operations */ | 115 | struct sembuf *sops; /* array of pending operations */ |
119 | struct sembuf *blocking; /* the operation that blocked */ | 116 | struct sembuf *blocking; /* the operation that blocked */ |
120 | int nsops; /* number of operations */ | 117 | int nsops; /* number of operations */ |
121 | int alter; /* does *sops alter the array? */ | 118 | bool alter; /* does *sops alter the array? */ |
119 | bool dupsop; /* multiple sops on the same sem_num */ ||
122 | }; | 120 | }; |
123 | 121 | ||
124 | /* Each task has a list of undo requests. They are executed automatically | 122 | /* Each task has a list of undo requests. They are executed automatically |
@@ -416,29 +414,6 @@ static inline void sem_unlock(struct sem_array *sma, int locknum) | |||
416 | * | 414 | * |
417 | * The caller holds the RCU read lock. | 415 | * The caller holds the RCU read lock. |
418 | */ | 416 | */ |
419 | static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns, | ||
420 | int id, struct sembuf *sops, int nsops, int *locknum) | ||
421 | { | ||
422 | struct kern_ipc_perm *ipcp; | ||
423 | struct sem_array *sma; | ||
424 | |||
425 | ipcp = ipc_obtain_object_idr(&sem_ids(ns), id); | ||
426 | if (IS_ERR(ipcp)) | ||
427 | return ERR_CAST(ipcp); | ||
428 | |||
429 | sma = container_of(ipcp, struct sem_array, sem_perm); | ||
430 | *locknum = sem_lock(sma, sops, nsops); | ||
431 | |||
432 | /* ipc_rmid() may have already freed the ID while sem_lock | ||
433 | * was spinning: verify that the structure is still valid | ||
434 | */ | ||
435 | if (ipc_valid_object(ipcp)) | ||
436 | return container_of(ipcp, struct sem_array, sem_perm); | ||
437 | |||
438 | sem_unlock(sma, *locknum); | ||
439 | return ERR_PTR(-EINVAL); | ||
440 | } | ||
441 | |||
442 | static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id) | 417 | static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id) |
443 | { | 418 | { |
444 | struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id); | 419 | struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id); |
@@ -471,40 +446,6 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) | |||
471 | ipc_rmid(&sem_ids(ns), &s->sem_perm); | 446 | ipc_rmid(&sem_ids(ns), &s->sem_perm); |
472 | } | 447 | } |
473 | 448 | ||
474 | /* | ||
475 | * Lockless wakeup algorithm: | ||
476 | * Without the check/retry algorithm a lockless wakeup is possible: | ||
477 | * - queue.status is initialized to -EINTR before blocking. | ||
478 | * - wakeup is performed by | ||
479 | * * unlinking the queue entry from the pending list | ||
480 | * * setting queue.status to IN_WAKEUP | ||
481 | * This is the notification for the blocked thread that a | ||
482 | * result value is imminent. | ||
483 | * * call wake_up_process | ||
484 | * * set queue.status to the final value. | ||
485 | * - the previously blocked thread checks queue.status: | ||
486 | * * if it's IN_WAKEUP, then it must wait until the value changes | ||
487 | * * if it's not -EINTR, then the operation was completed by | ||
488 | * update_queue. semtimedop can return queue.status without | ||
489 | * performing any operation on the sem array. | ||
490 | * * otherwise it must acquire the spinlock and check what's up. | ||
491 | * | ||
492 | * The two-stage algorithm is necessary to protect against the following | ||
493 | * races: | ||
494 | * - if queue.status is set after wake_up_process, then the woken up idle | ||
495 | * thread could race forward and try (and fail) to acquire sma->lock | ||
496 | * before update_queue had a chance to set queue.status | ||
497 | * - if queue.status is written before wake_up_process and if the | ||
498 | * blocked process is woken up by a signal between writing | ||
499 | * queue.status and the wake_up_process, then the woken up | ||
500 | * process could return from semtimedop and die by calling | ||
501 | * sys_exit before wake_up_process is called. Then wake_up_process | ||
502 | * will oops, because the task structure is already invalid. | ||
503 | * (yes, this happened on s390 with sysv msg). | ||
504 | * | ||
505 | */ | ||
506 | #define IN_WAKEUP 1 | ||
507 | |||
508 | /** | 449 | /** |
509 | * newary - Create a new semaphore set | 450 | * newary - Create a new semaphore set |
510 | * @ns: namespace | 451 | * @ns: namespace |
@@ -624,15 +565,23 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) | |||
624 | } | 565 | } |
625 | 566 | ||
626 | /** | 567 | /** |
627 | * perform_atomic_semop - Perform (if possible) a semaphore operation | 568 | * perform_atomic_semop[_slow] - Attempt to perform semaphore |
569 | * operations on a given array. | ||
628 | * @sma: semaphore array | 570 | * @sma: semaphore array |
629 | * @q: struct sem_queue that describes the operation | 571 | * @q: struct sem_queue that describes the operation |
630 | * | 572 | * |
573 | * Whether the caller blocks is determined by the value | ||
574 | * indicated by the semaphore operation (sem_op): | ||
575 | * | ||
576 | * (1) >0 never blocks. | ||
577 | * (2) 0 (wait-for-zero operation): blocks while semval is non-zero. | ||
578 | * (3) <0: blocks if decrementing semval would make it negative. | ||
579 | * | ||
631 | * Returns 0 if the operation was possible. | 580 | * Returns 0 if the operation was possible. |
632 | * Returns 1 if the operation is impossible; the caller must sleep. | 581 | * Returns 1 if the operation is impossible; the caller must sleep. |
633 | * Negative values are error codes. | 582 | * Returns <0 for error codes. |
634 | */ | 583 | */ |
635 | static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) | 584 | static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q) |
636 | { | 585 | { |
637 | int result, sem_op, nsops, pid; | 586 | int result, sem_op, nsops, pid; |
638 | struct sembuf *sop; | 587 | struct sembuf *sop; |
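
For orientation, the three sem_op cases correspond to ordinary semop() usage from userspace. A minimal illustration (assumes semid was returned by an earlier semget() call):

    #include <sys/ipc.h>
    #include <sys/sem.h>

    struct sembuf ops[] = {
        { .sem_num = 0, .sem_op = +1 },  /* (1) increment: never blocks */
        { .sem_num = 1, .sem_op =  0 },  /* (2) wait-for-zero: blocks while semval != 0 */
        { .sem_num = 2, .sem_op = -1,    /* (3) decrement: would block, so ... */
          .sem_flg = IPC_NOWAIT },       /* ... it fails with EAGAIN instead */
    };
    semop(semid, ops, 3);
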
@@ -703,51 +652,84 @@ undo: | |||
703 | return result; | 652 | return result; |
704 | } | 653 | } |
705 | 654 | ||
706 | /** wake_up_sem_queue_prepare(q, error): Prepare wake-up | 655 | static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) |
707 | * @q: queue entry that must be signaled | ||
708 | * @error: Error value for the signal | ||
709 | * | ||
710 | * Prepare the wake-up of the queue entry q. | ||
711 | */ | ||
712 | static void wake_up_sem_queue_prepare(struct list_head *pt, | ||
713 | struct sem_queue *q, int error) | ||
714 | { | 656 | { |
715 | if (list_empty(pt)) { | 657 | int result, sem_op, nsops; |
716 | /* | 658 | struct sembuf *sop; |
717 | * Hold preempt off so that we don't get preempted and have the | 659 | struct sem *curr; |
718 | * wakee busy-wait until we're scheduled back on. | 660 | struct sembuf *sops; |
719 | */ | 661 | struct sem_undo *un; |
720 | preempt_disable(); | 662 | |
663 | sops = q->sops; | ||
664 | nsops = q->nsops; | ||
665 | un = q->undo; | ||
666 | |||
667 | if (unlikely(q->dupsop)) | ||
668 | return perform_atomic_semop_slow(sma, q); | ||
669 | |||
670 | /* | ||
671 | * We scan the semaphore set twice: first to ensure that the entire ||
672 | * operation can succeed, thereby avoiding any pointless writes to ||
673 | * shared memory that would have to be undone again if the task ||
674 | * must block until the operations can go through. ||
675 | */ | ||
676 | for (sop = sops; sop < sops + nsops; sop++) { | ||
677 | curr = sma->sem_base + sop->sem_num; | ||
678 | sem_op = sop->sem_op; | ||
679 | result = curr->semval; | ||
680 | |||
681 | if (!sem_op && result) | ||
682 | goto would_block; /* wait-for-zero */ | ||
683 | |||
684 | result += sem_op; | ||
685 | if (result < 0) | ||
686 | goto would_block; | ||
687 | |||
688 | if (result > SEMVMX) | ||
689 | return -ERANGE; | ||
690 | |||
691 | if (sop->sem_flg & SEM_UNDO) { | ||
692 | int undo = un->semadj[sop->sem_num] - sem_op; | ||
693 | |||
694 | /* Exceeding the undo range is an error. */ | ||
695 | if (undo < (-SEMAEM - 1) || undo > SEMAEM) | ||
696 | return -ERANGE; | ||
697 | } | ||
698 | } | ||
699 | |||
700 | for (sop = sops; sop < sops + nsops; sop++) { | ||
701 | curr = sma->sem_base + sop->sem_num; | ||
702 | sem_op = sop->sem_op; | ||
703 | result = curr->semval; | ||
704 | |||
705 | if (sop->sem_flg & SEM_UNDO) { | ||
706 | int undo = un->semadj[sop->sem_num] - sem_op; | ||
707 | |||
708 | un->semadj[sop->sem_num] = undo; | ||
709 | } | ||
710 | curr->semval += sem_op; | ||
711 | curr->sempid = q->pid; | ||
721 | } | 712 | } |
722 | q->status = IN_WAKEUP; | ||
723 | q->pid = error; | ||
724 | 713 | ||
725 | list_add_tail(&q->list, pt); | 714 | return 0; |
715 | |||
716 | would_block: | ||
717 | q->blocking = sop; | ||
718 | return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1; | ||
726 | } | 719 | } |
727 | 720 | ||
728 | /** | 721 | static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error, |
729 | * wake_up_sem_queue_do - do the actual wake-up | 722 | struct wake_q_head *wake_q) |
730 | * @pt: list of tasks to be woken up | ||
731 | * | ||
732 | * Do the actual wake-up. | ||
733 | * The function is called without any locks held, thus the semaphore array | ||
734 | * could be destroyed already and the tasks can disappear as soon as the | ||
735 | * status is set to the actual return code. | ||
736 | */ | ||
737 | static void wake_up_sem_queue_do(struct list_head *pt) | ||
738 | { | 723 | { |
739 | struct sem_queue *q, *t; | 724 | wake_q_add(wake_q, q->sleeper); |
740 | int did_something; | 725 | /* |
741 | 726 | * Rely on the above implicit barrier, such that we can | |
742 | did_something = !list_empty(pt); | 727 | * ensure that we hold a reference to the task before setting |
743 | list_for_each_entry_safe(q, t, pt, list) { | 728 | * q->status. Otherwise we could race with do_exit if the |
744 | wake_up_process(q->sleeper); | 729 | * task is awoken by an external event before calling |
745 | /* q can disappear immediately after writing q->status. */ | 730 | * wake_up_process(). |
746 | smp_wmb(); | 731 | */ |
747 | q->status = q->pid; | 732 | WRITE_ONCE(q->status, error); |
748 | } | ||
749 | if (did_something) | ||
750 | preempt_enable(); | ||
751 | } | 733 | } |
752 | 734 | ||
753 | static void unlink_queue(struct sem_array *sma, struct sem_queue *q) | 735 | static void unlink_queue(struct sem_array *sma, struct sem_queue *q) |
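
The reason duplicate sem_nums must fall back to perform_atomic_semop_slow() is that the first pass above validates every sop against the still-unmodified semval. A hypothetical counterexample:

    /*
     * semval[0] == 1 and ops = { {0, -1, 0}, {0, -1, 0} }:
     * pass one checks each sop against semval == 1 (1 - 1 == 0, fine
     * both times), yet committing both would drive semval to -1.
     * The slow path applies each sop as it scans and rolls back via
     * its undo: label, so it handles this case correctly.
     */
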
@@ -767,7 +749,7 @@ static void unlink_queue(struct sem_array *sma, struct sem_queue *q) | |||
767 | * modified the array. | 749 | * modified the array. |
768 | * Note that wait-for-zero operations are handled without restart. | 750 | * Note that wait-for-zero operations are handled without restart. |
769 | */ | 751 | */ |
770 | static int check_restart(struct sem_array *sma, struct sem_queue *q) | 752 | static inline int check_restart(struct sem_array *sma, struct sem_queue *q) |
771 | { | 753 | { |
772 | /* pending complex alter operations are too difficult to analyse */ | 754 | /* pending complex alter operations are too difficult to analyse */ |
773 | if (!list_empty(&sma->pending_alter)) | 755 | if (!list_empty(&sma->pending_alter)) |
@@ -795,21 +777,20 @@ static int check_restart(struct sem_array *sma, struct sem_queue *q) | |||
795 | * wake_const_ops - wake up non-alter tasks | 777 | * wake_const_ops - wake up non-alter tasks |
796 | * @sma: semaphore array. | 778 | * @sma: semaphore array. |
797 | * @semnum: semaphore that was modified. | 779 | * @semnum: semaphore that was modified. |
798 | * @pt: list head for the tasks that must be woken up. | 780 | * @wake_q: lockless wake-queue head. |
799 | * | 781 | * |
800 | * wake_const_ops must be called after a semaphore in a semaphore array | 782 | * wake_const_ops must be called after a semaphore in a semaphore array |
801 | * was set to 0. If complex const operations are pending, wake_const_ops must | 783 | * was set to 0. If complex const operations are pending, wake_const_ops must |
802 | * be called with semnum = -1, as well as with the number of each modified | 784 | * be called with semnum = -1, as well as with the number of each modified |
803 | * semaphore. | 785 | * semaphore. |
804 | * The tasks that must be woken up are added to @pt. The return code | 786 | * The tasks that must be woken up are added to @wake_q. The return code |
805 | * is stored in q->pid. | 787 | * is stored in q->pid. |
806 | * The function returns 1 if at least one operation was completed successfully. | 788 | * The function returns 1 if at least one operation was completed successfully. |
807 | */ | 789 | */ |
808 | static int wake_const_ops(struct sem_array *sma, int semnum, | 790 | static int wake_const_ops(struct sem_array *sma, int semnum, |
809 | struct list_head *pt) | 791 | struct wake_q_head *wake_q) |
810 | { | 792 | { |
811 | struct sem_queue *q; | 793 | struct sem_queue *q, *tmp; |
812 | struct list_head *walk; | ||
813 | struct list_head *pending_list; | 794 | struct list_head *pending_list; |
814 | int semop_completed = 0; | 795 | int semop_completed = 0; |
815 | 796 | ||
@@ -818,25 +799,19 @@ static int wake_const_ops(struct sem_array *sma, int semnum, | |||
818 | else | 799 | else |
819 | pending_list = &sma->sem_base[semnum].pending_const; | 800 | pending_list = &sma->sem_base[semnum].pending_const; |
820 | 801 | ||
821 | walk = pending_list->next; | 802 | list_for_each_entry_safe(q, tmp, pending_list, list) { |
822 | while (walk != pending_list) { | 803 | int error = perform_atomic_semop(sma, q); |
823 | int error; | ||
824 | |||
825 | q = container_of(walk, struct sem_queue, list); | ||
826 | walk = walk->next; | ||
827 | |||
828 | error = perform_atomic_semop(sma, q); | ||
829 | |||
830 | if (error <= 0) { | ||
831 | /* operation completed, remove from queue & wakeup */ | ||
832 | 804 | ||
833 | unlink_queue(sma, q); | 805 | if (error > 0) |
806 | continue; | ||
807 | /* operation completed, remove from queue & wakeup */ | ||
808 | unlink_queue(sma, q); | ||
834 | 809 | ||
835 | wake_up_sem_queue_prepare(pt, q, error); | 810 | wake_up_sem_queue_prepare(q, error, wake_q); |
836 | if (error == 0) | 811 | if (error == 0) |
837 | semop_completed = 1; | 812 | semop_completed = 1; |
838 | } | ||
839 | } | 813 | } |
814 | |||
840 | return semop_completed; | 815 | return semop_completed; |
841 | } | 816 | } |
842 | 817 | ||
@@ -845,14 +820,14 @@ static int wake_const_ops(struct sem_array *sma, int semnum, | |||
845 | * @sma: semaphore array | 820 | * @sma: semaphore array |
846 | * @sops: operations that were performed | 821 | * @sops: operations that were performed |
847 | * @nsops: number of operations | 822 | * @nsops: number of operations |
848 | * @pt: list head of the tasks that must be woken up. | 823 | * @wake_q: lockless wake-queue head |
849 | * | 824 | * |
850 | * Checks all required queues for wait-for-zero operations, based | 825 | * Checks all required queues for wait-for-zero operations, based |
851 | * on the actual changes that were performed on the semaphore array. | 826 | * on the actual changes that were performed on the semaphore array. |
852 | * The function returns 1 if at least one operation was completed successfully. | 827 | * The function returns 1 if at least one operation was completed successfully. |
853 | */ | 828 | */ |
854 | static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, | 829 | static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, |
855 | int nsops, struct list_head *pt) | 830 | int nsops, struct wake_q_head *wake_q) |
856 | { | 831 | { |
857 | int i; | 832 | int i; |
858 | int semop_completed = 0; | 833 | int semop_completed = 0; |
@@ -865,7 +840,7 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, | |||
865 | 840 | ||
866 | if (sma->sem_base[num].semval == 0) { | 841 | if (sma->sem_base[num].semval == 0) { |
867 | got_zero = 1; | 842 | got_zero = 1; |
868 | semop_completed |= wake_const_ops(sma, num, pt); | 843 | semop_completed |= wake_const_ops(sma, num, wake_q); |
869 | } | 844 | } |
870 | } | 845 | } |
871 | } else { | 846 | } else { |
@@ -876,7 +851,7 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, | |||
876 | for (i = 0; i < sma->sem_nsems; i++) { | 851 | for (i = 0; i < sma->sem_nsems; i++) { |
877 | if (sma->sem_base[i].semval == 0) { | 852 | if (sma->sem_base[i].semval == 0) { |
878 | got_zero = 1; | 853 | got_zero = 1; |
879 | semop_completed |= wake_const_ops(sma, i, pt); | 854 | semop_completed |= wake_const_ops(sma, i, wake_q); |
880 | } | 855 | } |
881 | } | 856 | } |
882 | } | 857 | } |
@@ -885,7 +860,7 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, | |||
885 | * then check the global queue, too. | 860 | * then check the global queue, too. |
886 | */ | 861 | */ |
887 | if (got_zero) | 862 | if (got_zero) |
888 | semop_completed |= wake_const_ops(sma, -1, pt); | 863 | semop_completed |= wake_const_ops(sma, -1, wake_q); |
889 | 864 | ||
890 | return semop_completed; | 865 | return semop_completed; |
891 | } | 866 | } |
@@ -895,22 +870,21 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, | |||
895 | * update_queue - look for tasks that can be completed. | 870 | * update_queue - look for tasks that can be completed. |
896 | * @sma: semaphore array. | 871 | * @sma: semaphore array. |
897 | * @semnum: semaphore that was modified. | 872 | * @semnum: semaphore that was modified. |
898 | * @pt: list head for the tasks that must be woken up. | 873 | * @wake_q: lockless wake-queue head. |
899 | * | 874 | * |
900 | * update_queue must be called after a semaphore in a semaphore array | 875 | * update_queue must be called after a semaphore in a semaphore array |
901 | * was modified. If multiple semaphores were modified, update_queue must | 876 | * was modified. If multiple semaphores were modified, update_queue must |
902 | * be called with semnum = -1, as well as with the number of each modified | 877 | * be called with semnum = -1, as well as with the number of each modified |
903 | * semaphore. | 878 | * semaphore. |
904 | * The tasks that must be woken up are added to @pt. The return code | 879 | * The tasks that must be woken up are added to @wake_q. The return code |
905 | * is stored in q->pid. | 880 | * is stored in q->pid. |
906 | * The function internally checks if const operations can now succeed. | 881 | * The function internally checks if const operations can now succeed. |
907 | * | 882 | * |
908 | * The function return 1 if at least one semop was completed successfully. | 883 | * The function return 1 if at least one semop was completed successfully. |
909 | */ | 884 | */ |
910 | static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt) | 885 | static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q) |
911 | { | 886 | { |
912 | struct sem_queue *q; | 887 | struct sem_queue *q, *tmp; |
913 | struct list_head *walk; | ||
914 | struct list_head *pending_list; | 888 | struct list_head *pending_list; |
915 | int semop_completed = 0; | 889 | int semop_completed = 0; |
916 | 890 | ||
@@ -920,13 +894,9 @@ static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt) | |||
920 | pending_list = &sma->sem_base[semnum].pending_alter; | 894 | pending_list = &sma->sem_base[semnum].pending_alter; |
921 | 895 | ||
922 | again: | 896 | again: |
923 | walk = pending_list->next; | 897 | list_for_each_entry_safe(q, tmp, pending_list, list) { |
924 | while (walk != pending_list) { | ||
925 | int error, restart; | 898 | int error, restart; |
926 | 899 | ||
927 | q = container_of(walk, struct sem_queue, list); | ||
928 | walk = walk->next; | ||
929 | |||
930 | /* If we are scanning the single-sop, per-semaphore list of | 900 | /* If we are scanning the single-sop, per-semaphore list of |
931 | * one semaphore and that semaphore is 0, then it is not | 901 | * one semaphore and that semaphore is 0, then it is not |
932 | * necessary to scan further: simple increments | 902 | * necessary to scan further: simple increments |
@@ -949,11 +919,11 @@ again: | |||
949 | restart = 0; | 919 | restart = 0; |
950 | } else { | 920 | } else { |
951 | semop_completed = 1; | 921 | semop_completed = 1; |
952 | do_smart_wakeup_zero(sma, q->sops, q->nsops, pt); | 922 | do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q); |
953 | restart = check_restart(sma, q); | 923 | restart = check_restart(sma, q); |
954 | } | 924 | } |
955 | 925 | ||
956 | wake_up_sem_queue_prepare(pt, q, error); | 926 | wake_up_sem_queue_prepare(q, error, wake_q); |
957 | if (restart) | 927 | if (restart) |
958 | goto again; | 928 | goto again; |
959 | } | 929 | } |
@@ -984,24 +954,24 @@ static void set_semotime(struct sem_array *sma, struct sembuf *sops) | |||
984 | * @sops: operations that were performed | 954 | * @sops: operations that were performed |
985 | * @nsops: number of operations | 955 | * @nsops: number of operations |
986 | * @otime: force setting otime | 956 | * @otime: force setting otime |
987 | * @pt: list head of the tasks that must be woken up. | 957 | * @wake_q: lockless wake-queue head |
988 | * | 958 | * |
989 | * do_smart_update() does the required calls to update_queue and wakeup_zero, | 959 | * do_smart_update() does the required calls to update_queue and wakeup_zero, |
990 | * based on the actual changes that were performed on the semaphore array. | 960 | * based on the actual changes that were performed on the semaphore array. |
991 | * Note that the function does not do the actual wake-up: the caller is | 961 | * Note that the function does not do the actual wake-up: the caller is |
992 | * responsible for calling wake_up_sem_queue_do(@pt). | 962 | * responsible for calling wake_up_q(). |
993 | * It is safe to perform this call after dropping all locks. | 963 | * It is safe to perform this call after dropping all locks. |
994 | */ | 964 | */ |
995 | static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops, | 965 | static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops, |
996 | int otime, struct list_head *pt) | 966 | int otime, struct wake_q_head *wake_q) |
997 | { | 967 | { |
998 | int i; | 968 | int i; |
999 | 969 | ||
1000 | otime |= do_smart_wakeup_zero(sma, sops, nsops, pt); | 970 | otime |= do_smart_wakeup_zero(sma, sops, nsops, wake_q); |
1001 | 971 | ||
1002 | if (!list_empty(&sma->pending_alter)) { | 972 | if (!list_empty(&sma->pending_alter)) { |
1003 | /* semaphore array uses the global queue - just process it. */ | 973 | /* semaphore array uses the global queue - just process it. */ |
1004 | otime |= update_queue(sma, -1, pt); | 974 | otime |= update_queue(sma, -1, wake_q); |
1005 | } else { | 975 | } else { |
1006 | if (!sops) { | 976 | if (!sops) { |
1007 | /* | 977 | /* |
@@ -1009,7 +979,7 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop | |||
1009 | * known. Check all. | 979 | * known. Check all. |
1010 | */ | 980 | */ |
1011 | for (i = 0; i < sma->sem_nsems; i++) | 981 | for (i = 0; i < sma->sem_nsems; i++) |
1012 | otime |= update_queue(sma, i, pt); | 982 | otime |= update_queue(sma, i, wake_q); |
1013 | } else { | 983 | } else { |
1014 | /* | 984 | /* |
1015 | * Check the semaphores that were increased: | 985 | * Check the semaphores that were increased: |
@@ -1023,7 +993,7 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop | |||
1023 | for (i = 0; i < nsops; i++) { | 993 | for (i = 0; i < nsops; i++) { |
1024 | if (sops[i].sem_op > 0) { | 994 | if (sops[i].sem_op > 0) { |
1025 | otime |= update_queue(sma, | 995 | otime |= update_queue(sma, |
1026 | sops[i].sem_num, pt); | 996 | sops[i].sem_num, wake_q); |
1027 | } | 997 | } |
1028 | } | 998 | } |
1029 | } | 999 | } |
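
Every do_smart_update() caller now follows the same shape; a condensed sketch of the convention (mirroring the semctl_setval() hunk further down):

    DEFINE_WAKE_Q(wake_q);

    /* under sem_lock(), after modifying semaphore values: */
    do_smart_update(sma, NULL, 0, 0, &wake_q);
    sem_unlock(sma, -1);
    rcu_read_unlock();
    wake_up_q(&wake_q);   /* safe: all locks have been dropped */
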
@@ -1111,8 +1081,8 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) | |||
1111 | struct sem_undo *un, *tu; | 1081 | struct sem_undo *un, *tu; |
1112 | struct sem_queue *q, *tq; | 1082 | struct sem_queue *q, *tq; |
1113 | struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); | 1083 | struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); |
1114 | struct list_head tasks; | ||
1115 | int i; | 1084 | int i; |
1085 | DEFINE_WAKE_Q(wake_q); | ||
1116 | 1086 | ||
1117 | /* Free the existing undo structures for this semaphore set. */ | 1087 | /* Free the existing undo structures for this semaphore set. */ |
1118 | ipc_assert_locked_object(&sma->sem_perm); | 1088 | ipc_assert_locked_object(&sma->sem_perm); |
@@ -1126,25 +1096,24 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) | |||
1126 | } | 1096 | } |
1127 | 1097 | ||
1128 | /* Wake up all pending processes and let them fail with EIDRM. */ | 1098 | /* Wake up all pending processes and let them fail with EIDRM. */ |
1129 | INIT_LIST_HEAD(&tasks); | ||
1130 | list_for_each_entry_safe(q, tq, &sma->pending_const, list) { | 1099 | list_for_each_entry_safe(q, tq, &sma->pending_const, list) { |
1131 | unlink_queue(sma, q); | 1100 | unlink_queue(sma, q); |
1132 | wake_up_sem_queue_prepare(&tasks, q, -EIDRM); | 1101 | wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); |
1133 | } | 1102 | } |
1134 | 1103 | ||
1135 | list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { | 1104 | list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { |
1136 | unlink_queue(sma, q); | 1105 | unlink_queue(sma, q); |
1137 | wake_up_sem_queue_prepare(&tasks, q, -EIDRM); | 1106 | wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); |
1138 | } | 1107 | } |
1139 | for (i = 0; i < sma->sem_nsems; i++) { | 1108 | for (i = 0; i < sma->sem_nsems; i++) { |
1140 | struct sem *sem = sma->sem_base + i; | 1109 | struct sem *sem = sma->sem_base + i; |
1141 | list_for_each_entry_safe(q, tq, &sem->pending_const, list) { | 1110 | list_for_each_entry_safe(q, tq, &sem->pending_const, list) { |
1142 | unlink_queue(sma, q); | 1111 | unlink_queue(sma, q); |
1143 | wake_up_sem_queue_prepare(&tasks, q, -EIDRM); | 1112 | wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); |
1144 | } | 1113 | } |
1145 | list_for_each_entry_safe(q, tq, &sem->pending_alter, list) { | 1114 | list_for_each_entry_safe(q, tq, &sem->pending_alter, list) { |
1146 | unlink_queue(sma, q); | 1115 | unlink_queue(sma, q); |
1147 | wake_up_sem_queue_prepare(&tasks, q, -EIDRM); | 1116 | wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); |
1148 | } | 1117 | } |
1149 | } | 1118 | } |
1150 | 1119 | ||
@@ -1153,7 +1122,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) | |||
1153 | sem_unlock(sma, -1); | 1122 | sem_unlock(sma, -1); |
1154 | rcu_read_unlock(); | 1123 | rcu_read_unlock(); |
1155 | 1124 | ||
1156 | wake_up_sem_queue_do(&tasks); | 1125 | wake_up_q(&wake_q); |
1157 | ns->used_sems -= sma->sem_nsems; | 1126 | ns->used_sems -= sma->sem_nsems; |
1158 | ipc_rcu_putref(sma, sem_rcu_free); | 1127 | ipc_rcu_putref(sma, sem_rcu_free); |
1159 | } | 1128 | } |
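
From userspace, the wakeups issued by freeary() surface as EIDRM. Roughly (illustration only; semid and op assumed set up beforehand):

    /* task A removes the set: */
    semctl(semid, 0, IPC_RMID);

    /* task B, which was blocked in semop() on the same set, wakes up: */
    if (semop(semid, &op, 1) < 0 && errno == EIDRM)
        /* the set vanished while we slept */;
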
@@ -1292,9 +1261,9 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, | |||
1292 | struct sem_undo *un; | 1261 | struct sem_undo *un; |
1293 | struct sem_array *sma; | 1262 | struct sem_array *sma; |
1294 | struct sem *curr; | 1263 | struct sem *curr; |
1295 | int err; | 1264 | int err, val; |
1296 | struct list_head tasks; | 1265 | DEFINE_WAKE_Q(wake_q); |
1297 | int val; | 1266 | |
1298 | #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) | 1267 | #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) |
1299 | /* big-endian 64bit */ | 1268 | /* big-endian 64bit */ |
1300 | val = arg >> 32; | 1269 | val = arg >> 32; |
@@ -1306,8 +1275,6 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, | |||
1306 | if (val > SEMVMX || val < 0) | 1275 | if (val > SEMVMX || val < 0) |
1307 | return -ERANGE; | 1276 | return -ERANGE; |
1308 | 1277 | ||
1309 | INIT_LIST_HEAD(&tasks); | ||
1310 | |||
1311 | rcu_read_lock(); | 1278 | rcu_read_lock(); |
1312 | sma = sem_obtain_object_check(ns, semid); | 1279 | sma = sem_obtain_object_check(ns, semid); |
1313 | if (IS_ERR(sma)) { | 1280 | if (IS_ERR(sma)) { |
@@ -1350,10 +1317,10 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, | |||
1350 | curr->sempid = task_tgid_vnr(current); | 1317 | curr->sempid = task_tgid_vnr(current); |
1351 | sma->sem_ctime = get_seconds(); | 1318 | sma->sem_ctime = get_seconds(); |
1352 | /* maybe some queued-up processes were waiting for this */ | 1319 | /* maybe some queued-up processes were waiting for this */ |
1353 | do_smart_update(sma, NULL, 0, 0, &tasks); | 1320 | do_smart_update(sma, NULL, 0, 0, &wake_q); |
1354 | sem_unlock(sma, -1); | 1321 | sem_unlock(sma, -1); |
1355 | rcu_read_unlock(); | 1322 | rcu_read_unlock(); |
1356 | wake_up_sem_queue_do(&tasks); | 1323 | wake_up_q(&wake_q); |
1357 | return 0; | 1324 | return 0; |
1358 | } | 1325 | } |
1359 | 1326 | ||
@@ -1365,9 +1332,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, | |||
1365 | int err, nsems; | 1332 | int err, nsems; |
1366 | ushort fast_sem_io[SEMMSL_FAST]; | 1333 | ushort fast_sem_io[SEMMSL_FAST]; |
1367 | ushort *sem_io = fast_sem_io; | 1334 | ushort *sem_io = fast_sem_io; |
1368 | struct list_head tasks; | 1335 | DEFINE_WAKE_Q(wake_q); |
1369 | |||
1370 | INIT_LIST_HEAD(&tasks); | ||
1371 | 1336 | ||
1372 | rcu_read_lock(); | 1337 | rcu_read_lock(); |
1373 | sma = sem_obtain_object_check(ns, semid); | 1338 | sma = sem_obtain_object_check(ns, semid); |
@@ -1478,7 +1443,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, | |||
1478 | } | 1443 | } |
1479 | sma->sem_ctime = get_seconds(); | 1444 | sma->sem_ctime = get_seconds(); |
1480 | /* maybe some queued-up processes were waiting for this */ | 1445 | /* maybe some queued-up processes were waiting for this */ |
1481 | do_smart_update(sma, NULL, 0, 0, &tasks); | 1446 | do_smart_update(sma, NULL, 0, 0, &wake_q); |
1482 | err = 0; | 1447 | err = 0; |
1483 | goto out_unlock; | 1448 | goto out_unlock; |
1484 | } | 1449 | } |
@@ -1514,7 +1479,7 @@ out_unlock: | |||
1514 | sem_unlock(sma, -1); | 1479 | sem_unlock(sma, -1); |
1515 | out_rcu_wakeup: | 1480 | out_rcu_wakeup: |
1516 | rcu_read_unlock(); | 1481 | rcu_read_unlock(); |
1517 | wake_up_sem_queue_do(&tasks); | 1482 | wake_up_q(&wake_q); |
1518 | out_free: | 1483 | out_free: |
1519 | if (sem_io != fast_sem_io) | 1484 | if (sem_io != fast_sem_io) |
1520 | ipc_free(sem_io); | 1485 | ipc_free(sem_io); |
@@ -1787,32 +1752,6 @@ out: | |||
1787 | return un; | 1752 | return un; |
1788 | } | 1753 | } |
1789 | 1754 | ||
1790 | |||
1791 | /** | ||
1792 | * get_queue_result - retrieve the result code from sem_queue | ||
1793 | * @q: Pointer to queue structure | ||
1794 | * | ||
1795 | * Retrieve the return code from the pending queue. If IN_WAKEUP is found in | ||
1796 | * q->status, then we must loop until the value is replaced with the final | ||
1797 | * value: This may happen if a task is woken up by an unrelated event (e.g. | ||
1798 | * signal) and in parallel the task is woken up by another task because it got | ||
1799 | * the requested semaphores. | ||
1800 | * | ||
1801 | * The function can be called with or without holding the semaphore spinlock. | ||
1802 | */ | ||
1803 | static int get_queue_result(struct sem_queue *q) | ||
1804 | { | ||
1805 | int error; | ||
1806 | |||
1807 | error = q->status; | ||
1808 | while (unlikely(error == IN_WAKEUP)) { | ||
1809 | cpu_relax(); | ||
1810 | error = q->status; | ||
1811 | } | ||
1812 | |||
1813 | return error; | ||
1814 | } | ||
1815 | |||
1816 | SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, | 1755 | SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, |
1817 | unsigned, nsops, const struct timespec __user *, timeout) | 1756 | unsigned, nsops, const struct timespec __user *, timeout) |
1818 | { | 1757 | { |
@@ -1821,11 +1760,11 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, | |||
1821 | struct sembuf fast_sops[SEMOPM_FAST]; | 1760 | struct sembuf fast_sops[SEMOPM_FAST]; |
1822 | struct sembuf *sops = fast_sops, *sop; | 1761 | struct sembuf *sops = fast_sops, *sop; |
1823 | struct sem_undo *un; | 1762 | struct sem_undo *un; |
1824 | int undos = 0, alter = 0, max, locknum; | 1763 | int max, locknum; |
1764 | bool undos = false, alter = false, dupsop = false; | ||
1825 | struct sem_queue queue; | 1765 | struct sem_queue queue; |
1826 | unsigned long jiffies_left = 0; | 1766 | unsigned long dup = 0, jiffies_left = 0; |
1827 | struct ipc_namespace *ns; | 1767 | struct ipc_namespace *ns; |
1828 | struct list_head tasks; | ||
1829 | 1768 | ||
1830 | ns = current->nsproxy->ipc_ns; | 1769 | ns = current->nsproxy->ipc_ns; |
1831 | 1770 | ||
@@ -1838,10 +1777,12 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, | |||
1838 | if (sops == NULL) | 1777 | if (sops == NULL) |
1839 | return -ENOMEM; | 1778 | return -ENOMEM; |
1840 | } | 1779 | } |
1780 | |||
1841 | if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) { | 1781 | if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) { |
1842 | error = -EFAULT; | 1782 | error = -EFAULT; |
1843 | goto out_free; | 1783 | goto out_free; |
1844 | } | 1784 | } |
1785 | |||
1845 | if (timeout) { | 1786 | if (timeout) { |
1846 | struct timespec _timeout; | 1787 | struct timespec _timeout; |
1847 | if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) { | 1788 | if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) { |
@@ -1855,18 +1796,30 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, | |||
1855 | } | 1796 | } |
1856 | jiffies_left = timespec_to_jiffies(&_timeout); | 1797 | jiffies_left = timespec_to_jiffies(&_timeout); |
1857 | } | 1798 | } |
1799 | |||
1858 | max = 0; | 1800 | max = 0; |
1859 | for (sop = sops; sop < sops + nsops; sop++) { | 1801 | for (sop = sops; sop < sops + nsops; sop++) { |
1802 | unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG); | ||
1803 | |||
1860 | if (sop->sem_num >= max) | 1804 | if (sop->sem_num >= max) |
1861 | max = sop->sem_num; | 1805 | max = sop->sem_num; |
1862 | if (sop->sem_flg & SEM_UNDO) | 1806 | if (sop->sem_flg & SEM_UNDO) |
1863 | undos = 1; | 1807 | undos = true; |
1864 | if (sop->sem_op != 0) | 1808 | if (dup & mask) { |
1865 | alter = 1; | 1809 | /* |
1810 | * There was a previous alter access that appears | ||
1811 | * to have accessed the same semaphore, thus use | ||
1812 | * the dupsop logic. "appears", because the detection | ||
1813 | * can only check sem_num % BITS_PER_LONG. ||
1814 | */ | ||
1815 | dupsop = true; | ||
1816 | } | ||
1817 | if (sop->sem_op != 0) { | ||
1818 | alter = true; | ||
1819 | dup |= mask; | ||
1820 | } | ||
1866 | } | 1821 | } |
1867 | 1822 | ||
1868 | INIT_LIST_HEAD(&tasks); | ||
1869 | |||
1870 | if (undos) { | 1823 | if (undos) { |
1871 | /* On success, find_alloc_undo takes the rcu_read_lock */ | 1824 | /* On success, find_alloc_undo takes the rcu_read_lock */ |
1872 | un = find_alloc_undo(ns, semid); | 1825 | un = find_alloc_undo(ns, semid); |
@@ -1887,16 +1840,22 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, | |||
1887 | } | 1840 | } |
1888 | 1841 | ||
1889 | error = -EFBIG; | 1842 | error = -EFBIG; |
1890 | if (max >= sma->sem_nsems) | 1843 | if (max >= sma->sem_nsems) { |
1891 | goto out_rcu_wakeup; | 1844 | rcu_read_unlock(); |
1845 | goto out_free; | ||
1846 | } | ||
1892 | 1847 | ||
1893 | error = -EACCES; | 1848 | error = -EACCES; |
1894 | if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) | 1849 | if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) { |
1895 | goto out_rcu_wakeup; | 1850 | rcu_read_unlock(); |
1851 | goto out_free; | ||
1852 | } | ||
1896 | 1853 | ||
1897 | error = security_sem_semop(sma, sops, nsops, alter); | 1854 | error = security_sem_semop(sma, sops, nsops, alter); |
1898 | if (error) | 1855 | if (error) { |
1899 | goto out_rcu_wakeup; | 1856 | rcu_read_unlock(); |
1857 | goto out_free; | ||
1858 | } | ||
1900 | 1859 | ||
1901 | error = -EIDRM; | 1860 | error = -EIDRM; |
1902 | locknum = sem_lock(sma, sops, nsops); | 1861 | locknum = sem_lock(sma, sops, nsops); |
@@ -1925,24 +1884,34 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, | |||
1925 | queue.undo = un; | 1884 | queue.undo = un; |
1926 | queue.pid = task_tgid_vnr(current); | 1885 | queue.pid = task_tgid_vnr(current); |
1927 | queue.alter = alter; | 1886 | queue.alter = alter; |
1887 | queue.dupsop = dupsop; | ||
1928 | 1888 | ||
1929 | error = perform_atomic_semop(sma, &queue); | 1889 | error = perform_atomic_semop(sma, &queue); |
1930 | if (error == 0) { | 1890 | if (error == 0) { /* non-blocking successful path */ |
1931 | /* If the operation was successful, then do | 1891 | DEFINE_WAKE_Q(wake_q); |
1892 | |||
1893 | /* | ||
1894 | * If the operation was successful, then do | ||
1932 | * the required updates. | 1895 | * the required updates. |
1933 | */ | 1896 | */ |
1934 | if (alter) | 1897 | if (alter) |
1935 | do_smart_update(sma, sops, nsops, 1, &tasks); | 1898 | do_smart_update(sma, sops, nsops, 1, &wake_q); |
1936 | else | 1899 | else |
1937 | set_semotime(sma, sops); | 1900 | set_semotime(sma, sops); |
1901 | |||
1902 | sem_unlock(sma, locknum); | ||
1903 | rcu_read_unlock(); | ||
1904 | wake_up_q(&wake_q); | ||
1905 | |||
1906 | goto out_free; | ||
1938 | } | 1907 | } |
1939 | if (error <= 0) | 1908 | if (error < 0) /* non-blocking error path */ |
1940 | goto out_unlock_free; | 1909 | goto out_unlock_free; |
1941 | 1910 | ||
1942 | /* We need to sleep on this operation, so we put the current | 1911 | /* |
1912 | * We need to sleep on this operation, so we put the current | ||
1943 | * task into the pending queue and go to sleep. | 1913 | * task into the pending queue and go to sleep. |
1944 | */ | 1914 | */ |
1945 | |||
1946 | if (nsops == 1) { | 1915 | if (nsops == 1) { |
1947 | struct sem *curr; | 1916 | struct sem *curr; |
1948 | curr = &sma->sem_base[sops->sem_num]; | 1917 | curr = &sma->sem_base[sops->sem_num]; |
@@ -1971,77 +1940,69 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, | |||
1971 | sma->complex_count++; | 1940 | sma->complex_count++; |
1972 | } | 1941 | } |
1973 | 1942 | ||
1974 | queue.status = -EINTR; | 1943 | do { |
1975 | queue.sleeper = current; | 1944 | queue.status = -EINTR; |
1945 | queue.sleeper = current; | ||
1976 | 1946 | ||
1977 | sleep_again: | 1947 | __set_current_state(TASK_INTERRUPTIBLE); |
1978 | __set_current_state(TASK_INTERRUPTIBLE); | 1948 | sem_unlock(sma, locknum); |
1979 | sem_unlock(sma, locknum); | 1949 | rcu_read_unlock(); |
1980 | rcu_read_unlock(); | ||
1981 | |||
1982 | if (timeout) | ||
1983 | jiffies_left = schedule_timeout(jiffies_left); | ||
1984 | else | ||
1985 | schedule(); | ||
1986 | 1950 | ||
1987 | error = get_queue_result(&queue); | 1951 | if (timeout) |
1952 | jiffies_left = schedule_timeout(jiffies_left); | ||
1953 | else | ||
1954 | schedule(); | ||
1988 | 1955 | ||
1989 | if (error != -EINTR) { | 1956 | /* |
1990 | /* fast path: update_queue already obtained all requested | 1957 | * fastpath: the semop has completed, either successfully or |
1991 | * resources. | 1958 | * not; from the syscall pov the distinction is irrelevant at |
1992 | * is a memory barrier: Without the mb(), the cpu could | 1959 | * this point; we're done. |
1993 | * is a memory barrier: Without the mb(), the cpu could | 1960 | * |
1994 | * speculatively read in user space stale data that was | 1961 | * We _do_ care, nonetheless, about being awoken by a signal or |
1995 | * overwritten by the previous owner of the semaphore. | 1962 | * spuriously. The queue.status is checked again in the |
1963 | * slowpath (aka after taking sem_lock), such that we can detect | ||
1964 | * scenarios where we were awakened externally, during the | ||
1965 | * window between wake_q_add() and wake_up_q(). | ||
1996 | */ | 1966 | */ |
1997 | smp_mb(); | 1967 | error = READ_ONCE(queue.status); |
1998 | 1968 | if (error != -EINTR) { | |
1999 | goto out_free; | 1969 | /* |
2000 | } | 1970 | * User space could assume that semop() is a memory |
2001 | 1971 | * barrier: Without the mb(), the cpu could | |
2002 | rcu_read_lock(); | 1972 | * speculatively read in userspace stale data that was |
2003 | sma = sem_obtain_lock(ns, semid, sops, nsops, &locknum); | 1973 | * overwritten by the previous owner of the semaphore. |
2004 | 1974 | */ | |
2005 | /* | 1975 | smp_mb(); |
2006 | * Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing. | 1976 | goto out_free; |
2007 | */ | 1977 | } |
2008 | error = get_queue_result(&queue); | ||
2009 | 1978 | ||
2010 | /* | 1979 | rcu_read_lock(); |
2011 | * Array removed? If yes, leave without sem_unlock(). | 1980 | sem_lock(sma, sops, nsops); |
2012 | */ | ||
2013 | if (IS_ERR(sma)) { | ||
2014 | rcu_read_unlock(); | ||
2015 | goto out_free; | ||
2016 | } | ||
2017 | 1981 | ||
1982 | if (!ipc_valid_object(&sma->sem_perm)) | ||
1983 | goto out_unlock_free; | ||
2018 | 1984 | ||
2019 | /* | 1985 | error = READ_ONCE(queue.status); |
2020 | * If queue.status != -EINTR we are woken up by another process. | ||
2021 | * Leave without unlink_queue(), but with sem_unlock(). | ||
2022 | */ | ||
2023 | if (error != -EINTR) | ||
2024 | goto out_unlock_free; | ||
2025 | 1986 | ||
2026 | /* | 1987 | /* |
2027 | * If an interrupt occurred we have to clean up the queue | 1988 | * If queue.status != -EINTR we are woken up by another process. |
2028 | */ | 1989 | * Leave without unlink_queue(), but with sem_unlock(). |
2029 | if (timeout && jiffies_left == 0) | 1990 | */ |
2030 | error = -EAGAIN; | 1991 | if (error != -EINTR) |
1992 | goto out_unlock_free; | ||
2031 | 1993 | ||
2032 | /* | 1994 | /* |
2033 | * If the wakeup was spurious, just retry | 1995 | * If an interrupt occurred we have to clean up the queue. |
2034 | */ | 1996 | */ |
2035 | if (error == -EINTR && !signal_pending(current)) | 1997 | if (timeout && jiffies_left == 0) |
2036 | goto sleep_again; | 1998 | error = -EAGAIN; |
1999 | } while (error == -EINTR && !signal_pending(current)); /* spurious */ | ||
2037 | 2000 | ||
2038 | unlink_queue(sma, &queue); | 2001 | unlink_queue(sma, &queue); |
2039 | 2002 | ||
2040 | out_unlock_free: | 2003 | out_unlock_free: |
2041 | sem_unlock(sma, locknum); | 2004 | sem_unlock(sma, locknum); |
2042 | out_rcu_wakeup: | ||
2043 | rcu_read_unlock(); | 2005 | rcu_read_unlock(); |
2044 | wake_up_sem_queue_do(&tasks); | ||
2045 | out_free: | 2006 | out_free: |
2046 | if (sops != fast_sops) | 2007 | if (sops != fast_sops) |
2047 | kfree(sops); | 2008 | kfree(sops); |
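
The window between wake_q_add() and wake_up_q() that the loop's comment mentions is the same race the removed IN_WAKEUP spinning used to cover, including the historical s390 oops. A hypothetical timeline of how wake_q resolves it:

    /*
     *  waker                           sleeper
     *  -----------------------------   --------------------------------
     *  wake_q_add(&wake_q, t);         (sleeping in schedule())
     *  WRITE_ONCE(q->status, error);   woken early by an unrelated
     *                                  signal, reads the final status,
     *                                  returns from semtimedop(), exits
     *  wake_up_q(&wake_q);             no oops: wake_q_add() pinned the
     *                                  task_struct with a reference
     */
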
@@ -2102,8 +2063,8 @@ void exit_sem(struct task_struct *tsk) | |||
2102 | for (;;) { | 2063 | for (;;) { |
2103 | struct sem_array *sma; | 2064 | struct sem_array *sma; |
2104 | struct sem_undo *un; | 2065 | struct sem_undo *un; |
2105 | struct list_head tasks; | ||
2106 | int semid, i; | 2066 | int semid, i; |
2067 | DEFINE_WAKE_Q(wake_q); | ||
2107 | 2068 | ||
2108 | cond_resched(); | 2069 | cond_resched(); |
2109 | 2070 | ||
@@ -2191,11 +2152,10 @@ void exit_sem(struct task_struct *tsk) | |||
2191 | } | 2152 | } |
2192 | } | 2153 | } |
2193 | /* maybe some queued-up processes were waiting for this */ | 2154 | /* maybe some queued-up processes were waiting for this */ |
2194 | INIT_LIST_HEAD(&tasks); | 2155 | do_smart_update(sma, NULL, 0, 1, &wake_q); |
2195 | do_smart_update(sma, NULL, 0, 1, &tasks); | ||
2196 | sem_unlock(sma, -1); | 2156 | sem_unlock(sma, -1); |
2197 | rcu_read_unlock(); | 2157 | rcu_read_unlock(); |
2198 | wake_up_sem_queue_do(&tasks); | 2158 | wake_up_q(&wake_q); |
2199 | 2159 | ||
2200 | kfree_rcu(un, rcu); | 2160 | kfree_rcu(un, rcu); |
2201 | } | 2161 | } |