Diffstat (limited to 'ipc/sem.c')
-rw-r--r--	ipc/sem.c	171
1 file changed, 91 insertions(+), 80 deletions(-)
@@ -47,8 +47,7 @@
  * Thus: Perfect SMP scaling between independent semaphore arrays.
  * If multiple semaphores in one array are used, then cache line
  * trashing on the semaphore array spinlock will limit the scaling.
- * - semncnt and semzcnt are calculated on demand in count_semncnt() and
- *   count_semzcnt()
+ * - semncnt and semzcnt are calculated on demand in count_semcnt()
  * - the task that performs a successful semop() scans the list of all
  *   sleeping tasks and completes any pending operations that can be fulfilled.
  *   Semaphores are actively given to waiting tasks (necessary for FIFO).
@@ -87,7 +86,7 @@
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>

-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include "util.h"

 /* One semaphore structure for each semaphore in the system. */
@@ -110,6 +109,7 @@ struct sem_queue {
 	int			pid;	 /* process id of requesting process */
 	int			status;	 /* completion status of operation */
 	struct sembuf		*sops;	 /* array of pending operations */
+	struct sembuf		*blocking; /* the operation that blocked */
 	int			nsops;	 /* number of operations */
 	int			alter;	 /* does *sops alter the array? */
 };
@@ -160,7 +160,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
  *	sem_array.pending{_alter,_cont},
  *	sem_array.sem_undo: global sem_lock() for read/write
  *	sem_undo.proc_next: only "current" is allowed to read/write that field.
- *	
+ *
  *	sem_array.sem_base[i].pending_{const,alter}:
  *		global or semaphore sem_lock() for read/write
  */
@@ -564,7 +564,11 @@ static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
 SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
 {
 	struct ipc_namespace *ns;
-	struct ipc_ops sem_ops;
+	static const struct ipc_ops sem_ops = {
+		.getnew = newary,
+		.associate = sem_security,
+		.more_checks = sem_more_checks,
+	};
 	struct ipc_params sem_params;

 	ns = current->nsproxy->ipc_ns;
@@ -572,10 +576,6 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
 	if (nsems < 0 || nsems > ns->sc_semmsl)
 		return -EINVAL;

-	sem_ops.getnew = newary;
-	sem_ops.associate = sem_security;
-	sem_ops.more_checks = sem_more_checks;
-
 	sem_params.key = key;
 	sem_params.flg = semflg;
 	sem_params.u.nsems = nsems;
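Note on the two hunks above: sem_ops changes from a structure built on the stack at every semget() call into a single compile-time constant, so the dispatch table is emitted once into read-only data and can no longer be modified at run time. A minimal sketch of the pattern, with hypothetical names outside the kernel tree:

/* Sketch only: a static const table with designated initializers is
 * created once in .rodata instead of being re-initialized per call. */
struct dispatch {
	int (*getnew)(int key);
	int (*associate)(int id);
};

static int demo_getnew(int key) { return key + 1; }
static int demo_associate(int id) { return id; }

int demo_get(int key)
{
	static const struct dispatch ops = {
		.getnew = demo_getnew,
		.associate = demo_associate,
	};

	return key ? ops.associate(key) : ops.getnew(key);
}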
@@ -586,21 +586,23 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
 /**
  * perform_atomic_semop - Perform (if possible) a semaphore operation
  * @sma: semaphore array
- * @sops: array with operations that should be checked
- * @nsops: number of operations
- * @un: undo array
- * @pid: pid that did the change
+ * @q: struct sem_queue that describes the operation
  *
  * Returns 0 if the operation was possible.
  * Returns 1 if the operation is impossible, the caller must sleep.
  * Negative values are error codes.
  */
-static int perform_atomic_semop(struct sem_array *sma, struct sembuf *sops,
-			     int nsops, struct sem_undo *un, int pid)
+static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
 {
-	int result, sem_op;
+	int result, sem_op, nsops, pid;
 	struct sembuf *sop;
 	struct sem *curr;
+	struct sembuf *sops;
+	struct sem_undo *un;
+
+	sops = q->sops;
+	nsops = q->nsops;
+	un = q->undo;

 	for (sop = sops; sop < sops + nsops; sop++) {
 		curr = sma->sem_base + sop->sem_num;
@@ -628,6 +630,7 @@ static int perform_atomic_semop(struct sem_array *sma, struct sembuf *sops,
 	}

 	sop--;
+	pid = q->pid;
 	while (sop >= sops) {
 		sma->sem_base[sop->sem_num].sempid = pid;
 		sop--;
@@ -640,6 +643,8 @@ out_of_range:
 	goto undo;

 would_block:
+	q->blocking = sop;
+
 	if (sop->sem_flg & IPC_NOWAIT)
 		result = -EAGAIN;
 	else
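Note: with perform_atomic_semop() now receiving the whole sem_queue, the would_block path can record which sembuf stopped the operation; check_qop() further down consumes q->blocking for GETNCNT/GETZCNT. A toy model of that bookkeeping, with simplified types and no SEM_UNDO or zero-wait handling (the kernel applies operations as it scans and rolls back on failure; this sketch checks first, then applies):

/* Toy model: if any operation cannot proceed, remember the culprit and
 * report "must sleep" without changing anything. */
struct op { int num; int delta; };

struct toy_queue {
	struct op *ops;
	int nops;
	struct op *blocking;	/* first op that could not proceed */
};

static int try_atomic(int *semval, struct toy_queue *q)
{
	int i;

	for (i = 0; i < q->nops; i++) {
		if (semval[q->ops[i].num] + q->ops[i].delta < 0) {
			q->blocking = &q->ops[i];
			return 1;	/* caller must sleep */
		}
	}
	for (i = 0; i < q->nops; i++)
		semval[q->ops[i].num] += q->ops[i].delta;
	return 0;			/* performed atomically */
}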
@@ -780,8 +785,7 @@ static int wake_const_ops(struct sem_array *sma, int semnum,
 		q = container_of(walk, struct sem_queue, list);
 		walk = walk->next;

-		error = perform_atomic_semop(sma, q->sops, q->nsops,
-					 q->undo, q->pid);
+		error = perform_atomic_semop(sma, q);

 		if (error <= 0) {
 			/* operation completed, remove from queue & wakeup */
@@ -893,8 +897,7 @@ again:
 		if (semnum != -1 && sma->sem_base[semnum].semval == 0)
 			break;

-		error = perform_atomic_semop(sma, q->sops, q->nsops,
-					 q->undo, q->pid);
+		error = perform_atomic_semop(sma, q);

 		/* Does q->sleeper still need to sleep? */
 		if (error > 0)
@@ -989,65 +992,74 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
 		set_semotime(sma, sops);
 }

-/* The following counts are associated to each semaphore:
- *   semncnt        number of tasks waiting on semval being nonzero
- *   semzcnt        number of tasks waiting on semval being zero
- * This model assumes that a task waits on exactly one semaphore.
- * Since semaphore operations are to be performed atomically, tasks actually
- * wait on a whole sequence of semaphores simultaneously.
- * The counts we return here are a rough approximation, but still
- * warrant that semncnt+semzcnt>0 if the task is on the pending queue.
+/*
+ * check_qop: Test if a queued operation sleeps on the semaphore semnum
  */
-static int count_semncnt(struct sem_array *sma, ushort semnum)
+static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q,
+			bool count_zero)
 {
-	int semncnt;
-	struct sem_queue *q;
+	struct sembuf *sop = q->blocking;

-	semncnt = 0;
-	list_for_each_entry(q, &sma->sem_base[semnum].pending_alter, list) {
-		struct sembuf *sops = q->sops;
-		BUG_ON(sops->sem_num != semnum);
-		if ((sops->sem_op < 0) && !(sops->sem_flg & IPC_NOWAIT))
-			semncnt++;
-	}
+	/*
+	 * Linux always (since 0.99.10) reported a task as sleeping on all
+	 * semaphores. This violates SUS, therefore it was changed to the
+	 * standard compliant behavior.
+	 * Give the administrators a chance to notice that an application
+	 * might misbehave because it relies on the Linux behavior.
+	 */
+	pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n"
+			"The task %s (%d) triggered the difference, watch for misbehavior.\n",
+			current->comm, task_pid_nr(current));

-	list_for_each_entry(q, &sma->pending_alter, list) {
-		struct sembuf *sops = q->sops;
-		int nsops = q->nsops;
-		int i;
-		for (i = 0; i < nsops; i++)
-			if (sops[i].sem_num == semnum
-			    && (sops[i].sem_op < 0)
-			    && !(sops[i].sem_flg & IPC_NOWAIT))
-				semncnt++;
-	}
-	return semncnt;
+	if (sop->sem_num != semnum)
+		return 0;
+
+	if (count_zero && sop->sem_op == 0)
+		return 1;
+	if (!count_zero && sop->sem_op < 0)
+		return 1;
+
+	return 0;
 }

-static int count_semzcnt(struct sem_array *sma, ushort semnum)
+/* The following counts are associated to each semaphore:
+ *   semncnt        number of tasks waiting on semval being nonzero
+ *   semzcnt        number of tasks waiting on semval being zero
+ *
+ * Per definition, a task waits only on the semaphore of the first semop
+ * that cannot proceed, even if additional operation would block, too.
+ */
+static int count_semcnt(struct sem_array *sma, ushort semnum,
+			bool count_zero)
 {
-	int semzcnt;
+	struct list_head *l;
 	struct sem_queue *q;
+	int semcnt;
+
+	semcnt = 0;
+	/* First: check the simple operations. They are easy to evaluate */
+	if (count_zero)
+		l = &sma->sem_base[semnum].pending_const;
+	else
+		l = &sma->sem_base[semnum].pending_alter;

-	semzcnt = 0;
-	list_for_each_entry(q, &sma->sem_base[semnum].pending_const, list) {
-		struct sembuf *sops = q->sops;
-		BUG_ON(sops->sem_num != semnum);
-		if ((sops->sem_op == 0) && !(sops->sem_flg & IPC_NOWAIT))
-			semzcnt++;
+	list_for_each_entry(q, l, list) {
+		/* all task on a per-semaphore list sleep on exactly
+		 * that semaphore
+		 */
+		semcnt++;
 	}

-	list_for_each_entry(q, &sma->pending_const, list) {
-		struct sembuf *sops = q->sops;
-		int nsops = q->nsops;
-		int i;
-		for (i = 0; i < nsops; i++)
-			if (sops[i].sem_num == semnum
-			    && (sops[i].sem_op == 0)
-			    && !(sops[i].sem_flg & IPC_NOWAIT))
-				semzcnt++;
+	/* Then: check the complex operations. */
+	list_for_each_entry(q, &sma->pending_alter, list) {
+		semcnt += check_qop(sma, semnum, q, count_zero);
 	}
-	return semzcnt;
+	if (count_zero) {
+		list_for_each_entry(q, &sma->pending_const, list) {
+			semcnt += check_qop(sma, semnum, q, count_zero);
+		}
+	}
+	return semcnt;
 }

 /* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
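Note: the rewrite changes what GETNCNT/GETZCNT report for complex (multi-sembuf) operations. A sleeper used to be counted on every semaphore its sops array touches; it is now counted only on the semaphore of the first operation that blocked (q->blocking), which is what SUS specifies and what the pr_info_once() above warns about. A minimal userspace illustration (error handling omitted; the sleep() is a crude way to let the child block first):

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>

int main(void)
{
	/* two semaphores, both initialized to 0 */
	int id = semget(IPC_PRIVATE, 2, 0600);
	struct sembuf sops[2] = {
		{ .sem_num = 0, .sem_op = -1 },	/* blocks: semval[0] == 0 */
		{ .sem_num = 1, .sem_op = -1 },	/* would block as well */
	};

	if (fork() == 0) {
		semop(id, sops, 2);	/* child sleeps on the array */
		return 0;
	}
	sleep(1);
	/* expected here: ncnt0=1 ncnt1=0; kernels before this change
	 * reported 1 on both semaphores */
	printf("ncnt0=%d ncnt1=%d\n",
	       semctl(id, 0, GETNCNT), semctl(id, 1, GETNCNT));
	semctl(id, 0, IPC_RMID);	/* wakes the child with EIDRM */
	return 0;
}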
@@ -1161,7 +1173,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
 		err = security_sem_semctl(NULL, cmd);
 		if (err)
 			return err;
-		
+
 		memset(&seminfo, 0, sizeof(seminfo));
 		seminfo.semmni = ns->sc_semmni;
 		seminfo.semmns = ns->sc_semmns;
@@ -1181,7 +1193,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
 	}
 	max_id = ipc_get_maxid(&sem_ids(ns));
 	up_read(&sem_ids(ns).rwsem);
-	if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) 
+	if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
 		return -EFAULT;
 	return (max_id < 0) ? 0 : max_id;
 }
@@ -1449,10 +1461,10 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		err = curr->sempid;
 		goto out_unlock;
 	case GETNCNT:
-		err = count_semncnt(sma, semnum);
+		err = count_semcnt(sma, semnum, 0);
 		goto out_unlock;
 	case GETZCNT:
-		err = count_semzcnt(sma, semnum);
+		err = count_semcnt(sma, semnum, 1);
 		goto out_unlock;
 	}
@@ -1866,8 +1878,13 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	if (un && un->semid == -1)
 		goto out_unlock_free;

-	error = perform_atomic_semop(sma, sops, nsops, un,
-					task_tgid_vnr(current));
+	queue.sops = sops;
+	queue.nsops = nsops;
+	queue.undo = un;
+	queue.pid = task_tgid_vnr(current);
+	queue.alter = alter;
+
+	error = perform_atomic_semop(sma, &queue);
 	if (error == 0) {
 		/* If the operation was successful, then do
 		 * the required updates.
@@ -1883,12 +1900,6 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	/* We need to sleep on this operation, so we put the current
 	 * task into the pending queue and go to sleep.
 	 */
-
-	queue.sops = sops;
-	queue.nsops = nsops;
-	queue.undo = un;
-	queue.pid = task_tgid_vnr(current);
-	queue.alter = alter;

 	if (nsops == 1) {
 		struct sem *curr;
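Note on this hunk and the previous one: the duplicated initialization of the on-stack queue is gone. It is now filled in once, before the first perform_atomic_semop() attempt, and the very same object is linked onto a pending list if the task has to sleep. A paraphrase of the resulting flow (not a full listing):

/* semtimedop(), after both hunks: */
queue.sops = sops;		/* initialized once, before the first try */
queue.nsops = nsops;
queue.undo = un;
queue.pid = task_tgid_vnr(current);
queue.alter = alter;

error = perform_atomic_semop(sma, &queue);
/* error == 0: operation performed, update and return       */
/* error < 0:  hard error, return                           */
/* error > 0:  enqueue this same, ready-to-use queue; sleep */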
@@ -2016,7 +2027,7 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
 			return error;
 		atomic_inc(&undo_list->refcnt);
 		tsk->sysvsem.undo_list = undo_list;
-	} else 
+	} else
 		tsk->sysvsem.undo_list = NULL;

 	return 0;