about summary refs log tree commit diff stats
path: root/ipc/sem.c
diff options
context:
space:
mode:
author    Rik van Riel <riel@surriel.com>  2013-04-30 22:15:44 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2013-05-01 11:12:58 -0400
commit    6062a8dc0517bce23e3c2f7d2fea5e22411269a3 (patch)
tree      e1dd1553167fccb726a8aa9352b27ba14f188374 /ipc/sem.c
parent    9f1bc2c9022c1d4944c4a1a44c2f365487420aca (diff)
ipc,sem: fine grained locking for semtimedop
Introduce finer grained locking for semtimedop, to handle the common case
of a program wanting to manipulate one semaphore from an array with
multiple semaphores.

If the call is a semop manipulating just one semaphore in an array with
multiple semaphores, only take the lock for that semaphore itself.

If the call needs to manipulate multiple semaphores, or another caller is
in a transaction that manipulates multiple semaphores, the sem_array lock
is taken, as well as all the locks for the individual semaphores.

On a 24 CPU system, performance numbers with the semop-multi test with N
threads and N semaphores, look like this:

        vanilla    Davidlohr's  Davidlohr's +   Davidlohr's +
threads            patches      rwlock patches  v3 patches
10      610652     726325       1783589         2142206
20      341570     365699       1520453         1977878
30      288102     307037       1498167         2037995
40      290714     305955       1612665         2256484
50      288620     312890       1733453         2650292
60      289987     306043       1649360         2388008
70      291298     306347       1723167         2717486
80      290948     305662       1729545         2763582
90      290996     306680       1736021         2757524
100     292243     306700       1773700         3059159

[davidlohr.bueso@hp.com: do not call sem_lock when bogus sma]
[davidlohr.bueso@hp.com: make refcounter atomic]
Signed-off-by: Rik van Riel <riel@redhat.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Davidlohr Bueso <davidlohr.bueso@hp.com>
Cc: Chegu Vinod <chegu_vinod@hp.com>
Cc: Jason Low <jason.low2@hp.com>
Reviewed-by: Michel Lespinasse <walken@google.com>
Cc: Peter Hurley <peter@hurleysoftware.com>
Cc: Stanislav Kinsbursky <skinsbursky@parallels.com>
Tested-by: Emmanuel Benisty <benisty.e@gmail.com>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'ipc/sem.c')
-rw-r--r--  ipc/sem.c  271
1 files changed, 171 insertions, 100 deletions
diff --git a/ipc/sem.c b/ipc/sem.c
index f68b61749a85..e78ee3186d1f 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -94,6 +94,7 @@
94struct sem { 94struct sem {
95 int semval; /* current value */ 95 int semval; /* current value */
96 int sempid; /* pid of last operation */ 96 int sempid; /* pid of last operation */
97 spinlock_t lock; /* spinlock for fine-grained semtimedop */
97 struct list_head sem_pending; /* pending single-sop operations */ 98 struct list_head sem_pending; /* pending single-sop operations */
98}; 99};
99 100
@@ -137,7 +138,6 @@ struct sem_undo_list {
137 138
138#define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS]) 139#define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS])
139 140
140#define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm)
141#define sem_checkid(sma, semid) ipc_checkid(&sma->sem_perm, semid) 141#define sem_checkid(sma, semid) ipc_checkid(&sma->sem_perm, semid)
142 142
143static int newary(struct ipc_namespace *, struct ipc_params *); 143static int newary(struct ipc_namespace *, struct ipc_params *);
@@ -190,10 +190,89 @@ void __init sem_init (void)
190} 190}
191 191
192/* 192/*
193 * If the request contains only one semaphore operation, and there are
194 * no complex transactions pending, lock only the semaphore involved.
195 * Otherwise, lock the entire semaphore array, since we either have
196 * multiple semaphores in our own semops, or we need to look at
197 * semaphores from other pending complex operations.
198 *
199 * Carefully guard against sma->complex_count changing between zero
200 * and non-zero while we are spinning for the lock. The value of
201 * sma->complex_count cannot change while we are holding the lock,
202 * so sem_unlock should be fine.
203 *
204 * The global lock path checks that all the local locks have been released,
205 * checking each local lock once. This means that the local lock paths
206 * cannot start their critical sections while the global lock is held.
207 */
208static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
209 int nsops)
210{
211 int locknum;
212 again:
213 if (nsops == 1 && !sma->complex_count) {
214 struct sem *sem = sma->sem_base + sops->sem_num;
215
216 /* Lock just the semaphore we are interested in. */
217 spin_lock(&sem->lock);
218
219 /*
220 * If sma->complex_count was set while we were spinning,
221 * we may need to look at things we did not lock here.
222 */
223 if (unlikely(sma->complex_count)) {
224 spin_unlock(&sem->lock);
225 goto lock_array;
226 }
227
228 /*
229 * Another process is holding the global lock on the
230 * sem_array; we cannot enter our critical section,
231 * but have to wait for the global lock to be released.
232 */
233 if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
234 spin_unlock(&sem->lock);
235 spin_unlock_wait(&sma->sem_perm.lock);
236 goto again;
237 }
238
239 locknum = sops->sem_num;
240 } else {
241 int i;
242 /*
243 * Lock the semaphore array, and wait for all of the
244 * individual semaphore locks to go away. The code
245 * above ensures no new single-lock holders will enter
246 * their critical section while the array lock is held.
247 */
248 lock_array:
249 spin_lock(&sma->sem_perm.lock);
250 for (i = 0; i < sma->sem_nsems; i++) {
251 struct sem *sem = sma->sem_base + i;
252 spin_unlock_wait(&sem->lock);
253 }
254 locknum = -1;
255 }
256 return locknum;
257}
258
259static inline void sem_unlock(struct sem_array *sma, int locknum)
260{
261 if (locknum == -1) {
262 spin_unlock(&sma->sem_perm.lock);
263 } else {
264 struct sem *sem = sma->sem_base + locknum;
265 spin_unlock(&sem->lock);
266 }
267 rcu_read_unlock();
268}
269
270/*
193 * sem_lock_(check_) routines are called in the paths where the rw_mutex 271 * sem_lock_(check_) routines are called in the paths where the rw_mutex
194 * is not held. 272 * is not held.
195 */ 273 */
196static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns, int id) 274static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns,
275 int id, struct sembuf *sops, int nsops, int *locknum)
197{ 276{
198 struct kern_ipc_perm *ipcp; 277 struct kern_ipc_perm *ipcp;
199 struct sem_array *sma; 278 struct sem_array *sma;
@@ -205,7 +284,8 @@ static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns, int id
205 goto err; 284 goto err;
206 } 285 }
207 286
208 spin_lock(&ipcp->lock); 287 sma = container_of(ipcp, struct sem_array, sem_perm);
288 *locknum = sem_lock(sma, sops, nsops);
209 289
210 /* ipc_rmid() may have already freed the ID while sem_lock 290 /* ipc_rmid() may have already freed the ID while sem_lock
211 * was spinning: verify that the structure is still valid 291 * was spinning: verify that the structure is still valid
@@ -213,7 +293,7 @@ static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns, int id
213 if (!ipcp->deleted) 293 if (!ipcp->deleted)
214 return container_of(ipcp, struct sem_array, sem_perm); 294 return container_of(ipcp, struct sem_array, sem_perm);
215 295
216 spin_unlock(&ipcp->lock); 296 sem_unlock(sma, *locknum);
217 sma = ERR_PTR(-EINVAL); 297 sma = ERR_PTR(-EINVAL);
218err: 298err:
219 rcu_read_unlock(); 299 rcu_read_unlock();
@@ -230,17 +310,6 @@ static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int
230 return container_of(ipcp, struct sem_array, sem_perm); 310 return container_of(ipcp, struct sem_array, sem_perm);
231} 311}
232 312
233static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns,
234 int id)
235{
236 struct kern_ipc_perm *ipcp = ipc_lock_check(&sem_ids(ns), id);
237
238 if (IS_ERR(ipcp))
239 return ERR_CAST(ipcp);
240
241 return container_of(ipcp, struct sem_array, sem_perm);
242}
243
244static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns, 313static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns,
245 int id) 314 int id)
246{ 315{
@@ -254,21 +323,21 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
254 323
255static inline void sem_lock_and_putref(struct sem_array *sma) 324static inline void sem_lock_and_putref(struct sem_array *sma)
256{ 325{
257 ipc_lock_by_ptr(&sma->sem_perm); 326 rcu_read_lock();
327 sem_lock(sma, NULL, -1);
258 ipc_rcu_putref(sma); 328 ipc_rcu_putref(sma);
259} 329}
260 330
261static inline void sem_getref_and_unlock(struct sem_array *sma) 331static inline void sem_getref_and_unlock(struct sem_array *sma)
262{ 332{
263 ipc_rcu_getref(sma); 333 WARN_ON_ONCE(!ipc_rcu_getref(sma));
264 ipc_unlock(&(sma)->sem_perm); 334 sem_unlock(sma, -1);
265} 335}
266 336
267static inline void sem_putref(struct sem_array *sma) 337static inline void sem_putref(struct sem_array *sma)
268{ 338{
269 ipc_lock_by_ptr(&sma->sem_perm); 339 sem_lock_and_putref(sma);
270 ipc_rcu_putref(sma); 340 sem_unlock(sma, -1);
271 ipc_unlock(&(sma)->sem_perm);
272} 341}
273 342
274/* 343/*
@@ -276,9 +345,9 @@ static inline void sem_putref(struct sem_array *sma)
276 */ 345 */
277static inline void sem_getref(struct sem_array *sma) 346static inline void sem_getref(struct sem_array *sma)
278{ 347{
279 spin_lock(&(sma)->sem_perm.lock); 348 sem_lock(sma, NULL, -1);
280 ipc_rcu_getref(sma); 349 WARN_ON_ONCE(!ipc_rcu_getref(sma));
281 ipc_unlock(&(sma)->sem_perm); 350 sem_unlock(sma, -1);
282} 351}
283 352
284static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) 353static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
@@ -371,15 +440,17 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
371 440
372 sma->sem_base = (struct sem *) &sma[1]; 441 sma->sem_base = (struct sem *) &sma[1];
373 442
374 for (i = 0; i < nsems; i++) 443 for (i = 0; i < nsems; i++) {
375 INIT_LIST_HEAD(&sma->sem_base[i].sem_pending); 444 INIT_LIST_HEAD(&sma->sem_base[i].sem_pending);
445 spin_lock_init(&sma->sem_base[i].lock);
446 }
376 447
377 sma->complex_count = 0; 448 sma->complex_count = 0;
378 INIT_LIST_HEAD(&sma->sem_pending); 449 INIT_LIST_HEAD(&sma->sem_pending);
379 INIT_LIST_HEAD(&sma->list_id); 450 INIT_LIST_HEAD(&sma->list_id);
380 sma->sem_nsems = nsems; 451 sma->sem_nsems = nsems;
381 sma->sem_ctime = get_seconds(); 452 sma->sem_ctime = get_seconds();
382 sem_unlock(sma); 453 sem_unlock(sma, -1);
383 454
384 return sma->sem_perm.id; 455 return sma->sem_perm.id;
385} 456}
@@ -818,7 +889,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
818 889
819 /* Remove the semaphore set from the IDR */ 890 /* Remove the semaphore set from the IDR */
820 sem_rmid(ns, sma); 891 sem_rmid(ns, sma);
821 sem_unlock(sma); 892 sem_unlock(sma, -1);
822 893
823 wake_up_sem_queue_do(&tasks); 894 wake_up_sem_queue_do(&tasks);
824 ns->used_sems -= sma->sem_nsems; 895 ns->used_sems -= sma->sem_nsems;
@@ -947,7 +1018,6 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
947 struct sem_array *sma; 1018 struct sem_array *sma;
948 struct sem* curr; 1019 struct sem* curr;
949 int err; 1020 int err;
950 int nsems;
951 struct list_head tasks; 1021 struct list_head tasks;
952 int val; 1022 int val;
953#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) 1023#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
@@ -958,31 +1028,39 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
958 val = arg; 1028 val = arg;
959#endif 1029#endif
960 1030
961 sma = sem_lock_check(ns, semid); 1031 if (val > SEMVMX || val < 0)
962 if (IS_ERR(sma)) 1032 return -ERANGE;
963 return PTR_ERR(sma);
964 1033
965 INIT_LIST_HEAD(&tasks); 1034 INIT_LIST_HEAD(&tasks);
966 nsems = sma->sem_nsems;
967 1035
968 err = -EACCES; 1036 rcu_read_lock();
969 if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) 1037 sma = sem_obtain_object_check(ns, semid);
970 goto out_unlock; 1038 if (IS_ERR(sma)) {
1039 rcu_read_unlock();
1040 return PTR_ERR(sma);
1041 }
1042
1043 if (semnum < 0 || semnum >= sma->sem_nsems) {
1044 rcu_read_unlock();
1045 return -EINVAL;
1046 }
1047
1048
1049 if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) {
1050 rcu_read_unlock();
1051 return -EACCES;
1052 }
971 1053
972 err = security_sem_semctl(sma, SETVAL); 1054 err = security_sem_semctl(sma, SETVAL);
973 if (err) 1055 if (err) {
974 goto out_unlock; 1056 rcu_read_unlock();
1057 return -EACCES;
1058 }
975 1059
976 err = -EINVAL; 1060 sem_lock(sma, NULL, -1);
977 if(semnum < 0 || semnum >= nsems)
978 goto out_unlock;
979 1061
980 curr = &sma->sem_base[semnum]; 1062 curr = &sma->sem_base[semnum];
981 1063
982 err = -ERANGE;
983 if (val > SEMVMX || val < 0)
984 goto out_unlock;
985
986 assert_spin_locked(&sma->sem_perm.lock); 1064 assert_spin_locked(&sma->sem_perm.lock);
987 list_for_each_entry(un, &sma->list_id, list_id) 1065 list_for_each_entry(un, &sma->list_id, list_id)
988 un->semadj[semnum] = 0; 1066 un->semadj[semnum] = 0;
@@ -992,11 +1070,9 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
992 sma->sem_ctime = get_seconds(); 1070 sma->sem_ctime = get_seconds();
993 /* maybe some queued-up processes were waiting for this */ 1071 /* maybe some queued-up processes were waiting for this */
994 do_smart_update(sma, NULL, 0, 0, &tasks); 1072 do_smart_update(sma, NULL, 0, 0, &tasks);
995 err = 0; 1073 sem_unlock(sma, -1);
996out_unlock:
997 sem_unlock(sma);
998 wake_up_sem_queue_do(&tasks); 1074 wake_up_sem_queue_do(&tasks);
999 return err; 1075 return 0;
1000} 1076}
1001 1077
1002static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, 1078static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
@@ -1051,16 +1127,16 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
1051 1127
1052 sem_lock_and_putref(sma); 1128 sem_lock_and_putref(sma);
1053 if (sma->sem_perm.deleted) { 1129 if (sma->sem_perm.deleted) {
1054 sem_unlock(sma); 1130 sem_unlock(sma, -1);
1055 err = -EIDRM; 1131 err = -EIDRM;
1056 goto out_free; 1132 goto out_free;
1057 } 1133 }
1058 } 1134 } else
1135 sem_lock(sma, NULL, -1);
1059 1136
1060 spin_lock(&sma->sem_perm.lock);
1061 for (i = 0; i < sma->sem_nsems; i++) 1137 for (i = 0; i < sma->sem_nsems; i++)
1062 sem_io[i] = sma->sem_base[i].semval; 1138 sem_io[i] = sma->sem_base[i].semval;
1063 sem_unlock(sma); 1139 sem_unlock(sma, -1);
1064 err = 0; 1140 err = 0;
1065 if(copy_to_user(array, sem_io, nsems*sizeof(ushort))) 1141 if(copy_to_user(array, sem_io, nsems*sizeof(ushort)))
1066 err = -EFAULT; 1142 err = -EFAULT;
@@ -1071,7 +1147,10 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
1071 int i; 1147 int i;
1072 struct sem_undo *un; 1148 struct sem_undo *un;
1073 1149
1074 ipc_rcu_getref(sma); 1150 if (!ipc_rcu_getref(sma)) {
1151 rcu_read_unlock();
1152 return -EIDRM;
1153 }
1075 rcu_read_unlock(); 1154 rcu_read_unlock();
1076 1155
1077 if(nsems > SEMMSL_FAST) { 1156 if(nsems > SEMMSL_FAST) {
@@ -1097,7 +1176,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
1097 } 1176 }
1098 sem_lock_and_putref(sma); 1177 sem_lock_and_putref(sma);
1099 if (sma->sem_perm.deleted) { 1178 if (sma->sem_perm.deleted) {
1100 sem_unlock(sma); 1179 sem_unlock(sma, -1);
1101 err = -EIDRM; 1180 err = -EIDRM;
1102 goto out_free; 1181 goto out_free;
1103 } 1182 }
@@ -1124,7 +1203,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
1124 goto out_wakeup; 1203 goto out_wakeup;
1125 } 1204 }
1126 1205
1127 spin_lock(&sma->sem_perm.lock); 1206 sem_lock(sma, NULL, -1);
1128 curr = &sma->sem_base[semnum]; 1207 curr = &sma->sem_base[semnum];
1129 1208
1130 switch (cmd) { 1209 switch (cmd) {
@@ -1143,7 +1222,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
1143 } 1222 }
1144 1223
1145out_unlock: 1224out_unlock:
1146 sem_unlock(sma); 1225 sem_unlock(sma, -1);
1147out_wakeup: 1226out_wakeup:
1148 wake_up_sem_queue_do(&tasks); 1227 wake_up_sem_queue_do(&tasks);
1149out_free: 1228out_free:
@@ -1211,11 +1290,11 @@ static int semctl_down(struct ipc_namespace *ns, int semid,
1211 1290
1212 switch(cmd){ 1291 switch(cmd){
1213 case IPC_RMID: 1292 case IPC_RMID:
1214 ipc_lock_object(&sma->sem_perm); 1293 sem_lock(sma, NULL, -1);
1215 freeary(ns, ipcp); 1294 freeary(ns, ipcp);
1216 goto out_up; 1295 goto out_up;
1217 case IPC_SET: 1296 case IPC_SET:
1218 ipc_lock_object(&sma->sem_perm); 1297 sem_lock(sma, NULL, -1);
1219 err = ipc_update_perm(&semid64.sem_perm, ipcp); 1298 err = ipc_update_perm(&semid64.sem_perm, ipcp);
1220 if (err) 1299 if (err)
1221 goto out_unlock; 1300 goto out_unlock;
@@ -1228,7 +1307,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid,
1228 } 1307 }
1229 1308
1230out_unlock: 1309out_unlock:
1231 sem_unlock(sma); 1310 sem_unlock(sma, -1);
1232out_up: 1311out_up:
1233 up_write(&sem_ids(ns).rw_mutex); 1312 up_write(&sem_ids(ns).rw_mutex);
1234 return err; 1313 return err;
@@ -1340,8 +1419,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
1340 struct sem_array *sma; 1419 struct sem_array *sma;
1341 struct sem_undo_list *ulp; 1420 struct sem_undo_list *ulp;
1342 struct sem_undo *un, *new; 1421 struct sem_undo *un, *new;
1343 int nsems; 1422 int nsems, error;
1344 int error;
1345 1423
1346 error = get_undo_list(&ulp); 1424 error = get_undo_list(&ulp);
1347 if (error) 1425 if (error)
@@ -1363,7 +1441,11 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
1363 } 1441 }
1364 1442
1365 nsems = sma->sem_nsems; 1443 nsems = sma->sem_nsems;
1366 ipc_rcu_getref(sma); 1444 if (!ipc_rcu_getref(sma)) {
1445 rcu_read_unlock();
1446 un = ERR_PTR(-EIDRM);
1447 goto out;
1448 }
1367 rcu_read_unlock(); 1449 rcu_read_unlock();
1368 1450
1369 /* step 2: allocate new undo structure */ 1451 /* step 2: allocate new undo structure */
@@ -1376,7 +1458,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
1376 /* step 3: Acquire the lock on semaphore array */ 1458 /* step 3: Acquire the lock on semaphore array */
1377 sem_lock_and_putref(sma); 1459 sem_lock_and_putref(sma);
1378 if (sma->sem_perm.deleted) { 1460 if (sma->sem_perm.deleted) {
1379 sem_unlock(sma); 1461 sem_unlock(sma, -1);
1380 kfree(new); 1462 kfree(new);
1381 un = ERR_PTR(-EIDRM); 1463 un = ERR_PTR(-EIDRM);
1382 goto out; 1464 goto out;
@@ -1404,7 +1486,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
1404success: 1486success:
1405 spin_unlock(&ulp->lock); 1487 spin_unlock(&ulp->lock);
1406 rcu_read_lock(); 1488 rcu_read_lock();
1407 sem_unlock(sma); 1489 sem_unlock(sma, -1);
1408out: 1490out:
1409 return un; 1491 return un;
1410} 1492}
@@ -1444,7 +1526,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
1444 struct sembuf fast_sops[SEMOPM_FAST]; 1526 struct sembuf fast_sops[SEMOPM_FAST];
1445 struct sembuf* sops = fast_sops, *sop; 1527 struct sembuf* sops = fast_sops, *sop;
1446 struct sem_undo *un; 1528 struct sem_undo *un;
1447 int undos = 0, alter = 0, max; 1529 int undos = 0, alter = 0, max, locknum;
1448 struct sem_queue queue; 1530 struct sem_queue queue;
1449 unsigned long jiffies_left = 0; 1531 unsigned long jiffies_left = 0;
1450 struct ipc_namespace *ns; 1532 struct ipc_namespace *ns;
@@ -1488,22 +1570,23 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
1488 alter = 1; 1570 alter = 1;
1489 } 1571 }
1490 1572
1573 INIT_LIST_HEAD(&tasks);
1574
1491 if (undos) { 1575 if (undos) {
1576 /* On success, find_alloc_undo takes the rcu_read_lock */
1492 un = find_alloc_undo(ns, semid); 1577 un = find_alloc_undo(ns, semid);
1493 if (IS_ERR(un)) { 1578 if (IS_ERR(un)) {
1494 error = PTR_ERR(un); 1579 error = PTR_ERR(un);
1495 goto out_free; 1580 goto out_free;
1496 } 1581 }
1497 } else 1582 } else {
1498 un = NULL; 1583 un = NULL;
1584 rcu_read_lock();
1585 }
1499 1586
1500 INIT_LIST_HEAD(&tasks);
1501
1502 rcu_read_lock();
1503 sma = sem_obtain_object_check(ns, semid); 1587 sma = sem_obtain_object_check(ns, semid);
1504 if (IS_ERR(sma)) { 1588 if (IS_ERR(sma)) {
1505 if (un) 1589 rcu_read_unlock();
1506 rcu_read_unlock();
1507 error = PTR_ERR(sma); 1590 error = PTR_ERR(sma);
1508 goto out_free; 1591 goto out_free;
1509 } 1592 }
@@ -1534,23 +1617,9 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
1534 * "un" itself is guaranteed by rcu. 1617 * "un" itself is guaranteed by rcu.
1535 */ 1618 */
1536 error = -EIDRM; 1619 error = -EIDRM;
1537 ipc_lock_object(&sma->sem_perm); 1620 locknum = sem_lock(sma, sops, nsops);
1538 if (un) { 1621 if (un && un->semid == -1)
1539 if (un->semid == -1) { 1622 goto out_unlock_free;
1540 rcu_read_unlock();
1541 goto out_unlock_free;
1542 } else {
1543 /*
1544 * rcu lock can be released, "un" cannot disappear:
1545 * - sem_lock is acquired, thus IPC_RMID is
1546 * impossible.
1547 * - exit_sem is impossible, it always operates on
1548 * current (or a dead task).
1549 */
1550
1551 rcu_read_unlock();
1552 }
1553 }
1554 1623
1555 error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current)); 1624 error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
1556 if (error <= 0) { 1625 if (error <= 0) {
@@ -1591,7 +1660,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
1591 1660
1592sleep_again: 1661sleep_again:
1593 current->state = TASK_INTERRUPTIBLE; 1662 current->state = TASK_INTERRUPTIBLE;
1594 sem_unlock(sma); 1663 sem_unlock(sma, locknum);
1595 1664
1596 if (timeout) 1665 if (timeout)
1597 jiffies_left = schedule_timeout(jiffies_left); 1666 jiffies_left = schedule_timeout(jiffies_left);
@@ -1613,7 +1682,7 @@ sleep_again:
1613 goto out_free; 1682 goto out_free;
1614 } 1683 }
1615 1684
1616 sma = sem_obtain_lock(ns, semid); 1685 sma = sem_obtain_lock(ns, semid, sops, nsops, &locknum);
1617 1686
1618 /* 1687 /*
1619 * Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing. 1688 * Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing.
@@ -1652,7 +1721,7 @@ sleep_again:
1652 unlink_queue(sma, &queue); 1721 unlink_queue(sma, &queue);
1653 1722
1654out_unlock_free: 1723out_unlock_free:
1655 sem_unlock(sma); 1724 sem_unlock(sma, locknum);
1656out_wakeup: 1725out_wakeup:
1657 wake_up_sem_queue_do(&tasks); 1726 wake_up_sem_queue_do(&tasks);
1658out_free: 1727out_free:
@@ -1716,8 +1785,7 @@ void exit_sem(struct task_struct *tsk)
1716 struct sem_array *sma; 1785 struct sem_array *sma;
1717 struct sem_undo *un; 1786 struct sem_undo *un;
1718 struct list_head tasks; 1787 struct list_head tasks;
1719 int semid; 1788 int semid, i;
1720 int i;
1721 1789
1722 rcu_read_lock(); 1790 rcu_read_lock();
1723 un = list_entry_rcu(ulp->list_proc.next, 1791 un = list_entry_rcu(ulp->list_proc.next,
@@ -1726,23 +1794,26 @@ void exit_sem(struct task_struct *tsk)
1726 semid = -1; 1794 semid = -1;
1727 else 1795 else
1728 semid = un->semid; 1796 semid = un->semid;
1729 rcu_read_unlock();
1730 1797
1731 if (semid == -1) 1798 if (semid == -1) {
1799 rcu_read_unlock();
1732 break; 1800 break;
1801 }
1733 1802
1734 sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid); 1803 sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, un->semid);
1735
1736 /* exit_sem raced with IPC_RMID, nothing to do */ 1804 /* exit_sem raced with IPC_RMID, nothing to do */
1737 if (IS_ERR(sma)) 1805 if (IS_ERR(sma)) {
1806 rcu_read_unlock();
1738 continue; 1807 continue;
1808 }
1739 1809
1810 sem_lock(sma, NULL, -1);
1740 un = __lookup_undo(ulp, semid); 1811 un = __lookup_undo(ulp, semid);
1741 if (un == NULL) { 1812 if (un == NULL) {
1742 /* exit_sem raced with IPC_RMID+semget() that created 1813 /* exit_sem raced with IPC_RMID+semget() that created
1743 * exactly the same semid. Nothing to do. 1814 * exactly the same semid. Nothing to do.
1744 */ 1815 */
1745 sem_unlock(sma); 1816 sem_unlock(sma, -1);
1746 continue; 1817 continue;
1747 } 1818 }
1748 1819
@@ -1782,7 +1853,7 @@ void exit_sem(struct task_struct *tsk)
1782 /* maybe some queued-up processes were waiting for this */ 1853 /* maybe some queued-up processes were waiting for this */
1783 INIT_LIST_HEAD(&tasks); 1854 INIT_LIST_HEAD(&tasks);
1784 do_smart_update(sma, NULL, 0, 1, &tasks); 1855 do_smart_update(sma, NULL, 0, 1, &tasks);
1785 sem_unlock(sma); 1856 sem_unlock(sma, -1);
1786 wake_up_sem_queue_do(&tasks); 1857 wake_up_sem_queue_do(&tasks);
1787 1858
1788 kfree_rcu(un, rcu); 1859 kfree_rcu(un, rcu);