path: root/ipc/sem.c
Diffstat (limited to 'ipc/sem.c')
-rw-r--r--  ipc/sem.c  256
1 file changed, 170 insertions(+), 86 deletions(-)
diff --git a/ipc/sem.c b/ipc/sem.c
index 69b6a21f3844..db9d241af133 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -243,71 +243,122 @@ static void merge_queues(struct sem_array *sma)
 	}
 }
 
+static void sem_rcu_free(struct rcu_head *head)
+{
+	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+	struct sem_array *sma = ipc_rcu_to_struct(p);
+
+	security_sem_free(sma);
+	ipc_rcu_free(head);
+}
+
+/*
+ * Wait until all currently ongoing simple ops have completed.
+ * Caller must own sem_perm.lock.
+ * New simple ops cannot start, because simple ops first check
+ * that a) sem_perm.lock is free and b) complex_count is 0.
+ */
+static void sem_wait_array(struct sem_array *sma)
+{
+	int i;
+	struct sem *sem;
+
+	if (sma->complex_count) {
+		/* The thread that increased sma->complex_count waited on
+		 * all sem->lock locks. Thus we don't need to wait again.
+		 */
+		return;
+	}
+
+	for (i = 0; i < sma->sem_nsems; i++) {
+		sem = sma->sem_base + i;
+		spin_unlock_wait(&sem->lock);
+	}
+}
+
 /*
  * If the request contains only one semaphore operation, and there are
  * no complex transactions pending, lock only the semaphore involved.
  * Otherwise, lock the entire semaphore array, since we either have
  * multiple semaphores in our own semops, or we need to look at
  * semaphores from other pending complex operations.
- *
- * Carefully guard against sma->complex_count changing between zero
- * and non-zero while we are spinning for the lock. The value of
- * sma->complex_count cannot change while we are holding the lock,
- * so sem_unlock should be fine.
- *
- * The global lock path checks that all the local locks have been released,
- * checking each local lock once. This means that the local lock paths
- * cannot start their critical sections while the global lock is held.
  */
 static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
 			      int nsops)
 {
-	int locknum;
- again:
-	if (nsops == 1 && !sma->complex_count) {
-		struct sem *sem = sma->sem_base + sops->sem_num;
+	struct sem *sem;
 
-		/* Lock just the semaphore we are interested in. */
-		spin_lock(&sem->lock);
+	if (nsops != 1) {
+		/* Complex operation - acquire a full lock */
+		ipc_lock_object(&sma->sem_perm);
 
-		/*
-		 * If sma->complex_count was set while we were spinning,
-		 * we may need to look at things we did not lock here.
+		/* And wait until all simple ops that are processed
+		 * right now have dropped their locks.
 		 */
-		if (unlikely(sma->complex_count)) {
-			spin_unlock(&sem->lock);
-			goto lock_array;
-		}
+		sem_wait_array(sma);
+		return -1;
+	}
+
+	/*
+	 * Only one semaphore affected - try to optimize locking.
+	 * The rules are:
+	 * - optimized locking is possible if no complex operation
+	 *   is either enqueued or processed right now.
+	 * - The test for enqueued complex ops is simple:
+	 *      sma->complex_count != 0
+	 * - Testing for complex ops that are processed right now is
+	 *   a bit more difficult. Complex ops acquire the full lock
+	 *   and first wait until all running simple ops have completed.
+	 *   (see above)
+	 *   Thus: If we own a simple lock and the global lock is free
+	 *   and complex_count is now 0, then it will stay 0 and
+	 *   thus just locking sem->lock is sufficient.
+	 */
+	sem = sma->sem_base + sops->sem_num;
 
+	if (sma->complex_count == 0) {
 		/*
-		 * Another process is holding the global lock on the
-		 * sem_array; we cannot enter our critical section,
-		 * but have to wait for the global lock to be released.
+		 * It appears that no complex operation is around.
+		 * Acquire the per-semaphore lock.
 		 */
-		if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
-			spin_unlock(&sem->lock);
-			spin_unlock_wait(&sma->sem_perm.lock);
-			goto again;
+		spin_lock(&sem->lock);
+
+		/* Then check that the global lock is free */
+		if (!spin_is_locked(&sma->sem_perm.lock)) {
+			/* spin_is_locked() is not a memory barrier */
+			smp_mb();
+
+			/* Now repeat the test of complex_count:
+			 * It can't change anymore until we drop sem->lock.
+			 * Thus: if it is now 0, then it will stay 0.
+			 */
+			if (sma->complex_count == 0) {
+				/* fast path successful! */
+				return sops->sem_num;
+			}
 		}
+		spin_unlock(&sem->lock);
+	}
 
-		locknum = sops->sem_num;
+	/* slow path: acquire the full lock */
+	ipc_lock_object(&sma->sem_perm);
+
+	if (sma->complex_count == 0) {
+		/* False alarm:
+		 * There is no complex operation, thus we can switch
+		 * back to the fast path.
+		 */
+		spin_lock(&sem->lock);
+		ipc_unlock_object(&sma->sem_perm);
+		return sops->sem_num;
 	} else {
-		int i;
-		/*
-		 * Lock the semaphore array, and wait for all of the
-		 * individual semaphore locks to go away.  The code
-		 * above ensures no new single-lock holders will enter
-		 * their critical section while the array lock is held.
+		/* Not a false alarm, thus complete the sequence for a
+		 * full lock.
 		 */
- lock_array:
-		ipc_lock_object(&sma->sem_perm);
-		for (i = 0; i < sma->sem_nsems; i++) {
-			struct sem *sem = sma->sem_base + i;
-			spin_unlock_wait(&sem->lock);
-		}
-		locknum = -1;
+		sem_wait_array(sma);
+		return -1;
 	}
-	return locknum;
 }
 
 static inline void sem_unlock(struct sem_array *sma, int locknum)
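The fast path in sem_lock() and sem_wait_array() above form a two-sided handshake: a simple op publishes its per-semaphore lock before re-checking the global state, while a complex op publishes the global lock before waiting out the per-semaphore locks. The user-space model below shows only that handshake; the names and the seq_cst fence are mine, standing in for sem_perm.lock, sem->lock and smp_mb(). It is an illustration of the ordering argument, not kernel code (build with: cc -std=c11 model.c).

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool global_locked;	/* models sma->sem_perm.lock */
static atomic_bool local_locked;	/* models one sem->lock */
static atomic_int complex_count;	/* models sma->complex_count */

static void lock(atomic_bool *l)
{
	bool expected = false;

	while (!atomic_compare_exchange_weak(l, &expected, true))
		expected = false;	/* CAS overwrote expected; reset it */
}

static void unlock(atomic_bool *l)
{
	atomic_store(l, false);
}

/* Simple op: take the local lock, then verify that no complex op is
 * enqueued (complex_count) or running (global lock). */
static bool try_fast_path(void)
{
	if (atomic_load(&complex_count) != 0)
		return false;		/* complex op enqueued */

	lock(&local_locked);
	if (!atomic_load(&global_locked)) {
		/* stands in for smp_mb(); redundant here because all
		 * accesses in this model are already seq_cst */
		atomic_thread_fence(memory_order_seq_cst);
		if (atomic_load(&complex_count) == 0)
			return true;	/* caller now owns the local lock */
	}
	unlock(&local_locked);
	return false;			/* caller must take the global lock */
}

/* Complex op: take the global lock, then wait out a fast-path owner
 * that may have slipped in first - the model of sem_wait_array(). */
static void lock_global(void)
{
	lock(&global_locked);
	while (atomic_load(&local_locked))
		;			/* models spin_unlock_wait() */
}

int main(void)
{
	if (try_fast_path()) {
		puts("fast path taken");
		unlock(&local_locked);
	}
	lock_global();
	puts("global (slow) path taken");
	unlock(&global_locked);
	return 0;
}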
@@ -374,12 +425,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
 static inline void sem_lock_and_putref(struct sem_array *sma)
 {
 	sem_lock(sma, NULL, -1);
-	ipc_rcu_putref(sma);
-}
-
-static inline void sem_putref(struct sem_array *sma)
-{
-	ipc_rcu_putref(sma);
+	ipc_rcu_putref(sma, ipc_rcu_free);
 }
 
 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
@@ -458,14 +504,13 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	sma->sem_perm.security = NULL;
 	retval = security_sem_alloc(sma);
 	if (retval) {
-		ipc_rcu_putref(sma);
+		ipc_rcu_putref(sma, ipc_rcu_free);
 		return retval;
 	}
 
 	id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
 	if (id < 0) {
-		security_sem_free(sma);
-		ipc_rcu_putref(sma);
+		ipc_rcu_putref(sma, sem_rcu_free);
 		return id;
 	}
 	ns->used_sems += nsems;
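The two callbacks make the refcounting symmetric: before security_sem_alloc() has succeeded the object must be freed with plain ipc_rcu_free(), afterwards with sem_rcu_free() so the security blob is torn down as well. ipc_rcu_putref() itself lives in ipc/util.c and looks roughly like the sketch below; only the .rcu member of struct ipc_rcu is visible in this diff, so the refcount field and the header-precedes-object layout are inferred from the container_of() usage above.

	void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head))
	{
		/* the ipc_rcu header sits immediately before the object */
		struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;

		if (!atomic_dec_and_test(&p->refcount))
			return;

		/* func runs only after all current RCU readers are done */
		call_rcu(&p->rcu, func);
	}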
@@ -873,6 +918,24 @@ again:
 }
 
 /**
+ * set_semotime(sma, sops) - set sem_otime
+ * @sma: semaphore array
+ * @sops: operations that modified the array, may be NULL
+ *
+ * sem_otime is replicated to avoid cache line thrashing.
+ * This function sets one instance to the current time.
+ */
+static void set_semotime(struct sem_array *sma, struct sembuf *sops)
+{
+	if (sops == NULL) {
+		sma->sem_base[0].sem_otime = get_seconds();
+	} else {
+		sma->sem_base[sops[0].sem_num].sem_otime =
+						get_seconds();
+	}
+}
+
+/**
  * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
  * @sma: semaphore array
  * @sops: operations that were performed
@@ -922,17 +985,10 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
 			}
 		}
 	}
-	if (otime) {
-		if (sops == NULL) {
-			sma->sem_base[0].sem_otime = get_seconds();
-		} else {
-			sma->sem_base[sops[0].sem_num].sem_otime =
-						get_seconds();
-		}
-	}
+	if (otime)
+		set_semotime(sma, sops);
 }
 
-
 /* The following counts are associated to each semaphore:
  *	semncnt		number of tasks waiting on semval being nonzero
  *	semzcnt		number of tasks waiting on semval being zero
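Because each struct sem now carries its own sem_otime copy, a writer only touches the cache line of the semaphore it operated on. Readers have to aggregate: the helper used by sysvipc_sem_proc_show() at the end of this diff is not part of this hunk, but from the same patch series it looks roughly like this (a sketch, reconstructed rather than quoted):

	static time_t get_semotime(struct sem_array *sma)
	{
		int i;
		time_t res;

		/* report the newest of the per-semaphore timestamps */
		res = sma->sem_base[0].sem_otime;
		for (i = 1; i < sma->sem_nsems; i++) {
			time_t to = sma->sem_base[i].sem_otime;

			if (to > res)
				res = to;
		}
		return res;
	}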
@@ -1047,8 +1103,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 
 	wake_up_sem_queue_do(&tasks);
 	ns->used_sems -= sma->sem_nsems;
-	security_sem_free(sma);
-	ipc_rcu_putref(sma);
+	ipc_rcu_putref(sma, sem_rcu_free);
 }
 
 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
@@ -1227,6 +1282,12 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
 
 	sem_lock(sma, NULL, -1);
 
+	if (sma->sem_perm.deleted) {
+		sem_unlock(sma, -1);
+		rcu_read_unlock();
+		return -EIDRM;
+	}
+
 	curr = &sma->sem_base[semnum];
 
 	ipc_assert_locked_object(&sma->sem_perm);
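This hunk is one instance of a recheck idiom the series applies at every sem_lock() call site: the lookup happens under rcu_read_lock() only, so by the time the lock is taken another task may already have executed IPC_RMID. The lock proves ownership, not liveness, so sem_perm.deleted must be re-tested before the array is touched. A condensed sketch of the call-site shape used throughout this diff:

	rcu_read_lock();
	sma = sem_obtain_object_check(ns, semid);	/* RCU lookup, no lock yet */
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return PTR_ERR(sma);
	}

	sem_lock(sma, NULL, -1);
	if (sma->sem_perm.deleted) {	/* lost the race against IPC_RMID */
		sem_unlock(sma, -1);
		rcu_read_unlock();
		return -EIDRM;
	}
	/* ... here the array is locked and known to be alive ... */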
@@ -1281,28 +1342,28 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		int i;
 
 		sem_lock(sma, NULL, -1);
+		if (sma->sem_perm.deleted) {
+			err = -EIDRM;
+			goto out_unlock;
+		}
 		if(nsems > SEMMSL_FAST) {
 			if (!ipc_rcu_getref(sma)) {
-				sem_unlock(sma, -1);
-				rcu_read_unlock();
 				err = -EIDRM;
-				goto out_free;
+				goto out_unlock;
 			}
 			sem_unlock(sma, -1);
 			rcu_read_unlock();
 			sem_io = ipc_alloc(sizeof(ushort)*nsems);
 			if(sem_io == NULL) {
-				sem_putref(sma);
+				ipc_rcu_putref(sma, ipc_rcu_free);
 				return -ENOMEM;
 			}
 
 			rcu_read_lock();
 			sem_lock_and_putref(sma);
 			if (sma->sem_perm.deleted) {
-				sem_unlock(sma, -1);
-				rcu_read_unlock();
 				err = -EIDRM;
-				goto out_free;
+				goto out_unlock;
 			}
 		}
 		for (i = 0; i < sma->sem_nsems; i++)
@@ -1320,28 +1381,28 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		struct sem_undo *un;
 
 		if (!ipc_rcu_getref(sma)) {
-			rcu_read_unlock();
-			return -EIDRM;
+			err = -EIDRM;
+			goto out_rcu_wakeup;
 		}
 		rcu_read_unlock();
 
 		if(nsems > SEMMSL_FAST) {
 			sem_io = ipc_alloc(sizeof(ushort)*nsems);
 			if(sem_io == NULL) {
-				sem_putref(sma);
+				ipc_rcu_putref(sma, ipc_rcu_free);
 				return -ENOMEM;
 			}
 		}
 
 		if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) {
-			sem_putref(sma);
+			ipc_rcu_putref(sma, ipc_rcu_free);
 			err = -EFAULT;
 			goto out_free;
 		}
 
 		for (i = 0; i < nsems; i++) {
 			if (sem_io[i] > SEMVMX) {
-				sem_putref(sma);
+				ipc_rcu_putref(sma, ipc_rcu_free);
 				err = -ERANGE;
 				goto out_free;
 			}
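The GETALL/SETALL changes above all follow one pattern: ipc_alloc() and copy_from_user() may sleep, so they cannot run under the ipc lock or rcu_read_lock(). Instead the object is pinned with a reference, the locks are dropped around the blocking work, and the deleted flag is re-tested after relocking. Condensed from the hunks above (error labels as in the surrounding code):

	if (!ipc_rcu_getref(sma)) {	/* pin; fails if already being freed */
		err = -EIDRM;
		goto out_unlock;
	}
	sem_unlock(sma, -1);
	rcu_read_unlock();

	sem_io = ipc_alloc(sizeof(ushort) * nsems);	/* may sleep */
	if (sem_io == NULL) {
		ipc_rcu_putref(sma, ipc_rcu_free);	/* unpin on error */
		return -ENOMEM;
	}

	rcu_read_lock();
	sem_lock_and_putref(sma);	/* relock and unpin in one step */
	if (sma->sem_perm.deleted) {	/* array vanished while we slept */
		err = -EIDRM;
		goto out_unlock;
	}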
@@ -1349,10 +1410,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		rcu_read_lock();
 		sem_lock_and_putref(sma);
 		if (sma->sem_perm.deleted) {
-			sem_unlock(sma, -1);
-			rcu_read_unlock();
 			err = -EIDRM;
-			goto out_free;
+			goto out_unlock;
 		}
 
 		for (i = 0; i < nsems; i++)
@@ -1376,6 +1435,10 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		goto out_rcu_wakeup;
 
 	sem_lock(sma, NULL, -1);
+	if (sma->sem_perm.deleted) {
+		err = -EIDRM;
+		goto out_unlock;
+	}
 	curr = &sma->sem_base[semnum];
 
 	switch (cmd) {
@@ -1629,7 +1692,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	/* step 2: allocate new undo structure */
 	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
 	if (!new) {
-		sem_putref(sma);
+		ipc_rcu_putref(sma, ipc_rcu_free);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -1781,6 +1844,10 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	if (error)
 		goto out_rcu_wakeup;
 
+	error = -EIDRM;
+	locknum = sem_lock(sma, sops, nsops);
+	if (sma->sem_perm.deleted)
+		goto out_unlock_free;
 	/*
 	 * semid identifiers are not unique - find_alloc_undo may have
 	 * allocated an undo structure, it was invalidated by an RMID
@@ -1788,19 +1855,22 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	 * This case can be detected checking un->semid. The existence of
 	 * "un" itself is guaranteed by rcu.
 	 */
-	error = -EIDRM;
-	locknum = sem_lock(sma, sops, nsops);
 	if (un && un->semid == -1)
 		goto out_unlock_free;
 
 	error = perform_atomic_semop(sma, sops, nsops, un,
 					task_tgid_vnr(current));
-	if (error <= 0) {
-		if (alter && error == 0)
+	if (error == 0) {
+		/* If the operation was successful, then do
+		 * the required updates.
+		 */
+		if (alter)
 			do_smart_update(sma, sops, nsops, 1, &tasks);
-
-		goto out_unlock_free;
+		else
+			set_semotime(sma, sops);
 	}
+	if (error <= 0)
+		goto out_unlock_free;
 
 	/* We need to sleep on this operation, so we put the current
 	 * task into the pending queue and go to sleep.
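The restructured branch fixes a user-visible detail: a successful semop() that does not alter any value (a wait-for-zero that succeeds immediately) previously skipped do_smart_update() and therefore never stamped sem_otime; it now calls set_semotime() directly. A user-space check of that semantics could look like this (an illustrative test program, not part of the patch):

	#include <stdio.h>
	#include <sys/ipc.h>
	#include <sys/sem.h>

	union semun { int val; struct semid_ds *buf; unsigned short *array; };

	int main(void)
	{
		int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
		struct sembuf op = { .sem_num = 0, .sem_op = 0, .sem_flg = 0 };
		struct semid_ds ds;
		union semun arg = { .buf = &ds };

		/* wait-for-zero: succeeds at once since semval starts at 0,
		 * and it is non-altering */
		semop(id, &op, 1);
		semctl(id, 0, IPC_STAT, arg);
		printf("sem_otime = %ld (nonzero expected after the fix)\n",
		       (long)ds.sem_otime);

		semctl(id, 0, IPC_RMID);
		return 0;
	}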
@@ -1997,6 +2067,12 @@ void exit_sem(struct task_struct *tsk)
 	}
 
 	sem_lock(sma, NULL, -1);
+	/* exit_sem raced with IPC_RMID, nothing to do */
+	if (sma->sem_perm.deleted) {
+		sem_unlock(sma, -1);
+		rcu_read_unlock();
+		continue;
+	}
 	un = __lookup_undo(ulp, semid);
 	if (un == NULL) {
 		/* exit_sem raced with IPC_RMID+semget() that created
@@ -2059,6 +2135,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
 	struct sem_array *sma = it;
 	time_t sem_otime;
 
+	/*
+	 * The proc interface isn't aware of sem_lock(), it calls
+	 * ipc_lock_object() directly (in sysvipc_find_ipc).
+	 * In order to stay compatible with sem_lock(), we must wait until
+	 * all simple semop() calls have left their critical regions.
+	 */
+	sem_wait_array(sma);
+
 	sem_otime = get_semotime(sma);
 
 	return seq_printf(s,