Diffstat (limited to 'ipc/sem.c')
-rw-r--r--  ipc/sem.c  214
1 file changed, 141 insertions(+), 73 deletions(-)
diff --git a/ipc/sem.c b/ipc/sem.c
index 69b6a21f3844..8c4f59b0204a 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -243,71 +243,122 @@ static void merge_queues(struct sem_array *sma)
 	}
 }
 
+static void sem_rcu_free(struct rcu_head *head)
+{
+	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+	struct sem_array *sma = ipc_rcu_to_struct(p);
+
+	security_sem_free(sma);
+	ipc_rcu_free(head);
+}
+
+/*
+ * Wait until all currently ongoing simple ops have completed.
+ * Caller must own sem_perm.lock.
+ * New simple ops cannot start, because simple ops first check
+ * that sem_perm.lock is free.
+ * that a) sem_perm.lock is free and b) complex_count is 0.
+ */
+static void sem_wait_array(struct sem_array *sma)
+{
+	int i;
+	struct sem *sem;
+
+	if (sma->complex_count) {
+		/* The thread that increased sma->complex_count waited on
+		 * all sem->lock locks. Thus we don't need to wait again.
+		 */
+		return;
+	}
+
+	for (i = 0; i < sma->sem_nsems; i++) {
+		sem = sma->sem_base + i;
+		spin_unlock_wait(&sem->lock);
+	}
+}
+
 /*
  * If the request contains only one semaphore operation, and there are
  * no complex transactions pending, lock only the semaphore involved.
  * Otherwise, lock the entire semaphore array, since we either have
  * multiple semaphores in our own semops, or we need to look at
  * semaphores from other pending complex operations.
- *
- * Carefully guard against sma->complex_count changing between zero
- * and non-zero while we are spinning for the lock. The value of
- * sma->complex_count cannot change while we are holding the lock,
- * so sem_unlock should be fine.
- *
- * The global lock path checks that all the local locks have been released,
- * checking each local lock once. This means that the local lock paths
- * cannot start their critical sections while the global lock is held.
  */
 static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
 			      int nsops)
 {
-	int locknum;
- again:
-	if (nsops == 1 && !sma->complex_count) {
-		struct sem *sem = sma->sem_base + sops->sem_num;
+	struct sem *sem;
 
-		/* Lock just the semaphore we are interested in. */
-		spin_lock(&sem->lock);
+	if (nsops != 1) {
+		/* Complex operation - acquire a full lock */
+		ipc_lock_object(&sma->sem_perm);
 
-		/*
-		 * If sma->complex_count was set while we were spinning,
-		 * we may need to look at things we did not lock here.
+		/* And wait until all simple ops that are processed
+		 * right now have dropped their locks.
 		 */
-		if (unlikely(sma->complex_count)) {
-			spin_unlock(&sem->lock);
-			goto lock_array;
-		}
+		sem_wait_array(sma);
+		return -1;
+	}
+
+	/*
+	 * Only one semaphore affected - try to optimize locking.
+	 * The rules are:
+	 * - optimized locking is possible if no complex operation
+	 *   is either enqueued or processed right now.
+	 * - The test for enqueued complex ops is simple:
+	 *      sma->complex_count != 0
+	 * - Testing for complex ops that are processed right now is
+	 *   a bit more difficult. Complex ops acquire the full lock
+	 *   and first wait that the running simple ops have completed.
+	 *   (see above)
+	 * Thus: If we own a simple lock and the global lock is free
+	 *	and complex_count is now 0, then it will stay 0 and
+	 *	thus just locking sem->lock is sufficient.
+	 */
+	sem = sma->sem_base + sops->sem_num;
 
+	if (sma->complex_count == 0) {
 		/*
-		 * Another process is holding the global lock on the
-		 * sem_array; we cannot enter our critical section,
-		 * but have to wait for the global lock to be released.
+		 * It appears that no complex operation is around.
+		 * Acquire the per-semaphore lock.
 		 */
-		if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
-			spin_unlock(&sem->lock);
-			spin_unlock_wait(&sma->sem_perm.lock);
-			goto again;
+		spin_lock(&sem->lock);
+
+		/* Then check that the global lock is free */
+		if (!spin_is_locked(&sma->sem_perm.lock)) {
+			/* spin_is_locked() is not a memory barrier */
+			smp_mb();
+
+			/* Now repeat the test of complex_count:
+			 * It can't change anymore until we drop sem->lock.
+			 * Thus: if is now 0, then it will stay 0.
+			 */
+			if (sma->complex_count == 0) {
+				/* fast path successful! */
+				return sops->sem_num;
+			}
 		}
+		spin_unlock(&sem->lock);
+	}
 
-		locknum = sops->sem_num;
+	/* slow path: acquire the full lock */
+	ipc_lock_object(&sma->sem_perm);
+
+	if (sma->complex_count == 0) {
+		/* False alarm:
+		 * There is no complex operation, thus we can switch
+		 * back to the fast path.
+		 */
+		spin_lock(&sem->lock);
+		ipc_unlock_object(&sma->sem_perm);
+		return sops->sem_num;
 	} else {
-		int i;
-		/*
-		 * Lock the semaphore array, and wait for all of the
-		 * individual semaphore locks to go away. The code
-		 * above ensures no new single-lock holders will enter
-		 * their critical section while the array lock is held.
+		/* Not a false alarm, thus complete the sequence for a
+		 * full lock.
 		 */
- lock_array:
-		ipc_lock_object(&sma->sem_perm);
-		for (i = 0; i < sma->sem_nsems; i++) {
-			struct sem *sem = sma->sem_base + i;
-			spin_unlock_wait(&sem->lock);
-		}
-		locknum = -1;
+		sem_wait_array(sma);
+		return -1;
 	}
-	return locknum;
 }
 
 static inline void sem_unlock(struct sem_array *sma, int locknum)
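
As an illustration (not part of the patch): the rewritten sem_lock() drops the old "again:" retry loop in favor of a straight-line double check — take the per-semaphore lock, verify that the global lock is free and that complex_count is still zero, and fall back to the full ipc_lock_object() otherwise. The user-space sketch below mirrors only that control flow; all demo_* names are invented, pthread mutexes stand in for spinlocks, and the kernel's smp_mb()/spin_unlock_wait() ordering subtleties are deliberately elided.

#include <pthread.h>
#include <stdio.h>

#define NSEMS 4

static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;	/* sma->sem_perm.lock */
static pthread_mutex_t slot_lock[NSEMS] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER
};
static int complex_count;	/* pending multi-semaphore operations */

/* Mirrors sem_lock(): lock one slot for a simple op, everything otherwise. */
static int demo_lock(int nsops, int slot)
{
	if (nsops != 1) {
		pthread_mutex_lock(&global_lock);	/* complex op: full lock */
		return -1;
	}
	if (complex_count == 0) {
		pthread_mutex_lock(&slot_lock[slot]);
		if (complex_count == 0)
			return slot;			/* fast path successful */
		pthread_mutex_unlock(&slot_lock[slot]);
	}
	pthread_mutex_lock(&global_lock);		/* slow path */
	if (complex_count == 0) {
		/* false alarm: switch back to the per-slot lock */
		pthread_mutex_lock(&slot_lock[slot]);
		pthread_mutex_unlock(&global_lock);
		return slot;
	}
	return -1;
}

static void demo_unlock(int locknum)
{
	if (locknum < 0)
		pthread_mutex_unlock(&global_lock);
	else
		pthread_mutex_unlock(&slot_lock[locknum]);
}

int main(void)
{
	int locknum;

	locknum = demo_lock(1, 2);
	printf("simple op  -> locknum %d\n", locknum);
	demo_unlock(locknum);

	locknum = demo_lock(3, 0);
	printf("complex op -> locknum %d\n", locknum);
	demo_unlock(locknum);
	return 0;
}

Built with cc -pthread, it prints locknum 2 for the simple op and -1 for the complex one, matching sem_lock()'s return convention.
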
@@ -374,12 +425,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
 static inline void sem_lock_and_putref(struct sem_array *sma)
 {
 	sem_lock(sma, NULL, -1);
-	ipc_rcu_putref(sma);
-}
-
-static inline void sem_putref(struct sem_array *sma)
-{
-	ipc_rcu_putref(sma);
+	ipc_rcu_putref(sma, ipc_rcu_free);
 }
 
 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
@@ -458,14 +504,13 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	sma->sem_perm.security = NULL;
 	retval = security_sem_alloc(sma);
 	if (retval) {
-		ipc_rcu_putref(sma);
+		ipc_rcu_putref(sma, ipc_rcu_free);
 		return retval;
 	}
 
 	id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
 	if (id < 0) {
-		security_sem_free(sma);
-		ipc_rcu_putref(sma);
+		ipc_rcu_putref(sma, sem_rcu_free);
 		return id;
 	}
 	ns->used_sems += nsems;
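
For reference (not part of the patch): both error paths now funnel through ipc_rcu_putref() with an explicit destructor, and sem_rcu_free() (added in the first hunk) uses container_of() to climb from the embedded rcu_head back to the enclosing allocation before calling security_sem_free(), so the LSM blob is only released after the RCU grace period. Below is a stand-alone sketch of that pattern; the grace-period deferral is elided and every demo_* name is invented for the example.

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* Same trick as the kernel's container_of(): step from a member
 * pointer back to the enclosing structure. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct demo_rcu_head {
	void (*func)(struct demo_rcu_head *head);
};

struct demo_sem_array {
	int id;
	struct demo_rcu_head rcu;	/* embedded, like struct ipc_rcu */
};

static void demo_sem_free(struct demo_rcu_head *head)
{
	struct demo_sem_array *sma =
		container_of(head, struct demo_sem_array, rcu);

	/* security_sem_free(sma) would run here, after the grace period */
	printf("freeing array %d\n", sma->id);
	free(sma);
}

/* Stand-in for ipc_rcu_putref(ptr, func): the kernel defers func past
 * an RCU grace period; this demo just calls it immediately. */
static void demo_putref(struct demo_sem_array *sma,
			void (*func)(struct demo_rcu_head *))
{
	sma->rcu.func = func;
	func(&sma->rcu);
}

int main(void)
{
	struct demo_sem_array *sma = calloc(1, sizeof(*sma));

	if (!sma)
		return 1;
	sma->id = 42;
	demo_putref(sma, demo_sem_free);
	return 0;
}
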
@@ -873,6 +918,24 @@ again:
 }
 
 /**
+ * set_semotime(sma, sops) - set sem_otime
+ * @sma: semaphore array
+ * @sops: operations that modified the array, may be NULL
+ *
+ * sem_otime is replicated to avoid cache line trashing.
+ * This function sets one instance to the current time.
+ */
+static void set_semotime(struct sem_array *sma, struct sembuf *sops)
+{
+	if (sops == NULL) {
+		sma->sem_base[0].sem_otime = get_seconds();
+	} else {
+		sma->sem_base[sops[0].sem_num].sem_otime =
+						get_seconds();
+	}
+}
+
+/**
  * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
  * @sma: semaphore array
  * @sops: operations that were performed
@@ -922,17 +985,10 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
 			}
 		}
 	}
-	if (otime) {
-		if (sops == NULL) {
-			sma->sem_base[0].sem_otime = get_seconds();
-		} else {
-			sma->sem_base[sops[0].sem_num].sem_otime =
-							get_seconds();
-		}
-	}
+	if (otime)
+		set_semotime(sma, sops);
 }
 
-
 /* The following counts are associated to each semaphore:
  *   semncnt    number of tasks waiting on semval being nonzero
  *   semzcnt    number of tasks waiting on semval being zero
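
An aside on this pair of helpers (not part of the patch): set_semotime() stamps only the replica belonging to the semaphore that was operated on, and get_semotime() (called from sysvipc_sem_proc_show() further down) recovers the logical sem_otime as the newest of the replicas. A compressed user-space sketch, with simplified demo_* types standing in for struct sem and struct sem_array:

#include <stdio.h>
#include <time.h>

#define NSEMS 3

struct demo_sem { time_t sem_otime; };		/* one replica per semaphore */
struct demo_array { struct demo_sem base[NSEMS]; };

/* Like set_semotime(): update only the replica of the touched semaphore. */
static void demo_set_otime(struct demo_array *sma, int sem_num)
{
	sma->base[sem_num].sem_otime = time(NULL);
}

/* Like get_semotime(): the logical sem_otime is the newest replica. */
static time_t demo_get_otime(struct demo_array *sma)
{
	time_t res = sma->base[0].sem_otime;
	int i;

	for (i = 1; i < NSEMS; i++)
		if (sma->base[i].sem_otime > res)
			res = sma->base[i].sem_otime;
	return res;
}

int main(void)
{
	struct demo_array sma = { { { 0 } } };

	demo_set_otime(&sma, 1);
	printf("sem_otime = %ld\n", (long)demo_get_otime(&sma));
	return 0;
}
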
@@ -1047,8 +1103,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 
 	wake_up_sem_queue_do(&tasks);
 	ns->used_sems -= sma->sem_nsems;
-	security_sem_free(sma);
-	ipc_rcu_putref(sma);
+	ipc_rcu_putref(sma, sem_rcu_free);
 }
 
 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
@@ -1292,7 +1347,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		rcu_read_unlock();
 		sem_io = ipc_alloc(sizeof(ushort)*nsems);
 		if(sem_io == NULL) {
-			sem_putref(sma);
+			ipc_rcu_putref(sma, ipc_rcu_free);
 			return -ENOMEM;
 		}
 
@@ -1328,20 +1383,20 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		if(nsems > SEMMSL_FAST) {
 			sem_io = ipc_alloc(sizeof(ushort)*nsems);
 			if(sem_io == NULL) {
-				sem_putref(sma);
+				ipc_rcu_putref(sma, ipc_rcu_free);
 				return -ENOMEM;
 			}
 		}
 
 		if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) {
-			sem_putref(sma);
+			ipc_rcu_putref(sma, ipc_rcu_free);
 			err = -EFAULT;
 			goto out_free;
 		}
 
 		for (i = 0; i < nsems; i++) {
 			if (sem_io[i] > SEMVMX) {
-				sem_putref(sma);
+				ipc_rcu_putref(sma, ipc_rcu_free);
 				err = -ERANGE;
 				goto out_free;
 			}
@@ -1629,7 +1684,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	/* step 2: allocate new undo structure */
 	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
 	if (!new) {
-		sem_putref(sma);
+		ipc_rcu_putref(sma, ipc_rcu_free);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -1795,12 +1850,17 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 
 	error = perform_atomic_semop(sma, sops, nsops, un,
 					task_tgid_vnr(current));
-	if (error <= 0) {
-		if (alter && error == 0)
+	if (error == 0) {
+		/* If the operation was successful, then do
+		 * the required updates.
+		 */
+		if (alter)
 			do_smart_update(sma, sops, nsops, 1, &tasks);
-
-		goto out_unlock_free;
+		else
+			set_semotime(sma, sops);
 	}
+	if (error <= 0)
+		goto out_unlock_free;
 
 	/* We need to sleep on this operation, so we put the current
 	 * task into the pending queue and go to sleep.
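
This hunk carries the user-visible part of the change: on error == 0 a successful operation now always stamps sem_otime — via do_smart_update() when it altered semaphore values, via set_semotime() when it did not (e.g. a wait-for-zero). A small user-space check (a sketch assuming Linux SysV semaphores; union semun must be defined by the caller, per semctl(2)):

#include <stdio.h>
#include <sys/ipc.h>
#include <sys/sem.h>
#include <sys/types.h>

union semun {				/* per semctl(2), defined by the caller */
	int val;
	struct semid_ds *buf;
	unsigned short *array;
};

int main(void)
{
	struct sembuf op = { .sem_num = 0, .sem_op = 0, .sem_flg = 0 };
	struct semid_ds ds;
	union semun arg = { .buf = &ds };
	int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);

	if (id < 0) {
		perror("semget");
		return 1;
	}
	/* Non-altering wait-for-zero: succeeds at once on a fresh
	 * semaphore and, with this patch, also updates sem_otime. */
	if (semop(id, &op, 1) < 0)
		perror("semop");
	if (semctl(id, 0, IPC_STAT, arg) == 0)
		printf("sem_otime = %ld\n", (long)ds.sem_otime);

	semctl(id, 0, IPC_RMID, arg);
	return 0;
}

Before this change, the successful wait-for-zero path left sem_otime untouched.
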
@@ -2059,6 +2119,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
 	struct sem_array *sma = it;
 	time_t sem_otime;
 
+	/*
+	 * The proc interface isn't aware of sem_lock(), it calls
+	 * ipc_lock_object() directly (in sysvipc_find_ipc).
+	 * In order to stay compatible with sem_lock(), we must wait until
+	 * all simple semop() calls have left their critical regions.
+	 */
+	sem_wait_array(sma);
+
 	sem_otime = get_semotime(sma);
 
 	return seq_printf(s,
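
As a final aside (not part of the patch): sysvipc_sem_proc_show() backs /proc/sysvipc/sem, which is why it must now wait for in-flight simple ops via sem_wait_array() before sampling sem_otime. The table it renders can be dumped with a trivial reader:

#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/sysvipc/sem", "r");

	if (!f) {
		perror("/proc/sysvipc/sem");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}
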