Diffstat (limited to 'ipc')
-rw-r--r--	ipc/msg.c	 32
-rw-r--r--	ipc/sem.c	256
-rw-r--r--	ipc/shm.c	 17
-rw-r--r--	ipc/util.c	 59
-rw-r--r--	ipc/util.h	 10
5 files changed, 250 insertions, 124 deletions
diff --git a/ipc/msg.c b/ipc/msg.c
index b0d541d42677..558aa91186b6 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -165,6 +165,15 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
 	ipc_rmid(&msg_ids(ns), &s->q_perm);
 }
 
+static void msg_rcu_free(struct rcu_head *head)
+{
+	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+	struct msg_queue *msq = ipc_rcu_to_struct(p);
+
+	security_msg_queue_free(msq);
+	ipc_rcu_free(head);
+}
+
 /**
  * newque - Create a new msg queue
  * @ns: namespace
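
msg_rcu_free() is not called directly; it is the callback handed to ipc_rcu_putref(), whose reworked two-argument form appears in the ipc/util.c part of this diff. A condensed illustration of the resulting teardown chain (not patch content; the helper behaviour is summarized from the hunks further down):

	ipc_rcu_putref(msq, msg_rcu_free);
		/* refcount drops to zero                                     */
		/* -> call_rcu(&p->rcu, msg_rcu_free)                         */
		/* ...RCU grace period...                                     */
		/* -> msg_rcu_free(): security_msg_queue_free(msq), then      */
		/*    ipc_rcu_free(): vfree()/kfree() of the whole allocation */

Deferring the security hook into the RCU callback means a task that still holds only rcu_read_lock() can no longer race against the security blob being freed under it. The next hunk shows the other half of the convention: error paths that run before security_msg_queue_alloc() has succeeded pass plain ipc_rcu_free, since there is no blob to release yet.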
@@ -189,15 +198,14 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 	msq->q_perm.security = NULL;
 	retval = security_msg_queue_alloc(msq);
 	if (retval) {
-		ipc_rcu_putref(msq);
+		ipc_rcu_putref(msq, ipc_rcu_free);
 		return retval;
 	}
 
 	/* ipc_addid() locks msq upon success. */
 	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
 	if (id < 0) {
-		security_msg_queue_free(msq);
-		ipc_rcu_putref(msq);
+		ipc_rcu_putref(msq, msg_rcu_free);
 		return id;
 	}
 
@@ -276,8 +284,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 		free_msg(msg);
 	}
 	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
-	security_msg_queue_free(msq);
-	ipc_rcu_putref(msq);
+	ipc_rcu_putref(msq, msg_rcu_free);
 }
 
 /*
@@ -688,6 +695,12 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		if (ipcperms(ns, &msq->q_perm, S_IWUGO))
 			goto out_unlock0;
 
+		/* raced with RMID? */
+		if (msq->q_perm.deleted) {
+			err = -EIDRM;
+			goto out_unlock0;
+		}
+
 		err = security_msg_queue_msgsnd(msq, msg, msgflg);
 		if (err)
 			goto out_unlock0;
@@ -717,7 +730,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		rcu_read_lock();
 		ipc_lock_object(&msq->q_perm);
 
-		ipc_rcu_putref(msq);
+		ipc_rcu_putref(msq, ipc_rcu_free);
 		if (msq->q_perm.deleted) {
 			err = -EIDRM;
 			goto out_unlock0;
@@ -894,6 +907,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
 			goto out_unlock1;
 
 		ipc_lock_object(&msq->q_perm);
+
+		/* raced with RMID? */
+		if (msq->q_perm.deleted) {
+			msg = ERR_PTR(-EIDRM);
+			goto out_unlock0;
+		}
+
 		msg = find_msg(msq, &msgtyp, mode);
 		if (!IS_ERR(msg)) {
 			/*
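
Both do_msgsnd() hunks above belong to the sender slow path: when the queue is full, the (unchanged) surrounding code pins the queue with ipc_rcu_getref(), drops the lock, sleeps, and only then relocks and drops the extra reference. A rough sketch of that sequence, reconstructed from context rather than quoted from the file (the scheduling details are elided):

	if (!ipc_rcu_getref(msq)) {		/* pin msq across the sleep */
		err = -EIDRM;
		goto out_unlock0;
	}
	ipc_unlock_object(&msq->q_perm);
	rcu_read_unlock();

	schedule();				/* wait for room on the queue */

	rcu_read_lock();
	ipc_lock_object(&msq->q_perm);

	ipc_rcu_putref(msq, ipc_rcu_free);	/* drop the extra reference (hunk above) */
	if (msq->q_perm.deleted) {		/* the queue was removed while we slept */
		err = -EIDRM;
		goto out_unlock0;
	}

The RMID re-checks added to do_msgsnd() and do_msgrcv() follow the same rule: whenever the object lock is (re)acquired after a window in which it was not held, q_perm.deleted has to be tested before the queue is used.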
diff --git a/ipc/sem.c b/ipc/sem.c
index 69b6a21f3844..db9d241af133 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -243,71 +243,122 @@ static void merge_queues(struct sem_array *sma)
 	}
 }
 
+static void sem_rcu_free(struct rcu_head *head)
+{
+	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+	struct sem_array *sma = ipc_rcu_to_struct(p);
+
+	security_sem_free(sma);
+	ipc_rcu_free(head);
+}
+
+/*
+ * Wait until all currently ongoing simple ops have completed.
+ * Caller must own sem_perm.lock.
+ * New simple ops cannot start, because simple ops first check
+ *	that sem_perm.lock is free.
+ *	that a) sem_perm.lock is free and b) complex_count is 0.
+ */
+static void sem_wait_array(struct sem_array *sma)
+{
+	int i;
+	struct sem *sem;
+
+	if (sma->complex_count) {
+		/* The thread that increased sma->complex_count waited on
+		 * all sem->lock locks. Thus we don't need to wait again.
+		 */
+		return;
+	}
+
+	for (i = 0; i < sma->sem_nsems; i++) {
+		sem = sma->sem_base + i;
+		spin_unlock_wait(&sem->lock);
+	}
+}
+
 /*
  * If the request contains only one semaphore operation, and there are
  * no complex transactions pending, lock only the semaphore involved.
  * Otherwise, lock the entire semaphore array, since we either have
  * multiple semaphores in our own semops, or we need to look at
  * semaphores from other pending complex operations.
- *
- * Carefully guard against sma->complex_count changing between zero
- * and non-zero while we are spinning for the lock. The value of
- * sma->complex_count cannot change while we are holding the lock,
- * so sem_unlock should be fine.
- *
- * The global lock path checks that all the local locks have been released,
- * checking each local lock once. This means that the local lock paths
- * cannot start their critical sections while the global lock is held.
  */
 static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
 			      int nsops)
 {
-	int locknum;
- again:
-	if (nsops == 1 && !sma->complex_count) {
-		struct sem *sem = sma->sem_base + sops->sem_num;
+	struct sem *sem;
 
-		/* Lock just the semaphore we are interested in. */
-		spin_lock(&sem->lock);
+	if (nsops != 1) {
+		/* Complex operation - acquire a full lock */
+		ipc_lock_object(&sma->sem_perm);
 
-		/*
-		 * If sma->complex_count was set while we were spinning,
-		 * we may need to look at things we did not lock here.
+		/* And wait until all simple ops that are processed
+		 * right now have dropped their locks.
 		 */
-		if (unlikely(sma->complex_count)) {
-			spin_unlock(&sem->lock);
-			goto lock_array;
-		}
+		sem_wait_array(sma);
+		return -1;
+	}
+
+	/*
+	 * Only one semaphore affected - try to optimize locking.
+	 * The rules are:
+	 * - optimized locking is possible if no complex operation
+	 *   is either enqueued or processed right now.
+	 * - The test for enqueued complex ops is simple:
+	 *      sma->complex_count != 0
+	 * - Testing for complex ops that are processed right now is
+	 *   a bit more difficult. Complex ops acquire the full lock
+	 *   and first wait that the running simple ops have completed.
+	 *   (see above)
+	 *   Thus: If we own a simple lock and the global lock is free
+	 *	and complex_count is now 0, then it will stay 0 and
+	 *	thus just locking sem->lock is sufficient.
+	 */
+	sem = sma->sem_base + sops->sem_num;
 
+	if (sma->complex_count == 0) {
 		/*
-		 * Another process is holding the global lock on the
-		 * sem_array; we cannot enter our critical section,
-		 * but have to wait for the global lock to be released.
+		 * It appears that no complex operation is around.
+		 * Acquire the per-semaphore lock.
 		 */
-		if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
-			spin_unlock(&sem->lock);
-			spin_unlock_wait(&sma->sem_perm.lock);
-			goto again;
+		spin_lock(&sem->lock);
+
+		/* Then check that the global lock is free */
+		if (!spin_is_locked(&sma->sem_perm.lock)) {
+			/* spin_is_locked() is not a memory barrier */
+			smp_mb();
+
+			/* Now repeat the test of complex_count:
+			 * It can't change anymore until we drop sem->lock.
+			 * Thus: if is now 0, then it will stay 0.
+			 */
+			if (sma->complex_count == 0) {
+				/* fast path successful! */
+				return sops->sem_num;
+			}
 		}
+		spin_unlock(&sem->lock);
+	}
 
-		locknum = sops->sem_num;
+	/* slow path: acquire the full lock */
+	ipc_lock_object(&sma->sem_perm);
+
+	if (sma->complex_count == 0) {
+		/* False alarm:
+		 * There is no complex operation, thus we can switch
+		 * back to the fast path.
+		 */
+		spin_lock(&sem->lock);
+		ipc_unlock_object(&sma->sem_perm);
+		return sops->sem_num;
 	} else {
-		int i;
-		/*
-		 * Lock the semaphore array, and wait for all of the
-		 * individual semaphore locks to go away. The code
-		 * above ensures no new single-lock holders will enter
-		 * their critical section while the array lock is held.
+		/* Not a false alarm, thus complete the sequence for a
+		 * full lock.
 		 */
- lock_array:
-		ipc_lock_object(&sma->sem_perm);
-		for (i = 0; i < sma->sem_nsems; i++) {
-			struct sem *sem = sma->sem_base + i;
-			spin_unlock_wait(&sem->lock);
-		}
-		locknum = -1;
+		sem_wait_array(sma);
+		return -1;
 	}
-	return locknum;
 }
 
 static inline void sem_unlock(struct sem_array *sma, int locknum)
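
The rewritten sem_lock() returns either the index of the semaphore whose per-semaphore lock it took (fast path) or -1 when it fell back to the whole-array lock. Its counterpart sem_unlock() is not touched by this diff, so it is not shown; in simplified form (sketch from context, with the bookkeeping for pending complex operations elided) it just dispatches on that return value:

	static inline void sem_unlock(struct sem_array *sma, int locknum)
	{
		if (locknum == -1) {
			/* slow path: the global array lock was taken */
			ipc_unlock_object(&sma->sem_perm);
		} else {
			/* fast path: only this semaphore's spinlock was taken */
			struct sem *sem = sma->sem_base + locknum;
			spin_unlock(&sem->lock);
		}
	}

This pairing is why callers such as semctl_setval() and freeary() always take the full lock via sem_lock(sma, NULL, -1) and release it with sem_unlock(sma, -1).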
@@ -374,12 +425,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
 static inline void sem_lock_and_putref(struct sem_array *sma)
 {
 	sem_lock(sma, NULL, -1);
-	ipc_rcu_putref(sma);
-}
-
-static inline void sem_putref(struct sem_array *sma)
-{
-	ipc_rcu_putref(sma);
+	ipc_rcu_putref(sma, ipc_rcu_free);
 }
 
 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
@@ -458,14 +504,13 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	sma->sem_perm.security = NULL;
 	retval = security_sem_alloc(sma);
 	if (retval) {
-		ipc_rcu_putref(sma);
+		ipc_rcu_putref(sma, ipc_rcu_free);
 		return retval;
 	}
 
 	id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
 	if (id < 0) {
-		security_sem_free(sma);
-		ipc_rcu_putref(sma);
+		ipc_rcu_putref(sma, sem_rcu_free);
 		return id;
 	}
 	ns->used_sems += nsems;
@@ -873,6 +918,24 @@ again:
 }
 
 /**
+ * set_semotime(sma, sops) - set sem_otime
+ * @sma: semaphore array
+ * @sops: operations that modified the array, may be NULL
+ *
+ * sem_otime is replicated to avoid cache line trashing.
+ * This function sets one instance to the current time.
+ */
+static void set_semotime(struct sem_array *sma, struct sembuf *sops)
+{
+	if (sops == NULL) {
+		sma->sem_base[0].sem_otime = get_seconds();
+	} else {
+		sma->sem_base[sops[0].sem_num].sem_otime =
+						get_seconds();
+	}
+}
+
+/**
  * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
  * @sma: semaphore array
  * @sops: operations that were performed
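
set_semotime() stores the update time in just one of the replicated sem_otime fields: slot 0 when no specific operation is given, otherwise the slot of the first semaphore named in sops. The reader side, get_semotime(), shows up later in sysvipc_sem_proc_show() but is outside this diff; presumably it collapses the per-semaphore copies back into a single value, along these lines (sketch only, not quoted from the file):

	static time_t get_semotime(struct sem_array *sma)
	{
		int i;
		time_t res;

		/* report the most recent of the replicated copies */
		res = sma->sem_base[0].sem_otime;
		for (i = 1; i < sma->sem_nsems; i++) {
			time_t to = sma->sem_base[i].sem_otime;

			if (to > res)
				res = to;
		}
		return res;
	}

Spreading the writes across semaphores is what avoids the cache-line ping-pong the kernel-doc comment mentions; the cost is this small scan whenever a single sem_otime value is needed.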
@@ -922,17 +985,10 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
 			}
 		}
 	}
-	if (otime) {
-		if (sops == NULL) {
-			sma->sem_base[0].sem_otime = get_seconds();
-		} else {
-			sma->sem_base[sops[0].sem_num].sem_otime =
-						get_seconds();
-		}
-	}
+	if (otime)
+		set_semotime(sma, sops);
 }
 
-
 /* The following counts are associated to each semaphore:
  *	semncnt		number of tasks waiting on semval being nonzero
  *	semzcnt		number of tasks waiting on semval being zero
@@ -1047,8 +1103,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 
 	wake_up_sem_queue_do(&tasks);
 	ns->used_sems -= sma->sem_nsems;
-	security_sem_free(sma);
-	ipc_rcu_putref(sma);
+	ipc_rcu_putref(sma, sem_rcu_free);
 }
 
 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
@@ -1227,6 +1282,12 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
 
 	sem_lock(sma, NULL, -1);
 
+	if (sma->sem_perm.deleted) {
+		sem_unlock(sma, -1);
+		rcu_read_unlock();
+		return -EIDRM;
+	}
+
 	curr = &sma->sem_base[semnum];
 
 	ipc_assert_locked_object(&sma->sem_perm);
@@ -1281,28 +1342,28 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		int i;
 
 		sem_lock(sma, NULL, -1);
+		if (sma->sem_perm.deleted) {
+			err = -EIDRM;
+			goto out_unlock;
+		}
 		if(nsems > SEMMSL_FAST) {
 			if (!ipc_rcu_getref(sma)) {
-				sem_unlock(sma, -1);
-				rcu_read_unlock();
 				err = -EIDRM;
-				goto out_free;
+				goto out_unlock;
 			}
 			sem_unlock(sma, -1);
 			rcu_read_unlock();
 			sem_io = ipc_alloc(sizeof(ushort)*nsems);
 			if(sem_io == NULL) {
-				sem_putref(sma);
+				ipc_rcu_putref(sma, ipc_rcu_free);
 				return -ENOMEM;
 			}
 
 			rcu_read_lock();
 			sem_lock_and_putref(sma);
 			if (sma->sem_perm.deleted) {
-				sem_unlock(sma, -1);
-				rcu_read_unlock();
 				err = -EIDRM;
-				goto out_free;
+				goto out_unlock;
 			}
 		}
 		for (i = 0; i < sma->sem_nsems; i++)
@@ -1320,28 +1381,28 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		struct sem_undo *un;
 
 		if (!ipc_rcu_getref(sma)) {
-			rcu_read_unlock();
-			return -EIDRM;
+			err = -EIDRM;
+			goto out_rcu_wakeup;
 		}
 		rcu_read_unlock();
 
 		if(nsems > SEMMSL_FAST) {
 			sem_io = ipc_alloc(sizeof(ushort)*nsems);
 			if(sem_io == NULL) {
-				sem_putref(sma);
+				ipc_rcu_putref(sma, ipc_rcu_free);
 				return -ENOMEM;
 			}
 		}
 
 		if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) {
-			sem_putref(sma);
+			ipc_rcu_putref(sma, ipc_rcu_free);
 			err = -EFAULT;
 			goto out_free;
 		}
 
 		for (i = 0; i < nsems; i++) {
 			if (sem_io[i] > SEMVMX) {
-				sem_putref(sma);
+				ipc_rcu_putref(sma, ipc_rcu_free);
 				err = -ERANGE;
 				goto out_free;
 			}
@@ -1349,10 +1410,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		rcu_read_lock();
 		sem_lock_and_putref(sma);
 		if (sma->sem_perm.deleted) {
-			sem_unlock(sma, -1);
-			rcu_read_unlock();
 			err = -EIDRM;
-			goto out_free;
+			goto out_unlock;
 		}
 
 		for (i = 0; i < nsems; i++)
@@ -1376,6 +1435,10 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		goto out_rcu_wakeup;
 
 	sem_lock(sma, NULL, -1);
+	if (sma->sem_perm.deleted) {
+		err = -EIDRM;
+		goto out_unlock;
+	}
 	curr = &sma->sem_base[semnum];
 
 	switch (cmd) {
@@ -1629,7 +1692,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	/* step 2: allocate new undo structure */
 	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
 	if (!new) {
-		sem_putref(sma);
+		ipc_rcu_putref(sma, ipc_rcu_free);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -1781,6 +1844,10 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	if (error)
 		goto out_rcu_wakeup;
 
+	error = -EIDRM;
+	locknum = sem_lock(sma, sops, nsops);
+	if (sma->sem_perm.deleted)
+		goto out_unlock_free;
 	/*
 	 * semid identifiers are not unique - find_alloc_undo may have
 	 * allocated an undo structure, it was invalidated by an RMID
@@ -1788,19 +1855,22 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	 * This case can be detected checking un->semid. The existence of
 	 * "un" itself is guaranteed by rcu.
 	 */
-	error = -EIDRM;
-	locknum = sem_lock(sma, sops, nsops);
 	if (un && un->semid == -1)
 		goto out_unlock_free;
 
 	error = perform_atomic_semop(sma, sops, nsops, un,
 					task_tgid_vnr(current));
-	if (error <= 0) {
-		if (alter && error == 0)
+	if (error == 0) {
+		/* If the operation was successful, then do
+		 * the required updates.
+		 */
+		if (alter)
 			do_smart_update(sma, sops, nsops, 1, &tasks);
-
-		goto out_unlock_free;
+		else
+			set_semotime(sma, sops);
 	}
+	if (error <= 0)
+		goto out_unlock_free;
 
 	/* We need to sleep on this operation, so we put the current
 	 * task into the pending queue and go to sleep.
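
The reshuffled branch above leans on the return convention of perform_atomic_semop(): 0 means every operation in the array was applied, a negative value is an error, and a positive value means the operations could not proceed yet and the caller has to sleep. An annotated skeleton of the new dispatch (illustration only; the convention is inferred from the callers, so check the function itself if exactness matters):

	error = perform_atomic_semop(sma, sops, nsops, un, task_tgid_vnr(current));
	if (error == 0) {
		/* applied: publish the completion time */
		if (alter)
			do_smart_update(sma, sops, nsops, 1, &tasks);	/* may also wake waiters */
		else
			set_semotime(sma, sops);	/* non-altering ops still update sem_otime */
	}
	if (error <= 0)
		goto out_unlock_free;	/* finished, whether success or error */

	/* error > 0: would block - fall through, queue the task and sleep */

Before this change, a successful non-altering semop() (for example waiting for zero on a semaphore that is already zero) never updated sem_otime, because the update only happened inside the alter branch; routing it through set_semotime() closes that gap.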
@@ -1997,6 +2067,12 @@ void exit_sem(struct task_struct *tsk)
 		}
 
 		sem_lock(sma, NULL, -1);
+		/* exit_sem raced with IPC_RMID, nothing to do */
+		if (sma->sem_perm.deleted) {
+			sem_unlock(sma, -1);
+			rcu_read_unlock();
+			continue;
+		}
+		un = __lookup_undo(ulp, semid);
 		if (un == NULL) {
 			/* exit_sem raced with IPC_RMID+semget() that created
@@ -2059,6 +2135,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
 	struct sem_array *sma = it;
 	time_t sem_otime;
 
+	/*
+	 * The proc interface isn't aware of sem_lock(), it calls
+	 * ipc_lock_object() directly (in sysvipc_find_ipc).
+	 * In order to stay compatible with sem_lock(), we must wait until
+	 * all simple semop() calls have left their critical regions.
+	 */
+	sem_wait_array(sma);
+
 	sem_otime = get_semotime(sma);
 
 	return seq_printf(s,
diff --git a/ipc/shm.c b/ipc/shm.c
index 2821cdf93adb..d69739610fd4 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -167,6 +167,15 @@ static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
 	ipc_lock_object(&ipcp->shm_perm);
 }
 
+static void shm_rcu_free(struct rcu_head *head)
+{
+	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+	struct shmid_kernel *shp = ipc_rcu_to_struct(p);
+
+	security_shm_free(shp);
+	ipc_rcu_free(head);
+}
+
 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
 {
 	ipc_rmid(&shm_ids(ns), &s->shm_perm);
@@ -208,8 +217,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 		user_shm_unlock(file_inode(shp->shm_file)->i_size,
 						shp->mlock_user);
 	fput (shp->shm_file);
-	security_shm_free(shp);
-	ipc_rcu_putref(shp);
+	ipc_rcu_putref(shp, shm_rcu_free);
 }
 
 /*
@@ -497,7 +505,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	shp->shm_perm.security = NULL;
 	error = security_shm_alloc(shp);
 	if (error) {
-		ipc_rcu_putref(shp);
+		ipc_rcu_putref(shp, ipc_rcu_free);
 		return error;
 	}
 
@@ -566,8 +574,7 @@ no_id:
 		user_shm_unlock(size, shp->mlock_user);
 	fput(file);
 no_file:
-	security_shm_free(shp);
-	ipc_rcu_putref(shp);
+	ipc_rcu_putref(shp, shm_rcu_free);
 	return error;
 }
 
diff --git a/ipc/util.c b/ipc/util.c
index e829da9ed01f..7684f41bce76 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -17,12 +17,27 @@
  *	Pavel Emelianov <xemul@openvz.org>
  *
  * General sysv ipc locking scheme:
- *	when doing ipc id lookups, take the ids->rwsem
- *	rcu_read_lock()
- *	obtain the ipc object (kern_ipc_perm)
- *	perform security, capabilities, auditing and permission checks, etc.
- *	acquire the ipc lock (kern_ipc_perm.lock) throught ipc_lock_object()
- *	perform data updates (ie: SET, RMID, LOCK/UNLOCK commands)
+ *	rcu_read_lock()
+ *	    obtain the ipc object (kern_ipc_perm) by looking up the id in an idr
+ *	    tree.
+ *	    - perform initial checks (capabilities, auditing and permission,
+ *	      etc).
+ *	    - perform read-only operations, such as STAT, INFO commands.
+ *	      acquire the ipc lock (kern_ipc_perm.lock) through
+ *	          ipc_lock_object()
+ *	      - perform data updates, such as SET, RMID commands and
+ *	        mechanism-specific operations (semop/semtimedop,
+ *	        msgsnd/msgrcv, shmat/shmdt).
+ *	    drop the ipc lock, through ipc_unlock_object().
+ *	rcu_read_unlock()
+ *
+ *  The ids->rwsem must be taken when:
+ *	- creating, removing and iterating the existing entries in ipc
+ *	  identifier sets.
+ *	- iterating through files under /proc/sysvipc/
+ *
+ * Note that sems have a special fast path that avoids kern_ipc_perm.lock -
+ *	see sem_lock().
  */
 
 #include <linux/mm.h>
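
Written out as code, the sequence the new comment describes is the shape a typical *ctl() handler follows (illustrative skeleton only; ipcperms(), ipc_lock_object() and ipc_unlock_object() are the helpers used in the hunks above, ipc_obtain_object_check() is the lookup declared in ipc/util.h, and the labels are placeholders):

	struct kern_ipc_perm *perm;
	int err;

	rcu_read_lock();
	perm = ipc_obtain_object_check(&msg_ids(ns), id);	/* idr lookup, no lock yet */
	if (IS_ERR(perm)) {
		err = PTR_ERR(perm);
		goto out_rcu;
	}

	err = -EACCES;
	if (ipcperms(ns, perm, S_IRUGO))	/* capability/audit/permission checks under RCU */
		goto out_rcu;

	/* read-only commands (STAT, INFO) can be served right here */

	ipc_lock_object(perm);			/* kern_ipc_perm.lock for the update itself */
	/* ... SET / RMID style updates, or msgsnd/msgrcv/shmat work ... */
	ipc_unlock_object(perm);
out_rcu:
	rcu_read_unlock();

The ids->rwsem, by contrast, is only taken around creation (ipc_addid()), removal (ipc_rmid()) and the /proc/sysvipc iterators, as the second half of the comment states.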
@@ -474,11 +489,6 @@ void ipc_free(void* ptr, int size)
 		kfree(ptr);
 }
 
-struct ipc_rcu {
-	struct rcu_head rcu;
-	atomic_t refcount;
-} ____cacheline_aligned_in_smp;
-
 /**
  * ipc_rcu_alloc - allocate ipc and rcu space
  * @size: size desired
@@ -505,27 +515,24 @@ int ipc_rcu_getref(void *ptr)
 	return atomic_inc_not_zero(&p->refcount);
 }
 
-/**
- * ipc_schedule_free - free ipc + rcu space
- * @head: RCU callback structure for queued work
- */
-static void ipc_schedule_free(struct rcu_head *head)
-{
-	vfree(container_of(head, struct ipc_rcu, rcu));
-}
-
-void ipc_rcu_putref(void *ptr)
+void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head))
 {
 	struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
 
 	if (!atomic_dec_and_test(&p->refcount))
 		return;
 
-	if (is_vmalloc_addr(ptr)) {
-		call_rcu(&p->rcu, ipc_schedule_free);
-	} else {
-		kfree_rcu(p, rcu);
-	}
+	call_rcu(&p->rcu, func);
+}
+
+void ipc_rcu_free(struct rcu_head *head)
+{
+	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+
+	if (is_vmalloc_addr(p))
+		vfree(p);
+	else
+		kfree(p);
 }
 
 /**
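
Together with struct ipc_rcu and ipc_rcu_to_struct() in ipc/util.h (next file), the reworked helpers give every IPC type the same usage pattern. Sketch only - "foo" and security_foo_free() are placeholders for msg/sem/shm and their LSM hooks:

	static void foo_rcu_free(struct rcu_head *head)
	{
		struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
		struct foo_object *foo = ipc_rcu_to_struct(p);

		security_foo_free(foo);	/* subsystem-specific teardown first */
		ipc_rcu_free(head);	/* then free the combined allocation */
	}

	foo = ipc_rcu_alloc(sizeof(*foo));	/* header + object in one allocation */
	...
	ipc_rcu_getref(foo);			/* extra reference, e.g. across a sleep */
	...
	ipc_rcu_putref(foo, foo_rcu_free);	/* last reference: callback runs after a grace period */

Only the callback passed to the putref that drops the final reference ever runs, which is why the allocation error paths pick between plain ipc_rcu_free and the subsystem wrapper depending on whether the security blob exists yet.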
diff --git a/ipc/util.h b/ipc/util.h
index c5f3338ba1fa..f2f5036f2eed 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -47,6 +47,13 @@ static inline void msg_exit_ns(struct ipc_namespace *ns) { }
 static inline void shm_exit_ns(struct ipc_namespace *ns) { }
 #endif
 
+struct ipc_rcu {
+	struct rcu_head rcu;
+	atomic_t refcount;
+} ____cacheline_aligned_in_smp;
+
+#define ipc_rcu_to_struct(p)  ((void *)(p+1))
+
 /*
  * Structure that holds the parameters needed by the ipc operations
  * (see after)
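
ipc_rcu_to_struct() works because ipc_rcu_alloc() places the header immediately in front of the object it manages, so converting between the two is plain pointer arithmetic in both directions (illustration only):

	/*
	 *  [ struct ipc_rcu: rcu head, refcount ][ msg_queue / sem_array / shmid_kernel ]
	 *  ^ p                                    ^ ipc_rcu_to_struct(p) == (void *)(p + 1)
	 */
	void *obj = ipc_rcu_to_struct(p);			/* header -> object */
	struct ipc_rcu *hdr = ((struct ipc_rcu *)obj) - 1;	/* object -> header, as in ipc_rcu_putref() */

The ____cacheline_aligned_in_smp annotation pads the header to a full cache line, so refcount traffic does not share a line with the object data that follows.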
@@ -120,7 +127,8 @@ void ipc_free(void* ptr, int size);
  */
 void* ipc_rcu_alloc(int size);
 int ipc_rcu_getref(void *ptr);
-void ipc_rcu_putref(void *ptr);
+void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head));
+void ipc_rcu_free(struct rcu_head *head);
 
 struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
 struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id);