Diffstat (limited to 'ipc')

 ipc/msg.c       |  69
 ipc/namespace.c |   9
 ipc/sem.c       | 280
 ipc/shm.c       | 264
 ipc/util.c      | 129
 ipc/util.h      |  24

 6 files changed, 454 insertions(+), 321 deletions(-)
diff --git a/ipc/msg.c b/ipc/msg.c
index 9f29d9e89bac..558aa91186b6 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -70,8 +70,6 @@ struct msg_sender {
 
 #define msg_ids(ns)	((ns)->ids[IPC_MSG_IDS])
 
-#define msg_unlock(msq)	ipc_unlock(&(msq)->q_perm)
-
 static void freeque(struct ipc_namespace *, struct kern_ipc_perm *);
 static int newque(struct ipc_namespace *, struct ipc_params *);
 #ifdef CONFIG_PROC_FS
@@ -167,12 +165,21 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
 	ipc_rmid(&msg_ids(ns), &s->q_perm);
 }
 
+static void msg_rcu_free(struct rcu_head *head)
+{
+	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+	struct msg_queue *msq = ipc_rcu_to_struct(p);
+
+	security_msg_queue_free(msq);
+	ipc_rcu_free(head);
+}
+
 /**
  * newque - Create a new msg queue
  * @ns: namespace
  * @params: ptr to the structure that contains the key and msgflg
  *
- * Called with msg_ids.rw_mutex held (writer)
+ * Called with msg_ids.rwsem held (writer)
  */
 static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 {
@@ -191,15 +198,14 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 	msq->q_perm.security = NULL;
 	retval = security_msg_queue_alloc(msq);
 	if (retval) {
-		ipc_rcu_putref(msq);
+		ipc_rcu_putref(msq, ipc_rcu_free);
 		return retval;
 	}
 
 	/* ipc_addid() locks msq upon success. */
 	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
 	if (id < 0) {
-		security_msg_queue_free(msq);
-		ipc_rcu_putref(msq);
+		ipc_rcu_putref(msq, msg_rcu_free);
 		return id;
 	}
 
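The two hunks above establish the pattern this series repeats for every IPC type: the security blob is no longer freed synchronously; instead ipc_rcu_putref() takes a callback (generic ipc_rcu_free, or the type-specific msg_rcu_free) that runs only after an RCU grace period, so lock-free readers can never observe a half-freed object. A minimal sketch of the same shape, using hypothetical my_obj/my_obj_free/my_obj_put names rather than the kernel's ipc_rcu machinery:

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	/* Hypothetical payload embedding its own rcu_head, mirroring struct ipc_rcu. */
	struct my_obj {
		struct rcu_head rcu;
		void *security;		/* extra state freed by the type-specific callback */
	};

	/* Type-specific callback: release the extra state, then the object itself. */
	static void my_obj_free(struct rcu_head *head)
	{
		struct my_obj *obj = container_of(head, struct my_obj, rcu);

		kfree(obj->security);	/* stands in for security_msg_queue_free() */
		kfree(obj);		/* stands in for ipc_rcu_free() */
	}

	/* Last reference dropped: defer the free until all RCU readers are done. */
	static void my_obj_put(struct my_obj *obj)
	{
		call_rcu(&obj->rcu, my_obj_free);
	}

The callback indirection matters because newque() can fail both before the security blob exists (plain ipc_rcu_free) and after (msg_rcu_free), as the two error paths above show.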
@@ -259,8 +265,8 @@ static void expunge_all(struct msg_queue *msq, int res)
  * removes the message queue from message queue ID IDR, and cleans up all the
  * messages associated with this queue.
  *
- * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held
- * before freeque() is called. msg_ids.rw_mutex remains locked on exit.
+ * msg_ids.rwsem (writer) and the spinlock for this message queue are held
+ * before freeque() is called. msg_ids.rwsem remains locked on exit.
  */
 static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
@@ -270,19 +276,19 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	expunge_all(msq, -EIDRM);
 	ss_wakeup(&msq->q_senders, 1);
 	msg_rmid(ns, msq);
-	msg_unlock(msq);
+	ipc_unlock_object(&msq->q_perm);
+	rcu_read_unlock();
 
 	list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
 		atomic_dec(&ns->msg_hdrs);
 		free_msg(msg);
 	}
 	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
-	security_msg_queue_free(msq);
-	ipc_rcu_putref(msq);
+	ipc_rcu_putref(msq, msg_rcu_free);
 }
 
 /*
- * Called with msg_ids.rw_mutex and ipcp locked.
+ * Called with msg_ids.rwsem and ipcp locked.
  */
 static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
 {
@@ -386,9 +392,9 @@ copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
 }
 
 /*
- * This function handles some msgctl commands which require the rw_mutex
+ * This function handles some msgctl commands which require the rwsem
  * to be held in write mode.
- * NOTE: no locks must be held, the rw_mutex is taken inside this function.
+ * NOTE: no locks must be held, the rwsem is taken inside this function.
  */
 static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
 		       struct msqid_ds __user *buf, int version)
@@ -403,7 +409,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
 			return -EFAULT;
 	}
 
-	down_write(&msg_ids(ns).rw_mutex);
+	down_write(&msg_ids(ns).rwsem);
 	rcu_read_lock();
 
 	ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd,
@@ -459,7 +465,7 @@ out_unlock0:
 out_unlock1:
 	rcu_read_unlock();
 out_up:
-	up_write(&msg_ids(ns).rw_mutex);
+	up_write(&msg_ids(ns).rwsem);
 	return err;
 }
 
@@ -494,7 +500,7 @@ static int msgctl_nolock(struct ipc_namespace *ns, int msqid,
 		msginfo.msgmnb = ns->msg_ctlmnb;
 		msginfo.msgssz = MSGSSZ;
 		msginfo.msgseg = MSGSEG;
-		down_read(&msg_ids(ns).rw_mutex);
+		down_read(&msg_ids(ns).rwsem);
 		if (cmd == MSG_INFO) {
 			msginfo.msgpool = msg_ids(ns).in_use;
 			msginfo.msgmap = atomic_read(&ns->msg_hdrs);
@@ -505,7 +511,7 @@ static int msgctl_nolock(struct ipc_namespace *ns, int msqid,
 			msginfo.msgtql = MSGTQL;
 		}
 		max_id = ipc_get_maxid(&msg_ids(ns));
-		up_read(&msg_ids(ns).rw_mutex);
+		up_read(&msg_ids(ns).rwsem);
 		if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
 			return -EFAULT;
 		return (max_id < 0) ? 0 : max_id;
@@ -680,16 +686,24 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		goto out_unlock1;
 	}
 
+	ipc_lock_object(&msq->q_perm);
+
 	for (;;) {
 		struct msg_sender s;
 
 		err = -EACCES;
 		if (ipcperms(ns, &msq->q_perm, S_IWUGO))
-			goto out_unlock1;
+			goto out_unlock0;
+
+		/* raced with RMID? */
+		if (msq->q_perm.deleted) {
+			err = -EIDRM;
+			goto out_unlock0;
+		}
 
 		err = security_msg_queue_msgsnd(msq, msg, msgflg);
 		if (err)
-			goto out_unlock1;
+			goto out_unlock0;
 
 		if (msgsz + msq->q_cbytes <= msq->q_qbytes &&
 				1 + msq->q_qnum <= msq->q_qbytes) {
@@ -699,10 +713,9 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		/* queue full, wait: */
 		if (msgflg & IPC_NOWAIT) {
 			err = -EAGAIN;
-			goto out_unlock1;
+			goto out_unlock0;
 		}
 
-		ipc_lock_object(&msq->q_perm);
 		ss_add(msq, &s);
 
 		if (!ipc_rcu_getref(msq)) {
@@ -717,7 +730,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		rcu_read_lock();
 		ipc_lock_object(&msq->q_perm);
 
-		ipc_rcu_putref(msq);
+		ipc_rcu_putref(msq, ipc_rcu_free);
 		if (msq->q_perm.deleted) {
 			err = -EIDRM;
 			goto out_unlock0;
@@ -730,10 +743,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 			goto out_unlock0;
 		}
 
-		ipc_unlock_object(&msq->q_perm);
 	}
-
-	ipc_lock_object(&msq->q_perm);
 	msq->q_lspid = task_tgid_vnr(current);
 	msq->q_stime = get_seconds();
 
@@ -897,6 +907,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
 		goto out_unlock1;
 
 	ipc_lock_object(&msq->q_perm);
+
+	/* raced with RMID? */
+	if (msq->q_perm.deleted) {
+		msg = ERR_PTR(-EIDRM);
+		goto out_unlock0;
+	}
+
 	msg = find_msg(msq, &msgtyp, mode);
 	if (!IS_ERR(msg)) {
 		/*
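Both do_msgsnd() and do_msgrcv() now take the object lock directly after an RCU id lookup, so they must revalidate q_perm.deleted under the lock: the queue may have been removed by IPC_RMID between lookup and lock. A condensed sketch of that sequence; msq_lookup() is a hypothetical stand-in for the RCU lookup helper, and the error paths are collapsed:

	/* Hedged sketch, not the kernel's exact code. */
	static long msg_op_sketch(struct ipc_namespace *ns, int msqid)
	{
		struct msg_queue *msq;
		long err = 0;

		rcu_read_lock();
		msq = msq_lookup(ns, msqid);	/* RCU lookup, no lock held yet */

		ipc_lock_object(&msq->q_perm);	/* per-object spinlock */
		if (msq->q_perm.deleted) {	/* raced with IPC_RMID? */
			err = -EIDRM;		/* id died between lookup and lock */
			goto out;
		}
		/* ... operate on the queue under the lock ... */
	out:
		ipc_unlock_object(&msq->q_perm);
		rcu_read_unlock();
		return err;
	}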
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 7ee61bf44933..59451c1e214d 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -81,7 +81,7 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
 	int next_id;
 	int total, in_use;
 
-	down_write(&ids->rw_mutex);
+	down_write(&ids->rwsem);
 
 	in_use = ids->in_use;
 
@@ -89,11 +89,12 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
 		perm = idr_find(&ids->ipcs_idr, next_id);
 		if (perm == NULL)
 			continue;
-		ipc_lock_by_ptr(perm);
+		rcu_read_lock();
+		ipc_lock_object(perm);
 		free(ns, perm);
 		total++;
 	}
-	up_write(&ids->rw_mutex);
+	up_write(&ids->rwsem);
 }
 
 static void free_ipc_ns(struct ipc_namespace *ns)
@@ -171,7 +172,7 @@ static int ipcns_install(struct nsproxy *nsproxy, void *new)
 {
 	struct ipc_namespace *ns = new;
 	if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
-	    !nsown_capable(CAP_SYS_ADMIN))
+	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
 		return -EPERM;
 
 	/* Ditch state from the old ipc namespace */
diff --git a/ipc/sem.c b/ipc/sem.c
index 41088899783d..db9d241af133 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -243,71 +243,122 @@ static void merge_queues(struct sem_array *sma)
 	}
 }
 
+static void sem_rcu_free(struct rcu_head *head)
+{
+	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+	struct sem_array *sma = ipc_rcu_to_struct(p);
+
+	security_sem_free(sma);
+	ipc_rcu_free(head);
+}
+
+/*
+ * Wait until all currently ongoing simple ops have completed.
+ * Caller must own sem_perm.lock.
+ * New simple ops cannot start, because simple ops first check
+ * that sem_perm.lock is free.
+ * that a) sem_perm.lock is free and b) complex_count is 0.
+ */
+static void sem_wait_array(struct sem_array *sma)
+{
+	int i;
+	struct sem *sem;
+
+	if (sma->complex_count) {
+		/* The thread that increased sma->complex_count waited on
+		 * all sem->lock locks. Thus we don't need to wait again.
+		 */
+		return;
+	}
+
+	for (i = 0; i < sma->sem_nsems; i++) {
+		sem = sma->sem_base + i;
+		spin_unlock_wait(&sem->lock);
+	}
+}
+
 /*
  * If the request contains only one semaphore operation, and there are
  * no complex transactions pending, lock only the semaphore involved.
  * Otherwise, lock the entire semaphore array, since we either have
  * multiple semaphores in our own semops, or we need to look at
  * semaphores from other pending complex operations.
- *
- * Carefully guard against sma->complex_count changing between zero
- * and non-zero while we are spinning for the lock. The value of
- * sma->complex_count cannot change while we are holding the lock,
- * so sem_unlock should be fine.
- *
- * The global lock path checks that all the local locks have been released,
- * checking each local lock once. This means that the local lock paths
- * cannot start their critical sections while the global lock is held.
  */
 static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
 			      int nsops)
 {
-	int locknum;
- again:
-	if (nsops == 1 && !sma->complex_count) {
-		struct sem *sem = sma->sem_base + sops->sem_num;
+	struct sem *sem;
 
-		/* Lock just the semaphore we are interested in. */
-		spin_lock(&sem->lock);
+	if (nsops != 1) {
+		/* Complex operation - acquire a full lock */
+		ipc_lock_object(&sma->sem_perm);
 
-		/*
-		 * If sma->complex_count was set while we were spinning,
-		 * we may need to look at things we did not lock here.
+		/* And wait until all simple ops that are processed
+		 * right now have dropped their locks.
 		 */
-		if (unlikely(sma->complex_count)) {
-			spin_unlock(&sem->lock);
-			goto lock_array;
-		}
+		sem_wait_array(sma);
+		return -1;
+	}
+
+	/*
+	 * Only one semaphore affected - try to optimize locking.
+	 * The rules are:
+	 * - optimized locking is possible if no complex operation
+	 *   is either enqueued or processed right now.
+	 * - The test for enqueued complex ops is simple:
+	 *      sma->complex_count != 0
+	 * - Testing for complex ops that are processed right now is
+	 *   a bit more difficult. Complex ops acquire the full lock
+	 *   and first wait that the running simple ops have completed.
+	 *   (see above)
+	 *   Thus: If we own a simple lock and the global lock is free
+	 *	and complex_count is now 0, then it will stay 0 and
+	 *	thus just locking sem->lock is sufficient.
+	 */
+	sem = sma->sem_base + sops->sem_num;
 
+	if (sma->complex_count == 0) {
 		/*
-		 * Another process is holding the global lock on the
-		 * sem_array; we cannot enter our critical section,
-		 * but have to wait for the global lock to be released.
+		 * It appears that no complex operation is around.
+		 * Acquire the per-semaphore lock.
 		 */
-		if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
-			spin_unlock(&sem->lock);
-			spin_unlock_wait(&sma->sem_perm.lock);
-			goto again;
+		spin_lock(&sem->lock);
+
+		/* Then check that the global lock is free */
+		if (!spin_is_locked(&sma->sem_perm.lock)) {
+			/* spin_is_locked() is not a memory barrier */
+			smp_mb();
+
+			/* Now repeat the test of complex_count:
+			 * It can't change anymore until we drop sem->lock.
+			 * Thus: if is now 0, then it will stay 0.
+			 */
+			if (sma->complex_count == 0) {
+				/* fast path successful! */
+				return sops->sem_num;
+			}
 		}
+		spin_unlock(&sem->lock);
+	}
 
-		locknum = sops->sem_num;
+	/* slow path: acquire the full lock */
+	ipc_lock_object(&sma->sem_perm);
+
+	if (sma->complex_count == 0) {
+		/* False alarm:
+		 * There is no complex operation, thus we can switch
+		 * back to the fast path.
+		 */
+		spin_lock(&sem->lock);
+		ipc_unlock_object(&sma->sem_perm);
+		return sops->sem_num;
 	} else {
-		int i;
-		/*
-		 * Lock the semaphore array, and wait for all of the
-		 * individual semaphore locks to go away.  The code
-		 * above ensures no new single-lock holders will enter
-		 * their critical section while the array lock is held.
+		/* Not a false alarm, thus complete the sequence for a
+		 * full lock.
 		 */
- lock_array:
-		ipc_lock_object(&sma->sem_perm);
-		for (i = 0; i < sma->sem_nsems; i++) {
-			struct sem *sem = sma->sem_base + i;
-			spin_unlock_wait(&sem->lock);
-		}
-		locknum = -1;
+		sem_wait_array(sma);
+		return -1;
 	}
-	return locknum;
 }
 
 static inline void sem_unlock(struct sem_array *sma, int locknum)
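The rewritten sem_lock() is a double-checked two-level lock: simple (single-semaphore) operations take only sem->lock, while complex ones take the array-wide sem_perm.lock and then wait out in-flight simple ops via sem_wait_array(). The fast path is valid only if complex_count is zero both before and after taking the per-semaphore lock, with the global lock observed free in between. A condensed sketch of just that protocol, with generic names rather than the kernel's:

	#include <linux/spinlock.h>
	#include <linux/types.h>

	/* Returns true with fine_lock held (fast path), false with no lock
	 * held (caller must fall back to coarse_lock). fine_lock mirrors
	 * sem->lock, coarse_lock mirrors sem_perm.lock, and *nr_complex
	 * mirrors sma->complex_count.
	 */
	static bool try_fine_grained(spinlock_t *fine_lock, spinlock_t *coarse_lock,
				     int *nr_complex)
	{
		if (*nr_complex != 0)
			return false;		/* complex ops queued: go coarse */

		spin_lock(fine_lock);
		if (!spin_is_locked(coarse_lock)) {
			smp_mb();		/* pairs with the coarse-path waiters */
			if (*nr_complex == 0)
				return true;	/* fast path: only fine_lock held */
		}
		spin_unlock(fine_lock);
		return false;			/* fall back to the coarse lock */
	}

On failure the caller takes the coarse lock and, exactly as in the slow path above, may still downgrade back to the fine lock if complex_count turns out to have been a false alarm.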
@@ -322,7 +373,7 @@ static inline void sem_unlock(struct sem_array *sma, int locknum)
 }
 
 /*
- * sem_lock_(check_) routines are called in the paths where the rw_mutex
+ * sem_lock_(check_) routines are called in the paths where the rwsem
  * is not held.
  *
  * The caller holds the RCU read lock.
@@ -374,12 +425,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
 static inline void sem_lock_and_putref(struct sem_array *sma)
 {
 	sem_lock(sma, NULL, -1);
-	ipc_rcu_putref(sma);
-}
-
-static inline void sem_putref(struct sem_array *sma)
-{
-	ipc_rcu_putref(sma);
+	ipc_rcu_putref(sma, ipc_rcu_free);
 }
 
 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
@@ -426,7 +472,7 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
  * @ns: namespace
  * @params: ptr to the structure that contains key, semflg and nsems
  *
- * Called with sem_ids.rw_mutex held (as a writer)
+ * Called with sem_ids.rwsem held (as a writer)
  */
 
 static int newary(struct ipc_namespace *ns, struct ipc_params *params)
@@ -458,14 +504,13 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	sma->sem_perm.security = NULL;
 	retval = security_sem_alloc(sma);
 	if (retval) {
-		ipc_rcu_putref(sma);
+		ipc_rcu_putref(sma, ipc_rcu_free);
 		return retval;
 	}
 
 	id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
 	if (id < 0) {
-		security_sem_free(sma);
-		ipc_rcu_putref(sma);
+		ipc_rcu_putref(sma, sem_rcu_free);
 		return id;
 	}
 	ns->used_sems += nsems;
@@ -492,7 +537,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 
 
 /*
- * Called with sem_ids.rw_mutex and ipcp locked.
+ * Called with sem_ids.rwsem and ipcp locked.
 */
 static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
 {
@@ -503,7 +548,7 @@ static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
 }
 
 /*
- * Called with sem_ids.rw_mutex and ipcp locked.
+ * Called with sem_ids.rwsem and ipcp locked.
 */
 static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
 				  struct ipc_params *params)
@@ -873,6 +918,24 @@ again:
 }
 
 /**
+ * set_semotime(sma, sops) - set sem_otime
+ * @sma: semaphore array
+ * @sops: operations that modified the array, may be NULL
+ *
+ * sem_otime is replicated to avoid cache line trashing.
+ * This function sets one instance to the current time.
+ */
+static void set_semotime(struct sem_array *sma, struct sembuf *sops)
+{
+	if (sops == NULL) {
+		sma->sem_base[0].sem_otime = get_seconds();
+	} else {
+		sma->sem_base[sops[0].sem_num].sem_otime =
+							get_seconds();
+	}
+}
+
+/**
  * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
  * @sma: semaphore array
  * @sops: operations that were performed
@@ -922,17 +985,10 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
 			}
 		}
 	}
-	if (otime) {
-		if (sops == NULL) {
-			sma->sem_base[0].sem_otime = get_seconds();
-		} else {
-			sma->sem_base[sops[0].sem_num].sem_otime =
-								get_seconds();
-		}
-	}
+	if (otime)
+		set_semotime(sma, sops);
 }
 
-
 /* The following counts are associated to each semaphore:
 *   semncnt        number of tasks waiting on semval being nonzero
 *   semzcnt        number of tasks waiting on semval being zero
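set_semotime() stamps only one of the replicated sem_otime fields (slot 0 for a NULL sops, otherwise the first operated-upon semaphore), so any reader that wants an array-wide value has to aggregate the replicas. The diff later relies on get_semotime() for this; that helper is not part of these hunks, so the following reader is an assumption, shown as a plausible sketch in which the newest stamp wins:

	/* Hedged sketch of the reader side for the replicated sem_otime. */
	static time_t get_semotime_sketch(struct sem_array *sma)
	{
		int i;
		time_t res = sma->sem_base[0].sem_otime;

		for (i = 1; i < sma->sem_nsems; i++) {
			time_t to = sma->sem_base[i].sem_otime;

			if (to > res)
				res = to;	/* keep the most recent stamp */
		}
		return res;
	}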
@@ -994,8 +1050,8 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
 	return semzcnt;
 }
 
-/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked
- * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex
+/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
+ * as a writer and the spinlock for this semaphore set hold. sem_ids.rwsem
  * remains locked on exit.
  */
 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
@@ -1047,8 +1103,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 
 	wake_up_sem_queue_do(&tasks);
 	ns->used_sems -= sma->sem_nsems;
-	security_sem_free(sma);
-	ipc_rcu_putref(sma);
+	ipc_rcu_putref(sma, sem_rcu_free);
 }
 
 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
@@ -1116,7 +1171,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
 		seminfo.semmnu = SEMMNU;
 		seminfo.semmap = SEMMAP;
 		seminfo.semume = SEMUME;
-		down_read(&sem_ids(ns).rw_mutex);
+		down_read(&sem_ids(ns).rwsem);
 		if (cmd == SEM_INFO) {
 			seminfo.semusz = sem_ids(ns).in_use;
 			seminfo.semaem = ns->used_sems;
@@ -1125,7 +1180,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
 			seminfo.semaem = SEMAEM;
 		}
 		max_id = ipc_get_maxid(&sem_ids(ns));
-		up_read(&sem_ids(ns).rw_mutex);
+		up_read(&sem_ids(ns).rwsem);
 		if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
 			return -EFAULT;
 		return (max_id < 0) ? 0: max_id;
@@ -1227,6 +1282,12 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
 
 	sem_lock(sma, NULL, -1);
 
+	if (sma->sem_perm.deleted) {
+		sem_unlock(sma, -1);
+		rcu_read_unlock();
+		return -EIDRM;
+	}
+
 	curr = &sma->sem_base[semnum];
 
 	ipc_assert_locked_object(&sma->sem_perm);
@@ -1281,28 +1342,28 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		int i;
 
 		sem_lock(sma, NULL, -1);
+		if (sma->sem_perm.deleted) {
+			err = -EIDRM;
+			goto out_unlock;
+		}
 		if(nsems > SEMMSL_FAST) {
 			if (!ipc_rcu_getref(sma)) {
-				sem_unlock(sma, -1);
-				rcu_read_unlock();
 				err = -EIDRM;
-				goto out_free;
+				goto out_unlock;
 			}
 			sem_unlock(sma, -1);
 			rcu_read_unlock();
 			sem_io = ipc_alloc(sizeof(ushort)*nsems);
 			if(sem_io == NULL) {
-				sem_putref(sma);
+				ipc_rcu_putref(sma, ipc_rcu_free);
 				return -ENOMEM;
 			}
 
 			rcu_read_lock();
 			sem_lock_and_putref(sma);
 			if (sma->sem_perm.deleted) {
-				sem_unlock(sma, -1);
-				rcu_read_unlock();
 				err = -EIDRM;
-				goto out_free;
+				goto out_unlock;
 			}
 		}
 		for (i = 0; i < sma->sem_nsems; i++)
@@ -1320,28 +1381,28 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		struct sem_undo *un;
 
 		if (!ipc_rcu_getref(sma)) {
-			rcu_read_unlock();
-			return -EIDRM;
+			err = -EIDRM;
+			goto out_rcu_wakeup;
 		}
 		rcu_read_unlock();
 
 		if(nsems > SEMMSL_FAST) {
 			sem_io = ipc_alloc(sizeof(ushort)*nsems);
 			if(sem_io == NULL) {
-				sem_putref(sma);
+				ipc_rcu_putref(sma, ipc_rcu_free);
 				return -ENOMEM;
 			}
 		}
 
 		if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) {
-			sem_putref(sma);
+			ipc_rcu_putref(sma, ipc_rcu_free);
 			err = -EFAULT;
 			goto out_free;
 		}
 
 		for (i = 0; i < nsems; i++) {
 			if (sem_io[i] > SEMVMX) {
-				sem_putref(sma);
+				ipc_rcu_putref(sma, ipc_rcu_free);
 				err = -ERANGE;
 				goto out_free;
 			}
@@ -1349,10 +1410,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		rcu_read_lock();
 		sem_lock_and_putref(sma);
 		if (sma->sem_perm.deleted) {
-			sem_unlock(sma, -1);
-			rcu_read_unlock();
 			err = -EIDRM;
-			goto out_free;
+			goto out_unlock;
 		}
 
 		for (i = 0; i < nsems; i++)
@@ -1376,6 +1435,10 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		goto out_rcu_wakeup;
 
 	sem_lock(sma, NULL, -1);
+	if (sma->sem_perm.deleted) {
+		err = -EIDRM;
+		goto out_unlock;
+	}
 	curr = &sma->sem_base[semnum];
 
 	switch (cmd) {
@@ -1431,9 +1494,9 @@ copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
 }
 
 /*
- * This function handles some semctl commands which require the rw_mutex
+ * This function handles some semctl commands which require the rwsem
 * to be held in write mode.
- * NOTE: no locks must be held, the rw_mutex is taken inside this function.
+ * NOTE: no locks must be held, the rwsem is taken inside this function.
 */
 static int semctl_down(struct ipc_namespace *ns, int semid,
 		       int cmd, int version, void __user *p)
@@ -1448,7 +1511,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid,
 			return -EFAULT;
 	}
 
-	down_write(&sem_ids(ns).rw_mutex);
+	down_write(&sem_ids(ns).rwsem);
 	rcu_read_lock();
 
 	ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
@@ -1487,7 +1550,7 @@ out_unlock0:
 out_unlock1:
 	rcu_read_unlock();
 out_up:
-	up_write(&sem_ids(ns).rw_mutex);
+	up_write(&sem_ids(ns).rwsem);
 	return err;
 }
 
@@ -1629,7 +1692,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	/* step 2: allocate new undo structure */
 	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
 	if (!new) {
-		sem_putref(sma);
+		ipc_rcu_putref(sma, ipc_rcu_free);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -1781,6 +1844,10 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	if (error)
 		goto out_rcu_wakeup;
 
+	error = -EIDRM;
+	locknum = sem_lock(sma, sops, nsops);
+	if (sma->sem_perm.deleted)
+		goto out_unlock_free;
 	/*
 	 * semid identifiers are not unique - find_alloc_undo may have
 	 * allocated an undo structure, it was invalidated by an RMID
@@ -1788,19 +1855,22 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	 * This case can be detected checking un->semid. The existence of
 	 * "un" itself is guaranteed by rcu.
 	 */
-	error = -EIDRM;
-	locknum = sem_lock(sma, sops, nsops);
 	if (un && un->semid == -1)
 		goto out_unlock_free;
 
 	error = perform_atomic_semop(sma, sops, nsops, un,
 					task_tgid_vnr(current));
-	if (error <= 0) {
-		if (alter && error == 0)
+	if (error == 0) {
+		/* If the operation was successful, then do
+		 * the required updates.
+		 */
+		if (alter)
 			do_smart_update(sma, sops, nsops, 1, &tasks);
-
-		goto out_unlock_free;
+		else
+			set_semotime(sma, sops);
 	}
+	if (error <= 0)
+		goto out_unlock_free;
 
 	/* We need to sleep on this operation, so we put the current
 	 * task into the pending queue and go to sleep.
@@ -1997,6 +2067,12 @@ void exit_sem(struct task_struct *tsk)
 		}
 
 		sem_lock(sma, NULL, -1);
+		/* exit_sem raced with IPC_RMID, nothing to do */
+		if (sma->sem_perm.deleted) {
+			sem_unlock(sma, -1);
+			rcu_read_unlock();
+			continue;
+		}
 		un = __lookup_undo(ulp, semid);
 		if (un == NULL) {
 			/* exit_sem raced with IPC_RMID+semget() that created
@@ -2059,6 +2135,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
 	struct sem_array *sma = it;
 	time_t sem_otime;
 
+	/*
+	 * The proc interface isn't aware of sem_lock(), it calls
+	 * ipc_lock_object() directly (in sysvipc_find_ipc).
+	 * In order to stay compatible with sem_lock(), we must wait until
+	 * all simple semop() calls have left their critical regions.
+	 */
+	sem_wait_array(sma);
+
 	sem_otime = get_semotime(sma);
 
 	return seq_printf(s,
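From user space the locking split is invisible but measurable: which lock a semop() takes now depends only on nsops and on whether complex operations are pending. A small, hypothetical test program that exercises both paths (error handling elided):

	#include <sys/ipc.h>
	#include <sys/sem.h>

	int main(void)
	{
		int id = semget(IPC_PRIVATE, 4, IPC_CREAT | 0600);
		struct sembuf up   = { .sem_num = 2, .sem_op = 1,  .sem_flg = 0 };
		struct sembuf down = { .sem_num = 2, .sem_op = -1, .sem_flg = 0 };
		struct sembuf both[2] = {
			{ .sem_num = 0, .sem_op = 1 },
			{ .sem_num = 1, .sem_op = 1 },
		};

		semop(id, &up, 1);	/* nsops == 1: per-semaphore lock (fast path) */
		semop(id, &down, 1);	/* likewise */
		semop(id, both, 2);	/* nsops != 1: array-wide lock (slow path) */

		semctl(id, 0, IPC_RMID);
		return 0;
	}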
diff --git a/ipc/shm.c b/ipc/shm.c
index c6b4ad5ce3b7..d69739610fd4 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -19,6 +19,9 @@
  * namespaces support
  * OpenVZ, SWsoft Inc.
  * Pavel Emelianov <xemul@openvz.org>
+ *
+ * Better ipc lock (kern_ipc_perm.lock) handling
+ * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
  */
 
 #include <linux/slab.h>
@@ -80,8 +83,8 @@ void shm_init_ns(struct ipc_namespace *ns)
 }
 
 /*
- * Called with shm_ids.rw_mutex (writer) and the shp structure locked.
- * Only shm_ids.rw_mutex remains locked on exit.
+ * Called with shm_ids.rwsem (writer) and the shp structure locked.
+ * Only shm_ids.rwsem remains locked on exit.
 */
 static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
@@ -124,8 +127,28 @@ void __init shm_init (void)
 				IPC_SHM_IDS, sysvipc_shm_proc_show);
 }
 
+static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_obtain_object(&shm_ids(ns), id);
+
+	if (IS_ERR(ipcp))
+		return ERR_CAST(ipcp);
+
+	return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
+static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);
+
+	if (IS_ERR(ipcp))
+		return ERR_CAST(ipcp);
+
+	return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
 /*
- * shm_lock_(check_) routines are called in the paths where the rw_mutex
+ * shm_lock_(check_) routines are called in the paths where the rwsem
  * is not necessarily held.
  */
 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
@@ -144,15 +167,13 @@ static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
 	ipc_lock_object(&ipcp->shm_perm);
 }
 
-static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
-						int id)
+static void shm_rcu_free(struct rcu_head *head)
 {
-	struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id);
+	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+	struct shmid_kernel *shp = ipc_rcu_to_struct(p);
 
-	if (IS_ERR(ipcp))
-		return (struct shmid_kernel *)ipcp;
-
-	return container_of(ipcp, struct shmid_kernel, shm_perm);
+	security_shm_free(shp);
+	ipc_rcu_free(head);
 }
 
 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
@@ -182,7 +203,7 @@ static void shm_open(struct vm_area_struct *vma)
  * @ns: namespace
  * @shp: struct to free
  *
- * It has to be called with shp and shm_ids.rw_mutex (writer) locked,
+ * It has to be called with shp and shm_ids.rwsem (writer) locked,
  * but returns with shp unlocked and freed.
  */
 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
@@ -196,8 +217,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 		user_shm_unlock(file_inode(shp->shm_file)->i_size,
 						shp->mlock_user);
 	fput (shp->shm_file);
-	security_shm_free(shp);
-	ipc_rcu_putref(shp);
+	ipc_rcu_putref(shp, shm_rcu_free);
 }
 
 /*
@@ -230,7 +250,7 @@ static void shm_close(struct vm_area_struct *vma)
 	struct shmid_kernel *shp;
 	struct ipc_namespace *ns = sfd->ns;
 
-	down_write(&shm_ids(ns).rw_mutex);
+	down_write(&shm_ids(ns).rwsem);
 	/* remove from the list of attaches of the shm segment */
 	shp = shm_lock(ns, sfd->id);
 	BUG_ON(IS_ERR(shp));
@@ -241,10 +261,10 @@ static void shm_close(struct vm_area_struct *vma)
 		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
-	up_write(&shm_ids(ns).rw_mutex);
+	up_write(&shm_ids(ns).rwsem);
 }
 
-/* Called with ns->shm_ids(ns).rw_mutex locked */
+/* Called with ns->shm_ids(ns).rwsem locked */
 static int shm_try_destroy_current(int id, void *p, void *data)
 {
 	struct ipc_namespace *ns = data;
@@ -275,7 +295,7 @@ static int shm_try_destroy_current(int id, void *p, void *data)
 	return 0;
 }
 
-/* Called with ns->shm_ids(ns).rw_mutex locked */
+/* Called with ns->shm_ids(ns).rwsem locked */
 static int shm_try_destroy_orphaned(int id, void *p, void *data)
 {
 	struct ipc_namespace *ns = data;
@@ -286,7 +306,7 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
 	 * We want to destroy segments without users and with already
 	 * exit'ed originating process.
 	 *
-	 * As shp->* are changed under rw_mutex, it's safe to skip shp locking.
+	 * As shp->* are changed under rwsem, it's safe to skip shp locking.
 	 */
 	if (shp->shm_creator != NULL)
 		return 0;
@@ -300,10 +320,10 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
 
 void shm_destroy_orphaned(struct ipc_namespace *ns)
 {
-	down_write(&shm_ids(ns).rw_mutex);
+	down_write(&shm_ids(ns).rwsem);
 	if (shm_ids(ns).in_use)
 		idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
-	up_write(&shm_ids(ns).rw_mutex);
+	up_write(&shm_ids(ns).rwsem);
 }
 
 
@@ -315,10 +335,10 @@ void exit_shm(struct task_struct *task)
 		return;
 
 	/* Destroy all already created segments, but not mapped yet */
-	down_write(&shm_ids(ns).rw_mutex);
+	down_write(&shm_ids(ns).rwsem);
 	if (shm_ids(ns).in_use)
 		idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
-	up_write(&shm_ids(ns).rw_mutex);
+	up_write(&shm_ids(ns).rwsem);
 }
 
 static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -452,7 +472,7 @@ static const struct vm_operations_struct shm_vm_ops = {
  * @ns: namespace
  * @params: ptr to the structure that contains key, size and shmflg
  *
- * Called with shm_ids.rw_mutex held as a writer.
+ * Called with shm_ids.rwsem held as a writer.
  */
 
 static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
@@ -485,7 +505,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	shp->shm_perm.security = NULL;
 	error = security_shm_alloc(shp);
 	if (error) {
-		ipc_rcu_putref(shp);
+		ipc_rcu_putref(shp, ipc_rcu_free);
 		return error;
 	}
 
@@ -554,13 +574,12 @@ no_id:
 		user_shm_unlock(size, shp->mlock_user);
 	fput(file);
 no_file:
-	security_shm_free(shp);
-	ipc_rcu_putref(shp);
+	ipc_rcu_putref(shp, shm_rcu_free);
 	return error;
 }
 
 /*
- * Called with shm_ids.rw_mutex and ipcp locked.
+ * Called with shm_ids.rwsem and ipcp locked.
 */
 static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
 {
@@ -571,7 +590,7 @@ static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
 }
 
 /*
- * Called with shm_ids.rw_mutex and ipcp locked.
+ * Called with shm_ids.rwsem and ipcp locked.
 */
 static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
 				  struct ipc_params *params)
@@ -684,7 +703,7 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf
 
 /*
  * Calculate and add used RSS and swap pages of a shm.
- * Called with shm_ids.rw_mutex held as a reader
+ * Called with shm_ids.rwsem held as a reader
 */
 static void shm_add_rss_swap(struct shmid_kernel *shp,
 			unsigned long *rss_add, unsigned long *swp_add)
@@ -711,7 +730,7 @@ static void shm_add_rss_swap(struct shmid_kernel *shp,
 }
 
 /*
- * Called with shm_ids.rw_mutex held as a reader
+ * Called with shm_ids.rwsem held as a reader
 */
 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
 			unsigned long *swp)
@@ -740,9 +759,9 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
 }
 
 /*
- * This function handles some shmctl commands which require the rw_mutex
+ * This function handles some shmctl commands which require the rwsem
  * to be held in write mode.
- * NOTE: no locks must be held, the rw_mutex is taken inside this function.
+ * NOTE: no locks must be held, the rwsem is taken inside this function.
 */
 static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
 		       struct shmid_ds __user *buf, int version)
@@ -757,14 +776,13 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
 			return -EFAULT;
 	}
 
-	down_write(&shm_ids(ns).rw_mutex);
+	down_write(&shm_ids(ns).rwsem);
 	rcu_read_lock();
 
-	ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd,
+	ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
 			       &shmid64.shm_perm, 0);
 	if (IS_ERR(ipcp)) {
 		err = PTR_ERR(ipcp);
-		/* the ipc lock is not held upon failure */
 		goto out_unlock1;
 	}
 
@@ -772,14 +790,16 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
 
 	err = security_shm_shmctl(shp, cmd);
 	if (err)
-		goto out_unlock0;
+		goto out_unlock1;
 
 	switch (cmd) {
 	case IPC_RMID:
+		ipc_lock_object(&shp->shm_perm);
 		/* do_shm_rmid unlocks the ipc object and rcu */
 		do_shm_rmid(ns, ipcp);
 		goto out_up;
 	case IPC_SET:
+		ipc_lock_object(&shp->shm_perm);
 		err = ipc_update_perm(&shmid64.shm_perm, ipcp);
 		if (err)
 			goto out_unlock0;
@@ -787,6 +807,7 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
 		break;
 	default:
 		err = -EINVAL;
+		goto out_unlock1;
 	}
 
 out_unlock0:
@@ -794,33 +815,28 @@ out_unlock0:
 out_unlock1:
 	rcu_read_unlock();
 out_up:
-	up_write(&shm_ids(ns).rw_mutex);
+	up_write(&shm_ids(ns).rwsem);
 	return err;
 }
 
-SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
+static int shmctl_nolock(struct ipc_namespace *ns, int shmid,
+			 int cmd, int version, void __user *buf)
 {
+	int err;
 	struct shmid_kernel *shp;
-	int err, version;
-	struct ipc_namespace *ns;
 
-	if (cmd < 0 || shmid < 0) {
-		err = -EINVAL;
-		goto out;
+	/* preliminary security checks for *_INFO */
+	if (cmd == IPC_INFO || cmd == SHM_INFO) {
+		err = security_shm_shmctl(NULL, cmd);
+		if (err)
+			return err;
 	}
 
-	version = ipc_parse_version(&cmd);
-	ns = current->nsproxy->ipc_ns;
-
-	switch (cmd) { /* replace with proc interface ? */
+	switch (cmd) {
 	case IPC_INFO:
 	{
 		struct shminfo64 shminfo;
 
-		err = security_shm_shmctl(NULL, cmd);
-		if (err)
-			return err;
-
 		memset(&shminfo, 0, sizeof(shminfo));
 		shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
 		shminfo.shmmax = ns->shm_ctlmax;
@@ -830,9 +846,9 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 		if(copy_shminfo_to_user (buf, &shminfo, version))
 			return -EFAULT;
 
-		down_read(&shm_ids(ns).rw_mutex);
+		down_read(&shm_ids(ns).rwsem);
 		err = ipc_get_maxid(&shm_ids(ns));
-		up_read(&shm_ids(ns).rw_mutex);
+		up_read(&shm_ids(ns).rwsem);
 
 		if(err<0)
 			err = 0;
@@ -842,19 +858,15 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 	{
 		struct shm_info shm_info;
 
-		err = security_shm_shmctl(NULL, cmd);
-		if (err)
-			return err;
-
 		memset(&shm_info, 0, sizeof(shm_info));
-		down_read(&shm_ids(ns).rw_mutex);
+		down_read(&shm_ids(ns).rwsem);
 		shm_info.used_ids = shm_ids(ns).in_use;
 		shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
 		shm_info.shm_tot = ns->shm_tot;
 		shm_info.swap_attempts = 0;
 		shm_info.swap_successes = 0;
 		err = ipc_get_maxid(&shm_ids(ns));
-		up_read(&shm_ids(ns).rw_mutex);
+		up_read(&shm_ids(ns).rwsem);
 		if (copy_to_user(buf, &shm_info, sizeof(shm_info))) {
 			err = -EFAULT;
 			goto out;
@@ -869,27 +881,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 		struct shmid64_ds tbuf;
 		int result;
 
+		rcu_read_lock();
 		if (cmd == SHM_STAT) {
-			shp = shm_lock(ns, shmid);
+			shp = shm_obtain_object(ns, shmid);
 			if (IS_ERR(shp)) {
 				err = PTR_ERR(shp);
-				goto out;
+				goto out_unlock;
 			}
 			result = shp->shm_perm.id;
 		} else {
-			shp = shm_lock_check(ns, shmid);
+			shp = shm_obtain_object_check(ns, shmid);
 			if (IS_ERR(shp)) {
 				err = PTR_ERR(shp);
-				goto out;
+				goto out_unlock;
 			}
 			result = 0;
 		}
+
 		err = -EACCES;
 		if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
 			goto out_unlock;
+
 		err = security_shm_shmctl(shp, cmd);
 		if (err)
 			goto out_unlock;
+
 		memset(&tbuf, 0, sizeof(tbuf));
 		kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
 		tbuf.shm_segsz	= shp->shm_segsz;
@@ -899,43 +915,76 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 		tbuf.shm_cpid	= shp->shm_cprid;
 		tbuf.shm_lpid	= shp->shm_lprid;
 		tbuf.shm_nattch	= shp->shm_nattch;
-		shm_unlock(shp);
-		if(copy_shmid_to_user (buf, &tbuf, version))
+		rcu_read_unlock();
+
+		if (copy_shmid_to_user(buf, &tbuf, version))
 			err = -EFAULT;
 		else
 			err = result;
 		goto out;
 	}
+	default:
+		return -EINVAL;
+	}
+
+out_unlock:
+	rcu_read_unlock();
+out:
+	return err;
+}
+
+SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
+{
+	struct shmid_kernel *shp;
+	int err, version;
+	struct ipc_namespace *ns;
+
+	if (cmd < 0 || shmid < 0)
+		return -EINVAL;
+
+	version = ipc_parse_version(&cmd);
+	ns = current->nsproxy->ipc_ns;
+
+	switch (cmd) {
+	case IPC_INFO:
+	case SHM_INFO:
+	case SHM_STAT:
+	case IPC_STAT:
+		return shmctl_nolock(ns, shmid, cmd, version, buf);
+	case IPC_RMID:
+	case IPC_SET:
+		return shmctl_down(ns, shmid, cmd, buf, version);
 	case SHM_LOCK:
 	case SHM_UNLOCK:
 	{
 		struct file *shm_file;
 
-		shp = shm_lock_check(ns, shmid);
+		rcu_read_lock();
+		shp = shm_obtain_object_check(ns, shmid);
 		if (IS_ERR(shp)) {
 			err = PTR_ERR(shp);
-			goto out;
+			goto out_unlock1;
 		}
 
 		audit_ipc_obj(&(shp->shm_perm));
+		err = security_shm_shmctl(shp, cmd);
+		if (err)
+			goto out_unlock1;
 
+		ipc_lock_object(&shp->shm_perm);
 		if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
 			kuid_t euid = current_euid();
 			err = -EPERM;
 			if (!uid_eq(euid, shp->shm_perm.uid) &&
 			    !uid_eq(euid, shp->shm_perm.cuid))
-				goto out_unlock;
+				goto out_unlock0;
 			if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK))
-				goto out_unlock;
+				goto out_unlock0;
 		}
 
-		err = security_shm_shmctl(shp, cmd);
-		if (err)
-			goto out_unlock;
-
 		shm_file = shp->shm_file;
 		if (is_file_hugepages(shm_file))
-			goto out_unlock;
+			goto out_unlock0;
 
 		if (cmd == SHM_LOCK) {
 			struct user_struct *user = current_user();
@@ -944,32 +993,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 			shp->shm_perm.mode |= SHM_LOCKED;
 			shp->mlock_user = user;
 			}
-			goto out_unlock;
+			goto out_unlock0;
 		}
 
 		/* SHM_UNLOCK */
 		if (!(shp->shm_perm.mode & SHM_LOCKED))
-			goto out_unlock;
+			goto out_unlock0;
 		shmem_lock(shm_file, 0, shp->mlock_user);
 		shp->shm_perm.mode &= ~SHM_LOCKED;
 		shp->mlock_user = NULL;
 		get_file(shm_file);
-		shm_unlock(shp);
+		ipc_unlock_object(&shp->shm_perm);
+		rcu_read_unlock();
 		shmem_unlock_mapping(shm_file->f_mapping);
+
 		fput(shm_file);
-		goto out;
-	}
-	case IPC_RMID:
-	case IPC_SET:
-		err = shmctl_down(ns, shmid, cmd, buf, version);
 		return err;
+	}
 	default:
 		return -EINVAL;
 	}
 
-out_unlock:
-	shm_unlock(shp);
-out:
+out_unlock0:
+	ipc_unlock_object(&shp->shm_perm);
+out_unlock1:
+	rcu_read_unlock();
 	return err;
 }
 
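The shmctl() rework mirrors msgctl/semctl: the read-only commands (IPC_INFO, SHM_INFO, IPC_STAT, SHM_STAT) are served by shmctl_nolock() under rcu_read_lock() alone, IPC_RMID/IPC_SET go through shmctl_down() with the rwsem held, and SHM_LOCK/SHM_UNLOCK take the object spinlock only around the actual state change. A minimal user-space illustration of the two command classes (hypothetical probe, error handling elided):

	#include <stdio.h>
	#include <sys/ipc.h>
	#include <sys/shm.h>

	int main(void)
	{
		int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
		struct shmid_ds ds;

		shmctl(id, IPC_STAT, &ds);	/* shmctl_nolock(): RCU lookup only */
		printf("nattch=%lu\n", (unsigned long)ds.shm_nattch);

		shmctl(id, IPC_RMID, NULL);	/* shmctl_down(): rwsem + object lock */
		return 0;
	}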
@@ -1037,10 +1085,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
1037 * additional creator id... 1085 * additional creator id...
1038 */ 1086 */
1039 ns = current->nsproxy->ipc_ns; 1087 ns = current->nsproxy->ipc_ns;
1040 shp = shm_lock_check(ns, shmid); 1088 rcu_read_lock();
1089 shp = shm_obtain_object_check(ns, shmid);
1041 if (IS_ERR(shp)) { 1090 if (IS_ERR(shp)) {
1042 err = PTR_ERR(shp); 1091 err = PTR_ERR(shp);
1043 goto out; 1092 goto out_unlock;
1044 } 1093 }
1045 1094
1046 err = -EACCES; 1095 err = -EACCES;
@@ -1051,24 +1100,31 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
1051 if (err) 1100 if (err)
1052 goto out_unlock; 1101 goto out_unlock;
1053 1102
1103 ipc_lock_object(&shp->shm_perm);
1054 path = shp->shm_file->f_path; 1104 path = shp->shm_file->f_path;
1055 path_get(&path); 1105 path_get(&path);
1056 shp->shm_nattch++; 1106 shp->shm_nattch++;
1057 size = i_size_read(path.dentry->d_inode); 1107 size = i_size_read(path.dentry->d_inode);
1058 shm_unlock(shp); 1108 ipc_unlock_object(&shp->shm_perm);
1109 rcu_read_unlock();
1059 1110
1060 err = -ENOMEM; 1111 err = -ENOMEM;
1061 sfd = kzalloc(sizeof(*sfd), GFP_KERNEL); 1112 sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
1062 if (!sfd) 1113 if (!sfd) {
1063 goto out_put_dentry; 1114 path_put(&path);
1115 goto out_nattch;
1116 }
1064 1117
1065 file = alloc_file(&path, f_mode, 1118 file = alloc_file(&path, f_mode,
1066 is_file_hugepages(shp->shm_file) ? 1119 is_file_hugepages(shp->shm_file) ?
1067 &shm_file_operations_huge : 1120 &shm_file_operations_huge :
1068 &shm_file_operations); 1121 &shm_file_operations);
1069 err = PTR_ERR(file); 1122 err = PTR_ERR(file);
1070 if (IS_ERR(file)) 1123 if (IS_ERR(file)) {
1071 goto out_free; 1124 kfree(sfd);
1125 path_put(&path);
1126 goto out_nattch;
1127 }
1072 1128
1073 file->private_data = sfd; 1129 file->private_data = sfd;
1074 file->f_mapping = shp->shm_file->f_mapping; 1130 file->f_mapping = shp->shm_file->f_mapping;
@@ -1094,7 +1150,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
1094 addr > current->mm->start_stack - size - PAGE_SIZE * 5) 1150 addr > current->mm->start_stack - size - PAGE_SIZE * 5)
1095 goto invalid; 1151 goto invalid;
1096 } 1152 }
1097 1153
1098 addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate); 1154 addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate);
1099 *raddr = addr; 1155 *raddr = addr;
1100 err = 0; 1156 err = 0;
@@ -1109,7 +1165,7 @@ out_fput:
1109 fput(file); 1165 fput(file);
1110 1166
1111out_nattch: 1167out_nattch:
1112 down_write(&shm_ids(ns).rw_mutex); 1168 down_write(&shm_ids(ns).rwsem);
1113 shp = shm_lock(ns, shmid); 1169 shp = shm_lock(ns, shmid);
1114 BUG_ON(IS_ERR(shp)); 1170 BUG_ON(IS_ERR(shp));
1115 shp->shm_nattch--; 1171 shp->shm_nattch--;
@@ -1117,20 +1173,13 @@ out_nattch:
1117 shm_destroy(ns, shp); 1173 shm_destroy(ns, shp);
1118 else 1174 else
1119 shm_unlock(shp); 1175 shm_unlock(shp);
1120 up_write(&shm_ids(ns).rw_mutex); 1176 up_write(&shm_ids(ns).rwsem);
1121
1122out:
1123 return err; 1177 return err;
1124 1178
1125out_unlock: 1179out_unlock:
1126 shm_unlock(shp); 1180 rcu_read_unlock();
1127 goto out; 1181out:
1128 1182 return err;
1129out_free:
1130 kfree(sfd);
1131out_put_dentry:
1132 path_put(&path);
1133 goto out_nattch;
1134} 1183}
1135 1184
1136SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg) 1185SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
@@ -1235,8 +1284,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
1235#else /* CONFIG_MMU */ 1284#else /* CONFIG_MMU */
1236 /* under NOMMU conditions, the exact address to be destroyed must be 1285 /* under NOMMU conditions, the exact address to be destroyed must be
1237 * given */ 1286 * given */
1238 retval = -EINVAL; 1287 if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
1239 if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
1240 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); 1288 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
1241 retval = 0; 1289 retval = 0;
1242 } 1290 }
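Two details of the do_shmat() rework above are worth spelling out. First, the out_free/out_put_dentry labels are gone: allocation failures now drop the path reference inline and fall through to out_nattch. Second, out_nattch must undo the shm_nattch++ taken earlier under the object lock, and may end up destroying the segment if a concurrent IPC_RMID marked it for deletion while it was attached. A condensed sketch of that rollback, mirroring the hunk above (error checks trimmed):

	down_write(&shm_ids(ns).rwsem);     /* serialize against RMID/creation */
	shp = shm_lock(ns, shmid);          /* re-find and spinlock the segment */
	shp->shm_nattch--;                  /* undo the increment from the attach */
	if (shm_may_destroy(ns, shp))       /* last detach of a deleted segment */
		shm_destroy(ns, shp);
	else
		shm_unlock(shp);
	up_write(&shm_ids(ns).rwsem);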
diff --git a/ipc/util.c b/ipc/util.c
index 4704223bfad4..7684f41bce76 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -15,6 +15,29 @@
15 * Jun 2006 - namespaces support 15 * Jun 2006 - namespaces support
16 * OpenVZ, SWsoft Inc. 16 * OpenVZ, SWsoft Inc.
17 * Pavel Emelianov <xemul@openvz.org> 17 * Pavel Emelianov <xemul@openvz.org>
18 *
19 * General sysv ipc locking scheme:
20 * rcu_read_lock()
21 * obtain the ipc object (kern_ipc_perm) by looking up the id in an idr
22 * tree.
23 * - perform initial checks (capabilities, auditing and permission,
24 * etc).
25 * - perform read-only operations, such as STAT, INFO commands.
26 * acquire the ipc lock (kern_ipc_perm.lock) through
27 * ipc_lock_object()
28 * - perform data updates, such as SET, RMID commands and
29 * mechanism-specific operations (semop/semtimedop,
30 * msgsnd/msgrcv, shmat/shmdt).
31 * drop the ipc lock, through ipc_unlock_object().
32 * rcu_read_unlock()
33 *
34 * The ids->rwsem must be taken when:
35 * - creating, removing and iterating the existing entries in ipc
36 * identifier sets.
37 * - iterating through files under /proc/sysvipc/
38 *
39 * Note that sems have a special fast path that avoids kern_ipc_perm.lock -
40 * see sem_lock().
18 */ 41 */
19 42
20#include <linux/mm.h> 43#include <linux/mm.h>
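The comment block above is the contract the rest of this patch enforces. As a caller would express it (a sketch only: ids, id and err stand in for the mechanism-specific context, and the placeholder comments mark where per-type work goes):

	rcu_read_lock();
	ipcp = ipc_obtain_object_check(ids, id);   /* idr lookup, no spinlock */
	if (IS_ERR(ipcp)) {
		err = PTR_ERR(ipcp);
		goto out_unlock;
	}

	err = -EACCES;
	if (ipcperms(ns, ipcp, S_IRUGO))           /* lockless permission check */
		goto out_unlock;

	/* read-only work (STAT/INFO-style) may run here under RCU alone */

	ipc_lock_object(ipcp);                     /* kern_ipc_perm.lock */
	/* updates (SET/RMID-style, semop/msgsnd/shmat internals) go here */
	ipc_unlock_object(ipcp);
	err = 0;
out_unlock:
	rcu_read_unlock();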
@@ -119,7 +142,7 @@ __initcall(ipc_init);
119 142
120void ipc_init_ids(struct ipc_ids *ids) 143void ipc_init_ids(struct ipc_ids *ids)
121{ 144{
122 init_rwsem(&ids->rw_mutex); 145 init_rwsem(&ids->rwsem);
123 146
124 ids->in_use = 0; 147 ids->in_use = 0;
125 ids->seq = 0; 148 ids->seq = 0;
@@ -174,7 +197,7 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
174 * @ids: Identifier set 197 * @ids: Identifier set
175 * @key: The key to find 198 * @key: The key to find
176 * 199 *
177 * Requires ipc_ids.rw_mutex locked. 200 * Requires ipc_ids.rwsem locked.
178 * Returns the LOCKED pointer to the ipc structure if found or NULL 201 * Returns the LOCKED pointer to the ipc structure if found or NULL
179 * if not. 202 * if not.
180 * If key is found ipc points to the owning ipc structure 203 * If key is found ipc points to the owning ipc structure
@@ -197,7 +220,8 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
197 continue; 220 continue;
198 } 221 }
199 222
200 ipc_lock_by_ptr(ipc); 223 rcu_read_lock();
224 ipc_lock_object(ipc);
201 return ipc; 225 return ipc;
202 } 226 }
203 227
@@ -208,7 +232,7 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
208 * ipc_get_maxid - get the last assigned id 232 * ipc_get_maxid - get the last assigned id
209 * @ids: IPC identifier set 233 * @ids: IPC identifier set
210 * 234 *
211 * Called with ipc_ids.rw_mutex held. 235 * Called with ipc_ids.rwsem held.
212 */ 236 */
213 237
214int ipc_get_maxid(struct ipc_ids *ids) 238int ipc_get_maxid(struct ipc_ids *ids)
@@ -246,7 +270,7 @@ int ipc_get_maxid(struct ipc_ids *ids)
246 * is returned. The 'new' entry is returned in a locked state on success. 270 * is returned. The 'new' entry is returned in a locked state on success.
247 * On failure the entry is not locked and a negative err-code is returned. 271 * On failure the entry is not locked and a negative err-code is returned.
248 * 272 *
249 * Called with writer ipc_ids.rw_mutex held. 273 * Called with writer ipc_ids.rwsem held.
250 */ 274 */
251int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) 275int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
252{ 276{
@@ -312,9 +336,9 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
312{ 336{
313 int err; 337 int err;
314 338
315 down_write(&ids->rw_mutex); 339 down_write(&ids->rwsem);
316 err = ops->getnew(ns, params); 340 err = ops->getnew(ns, params);
317 up_write(&ids->rw_mutex); 341 up_write(&ids->rwsem);
318 return err; 342 return err;
319} 343}
320 344
@@ -331,7 +355,7 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
331 * 355 *
332 * On success, the IPC id is returned. 356 * On success, the IPC id is returned.
333 * 357 *
334 * It is called with ipc_ids.rw_mutex and ipcp->lock held. 358 * It is called with ipc_ids.rwsem and ipcp->lock held.
335 */ 359 */
336static int ipc_check_perms(struct ipc_namespace *ns, 360static int ipc_check_perms(struct ipc_namespace *ns,
337 struct kern_ipc_perm *ipcp, 361 struct kern_ipc_perm *ipcp,
@@ -376,7 +400,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
376 * Take the lock as a writer since we are potentially going to add 400 * Take the lock as a writer since we are potentially going to add
377 * a new entry + read locks are not "upgradable" 401 * a new entry + read locks are not "upgradable"
378 */ 402 */
379 down_write(&ids->rw_mutex); 403 down_write(&ids->rwsem);
380 ipcp = ipc_findkey(ids, params->key); 404 ipcp = ipc_findkey(ids, params->key);
381 if (ipcp == NULL) { 405 if (ipcp == NULL) {
382 /* key not used */ 406 /* key not used */
@@ -402,7 +426,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
402 } 426 }
403 ipc_unlock(ipcp); 427 ipc_unlock(ipcp);
404 } 428 }
405 up_write(&ids->rw_mutex); 429 up_write(&ids->rwsem);
406 430
407 return err; 431 return err;
408} 432}
@@ -413,7 +437,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
413 * @ids: IPC identifier set 437 * @ids: IPC identifier set
414 * @ipcp: ipc perm structure containing the identifier to remove 438 * @ipcp: ipc perm structure containing the identifier to remove
415 * 439 *
416 * ipc_ids.rw_mutex (as a writer) and the spinlock for this ID are held 440 * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held
417 * before this function is called, and remain locked on the exit. 441 * before this function is called, and remain locked on the exit.
418 */ 442 */
419 443
@@ -465,11 +489,6 @@ void ipc_free(void* ptr, int size)
465 kfree(ptr); 489 kfree(ptr);
466} 490}
467 491
468struct ipc_rcu {
469 struct rcu_head rcu;
470 atomic_t refcount;
471} ____cacheline_aligned_in_smp;
472
473/** 492/**
474 * ipc_rcu_alloc - allocate ipc and rcu space 493 * ipc_rcu_alloc - allocate ipc and rcu space
475 * @size: size desired 494 * @size: size desired
@@ -496,27 +515,24 @@ int ipc_rcu_getref(void *ptr)
496 return atomic_inc_not_zero(&p->refcount); 515 return atomic_inc_not_zero(&p->refcount);
497} 516}
498 517
499/** 518void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head))
500 * ipc_schedule_free - free ipc + rcu space
501 * @head: RCU callback structure for queued work
502 */
503static void ipc_schedule_free(struct rcu_head *head)
504{
505 vfree(container_of(head, struct ipc_rcu, rcu));
506}
507
508void ipc_rcu_putref(void *ptr)
509{ 519{
510 struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1; 520 struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
511 521
512 if (!atomic_dec_and_test(&p->refcount)) 522 if (!atomic_dec_and_test(&p->refcount))
513 return; 523 return;
514 524
515 if (is_vmalloc_addr(ptr)) { 525 call_rcu(&p->rcu, func);
516 call_rcu(&p->rcu, ipc_schedule_free); 526}
517 } else { 527
518 kfree_rcu(p, rcu); 528void ipc_rcu_free(struct rcu_head *head)
519 } 529{
530 struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
531
532 if (is_vmalloc_addr(p))
533 vfree(p);
534 else
535 kfree(p);
520} 536}
521 537
522/** 538/**
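Because ipc_rcu_putref() now takes the RCU callback from its caller, each IPC type can fold its own teardown into the grace period and finish with ipc_rcu_free(). A hypothetical user of the API ("foo" and security_foo_free() are placeholders, not symbols from this patch):

	static void foo_rcu_free(struct rcu_head *head)
	{
		struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
		struct foo_queue *foo = ipc_rcu_to_struct(p);

		security_foo_free(foo);      /* placeholder LSM teardown */
		ipc_rcu_free(head);          /* kfree()/vfree() of header+object */
	}

	/* drop a reference; the callback runs after a grace period */
	ipc_rcu_putref(foo, foo_rcu_free);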
@@ -621,7 +637,7 @@ struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id)
621} 637}
622 638
623/** 639/**
624 * ipc_lock - Lock an ipc structure without rw_mutex held 640 * ipc_lock - Lock an ipc structure without rwsem held
625 * @ids: IPC identifier set 641 * @ids: IPC identifier set
626 * @id: ipc id to look for 642 * @id: ipc id to look for
627 * 643 *
@@ -677,22 +693,6 @@ out:
677 return out; 693 return out;
678} 694}
679 695
680struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id)
681{
682 struct kern_ipc_perm *out;
683
684 out = ipc_lock(ids, id);
685 if (IS_ERR(out))
686 return out;
687
688 if (ipc_checkid(out, id)) {
689 ipc_unlock(out);
690 return ERR_PTR(-EIDRM);
691 }
692
693 return out;
694}
695
696/** 696/**
697 * ipcget - Common sys_*get() code 697 * ipcget - Common sys_*get() code
 698 * @ns : namespace 698 * @ns : namespace
@@ -733,7 +733,7 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out)
733} 733}
734 734
735/** 735/**
736 * ipcctl_pre_down - retrieve an ipc and check permissions for some IPC_XXX cmd 736 * ipcctl_pre_down_nolock - retrieve an ipc and check permissions for some IPC_XXX cmd
737 * @ns: the ipc namespace 737 * @ns: the ipc namespace
738 * @ids: the table of ids where to look for the ipc 738 * @ids: the table of ids where to look for the ipc
739 * @id: the id of the ipc to retrieve 739 * @id: the id of the ipc to retrieve
@@ -746,29 +746,13 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out)
746 * It must be called without any lock held and 746 * It must be called without any lock held and
747 * - retrieves the ipc with the given id in the given table. 747 * - retrieves the ipc with the given id in the given table.
748 * - performs some audit and permission check, depending on the given cmd 748 * - performs some audit and permission check, depending on the given cmd
 749 * - returns the ipc with the ipc lock held in case of success 749 * - returns a pointer to the ipc object on success, or the corresponding error otherwise.
750 * or an err-code without any lock held otherwise.
751 * 750 *
 752 * Call holding the both the rw_mutex and the rcu read lock. 751 * Call holding both the rwsem and the rcu read lock.
753 */ 752 */
754struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
755 struct ipc_ids *ids, int id, int cmd,
756 struct ipc64_perm *perm, int extra_perm)
757{
758 struct kern_ipc_perm *ipcp;
759
760 ipcp = ipcctl_pre_down_nolock(ns, ids, id, cmd, perm, extra_perm);
761 if (IS_ERR(ipcp))
762 goto out;
763
764 spin_lock(&ipcp->lock);
765out:
766 return ipcp;
767}
768
769struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, 753struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
770 struct ipc_ids *ids, int id, int cmd, 754 struct ipc_ids *ids, int id, int cmd,
771 struct ipc64_perm *perm, int extra_perm) 755 struct ipc64_perm *perm, int extra_perm)
772{ 756{
773 kuid_t euid; 757 kuid_t euid;
774 int err = -EPERM; 758 int err = -EPERM;
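With the locked wrapper removed, a per-type *_ctl_down() now does the lookup and permission check lockless and takes the object lock only for the commands that modify state. A sketch of that shape (perm64 stands for the user-supplied struct ipc64_perm; audit details and most error handling trimmed):

	down_write(&ids->rwsem);                 /* id set may shrink (RMID) */
	rcu_read_lock();

	ipcp = ipcctl_pre_down_nolock(ns, ids, id, cmd, &perm64, 0);
	if (IS_ERR(ipcp)) {
		err = PTR_ERR(ipcp);
		goto out_unlock;
	}

	switch (cmd) {
	case IPC_RMID:
		ipc_lock_object(ipcp);
		/* per-type free routine runs here; drops both locks itself */
		goto out_up;
	case IPC_SET:
		ipc_lock_object(ipcp);
		err = ipc_update_perm(&perm64, ipcp);
		ipc_unlock_object(ipcp);
		break;
	default:
		err = -EINVAL;
		break;
	}
out_unlock:
	rcu_read_unlock();
out_up:
	up_write(&ids->rwsem);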
@@ -846,7 +830,8 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
846 ipc = idr_find(&ids->ipcs_idr, pos); 830 ipc = idr_find(&ids->ipcs_idr, pos);
847 if (ipc != NULL) { 831 if (ipc != NULL) {
848 *new_pos = pos + 1; 832 *new_pos = pos + 1;
849 ipc_lock_by_ptr(ipc); 833 rcu_read_lock();
834 ipc_lock_object(ipc);
850 return ipc; 835 return ipc;
851 } 836 }
852 } 837 }
@@ -884,7 +869,7 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
884 * Take the lock - this will be released by the corresponding 869 * Take the lock - this will be released by the corresponding
885 * call to stop(). 870 * call to stop().
886 */ 871 */
887 down_read(&ids->rw_mutex); 872 down_read(&ids->rwsem);
888 873
889 /* pos < 0 is invalid */ 874 /* pos < 0 is invalid */
890 if (*pos < 0) 875 if (*pos < 0)
@@ -911,7 +896,7 @@ static void sysvipc_proc_stop(struct seq_file *s, void *it)
911 896
912 ids = &iter->ns->ids[iface->ids]; 897 ids = &iter->ns->ids[iface->ids];
913 /* Release the lock we took in start() */ 898 /* Release the lock we took in start() */
914 up_read(&ids->rw_mutex); 899 up_read(&ids->rwsem);
915} 900}
916 901
917static int sysvipc_proc_show(struct seq_file *s, void *it) 902static int sysvipc_proc_show(struct seq_file *s, void *it)
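The getref/putref pair retained in this file serves one job: keeping an object alive while a caller drops every lock to perform a sleeping allocation, a pattern the sem and msg paths rely on. A sketch (obj, obj->perm and obj_rcu_free are placeholders for the per-type object, its kern_ipc_perm and its RCU callback):

	if (!ipc_rcu_getref(obj))           /* pin; fails if already freed */
		goto fail;
	ipc_unlock_object(&obj->perm);
	rcu_read_unlock();

	buf = kmalloc(bufsz, GFP_KERNEL);   /* may sleep safely now */

	rcu_read_lock();
	ipc_lock_object(&obj->perm);
	/* caller must recheck that obj was not removed in the meantime */
	ipc_rcu_putref(obj, obj_rcu_free);  /* unpin; frees via callback if last */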
diff --git a/ipc/util.h b/ipc/util.h
index b6a6a88f3002..f2f5036f2eed 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -47,6 +47,13 @@ static inline void msg_exit_ns(struct ipc_namespace *ns) { }
47static inline void shm_exit_ns(struct ipc_namespace *ns) { } 47static inline void shm_exit_ns(struct ipc_namespace *ns) { }
48#endif 48#endif
49 49
50struct ipc_rcu {
51 struct rcu_head rcu;
52 atomic_t refcount;
53} ____cacheline_aligned_in_smp;
54
55#define ipc_rcu_to_struct(p) ((void *)(p+1))
56
50/* 57/*
51 * Structure that holds the parameters needed by the ipc operations 58 * Structure that holds the parameters needed by the ipc operations
52 * (see after) 59 * (see after)
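The moved definition makes the allocation layout visible to the per-type code: ipc_rcu_alloc() returns memory whose refcounted RCU header sits immediately before the object, which is what ipc_rcu_to_struct() and the ((struct ipc_rcu *)ptr) - 1 arithmetic in util.c both rely on. Schematically:

	/*
	 *   +----------------------+--------------------------------+
	 *   | struct ipc_rcu       | per-type object (sem_array,    |
	 *   | (rcu head, refcount) | msg_queue, shmid_kernel, ...)  |
	 *   +----------------------+--------------------------------+
	 *   ^ p                    ^ ipc_rcu_to_struct(p) == (void *)(p + 1)
	 */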
@@ -94,10 +101,10 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
94#define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER) 101#define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER)
95#define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER) 102#define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER)
96 103
97/* must be called with ids->rw_mutex acquired for writing */ 104/* must be called with ids->rwsem acquired for writing */
98int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int); 105int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);
99 106
100/* must be called with ids->rw_mutex acquired for reading */ 107/* must be called with ids->rwsem acquired for reading */
101int ipc_get_maxid(struct ipc_ids *); 108int ipc_get_maxid(struct ipc_ids *);
102 109
103/* must be called with both locks acquired. */ 110/* must be called with both locks acquired. */
@@ -120,7 +127,8 @@ void ipc_free(void* ptr, int size);
120 */ 127 */
121void* ipc_rcu_alloc(int size); 128void* ipc_rcu_alloc(int size);
122int ipc_rcu_getref(void *ptr); 129int ipc_rcu_getref(void *ptr);
123void ipc_rcu_putref(void *ptr); 130void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head));
131void ipc_rcu_free(struct rcu_head *head);
124 132
125struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); 133struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
126struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id); 134struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id);
@@ -131,9 +139,6 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out);
131struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, 139struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
132 struct ipc_ids *ids, int id, int cmd, 140 struct ipc_ids *ids, int id, int cmd,
133 struct ipc64_perm *perm, int extra_perm); 141 struct ipc64_perm *perm, int extra_perm);
134struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
135 struct ipc_ids *ids, int id, int cmd,
136 struct ipc64_perm *perm, int extra_perm);
137 142
138#ifndef CONFIG_ARCH_WANT_IPC_PARSE_VERSION 143#ifndef CONFIG_ARCH_WANT_IPC_PARSE_VERSION
139 /* On IA-64, we always use the "64-bit version" of the IPC structures. */ 144 /* On IA-64, we always use the "64-bit version" of the IPC structures. */
@@ -174,19 +179,12 @@ static inline void ipc_assert_locked_object(struct kern_ipc_perm *perm)
174 assert_spin_locked(&perm->lock); 179 assert_spin_locked(&perm->lock);
175} 180}
176 181
177static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
178{
179 rcu_read_lock();
180 ipc_lock_object(perm);
181}
182
183static inline void ipc_unlock(struct kern_ipc_perm *perm) 182static inline void ipc_unlock(struct kern_ipc_perm *perm)
184{ 183{
185 ipc_unlock_object(perm); 184 ipc_unlock_object(perm);
186 rcu_read_unlock(); 185 rcu_read_unlock();
187} 186}
188 187
189struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id);
190struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id); 188struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id);
191int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, 189int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
192 struct ipc_ops *ops, struct ipc_params *params); 190 struct ipc_ops *ops, struct ipc_params *params);
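Finally, the two removed helpers have distinct replacements. ipc_lock_by_ptr() hid the start of the RCU critical section, so its remaining users in util.c (ipc_findkey() and sysvipc_find_ipc(), changed above) now open-code the pair; ipc_lock_check() callers move to ipc_obtain_object_check(), taking the spinlock themselves only when they actually modify the object. The open-coded form, as it now appears at those call sites:

	rcu_read_lock();         /* critical section visible at the call site */
	ipc_lock_object(ipc);    /* kern_ipc_perm.lock */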