Diffstat (limited to 'ipc')

-rw-r--r--  ipc/msg.c       |  69
-rw-r--r--  ipc/namespace.c |   9
-rw-r--r--  ipc/sem.c       | 280
-rw-r--r--  ipc/shm.c       | 264
-rw-r--r--  ipc/util.c      | 129
-rw-r--r--  ipc/util.h      |  24

6 files changed, 454 insertions, 321 deletions
diff --git a/ipc/msg.c b/ipc/msg.c
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -70,8 +70,6 @@ struct msg_sender {
 
 #define msg_ids(ns)	((ns)->ids[IPC_MSG_IDS])
 
-#define msg_unlock(msq)	ipc_unlock(&(msq)->q_perm)
-
 static void freeque(struct ipc_namespace *, struct kern_ipc_perm *);
 static int newque(struct ipc_namespace *, struct ipc_params *);
 #ifdef CONFIG_PROC_FS
@@ -167,12 +165,21 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
 	ipc_rmid(&msg_ids(ns), &s->q_perm);
 }
 
+static void msg_rcu_free(struct rcu_head *head)
+{
+	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+	struct msg_queue *msq = ipc_rcu_to_struct(p);
+
+	security_msg_queue_free(msq);
+	ipc_rcu_free(head);
+}
+
 /**
  * newque - Create a new msg queue
  * @ns: namespace
  * @params: ptr to the structure that contains the key and msgflg
  *
- * Called with msg_ids.rw_mutex held (writer)
+ * Called with msg_ids.rwsem held (writer)
  */
 static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 {
@@ -191,15 +198,14 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 	msq->q_perm.security = NULL;
 	retval = security_msg_queue_alloc(msq);
 	if (retval) {
-		ipc_rcu_putref(msq);
+		ipc_rcu_putref(msq, ipc_rcu_free);
 		return retval;
 	}
 
 	/* ipc_addid() locks msq upon success. */
 	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
 	if (id < 0) {
-		security_msg_queue_free(msq);
-		ipc_rcu_putref(msq);
+		ipc_rcu_putref(msq, msg_rcu_free);
 		return id;
 	}
 
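The three msg.c hunks above introduce msg_rcu_free() and teach ipc_rcu_putref() to take a destructor callback, so security_msg_queue_free() runs only once the last reference is dropped (and, in the kernel, only after an RCU grace period via call_rcu()). A minimal userspace sketch of that refcount-plus-destructor shape; every name below is an illustrative stand-in, not the kernel API, and the RCU deferral itself is omitted:

	#include <stdatomic.h>
	#include <stdio.h>
	#include <stdlib.h>

	/* Illustrative stand-in for struct ipc_rcu: a header that carries
	 * the reference count in front of the object it protects. */
	struct obj_ref {
		atomic_int refcount;
	};

	struct msg_queue_model {
		struct obj_ref ref;	/* must stay first, like ipc_rcu */
		int id;
	};

	/* Analogue of ipc_rcu_putref(ptr, cb): drop one reference and run
	 * the caller-supplied destructor when the count reaches zero. */
	static void obj_putref(struct obj_ref *ref,
			       void (*free_cb)(struct obj_ref *))
	{
		if (atomic_fetch_sub(&ref->refcount, 1) == 1)
			free_cb(ref);
	}

	/* Analogue of msg_rcu_free(): release subsystem state first,
	 * then the memory itself. */
	static void msg_model_free(struct obj_ref *ref)
	{
		struct msg_queue_model *msq = (struct msg_queue_model *)ref;

		printf("freeing queue %d (security state would go first)\n",
		       msq->id);
		free(msq);
	}

	int main(void)
	{
		struct msg_queue_model *msq = malloc(sizeof(*msq));

		atomic_init(&msq->ref.refcount, 2);
		msq->id = 42;
		obj_putref(&msq->ref, msg_model_free); /* 2 -> 1, no free */
		obj_putref(&msq->ref, msg_model_free); /* 1 -> 0, frees   */
		return 0;
	}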
@@ -259,8 +265,8 @@ static void expunge_all(struct msg_queue *msq, int res)
  * removes the message queue from message queue ID IDR, and cleans up all the
  * messages associated with this queue.
  *
- * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held
- * before freeque() is called. msg_ids.rw_mutex remains locked on exit.
+ * msg_ids.rwsem (writer) and the spinlock for this message queue are held
+ * before freeque() is called. msg_ids.rwsem remains locked on exit.
  */
 static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
@@ -270,19 +276,19 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	expunge_all(msq, -EIDRM);
 	ss_wakeup(&msq->q_senders, 1);
 	msg_rmid(ns, msq);
-	msg_unlock(msq);
+	ipc_unlock_object(&msq->q_perm);
+	rcu_read_unlock();
 
 	list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
 		atomic_dec(&ns->msg_hdrs);
 		free_msg(msg);
 	}
 	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
-	security_msg_queue_free(msq);
-	ipc_rcu_putref(msq);
+	ipc_rcu_putref(msq, msg_rcu_free);
 }
 
 /*
- * Called with msg_ids.rw_mutex and ipcp locked.
+ * Called with msg_ids.rwsem and ipcp locked.
  */
 static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
 {
@@ -386,9 +392,9 @@ copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
 }
 
 /*
- * This function handles some msgctl commands which require the rw_mutex
+ * This function handles some msgctl commands which require the rwsem
  * to be held in write mode.
- * NOTE: no locks must be held, the rw_mutex is taken inside this function.
+ * NOTE: no locks must be held, the rwsem is taken inside this function.
  */
 static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
 		       struct msqid_ds __user *buf, int version)
@@ -403,7 +409,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
 		return -EFAULT;
 	}
 
-	down_write(&msg_ids(ns).rw_mutex);
+	down_write(&msg_ids(ns).rwsem);
 	rcu_read_lock();
 
 	ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd,
@@ -459,7 +465,7 @@ out_unlock0:
 out_unlock1:
 	rcu_read_unlock();
 out_up:
-	up_write(&msg_ids(ns).rw_mutex);
+	up_write(&msg_ids(ns).rwsem);
 	return err;
 }
 
@@ -494,7 +500,7 @@ static int msgctl_nolock(struct ipc_namespace *ns, int msqid,
 		msginfo.msgmnb = ns->msg_ctlmnb;
 		msginfo.msgssz = MSGSSZ;
 		msginfo.msgseg = MSGSEG;
-		down_read(&msg_ids(ns).rw_mutex);
+		down_read(&msg_ids(ns).rwsem);
 		if (cmd == MSG_INFO) {
 			msginfo.msgpool = msg_ids(ns).in_use;
 			msginfo.msgmap = atomic_read(&ns->msg_hdrs);
@@ -505,7 +511,7 @@ static int msgctl_nolock(struct ipc_namespace *ns, int msqid,
 			msginfo.msgtql = MSGTQL;
 		}
 		max_id = ipc_get_maxid(&msg_ids(ns));
-		up_read(&msg_ids(ns).rw_mutex);
+		up_read(&msg_ids(ns).rwsem);
 		if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
 			return -EFAULT;
 		return (max_id < 0) ? 0 : max_id;
@@ -680,16 +686,24 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		goto out_unlock1;
 	}
 
+	ipc_lock_object(&msq->q_perm);
+
 	for (;;) {
 		struct msg_sender s;
 
 		err = -EACCES;
 		if (ipcperms(ns, &msq->q_perm, S_IWUGO))
-			goto out_unlock1;
+			goto out_unlock0;
+
+		/* raced with RMID? */
+		if (msq->q_perm.deleted) {
+			err = -EIDRM;
+			goto out_unlock0;
+		}
 
 		err = security_msg_queue_msgsnd(msq, msg, msgflg);
 		if (err)
-			goto out_unlock1;
+			goto out_unlock0;
 
 		if (msgsz + msq->q_cbytes <= msq->q_qbytes &&
 				1 + msq->q_qnum <= msq->q_qbytes) {
@@ -699,10 +713,9 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		/* queue full, wait: */
 		if (msgflg & IPC_NOWAIT) {
 			err = -EAGAIN;
-			goto out_unlock1;
+			goto out_unlock0;
 		}
 
-		ipc_lock_object(&msq->q_perm);
 		ss_add(msq, &s);
 
 		if (!ipc_rcu_getref(msq)) {
@@ -717,7 +730,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		rcu_read_lock();
 		ipc_lock_object(&msq->q_perm);
 
-		ipc_rcu_putref(msq);
+		ipc_rcu_putref(msq, ipc_rcu_free);
 		if (msq->q_perm.deleted) {
 			err = -EIDRM;
 			goto out_unlock0;
@@ -730,10 +743,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 			goto out_unlock0;
 		}
 
-		ipc_unlock_object(&msq->q_perm);
 	}
-
-	ipc_lock_object(&msq->q_perm);
 	msq->q_lspid = task_tgid_vnr(current);
 	msq->q_stime = get_seconds();
 
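The do_msgsnd() hunks above take the object lock before the permission check and re-test q_perm.deleted after every point where the lock was dropped, because an IPC_RMID can slip in while the task sleeps or allocates. The shape of that recheck, as a small pthread sketch (the struct and flag here are illustrative; the kernel uses its own spinlock and the kern_ipc_perm.deleted flag):

	#include <errno.h>
	#include <pthread.h>
	#include <stdbool.h>

	/* Toy resource protected by a lock plus a "deleted" flag, mirroring
	 * kern_ipc_perm.lock / kern_ipc_perm.deleted. */
	struct resource {
		pthread_mutex_t lock;
		bool deleted;
		int value;
	};

	/* Whenever the lock was dropped, the deleted flag must be
	 * re-tested before the object is used again. */
	static int use_resource(struct resource *r)
	{
		pthread_mutex_lock(&r->lock);
		if (r->deleted) {		/* raced with removal? */
			pthread_mutex_unlock(&r->lock);
			return -EIDRM;
		}
		r->value++;		/* safe: locked and not deleted */
		pthread_mutex_unlock(&r->lock);
		return 0;
	}

	int main(void)
	{
		struct resource r = { PTHREAD_MUTEX_INITIALIZER, false, 0 };

		return use_resource(&r) == 0 ? 0 : 1;
	}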
@@ -897,6 +907,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
 		goto out_unlock1;
 
 	ipc_lock_object(&msq->q_perm);
+
+	/* raced with RMID? */
+	if (msq->q_perm.deleted) {
+		msg = ERR_PTR(-EIDRM);
+		goto out_unlock0;
+	}
+
 	msg = find_msg(msq, &msgtyp, mode);
 	if (!IS_ERR(msg)) {
 		/*
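With this check, a receiver that loses the race against IPC_RMID gets -EIDRM instead of touching a dead queue. From userspace that surfaces as errno == EIDRM; a defensive caller might treat it as a distinct, expected outcome (the recovery policy below is hypothetical):

	#include <errno.h>
	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/ipc.h>
	#include <sys/msg.h>

	struct msgtext {
		long mtype;
		char mtext[128];
	};

	/* Receive one message, distinguishing "queue removed under us"
	 * from generic failure. */
	static int recv_one(int msqid, struct msgtext *m)
	{
		ssize_t n = msgrcv(msqid, m, sizeof(m->mtext), 0, 0);

		if (n >= 0)
			return 0;
		if (errno == EIDRM) {
			fprintf(stderr, "queue %d was removed (IPC_RMID)\n",
				msqid);
			return -1;	/* caller must msgget() a new queue */
		}
		perror("msgrcv");
		return -1;
	}

	int main(void)
	{
		struct msgtext m = { .mtype = 1 };
		int id = msgget(IPC_PRIVATE, IPC_CREAT | 0600);

		if (id < 0 || msgsnd(id, &m, 0, 0) < 0)
			return 1;
		recv_one(id, &m);
		msgctl(id, IPC_RMID, NULL);
		return 0;
	}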
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 7ee61bf44933..59451c1e214d 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -81,7 +81,7 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
 	int next_id;
 	int total, in_use;
 
-	down_write(&ids->rw_mutex);
+	down_write(&ids->rwsem);
 
 	in_use = ids->in_use;
 
@@ -89,11 +89,12 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
 		perm = idr_find(&ids->ipcs_idr, next_id);
 		if (perm == NULL)
 			continue;
-		ipc_lock_by_ptr(perm);
+		rcu_read_lock();
+		ipc_lock_object(perm);
 		free(ns, perm);
 		total++;
 	}
-	up_write(&ids->rw_mutex);
+	up_write(&ids->rwsem);
 }
 
 static void free_ipc_ns(struct ipc_namespace *ns)
@@ -171,7 +172,7 @@ static int ipcns_install(struct nsproxy *nsproxy, void *new)
 {
 	struct ipc_namespace *ns = new;
 	if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
-	    !nsown_capable(CAP_SYS_ADMIN))
+	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
 		return -EPERM;
 
 	/* Ditch state from the old ipc namespace */
diff --git a/ipc/sem.c b/ipc/sem.c
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -243,71 +243,122 @@ static void merge_queues(struct sem_array *sma)
 	}
 }
 
+static void sem_rcu_free(struct rcu_head *head)
+{
+	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+	struct sem_array *sma = ipc_rcu_to_struct(p);
+
+	security_sem_free(sma);
+	ipc_rcu_free(head);
+}
+
+/*
+ * Wait until all currently ongoing simple ops have completed.
+ * Caller must own sem_perm.lock.
+ * New simple ops cannot start, because simple ops first check
+ * that sem_perm.lock is free.
+ * that a) sem_perm.lock is free and b) complex_count is 0.
+ */
+static void sem_wait_array(struct sem_array *sma)
+{
+	int i;
+	struct sem *sem;
+
+	if (sma->complex_count) {
+		/* The thread that increased sma->complex_count waited on
+		 * all sem->lock locks. Thus we don't need to wait again.
+		 */
+		return;
+	}
+
+	for (i = 0; i < sma->sem_nsems; i++) {
+		sem = sma->sem_base + i;
+		spin_unlock_wait(&sem->lock);
+	}
+}
+
 /*
  * If the request contains only one semaphore operation, and there are
  * no complex transactions pending, lock only the semaphore involved.
  * Otherwise, lock the entire semaphore array, since we either have
  * multiple semaphores in our own semops, or we need to look at
  * semaphores from other pending complex operations.
- *
- * Carefully guard against sma->complex_count changing between zero
- * and non-zero while we are spinning for the lock. The value of
- * sma->complex_count cannot change while we are holding the lock,
- * so sem_unlock should be fine.
- *
- * The global lock path checks that all the local locks have been released,
- * checking each local lock once. This means that the local lock paths
- * cannot start their critical sections while the global lock is held.
  */
 static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
 			      int nsops)
 {
-	int locknum;
- again:
-	if (nsops == 1 && !sma->complex_count) {
-		struct sem *sem = sma->sem_base + sops->sem_num;
+	struct sem *sem;
 
-		/* Lock just the semaphore we are interested in. */
-		spin_lock(&sem->lock);
+	if (nsops != 1) {
+		/* Complex operation - acquire a full lock */
+		ipc_lock_object(&sma->sem_perm);
 
-		/*
-		 * If sma->complex_count was set while we were spinning,
-		 * we may need to look at things we did not lock here.
+		/* And wait until all simple ops that are processed
+		 * right now have dropped their locks.
 		 */
-		if (unlikely(sma->complex_count)) {
-			spin_unlock(&sem->lock);
-			goto lock_array;
-		}
+		sem_wait_array(sma);
+		return -1;
+	}
+
+	/*
+	 * Only one semaphore affected - try to optimize locking.
+	 * The rules are:
+	 * - optimized locking is possible if no complex operation
+	 *   is either enqueued or processed right now.
+	 * - The test for enqueued complex ops is simple:
+	 *      sma->complex_count != 0
+	 * - Testing for complex ops that are processed right now is
+	 *   a bit more difficult. Complex ops acquire the full lock
+	 *   and first wait that the running simple ops have completed.
+	 *   (see above)
+	 * Thus: If we own a simple lock and the global lock is free
+	 *	and complex_count is now 0, then it will stay 0 and
+	 *	thus just locking sem->lock is sufficient.
+	 */
+	sem = sma->sem_base + sops->sem_num;
 
+	if (sma->complex_count == 0) {
 		/*
-		 * Another process is holding the global lock on the
-		 * sem_array; we cannot enter our critical section,
-		 * but have to wait for the global lock to be released.
+		 * It appears that no complex operation is around.
+		 * Acquire the per-semaphore lock.
 		 */
-		if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
-			spin_unlock(&sem->lock);
-			spin_unlock_wait(&sma->sem_perm.lock);
-			goto again;
+		spin_lock(&sem->lock);
+
+		/* Then check that the global lock is free */
+		if (!spin_is_locked(&sma->sem_perm.lock)) {
+			/* spin_is_locked() is not a memory barrier */
+			smp_mb();
+
+			/* Now repeat the test of complex_count:
+			 * It can't change anymore until we drop sem->lock.
+			 * Thus: if is now 0, then it will stay 0.
+			 */
+			if (sma->complex_count == 0) {
+				/* fast path successful! */
+				return sops->sem_num;
+			}
 		}
+		spin_unlock(&sem->lock);
+	}
 
-		locknum = sops->sem_num;
+	/* slow path: acquire the full lock */
+	ipc_lock_object(&sma->sem_perm);
+
+	if (sma->complex_count == 0) {
+		/* False alarm:
+		 * There is no complex operation, thus we can switch
+		 * back to the fast path.
+		 */
+		spin_lock(&sem->lock);
+		ipc_unlock_object(&sma->sem_perm);
+		return sops->sem_num;
 	} else {
-		int i;
-		/*
-		 * Lock the semaphore array, and wait for all of the
-		 * individual semaphore locks to go away. The code
-		 * above ensures no new single-lock holders will enter
-		 * their critical section while the array lock is held.
+		/* Not a false alarm, thus complete the sequence for a
+		 * full lock.
 		 */
- lock_array:
-		ipc_lock_object(&sma->sem_perm);
-		for (i = 0; i < sma->sem_nsems; i++) {
-			struct sem *sem = sma->sem_base + i;
-			spin_unlock_wait(&sem->lock);
-		}
-		locknum = -1;
+		sem_wait_array(sma);
+		return -1;
 	}
-	return locknum;
 }
 
 static inline void sem_unlock(struct sem_array *sma, int locknum)
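The rewritten sem_lock() is the heart of this series: a single-semaphore operation takes only its per-semaphore spinlock, then verifies that the global lock is free and, after a full memory barrier, that complex_count is still zero; complex operations take the global lock and use sem_wait_array() to drain in-flight simple operations. A compilable userspace model of that protocol, with C11 atomics standing in for the kernel's spinlocks and smp_mb() (illustrative only; the draining of per-semaphore locks via spin_unlock_wait() is omitted):

	#include <stdatomic.h>
	#include <stdbool.h>

	#define NSEMS 8

	typedef struct { atomic_bool locked; } spinlock_t;

	static void spin_lock(spinlock_t *l)
	{
		while (atomic_exchange_explicit(&l->locked, true,
						memory_order_acquire))
			;
	}

	static void spin_unlock(spinlock_t *l)
	{
		atomic_store_explicit(&l->locked, false, memory_order_release);
	}

	static bool spin_is_locked(spinlock_t *l)
	{
		return atomic_load_explicit(&l->locked, memory_order_relaxed);
	}

	struct sem_array_model {
		spinlock_t global_lock;		/* plays sma->sem_perm.lock */
		atomic_int complex_count;	/* queued complex operations */
		spinlock_t sem_lock[NSEMS];	/* one lock per semaphore */
	};

	/* Model of the sem_lock() single-semaphore path: returns the
	 * semaphore index when only its private lock was needed, or -1
	 * after falling back to the global lock. */
	static int model_sem_lock(struct sem_array_model *sma, int semnum)
	{
		if (atomic_load(&sma->complex_count) == 0) {
			/* Optimistically take just our semaphore's lock. */
			spin_lock(&sma->sem_lock[semnum]);

			/* Global lock free?  Re-test complex_count after a
			 * full barrier (the kernel's smp_mb()); if it is
			 * still zero it cannot rise while we hold our lock. */
			if (!spin_is_locked(&sma->global_lock)) {
				atomic_thread_fence(memory_order_seq_cst);
				if (atomic_load(&sma->complex_count) == 0)
					return semnum;	/* fast path */
			}
			spin_unlock(&sma->sem_lock[semnum]);
		}

		/* Slow path: take the global lock, then either downgrade
		 * back to the private lock (false alarm) or keep it. */
		spin_lock(&sma->global_lock);
		if (atomic_load(&sma->complex_count) == 0) {
			spin_lock(&sma->sem_lock[semnum]);
			spin_unlock(&sma->global_lock);
			return semnum;
		}
		return -1;
	}

	int main(void)
	{
		static struct sem_array_model sma;	/* zero-initialized */

		/* Uncontended case: the fast path takes only sem_lock[3]. */
		return model_sem_lock(&sma, 3) == 3 ? 0 : 1;
	}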
@@ -322,7 +373,7 @@ static inline void sem_unlock(struct sem_array *sma, int locknum)
 }
 
 /*
- * sem_lock_(check_) routines are called in the paths where the rw_mutex
+ * sem_lock_(check_) routines are called in the paths where the rwsem
  * is not held.
  *
  * The caller holds the RCU read lock.
@@ -374,12 +425,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
 static inline void sem_lock_and_putref(struct sem_array *sma)
 {
 	sem_lock(sma, NULL, -1);
-	ipc_rcu_putref(sma);
-}
-
-static inline void sem_putref(struct sem_array *sma)
-{
-	ipc_rcu_putref(sma);
+	ipc_rcu_putref(sma, ipc_rcu_free);
 }
 
 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
@@ -426,7 +472,7 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
 * @ns: namespace
 * @params: ptr to the structure that contains key, semflg and nsems
 *
- * Called with sem_ids.rw_mutex held (as a writer)
+ * Called with sem_ids.rwsem held (as a writer)
 */
 
 static int newary(struct ipc_namespace *ns, struct ipc_params *params)
@@ -458,14 +504,13 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	sma->sem_perm.security = NULL;
 	retval = security_sem_alloc(sma);
 	if (retval) {
-		ipc_rcu_putref(sma);
+		ipc_rcu_putref(sma, ipc_rcu_free);
 		return retval;
 	}
 
 	id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
 	if (id < 0) {
-		security_sem_free(sma);
-		ipc_rcu_putref(sma);
+		ipc_rcu_putref(sma, sem_rcu_free);
 		return id;
 	}
 	ns->used_sems += nsems;
@@ -492,7 +537,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 
 
 /*
- * Called with sem_ids.rw_mutex and ipcp locked.
+ * Called with sem_ids.rwsem and ipcp locked.
 */
 static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
 {
@@ -503,7 +548,7 @@ static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
 }
 
 /*
- * Called with sem_ids.rw_mutex and ipcp locked.
+ * Called with sem_ids.rwsem and ipcp locked.
 */
 static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
 				  struct ipc_params *params)
@@ -873,6 +918,24 @@ again:
 }
 
 /**
+ * set_semotime(sma, sops) - set sem_otime
+ * @sma: semaphore array
+ * @sops: operations that modified the array, may be NULL
+ *
+ * sem_otime is replicated to avoid cache line trashing.
+ * This function sets one instance to the current time.
+ */
+static void set_semotime(struct sem_array *sma, struct sembuf *sops)
+{
+	if (sops == NULL) {
+		sma->sem_base[0].sem_otime = get_seconds();
+	} else {
+		sma->sem_base[sops[0].sem_num].sem_otime =
+						get_seconds();
+	}
+}
+
+/**
  * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
  * @sma: semaphore array
  * @sops: operations that were performed
@@ -922,17 +985,10 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
 			}
 		}
 	}
-	if (otime) {
-		if (sops == NULL) {
-			sma->sem_base[0].sem_otime = get_seconds();
-		} else {
-			sma->sem_base[sops[0].sem_num].sem_otime =
-						get_seconds();
-		}
-	}
+	if (otime)
+		set_semotime(sma, sops);
 }
 
-
 /* The following counts are associated to each semaphore:
 *   semncnt        number of tasks waiting on semval being nonzero
 *   semzcnt        number of tasks waiting on semval being zero
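This hunk only factors the replicated sem_otime update out of do_smart_update() into set_semotime(), so the semtimedop() fast path (changed further down) can stamp sem_otime without a full do_smart_update(). sem_otime is spread across the per-semaphore structs so writers dirty only the cache line of the semaphore they touched; readers aggregate via get_semotime(). A sketch of the replication idea, with an illustrative field layout rather than the kernel structs:

	#include <stdio.h>
	#include <time.h>

	#define NSEMS 4

	/* Each semaphore carries its own otime slot so that writers touch
	 * only the cache line of the semaphore they operated on. */
	struct sem_model {
		int val;
		time_t otime;	/* one replica of sem_otime */
	};

	/* Writer side: stamp the slot of the first semaphore operated on
	 * (or slot 0 when no sops are given), like set_semotime(). */
	static void set_otime(struct sem_model *base, const int *semnums, int n)
	{
		if (n == 0)
			base[0].otime = time(NULL);
		else
			base[semnums[0]].otime = time(NULL);
	}

	/* Reader side (analogue of get_semotime()): the logical sem_otime
	 * is the newest stamp across all replicas. */
	static time_t get_otime(const struct sem_model *base, int nsems)
	{
		time_t res = base[0].otime;
		int i;

		for (i = 1; i < nsems; i++)
			if (base[i].otime > res)
				res = base[i].otime;
		return res;
	}

	int main(void)
	{
		struct sem_model sems[NSEMS] = {0};
		int touched[] = {2};

		set_otime(sems, touched, 1);
		printf("sem_otime = %ld\n", (long)get_otime(sems, NSEMS));
		return 0;
	}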
@@ -994,8 +1050,8 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
 	return semzcnt;
 }
 
-/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked
- * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex
+/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
+ * as a writer and the spinlock for this semaphore set hold. sem_ids.rwsem
 * remains locked on exit.
 */
 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
@@ -1047,8 +1103,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 
 	wake_up_sem_queue_do(&tasks);
 	ns->used_sems -= sma->sem_nsems;
-	security_sem_free(sma);
-	ipc_rcu_putref(sma);
+	ipc_rcu_putref(sma, sem_rcu_free);
 }
 
 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
@@ -1116,7 +1171,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
 		seminfo.semmnu = SEMMNU;
 		seminfo.semmap = SEMMAP;
 		seminfo.semume = SEMUME;
-		down_read(&sem_ids(ns).rw_mutex);
+		down_read(&sem_ids(ns).rwsem);
 		if (cmd == SEM_INFO) {
 			seminfo.semusz = sem_ids(ns).in_use;
 			seminfo.semaem = ns->used_sems;
@@ -1125,7 +1180,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
 			seminfo.semaem = SEMAEM;
 		}
 		max_id = ipc_get_maxid(&sem_ids(ns));
-		up_read(&sem_ids(ns).rw_mutex);
+		up_read(&sem_ids(ns).rwsem);
 		if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
 			return -EFAULT;
 		return (max_id < 0) ? 0: max_id;
@@ -1227,6 +1282,12 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
 
 	sem_lock(sma, NULL, -1);
 
+	if (sma->sem_perm.deleted) {
+		sem_unlock(sma, -1);
+		rcu_read_unlock();
+		return -EIDRM;
+	}
+
 	curr = &sma->sem_base[semnum];
 
 	ipc_assert_locked_object(&sma->sem_perm);
@@ -1281,28 +1342,28 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		int i;
 
 		sem_lock(sma, NULL, -1);
+		if (sma->sem_perm.deleted) {
+			err = -EIDRM;
+			goto out_unlock;
+		}
 		if(nsems > SEMMSL_FAST) {
 			if (!ipc_rcu_getref(sma)) {
-				sem_unlock(sma, -1);
-				rcu_read_unlock();
 				err = -EIDRM;
-				goto out_free;
+				goto out_unlock;
 			}
 			sem_unlock(sma, -1);
 			rcu_read_unlock();
 			sem_io = ipc_alloc(sizeof(ushort)*nsems);
 			if(sem_io == NULL) {
-				sem_putref(sma);
+				ipc_rcu_putref(sma, ipc_rcu_free);
 				return -ENOMEM;
 			}
 
 			rcu_read_lock();
 			sem_lock_and_putref(sma);
 			if (sma->sem_perm.deleted) {
-				sem_unlock(sma, -1);
-				rcu_read_unlock();
 				err = -EIDRM;
-				goto out_free;
+				goto out_unlock;
 			}
 		}
 		for (i = 0; i < sma->sem_nsems; i++)
@@ -1320,28 +1381,28 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		struct sem_undo *un;
 
 		if (!ipc_rcu_getref(sma)) {
-			rcu_read_unlock();
-			return -EIDRM;
+			err = -EIDRM;
+			goto out_rcu_wakeup;
 		}
 		rcu_read_unlock();
 
 		if(nsems > SEMMSL_FAST) {
 			sem_io = ipc_alloc(sizeof(ushort)*nsems);
 			if(sem_io == NULL) {
-				sem_putref(sma);
+				ipc_rcu_putref(sma, ipc_rcu_free);
 				return -ENOMEM;
 			}
 		}
 
 		if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) {
-			sem_putref(sma);
+			ipc_rcu_putref(sma, ipc_rcu_free);
 			err = -EFAULT;
 			goto out_free;
 		}
 
 		for (i = 0; i < nsems; i++) {
 			if (sem_io[i] > SEMVMX) {
-				sem_putref(sma);
+				ipc_rcu_putref(sma, ipc_rcu_free);
 				err = -ERANGE;
 				goto out_free;
 			}
@@ -1349,10 +1410,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		rcu_read_lock();
 		sem_lock_and_putref(sma);
 		if (sma->sem_perm.deleted) {
-			sem_unlock(sma, -1);
-			rcu_read_unlock();
 			err = -EIDRM;
-			goto out_free;
+			goto out_unlock;
 		}
 
 		for (i = 0; i < nsems; i++)
@@ -1376,6 +1435,10 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		goto out_rcu_wakeup;
 
 	sem_lock(sma, NULL, -1);
+	if (sma->sem_perm.deleted) {
+		err = -EIDRM;
+		goto out_unlock;
+	}
 	curr = &sma->sem_base[semnum];
 
 	switch (cmd) {
@@ -1431,9 +1494,9 @@ copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
 }
 
 /*
- * This function handles some semctl commands which require the rw_mutex
+ * This function handles some semctl commands which require the rwsem
 * to be held in write mode.
- * NOTE: no locks must be held, the rw_mutex is taken inside this function.
+ * NOTE: no locks must be held, the rwsem is taken inside this function.
 */
 static int semctl_down(struct ipc_namespace *ns, int semid,
 		       int cmd, int version, void __user *p)
@@ -1448,7 +1511,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid,
 		return -EFAULT;
 	}
 
-	down_write(&sem_ids(ns).rw_mutex);
+	down_write(&sem_ids(ns).rwsem);
 	rcu_read_lock();
 
 	ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
@@ -1487,7 +1550,7 @@ out_unlock0:
 out_unlock1:
 	rcu_read_unlock();
 out_up:
-	up_write(&sem_ids(ns).rw_mutex);
+	up_write(&sem_ids(ns).rwsem);
 	return err;
 }
 
@@ -1629,7 +1692,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	/* step 2: allocate new undo structure */
 	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
 	if (!new) {
-		sem_putref(sma);
+		ipc_rcu_putref(sma, ipc_rcu_free);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -1781,6 +1844,10 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	if (error)
 		goto out_rcu_wakeup;
 
+	error = -EIDRM;
+	locknum = sem_lock(sma, sops, nsops);
+	if (sma->sem_perm.deleted)
+		goto out_unlock_free;
 	/*
 	 * semid identifiers are not unique - find_alloc_undo may have
 	 * allocated an undo structure, it was invalidated by an RMID
@@ -1788,19 +1855,22 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	 * This case can be detected checking un->semid. The existence of
 	 * "un" itself is guaranteed by rcu.
 	 */
-	error = -EIDRM;
-	locknum = sem_lock(sma, sops, nsops);
 	if (un && un->semid == -1)
 		goto out_unlock_free;
 
 	error = perform_atomic_semop(sma, sops, nsops, un,
 					task_tgid_vnr(current));
-	if (error <= 0) {
-		if (alter && error == 0)
+	if (error == 0) {
+		/* If the operation was successful, then do
+		 * the required updates.
+		 */
+		if (alter)
 			do_smart_update(sma, sops, nsops, 1, &tasks);
-
-		goto out_unlock_free;
+		else
+			set_semotime(sma, sops);
 	}
+	if (error <= 0)
+		goto out_unlock_free;
 
 	/* We need to sleep on this operation, so we put the current
 	 * task into the pending queue and go to sleep.
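The two hunks above move sem_lock() up so the sma->sem_perm.deleted test runs as soon as the lock is held, and split the result handling of perform_atomic_semop(): 0 means the operation went through (run queue updates, or just stamp sem_otime for a non-altering op), negative is an error, positive means the task must sleep. At the syscall boundary the contract is unchanged; for example, from userspace (a throwaway private set; error handling abbreviated):

	#define _GNU_SOURCE
	#include <errno.h>
	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/ipc.h>
	#include <sys/sem.h>
	#include <time.h>

	/* Try to decrement semaphore 0, waiting at most one second.
	 * Success returns 0; removal of the set during the wait yields
	 * EIDRM; a timeout yields EAGAIN. */
	static int dec_with_timeout(int semid)
	{
		struct sembuf op = { .sem_num = 0, .sem_op = -1, .sem_flg = 0 };
		struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };

		if (semtimedop(semid, &op, 1, &ts) == 0)
			return 0;
		if (errno == EIDRM)
			fprintf(stderr, "semaphore set %d was removed\n", semid);
		else if (errno == EAGAIN)
			fprintf(stderr, "timed out waiting on set %d\n", semid);
		else
			perror("semtimedop");
		return -1;
	}

	int main(void)
	{
		int semid = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);

		if (semid < 0)
			return 1;
		dec_with_timeout(semid);	/* times out: count is 0 */
		semctl(semid, 0, IPC_RMID);
		return 0;
	}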
@@ -1997,6 +2067,12 @@ void exit_sem(struct task_struct *tsk)
 		}
 
 		sem_lock(sma, NULL, -1);
+		/* exit_sem raced with IPC_RMID, nothing to do */
+		if (sma->sem_perm.deleted) {
+			sem_unlock(sma, -1);
+			rcu_read_unlock();
+			continue;
+		}
 		un = __lookup_undo(ulp, semid);
 		if (un == NULL) {
 			/* exit_sem raced with IPC_RMID+semget() that created
@@ -2059,6 +2135,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
 	struct sem_array *sma = it;
 	time_t sem_otime;
 
+	/*
+	 * The proc interface isn't aware of sem_lock(), it calls
+	 * ipc_lock_object() directly (in sysvipc_find_ipc).
+	 * In order to stay compatible with sem_lock(), we must wait until
+	 * all simple semop() calls have left their critical regions.
+	 */
+	sem_wait_array(sma);
+
 	sem_otime = get_semotime(sma);
 
 	return seq_printf(s,
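The consumer this hunk protects is the /proc/sysvipc/sem file: it grabs the global ipc lock directly, so it must also wait out in-flight simple semops before sampling the replicated sem_otime. The userspace view is just a text file; a trivial reader:

	#include <stdio.h>

	/* Dump /proc/sysvipc/sem, the reader that the hunk above teaches
	 * to wait for in-flight simple semops. */
	int main(void)
	{
		char line[256];
		FILE *f = fopen("/proc/sysvipc/sem", "r");

		if (!f) {
			perror("fopen");
			return 1;
		}
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
		return 0;
	}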
diff --git a/ipc/shm.c b/ipc/shm.c
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -19,6 +19,9 @@
 *  namespaces support
 *  OpenVZ, SWsoft Inc.
 *  Pavel Emelianov <xemul@openvz.org>
+ *
+ * Better ipc lock (kern_ipc_perm.lock) handling
+ * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
 */
 
 #include <linux/slab.h>
@@ -80,8 +83,8 @@ void shm_init_ns(struct ipc_namespace *ns)
 }
 
 /*
- * Called with shm_ids.rw_mutex (writer) and the shp structure locked.
- * Only shm_ids.rw_mutex remains locked on exit.
+ * Called with shm_ids.rwsem (writer) and the shp structure locked.
+ * Only shm_ids.rwsem remains locked on exit.
 */
 static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
@@ -124,8 +127,28 @@ void __init shm_init (void)
 				IPC_SHM_IDS, sysvipc_shm_proc_show);
 }
 
+static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_obtain_object(&shm_ids(ns), id);
+
+	if (IS_ERR(ipcp))
+		return ERR_CAST(ipcp);
+
+	return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
+static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);
+
+	if (IS_ERR(ipcp))
+		return ERR_CAST(ipcp);
+
+	return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
 /*
- * shm_lock_(check_) routines are called in the paths where the rw_mutex
+ * shm_lock_(check_) routines are called in the paths where the rwsem
 * is not necessarily held.
 */
 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
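shm_obtain_object()/shm_obtain_object_check() return the object under RCU protection only, without spinning on the ipc lock, so read-mostly paths like SHM_STAT (further down) can run lock-free; callers that need stability take ipc_lock_object() afterwards and revalidate. A generic userspace analogue of that "lookup under read-side protection, then lock and revalidate" sequence, with a pthread rwlock standing in for RCU (names illustrative; the model never frees objects, which is what RCU deferral guarantees in the kernel):

	#include <errno.h>
	#include <pthread.h>
	#include <stdbool.h>

	#define NOBJ 64

	struct obj {
		pthread_mutex_t lock;
		bool deleted;
	};

	static pthread_rwlock_t registry_lock = PTHREAD_RWLOCK_INITIALIZER;
	static struct obj *registry[NOBJ];

	/* Analogue of ipc_obtain_object(): find the object under read-side
	 * protection only; no per-object lock is taken.  Dereferencing the
	 * result later is safe here because entries are never freed, only
	 * marked deleted. */
	static struct obj *obtain_object(int id)
	{
		struct obj *o;

		if (id < 0 || id >= NOBJ)
			return NULL;
		pthread_rwlock_rdlock(&registry_lock);
		o = registry[id];
		pthread_rwlock_unlock(&registry_lock);
		return o;
	}

	/* Callers that go on to modify the object must lock and revalidate,
	 * exactly like the q_perm.deleted rechecks elsewhere in the series. */
	static int modify_object(int id)
	{
		struct obj *o = obtain_object(id);

		if (!o)
			return -EINVAL;
		pthread_mutex_lock(&o->lock);
		if (o->deleted) {
			pthread_mutex_unlock(&o->lock);
			return -EIDRM;
		}
		/* ... mutate under the lock ... */
		pthread_mutex_unlock(&o->lock);
		return 0;
	}

	int main(void)
	{
		static struct obj o = { PTHREAD_MUTEX_INITIALIZER, false };

		registry[0] = &o;
		return modify_object(0) == 0 ? 0 : 1;
	}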
@@ -144,15 +167,13 @@ static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
 	ipc_lock_object(&ipcp->shm_perm);
 }
 
-static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
-						int id)
+static void shm_rcu_free(struct rcu_head *head)
 {
-	struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id);
+	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+	struct shmid_kernel *shp = ipc_rcu_to_struct(p);
 
-	if (IS_ERR(ipcp))
-		return (struct shmid_kernel *)ipcp;
-
-	return container_of(ipcp, struct shmid_kernel, shm_perm);
+	security_shm_free(shp);
+	ipc_rcu_free(head);
 }
 
 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
@@ -182,7 +203,7 @@ static void shm_open(struct vm_area_struct *vma)
 * @ns: namespace
 * @shp: struct to free
 *
- * It has to be called with shp and shm_ids.rw_mutex (writer) locked,
+ * It has to be called with shp and shm_ids.rwsem (writer) locked,
 * but returns with shp unlocked and freed.
 */
 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
@@ -196,8 +217,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 		user_shm_unlock(file_inode(shp->shm_file)->i_size,
 						shp->mlock_user);
 	fput (shp->shm_file);
-	security_shm_free(shp);
-	ipc_rcu_putref(shp);
+	ipc_rcu_putref(shp, shm_rcu_free);
 }
 
 /*
@@ -230,7 +250,7 @@ static void shm_close(struct vm_area_struct *vma)
 	struct shmid_kernel *shp;
 	struct ipc_namespace *ns = sfd->ns;
 
-	down_write(&shm_ids(ns).rw_mutex);
+	down_write(&shm_ids(ns).rwsem);
 	/* remove from the list of attaches of the shm segment */
 	shp = shm_lock(ns, sfd->id);
 	BUG_ON(IS_ERR(shp));
@@ -241,10 +261,10 @@ static void shm_close(struct vm_area_struct *vma)
 		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
-	up_write(&shm_ids(ns).rw_mutex);
+	up_write(&shm_ids(ns).rwsem);
 }
 
-/* Called with ns->shm_ids(ns).rw_mutex locked */
+/* Called with ns->shm_ids(ns).rwsem locked */
 static int shm_try_destroy_current(int id, void *p, void *data)
 {
 	struct ipc_namespace *ns = data;
@@ -275,7 +295,7 @@ static int shm_try_destroy_current(int id, void *p, void *data)
 	return 0;
 }
 
-/* Called with ns->shm_ids(ns).rw_mutex locked */
+/* Called with ns->shm_ids(ns).rwsem locked */
 static int shm_try_destroy_orphaned(int id, void *p, void *data)
 {
 	struct ipc_namespace *ns = data;
@@ -286,7 +306,7 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
 	 * We want to destroy segments without users and with already
 	 * exit'ed originating process.
 	 *
-	 * As shp->* are changed under rw_mutex, it's safe to skip shp locking.
+	 * As shp->* are changed under rwsem, it's safe to skip shp locking.
 	 */
 	if (shp->shm_creator != NULL)
 		return 0;
@@ -300,10 +320,10 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
 
 void shm_destroy_orphaned(struct ipc_namespace *ns)
 {
-	down_write(&shm_ids(ns).rw_mutex);
+	down_write(&shm_ids(ns).rwsem);
 	if (shm_ids(ns).in_use)
 		idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
-	up_write(&shm_ids(ns).rw_mutex);
+	up_write(&shm_ids(ns).rwsem);
 }
 
 
@@ -315,10 +335,10 @@ void exit_shm(struct task_struct *task)
 		return;
 
 	/* Destroy all already created segments, but not mapped yet */
-	down_write(&shm_ids(ns).rw_mutex);
+	down_write(&shm_ids(ns).rwsem);
 	if (shm_ids(ns).in_use)
 		idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
-	up_write(&shm_ids(ns).rw_mutex);
+	up_write(&shm_ids(ns).rwsem);
 }
 
 static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -452,7 +472,7 @@ static const struct vm_operations_struct shm_vm_ops = {
 * @ns: namespace
 * @params: ptr to the structure that contains key, size and shmflg
 *
- * Called with shm_ids.rw_mutex held as a writer.
+ * Called with shm_ids.rwsem held as a writer.
 */
 
 static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
@@ -485,7 +505,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	shp->shm_perm.security = NULL;
 	error = security_shm_alloc(shp);
 	if (error) {
-		ipc_rcu_putref(shp);
+		ipc_rcu_putref(shp, ipc_rcu_free);
 		return error;
 	}
 
@@ -554,13 +574,12 @@ no_id:
 		user_shm_unlock(size, shp->mlock_user);
 	fput(file);
 no_file:
-	security_shm_free(shp);
-	ipc_rcu_putref(shp);
+	ipc_rcu_putref(shp, shm_rcu_free);
 	return error;
 }
 
 /*
- * Called with shm_ids.rw_mutex and ipcp locked.
+ * Called with shm_ids.rwsem and ipcp locked.
 */
 static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
 {
@@ -571,7 +590,7 @@ static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
 }
 
 /*
- * Called with shm_ids.rw_mutex and ipcp locked.
+ * Called with shm_ids.rwsem and ipcp locked.
 */
 static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
 				  struct ipc_params *params)
@@ -684,7 +703,7 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf
 
 /*
 * Calculate and add used RSS and swap pages of a shm.
- * Called with shm_ids.rw_mutex held as a reader
+ * Called with shm_ids.rwsem held as a reader
 */
 static void shm_add_rss_swap(struct shmid_kernel *shp,
 	unsigned long *rss_add, unsigned long *swp_add)
@@ -711,7 +730,7 @@ static void shm_add_rss_swap(struct shmid_kernel *shp,
 }
 
 /*
- * Called with shm_ids.rw_mutex held as a reader
+ * Called with shm_ids.rwsem held as a reader
 */
 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
 		unsigned long *swp)
@@ -740,9 +759,9 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
 }
 
 /*
- * This function handles some shmctl commands which require the rw_mutex
+ * This function handles some shmctl commands which require the rwsem
 * to be held in write mode.
- * NOTE: no locks must be held, the rw_mutex is taken inside this function.
+ * NOTE: no locks must be held, the rwsem is taken inside this function.
 */
 static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
 		       struct shmid_ds __user *buf, int version)
@@ -757,14 +776,13 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
 		return -EFAULT;
 	}
 
-	down_write(&shm_ids(ns).rw_mutex);
+	down_write(&shm_ids(ns).rwsem);
 	rcu_read_lock();
 
-	ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd,
-			       &shmid64.shm_perm, 0);
+	ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
+				      &shmid64.shm_perm, 0);
 	if (IS_ERR(ipcp)) {
 		err = PTR_ERR(ipcp);
-		/* the ipc lock is not held upon failure */
 		goto out_unlock1;
 	}
 
@@ -772,14 +790,16 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
 
 	err = security_shm_shmctl(shp, cmd);
 	if (err)
-		goto out_unlock0;
+		goto out_unlock1;
 
 	switch (cmd) {
 	case IPC_RMID:
+		ipc_lock_object(&shp->shm_perm);
 		/* do_shm_rmid unlocks the ipc object and rcu */
 		do_shm_rmid(ns, ipcp);
 		goto out_up;
 	case IPC_SET:
+		ipc_lock_object(&shp->shm_perm);
 		err = ipc_update_perm(&shmid64.shm_perm, ipcp);
 		if (err)
 			goto out_unlock0;
@@ -787,6 +807,7 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
 		break;
 	default:
 		err = -EINVAL;
+		goto out_unlock1;
 	}
 
 out_unlock0:
@@ -794,33 +815,28 @@ out_unlock0:
 out_unlock1:
 	rcu_read_unlock();
 out_up:
-	up_write(&shm_ids(ns).rw_mutex);
+	up_write(&shm_ids(ns).rwsem);
 	return err;
 }
 
-SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
+static int shmctl_nolock(struct ipc_namespace *ns, int shmid,
+			 int cmd, int version, void __user *buf)
 {
+	int err;
 	struct shmid_kernel *shp;
-	int err, version;
-	struct ipc_namespace *ns;
 
-	if (cmd < 0 || shmid < 0) {
-		err = -EINVAL;
-		goto out;
+	/* preliminary security checks for *_INFO */
+	if (cmd == IPC_INFO || cmd == SHM_INFO) {
+		err = security_shm_shmctl(NULL, cmd);
+		if (err)
+			return err;
 	}
 
-	version = ipc_parse_version(&cmd);
-	ns = current->nsproxy->ipc_ns;
-
-	switch (cmd) { /* replace with proc interface ? */
+	switch (cmd) {
 	case IPC_INFO:
 	{
 		struct shminfo64 shminfo;
 
-		err = security_shm_shmctl(NULL, cmd);
-		if (err)
-			return err;
-
 		memset(&shminfo, 0, sizeof(shminfo));
 		shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
 		shminfo.shmmax = ns->shm_ctlmax;
@@ -830,9 +846,9 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 		if(copy_shminfo_to_user (buf, &shminfo, version))
 			return -EFAULT;
 
-		down_read(&shm_ids(ns).rw_mutex);
+		down_read(&shm_ids(ns).rwsem);
 		err = ipc_get_maxid(&shm_ids(ns));
-		up_read(&shm_ids(ns).rw_mutex);
+		up_read(&shm_ids(ns).rwsem);
 
 		if(err<0)
 			err = 0;
@@ -842,19 +858,15 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 	{
 		struct shm_info shm_info;
 
-		err = security_shm_shmctl(NULL, cmd);
-		if (err)
-			return err;
-
 		memset(&shm_info, 0, sizeof(shm_info));
-		down_read(&shm_ids(ns).rw_mutex);
+		down_read(&shm_ids(ns).rwsem);
 		shm_info.used_ids = shm_ids(ns).in_use;
 		shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
 		shm_info.shm_tot = ns->shm_tot;
 		shm_info.swap_attempts = 0;
 		shm_info.swap_successes = 0;
 		err = ipc_get_maxid(&shm_ids(ns));
-		up_read(&shm_ids(ns).rw_mutex);
+		up_read(&shm_ids(ns).rwsem);
 		if (copy_to_user(buf, &shm_info, sizeof(shm_info))) {
 			err = -EFAULT;
 			goto out;
@@ -869,27 +881,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 		struct shmid64_ds tbuf;
 		int result;
 
+		rcu_read_lock();
 		if (cmd == SHM_STAT) {
-			shp = shm_lock(ns, shmid);
+			shp = shm_obtain_object(ns, shmid);
 			if (IS_ERR(shp)) {
 				err = PTR_ERR(shp);
-				goto out;
+				goto out_unlock;
 			}
 			result = shp->shm_perm.id;
 		} else {
-			shp = shm_lock_check(ns, shmid);
+			shp = shm_obtain_object_check(ns, shmid);
 			if (IS_ERR(shp)) {
 				err = PTR_ERR(shp);
-				goto out;
+				goto out_unlock;
 			}
 			result = 0;
 		}
+
 		err = -EACCES;
 		if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
 			goto out_unlock;
+
 		err = security_shm_shmctl(shp, cmd);
 		if (err)
 			goto out_unlock;
+
 		memset(&tbuf, 0, sizeof(tbuf));
 		kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
 		tbuf.shm_segsz = shp->shm_segsz;
@@ -899,43 +915,76 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 		tbuf.shm_cpid = shp->shm_cprid;
 		tbuf.shm_lpid = shp->shm_lprid;
 		tbuf.shm_nattch = shp->shm_nattch;
-		shm_unlock(shp);
-		if(copy_shmid_to_user (buf, &tbuf, version))
+		rcu_read_unlock();
+
+		if (copy_shmid_to_user(buf, &tbuf, version))
 			err = -EFAULT;
 		else
 			err = result;
 		goto out;
 	}
+	default:
+		return -EINVAL;
+	}
+
+out_unlock:
+	rcu_read_unlock();
+out:
+	return err;
+}
+
+SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
+{
+	struct shmid_kernel *shp;
+	int err, version;
+	struct ipc_namespace *ns;
+
+	if (cmd < 0 || shmid < 0)
+		return -EINVAL;
+
+	version = ipc_parse_version(&cmd);
+	ns = current->nsproxy->ipc_ns;
+
+	switch (cmd) {
+	case IPC_INFO:
+	case SHM_INFO:
+	case SHM_STAT:
+	case IPC_STAT:
+		return shmctl_nolock(ns, shmid, cmd, version, buf);
+	case IPC_RMID:
+	case IPC_SET:
+		return shmctl_down(ns, shmid, cmd, buf, version);
 	case SHM_LOCK:
 	case SHM_UNLOCK:
 	{
912 | struct file *shm_file; | 960 | struct file *shm_file; |
913 | 961 | ||
914 | shp = shm_lock_check(ns, shmid); | 962 | rcu_read_lock(); |
963 | shp = shm_obtain_object_check(ns, shmid); | ||
915 | if (IS_ERR(shp)) { | 964 | if (IS_ERR(shp)) { |
916 | err = PTR_ERR(shp); | 965 | err = PTR_ERR(shp); |
917 | goto out; | 966 | goto out_unlock1; |
918 | } | 967 | } |
919 | 968 | ||
920 | audit_ipc_obj(&(shp->shm_perm)); | 969 | audit_ipc_obj(&(shp->shm_perm)); |
970 | err = security_shm_shmctl(shp, cmd); | ||
971 | if (err) | ||
972 | goto out_unlock1; | ||
921 | 973 | ||
974 | ipc_lock_object(&shp->shm_perm); | ||
922 | if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) { | 975 | if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) { |
923 | kuid_t euid = current_euid(); | 976 | kuid_t euid = current_euid(); |
924 | err = -EPERM; | 977 | err = -EPERM; |
925 | if (!uid_eq(euid, shp->shm_perm.uid) && | 978 | if (!uid_eq(euid, shp->shm_perm.uid) && |
926 | !uid_eq(euid, shp->shm_perm.cuid)) | 979 | !uid_eq(euid, shp->shm_perm.cuid)) |
927 | goto out_unlock; | 980 | goto out_unlock0; |
928 | if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) | 981 | if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) |
929 | goto out_unlock; | 982 | goto out_unlock0; |
930 | } | 983 | } |
931 | 984 | ||
932 | err = security_shm_shmctl(shp, cmd); | ||
933 | if (err) | ||
934 | goto out_unlock; | ||
935 | |||
936 | shm_file = shp->shm_file; | 985 | shm_file = shp->shm_file; |
937 | if (is_file_hugepages(shm_file)) | 986 | if (is_file_hugepages(shm_file)) |
938 | goto out_unlock; | 987 | goto out_unlock0; |
939 | 988 | ||
940 | if (cmd == SHM_LOCK) { | 989 | if (cmd == SHM_LOCK) { |
941 | struct user_struct *user = current_user(); | 990 | struct user_struct *user = current_user(); |
@@ -944,32 +993,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) | |||
944 | shp->shm_perm.mode |= SHM_LOCKED; | 993 | shp->shm_perm.mode |= SHM_LOCKED; |
945 | shp->mlock_user = user; | 994 | shp->mlock_user = user; |
946 | } | 995 | } |
947 | goto out_unlock; | 996 | goto out_unlock0; |
948 | } | 997 | } |
949 | 998 | ||
950 | /* SHM_UNLOCK */ | 999 | /* SHM_UNLOCK */ |
951 | if (!(shp->shm_perm.mode & SHM_LOCKED)) | 1000 | if (!(shp->shm_perm.mode & SHM_LOCKED)) |
952 | goto out_unlock; | 1001 | goto out_unlock0; |
953 | shmem_lock(shm_file, 0, shp->mlock_user); | 1002 | shmem_lock(shm_file, 0, shp->mlock_user); |
954 | shp->shm_perm.mode &= ~SHM_LOCKED; | 1003 | shp->shm_perm.mode &= ~SHM_LOCKED; |
955 | shp->mlock_user = NULL; | 1004 | shp->mlock_user = NULL; |
956 | get_file(shm_file); | 1005 | get_file(shm_file); |
957 | shm_unlock(shp); | 1006 | ipc_unlock_object(&shp->shm_perm); |
1007 | rcu_read_unlock(); | ||
958 | shmem_unlock_mapping(shm_file->f_mapping); | 1008 | shmem_unlock_mapping(shm_file->f_mapping); |
1009 | |||
959 | fput(shm_file); | 1010 | fput(shm_file); |
960 | goto out; | ||
961 | } | ||
962 | case IPC_RMID: | ||
963 | case IPC_SET: | ||
964 | err = shmctl_down(ns, shmid, cmd, buf, version); | ||
965 | return err; | 1011 | return err; |
1012 | } | ||
966 | default: | 1013 | default: |
967 | return -EINVAL; | 1014 | return -EINVAL; |
968 | } | 1015 | } |
969 | 1016 | ||
970 | out_unlock: | 1017 | out_unlock0: |
971 | shm_unlock(shp); | 1018 | ipc_unlock_object(&shp->shm_perm); |
972 | out: | 1019 | out_unlock1: |
1020 | rcu_read_unlock(); | ||
973 | return err; | 1021 | return err; |
974 | } | 1022 | } |
975 | 1023 | ||
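The syscall body above is now a pure dispatcher: the STAT/INFO commands take the new lockless path through shmctl_nolock(), IPC_RMID/IPC_SET go through shmctl_down() under the rwsem, and only SHM_LOCK/SHM_UNLOCK still open-code the object lock. The userspace ABI is unchanged; a small (hypothetical) test exercising both paths:

/* Hypothetical userspace check: IPC_STAT now runs without the per-object
 * spinlock, while IPC_SET/IPC_RMID still serialize via shmctl_down(). */
#include <stdio.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(void)
{
	struct shmid_ds ds;
	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);

	if (id < 0 || shmctl(id, IPC_STAT, &ds) < 0)	/* lockless path */
		return 1;
	ds.shm_perm.mode = 0640;
	if (shmctl(id, IPC_SET, &ds) < 0)		/* shmctl_down() path */
		return 1;
	printf("segsz=%lu\n", (unsigned long)ds.shm_segsz);
	shmctl(id, IPC_RMID, NULL);			/* shmctl_down() path */
	return 0;
}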
@@ -1037,10 +1085,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, | |||
1037 | * additional creator id... | 1085 | * additional creator id... |
1038 | */ | 1086 | */ |
1039 | ns = current->nsproxy->ipc_ns; | 1087 | ns = current->nsproxy->ipc_ns; |
1040 | shp = shm_lock_check(ns, shmid); | 1088 | rcu_read_lock(); |
1089 | shp = shm_obtain_object_check(ns, shmid); | ||
1041 | if (IS_ERR(shp)) { | 1090 | if (IS_ERR(shp)) { |
1042 | err = PTR_ERR(shp); | 1091 | err = PTR_ERR(shp); |
1043 | goto out; | 1092 | goto out_unlock; |
1044 | } | 1093 | } |
1045 | 1094 | ||
1046 | err = -EACCES; | 1095 | err = -EACCES; |
@@ -1051,24 +1100,31 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, | |||
1051 | if (err) | 1100 | if (err) |
1052 | goto out_unlock; | 1101 | goto out_unlock; |
1053 | 1102 | ||
1103 | ipc_lock_object(&shp->shm_perm); | ||
1054 | path = shp->shm_file->f_path; | 1104 | path = shp->shm_file->f_path; |
1055 | path_get(&path); | 1105 | path_get(&path); |
1056 | shp->shm_nattch++; | 1106 | shp->shm_nattch++; |
1057 | size = i_size_read(path.dentry->d_inode); | 1107 | size = i_size_read(path.dentry->d_inode); |
1058 | shm_unlock(shp); | 1108 | ipc_unlock_object(&shp->shm_perm); |
1109 | rcu_read_unlock(); | ||
1059 | 1110 | ||
1060 | err = -ENOMEM; | 1111 | err = -ENOMEM; |
1061 | sfd = kzalloc(sizeof(*sfd), GFP_KERNEL); | 1112 | sfd = kzalloc(sizeof(*sfd), GFP_KERNEL); |
1062 | if (!sfd) | 1113 | if (!sfd) { |
1063 | goto out_put_dentry; | 1114 | path_put(&path); |
1115 | goto out_nattch; | ||
1116 | } | ||
1064 | 1117 | ||
1065 | file = alloc_file(&path, f_mode, | 1118 | file = alloc_file(&path, f_mode, |
1066 | is_file_hugepages(shp->shm_file) ? | 1119 | is_file_hugepages(shp->shm_file) ? |
1067 | &shm_file_operations_huge : | 1120 | &shm_file_operations_huge : |
1068 | &shm_file_operations); | 1121 | &shm_file_operations); |
1069 | err = PTR_ERR(file); | 1122 | err = PTR_ERR(file); |
1070 | if (IS_ERR(file)) | 1123 | if (IS_ERR(file)) { |
1071 | goto out_free; | 1124 | kfree(sfd); |
1125 | path_put(&path); | ||
1126 | goto out_nattch; | ||
1127 | } | ||
1072 | 1128 | ||
1073 | file->private_data = sfd; | 1129 | file->private_data = sfd; |
1074 | file->f_mapping = shp->shm_file->f_mapping; | 1130 | file->f_mapping = shp->shm_file->f_mapping; |
@@ -1094,7 +1150,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, | |||
1094 | addr > current->mm->start_stack - size - PAGE_SIZE * 5) | 1150 | addr > current->mm->start_stack - size - PAGE_SIZE * 5) |
1095 | goto invalid; | 1151 | goto invalid; |
1096 | } | 1152 | } |
1097 | 1153 | ||
1098 | addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate); | 1154 | addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate); |
1099 | *raddr = addr; | 1155 | *raddr = addr; |
1100 | err = 0; | 1156 | err = 0; |
@@ -1109,7 +1165,7 @@ out_fput: | |||
1109 | fput(file); | 1165 | fput(file); |
1110 | 1166 | ||
1111 | out_nattch: | 1167 | out_nattch: |
1112 | down_write(&shm_ids(ns).rw_mutex); | 1168 | down_write(&shm_ids(ns).rwsem); |
1113 | shp = shm_lock(ns, shmid); | 1169 | shp = shm_lock(ns, shmid); |
1114 | BUG_ON(IS_ERR(shp)); | 1170 | BUG_ON(IS_ERR(shp)); |
1115 | shp->shm_nattch--; | 1171 | shp->shm_nattch--; |
@@ -1117,20 +1173,13 @@ out_nattch: | |||
1117 | shm_destroy(ns, shp); | 1173 | shm_destroy(ns, shp); |
1118 | else | 1174 | else |
1119 | shm_unlock(shp); | 1175 | shm_unlock(shp); |
1120 | up_write(&shm_ids(ns).rw_mutex); | 1176 | up_write(&shm_ids(ns).rwsem); |
1121 | |||
1122 | out: | ||
1123 | return err; | 1177 | return err; |
1124 | 1178 | ||
1125 | out_unlock: | 1179 | out_unlock: |
1126 | shm_unlock(shp); | 1180 | rcu_read_unlock(); |
1127 | goto out; | 1181 | out: |
1128 | 1182 | return err; | |
1129 | out_free: | ||
1130 | kfree(sfd); | ||
1131 | out_put_dentry: | ||
1132 | path_put(&path); | ||
1133 | goto out_nattch; | ||
1134 | } | 1183 | } |
1135 | 1184 | ||
1136 | SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg) | 1185 | SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg) |
@@ -1235,8 +1284,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) | |||
1235 | #else /* CONFIG_MMU */ | 1284 | #else /* CONFIG_MMU */ |
1236 | /* under NOMMU conditions, the exact address to be destroyed must be | 1285 | /* under NOMMU conditions, the exact address to be destroyed must be |
1237 | * given */ | 1286 | * given */ |
1238 | retval = -EINVAL; | 1287 | if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { |
1239 | if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { | ||
1240 | do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); | 1288 | do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); |
1241 | retval = 0; | 1289 | retval = 0; |
1242 | } | 1290 | } |
diff --git a/ipc/util.c b/ipc/util.c
index 4704223bfad4..7684f41bce76 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -15,6 +15,29 @@ | |||
15 | * Jun 2006 - namespaces support | 15 | * Jun 2006 - namespaces support |
16 | * OpenVZ, SWsoft Inc. | 16 | * OpenVZ, SWsoft Inc. |
17 | * Pavel Emelianov <xemul@openvz.org> | 17 | * Pavel Emelianov <xemul@openvz.org> |
18 | * | ||
19 | * General sysv ipc locking scheme: | ||
20 | * rcu_read_lock() | ||
21 | * obtain the ipc object (kern_ipc_perm) by looking up the id in an idr | ||
22 | * tree. | ||
23 | * - perform initial checks (capabilities, auditing and permission, | ||
24 | * etc). | ||
25 | * - perform read-only operations, such as STAT, INFO commands. | ||
26 | * acquire the ipc lock (kern_ipc_perm.lock) through | ||
27 | * ipc_lock_object() | ||
28 | * - perform data updates, such as SET, RMID commands and | ||
29 | * mechanism-specific operations (semop/semtimedop, | ||
30 | * msgsnd/msgrcv, shmat/shmdt). | ||
31 | * drop the ipc lock, through ipc_unlock_object(). | ||
32 | * rcu_read_unlock() | ||
33 | * | ||
34 | * The ids->rwsem must be taken when: | ||
35 | * - creating, removing and iterating the existing entries in ipc | ||
36 | * identifier sets. | ||
37 | * - iterating through files under /proc/sysvipc/ | ||
38 | * | ||
39 | * Note that sems have a special fast path that avoids kern_ipc_perm.lock - | ||
40 | * see sem_lock(). | ||
18 | */ | 41 | */ |
19 | 42 | ||
20 | #include <linux/mm.h> | 43 | #include <linux/mm.h> |
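The comment block above condenses the whole series into one pattern: lookup and read-only checks run under RCU alone, and only data updates take the per-object spinlock. A minimal sketch of that pattern, using the shm helpers introduced earlier in this diff (the function example_shm_ctl() itself is hypothetical):

static int example_shm_ctl(struct ipc_namespace *ns, int shmid)
{
	struct shmid_kernel *shp;
	int err;

	rcu_read_lock();			/* phase 1: RCU-only lookup */
	shp = shm_obtain_object_check(ns, shmid);
	if (IS_ERR(shp)) {
		err = PTR_ERR(shp);
		goto out_unlock1;
	}

	/* read-only checks: no spinlock needed */
	err = security_shm_shmctl(shp, IPC_STAT);
	if (err)
		goto out_unlock1;

	ipc_lock_object(&shp->shm_perm);	/* phase 2: data updates only */
	shp->shm_ctim = get_seconds();		/* illustrative update */
	ipc_unlock_object(&shp->shm_perm);
	err = 0;

out_unlock1:
	rcu_read_unlock();
	return err;
}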
@@ -119,7 +142,7 @@ __initcall(ipc_init); | |||
119 | 142 | ||
120 | void ipc_init_ids(struct ipc_ids *ids) | 143 | void ipc_init_ids(struct ipc_ids *ids) |
121 | { | 144 | { |
122 | init_rwsem(&ids->rw_mutex); | 145 | init_rwsem(&ids->rwsem); |
123 | 146 | ||
124 | ids->in_use = 0; | 147 | ids->in_use = 0; |
125 | ids->seq = 0; | 148 | ids->seq = 0; |
@@ -174,7 +197,7 @@ void __init ipc_init_proc_interface(const char *path, const char *header, | |||
174 | * @ids: Identifier set | 197 | * @ids: Identifier set |
175 | * @key: The key to find | 198 | * @key: The key to find |
176 | * | 199 | * |
177 | * Requires ipc_ids.rw_mutex locked. | 200 | * Requires ipc_ids.rwsem locked. |
178 | * Returns the LOCKED pointer to the ipc structure if found or NULL | 201 | * Returns the LOCKED pointer to the ipc structure if found or NULL |
179 | * if not. | 202 | * if not. |
180 | * If key is found ipc points to the owning ipc structure | 203 | * If key is found ipc points to the owning ipc structure |
@@ -197,7 +220,8 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key) | |||
197 | continue; | 220 | continue; |
198 | } | 221 | } |
199 | 222 | ||
200 | ipc_lock_by_ptr(ipc); | 223 | rcu_read_lock(); |
224 | ipc_lock_object(ipc); | ||
201 | return ipc; | 225 | return ipc; |
202 | } | 226 | } |
203 | 227 | ||
@@ -208,7 +232,7 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key) | |||
208 | * ipc_get_maxid - get the last assigned id | 232 | * ipc_get_maxid - get the last assigned id |
209 | * @ids: IPC identifier set | 233 | * @ids: IPC identifier set |
210 | * | 234 | * |
211 | * Called with ipc_ids.rw_mutex held. | 235 | * Called with ipc_ids.rwsem held. |
212 | */ | 236 | */ |
213 | 237 | ||
214 | int ipc_get_maxid(struct ipc_ids *ids) | 238 | int ipc_get_maxid(struct ipc_ids *ids) |
@@ -246,7 +270,7 @@ int ipc_get_maxid(struct ipc_ids *ids) | |||
246 | * is returned. The 'new' entry is returned in a locked state on success. | 270 | * is returned. The 'new' entry is returned in a locked state on success. |
247 | * On failure the entry is not locked and a negative err-code is returned. | 271 | * On failure the entry is not locked and a negative err-code is returned. |
248 | * | 272 | * |
249 | * Called with writer ipc_ids.rw_mutex held. | 273 | * Called with writer ipc_ids.rwsem held. |
250 | */ | 274 | */ |
251 | int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) | 275 | int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) |
252 | { | 276 | { |
@@ -312,9 +336,9 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids, | |||
312 | { | 336 | { |
313 | int err; | 337 | int err; |
314 | 338 | ||
315 | down_write(&ids->rw_mutex); | 339 | down_write(&ids->rwsem); |
316 | err = ops->getnew(ns, params); | 340 | err = ops->getnew(ns, params); |
317 | up_write(&ids->rw_mutex); | 341 | up_write(&ids->rwsem); |
318 | return err; | 342 | return err; |
319 | } | 343 | } |
320 | 344 | ||
@@ -331,7 +355,7 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids, | |||
331 | * | 355 | * |
332 | * On success, the IPC id is returned. | 356 | * On success, the IPC id is returned. |
333 | * | 357 | * |
334 | * It is called with ipc_ids.rw_mutex and ipcp->lock held. | 358 | * It is called with ipc_ids.rwsem and ipcp->lock held. |
335 | */ | 359 | */ |
336 | static int ipc_check_perms(struct ipc_namespace *ns, | 360 | static int ipc_check_perms(struct ipc_namespace *ns, |
337 | struct kern_ipc_perm *ipcp, | 361 | struct kern_ipc_perm *ipcp, |
@@ -376,7 +400,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, | |||
376 | * Take the lock as a writer since we are potentially going to add | 400 | * Take the lock as a writer since we are potentially going to add |
377 | * a new entry + read locks are not "upgradable" | 401 | * a new entry + read locks are not "upgradable" |
378 | */ | 402 | */ |
379 | down_write(&ids->rw_mutex); | 403 | down_write(&ids->rwsem); |
380 | ipcp = ipc_findkey(ids, params->key); | 404 | ipcp = ipc_findkey(ids, params->key); |
381 | if (ipcp == NULL) { | 405 | if (ipcp == NULL) { |
382 | /* key not used */ | 406 | /* key not used */ |
@@ -402,7 +426,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, | |||
402 | } | 426 | } |
403 | ipc_unlock(ipcp); | 427 | ipc_unlock(ipcp); |
404 | } | 428 | } |
405 | up_write(&ids->rw_mutex); | 429 | up_write(&ids->rwsem); |
406 | 430 | ||
407 | return err; | 431 | return err; |
408 | } | 432 | } |
@@ -413,7 +437,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, | |||
413 | * @ids: IPC identifier set | 437 | * @ids: IPC identifier set |
414 | * @ipcp: ipc perm structure containing the identifier to remove | 438 | * @ipcp: ipc perm structure containing the identifier to remove |
415 | * | 439 | * |
416 | * ipc_ids.rw_mutex (as a writer) and the spinlock for this ID are held | 440 | * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held |
417 | * before this function is called, and remain locked on the exit. | 441 | * before this function is called, and remain locked on the exit. |
418 | */ | 442 | */ |
419 | 443 | ||
@@ -465,11 +489,6 @@ void ipc_free(void* ptr, int size) | |||
465 | kfree(ptr); | 489 | kfree(ptr); |
466 | } | 490 | } |
467 | 491 | ||
468 | struct ipc_rcu { | ||
469 | struct rcu_head rcu; | ||
470 | atomic_t refcount; | ||
471 | } ____cacheline_aligned_in_smp; | ||
472 | |||
473 | /** | 492 | /** |
474 | * ipc_rcu_alloc - allocate ipc and rcu space | 493 | * ipc_rcu_alloc - allocate ipc and rcu space |
475 | * @size: size desired | 494 | * @size: size desired |
@@ -496,27 +515,24 @@ int ipc_rcu_getref(void *ptr) | |||
496 | return atomic_inc_not_zero(&p->refcount); | 515 | return atomic_inc_not_zero(&p->refcount); |
497 | } | 516 | } |
498 | 517 | ||
499 | /** | 518 | void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head)) |
500 | * ipc_schedule_free - free ipc + rcu space | ||
501 | * @head: RCU callback structure for queued work | ||
502 | */ | ||
503 | static void ipc_schedule_free(struct rcu_head *head) | ||
504 | { | ||
505 | vfree(container_of(head, struct ipc_rcu, rcu)); | ||
506 | } | ||
507 | |||
508 | void ipc_rcu_putref(void *ptr) | ||
509 | { | 519 | { |
510 | struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1; | 520 | struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1; |
511 | 521 | ||
512 | if (!atomic_dec_and_test(&p->refcount)) | 522 | if (!atomic_dec_and_test(&p->refcount)) |
513 | return; | 523 | return; |
514 | 524 | ||
515 | if (is_vmalloc_addr(ptr)) { | 525 | call_rcu(&p->rcu, func); |
516 | call_rcu(&p->rcu, ipc_schedule_free); | 526 | } |
517 | } else { | 527 | |
518 | kfree_rcu(p, rcu); | 528 | void ipc_rcu_free(struct rcu_head *head) |
519 | } | 529 | { |
530 | struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); | ||
531 | |||
532 | if (is_vmalloc_addr(p)) | ||
533 | vfree(p); | ||
534 | else | ||
535 | kfree(p); | ||
520 | } | 536 | } |
521 | 537 | ||
522 | /** | 538 | /** |
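ipc_rcu_putref() now takes the RCU callback from its caller, so each IPC type can run type-specific teardown before the generic ipc_rcu_free(). A hedged sketch of the shape such a callback takes; example_shm_rcu_free() is illustrative, though security_shm_free() is the real LSM hook:

static void example_shm_rcu_free(struct rcu_head *head)
{
	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
	struct shmid_kernel *shp = ipc_rcu_to_struct(p);

	security_shm_free(shp);		/* type-specific teardown first */
	ipc_rcu_free(head);		/* then kfree()/vfree() header + object */
}

A caller then drops its reference with ipc_rcu_putref(shp, example_shm_rcu_free); the object is freed through call_rcu() once the count reaches zero.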
@@ -621,7 +637,7 @@ struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id) | |||
621 | } | 637 | } |
622 | 638 | ||
623 | /** | 639 | /** |
624 | * ipc_lock - Lock an ipc structure without rw_mutex held | 640 | * ipc_lock - Lock an ipc structure without rwsem held |
625 | * @ids: IPC identifier set | 641 | * @ids: IPC identifier set |
626 | * @id: ipc id to look for | 642 | * @id: ipc id to look for |
627 | * | 643 | * |
@@ -677,22 +693,6 @@ out: | |||
677 | return out; | 693 | return out; |
678 | } | 694 | } |
679 | 695 | ||
680 | struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id) | ||
681 | { | ||
682 | struct kern_ipc_perm *out; | ||
683 | |||
684 | out = ipc_lock(ids, id); | ||
685 | if (IS_ERR(out)) | ||
686 | return out; | ||
687 | |||
688 | if (ipc_checkid(out, id)) { | ||
689 | ipc_unlock(out); | ||
690 | return ERR_PTR(-EIDRM); | ||
691 | } | ||
692 | |||
693 | return out; | ||
694 | } | ||
695 | |||
696 | /** | 696 | /** |
697 | * ipcget - Common sys_*get() code | 697 | * ipcget - Common sys_*get() code |
698 | * @ns : namespace | 698 | * @ns : namespace |
@@ -733,7 +733,7 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out) | |||
733 | } | 733 | } |
734 | 734 | ||
735 | /** | 735 | /** |
736 | * ipcctl_pre_down - retrieve an ipc and check permissions for some IPC_XXX cmd | 736 | * ipcctl_pre_down_nolock - retrieve an ipc and check permissions for some IPC_XXX cmd |
737 | * @ns: the ipc namespace | 737 | * @ns: the ipc namespace |
738 | * @ids: the table of ids where to look for the ipc | 738 | * @ids: the table of ids where to look for the ipc |
739 | * @id: the id of the ipc to retrieve | 739 | * @id: the id of the ipc to retrieve |
@@ -746,29 +746,13 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out) | |||
746 | * It must be called without any lock held and | 746 | * It must be called without any lock held and |
747 | * - retrieves the ipc with the given id in the given table. | 747 | * - retrieves the ipc with the given id in the given table. |
748 | * - performs some audit and permission check, depending on the given cmd | 748 | * - performs some audit and permission check, depending on the given cmd |
749 | * - returns the ipc with the ipc lock held in case of success | 749 | * - returns a pointer to the ipc object, or the corresponding error otherwise. |
750 | * or an err-code without any lock held otherwise. | ||
751 | * | 750 | * |
752 | * Call holding the both the rw_mutex and the rcu read lock. | 751 | * Called with both the rwsem and the rcu read lock held. |
753 | */ | 752 | */ |
754 | struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, | ||
755 | struct ipc_ids *ids, int id, int cmd, | ||
756 | struct ipc64_perm *perm, int extra_perm) | ||
757 | { | ||
758 | struct kern_ipc_perm *ipcp; | ||
759 | |||
760 | ipcp = ipcctl_pre_down_nolock(ns, ids, id, cmd, perm, extra_perm); | ||
761 | if (IS_ERR(ipcp)) | ||
762 | goto out; | ||
763 | |||
764 | spin_lock(&ipcp->lock); | ||
765 | out: | ||
766 | return ipcp; | ||
767 | } | ||
768 | |||
769 | struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, | 753 | struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, |
770 | struct ipc_ids *ids, int id, int cmd, | 754 | struct ipc_ids *ids, int id, int cmd, |
771 | struct ipc64_perm *perm, int extra_perm) | 755 | struct ipc64_perm *perm, int extra_perm) |
772 | { | 756 | { |
773 | kuid_t euid; | 757 | kuid_t euid; |
774 | int err = -EPERM; | 758 | int err = -EPERM; |
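With ipcctl_pre_down() gone, callers that used to get the object back locked now take the object lock themselves, after the RCU-only permission checks. A hedged sketch of the expected caller shape (example_ctl_down() is hypothetical; the real *ctl_down() functions in this series follow it):

static int example_ctl_down(struct ipc_namespace *ns, struct ipc_ids *ids,
			    int id, int cmd, struct ipc64_perm *perm64)
{
	struct kern_ipc_perm *ipcp;
	int err = 0;

	down_write(&ids->rwsem);
	rcu_read_lock();

	ipcp = ipcctl_pre_down_nolock(ns, ids, id, cmd, perm64, 0);
	if (IS_ERR(ipcp)) {
		err = PTR_ERR(ipcp);
		goto out_unlock1;
	}

	ipc_lock_object(ipcp);		/* the caller locks, not the helper */
	/* IPC_SET / IPC_RMID work goes here */
	ipc_unlock_object(ipcp);
out_unlock1:
	rcu_read_unlock();
	up_write(&ids->rwsem);
	return err;
}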
@@ -846,7 +830,8 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos, | |||
846 | ipc = idr_find(&ids->ipcs_idr, pos); | 830 | ipc = idr_find(&ids->ipcs_idr, pos); |
847 | if (ipc != NULL) { | 831 | if (ipc != NULL) { |
848 | *new_pos = pos + 1; | 832 | *new_pos = pos + 1; |
849 | ipc_lock_by_ptr(ipc); | 833 | rcu_read_lock(); |
834 | ipc_lock_object(ipc); | ||
850 | return ipc; | 835 | return ipc; |
851 | } | 836 | } |
852 | } | 837 | } |
@@ -884,7 +869,7 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos) | |||
884 | * Take the lock - this will be released by the corresponding | 869 | * Take the lock - this will be released by the corresponding |
885 | * call to stop(). | 870 | * call to stop(). |
886 | */ | 871 | */ |
887 | down_read(&ids->rw_mutex); | 872 | down_read(&ids->rwsem); |
888 | 873 | ||
889 | /* pos < 0 is invalid */ | 874 | /* pos < 0 is invalid */ |
890 | if (*pos < 0) | 875 | if (*pos < 0) |
@@ -911,7 +896,7 @@ static void sysvipc_proc_stop(struct seq_file *s, void *it) | |||
911 | 896 | ||
912 | ids = &iter->ns->ids[iface->ids]; | 897 | ids = &iter->ns->ids[iface->ids]; |
913 | /* Release the lock we took in start() */ | 898 | /* Release the lock we took in start() */ |
914 | up_read(&ids->rw_mutex); | 899 | up_read(&ids->rwsem); |
915 | } | 900 | } |
916 | 901 | ||
917 | static int sysvipc_proc_show(struct seq_file *s, void *it) | 902 | static int sysvipc_proc_show(struct seq_file *s, void *it) |
diff --git a/ipc/util.h b/ipc/util.h
index b6a6a88f3002..f2f5036f2eed 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -47,6 +47,13 @@ static inline void msg_exit_ns(struct ipc_namespace *ns) { } | |||
47 | static inline void shm_exit_ns(struct ipc_namespace *ns) { } | 47 | static inline void shm_exit_ns(struct ipc_namespace *ns) { } |
48 | #endif | 48 | #endif |
49 | 49 | ||
50 | struct ipc_rcu { | ||
51 | struct rcu_head rcu; | ||
52 | atomic_t refcount; | ||
53 | } ____cacheline_aligned_in_smp; | ||
54 | |||
55 | #define ipc_rcu_to_struct(p) ((void *)(p+1)) | ||
56 | |||
50 | /* | 57 | /* |
51 | * Structure that holds the parameters needed by the ipc operations | 58 | * Structure that holds the parameters needed by the ipc operations |
52 | * (see after) | 59 | * (see after) |
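Moving struct ipc_rcu into util.h exposes the allocation layout that ipc_rcu_to_struct() depends on: ipc_rcu_alloc() places the refcount header immediately before the object it hands back, so header and object convert with plain pointer arithmetic. A sketch of the inverse direction (the helper name is illustrative; the subtraction mirrors what ipc_rcu_putref() open-codes):

/*
 *   [ struct ipc_rcu | msg_queue / sem_array / shmid_kernel ]
 *                      ^-- the pointer callers see
 */
static inline struct ipc_rcu *example_struct_to_rcu(void *obj)
{
	/* inverse of ipc_rcu_to_struct(p), i.e. (void *)(p + 1) */
	return ((struct ipc_rcu *)obj) - 1;
}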
@@ -94,10 +101,10 @@ void __init ipc_init_proc_interface(const char *path, const char *header, | |||
94 | #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER) | 101 | #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER) |
95 | #define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER) | 102 | #define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER) |
96 | 103 | ||
97 | /* must be called with ids->rw_mutex acquired for writing */ | 104 | /* must be called with ids->rwsem acquired for writing */ |
98 | int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int); | 105 | int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int); |
99 | 106 | ||
100 | /* must be called with ids->rw_mutex acquired for reading */ | 107 | /* must be called with ids->rwsem acquired for reading */ |
101 | int ipc_get_maxid(struct ipc_ids *); | 108 | int ipc_get_maxid(struct ipc_ids *); |
102 | 109 | ||
103 | /* must be called with both locks acquired. */ | 110 | /* must be called with both locks acquired. */ |
@@ -120,7 +127,8 @@ void ipc_free(void* ptr, int size); | |||
120 | */ | 127 | */ |
121 | void* ipc_rcu_alloc(int size); | 128 | void* ipc_rcu_alloc(int size); |
122 | int ipc_rcu_getref(void *ptr); | 129 | int ipc_rcu_getref(void *ptr); |
123 | void ipc_rcu_putref(void *ptr); | 130 | void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head)); |
131 | void ipc_rcu_free(struct rcu_head *head); | ||
124 | 132 | ||
125 | struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); | 133 | struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); |
126 | struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id); | 134 | struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id); |
@@ -131,9 +139,6 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out); | |||
131 | struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, | 139 | struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, |
132 | struct ipc_ids *ids, int id, int cmd, | 140 | struct ipc_ids *ids, int id, int cmd, |
133 | struct ipc64_perm *perm, int extra_perm); | 141 | struct ipc64_perm *perm, int extra_perm); |
134 | struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, | ||
135 | struct ipc_ids *ids, int id, int cmd, | ||
136 | struct ipc64_perm *perm, int extra_perm); | ||
137 | 142 | ||
138 | #ifndef CONFIG_ARCH_WANT_IPC_PARSE_VERSION | 143 | #ifndef CONFIG_ARCH_WANT_IPC_PARSE_VERSION |
139 | /* On IA-64, we always use the "64-bit version" of the IPC structures. */ | 144 | /* On IA-64, we always use the "64-bit version" of the IPC structures. */ |
@@ -174,19 +179,12 @@ static inline void ipc_assert_locked_object(struct kern_ipc_perm *perm) | |||
174 | assert_spin_locked(&perm->lock); | 179 | assert_spin_locked(&perm->lock); |
175 | } | 180 | } |
176 | 181 | ||
177 | static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm) | ||
178 | { | ||
179 | rcu_read_lock(); | ||
180 | ipc_lock_object(perm); | ||
181 | } | ||
182 | |||
183 | static inline void ipc_unlock(struct kern_ipc_perm *perm) | 182 | static inline void ipc_unlock(struct kern_ipc_perm *perm) |
184 | { | 183 | { |
185 | ipc_unlock_object(perm); | 184 | ipc_unlock_object(perm); |
186 | rcu_read_unlock(); | 185 | rcu_read_unlock(); |
187 | } | 186 | } |
188 | 187 | ||
189 | struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id); | ||
190 | struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id); | 188 | struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id); |
191 | int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, | 189 | int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, |
192 | struct ipc_ops *ops, struct ipc_params *params); | 190 | struct ipc_ops *ops, struct ipc_params *params); |