Diffstat (limited to 'ipc')
-rw-r--r--  ipc/Makefile           2
-rw-r--r--  ipc/ipc_sysctl.c      68
-rw-r--r--  ipc/ipcns_notifier.c  82
-rw-r--r--  ipc/msg.c            239
-rw-r--r--  ipc/namespace.c       26
-rw-r--r--  ipc/sem.c            159
-rw-r--r--  ipc/shm.c            186
-rw-r--r--  ipc/util.c           131
-rw-r--r--  ipc/util.h             6
9 files changed, 532 insertions, 367 deletions
diff --git a/ipc/Makefile b/ipc/Makefile
index 5fc5e33ea047..65c384395801 100644
--- a/ipc/Makefile
+++ b/ipc/Makefile
@@ -3,7 +3,7 @@
 #
 
 obj-$(CONFIG_SYSVIPC_COMPAT) += compat.o
-obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o
+obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o ipcns_notifier.o
 obj-$(CONFIG_SYSVIPC_SYSCTL) += ipc_sysctl.o
 obj_mq-$(CONFIG_COMPAT) += compat_mq.o
 obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y)
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 7f4235bed51b..d3497465cc0a 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -15,6 +15,8 @@
 #include <linux/sysctl.h>
 #include <linux/uaccess.h>
 #include <linux/ipc_namespace.h>
+#include <linux/msg.h>
+#include "util.h"
 
 static void *get_ipc(ctl_table *table)
 {
@@ -24,6 +26,27 @@ static void *get_ipc(ctl_table *table)
 	return which;
 }
 
+/*
+ * Routine that is called when a tunable has successfully been changed by
+ * hand and it has a callback routine registered on the ipc namespace notifier
+ * chain: we don't want such tunables to be recomputed anymore upon memory
+ * add/remove or ipc namespace creation/removal.
+ * They can come back to a recomputable state by being set to a <0 value.
+ */
+static void tunable_set_callback(int val)
+{
+	if (val >= 0)
+		unregister_ipcns_notifier(current->nsproxy->ipc_ns);
+	else {
+		/*
+		 * Re-enable automatic recomputing only if not already
+		 * enabled.
+		 */
+		recompute_msgmni(current->nsproxy->ipc_ns);
+		cond_register_ipcns_notifier(current->nsproxy->ipc_ns);
+	}
+}
+
 #ifdef CONFIG_PROC_FS
 static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp,
 	void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -35,6 +58,24 @@ static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp,
 	return proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos);
 }
 
+static int proc_ipc_callback_dointvec(ctl_table *table, int write,
+	struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table ipc_table;
+	size_t lenp_bef = *lenp;
+	int rc;
+
+	memcpy(&ipc_table, table, sizeof(ipc_table));
+	ipc_table.data = get_ipc(table);
+
+	rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos);
+
+	if (write && !rc && lenp_bef == *lenp)
+		tunable_set_callback(*((int *)(ipc_table.data)));
+
+	return rc;
+}
+
 static int proc_ipc_doulongvec_minmax(ctl_table *table, int write,
 	struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -49,6 +90,7 @@ static int proc_ipc_doulongvec_minmax(ctl_table *table, int write,
 #else
 #define proc_ipc_doulongvec_minmax NULL
 #define proc_ipc_dointvec NULL
+#define proc_ipc_callback_dointvec NULL
 #endif
 
 #ifdef CONFIG_SYSCTL_SYSCALL
@@ -90,8 +132,30 @@ static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen,
 	}
 	return 1;
 }
+
+static int sysctl_ipc_registered_data(ctl_table *table, int __user *name,
+		int nlen, void __user *oldval, size_t __user *oldlenp,
+		void __user *newval, size_t newlen)
+{
+	int rc;
+
+	rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval,
+		newlen);
+
+	if (newval && newlen && rc > 0) {
+		/*
+		 * Tunable has successfully been changed from userland
+		 */
+		int *data = get_ipc(table);
+
+		tunable_set_callback(*data);
+	}
+
+	return rc;
+}
 #else
 #define sysctl_ipc_data NULL
+#define sysctl_ipc_registered_data NULL
 #endif
 
 static struct ctl_table ipc_kern_table[] = {
@@ -137,8 +201,8 @@ static struct ctl_table ipc_kern_table[] = {
 		.data		= &init_ipc_ns.msg_ctlmni,
 		.maxlen		= sizeof (init_ipc_ns.msg_ctlmni),
 		.mode		= 0644,
-		.proc_handler	= proc_ipc_dointvec,
-		.strategy	= sysctl_ipc_data,
+		.proc_handler	= proc_ipc_callback_dointvec,
+		.strategy	= sysctl_ipc_registered_data,
 	},
 	{
 		.ctl_name	= KERN_MSGMNB,
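
[Editor's note] The msgmni entry above is the only tunable switched to the new
callback handlers. From userspace the semantics are: writing a value >= 0 pins
the tunable (the kernel unregisters the namespace's ipcns notifier), while
writing a negative value re-enables automatic recomputing. A minimal userspace
sketch, assuming only the standard /proc/sys/kernel/msgmni path; this is
illustration, not part of the patch:

#include <stdio.h>

static int write_msgmni(int val)
{
	FILE *f = fopen("/proc/sys/kernel/msgmni", "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", val);	/* >= 0: pin; < 0: automatic again */
	return fclose(f);
}

int main(void)
{
	write_msgmni(2048);	/* pin msgmni to 2048, recomputing disabled */
	write_msgmni(-1);	/* back to automatic recomputing */
	return 0;
}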
diff --git a/ipc/ipcns_notifier.c b/ipc/ipcns_notifier.c
new file mode 100644
index 000000000000..70ff09183f7b
--- /dev/null
+++ b/ipc/ipcns_notifier.c
@@ -0,0 +1,82 @@
+/*
+ * linux/ipc/ipcns_notifier.c
+ * Copyright (C) 2007 BULL SA. Nadia Derbey
+ *
+ * Notification mechanism for ipc namespaces:
+ * The callback routine registered in the memory chain invokes the ipcns
+ * notifier chain with the IPCNS_MEMCHANGED event.
+ * Each callback routine registered in the ipcns namespace recomputes msgmni
+ * for the owning namespace.
+ */
+
+#include <linux/msg.h>
+#include <linux/rcupdate.h>
+#include <linux/notifier.h>
+#include <linux/nsproxy.h>
+#include <linux/ipc_namespace.h>
+
+#include "util.h"
+
+
+
+static BLOCKING_NOTIFIER_HEAD(ipcns_chain);
+
+
+static int ipcns_callback(struct notifier_block *self,
+				unsigned long action, void *arg)
+{
+	struct ipc_namespace *ns;
+
+	switch (action) {
+	case IPCNS_MEMCHANGED:   /* amount of lowmem has changed */
+	case IPCNS_CREATED:
+	case IPCNS_REMOVED:
+		/*
+		 * It's time to recompute msgmni
+		 */
+		ns = container_of(self, struct ipc_namespace, ipcns_nb);
+		/*
+		 * No need to get a reference on the ns: the 1st job of
+		 * free_ipc_ns() is to unregister the callback routine.
+		 * blocking_notifier_chain_unregister takes the wr lock to do
+		 * it.
+		 * When this callback routine is called the rd lock is held by
+		 * blocking_notifier_call_chain.
+		 * So the ipc ns cannot be freed while we are here.
+		 */
+		recompute_msgmni(ns);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+int register_ipcns_notifier(struct ipc_namespace *ns)
+{
+	memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb));
+	ns->ipcns_nb.notifier_call = ipcns_callback;
+	ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI;
+	return blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb);
+}
+
+int cond_register_ipcns_notifier(struct ipc_namespace *ns)
+{
+	memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb));
+	ns->ipcns_nb.notifier_call = ipcns_callback;
+	ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI;
+	return blocking_notifier_chain_cond_register(&ipcns_chain,
+							&ns->ipcns_nb);
+}
+
+int unregister_ipcns_notifier(struct ipc_namespace *ns)
+{
+	return blocking_notifier_chain_unregister(&ipcns_chain,
+						&ns->ipcns_nb);
+}
+
+int ipcns_notify(unsigned long val)
+{
+	return blocking_notifier_call_chain(&ipcns_chain, val, NULL);
+}
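
[Editor's note] For readers unfamiliar with the notifier API this new file is
built on: the blocking_notifier_* functions used above are the stock kernel
primitives. A minimal kernel-style sketch of the same
register/notify/unregister cycle on a toy chain (every demo_* name is
hypothetical):

#include <linux/notifier.h>

static BLOCKING_NOTIFIER_HEAD(demo_chain);

static int demo_callback(struct notifier_block *self,
				unsigned long action, void *arg)
{
	/* react to the event; NOTIFY_OK lets the chain keep walking */
	return NOTIFY_OK;
}

static struct notifier_block demo_nb = {
	.notifier_call	= demo_callback,
};

static void demo(void)
{
	blocking_notifier_chain_register(&demo_chain, &demo_nb);
	/*
	 * Runs every registered callback in priority order under the
	 * chain's read lock -- exactly what ipcns_notify() does above.
	 */
	blocking_notifier_call_chain(&demo_chain, 0 /* event */, NULL);
	blocking_notifier_chain_unregister(&demo_chain, &demo_nb);
}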
diff --git a/ipc/msg.c b/ipc/msg.c
index 46585a05473e..32494e8cc7a5 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -27,6 +27,7 @@
 #include <linux/msg.h>
 #include <linux/spinlock.h>
 #include <linux/init.h>
+#include <linux/mm.h>
 #include <linux/proc_fs.h>
 #include <linux/list.h>
 #include <linux/security.h>
@@ -70,7 +71,6 @@ struct msg_sender {
 #define msg_ids(ns)	((ns)->ids[IPC_MSG_IDS])
 
 #define msg_unlock(msq)	ipc_unlock(&(msq)->q_perm)
-#define msg_buildid(id, seq)	ipc_buildid(id, seq)
 
 static void freeque(struct ipc_namespace *, struct kern_ipc_perm *);
 static int newque(struct ipc_namespace *, struct ipc_params *);
@@ -78,11 +78,49 @@ static int newque(struct ipc_namespace *, struct ipc_params *);
 static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
 #endif
 
+/*
+ * Scale msgmni with the available lowmem size: the memory dedicated to msg
+ * queues should occupy at most 1/MSG_MEM_SCALE of lowmem.
+ * Also take into account the number of nsproxies created so far.
+ * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range.
+ */
+void recompute_msgmni(struct ipc_namespace *ns)
+{
+	struct sysinfo i;
+	unsigned long allowed;
+	int nb_ns;
+
+	si_meminfo(&i);
+	allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit)
+		/ MSGMNB;
+	nb_ns = atomic_read(&nr_ipc_ns);
+	allowed /= nb_ns;
+
+	if (allowed < MSGMNI) {
+		ns->msg_ctlmni = MSGMNI;
+		goto out_callback;
+	}
+
+	if (allowed > IPCMNI / nb_ns) {
+		ns->msg_ctlmni = IPCMNI / nb_ns;
+		goto out_callback;
+	}
+
+	ns->msg_ctlmni = allowed;
+
+out_callback:
+
+	printk(KERN_INFO "msgmni has been set to %d for ipc namespace %p\n",
+		ns->msg_ctlmni, ns);
+}
+
 void msg_init_ns(struct ipc_namespace *ns)
 {
 	ns->msg_ctlmax = MSGMAX;
 	ns->msg_ctlmnb = MSGMNB;
-	ns->msg_ctlmni = MSGMNI;
+
+	recompute_msgmni(ns);
+
 	atomic_set(&ns->msg_bytes, 0);
 	atomic_set(&ns->msg_hdrs, 0);
 	ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
@@ -104,21 +142,6 @@ void __init msg_init(void)
 }
 
 /*
- * This routine is called in the paths where the rw_mutex is held to protect
- * access to the idr tree.
- */
-static inline struct msg_queue *msg_lock_check_down(struct ipc_namespace *ns,
-						int id)
-{
-	struct kern_ipc_perm *ipcp = ipc_lock_check_down(&msg_ids(ns), id);
-
-	if (IS_ERR(ipcp))
-		return (struct msg_queue *)ipcp;
-
-	return container_of(ipcp, struct msg_queue, q_perm);
-}
-
-/*
  * msg_lock_(check_) routines are called in the paths where the rw_mutex
  * is not held.
  */
@@ -186,7 +209,6 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 		return id;
 	}
 
-	msq->q_perm.id = msg_buildid(id, msq->q_perm.seq);
 	msq->q_stime = msq->q_rtime = 0;
 	msq->q_ctime = get_seconds();
 	msq->q_cbytes = msq->q_qnum = 0;
@@ -324,19 +346,19 @@ copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
 	out.msg_rtime = in->msg_rtime;
 	out.msg_ctime = in->msg_ctime;
 
-	if (in->msg_cbytes > USHRT_MAX)
-		out.msg_cbytes = USHRT_MAX;
+	if (in->msg_cbytes > USHORT_MAX)
+		out.msg_cbytes = USHORT_MAX;
 	else
 		out.msg_cbytes = in->msg_cbytes;
 	out.msg_lcbytes = in->msg_cbytes;
 
-	if (in->msg_qnum > USHRT_MAX)
-		out.msg_qnum = USHRT_MAX;
+	if (in->msg_qnum > USHORT_MAX)
+		out.msg_qnum = USHORT_MAX;
 	else
 		out.msg_qnum = in->msg_qnum;
 
-	if (in->msg_qbytes > USHRT_MAX)
-		out.msg_qbytes = USHRT_MAX;
+	if (in->msg_qbytes > USHORT_MAX)
+		out.msg_qbytes = USHORT_MAX;
 	else
 		out.msg_qbytes = in->msg_qbytes;
 	out.msg_lqbytes = in->msg_qbytes;
@@ -351,31 +373,14 @@ copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
 	}
 }
 
-struct msq_setbuf {
-	unsigned long	qbytes;
-	uid_t		uid;
-	gid_t		gid;
-	mode_t		mode;
-};
-
 static inline unsigned long
-copy_msqid_from_user(struct msq_setbuf *out, void __user *buf, int version)
+copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
 {
 	switch(version) {
 	case IPC_64:
-	{
-		struct msqid64_ds tbuf;
-
-		if (copy_from_user(&tbuf, buf, sizeof(tbuf)))
+		if (copy_from_user(out, buf, sizeof(*out)))
 			return -EFAULT;
-
-		out->qbytes = tbuf.msg_qbytes;
-		out->uid = tbuf.msg_perm.uid;
-		out->gid = tbuf.msg_perm.gid;
-		out->mode = tbuf.msg_perm.mode;
-
 		return 0;
-	}
 	case IPC_OLD:
 	{
 		struct msqid_ds tbuf_old;
@@ -383,14 +388,14 @@ copy_msqid_from_user(struct msq_setbuf *out, void __user *buf, int version)
 		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
 			return -EFAULT;
 
-		out->uid = tbuf_old.msg_perm.uid;
-		out->gid = tbuf_old.msg_perm.gid;
-		out->mode = tbuf_old.msg_perm.mode;
+		out->msg_perm.uid = tbuf_old.msg_perm.uid;
+		out->msg_perm.gid = tbuf_old.msg_perm.gid;
+		out->msg_perm.mode = tbuf_old.msg_perm.mode;
 
 		if (tbuf_old.msg_qbytes == 0)
-			out->qbytes = tbuf_old.msg_lqbytes;
+			out->msg_qbytes = tbuf_old.msg_lqbytes;
 		else
-			out->qbytes = tbuf_old.msg_qbytes;
+			out->msg_qbytes = tbuf_old.msg_qbytes;
 
 		return 0;
 	}
@@ -399,10 +404,71 @@ copy_msqid_from_user(struct msq_setbuf *out, void __user *buf, int version)
 	}
 }
 
-asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
+/*
+ * This function handles some msgctl commands which require the rw_mutex
+ * to be held in write mode.
+ * NOTE: no locks must be held, the rw_mutex is taken inside this function.
+ */
+static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
+		       struct msqid_ds __user *buf, int version)
 {
 	struct kern_ipc_perm *ipcp;
-	struct msq_setbuf uninitialized_var(setbuf);
+	struct msqid64_ds msqid64;
+	struct msg_queue *msq;
+	int err;
+
+	if (cmd == IPC_SET) {
+		if (copy_msqid_from_user(&msqid64, buf, version))
+			return -EFAULT;
+	}
+
+	ipcp = ipcctl_pre_down(&msg_ids(ns), msqid, cmd,
+			       &msqid64.msg_perm, msqid64.msg_qbytes);
+	if (IS_ERR(ipcp))
+		return PTR_ERR(ipcp);
+
+	msq = container_of(ipcp, struct msg_queue, q_perm);
+
+	err = security_msg_queue_msgctl(msq, cmd);
+	if (err)
+		goto out_unlock;
+
+	switch (cmd) {
+	case IPC_RMID:
+		freeque(ns, ipcp);
+		goto out_up;
+	case IPC_SET:
+		if (msqid64.msg_qbytes > ns->msg_ctlmnb &&
+		    !capable(CAP_SYS_RESOURCE)) {
+			err = -EPERM;
+			goto out_unlock;
+		}
+
+		msq->q_qbytes = msqid64.msg_qbytes;
+
+		ipc_update_perm(&msqid64.msg_perm, ipcp);
+		msq->q_ctime = get_seconds();
+		/* sleeping receivers might be excluded by
+		 * stricter permissions.
+		 */
+		expunge_all(msq, -EAGAIN);
+		/* sleeping senders might be able to send
+		 * due to a larger queue size.
+		 */
+		ss_wakeup(&msq->q_senders, 0);
+		break;
+	default:
+		err = -EINVAL;
+	}
+out_unlock:
+	msg_unlock(msq);
+out_up:
+	up_write(&msg_ids(ns).rw_mutex);
+	return err;
+}
+
+asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
+{
 	struct msg_queue *msq;
 	int err, version;
 	struct ipc_namespace *ns;
@@ -498,82 +564,13 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
 		return success_return;
 	}
 	case IPC_SET:
-		if (!buf)
-			return -EFAULT;
-		if (copy_msqid_from_user(&setbuf, buf, version))
-			return -EFAULT;
-		break;
 	case IPC_RMID:
-		break;
+		err = msgctl_down(ns, msqid, cmd, buf, version);
+		return err;
 	default:
 		return -EINVAL;
 	}
 
-	down_write(&msg_ids(ns).rw_mutex);
-	msq = msg_lock_check_down(ns, msqid);
-	if (IS_ERR(msq)) {
-		err = PTR_ERR(msq);
-		goto out_up;
-	}
-
-	ipcp = &msq->q_perm;
-
-	err = audit_ipc_obj(ipcp);
-	if (err)
-		goto out_unlock_up;
-	if (cmd == IPC_SET) {
-		err = audit_ipc_set_perm(setbuf.qbytes, setbuf.uid, setbuf.gid,
-					 setbuf.mode);
-		if (err)
-			goto out_unlock_up;
-	}
-
-	err = -EPERM;
-	if (current->euid != ipcp->cuid &&
-	    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN))
-		/* We _could_ check for CAP_CHOWN above, but we don't */
-		goto out_unlock_up;
-
-	err = security_msg_queue_msgctl(msq, cmd);
-	if (err)
-		goto out_unlock_up;
-
-	switch (cmd) {
-	case IPC_SET:
-	{
-		err = -EPERM;
-		if (setbuf.qbytes > ns->msg_ctlmnb && !capable(CAP_SYS_RESOURCE))
-			goto out_unlock_up;
-
-		msq->q_qbytes = setbuf.qbytes;
-
-		ipcp->uid = setbuf.uid;
-		ipcp->gid = setbuf.gid;
-		ipcp->mode = (ipcp->mode & ~S_IRWXUGO) |
-			(S_IRWXUGO & setbuf.mode);
-		msq->q_ctime = get_seconds();
-		/* sleeping receivers might be excluded by
-		 * stricter permissions.
-		 */
-		expunge_all(msq, -EAGAIN);
-		/* sleeping senders might be able to send
-		 * due to a larger queue size.
-		 */
-		ss_wakeup(&msq->q_senders, 0);
-		msg_unlock(msq);
-		break;
-	}
-	case IPC_RMID:
-		freeque(ns, &msq->q_perm);
-		break;
-	}
-	err = 0;
-out_up:
-	up_write(&msg_ids(ns).rw_mutex);
-	return err;
-out_unlock_up:
-	msg_unlock(msq);
-	goto out_up;
 out_unlock:
 	msg_unlock(msq);
 	return err;
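
[Editor's note] A worked example of the recompute_msgmni() clamp added above,
rewritten as self-contained userspace C. The kernel version works in pages
(totalram - totalhigh, scaled by mem_unit); this sketch takes lowmem in bytes
directly. The constants mirror the defaults of this kernel era
(MSG_MEM_SCALE = 32, MSGMNB = 16384, MSGMNI = 16, IPCMNI = 32768) and should
be treated as assumptions:

#include <stdio.h>

#define MSG_MEM_SCALE	32
#define MSGMNB		16384
#define MSGMNI		16
#define IPCMNI		32768

static int recompute_msgmni_demo(unsigned long lowmem_bytes, int nb_ns)
{
	unsigned long allowed = (lowmem_bytes / MSG_MEM_SCALE) / MSGMNB;

	allowed /= nb_ns;
	if (allowed < MSGMNI)
		return MSGMNI;
	if (allowed > IPCMNI / nb_ns)
		return IPCMNI / nb_ns;
	return allowed;
}

int main(void)
{
	/* 1 GiB of lowmem, one ipc namespace: (2^30 / 32) / 16384 = 2048 */
	printf("%d\n", recompute_msgmni_demo(1UL << 30, 1));
	/* 64 MiB of lowmem: (2^26 / 32) / 16384 = 128 */
	printf("%d\n", recompute_msgmni_demo(1UL << 26, 1));
	return 0;
}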
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 1b967655eb35..9171d948751e 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -20,10 +20,20 @@ static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns)
 	if (ns == NULL)
 		return ERR_PTR(-ENOMEM);
 
+	atomic_inc(&nr_ipc_ns);
+
 	sem_init_ns(ns);
 	msg_init_ns(ns);
 	shm_init_ns(ns);
 
+	/*
+	 * msgmni has already been computed for the new ipc ns.
+	 * Thus, do the ipcns creation notification before registering that
+	 * new ipcns in the chain.
+	 */
+	ipcns_notify(IPCNS_CREATED);
+	register_ipcns_notifier(ns);
+
 	kref_init(&ns->kref);
 	return ns;
 }
@@ -79,8 +89,24 @@ void free_ipc_ns(struct kref *kref)
 	struct ipc_namespace *ns;
 
 	ns = container_of(kref, struct ipc_namespace, kref);
+	/*
+	 * Unregistering the hotplug notifier at the beginning guarantees
+	 * that the ipc namespace won't be freed while we are inside the
+	 * callback routine. Since the blocking_notifier_chain_XXX routines
+	 * hold a rw lock on the notifier list, unregister_ipcns_notifier()
+	 * won't take the rw lock before blocking_notifier_call_chain() has
+	 * released the rd lock.
+	 */
+	unregister_ipcns_notifier(ns);
 	sem_exit_ns(ns);
 	msg_exit_ns(ns);
 	shm_exit_ns(ns);
 	kfree(ns);
+	atomic_dec(&nr_ipc_ns);
+
+	/*
+	 * Do the ipcns removal notification after decrementing nr_ipc_ns in
+	 * order to have a correct value when recomputing msgmni.
+	 */
+	ipcns_notify(IPCNS_REMOVED);
 }
diff --git a/ipc/sem.c b/ipc/sem.c
index 0b45a4d383c6..e9418df5ff3e 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -91,7 +91,6 @@
 
 #define sem_unlock(sma)	ipc_unlock(&(sma)->sem_perm)
 #define sem_checkid(sma, semid)	ipc_checkid(&sma->sem_perm, semid)
-#define sem_buildid(id, seq)	ipc_buildid(id, seq)
 
 static int newary(struct ipc_namespace *, struct ipc_params *);
 static void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
@@ -142,21 +141,6 @@ void __init sem_init (void)
 }
 
 /*
- * This routine is called in the paths where the rw_mutex is held to protect
- * access to the idr tree.
- */
-static inline struct sem_array *sem_lock_check_down(struct ipc_namespace *ns,
-						int id)
-{
-	struct kern_ipc_perm *ipcp = ipc_lock_check_down(&sem_ids(ns), id);
-
-	if (IS_ERR(ipcp))
-		return (struct sem_array *)ipcp;
-
-	return container_of(ipcp, struct sem_array, sem_perm);
-}
-
-/*
  * sem_lock_(check_) routines are called in the paths where the rw_mutex
  * is not held.
  */
@@ -181,6 +165,25 @@ static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns,
 	return container_of(ipcp, struct sem_array, sem_perm);
 }
 
+static inline void sem_lock_and_putref(struct sem_array *sma)
+{
+	ipc_lock_by_ptr(&sma->sem_perm);
+	ipc_rcu_putref(sma);
+}
+
+static inline void sem_getref_and_unlock(struct sem_array *sma)
+{
+	ipc_rcu_getref(sma);
+	ipc_unlock(&(sma)->sem_perm);
+}
+
+static inline void sem_putref(struct sem_array *sma)
+{
+	ipc_lock_by_ptr(&sma->sem_perm);
+	ipc_rcu_putref(sma);
+	ipc_unlock(&(sma)->sem_perm);
+}
+
 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
 {
 	ipc_rmid(&sem_ids(ns), &s->sem_perm);
@@ -268,7 +271,6 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	}
 	ns->used_sems += nsems;
 
-	sma->sem_perm.id = sem_buildid(id, sma->sem_perm.seq);
 	sma->sem_base = (struct sem *) &sma[1];
 	/* sma->sem_pending = NULL; */
 	sma->sem_pending_last = &sma->sem_pending;
@@ -700,19 +702,15 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		int i;
 
 		if(nsems > SEMMSL_FAST) {
-			ipc_rcu_getref(sma);
-			sem_unlock(sma);
+			sem_getref_and_unlock(sma);
 
 			sem_io = ipc_alloc(sizeof(ushort)*nsems);
 			if(sem_io == NULL) {
-				ipc_lock_by_ptr(&sma->sem_perm);
-				ipc_rcu_putref(sma);
-				sem_unlock(sma);
+				sem_putref(sma);
 				return -ENOMEM;
 			}
 
-			ipc_lock_by_ptr(&sma->sem_perm);
-			ipc_rcu_putref(sma);
+			sem_lock_and_putref(sma);
 			if (sma->sem_perm.deleted) {
 				sem_unlock(sma);
 				err = -EIDRM;
@@ -733,38 +731,30 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		int i;
 		struct sem_undo *un;
 
-		ipc_rcu_getref(sma);
-		sem_unlock(sma);
+		sem_getref_and_unlock(sma);
 
 		if(nsems > SEMMSL_FAST) {
 			sem_io = ipc_alloc(sizeof(ushort)*nsems);
 			if(sem_io == NULL) {
-				ipc_lock_by_ptr(&sma->sem_perm);
-				ipc_rcu_putref(sma);
-				sem_unlock(sma);
+				sem_putref(sma);
 				return -ENOMEM;
 			}
 		}
 
 		if (copy_from_user (sem_io, arg.array, nsems*sizeof(ushort))) {
-			ipc_lock_by_ptr(&sma->sem_perm);
-			ipc_rcu_putref(sma);
-			sem_unlock(sma);
+			sem_putref(sma);
 			err = -EFAULT;
 			goto out_free;
 		}
 
 		for (i = 0; i < nsems; i++) {
 			if (sem_io[i] > SEMVMX) {
-				ipc_lock_by_ptr(&sma->sem_perm);
-				ipc_rcu_putref(sma);
-				sem_unlock(sma);
+				sem_putref(sma);
 				err = -ERANGE;
 				goto out_free;
 			}
 		}
-		ipc_lock_by_ptr(&sma->sem_perm);
-		ipc_rcu_putref(sma);
+		sem_lock_and_putref(sma);
 		if (sma->sem_perm.deleted) {
 			sem_unlock(sma);
 			err = -EIDRM;
@@ -830,28 +820,14 @@ out_free:
 	return err;
 }
 
-struct sem_setbuf {
-	uid_t	uid;
-	gid_t	gid;
-	mode_t	mode;
-};
-
-static inline unsigned long copy_semid_from_user(struct sem_setbuf *out, void __user *buf, int version)
+static inline unsigned long
+copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
 {
 	switch(version) {
 	case IPC_64:
-	{
-		struct semid64_ds tbuf;
-
-		if(copy_from_user(&tbuf, buf, sizeof(tbuf)))
+		if (copy_from_user(out, buf, sizeof(*out)))
 			return -EFAULT;
-
-		out->uid = tbuf.sem_perm.uid;
-		out->gid = tbuf.sem_perm.gid;
-		out->mode = tbuf.sem_perm.mode;
-
 		return 0;
-	}
 	case IPC_OLD:
 	{
 		struct semid_ds tbuf_old;
@@ -859,9 +835,9 @@ static inline unsigned long copy_semid_from_user(struct sem_setbuf *out, void __
 		if(copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
 			return -EFAULT;
 
-		out->uid = tbuf_old.sem_perm.uid;
-		out->gid = tbuf_old.sem_perm.gid;
-		out->mode = tbuf_old.sem_perm.mode;
+		out->sem_perm.uid = tbuf_old.sem_perm.uid;
+		out->sem_perm.gid = tbuf_old.sem_perm.gid;
+		out->sem_perm.mode = tbuf_old.sem_perm.mode;
 
 		return 0;
 	}
@@ -870,38 +846,29 @@ static inline unsigned long copy_semid_from_user(struct sem_setbuf *out, void __
 	}
 }
 
-static int semctl_down(struct ipc_namespace *ns, int semid, int semnum,
-		int cmd, int version, union semun arg)
+/*
+ * This function handles some semctl commands which require the rw_mutex
+ * to be held in write mode.
+ * NOTE: no locks must be held, the rw_mutex is taken inside this function.
+ */
+static int semctl_down(struct ipc_namespace *ns, int semid,
+		       int cmd, int version, union semun arg)
 {
 	struct sem_array *sma;
 	int err;
-	struct sem_setbuf uninitialized_var(setbuf);
+	struct semid64_ds semid64;
 	struct kern_ipc_perm *ipcp;
 
 	if(cmd == IPC_SET) {
-		if(copy_semid_from_user (&setbuf, arg.buf, version))
+		if (copy_semid_from_user(&semid64, arg.buf, version))
 			return -EFAULT;
 	}
-	sma = sem_lock_check_down(ns, semid);
-	if (IS_ERR(sma))
-		return PTR_ERR(sma);
 
-	ipcp = &sma->sem_perm;
-
-	err = audit_ipc_obj(ipcp);
-	if (err)
-		goto out_unlock;
+	ipcp = ipcctl_pre_down(&sem_ids(ns), semid, cmd, &semid64.sem_perm, 0);
+	if (IS_ERR(ipcp))
+		return PTR_ERR(ipcp);
 
-	if (cmd == IPC_SET) {
-		err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode);
-		if (err)
-			goto out_unlock;
-	}
-	if (current->euid != ipcp->cuid &&
-	    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) {
-		err=-EPERM;
-		goto out_unlock;
-	}
+	sma = container_of(ipcp, struct sem_array, sem_perm);
 
 	err = security_sem_semctl(sma, cmd);
 	if (err)
@@ -910,26 +877,19 @@ static int semctl_down(struct ipc_namespace *ns, int semid, int semnum,
 	switch(cmd){
 	case IPC_RMID:
 		freeary(ns, ipcp);
-		err = 0;
-		break;
+		goto out_up;
 	case IPC_SET:
-		ipcp->uid = setbuf.uid;
-		ipcp->gid = setbuf.gid;
-		ipcp->mode = (ipcp->mode & ~S_IRWXUGO)
-				| (setbuf.mode & S_IRWXUGO);
+		ipc_update_perm(&semid64.sem_perm, ipcp);
 		sma->sem_ctime = get_seconds();
-		sem_unlock(sma);
-		err = 0;
 		break;
 	default:
-		sem_unlock(sma);
 		err = -EINVAL;
-		break;
 	}
-	return err;
 
 out_unlock:
 	sem_unlock(sma);
+out_up:
+	up_write(&sem_ids(ns).rw_mutex);
 	return err;
 }
 
@@ -963,9 +923,7 @@ asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg)
 		return err;
 	case IPC_RMID:
 	case IPC_SET:
-		down_write(&sem_ids(ns).rw_mutex);
-		err = semctl_down(ns,semid,semnum,cmd,version,arg);
-		up_write(&sem_ids(ns).rw_mutex);
+		err = semctl_down(ns, semid, cmd, version, arg);
 		return err;
 	default:
 		return -EINVAL;
@@ -1044,14 +1002,11 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 		return ERR_PTR(PTR_ERR(sma));
 
 	nsems = sma->sem_nsems;
-	ipc_rcu_getref(sma);
-	sem_unlock(sma);
+	sem_getref_and_unlock(sma);
 
 	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
 	if (!new) {
-		ipc_lock_by_ptr(&sma->sem_perm);
-		ipc_rcu_putref(sma);
-		sem_unlock(sma);
+		sem_putref(sma);
 		return ERR_PTR(-ENOMEM);
 	}
 	new->semadj = (short *) &new[1];
@@ -1062,13 +1017,10 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 	if (un) {
 		spin_unlock(&ulp->lock);
 		kfree(new);
-		ipc_lock_by_ptr(&sma->sem_perm);
-		ipc_rcu_putref(sma);
-		sem_unlock(sma);
+		sem_putref(sma);
 		goto out;
 	}
-	ipc_lock_by_ptr(&sma->sem_perm);
-	ipc_rcu_putref(sma);
+	sem_lock_and_putref(sma);
 	if (sma->sem_perm.deleted) {
 		sem_unlock(sma);
 		spin_unlock(&ulp->lock);
@@ -1298,6 +1250,7 @@ void exit_sem(struct task_struct *tsk)
 	undo_list = tsk->sysvsem.undo_list;
 	if (!undo_list)
 		return;
+	tsk->sysvsem.undo_list = NULL;
 
 	if (!atomic_dec_and_test(&undo_list->refcnt))
 		return;
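
[Editor's note] The three sem_*ref helpers introduced above name one recurring
pattern: take an RCU reference, drop the spinlock around a sleeping
allocation, then relock, drop the reference and recheck that the array still
exists. A sketch of the shape, using only calls visible in this diff (the
surrounding demo_grow() function is hypothetical):

static int demo_grow(struct sem_array *sma, int bytes)
{
	void *buf;

	sem_getref_and_unlock(sma);	/* take a reference, drop the lock */

	buf = ipc_alloc(bytes);		/* may sleep: lock must not be held */
	if (buf == NULL) {
		sem_putref(sma);	/* lock, drop reference, unlock */
		return -ENOMEM;
	}

	sem_lock_and_putref(sma);	/* relock, then drop the reference */
	if (sma->sem_perm.deleted) {	/* array may have died meanwhile */
		sem_unlock(sma);
		ipc_free(buf, bytes);
		return -EIDRM;
	}
	/* ... use buf under the lock ... */
	sem_unlock(sma);
	return 0;
}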
diff --git a/ipc/shm.c b/ipc/shm.c
index e636910454a9..554429ade079 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -60,7 +60,6 @@ static struct vm_operations_struct shm_vm_ops;
 
 #define shm_unlock(shp)			\
 	ipc_unlock(&(shp)->shm_perm)
-#define shm_buildid(id, seq)	ipc_buildid(id, seq)
 
 static int newseg(struct ipc_namespace *, struct ipc_params *);
 static void shm_open(struct vm_area_struct *vma);
@@ -127,18 +126,6 @@ static inline struct shmid_kernel *shm_lock_down(struct ipc_namespace *ns,
 	return container_of(ipcp, struct shmid_kernel, shm_perm);
 }
 
-static inline struct shmid_kernel *shm_lock_check_down(
-						struct ipc_namespace *ns,
-						int id)
-{
-	struct kern_ipc_perm *ipcp = ipc_lock_check_down(&shm_ids(ns), id);
-
-	if (IS_ERR(ipcp))
-		return (struct shmid_kernel *)ipcp;
-
-	return container_of(ipcp, struct shmid_kernel, shm_perm);
-}
-
 /*
  * shm_lock_(check_) routines are called in the paths where the rw_mutex
  * is not held.
@@ -169,12 +156,6 @@ static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
 	ipc_rmid(&shm_ids(ns), &s->shm_perm);
 }
 
-static inline int shm_addid(struct ipc_namespace *ns, struct shmid_kernel *shp)
-{
-	return ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
-}
-
-
 
 /* This is called by fork, once for every shm attach. */
 static void shm_open(struct vm_area_struct *vma)
@@ -416,7 +397,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	if (IS_ERR(file))
 		goto no_file;
 
-	id = shm_addid(ns, shp);
+	id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
 	if (id < 0) {
 		error = id;
 		goto no_id;
@@ -428,7 +409,6 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	shp->shm_ctim = get_seconds();
 	shp->shm_segsz = size;
 	shp->shm_nattch = 0;
-	shp->shm_perm.id = shm_buildid(id, shp->shm_perm.seq);
 	shp->shm_file = file;
 	/*
 	 * shmid gets reported as "inode#" in /proc/pid/maps.
@@ -519,28 +499,14 @@ static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_
 	}
 }
 
-struct shm_setbuf {
-	uid_t	uid;
-	gid_t	gid;
-	mode_t	mode;
-};
-
-static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __user *buf, int version)
+static inline unsigned long
+copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
 {
 	switch(version) {
 	case IPC_64:
-	{
-		struct shmid64_ds tbuf;
-
-		if (copy_from_user(&tbuf, buf, sizeof(tbuf)))
+		if (copy_from_user(out, buf, sizeof(*out)))
 			return -EFAULT;
-
-		out->uid	= tbuf.shm_perm.uid;
-		out->gid	= tbuf.shm_perm.gid;
-		out->mode	= tbuf.shm_perm.mode;
-
 		return 0;
-	}
 	case IPC_OLD:
 	{
 		struct shmid_ds tbuf_old;
@@ -548,9 +514,9 @@ static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __
 		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
 			return -EFAULT;
 
-		out->uid = tbuf_old.shm_perm.uid;
-		out->gid = tbuf_old.shm_perm.gid;
-		out->mode = tbuf_old.shm_perm.mode;
+		out->shm_perm.uid = tbuf_old.shm_perm.uid;
+		out->shm_perm.gid = tbuf_old.shm_perm.gid;
+		out->shm_perm.mode = tbuf_old.shm_perm.mode;
 
 		return 0;
 	}
@@ -624,9 +590,53 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
 	}
 }
 
-asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
+/*
+ * This function handles some shmctl commands which require the rw_mutex
+ * to be held in write mode.
+ * NOTE: no locks must be held, the rw_mutex is taken inside this function.
+ */
+static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
+		       struct shmid_ds __user *buf, int version)
+{
+	struct kern_ipc_perm *ipcp;
+	struct shmid64_ds shmid64;
+	struct shmid_kernel *shp;
+	int err;
+
+	if (cmd == IPC_SET) {
+		if (copy_shmid_from_user(&shmid64, buf, version))
+			return -EFAULT;
+	}
+
+	ipcp = ipcctl_pre_down(&shm_ids(ns), shmid, cmd, &shmid64.shm_perm, 0);
+	if (IS_ERR(ipcp))
+		return PTR_ERR(ipcp);
+
+	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+
+	err = security_shm_shmctl(shp, cmd);
+	if (err)
+		goto out_unlock;
+	switch (cmd) {
+	case IPC_RMID:
+		do_shm_rmid(ns, ipcp);
+		goto out_up;
+	case IPC_SET:
+		ipc_update_perm(&shmid64.shm_perm, ipcp);
+		shp->shm_ctim = get_seconds();
+		break;
+	default:
+		err = -EINVAL;
+	}
+out_unlock:
+	shm_unlock(shp);
+out_up:
+	up_write(&shm_ids(ns).rw_mutex);
+	return err;
+}
+
+asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
 {
-	struct shm_setbuf setbuf;
 	struct shmid_kernel *shp;
 	int err, version;
 	struct ipc_namespace *ns;
@@ -783,97 +793,13 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 		goto out;
 	}
 	case IPC_RMID:
-	{
-		/*
-		 * We cannot simply remove the file. The SVID states
-		 * that the block remains until the last person
-		 * detaches from it, then is deleted. A shmat() on
-		 * an RMID segment is legal in older Linux and if
-		 * we change it apps break...
-		 *
-		 * Instead we set a destroyed flag, and then blow
-		 * the name away when the usage hits zero.
-		 */
-		down_write(&shm_ids(ns).rw_mutex);
-		shp = shm_lock_check_down(ns, shmid);
-		if (IS_ERR(shp)) {
-			err = PTR_ERR(shp);
-			goto out_up;
-		}
-
-		err = audit_ipc_obj(&(shp->shm_perm));
-		if (err)
-			goto out_unlock_up;
-
-		if (current->euid != shp->shm_perm.uid &&
-		    current->euid != shp->shm_perm.cuid &&
-		    !capable(CAP_SYS_ADMIN)) {
-			err=-EPERM;
-			goto out_unlock_up;
-		}
-
-		err = security_shm_shmctl(shp, cmd);
-		if (err)
-			goto out_unlock_up;
-
-		do_shm_rmid(ns, &shp->shm_perm);
-		up_write(&shm_ids(ns).rw_mutex);
-		goto out;
-	}
-
 	case IPC_SET:
-	{
-		if (!buf) {
-			err = -EFAULT;
-			goto out;
-		}
-
-		if (copy_shmid_from_user (&setbuf, buf, version)) {
-			err = -EFAULT;
-			goto out;
-		}
-		down_write(&shm_ids(ns).rw_mutex);
-		shp = shm_lock_check_down(ns, shmid);
-		if (IS_ERR(shp)) {
-			err = PTR_ERR(shp);
-			goto out_up;
-		}
-		err = audit_ipc_obj(&(shp->shm_perm));
-		if (err)
-			goto out_unlock_up;
-		err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode);
-		if (err)
-			goto out_unlock_up;
-		err=-EPERM;
-		if (current->euid != shp->shm_perm.uid &&
-		    current->euid != shp->shm_perm.cuid &&
-		    !capable(CAP_SYS_ADMIN)) {
-			goto out_unlock_up;
-		}
-
-		err = security_shm_shmctl(shp, cmd);
-		if (err)
-			goto out_unlock_up;
-
-		shp->shm_perm.uid = setbuf.uid;
-		shp->shm_perm.gid = setbuf.gid;
-		shp->shm_perm.mode = (shp->shm_perm.mode & ~S_IRWXUGO)
-			| (setbuf.mode & S_IRWXUGO);
-		shp->shm_ctim = get_seconds();
-		break;
-	}
-
+		err = shmctl_down(ns, shmid, cmd, buf, version);
+		return err;
 	default:
-		err = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
-	err = 0;
-out_unlock_up:
-	shm_unlock(shp);
-out_up:
-	up_write(&shm_ids(ns).rw_mutex);
-	goto out;
 out_unlock:
 	shm_unlock(shp);
 out:
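
[Editor's note] The long comment deleted above (still visible in the -lines)
is the canonical statement of SVID IPC_RMID semantics: the segment is only
marked destroyed and lives on until the last detach. A userspace sketch of
that behaviour, not part of the patch:

#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(void)
{
	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
	char *p = shmat(id, NULL, 0);

	shmctl(id, IPC_RMID, NULL);	/* only marks the segment destroyed */
	strcpy(p, "still mapped");	/* the existing attach stays valid */
	puts(p);
	shmdt(p);			/* last detach: now it is freed */
	return 0;
}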
diff --git a/ipc/util.c b/ipc/util.c
index fd1b50da9db8..3339177b336c 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -33,6 +33,7 @@
 #include <linux/audit.h>
 #include <linux/nsproxy.h>
 #include <linux/rwsem.h>
+#include <linux/memory.h>
 #include <linux/ipc_namespace.h>
 
 #include <asm/unistd.h>
@@ -52,11 +53,57 @@ struct ipc_namespace init_ipc_ns = {
 	},
 };
 
+atomic_t nr_ipc_ns = ATOMIC_INIT(1);
+
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+
+static void ipc_memory_notifier(struct work_struct *work)
+{
+	ipcns_notify(IPCNS_MEMCHANGED);
+}
+
+static DECLARE_WORK(ipc_memory_wq, ipc_memory_notifier);
+
+
+static int ipc_memory_callback(struct notifier_block *self,
+				unsigned long action, void *arg)
+{
+	switch (action) {
+	case MEM_ONLINE:    /* memory successfully brought online */
+	case MEM_OFFLINE:   /* or offline: it's time to recompute msgmni */
+		/*
+		 * This is done by invoking the ipcns notifier chain with the
+		 * IPC_MEMCHANGED event.
+		 * In order not to keep the lock on the hotplug memory chain
+		 * for too long, queue a work item that will, when woken up,
+		 * activate the ipcns notification chain.
+		 * No need to keep several ipc work items on the queue.
+		 */
+		if (!work_pending(&ipc_memory_wq))
+			schedule_work(&ipc_memory_wq);
+		break;
+	case MEM_GOING_ONLINE:
+	case MEM_GOING_OFFLINE:
+	case MEM_CANCEL_ONLINE:
+	case MEM_CANCEL_OFFLINE:
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
 /**
  * ipc_init - initialise IPC subsystem
  *
  * The various system5 IPC resources (semaphores, messages and shared
  * memory) are initialised
+ * A callback routine is registered into the memory hotplug notifier
+ * chain: since msgmni scales to lowmem this callback routine will be
+ * called upon successful memory add / remove to recompute msgmni.
  */
 
 static int __init ipc_init(void)
@@ -64,6 +111,8 @@ static int __init ipc_init(void)
 	sem_init();
 	msg_init();
 	shm_init();
+	hotplug_memory_notifier(ipc_memory_callback, IPC_CALLBACK_PRI);
+	register_ipcns_notifier(&init_ipc_ns);
 	return 0;
 }
 __initcall(ipc_init);
@@ -84,8 +133,8 @@ void ipc_init_ids(struct ipc_ids *ids)
 	ids->seq = 0;
 	{
 		int seq_limit = INT_MAX/SEQ_MULTIPLIER;
-		if(seq_limit > USHRT_MAX)
-			ids->seq_max = USHRT_MAX;
+		if (seq_limit > USHORT_MAX)
+			ids->seq_max = USHORT_MAX;
 		else
 			ids->seq_max = seq_limit;
 	}
@@ -116,13 +165,12 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
 	iface->ids = ids;
 	iface->show = show;
 
-	pde = create_proc_entry(path,
+	pde = proc_create_data(path,
 			S_IRUGO,       /* world readable */
-			NULL           /* parent dir */);
-	if (pde) {
-		pde->data = iface;
-		pde->proc_fops = &sysvipc_proc_fops;
-	} else {
+			NULL,          /* parent dir */
+			&sysvipc_proc_fops,
+			iface);
+	if (!pde) {
 		kfree(iface);
 	}
 }
@@ -231,6 +279,7 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
 	if(ids->seq > ids->seq_max)
 		ids->seq = 0;
 
+	new->id = ipc_buildid(id, new->seq);
 	spin_lock_init(&new->lock);
 	new->deleted = 0;
 	rcu_read_lock();
@@ -761,6 +810,70 @@ int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
 	return ipcget_public(ns, ids, ops, params);
 }
 
+/**
+ * ipc_update_perm - update the permissions of an IPC.
+ * @in:  the permission given as input.
+ * @out: the permission of the ipc to set.
+ */
+void ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out)
+{
+	out->uid = in->uid;
+	out->gid = in->gid;
+	out->mode = (out->mode & ~S_IRWXUGO)
+		| (in->mode & S_IRWXUGO);
+}
+
+/**
+ * ipcctl_pre_down - retrieve an ipc and check permissions for some IPC_XXX cmd
+ * @ids:  the table of ids where to look for the ipc
+ * @id:   the id of the ipc to retrieve
+ * @cmd:  the cmd to check
+ * @perm: the permission to set
+ * @extra_perm: one extra permission parameter used by msq
+ *
+ * This function does some common audit and permissions check for some IPC_XXX
+ * cmd and is called from semctl_down, shmctl_down and msgctl_down.
+ * It must be called without any lock held and
+ *  - retrieves the ipc with the given id in the given table.
+ *  - performs some audit and permission check, depending on the given cmd
+ *  - returns the ipc with both ipc and rw_mutex locks held in case of success
+ *    or an err-code without any lock held otherwise.
+ */
+struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd,
+				      struct ipc64_perm *perm, int extra_perm)
+{
+	struct kern_ipc_perm *ipcp;
+	int err;
+
+	down_write(&ids->rw_mutex);
+	ipcp = ipc_lock_check_down(ids, id);
+	if (IS_ERR(ipcp)) {
+		err = PTR_ERR(ipcp);
+		goto out_up;
+	}
+
+	err = audit_ipc_obj(ipcp);
+	if (err)
+		goto out_unlock;
+
+	if (cmd == IPC_SET) {
+		err = audit_ipc_set_perm(extra_perm, perm->uid,
+					 perm->gid, perm->mode);
+		if (err)
+			goto out_unlock;
+	}
+	if (current->euid == ipcp->cuid ||
+	    current->euid == ipcp->uid || capable(CAP_SYS_ADMIN))
+		return ipcp;
+
+	err = -EPERM;
+out_unlock:
+	ipc_unlock(ipcp);
+out_up:
+	up_write(&ids->rw_mutex);
+	return ERR_PTR(err);
+}
+
 #ifdef __ARCH_WANT_IPC_PARSE_VERSION
 
 
diff --git a/ipc/util.h b/ipc/util.h
index f37d160c98fe..cdb966aebe07 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -12,7 +12,6 @@
 
 #include <linux/err.h>
 
-#define USHRT_MAX 0xffff
 #define SEQ_MULTIPLIER	(IPCMNI)
 
 void sem_init (void);
@@ -112,6 +111,9 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
 
 void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
 void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out);
+void ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out);
+struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd,
+				      struct ipc64_perm *perm, int extra_perm);
 
 #if defined(__ia64__) || defined(__x86_64__) || defined(__hppa__) || defined(__XTENSA__)
   /* On IA-64, we always use the "64-bit version" of the IPC structures.  */
@@ -124,6 +126,8 @@ extern void free_msg(struct msg_msg *msg);
 extern struct msg_msg *load_msg(const void __user *src, int len);
 extern int store_msg(void __user *dest, struct msg_msg *msg, int len);
 
+extern void recompute_msgmni(struct ipc_namespace *);
+
 static inline int ipc_buildid(int id, int seq)
 {
 	return SEQ_MULTIPLIER * seq + id;
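
[Editor's note] Worked example of ipc_buildid(), assuming the usual
IPCMNI = 32768 (so SEQ_MULTIPLIER = 32768): an object sitting in idr slot 5
whose slot has been reused twice (seq = 2) gets

	id = 32768 * 2 + 5 = 65541

which is the identifier userspace sees; the seq factor is what lets the kernel
detect a stale id after the slot has been recycled, by recomputing
seq = id / SEQ_MULTIPLIER and comparing it against the stored sequence number.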