aboutsummaryrefslogtreecommitdiffstats
path: root/ipc
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@elte.hu> 2009-04-10 06:46:28 -0400
committer: Ingo Molnar <mingo@elte.hu> 2009-04-10 06:46:51 -0400
commit: 1cad1252ed279ea59f3f8d3d3a5817eeb2f7a4d3 (patch)
tree: ec5af7a70f58ad27ad21fc27815ca164ccf92c36 /ipc
parent: dcef788eb9659b61a2110284fcce3ca6e63480d2 (diff)
parent: 93cfb3c9fd83d877a8f1ffad9ff862b617b32828 (diff)
Merge branch 'tracing/urgent' into tracing/core
Merge reason: pick up both v2.6.30-rc1 [which includes tracing/urgent fixes] and pick up the current lineup of tracing/urgent fixes as well

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'ipc')
-rw-r--r--  ipc/Makefile       1
-rw-r--r--  ipc/mq_sysctl.c  116
-rw-r--r--  ipc/mqueue.c     256
-rw-r--r--  ipc/msgutil.c     19
-rw-r--r--  ipc/namespace.c   39
-rw-r--r--  ipc/util.c         9
-rw-r--r--  ipc/util.h        18
7 files changed, 318 insertions, 140 deletions
diff --git a/ipc/Makefile b/ipc/Makefile
index 65c384395801..4e1955ea815d 100644
--- a/ipc/Makefile
+++ b/ipc/Makefile
@@ -8,4 +8,5 @@ obj-$(CONFIG_SYSVIPC_SYSCTL) += ipc_sysctl.o
8obj_mq-$(CONFIG_COMPAT) += compat_mq.o 8obj_mq-$(CONFIG_COMPAT) += compat_mq.o
9obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y) 9obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y)
10obj-$(CONFIG_IPC_NS) += namespace.o 10obj-$(CONFIG_IPC_NS) += namespace.o
11obj-$(CONFIG_POSIX_MQUEUE_SYSCTL) += mq_sysctl.o
11 12
diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c
new file mode 100644
index 000000000000..89f60ec8ee54
--- /dev/null
+++ b/ipc/mq_sysctl.c
@@ -0,0 +1,116 @@
1/*
2 * Copyright (C) 2007 IBM Corporation
3 *
4 * Author: Cedric Le Goater <clg@fr.ibm.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation, version 2 of the
9 * License.
10 */
11
12#include <linux/nsproxy.h>
13#include <linux/ipc_namespace.h>
14#include <linux/sysctl.h>
15
16/*
17 * Define the ranges various user-specified maximum values can
18 * be set to.
19 */
20#define MIN_MSGMAX 1 /* min value for msg_max */
21#define MAX_MSGMAX HARD_MSGMAX /* max value for msg_max */
22#define MIN_MSGSIZEMAX 128 /* min value for msgsize_max */
23#define MAX_MSGSIZEMAX (8192*128) /* max value for msgsize_max */
24
25static void *get_mq(ctl_table *table)
26{
27 char *which = table->data;
28 struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
29 which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns;
30 return which;
31}
32
33#ifdef CONFIG_PROC_SYSCTL
34static int proc_mq_dointvec(ctl_table *table, int write, struct file *filp,
35 void __user *buffer, size_t *lenp, loff_t *ppos)
36{
37 struct ctl_table mq_table;
38 memcpy(&mq_table, table, sizeof(mq_table));
39 mq_table.data = get_mq(table);
40
41 return proc_dointvec(&mq_table, write, filp, buffer, lenp, ppos);
42}
43
44static int proc_mq_dointvec_minmax(ctl_table *table, int write,
45 struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
46{
47 struct ctl_table mq_table;
48 memcpy(&mq_table, table, sizeof(mq_table));
49 mq_table.data = get_mq(table);
50
51 return proc_dointvec_minmax(&mq_table, write, filp, buffer,
52 lenp, ppos);
53}
54#else
55#define proc_mq_dointvec NULL
56#define proc_mq_dointvec_minmax NULL
57#endif
58
59static int msg_max_limit_min = MIN_MSGMAX;
60static int msg_max_limit_max = MAX_MSGMAX;
61
62static int msg_maxsize_limit_min = MIN_MSGSIZEMAX;
63static int msg_maxsize_limit_max = MAX_MSGSIZEMAX;
64
65static ctl_table mq_sysctls[] = {
66 {
67 .procname = "queues_max",
68 .data = &init_ipc_ns.mq_queues_max,
69 .maxlen = sizeof(int),
70 .mode = 0644,
71 .proc_handler = proc_mq_dointvec,
72 },
73 {
74 .procname = "msg_max",
75 .data = &init_ipc_ns.mq_msg_max,
76 .maxlen = sizeof(int),
77 .mode = 0644,
78 .proc_handler = proc_mq_dointvec_minmax,
79 .extra1 = &msg_max_limit_min,
80 .extra2 = &msg_max_limit_max,
81 },
82 {
83 .procname = "msgsize_max",
84 .data = &init_ipc_ns.mq_msgsize_max,
85 .maxlen = sizeof(int),
86 .mode = 0644,
87 .proc_handler = proc_mq_dointvec_minmax,
88 .extra1 = &msg_maxsize_limit_min,
89 .extra2 = &msg_maxsize_limit_max,
90 },
91 { .ctl_name = 0 }
92};
93
94static ctl_table mq_sysctl_dir[] = {
95 {
96 .procname = "mqueue",
97 .mode = 0555,
98 .child = mq_sysctls,
99 },
100 { .ctl_name = 0 }
101};
102
103static ctl_table mq_sysctl_root[] = {
104 {
105 .ctl_name = CTL_FS,
106 .procname = "fs",
107 .mode = 0555,
108 .child = mq_sysctl_dir,
109 },
110 { .ctl_name = 0 }
111};
112
113struct ctl_table_header *mq_register_sysctl_table(void)
114{
115 return register_sysctl_table(mq_sysctl_root);
116}
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 916785363f0f..e35ba2c3a8d7 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -31,6 +31,7 @@
31#include <linux/mutex.h> 31#include <linux/mutex.h>
32#include <linux/nsproxy.h> 32#include <linux/nsproxy.h>
33#include <linux/pid.h> 33#include <linux/pid.h>
34#include <linux/ipc_namespace.h>
34 35
35#include <net/sock.h> 36#include <net/sock.h>
36#include "util.h" 37#include "util.h"
@@ -46,21 +47,6 @@
46#define STATE_PENDING 1 47#define STATE_PENDING 1
47#define STATE_READY 2 48#define STATE_READY 2
48 49
49/* default values */
50#define DFLT_QUEUESMAX 256 /* max number of message queues */
51#define DFLT_MSGMAX 10 /* max number of messages in each queue */
52#define HARD_MSGMAX (131072/sizeof(void*))
53#define DFLT_MSGSIZEMAX 8192 /* max message size */
54
55/*
56 * Define the ranges various user-specified maximum values can
57 * be set to.
58 */
59#define MIN_MSGMAX 1 /* min value for msg_max */
60#define MAX_MSGMAX HARD_MSGMAX /* max value for msg_max */
61#define MIN_MSGSIZEMAX 128 /* min value for msgsize_max */
62#define MAX_MSGSIZEMAX (8192*128) /* max value for msgsize_max */
63
64struct ext_wait_queue { /* queue of sleeping tasks */ 50struct ext_wait_queue { /* queue of sleeping tasks */
65 struct task_struct *task; 51 struct task_struct *task;
66 struct list_head list; 52 struct list_head list;
@@ -93,14 +79,7 @@ static const struct file_operations mqueue_file_operations;
93static struct super_operations mqueue_super_ops; 79static struct super_operations mqueue_super_ops;
94static void remove_notification(struct mqueue_inode_info *info); 80static void remove_notification(struct mqueue_inode_info *info);
95 81
96static spinlock_t mq_lock;
97static struct kmem_cache *mqueue_inode_cachep; 82static struct kmem_cache *mqueue_inode_cachep;
98static struct vfsmount *mqueue_mnt;
99
100static unsigned int queues_count;
101static unsigned int queues_max = DFLT_QUEUESMAX;
102static unsigned int msg_max = DFLT_MSGMAX;
103static unsigned int msgsize_max = DFLT_MSGSIZEMAX;
104 83
105static struct ctl_table_header * mq_sysctl_table; 84static struct ctl_table_header * mq_sysctl_table;
106 85
@@ -109,8 +88,27 @@ static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode)
109 return container_of(inode, struct mqueue_inode_info, vfs_inode); 88 return container_of(inode, struct mqueue_inode_info, vfs_inode);
110} 89}
111 90
112static struct inode *mqueue_get_inode(struct super_block *sb, int mode, 91/*
113 struct mq_attr *attr) 92 * This routine should be called with the mq_lock held.
93 */
94static inline struct ipc_namespace *__get_ns_from_inode(struct inode *inode)
95{
96 return get_ipc_ns(inode->i_sb->s_fs_info);
97}
98
99static struct ipc_namespace *get_ns_from_inode(struct inode *inode)
100{
101 struct ipc_namespace *ns;
102
103 spin_lock(&mq_lock);
104 ns = __get_ns_from_inode(inode);
105 spin_unlock(&mq_lock);
106 return ns;
107}
108
109static struct inode *mqueue_get_inode(struct super_block *sb,
110 struct ipc_namespace *ipc_ns, int mode,
111 struct mq_attr *attr)
114{ 112{
115 struct user_struct *u = current_user(); 113 struct user_struct *u = current_user();
116 struct inode *inode; 114 struct inode *inode;
@@ -141,8 +139,8 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
141 info->qsize = 0; 139 info->qsize = 0;
142 info->user = NULL; /* set when all is ok */ 140 info->user = NULL; /* set when all is ok */
143 memset(&info->attr, 0, sizeof(info->attr)); 141 memset(&info->attr, 0, sizeof(info->attr));
144 info->attr.mq_maxmsg = msg_max; 142 info->attr.mq_maxmsg = ipc_ns->mq_msg_max;
145 info->attr.mq_msgsize = msgsize_max; 143 info->attr.mq_msgsize = ipc_ns->mq_msgsize_max;
146 if (attr) { 144 if (attr) {
147 info->attr.mq_maxmsg = attr->mq_maxmsg; 145 info->attr.mq_maxmsg = attr->mq_maxmsg;
148 info->attr.mq_msgsize = attr->mq_msgsize; 146 info->attr.mq_msgsize = attr->mq_msgsize;
@@ -188,30 +186,38 @@ out_inode:
188static int mqueue_fill_super(struct super_block *sb, void *data, int silent) 186static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
189{ 187{
190 struct inode *inode; 188 struct inode *inode;
189 struct ipc_namespace *ns = data;
190 int error = 0;
191 191
192 sb->s_blocksize = PAGE_CACHE_SIZE; 192 sb->s_blocksize = PAGE_CACHE_SIZE;
193 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 193 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
194 sb->s_magic = MQUEUE_MAGIC; 194 sb->s_magic = MQUEUE_MAGIC;
195 sb->s_op = &mqueue_super_ops; 195 sb->s_op = &mqueue_super_ops;
196 196
197 inode = mqueue_get_inode(sb, S_IFDIR | S_ISVTX | S_IRWXUGO, NULL); 197 inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO,
198 if (!inode) 198 NULL);
199 return -ENOMEM; 199 if (!inode) {
200 error = -ENOMEM;
201 goto out;
202 }
200 203
201 sb->s_root = d_alloc_root(inode); 204 sb->s_root = d_alloc_root(inode);
202 if (!sb->s_root) { 205 if (!sb->s_root) {
203 iput(inode); 206 iput(inode);
204 return -ENOMEM; 207 error = -ENOMEM;
205 } 208 }
206 209
207 return 0; 210out:
211 return error;
208} 212}
209 213
210static int mqueue_get_sb(struct file_system_type *fs_type, 214static int mqueue_get_sb(struct file_system_type *fs_type,
211 int flags, const char *dev_name, 215 int flags, const char *dev_name,
212 void *data, struct vfsmount *mnt) 216 void *data, struct vfsmount *mnt)
213{ 217{
214 return get_sb_single(fs_type, flags, data, mqueue_fill_super, mnt); 218 if (!(flags & MS_KERNMOUNT))
219 data = current->nsproxy->ipc_ns;
220 return get_sb_ns(fs_type, flags, data, mqueue_fill_super, mnt);
215} 221}
216 222
217static void init_once(void *foo) 223static void init_once(void *foo)
@@ -242,11 +248,13 @@ static void mqueue_delete_inode(struct inode *inode)
242 struct user_struct *user; 248 struct user_struct *user;
243 unsigned long mq_bytes; 249 unsigned long mq_bytes;
244 int i; 250 int i;
251 struct ipc_namespace *ipc_ns;
245 252
246 if (S_ISDIR(inode->i_mode)) { 253 if (S_ISDIR(inode->i_mode)) {
247 clear_inode(inode); 254 clear_inode(inode);
248 return; 255 return;
249 } 256 }
257 ipc_ns = get_ns_from_inode(inode);
250 info = MQUEUE_I(inode); 258 info = MQUEUE_I(inode);
251 spin_lock(&info->lock); 259 spin_lock(&info->lock);
252 for (i = 0; i < info->attr.mq_curmsgs; i++) 260 for (i = 0; i < info->attr.mq_curmsgs; i++)
@@ -262,10 +270,19 @@ static void mqueue_delete_inode(struct inode *inode)
262 if (user) { 270 if (user) {
263 spin_lock(&mq_lock); 271 spin_lock(&mq_lock);
264 user->mq_bytes -= mq_bytes; 272 user->mq_bytes -= mq_bytes;
265 queues_count--; 273 /*
274 * get_ns_from_inode() ensures that the
275 * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns
276 * to which we now hold a reference, or it is NULL.
277 * We can't put it here under mq_lock, though.
278 */
279 if (ipc_ns)
280 ipc_ns->mq_queues_count--;
266 spin_unlock(&mq_lock); 281 spin_unlock(&mq_lock);
267 free_uid(user); 282 free_uid(user);
268 } 283 }
284 if (ipc_ns)
285 put_ipc_ns(ipc_ns);
269} 286}
270 287
271static int mqueue_create(struct inode *dir, struct dentry *dentry, 288static int mqueue_create(struct inode *dir, struct dentry *dentry,
@@ -274,31 +291,41 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry,
274 struct inode *inode; 291 struct inode *inode;
275 struct mq_attr *attr = dentry->d_fsdata; 292 struct mq_attr *attr = dentry->d_fsdata;
276 int error; 293 int error;
294 struct ipc_namespace *ipc_ns;
277 295
278 spin_lock(&mq_lock); 296 spin_lock(&mq_lock);
279 if (queues_count >= queues_max && !capable(CAP_SYS_RESOURCE)) { 297 ipc_ns = __get_ns_from_inode(dir);
298 if (!ipc_ns) {
299 error = -EACCES;
300 goto out_unlock;
301 }
302 if (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max &&
303 !capable(CAP_SYS_RESOURCE)) {
280 error = -ENOSPC; 304 error = -ENOSPC;
281 goto out_lock; 305 goto out_unlock;
282 } 306 }
283 queues_count++; 307 ipc_ns->mq_queues_count++;
284 spin_unlock(&mq_lock); 308 spin_unlock(&mq_lock);
285 309
286 inode = mqueue_get_inode(dir->i_sb, mode, attr); 310 inode = mqueue_get_inode(dir->i_sb, ipc_ns, mode, attr);
287 if (!inode) { 311 if (!inode) {
288 error = -ENOMEM; 312 error = -ENOMEM;
289 spin_lock(&mq_lock); 313 spin_lock(&mq_lock);
290 queues_count--; 314 ipc_ns->mq_queues_count--;
291 goto out_lock; 315 goto out_unlock;
292 } 316 }
293 317
318 put_ipc_ns(ipc_ns);
294 dir->i_size += DIRENT_SIZE; 319 dir->i_size += DIRENT_SIZE;
295 dir->i_ctime = dir->i_mtime = dir->i_atime = CURRENT_TIME; 320 dir->i_ctime = dir->i_mtime = dir->i_atime = CURRENT_TIME;
296 321
297 d_instantiate(dentry, inode); 322 d_instantiate(dentry, inode);
298 dget(dentry); 323 dget(dentry);
299 return 0; 324 return 0;
300out_lock: 325out_unlock:
301 spin_unlock(&mq_lock); 326 spin_unlock(&mq_lock);
327 if (ipc_ns)
328 put_ipc_ns(ipc_ns);
302 return error; 329 return error;
303} 330}
304 331
@@ -562,7 +589,7 @@ static void remove_notification(struct mqueue_inode_info *info)
562 info->notify_owner = NULL; 589 info->notify_owner = NULL;
563} 590}
564 591
565static int mq_attr_ok(struct mq_attr *attr) 592static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr)
566{ 593{
567 if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0) 594 if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0)
568 return 0; 595 return 0;
@@ -570,8 +597,8 @@ static int mq_attr_ok(struct mq_attr *attr)
570 if (attr->mq_maxmsg > HARD_MSGMAX) 597 if (attr->mq_maxmsg > HARD_MSGMAX)
571 return 0; 598 return 0;
572 } else { 599 } else {
573 if (attr->mq_maxmsg > msg_max || 600 if (attr->mq_maxmsg > ipc_ns->mq_msg_max ||
574 attr->mq_msgsize > msgsize_max) 601 attr->mq_msgsize > ipc_ns->mq_msgsize_max)
575 return 0; 602 return 0;
576 } 603 }
577 /* check for overflow */ 604 /* check for overflow */
@@ -587,8 +614,9 @@ static int mq_attr_ok(struct mq_attr *attr)
587/* 614/*
588 * Invoked when creating a new queue via sys_mq_open 615 * Invoked when creating a new queue via sys_mq_open
589 */ 616 */
590static struct file *do_create(struct dentry *dir, struct dentry *dentry, 617static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir,
591 int oflag, mode_t mode, struct mq_attr *attr) 618 struct dentry *dentry, int oflag, mode_t mode,
619 struct mq_attr *attr)
592{ 620{
593 const struct cred *cred = current_cred(); 621 const struct cred *cred = current_cred();
594 struct file *result; 622 struct file *result;
@@ -596,14 +624,14 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry,
596 624
597 if (attr) { 625 if (attr) {
598 ret = -EINVAL; 626 ret = -EINVAL;
599 if (!mq_attr_ok(attr)) 627 if (!mq_attr_ok(ipc_ns, attr))
600 goto out; 628 goto out;
601 /* store for use during create */ 629 /* store for use during create */
602 dentry->d_fsdata = attr; 630 dentry->d_fsdata = attr;
603 } 631 }
604 632
605 mode &= ~current_umask(); 633 mode &= ~current_umask();
606 ret = mnt_want_write(mqueue_mnt); 634 ret = mnt_want_write(ipc_ns->mq_mnt);
607 if (ret) 635 if (ret)
608 goto out; 636 goto out;
609 ret = vfs_create(dir->d_inode, dentry, mode, NULL); 637 ret = vfs_create(dir->d_inode, dentry, mode, NULL);
@@ -611,24 +639,25 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry,
611 if (ret) 639 if (ret)
612 goto out_drop_write; 640 goto out_drop_write;
613 641
614 result = dentry_open(dentry, mqueue_mnt, oflag, cred); 642 result = dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred);
615 /* 643 /*
616 * dentry_open() took a persistent mnt_want_write(), 644 * dentry_open() took a persistent mnt_want_write(),
617 * so we can now drop this one. 645 * so we can now drop this one.
618 */ 646 */
619 mnt_drop_write(mqueue_mnt); 647 mnt_drop_write(ipc_ns->mq_mnt);
620 return result; 648 return result;
621 649
622out_drop_write: 650out_drop_write:
623 mnt_drop_write(mqueue_mnt); 651 mnt_drop_write(ipc_ns->mq_mnt);
624out: 652out:
625 dput(dentry); 653 dput(dentry);
626 mntput(mqueue_mnt); 654 mntput(ipc_ns->mq_mnt);
627 return ERR_PTR(ret); 655 return ERR_PTR(ret);
628} 656}
629 657
630/* Opens existing queue */ 658/* Opens existing queue */
631static struct file *do_open(struct dentry *dentry, int oflag) 659static struct file *do_open(struct ipc_namespace *ipc_ns,
660 struct dentry *dentry, int oflag)
632{ 661{
633 const struct cred *cred = current_cred(); 662 const struct cred *cred = current_cred();
634 663
@@ -637,17 +666,17 @@ static struct file *do_open(struct dentry *dentry, int oflag)
637 666
638 if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) { 667 if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) {
639 dput(dentry); 668 dput(dentry);
640 mntput(mqueue_mnt); 669 mntput(ipc_ns->mq_mnt);
641 return ERR_PTR(-EINVAL); 670 return ERR_PTR(-EINVAL);
642 } 671 }
643 672
644 if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) { 673 if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) {
645 dput(dentry); 674 dput(dentry);
646 mntput(mqueue_mnt); 675 mntput(ipc_ns->mq_mnt);
647 return ERR_PTR(-EACCES); 676 return ERR_PTR(-EACCES);
648 } 677 }
649 678
650 return dentry_open(dentry, mqueue_mnt, oflag, cred); 679 return dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred);
651} 680}
652 681
653SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode, 682SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
@@ -658,6 +687,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
658 char *name; 687 char *name;
659 struct mq_attr attr; 688 struct mq_attr attr;
660 int fd, error; 689 int fd, error;
690 struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
661 691
662 if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr))) 692 if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr)))
663 return -EFAULT; 693 return -EFAULT;
@@ -671,13 +701,13 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
671 if (fd < 0) 701 if (fd < 0)
672 goto out_putname; 702 goto out_putname;
673 703
674 mutex_lock(&mqueue_mnt->mnt_root->d_inode->i_mutex); 704 mutex_lock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
675 dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name)); 705 dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name));
676 if (IS_ERR(dentry)) { 706 if (IS_ERR(dentry)) {
677 error = PTR_ERR(dentry); 707 error = PTR_ERR(dentry);
678 goto out_err; 708 goto out_err;
679 } 709 }
680 mntget(mqueue_mnt); 710 mntget(ipc_ns->mq_mnt);
681 711
682 if (oflag & O_CREAT) { 712 if (oflag & O_CREAT) {
683 if (dentry->d_inode) { /* entry already exists */ 713 if (dentry->d_inode) { /* entry already exists */
@@ -685,10 +715,10 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
685 error = -EEXIST; 715 error = -EEXIST;
686 if (oflag & O_EXCL) 716 if (oflag & O_EXCL)
687 goto out; 717 goto out;
688 filp = do_open(dentry, oflag); 718 filp = do_open(ipc_ns, dentry, oflag);
689 } else { 719 } else {
690 filp = do_create(mqueue_mnt->mnt_root, dentry, 720 filp = do_create(ipc_ns, ipc_ns->mq_mnt->mnt_root,
691 oflag, mode, 721 dentry, oflag, mode,
692 u_attr ? &attr : NULL); 722 u_attr ? &attr : NULL);
693 } 723 }
694 } else { 724 } else {
@@ -696,7 +726,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
696 if (!dentry->d_inode) 726 if (!dentry->d_inode)
697 goto out; 727 goto out;
698 audit_inode(name, dentry); 728 audit_inode(name, dentry);
699 filp = do_open(dentry, oflag); 729 filp = do_open(ipc_ns, dentry, oflag);
700 } 730 }
701 731
702 if (IS_ERR(filp)) { 732 if (IS_ERR(filp)) {
@@ -709,13 +739,13 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
709 739
710out: 740out:
711 dput(dentry); 741 dput(dentry);
712 mntput(mqueue_mnt); 742 mntput(ipc_ns->mq_mnt);
713out_putfd: 743out_putfd:
714 put_unused_fd(fd); 744 put_unused_fd(fd);
715out_err: 745out_err:
716 fd = error; 746 fd = error;
717out_upsem: 747out_upsem:
718 mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex); 748 mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
719out_putname: 749out_putname:
720 putname(name); 750 putname(name);
721 return fd; 751 return fd;
@@ -727,14 +757,15 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
727 char *name; 757 char *name;
728 struct dentry *dentry; 758 struct dentry *dentry;
729 struct inode *inode = NULL; 759 struct inode *inode = NULL;
760 struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
730 761
731 name = getname(u_name); 762 name = getname(u_name);
732 if (IS_ERR(name)) 763 if (IS_ERR(name))
733 return PTR_ERR(name); 764 return PTR_ERR(name);
734 765
735 mutex_lock_nested(&mqueue_mnt->mnt_root->d_inode->i_mutex, 766 mutex_lock_nested(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex,
736 I_MUTEX_PARENT); 767 I_MUTEX_PARENT);
737 dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name)); 768 dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name));
738 if (IS_ERR(dentry)) { 769 if (IS_ERR(dentry)) {
739 err = PTR_ERR(dentry); 770 err = PTR_ERR(dentry);
740 goto out_unlock; 771 goto out_unlock;
@@ -748,16 +779,16 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
748 inode = dentry->d_inode; 779 inode = dentry->d_inode;
749 if (inode) 780 if (inode)
750 atomic_inc(&inode->i_count); 781 atomic_inc(&inode->i_count);
751 err = mnt_want_write(mqueue_mnt); 782 err = mnt_want_write(ipc_ns->mq_mnt);
752 if (err) 783 if (err)
753 goto out_err; 784 goto out_err;
754 err = vfs_unlink(dentry->d_parent->d_inode, dentry); 785 err = vfs_unlink(dentry->d_parent->d_inode, dentry);
755 mnt_drop_write(mqueue_mnt); 786 mnt_drop_write(ipc_ns->mq_mnt);
756out_err: 787out_err:
757 dput(dentry); 788 dput(dentry);
758 789
759out_unlock: 790out_unlock:
760 mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex); 791 mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
761 putname(name); 792 putname(name);
762 if (inode) 793 if (inode)
763 iput(inode); 794 iput(inode);
@@ -1205,59 +1236,31 @@ static struct file_system_type mqueue_fs_type = {
1205 .kill_sb = kill_litter_super, 1236 .kill_sb = kill_litter_super,
1206}; 1237};
1207 1238
1208static int msg_max_limit_min = MIN_MSGMAX; 1239int mq_init_ns(struct ipc_namespace *ns)
1209static int msg_max_limit_max = MAX_MSGMAX; 1240{
1210 1241 ns->mq_queues_count = 0;
1211static int msg_maxsize_limit_min = MIN_MSGSIZEMAX; 1242 ns->mq_queues_max = DFLT_QUEUESMAX;
1212static int msg_maxsize_limit_max = MAX_MSGSIZEMAX; 1243 ns->mq_msg_max = DFLT_MSGMAX;
1213 1244 ns->mq_msgsize_max = DFLT_MSGSIZEMAX;
1214static ctl_table mq_sysctls[] = { 1245
1215 { 1246 ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns);
1216 .procname = "queues_max", 1247 if (IS_ERR(ns->mq_mnt)) {
1217 .data = &queues_max, 1248 int err = PTR_ERR(ns->mq_mnt);
1218 .maxlen = sizeof(int), 1249 ns->mq_mnt = NULL;
1219 .mode = 0644, 1250 return err;
1220 .proc_handler = &proc_dointvec, 1251 }
1221 }, 1252 return 0;
1222 { 1253}
1223 .procname = "msg_max",
1224 .data = &msg_max,
1225 .maxlen = sizeof(int),
1226 .mode = 0644,
1227 .proc_handler = &proc_dointvec_minmax,
1228 .extra1 = &msg_max_limit_min,
1229 .extra2 = &msg_max_limit_max,
1230 },
1231 {
1232 .procname = "msgsize_max",
1233 .data = &msgsize_max,
1234 .maxlen = sizeof(int),
1235 .mode = 0644,
1236 .proc_handler = &proc_dointvec_minmax,
1237 .extra1 = &msg_maxsize_limit_min,
1238 .extra2 = &msg_maxsize_limit_max,
1239 },
1240 { .ctl_name = 0 }
1241};
1242 1254
1243static ctl_table mq_sysctl_dir[] = { 1255void mq_clear_sbinfo(struct ipc_namespace *ns)
1244 { 1256{
1245 .procname = "mqueue", 1257 ns->mq_mnt->mnt_sb->s_fs_info = NULL;
1246 .mode = 0555, 1258}
1247 .child = mq_sysctls,
1248 },
1249 { .ctl_name = 0 }
1250};
1251 1259
1252static ctl_table mq_sysctl_root[] = { 1260void mq_put_mnt(struct ipc_namespace *ns)
1253 { 1261{
1254 .ctl_name = CTL_FS, 1262 mntput(ns->mq_mnt);
1255 .procname = "fs", 1263}
1256 .mode = 0555,
1257 .child = mq_sysctl_dir,
1258 },
1259 { .ctl_name = 0 }
1260};
1261 1264
1262static int __init init_mqueue_fs(void) 1265static int __init init_mqueue_fs(void)
1263{ 1266{
@@ -1270,21 +1273,20 @@ static int __init init_mqueue_fs(void)
1270 return -ENOMEM; 1273 return -ENOMEM;
1271 1274
1272 /* ignore failues - they are not fatal */ 1275 /* ignore failues - they are not fatal */
1273 mq_sysctl_table = register_sysctl_table(mq_sysctl_root); 1276 mq_sysctl_table = mq_register_sysctl_table();
1274 1277
1275 error = register_filesystem(&mqueue_fs_type); 1278 error = register_filesystem(&mqueue_fs_type);
1276 if (error) 1279 if (error)
1277 goto out_sysctl; 1280 goto out_sysctl;
1278 1281
1279 if (IS_ERR(mqueue_mnt = kern_mount(&mqueue_fs_type))) { 1282 spin_lock_init(&mq_lock);
1280 error = PTR_ERR(mqueue_mnt); 1283
1284 init_ipc_ns.mq_mnt = kern_mount_data(&mqueue_fs_type, &init_ipc_ns);
1285 if (IS_ERR(init_ipc_ns.mq_mnt)) {
1286 error = PTR_ERR(init_ipc_ns.mq_mnt);
1281 goto out_filesystem; 1287 goto out_filesystem;
1282 } 1288 }
1283 1289
1284 /* internal initialization - not common for vfs */
1285 queues_count = 0;
1286 spin_lock_init(&mq_lock);
1287
1288 return 0; 1290 return 0;
1289 1291
1290out_filesystem: 1292out_filesystem:
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index c82c215693d7..f095ee268833 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -13,10 +13,29 @@
13#include <linux/security.h> 13#include <linux/security.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/ipc.h> 15#include <linux/ipc.h>
16#include <linux/ipc_namespace.h>
16#include <asm/uaccess.h> 17#include <asm/uaccess.h>
17 18
18#include "util.h" 19#include "util.h"
19 20
21DEFINE_SPINLOCK(mq_lock);
22
23/*
24 * The next 2 defines are here bc this is the only file
25 * compiled when either CONFIG_SYSVIPC and CONFIG_POSIX_MQUEUE
26 * and not CONFIG_IPC_NS.
27 */
28struct ipc_namespace init_ipc_ns = {
29 .count = ATOMIC_INIT(1),
30#ifdef CONFIG_POSIX_MQUEUE
31 .mq_queues_max = DFLT_QUEUESMAX,
32 .mq_msg_max = DFLT_MSGMAX,
33 .mq_msgsize_max = DFLT_MSGSIZEMAX,
34#endif
35};
36
37atomic_t nr_ipc_ns = ATOMIC_INIT(1);
38
20struct msg_msgseg { 39struct msg_msgseg {
21 struct msg_msgseg* next; 40 struct msg_msgseg* next;
22 /* the next part of the message follows immediately */ 41 /* the next part of the message follows immediately */
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 9171d948751e..4a5e752a9276 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -9,17 +9,26 @@
9#include <linux/rcupdate.h> 9#include <linux/rcupdate.h>
10#include <linux/nsproxy.h> 10#include <linux/nsproxy.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/fs.h>
13#include <linux/mount.h>
12 14
13#include "util.h" 15#include "util.h"
14 16
15static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns) 17static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns)
16{ 18{
17 struct ipc_namespace *ns; 19 struct ipc_namespace *ns;
20 int err;
18 21
19 ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL); 22 ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
20 if (ns == NULL) 23 if (ns == NULL)
21 return ERR_PTR(-ENOMEM); 24 return ERR_PTR(-ENOMEM);
22 25
26 atomic_set(&ns->count, 1);
27 err = mq_init_ns(ns);
28 if (err) {
29 kfree(ns);
30 return ERR_PTR(err);
31 }
23 atomic_inc(&nr_ipc_ns); 32 atomic_inc(&nr_ipc_ns);
24 33
25 sem_init_ns(ns); 34 sem_init_ns(ns);
@@ -34,7 +43,6 @@ static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns)
34 ipcns_notify(IPCNS_CREATED); 43 ipcns_notify(IPCNS_CREATED);
35 register_ipcns_notifier(ns); 44 register_ipcns_notifier(ns);
36 45
37 kref_init(&ns->kref);
38 return ns; 46 return ns;
39} 47}
40 48
@@ -84,11 +92,34 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
84 up_write(&ids->rw_mutex); 92 up_write(&ids->rw_mutex);
85} 93}
86 94
87void free_ipc_ns(struct kref *kref) 95/*
96 * put_ipc_ns - drop a reference to an ipc namespace.
97 * @ns: the namespace to put
98 *
99 * If this is the last task in the namespace exiting, and
100 * it is dropping the refcount to 0, then it can race with
101 * a task in another ipc namespace but in a mounts namespace
102 * which has this ipcns's mqueuefs mounted, doing some action
103 * with one of the mqueuefs files. That can raise the refcount.
104 * So dropping the refcount, and raising the refcount when
105 * accessing it through the VFS, are protected with mq_lock.
106 *
107 * (Clearly, a task raising the refcount on its own ipc_ns
108 * needn't take mq_lock since it can't race with the last task
109 * in the ipcns exiting).
110 */
111void put_ipc_ns(struct ipc_namespace *ns)
88{ 112{
89 struct ipc_namespace *ns; 113 if (atomic_dec_and_lock(&ns->count, &mq_lock)) {
114 mq_clear_sbinfo(ns);
115 spin_unlock(&mq_lock);
116 mq_put_mnt(ns);
117 free_ipc_ns(ns);
118 }
119}
90 120
91 ns = container_of(kref, struct ipc_namespace, kref); 121void free_ipc_ns(struct ipc_namespace *ns)
122{
92 /* 123 /*
93 * Unregistering the hotplug notifier at the beginning guarantees 124 * Unregistering the hotplug notifier at the beginning guarantees
94 * that the ipc namespace won't be freed while we are inside the 125 * that the ipc namespace won't be freed while we are inside the
diff --git a/ipc/util.c b/ipc/util.c
index 7585a72e259b..b8e4ba92f6d1 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -47,15 +47,6 @@ struct ipc_proc_iface {
47 int (*show)(struct seq_file *, void *); 47 int (*show)(struct seq_file *, void *);
48}; 48};
49 49
50struct ipc_namespace init_ipc_ns = {
51 .kref = {
52 .refcount = ATOMIC_INIT(2),
53 },
54};
55
56atomic_t nr_ipc_ns = ATOMIC_INIT(1);
57
58
59#ifdef CONFIG_MEMORY_HOTPLUG 50#ifdef CONFIG_MEMORY_HOTPLUG
60 51
61static void ipc_memory_notifier(struct work_struct *work) 52static void ipc_memory_notifier(struct work_struct *work)
diff --git a/ipc/util.h b/ipc/util.h
index 3646b45a03c9..1187332a89d2 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -20,6 +20,15 @@ void shm_init (void);
20 20
21struct ipc_namespace; 21struct ipc_namespace;
22 22
23#ifdef CONFIG_POSIX_MQUEUE
24extern void mq_clear_sbinfo(struct ipc_namespace *ns);
25extern void mq_put_mnt(struct ipc_namespace *ns);
26#else
27static inline void mq_clear_sbinfo(struct ipc_namespace *ns) { }
28static inline void mq_put_mnt(struct ipc_namespace *ns) { }
29#endif
30
31#ifdef CONFIG_SYSVIPC
23void sem_init_ns(struct ipc_namespace *ns); 32void sem_init_ns(struct ipc_namespace *ns);
24void msg_init_ns(struct ipc_namespace *ns); 33void msg_init_ns(struct ipc_namespace *ns);
25void shm_init_ns(struct ipc_namespace *ns); 34void shm_init_ns(struct ipc_namespace *ns);
@@ -27,6 +36,15 @@ void shm_init_ns(struct ipc_namespace *ns);
27void sem_exit_ns(struct ipc_namespace *ns); 36void sem_exit_ns(struct ipc_namespace *ns);
28void msg_exit_ns(struct ipc_namespace *ns); 37void msg_exit_ns(struct ipc_namespace *ns);
29void shm_exit_ns(struct ipc_namespace *ns); 38void shm_exit_ns(struct ipc_namespace *ns);
39#else
40static inline void sem_init_ns(struct ipc_namespace *ns) { }
41static inline void msg_init_ns(struct ipc_namespace *ns) { }
42static inline void shm_init_ns(struct ipc_namespace *ns) { }
43
44static inline void sem_exit_ns(struct ipc_namespace *ns) { }
45static inline void msg_exit_ns(struct ipc_namespace *ns) { }
46static inline void shm_exit_ns(struct ipc_namespace *ns) { }
47#endif
30 48
31/* 49/*
32 * Structure that holds the parameters needed by the ipc operations 50 * Structure that holds the parameters needed by the ipc operations