author     Jonathan Herman <hermanjl@cs.unc.edu>   2013-01-17 16:15:55 -0500
committer  Jonathan Herman <hermanjl@cs.unc.edu>   2013-01-17 16:15:55 -0500
commit     8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
tree       a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /ipc
parent     406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
Patched in Tegra support.
Diffstat (limited to 'ipc')
-rw-r--r--  ipc/compat.c       102
-rw-r--r--  ipc/ipc_sysctl.c    32
-rw-r--r--  ipc/mq_sysctl.c     49
-rw-r--r--  ipc/mqueue.c       589
-rw-r--r--  ipc/msg.c          137
-rw-r--r--  ipc/msgutil.c       52
-rw-r--r--  ipc/namespace.c     33
-rw-r--r--  ipc/sem.c           69
-rw-r--r--  ipc/shm.c           89
-rw-r--r--  ipc/syscall.c        2
-rw-r--r--  ipc/util.c          55
-rw-r--r--  ipc/util.h           6
12 files changed, 374 insertions, 841 deletions
diff --git a/ipc/compat.c b/ipc/compat.c
index 2547f29dcd1..845a28738d3 100644
--- a/ipc/compat.c
+++ b/ipc/compat.c
@@ -27,7 +27,6 @@
27#include <linux/msg.h> 27#include <linux/msg.h>
28#include <linux/shm.h> 28#include <linux/shm.h>
29#include <linux/syscalls.h> 29#include <linux/syscalls.h>
30#include <linux/ptrace.h>
31 30
32#include <linux/mutex.h> 31#include <linux/mutex.h>
33#include <asm/uaccess.h> 32#include <asm/uaccess.h>
@@ -118,7 +117,6 @@ extern int sem_ctls[];
118 117
119static inline int compat_ipc_parse_version(int *cmd) 118static inline int compat_ipc_parse_version(int *cmd)
120{ 119{
121#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION
122 int version = *cmd & IPC_64; 120 int version = *cmd & IPC_64;
123 121
124 /* this is tricky: architectures that have support for the old 122 /* this is tricky: architectures that have support for the old
@@ -130,10 +128,6 @@ static inline int compat_ipc_parse_version(int *cmd)
130 *cmd &= ~IPC_64; 128 *cmd &= ~IPC_64;
131#endif 129#endif
132 return version; 130 return version;
133#else
134 /* With the asm-generic APIs, we always use the 64-bit versions. */
135 return IPC_64;
136#endif
137} 131}
138 132
139static inline int __get_compat_ipc64_perm(struct ipc64_perm *p64, 133static inline int __get_compat_ipc64_perm(struct ipc64_perm *p64,
@@ -238,9 +232,10 @@ static inline int put_compat_semid_ds(struct semid64_ds *s,
238 return err; 232 return err;
239} 233}
240 234
241static long do_compat_semctl(int first, int second, int third, u32 pad) 235long compat_sys_semctl(int first, int second, int third, void __user *uptr)
242{ 236{
243 union semun fourth; 237 union semun fourth;
238 u32 pad;
244 int err, err2; 239 int err, err2;
245 struct semid64_ds s64; 240 struct semid64_ds s64;
246 struct semid64_ds __user *up64; 241 struct semid64_ds __user *up64;
@@ -248,6 +243,10 @@ static long do_compat_semctl(int first, int second, int third, u32 pad)
248 243
249 memset(&s64, 0, sizeof(s64)); 244 memset(&s64, 0, sizeof(s64));
250 245
246 if (!uptr)
247 return -EINVAL;
248 if (get_user(pad, (u32 __user *) uptr))
249 return -EFAULT;
251 if ((third & (~IPC_64)) == SETVAL) 250 if ((third & (~IPC_64)) == SETVAL)
252 fourth.val = (int) pad; 251 fourth.val = (int) pad;
253 else 252 else
@@ -306,32 +305,6 @@ static long do_compat_semctl(int first, int second, int third, u32 pad)
306 return err; 305 return err;
307} 306}
308 307
309long compat_do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
310{
311 struct compat_msgbuf __user *msgp = dest;
312 size_t msgsz;
313
314 if (put_user(msg->m_type, &msgp->mtype))
315 return -EFAULT;
316
317 msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
318 if (store_msg(msgp->mtext, msg, msgsz))
319 return -EFAULT;
320 return msgsz;
321}
322
323#ifdef CONFIG_ARCH_WANT_OLD_COMPAT_IPC
324long compat_sys_semctl(int first, int second, int third, void __user *uptr)
325{
326 u32 pad;
327
328 if (!uptr)
329 return -EINVAL;
330 if (get_user(pad, (u32 __user *) uptr))
331 return -EFAULT;
332 return do_compat_semctl(first, second, third, pad);
333}
334
335long compat_sys_msgsnd(int first, int second, int third, void __user *uptr) 308long compat_sys_msgsnd(int first, int second, int third, void __user *uptr)
336{ 309{
337 struct compat_msgbuf __user *up = uptr; 310 struct compat_msgbuf __user *up = uptr;
@@ -351,6 +324,10 @@ long compat_sys_msgsnd(int first, int second, int third, void __user *uptr)
351long compat_sys_msgrcv(int first, int second, int msgtyp, int third, 324long compat_sys_msgrcv(int first, int second, int msgtyp, int third,
352 int version, void __user *uptr) 325 int version, void __user *uptr)
353{ 326{
327 struct compat_msgbuf __user *up;
328 long type;
329 int err;
330
354 if (first < 0) 331 if (first < 0)
355 return -EINVAL; 332 return -EINVAL;
356 if (second < 0) 333 if (second < 0)
@@ -358,39 +335,24 @@ long compat_sys_msgrcv(int first, int second, int msgtyp, int third,
358 335
359 if (!version) { 336 if (!version) {
360 struct compat_ipc_kludge ipck; 337 struct compat_ipc_kludge ipck;
338 err = -EINVAL;
361 if (!uptr) 339 if (!uptr)
362 return -EINVAL; 340 goto out;
341 err = -EFAULT;
363 if (copy_from_user (&ipck, uptr, sizeof(ipck))) 342 if (copy_from_user (&ipck, uptr, sizeof(ipck)))
364 return -EFAULT; 343 goto out;
365 uptr = compat_ptr(ipck.msgp); 344 uptr = compat_ptr(ipck.msgp);
366 msgtyp = ipck.msgtyp; 345 msgtyp = ipck.msgtyp;
367 } 346 }
368 return do_msgrcv(first, uptr, second, msgtyp, third, 347 up = uptr;
369 compat_do_msg_fill); 348 err = do_msgrcv(first, &type, up->mtext, second, msgtyp, third);
370} 349 if (err < 0)
371#else 350 goto out;
372long compat_sys_semctl(int semid, int semnum, int cmd, int arg) 351 if (put_user(type, &up->mtype))
373{ 352 err = -EFAULT;
374 return do_compat_semctl(semid, semnum, cmd, arg); 353out:
375} 354 return err;
376
377long compat_sys_msgsnd(int msqid, struct compat_msgbuf __user *msgp,
378 compat_ssize_t msgsz, int msgflg)
379{
380 compat_long_t mtype;
381
382 if (get_user(mtype, &msgp->mtype))
383 return -EFAULT;
384 return do_msgsnd(msqid, mtype, msgp->mtext, (ssize_t)msgsz, msgflg);
385}
386
387long compat_sys_msgrcv(int msqid, struct compat_msgbuf __user *msgp,
388 compat_ssize_t msgsz, long msgtyp, int msgflg)
389{
390 return do_msgrcv(msqid, msgp, (ssize_t)msgsz, msgtyp, msgflg,
391 compat_do_msg_fill);
392} 355}
393#endif
394 356
395static inline int get_compat_msqid64(struct msqid64_ds *m64, 357static inline int get_compat_msqid64(struct msqid64_ds *m64,
396 struct compat_msqid64_ds __user *up64) 358 struct compat_msqid64_ds __user *up64)
@@ -508,11 +470,6 @@ long compat_sys_msgctl(int first, int second, void __user *uptr)
508 return err; 470 return err;
509} 471}
510 472
511#ifndef COMPAT_SHMLBA
512#define COMPAT_SHMLBA SHMLBA
513#endif
514
515#ifdef CONFIG_ARCH_WANT_OLD_COMPAT_IPC
516long compat_sys_shmat(int first, int second, compat_uptr_t third, int version, 473long compat_sys_shmat(int first, int second, compat_uptr_t third, int version,
517 void __user *uptr) 474 void __user *uptr)
518{ 475{
@@ -522,25 +479,12 @@ long compat_sys_shmat(int first, int second, compat_uptr_t third, int version,
522 479
523 if (version == 1) 480 if (version == 1)
524 return -EINVAL; 481 return -EINVAL;
525 err = do_shmat(first, uptr, second, &raddr, COMPAT_SHMLBA); 482 err = do_shmat(first, uptr, second, &raddr);
526 if (err < 0) 483 if (err < 0)
527 return err; 484 return err;
528 uaddr = compat_ptr(third); 485 uaddr = compat_ptr(third);
529 return put_user(raddr, uaddr); 486 return put_user(raddr, uaddr);
530} 487}
531#else
532long compat_sys_shmat(int shmid, compat_uptr_t shmaddr, int shmflg)
533{
534 unsigned long ret;
535 long err;
536
537 err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA);
538 if (err)
539 return err;
540 force_successful_syscall_return();
541 return (long)ret;
542}
543#endif
544 488
545static inline int get_compat_shmid64_ds(struct shmid64_ds *s64, 489static inline int get_compat_shmid64_ds(struct shmid64_ds *s64,
546 struct compat_shmid64_ds __user *up64) 490 struct compat_shmid64_ds __user *up64)
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 130dfece27a..00fba2bab87 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -158,9 +158,6 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
158 158
159static int zero; 159static int zero;
160static int one = 1; 160static int one = 1;
161#ifdef CONFIG_CHECKPOINT_RESTORE
162static int int_max = INT_MAX;
163#endif
164 161
165static struct ctl_table ipc_kern_table[] = { 162static struct ctl_table ipc_kern_table[] = {
166 { 163 {
@@ -230,35 +227,6 @@ static struct ctl_table ipc_kern_table[] = {
230 .extra1 = &zero, 227 .extra1 = &zero,
231 .extra2 = &one, 228 .extra2 = &one,
232 }, 229 },
233#ifdef CONFIG_CHECKPOINT_RESTORE
234 {
235 .procname = "sem_next_id",
236 .data = &init_ipc_ns.ids[IPC_SEM_IDS].next_id,
237 .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id),
238 .mode = 0644,
239 .proc_handler = proc_ipc_dointvec_minmax,
240 .extra1 = &zero,
241 .extra2 = &int_max,
242 },
243 {
244 .procname = "msg_next_id",
245 .data = &init_ipc_ns.ids[IPC_MSG_IDS].next_id,
246 .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id),
247 .mode = 0644,
248 .proc_handler = proc_ipc_dointvec_minmax,
249 .extra1 = &zero,
250 .extra2 = &int_max,
251 },
252 {
253 .procname = "shm_next_id",
254 .data = &init_ipc_ns.ids[IPC_SHM_IDS].next_id,
255 .maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id),
256 .mode = 0644,
257 .proc_handler = proc_ipc_dointvec_minmax,
258 .extra1 = &zero,
259 .extra2 = &int_max,
260 },
261#endif
262 {} 230 {}
263}; 231};
264 232
diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c
index 383d638340b..0c09366b96f 100644
--- a/ipc/mq_sysctl.c
+++ b/ipc/mq_sysctl.c
@@ -13,6 +13,15 @@
13#include <linux/ipc_namespace.h> 13#include <linux/ipc_namespace.h>
14#include <linux/sysctl.h> 14#include <linux/sysctl.h>
15 15
16/*
17 * Define the ranges various user-specified maximum values can
18 * be set to.
19 */
20#define MIN_MSGMAX 1 /* min value for msg_max */
21#define MAX_MSGMAX HARD_MSGMAX /* max value for msg_max */
22#define MIN_MSGSIZEMAX 128 /* min value for msgsize_max */
23#define MAX_MSGSIZEMAX (8192*128) /* max value for msgsize_max */
24
16#ifdef CONFIG_PROC_SYSCTL 25#ifdef CONFIG_PROC_SYSCTL
17static void *get_mq(ctl_table *table) 26static void *get_mq(ctl_table *table)
18{ 27{
@@ -22,6 +31,16 @@ static void *get_mq(ctl_table *table)
22 return which; 31 return which;
23} 32}
24 33
34static int proc_mq_dointvec(ctl_table *table, int write,
35 void __user *buffer, size_t *lenp, loff_t *ppos)
36{
37 struct ctl_table mq_table;
38 memcpy(&mq_table, table, sizeof(mq_table));
39 mq_table.data = get_mq(table);
40
41 return proc_dointvec(&mq_table, write, buffer, lenp, ppos);
42}
43
25static int proc_mq_dointvec_minmax(ctl_table *table, int write, 44static int proc_mq_dointvec_minmax(ctl_table *table, int write,
26 void __user *buffer, size_t *lenp, loff_t *ppos) 45 void __user *buffer, size_t *lenp, loff_t *ppos)
27{ 46{
@@ -33,17 +52,15 @@ static int proc_mq_dointvec_minmax(ctl_table *table, int write,
33 lenp, ppos); 52 lenp, ppos);
34} 53}
35#else 54#else
55#define proc_mq_dointvec NULL
36#define proc_mq_dointvec_minmax NULL 56#define proc_mq_dointvec_minmax NULL
37#endif 57#endif
38 58
39static int msg_queues_limit_min = MIN_QUEUESMAX;
40static int msg_queues_limit_max = HARD_QUEUESMAX;
41
42static int msg_max_limit_min = MIN_MSGMAX; 59static int msg_max_limit_min = MIN_MSGMAX;
43static int msg_max_limit_max = HARD_MSGMAX; 60static int msg_max_limit_max = MAX_MSGMAX;
44 61
45static int msg_maxsize_limit_min = MIN_MSGSIZEMAX; 62static int msg_maxsize_limit_min = MIN_MSGSIZEMAX;
46static int msg_maxsize_limit_max = HARD_MSGSIZEMAX; 63static int msg_maxsize_limit_max = MAX_MSGSIZEMAX;
47 64
48static ctl_table mq_sysctls[] = { 65static ctl_table mq_sysctls[] = {
49 { 66 {
@@ -51,9 +68,7 @@ static ctl_table mq_sysctls[] = {
51 .data = &init_ipc_ns.mq_queues_max, 68 .data = &init_ipc_ns.mq_queues_max,
52 .maxlen = sizeof(int), 69 .maxlen = sizeof(int),
53 .mode = 0644, 70 .mode = 0644,
54 .proc_handler = proc_mq_dointvec_minmax, 71 .proc_handler = proc_mq_dointvec,
55 .extra1 = &msg_queues_limit_min,
56 .extra2 = &msg_queues_limit_max,
57 }, 72 },
58 { 73 {
59 .procname = "msg_max", 74 .procname = "msg_max",
@@ -73,24 +88,6 @@ static ctl_table mq_sysctls[] = {
73 .extra1 = &msg_maxsize_limit_min, 88 .extra1 = &msg_maxsize_limit_min,
74 .extra2 = &msg_maxsize_limit_max, 89 .extra2 = &msg_maxsize_limit_max,
75 }, 90 },
76 {
77 .procname = "msg_default",
78 .data = &init_ipc_ns.mq_msg_default,
79 .maxlen = sizeof(int),
80 .mode = 0644,
81 .proc_handler = proc_mq_dointvec_minmax,
82 .extra1 = &msg_max_limit_min,
83 .extra2 = &msg_max_limit_max,
84 },
85 {
86 .procname = "msgsize_default",
87 .data = &init_ipc_ns.mq_msgsize_default,
88 .maxlen = sizeof(int),
89 .mode = 0644,
90 .proc_handler = proc_mq_dointvec_minmax,
91 .extra1 = &msg_maxsize_limit_min,
92 .extra2 = &msg_maxsize_limit_max,
93 },
94 {} 91 {}
95}; 92};
96 93
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 71a3ca18c87..ed049ea568f 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -24,7 +24,6 @@
24#include <linux/mqueue.h> 24#include <linux/mqueue.h>
25#include <linux/msg.h> 25#include <linux/msg.h>
26#include <linux/skbuff.h> 26#include <linux/skbuff.h>
27#include <linux/vmalloc.h>
28#include <linux/netlink.h> 27#include <linux/netlink.h>
29#include <linux/syscalls.h> 28#include <linux/syscalls.h>
30#include <linux/audit.h> 29#include <linux/audit.h>
@@ -33,7 +32,6 @@
33#include <linux/nsproxy.h> 32#include <linux/nsproxy.h>
34#include <linux/pid.h> 33#include <linux/pid.h>
35#include <linux/ipc_namespace.h> 34#include <linux/ipc_namespace.h>
36#include <linux/user_namespace.h>
37#include <linux/slab.h> 35#include <linux/slab.h>
38 36
39#include <net/sock.h> 37#include <net/sock.h>
@@ -50,12 +48,6 @@
50#define STATE_PENDING 1 48#define STATE_PENDING 1
51#define STATE_READY 2 49#define STATE_READY 2
52 50
53struct posix_msg_tree_node {
54 struct rb_node rb_node;
55 struct list_head msg_list;
56 int priority;
57};
58
59struct ext_wait_queue { /* queue of sleeping tasks */ 51struct ext_wait_queue { /* queue of sleeping tasks */
60 struct task_struct *task; 52 struct task_struct *task;
61 struct list_head list; 53 struct list_head list;
@@ -68,13 +60,11 @@ struct mqueue_inode_info {
68 struct inode vfs_inode; 60 struct inode vfs_inode;
69 wait_queue_head_t wait_q; 61 wait_queue_head_t wait_q;
70 62
71 struct rb_root msg_tree; 63 struct msg_msg **messages;
72 struct posix_msg_tree_node *node_cache;
73 struct mq_attr attr; 64 struct mq_attr attr;
74 65
75 struct sigevent notify; 66 struct sigevent notify;
76 struct pid* notify_owner; 67 struct pid* notify_owner;
77 struct user_namespace *notify_user_ns;
78 struct user_struct *user; /* user who created, for accounting */ 68 struct user_struct *user; /* user who created, for accounting */
79 struct sock *notify_sock; 69 struct sock *notify_sock;
80 struct sk_buff *notify_cookie; 70 struct sk_buff *notify_cookie;
@@ -117,104 +107,8 @@ static struct ipc_namespace *get_ns_from_inode(struct inode *inode)
117 return ns; 107 return ns;
118} 108}
119 109
120/* Auxiliary functions to manipulate messages' list */
121static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
122{
123 struct rb_node **p, *parent = NULL;
124 struct posix_msg_tree_node *leaf;
125
126 p = &info->msg_tree.rb_node;
127 while (*p) {
128 parent = *p;
129 leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node);
130
131 if (likely(leaf->priority == msg->m_type))
132 goto insert_msg;
133 else if (msg->m_type < leaf->priority)
134 p = &(*p)->rb_left;
135 else
136 p = &(*p)->rb_right;
137 }
138 if (info->node_cache) {
139 leaf = info->node_cache;
140 info->node_cache = NULL;
141 } else {
142 leaf = kmalloc(sizeof(*leaf), GFP_ATOMIC);
143 if (!leaf)
144 return -ENOMEM;
145 INIT_LIST_HEAD(&leaf->msg_list);
146 info->qsize += sizeof(*leaf);
147 }
148 leaf->priority = msg->m_type;
149 rb_link_node(&leaf->rb_node, parent, p);
150 rb_insert_color(&leaf->rb_node, &info->msg_tree);
151insert_msg:
152 info->attr.mq_curmsgs++;
153 info->qsize += msg->m_ts;
154 list_add_tail(&msg->m_list, &leaf->msg_list);
155 return 0;
156}
157
158static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
159{
160 struct rb_node **p, *parent = NULL;
161 struct posix_msg_tree_node *leaf;
162 struct msg_msg *msg;
163
164try_again:
165 p = &info->msg_tree.rb_node;
166 while (*p) {
167 parent = *p;
168 /*
169 * During insert, low priorities go to the left and high to the
170 * right. On receive, we want the highest priorities first, so
171 * walk all the way to the right.
172 */
173 p = &(*p)->rb_right;
174 }
175 if (!parent) {
176 if (info->attr.mq_curmsgs) {
177 pr_warn_once("Inconsistency in POSIX message queue, "
178 "no tree element, but supposedly messages "
179 "should exist!\n");
180 info->attr.mq_curmsgs = 0;
181 }
182 return NULL;
183 }
184 leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node);
185 if (unlikely(list_empty(&leaf->msg_list))) {
186 pr_warn_once("Inconsistency in POSIX message queue, "
187 "empty leaf node but we haven't implemented "
188 "lazy leaf delete!\n");
189 rb_erase(&leaf->rb_node, &info->msg_tree);
190 if (info->node_cache) {
191 info->qsize -= sizeof(*leaf);
192 kfree(leaf);
193 } else {
194 info->node_cache = leaf;
195 }
196 goto try_again;
197 } else {
198 msg = list_first_entry(&leaf->msg_list,
199 struct msg_msg, m_list);
200 list_del(&msg->m_list);
201 if (list_empty(&leaf->msg_list)) {
202 rb_erase(&leaf->rb_node, &info->msg_tree);
203 if (info->node_cache) {
204 info->qsize -= sizeof(*leaf);
205 kfree(leaf);
206 } else {
207 info->node_cache = leaf;
208 }
209 }
210 }
211 info->attr.mq_curmsgs--;
212 info->qsize -= msg->m_ts;
213 return msg;
214}
215
216static struct inode *mqueue_get_inode(struct super_block *sb, 110static struct inode *mqueue_get_inode(struct super_block *sb,
217 struct ipc_namespace *ipc_ns, umode_t mode, 111 struct ipc_namespace *ipc_ns, int mode,
218 struct mq_attr *attr) 112 struct mq_attr *attr)
219{ 113{
220 struct user_struct *u = current_user(); 114 struct user_struct *u = current_user();
@@ -233,7 +127,8 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
233 127
234 if (S_ISREG(mode)) { 128 if (S_ISREG(mode)) {
235 struct mqueue_inode_info *info; 129 struct mqueue_inode_info *info;
236 unsigned long mq_bytes, mq_treesize; 130 struct task_struct *p = current;
131 unsigned long mq_bytes, mq_msg_tblsz;
237 132
238 inode->i_fop = &mqueue_file_operations; 133 inode->i_fop = &mqueue_file_operations;
239 inode->i_size = FILENT_SIZE; 134 inode->i_size = FILENT_SIZE;
@@ -244,43 +139,26 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
244 INIT_LIST_HEAD(&info->e_wait_q[0].list); 139 INIT_LIST_HEAD(&info->e_wait_q[0].list);
245 INIT_LIST_HEAD(&info->e_wait_q[1].list); 140 INIT_LIST_HEAD(&info->e_wait_q[1].list);
246 info->notify_owner = NULL; 141 info->notify_owner = NULL;
247 info->notify_user_ns = NULL;
248 info->qsize = 0; 142 info->qsize = 0;
249 info->user = NULL; /* set when all is ok */ 143 info->user = NULL; /* set when all is ok */
250 info->msg_tree = RB_ROOT;
251 info->node_cache = NULL;
252 memset(&info->attr, 0, sizeof(info->attr)); 144 memset(&info->attr, 0, sizeof(info->attr));
253 info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max, 145 info->attr.mq_maxmsg = ipc_ns->mq_msg_max;
254 ipc_ns->mq_msg_default); 146 info->attr.mq_msgsize = ipc_ns->mq_msgsize_max;
255 info->attr.mq_msgsize = min(ipc_ns->mq_msgsize_max,
256 ipc_ns->mq_msgsize_default);
257 if (attr) { 147 if (attr) {
258 info->attr.mq_maxmsg = attr->mq_maxmsg; 148 info->attr.mq_maxmsg = attr->mq_maxmsg;
259 info->attr.mq_msgsize = attr->mq_msgsize; 149 info->attr.mq_msgsize = attr->mq_msgsize;
260 } 150 }
261 /* 151 mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *);
262 * We used to allocate a static array of pointers and account 152 info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
263 * the size of that array as well as one msg_msg struct per 153 if (!info->messages)
264 * possible message into the queue size. That's no longer 154 goto out_inode;
265 * accurate as the queue is now an rbtree and will grow and
266 * shrink depending on usage patterns. We can, however, still
267 * account one msg_msg struct per message, but the nodes are
268 * allocated depending on priority usage, and most programs
269 * only use one, or a handful, of priorities. However, since
270 * this is pinned memory, we need to assume worst case, so
271 * that means the min(mq_maxmsg, max_priorities) * struct
272 * posix_msg_tree_node.
273 */
274 mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) +
275 min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) *
276 sizeof(struct posix_msg_tree_node);
277 155
278 mq_bytes = mq_treesize + (info->attr.mq_maxmsg * 156 mq_bytes = (mq_msg_tblsz +
279 info->attr.mq_msgsize); 157 (info->attr.mq_maxmsg * info->attr.mq_msgsize));
280 158
281 spin_lock(&mq_lock); 159 spin_lock(&mq_lock);
282 if (u->mq_bytes + mq_bytes < u->mq_bytes || 160 if (u->mq_bytes + mq_bytes < u->mq_bytes ||
283 u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) { 161 u->mq_bytes + mq_bytes > task_rlimit(p, RLIMIT_MSGQUEUE)) {
284 spin_unlock(&mq_lock); 162 spin_unlock(&mq_lock);
285 /* mqueue_evict_inode() releases info->messages */ 163 /* mqueue_evict_inode() releases info->messages */
286 ret = -EMFILE; 164 ret = -EMFILE;
@@ -310,20 +188,30 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
310{ 188{
311 struct inode *inode; 189 struct inode *inode;
312 struct ipc_namespace *ns = data; 190 struct ipc_namespace *ns = data;
191 int error;
313 192
314 sb->s_blocksize = PAGE_CACHE_SIZE; 193 sb->s_blocksize = PAGE_CACHE_SIZE;
315 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 194 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
316 sb->s_magic = MQUEUE_MAGIC; 195 sb->s_magic = MQUEUE_MAGIC;
317 sb->s_op = &mqueue_super_ops; 196 sb->s_op = &mqueue_super_ops;
318 197
319 inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO, NULL); 198 inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO,
320 if (IS_ERR(inode)) 199 NULL);
321 return PTR_ERR(inode); 200 if (IS_ERR(inode)) {
201 error = PTR_ERR(inode);
202 goto out;
203 }
322 204
323 sb->s_root = d_make_root(inode); 205 sb->s_root = d_alloc_root(inode);
324 if (!sb->s_root) 206 if (!sb->s_root) {
325 return -ENOMEM; 207 iput(inode);
326 return 0; 208 error = -ENOMEM;
209 goto out;
210 }
211 error = 0;
212
213out:
214 return error;
327} 215}
328 216
329static struct dentry *mqueue_mount(struct file_system_type *fs_type, 217static struct dentry *mqueue_mount(struct file_system_type *fs_type,
@@ -355,6 +243,7 @@ static struct inode *mqueue_alloc_inode(struct super_block *sb)
355static void mqueue_i_callback(struct rcu_head *head) 243static void mqueue_i_callback(struct rcu_head *head)
356{ 244{
357 struct inode *inode = container_of(head, struct inode, i_rcu); 245 struct inode *inode = container_of(head, struct inode, i_rcu);
246 INIT_LIST_HEAD(&inode->i_dentry);
358 kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode)); 247 kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode));
359} 248}
360 249
@@ -367,11 +256,11 @@ static void mqueue_evict_inode(struct inode *inode)
367{ 256{
368 struct mqueue_inode_info *info; 257 struct mqueue_inode_info *info;
369 struct user_struct *user; 258 struct user_struct *user;
370 unsigned long mq_bytes, mq_treesize; 259 unsigned long mq_bytes;
260 int i;
371 struct ipc_namespace *ipc_ns; 261 struct ipc_namespace *ipc_ns;
372 struct msg_msg *msg;
373 262
374 clear_inode(inode); 263 end_writeback(inode);
375 264
376 if (S_ISDIR(inode->i_mode)) 265 if (S_ISDIR(inode->i_mode))
377 return; 266 return;
@@ -379,19 +268,14 @@ static void mqueue_evict_inode(struct inode *inode)
379 ipc_ns = get_ns_from_inode(inode); 268 ipc_ns = get_ns_from_inode(inode);
380 info = MQUEUE_I(inode); 269 info = MQUEUE_I(inode);
381 spin_lock(&info->lock); 270 spin_lock(&info->lock);
382 while ((msg = msg_get(info)) != NULL) 271 for (i = 0; i < info->attr.mq_curmsgs; i++)
383 free_msg(msg); 272 free_msg(info->messages[i]);
384 kfree(info->node_cache); 273 kfree(info->messages);
385 spin_unlock(&info->lock); 274 spin_unlock(&info->lock);
386 275
387 /* Total amount of bytes accounted for the mqueue */ 276 /* Total amount of bytes accounted for the mqueue */
388 mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) + 277 mq_bytes = info->attr.mq_maxmsg * (sizeof(struct msg_msg *)
389 min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) * 278 + info->attr.mq_msgsize);
390 sizeof(struct posix_msg_tree_node);
391
392 mq_bytes = mq_treesize + (info->attr.mq_maxmsg *
393 info->attr.mq_msgsize);
394
395 user = info->user; 279 user = info->user;
396 if (user) { 280 if (user) {
397 spin_lock(&mq_lock); 281 spin_lock(&mq_lock);
@@ -412,7 +296,7 @@ static void mqueue_evict_inode(struct inode *inode)
412} 296}
413 297
414static int mqueue_create(struct inode *dir, struct dentry *dentry, 298static int mqueue_create(struct inode *dir, struct dentry *dentry,
415 umode_t mode, bool excl) 299 int mode, struct nameidata *nd)
416{ 300{
417 struct inode *inode; 301 struct inode *inode;
418 struct mq_attr *attr = dentry->d_fsdata; 302 struct mq_attr *attr = dentry->d_fsdata;
@@ -425,9 +309,8 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry,
425 error = -EACCES; 309 error = -EACCES;
426 goto out_unlock; 310 goto out_unlock;
427 } 311 }
428 if (ipc_ns->mq_queues_count >= HARD_QUEUESMAX || 312 if (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max &&
429 (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max && 313 !capable(CAP_SYS_RESOURCE)) {
430 !capable(CAP_SYS_RESOURCE))) {
431 error = -ENOSPC; 314 error = -ENOSPC;
432 goto out_unlock; 315 goto out_unlock;
433 } 316 }
@@ -566,8 +449,8 @@ static int wq_sleep(struct mqueue_inode_info *info, int sr,
566 set_current_state(TASK_INTERRUPTIBLE); 449 set_current_state(TASK_INTERRUPTIBLE);
567 450
568 spin_unlock(&info->lock); 451 spin_unlock(&info->lock);
569 time = schedule_hrtimeout_range_clock(timeout, 0, 452 time = schedule_hrtimeout_range_clock(timeout,
570 HRTIMER_MODE_ABS, CLOCK_REALTIME); 453 HRTIMER_MODE_ABS, 0, CLOCK_REALTIME);
571 454
572 while (ewp->state == STATE_PENDING) 455 while (ewp->state == STATE_PENDING)
573 cpu_relax(); 456 cpu_relax();
@@ -611,6 +494,26 @@ static struct ext_wait_queue *wq_get_first_waiter(
611 return list_entry(ptr, struct ext_wait_queue, list); 494 return list_entry(ptr, struct ext_wait_queue, list);
612} 495}
613 496
497/* Auxiliary functions to manipulate messages' list */
498static void msg_insert(struct msg_msg *ptr, struct mqueue_inode_info *info)
499{
500 int k;
501
502 k = info->attr.mq_curmsgs - 1;
503 while (k >= 0 && info->messages[k]->m_type >= ptr->m_type) {
504 info->messages[k + 1] = info->messages[k];
505 k--;
506 }
507 info->attr.mq_curmsgs++;
508 info->qsize += ptr->m_ts;
509 info->messages[k + 1] = ptr;
510}
511
512static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
513{
514 info->qsize -= info->messages[--info->attr.mq_curmsgs]->m_ts;
515 return info->messages[info->attr.mq_curmsgs];
516}
614 517
615static inline void set_cookie(struct sk_buff *skb, char code) 518static inline void set_cookie(struct sk_buff *skb, char code)
616{ 519{
@@ -640,12 +543,9 @@ static void __do_notify(struct mqueue_inode_info *info)
640 sig_i.si_errno = 0; 543 sig_i.si_errno = 0;
641 sig_i.si_code = SI_MESGQ; 544 sig_i.si_code = SI_MESGQ;
642 sig_i.si_value = info->notify.sigev_value; 545 sig_i.si_value = info->notify.sigev_value;
643 /* map current pid/uid into info->owner's namespaces */
644 rcu_read_lock();
645 sig_i.si_pid = task_tgid_nr_ns(current, 546 sig_i.si_pid = task_tgid_nr_ns(current,
646 ns_of_pid(info->notify_owner)); 547 ns_of_pid(info->notify_owner));
647 sig_i.si_uid = from_kuid_munged(info->notify_user_ns, current_uid()); 548 sig_i.si_uid = current_uid();
648 rcu_read_unlock();
649 549
650 kill_pid_info(info->notify.sigev_signo, 550 kill_pid_info(info->notify.sigev_signo,
651 &sig_i, info->notify_owner); 551 &sig_i, info->notify_owner);
@@ -657,9 +557,7 @@ static void __do_notify(struct mqueue_inode_info *info)
657 } 557 }
658 /* after notification unregisters process */ 558 /* after notification unregisters process */
659 put_pid(info->notify_owner); 559 put_pid(info->notify_owner);
660 put_user_ns(info->notify_user_ns);
661 info->notify_owner = NULL; 560 info->notify_owner = NULL;
662 info->notify_user_ns = NULL;
663 } 561 }
664 wake_up(&info->wait_q); 562 wake_up(&info->wait_q);
665} 563}
@@ -684,101 +582,113 @@ static void remove_notification(struct mqueue_inode_info *info)
684 netlink_sendskb(info->notify_sock, info->notify_cookie); 582 netlink_sendskb(info->notify_sock, info->notify_cookie);
685 } 583 }
686 put_pid(info->notify_owner); 584 put_pid(info->notify_owner);
687 put_user_ns(info->notify_user_ns);
688 info->notify_owner = NULL; 585 info->notify_owner = NULL;
689 info->notify_user_ns = NULL;
690} 586}
691 587
692static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr) 588static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr)
693{ 589{
694 int mq_treesize;
695 unsigned long total_size;
696
697 if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0) 590 if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0)
698 return -EINVAL; 591 return 0;
699 if (capable(CAP_SYS_RESOURCE)) { 592 if (capable(CAP_SYS_RESOURCE)) {
700 if (attr->mq_maxmsg > HARD_MSGMAX || 593 if (attr->mq_maxmsg > HARD_MSGMAX)
701 attr->mq_msgsize > HARD_MSGSIZEMAX) 594 return 0;
702 return -EINVAL;
703 } else { 595 } else {
704 if (attr->mq_maxmsg > ipc_ns->mq_msg_max || 596 if (attr->mq_maxmsg > ipc_ns->mq_msg_max ||
705 attr->mq_msgsize > ipc_ns->mq_msgsize_max) 597 attr->mq_msgsize > ipc_ns->mq_msgsize_max)
706 return -EINVAL; 598 return 0;
707 } 599 }
708 /* check for overflow */ 600 /* check for overflow */
709 if (attr->mq_msgsize > ULONG_MAX/attr->mq_maxmsg) 601 if (attr->mq_msgsize > ULONG_MAX/attr->mq_maxmsg)
710 return -EOVERFLOW; 602 return 0;
711 mq_treesize = attr->mq_maxmsg * sizeof(struct msg_msg) + 603 if ((unsigned long)(attr->mq_maxmsg * (attr->mq_msgsize
712 min_t(unsigned int, attr->mq_maxmsg, MQ_PRIO_MAX) * 604 + sizeof (struct msg_msg *))) <
713 sizeof(struct posix_msg_tree_node); 605 (unsigned long)(attr->mq_maxmsg * attr->mq_msgsize))
714 total_size = attr->mq_maxmsg * attr->mq_msgsize; 606 return 0;
715 if (total_size + mq_treesize < total_size) 607 return 1;
716 return -EOVERFLOW;
717 return 0;
718} 608}
719 609
720/* 610/*
721 * Invoked when creating a new queue via sys_mq_open 611 * Invoked when creating a new queue via sys_mq_open
722 */ 612 */
723static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir, 613static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir,
724 struct path *path, int oflag, umode_t mode, 614 struct dentry *dentry, int oflag, mode_t mode,
725 struct mq_attr *attr) 615 struct mq_attr *attr)
726{ 616{
727 const struct cred *cred = current_cred(); 617 const struct cred *cred = current_cred();
618 struct file *result;
728 int ret; 619 int ret;
729 620
730 if (attr) { 621 if (attr) {
731 ret = mq_attr_ok(ipc_ns, attr); 622 if (!mq_attr_ok(ipc_ns, attr)) {
732 if (ret) 623 ret = -EINVAL;
733 return ERR_PTR(ret); 624 goto out;
625 }
734 /* store for use during create */ 626 /* store for use during create */
735 path->dentry->d_fsdata = attr; 627 dentry->d_fsdata = attr;
736 } else {
737 struct mq_attr def_attr;
738
739 def_attr.mq_maxmsg = min(ipc_ns->mq_msg_max,
740 ipc_ns->mq_msg_default);
741 def_attr.mq_msgsize = min(ipc_ns->mq_msgsize_max,
742 ipc_ns->mq_msgsize_default);
743 ret = mq_attr_ok(ipc_ns, &def_attr);
744 if (ret)
745 return ERR_PTR(ret);
746 } 628 }
747 629
748 mode &= ~current_umask(); 630 mode &= ~current_umask();
749 ret = vfs_create(dir, path->dentry, mode, true); 631 ret = mnt_want_write(ipc_ns->mq_mnt);
750 path->dentry->d_fsdata = NULL;
751 if (ret) 632 if (ret)
752 return ERR_PTR(ret); 633 goto out;
753 return dentry_open(path, oflag, cred); 634 ret = vfs_create(dir->d_inode, dentry, mode, NULL);
635 dentry->d_fsdata = NULL;
636 if (ret)
637 goto out_drop_write;
638
639 result = dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred);
640 /*
641 * dentry_open() took a persistent mnt_want_write(),
642 * so we can now drop this one.
643 */
644 mnt_drop_write(ipc_ns->mq_mnt);
645 return result;
646
647out_drop_write:
648 mnt_drop_write(ipc_ns->mq_mnt);
649out:
650 dput(dentry);
651 mntput(ipc_ns->mq_mnt);
652 return ERR_PTR(ret);
754} 653}
755 654
756/* Opens existing queue */ 655/* Opens existing queue */
757static struct file *do_open(struct path *path, int oflag) 656static struct file *do_open(struct ipc_namespace *ipc_ns,
657 struct dentry *dentry, int oflag)
758{ 658{
659 int ret;
660 const struct cred *cred = current_cred();
661
759 static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE, 662 static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
760 MAY_READ | MAY_WRITE }; 663 MAY_READ | MAY_WRITE };
761 int acc; 664
762 if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) 665 if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) {
763 return ERR_PTR(-EINVAL); 666 ret = -EINVAL;
764 acc = oflag2acc[oflag & O_ACCMODE]; 667 goto err;
765 if (inode_permission(path->dentry->d_inode, acc)) 668 }
766 return ERR_PTR(-EACCES); 669
767 return dentry_open(path, oflag, current_cred()); 670 if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) {
671 ret = -EACCES;
672 goto err;
673 }
674
675 return dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred);
676
677err:
678 dput(dentry);
679 mntput(ipc_ns->mq_mnt);
680 return ERR_PTR(ret);
768} 681}
769 682
770SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode, 683SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
771 struct mq_attr __user *, u_attr) 684 struct mq_attr __user *, u_attr)
772{ 685{
773 struct path path; 686 struct dentry *dentry;
774 struct file *filp; 687 struct file *filp;
775 struct filename *name; 688 char *name;
776 struct mq_attr attr; 689 struct mq_attr attr;
777 int fd, error; 690 int fd, error;
778 struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; 691 struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
779 struct vfsmount *mnt = ipc_ns->mq_mnt;
780 struct dentry *root = mnt->mnt_root;
781 int ro;
782 692
783 if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr))) 693 if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr)))
784 return -EFAULT; 694 return -EFAULT;
@@ -792,55 +702,52 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
792 if (fd < 0) 702 if (fd < 0)
793 goto out_putname; 703 goto out_putname;
794 704
795 ro = mnt_want_write(mnt); /* we'll drop it in any case */ 705 mutex_lock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
796 error = 0; 706 dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name));
797 mutex_lock(&root->d_inode->i_mutex); 707 if (IS_ERR(dentry)) {
798 path.dentry = lookup_one_len(name->name, root, strlen(name->name)); 708 error = PTR_ERR(dentry);
799 if (IS_ERR(path.dentry)) {
800 error = PTR_ERR(path.dentry);
801 goto out_putfd; 709 goto out_putfd;
802 } 710 }
803 path.mnt = mntget(mnt); 711 mntget(ipc_ns->mq_mnt);
804 712
805 if (oflag & O_CREAT) { 713 if (oflag & O_CREAT) {
806 if (path.dentry->d_inode) { /* entry already exists */ 714 if (dentry->d_inode) { /* entry already exists */
807 audit_inode(name, path.dentry, 0); 715 audit_inode(name, dentry);
808 if (oflag & O_EXCL) { 716 if (oflag & O_EXCL) {
809 error = -EEXIST; 717 error = -EEXIST;
810 goto out; 718 goto out;
811 } 719 }
812 filp = do_open(&path, oflag); 720 filp = do_open(ipc_ns, dentry, oflag);
813 } else { 721 } else {
814 if (ro) { 722 filp = do_create(ipc_ns, ipc_ns->mq_mnt->mnt_root,
815 error = ro; 723 dentry, oflag, mode,
816 goto out;
817 }
818 filp = do_create(ipc_ns, root->d_inode,
819 &path, oflag, mode,
820 u_attr ? &attr : NULL); 724 u_attr ? &attr : NULL);
821 } 725 }
822 } else { 726 } else {
823 if (!path.dentry->d_inode) { 727 if (!dentry->d_inode) {
824 error = -ENOENT; 728 error = -ENOENT;
825 goto out; 729 goto out;
826 } 730 }
827 audit_inode(name, path.dentry, 0); 731 audit_inode(name, dentry);
828 filp = do_open(&path, oflag); 732 filp = do_open(ipc_ns, dentry, oflag);
829 } 733 }
830 734
831 if (!IS_ERR(filp)) 735 if (IS_ERR(filp)) {
832 fd_install(fd, filp);
833 else
834 error = PTR_ERR(filp); 736 error = PTR_ERR(filp);
737 goto out_putfd;
738 }
739
740 fd_install(fd, filp);
741 goto out_upsem;
742
835out: 743out:
836 path_put(&path); 744 dput(dentry);
745 mntput(ipc_ns->mq_mnt);
837out_putfd: 746out_putfd:
838 if (error) { 747 put_unused_fd(fd);
839 put_unused_fd(fd); 748 fd = error;
840 fd = error; 749out_upsem:
841 } 750 mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
842 mutex_unlock(&root->d_inode->i_mutex);
843 mnt_drop_write(mnt);
844out_putname: 751out_putname:
845 putname(name); 752 putname(name);
846 return fd; 753 return fd;
@@ -849,43 +756,44 @@ out_putname:
849SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) 756SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
850{ 757{
851 int err; 758 int err;
852 struct filename *name; 759 char *name;
853 struct dentry *dentry; 760 struct dentry *dentry;
854 struct inode *inode = NULL; 761 struct inode *inode = NULL;
855 struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; 762 struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
856 struct vfsmount *mnt = ipc_ns->mq_mnt;
857 763
858 name = getname(u_name); 764 name = getname(u_name);
859 if (IS_ERR(name)) 765 if (IS_ERR(name))
860 return PTR_ERR(name); 766 return PTR_ERR(name);
861 767
862 err = mnt_want_write(mnt); 768 mutex_lock_nested(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex,
863 if (err) 769 I_MUTEX_PARENT);
864 goto out_name; 770 dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name));
865 mutex_lock_nested(&mnt->mnt_root->d_inode->i_mutex, I_MUTEX_PARENT);
866 dentry = lookup_one_len(name->name, mnt->mnt_root,
867 strlen(name->name));
868 if (IS_ERR(dentry)) { 771 if (IS_ERR(dentry)) {
869 err = PTR_ERR(dentry); 772 err = PTR_ERR(dentry);
870 goto out_unlock; 773 goto out_unlock;
871 } 774 }
872 775
873 inode = dentry->d_inode; 776 if (!dentry->d_inode) {
874 if (!inode) {
875 err = -ENOENT; 777 err = -ENOENT;
876 } else { 778 goto out_err;
877 ihold(inode);
878 err = vfs_unlink(dentry->d_parent->d_inode, dentry);
879 } 779 }
780
781 inode = dentry->d_inode;
782 if (inode)
783 ihold(inode);
784 err = mnt_want_write(ipc_ns->mq_mnt);
785 if (err)
786 goto out_err;
787 err = vfs_unlink(dentry->d_parent->d_inode, dentry);
788 mnt_drop_write(ipc_ns->mq_mnt);
789out_err:
880 dput(dentry); 790 dput(dentry);
881 791
882out_unlock: 792out_unlock:
883 mutex_unlock(&mnt->mnt_root->d_inode->i_mutex); 793 mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
794 putname(name);
884 if (inode) 795 if (inode)
885 iput(inode); 796 iput(inode);
886 mnt_drop_write(mnt);
887out_name:
888 putname(name);
889 797
890 return err; 798 return err;
891} 799}
@@ -931,8 +839,7 @@ static inline void pipelined_receive(struct mqueue_inode_info *info)
931 wake_up_interruptible(&info->wait_q); 839 wake_up_interruptible(&info->wait_q);
932 return; 840 return;
933 } 841 }
934 if (msg_insert(sender->msg, info)) 842 msg_insert(sender->msg, info);
935 return;
936 list_del(&sender->list); 843 list_del(&sender->list);
937 sender->state = STATE_PENDING; 844 sender->state = STATE_PENDING;
938 wake_up_process(sender->task); 845 wake_up_process(sender->task);
@@ -944,7 +851,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
944 size_t, msg_len, unsigned int, msg_prio, 851 size_t, msg_len, unsigned int, msg_prio,
945 const struct timespec __user *, u_abs_timeout) 852 const struct timespec __user *, u_abs_timeout)
946{ 853{
947 struct fd f; 854 struct file *filp;
948 struct inode *inode; 855 struct inode *inode;
949 struct ext_wait_queue wait; 856 struct ext_wait_queue wait;
950 struct ext_wait_queue *receiver; 857 struct ext_wait_queue *receiver;
@@ -952,8 +859,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
952 struct mqueue_inode_info *info; 859 struct mqueue_inode_info *info;
953 ktime_t expires, *timeout = NULL; 860 ktime_t expires, *timeout = NULL;
954 struct timespec ts; 861 struct timespec ts;
955 struct posix_msg_tree_node *new_leaf = NULL; 862 int ret;
956 int ret = 0;
957 863
958 if (u_abs_timeout) { 864 if (u_abs_timeout) {
959 int res = prepare_timeout(u_abs_timeout, &expires, &ts); 865 int res = prepare_timeout(u_abs_timeout, &expires, &ts);
@@ -967,21 +873,21 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
967 873
968 audit_mq_sendrecv(mqdes, msg_len, msg_prio, timeout ? &ts : NULL); 874 audit_mq_sendrecv(mqdes, msg_len, msg_prio, timeout ? &ts : NULL);
969 875
970 f = fdget(mqdes); 876 filp = fget(mqdes);
971 if (unlikely(!f.file)) { 877 if (unlikely(!filp)) {
972 ret = -EBADF; 878 ret = -EBADF;
973 goto out; 879 goto out;
974 } 880 }
975 881
976 inode = f.file->f_path.dentry->d_inode; 882 inode = filp->f_path.dentry->d_inode;
977 if (unlikely(f.file->f_op != &mqueue_file_operations)) { 883 if (unlikely(filp->f_op != &mqueue_file_operations)) {
978 ret = -EBADF; 884 ret = -EBADF;
979 goto out_fput; 885 goto out_fput;
980 } 886 }
981 info = MQUEUE_I(inode); 887 info = MQUEUE_I(inode);
982 audit_inode(NULL, f.file->f_path.dentry, 0); 888 audit_inode(NULL, filp->f_path.dentry);
983 889
984 if (unlikely(!(f.file->f_mode & FMODE_WRITE))) { 890 if (unlikely(!(filp->f_mode & FMODE_WRITE))) {
985 ret = -EBADF; 891 ret = -EBADF;
986 goto out_fput; 892 goto out_fput;
987 } 893 }
@@ -1001,61 +907,36 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
1001 msg_ptr->m_ts = msg_len; 907 msg_ptr->m_ts = msg_len;
1002 msg_ptr->m_type = msg_prio; 908 msg_ptr->m_type = msg_prio;
1003 909
1004 /*
1005 * msg_insert really wants us to have a valid, spare node struct so
1006 * it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will
1007 * fall back to that if necessary.
1008 */
1009 if (!info->node_cache)
1010 new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL);
1011
1012 spin_lock(&info->lock); 910 spin_lock(&info->lock);
1013 911
1014 if (!info->node_cache && new_leaf) {
1015 /* Save our speculative allocation into the cache */
1016 INIT_LIST_HEAD(&new_leaf->msg_list);
1017 info->node_cache = new_leaf;
1018 info->qsize += sizeof(*new_leaf);
1019 new_leaf = NULL;
1020 } else {
1021 kfree(new_leaf);
1022 }
1023
1024 if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) { 912 if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) {
1025 if (f.file->f_flags & O_NONBLOCK) { 913 if (filp->f_flags & O_NONBLOCK) {
914 spin_unlock(&info->lock);
1026 ret = -EAGAIN; 915 ret = -EAGAIN;
1027 } else { 916 } else {
1028 wait.task = current; 917 wait.task = current;
1029 wait.msg = (void *) msg_ptr; 918 wait.msg = (void *) msg_ptr;
1030 wait.state = STATE_NONE; 919 wait.state = STATE_NONE;
1031 ret = wq_sleep(info, SEND, timeout, &wait); 920 ret = wq_sleep(info, SEND, timeout, &wait);
1032 /*
1033 * wq_sleep must be called with info->lock held, and
1034 * returns with the lock released
1035 */
1036 goto out_free;
1037 } 921 }
922 if (ret < 0)
923 free_msg(msg_ptr);
1038 } else { 924 } else {
1039 receiver = wq_get_first_waiter(info, RECV); 925 receiver = wq_get_first_waiter(info, RECV);
1040 if (receiver) { 926 if (receiver) {
1041 pipelined_send(info, msg_ptr, receiver); 927 pipelined_send(info, msg_ptr, receiver);
1042 } else { 928 } else {
1043 /* adds message to the queue */ 929 /* adds message to the queue */
1044 ret = msg_insert(msg_ptr, info); 930 msg_insert(msg_ptr, info);
1045 if (ret)
1046 goto out_unlock;
1047 __do_notify(info); 931 __do_notify(info);
1048 } 932 }
1049 inode->i_atime = inode->i_mtime = inode->i_ctime = 933 inode->i_atime = inode->i_mtime = inode->i_ctime =
1050 CURRENT_TIME; 934 CURRENT_TIME;
935 spin_unlock(&info->lock);
936 ret = 0;
1051 } 937 }
1052out_unlock:
1053 spin_unlock(&info->lock);
1054out_free:
1055 if (ret)
1056 free_msg(msg_ptr);
1057out_fput: 938out_fput:
1058 fdput(f); 939 fput(filp);
1059out: 940out:
1060 return ret; 941 return ret;
1061} 942}
@@ -1066,13 +947,12 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
1066{ 947{
1067 ssize_t ret; 948 ssize_t ret;
1068 struct msg_msg *msg_ptr; 949 struct msg_msg *msg_ptr;
1069 struct fd f; 950 struct file *filp;
1070 struct inode *inode; 951 struct inode *inode;
1071 struct mqueue_inode_info *info; 952 struct mqueue_inode_info *info;
1072 struct ext_wait_queue wait; 953 struct ext_wait_queue wait;
1073 ktime_t expires, *timeout = NULL; 954 ktime_t expires, *timeout = NULL;
1074 struct timespec ts; 955 struct timespec ts;
1075 struct posix_msg_tree_node *new_leaf = NULL;
1076 956
1077 if (u_abs_timeout) { 957 if (u_abs_timeout) {
1078 int res = prepare_timeout(u_abs_timeout, &expires, &ts); 958 int res = prepare_timeout(u_abs_timeout, &expires, &ts);
@@ -1083,21 +963,21 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
1083 963
1084 audit_mq_sendrecv(mqdes, msg_len, 0, timeout ? &ts : NULL); 964 audit_mq_sendrecv(mqdes, msg_len, 0, timeout ? &ts : NULL);
1085 965
1086 f = fdget(mqdes); 966 filp = fget(mqdes);
1087 if (unlikely(!f.file)) { 967 if (unlikely(!filp)) {
1088 ret = -EBADF; 968 ret = -EBADF;
1089 goto out; 969 goto out;
1090 } 970 }
1091 971
1092 inode = f.file->f_path.dentry->d_inode; 972 inode = filp->f_path.dentry->d_inode;
1093 if (unlikely(f.file->f_op != &mqueue_file_operations)) { 973 if (unlikely(filp->f_op != &mqueue_file_operations)) {
1094 ret = -EBADF; 974 ret = -EBADF;
1095 goto out_fput; 975 goto out_fput;
1096 } 976 }
1097 info = MQUEUE_I(inode); 977 info = MQUEUE_I(inode);
1098 audit_inode(NULL, f.file->f_path.dentry, 0); 978 audit_inode(NULL, filp->f_path.dentry);
1099 979
1100 if (unlikely(!(f.file->f_mode & FMODE_READ))) { 980 if (unlikely(!(filp->f_mode & FMODE_READ))) {
1101 ret = -EBADF; 981 ret = -EBADF;
1102 goto out_fput; 982 goto out_fput;
1103 } 983 }
@@ -1108,27 +988,9 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
1108 goto out_fput; 988 goto out_fput;
1109 } 989 }
1110 990
1111 /*
1112 * msg_insert really wants us to have a valid, spare node struct so
1113 * it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will
1114 * fall back to that if necessary.
1115 */
1116 if (!info->node_cache)
1117 new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL);
1118
1119 spin_lock(&info->lock); 991 spin_lock(&info->lock);
1120
1121 if (!info->node_cache && new_leaf) {
1122 /* Save our speculative allocation into the cache */
1123 INIT_LIST_HEAD(&new_leaf->msg_list);
1124 info->node_cache = new_leaf;
1125 info->qsize += sizeof(*new_leaf);
1126 } else {
1127 kfree(new_leaf);
1128 }
1129
1130 if (info->attr.mq_curmsgs == 0) { 992 if (info->attr.mq_curmsgs == 0) {
1131 if (f.file->f_flags & O_NONBLOCK) { 993 if (filp->f_flags & O_NONBLOCK) {
1132 spin_unlock(&info->lock); 994 spin_unlock(&info->lock);
1133 ret = -EAGAIN; 995 ret = -EAGAIN;
1134 } else { 996 } else {
@@ -1158,7 +1020,7 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
1158 free_msg(msg_ptr); 1020 free_msg(msg_ptr);
1159 } 1021 }
1160out_fput: 1022out_fput:
1161 fdput(f); 1023 fput(filp);
1162out: 1024out:
1163 return ret; 1025 return ret;
1164} 1026}
@@ -1172,7 +1034,7 @@ SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
1172 const struct sigevent __user *, u_notification) 1034 const struct sigevent __user *, u_notification)
1173{ 1035{
1174 int ret; 1036 int ret;
1175 struct fd f; 1037 struct file *filp;
1176 struct sock *sock; 1038 struct sock *sock;
1177 struct inode *inode; 1039 struct inode *inode;
1178 struct sigevent notification; 1040 struct sigevent notification;
@@ -1218,13 +1080,13 @@ SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
1218 skb_put(nc, NOTIFY_COOKIE_LEN); 1080 skb_put(nc, NOTIFY_COOKIE_LEN);
1219 /* and attach it to the socket */ 1081 /* and attach it to the socket */
1220retry: 1082retry:
1221 f = fdget(notification.sigev_signo); 1083 filp = fget(notification.sigev_signo);
1222 if (!f.file) { 1084 if (!filp) {
1223 ret = -EBADF; 1085 ret = -EBADF;
1224 goto out; 1086 goto out;
1225 } 1087 }
1226 sock = netlink_getsockbyfilp(f.file); 1088 sock = netlink_getsockbyfilp(filp);
1227 fdput(f); 1089 fput(filp);
1228 if (IS_ERR(sock)) { 1090 if (IS_ERR(sock)) {
1229 ret = PTR_ERR(sock); 1091 ret = PTR_ERR(sock);
1230 sock = NULL; 1092 sock = NULL;
@@ -1243,14 +1105,14 @@ retry:
1243 } 1105 }
1244 } 1106 }
1245 1107
1246 f = fdget(mqdes); 1108 filp = fget(mqdes);
1247 if (!f.file) { 1109 if (!filp) {
1248 ret = -EBADF; 1110 ret = -EBADF;
1249 goto out; 1111 goto out;
1250 } 1112 }
1251 1113
1252 inode = f.file->f_path.dentry->d_inode; 1114 inode = filp->f_path.dentry->d_inode;
1253 if (unlikely(f.file->f_op != &mqueue_file_operations)) { 1115 if (unlikely(filp->f_op != &mqueue_file_operations)) {
1254 ret = -EBADF; 1116 ret = -EBADF;
1255 goto out_fput; 1117 goto out_fput;
1256 } 1118 }
@@ -1285,12 +1147,11 @@ retry:
1285 } 1147 }
1286 1148
1287 info->notify_owner = get_pid(task_tgid(current)); 1149 info->notify_owner = get_pid(task_tgid(current));
1288 info->notify_user_ns = get_user_ns(current_user_ns());
1289 inode->i_atime = inode->i_ctime = CURRENT_TIME; 1150 inode->i_atime = inode->i_ctime = CURRENT_TIME;
1290 } 1151 }
1291 spin_unlock(&info->lock); 1152 spin_unlock(&info->lock);
1292out_fput: 1153out_fput:
1293 fdput(f); 1154 fput(filp);
1294out: 1155out:
1295 if (sock) { 1156 if (sock) {
1296 netlink_detachskb(sock, nc); 1157 netlink_detachskb(sock, nc);
@@ -1306,7 +1167,7 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
1306{ 1167{
1307 int ret; 1168 int ret;
1308 struct mq_attr mqstat, omqstat; 1169 struct mq_attr mqstat, omqstat;
1309 struct fd f; 1170 struct file *filp;
1310 struct inode *inode; 1171 struct inode *inode;
1311 struct mqueue_inode_info *info; 1172 struct mqueue_inode_info *info;
1312 1173
@@ -1317,14 +1178,14 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
1317 return -EINVAL; 1178 return -EINVAL;
1318 } 1179 }
1319 1180
1320 f = fdget(mqdes); 1181 filp = fget(mqdes);
1321 if (!f.file) { 1182 if (!filp) {
1322 ret = -EBADF; 1183 ret = -EBADF;
1323 goto out; 1184 goto out;
1324 } 1185 }
1325 1186
1326 inode = f.file->f_path.dentry->d_inode; 1187 inode = filp->f_path.dentry->d_inode;
1327 if (unlikely(f.file->f_op != &mqueue_file_operations)) { 1188 if (unlikely(filp->f_op != &mqueue_file_operations)) {
1328 ret = -EBADF; 1189 ret = -EBADF;
1329 goto out_fput; 1190 goto out_fput;
1330 } 1191 }
@@ -1333,15 +1194,15 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
1333 spin_lock(&info->lock); 1194 spin_lock(&info->lock);
1334 1195
1335 omqstat = info->attr; 1196 omqstat = info->attr;
1336 omqstat.mq_flags = f.file->f_flags & O_NONBLOCK; 1197 omqstat.mq_flags = filp->f_flags & O_NONBLOCK;
1337 if (u_mqstat) { 1198 if (u_mqstat) {
1338 audit_mq_getsetattr(mqdes, &mqstat); 1199 audit_mq_getsetattr(mqdes, &mqstat);
1339 spin_lock(&f.file->f_lock); 1200 spin_lock(&filp->f_lock);
1340 if (mqstat.mq_flags & O_NONBLOCK) 1201 if (mqstat.mq_flags & O_NONBLOCK)
1341 f.file->f_flags |= O_NONBLOCK; 1202 filp->f_flags |= O_NONBLOCK;
1342 else 1203 else
1343 f.file->f_flags &= ~O_NONBLOCK; 1204 filp->f_flags &= ~O_NONBLOCK;
1344 spin_unlock(&f.file->f_lock); 1205 spin_unlock(&filp->f_lock);
1345 1206
1346 inode->i_atime = inode->i_ctime = CURRENT_TIME; 1207 inode->i_atime = inode->i_ctime = CURRENT_TIME;
1347 } 1208 }
@@ -1354,7 +1215,7 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
1354 ret = -EFAULT; 1215 ret = -EFAULT;
1355 1216
1356out_fput: 1217out_fput:
1357 fdput(f); 1218 fput(filp);
1358out: 1219out:
1359 return ret; 1220 return ret;
1360} 1221}
@@ -1391,8 +1252,6 @@ int mq_init_ns(struct ipc_namespace *ns)
1391 ns->mq_queues_max = DFLT_QUEUESMAX; 1252 ns->mq_queues_max = DFLT_QUEUESMAX;
1392 ns->mq_msg_max = DFLT_MSGMAX; 1253 ns->mq_msg_max = DFLT_MSGMAX;
1393 ns->mq_msgsize_max = DFLT_MSGSIZEMAX; 1254 ns->mq_msgsize_max = DFLT_MSGSIZEMAX;
1394 ns->mq_msg_default = DFLT_MSG;
1395 ns->mq_msgsize_default = DFLT_MSGSIZE;
1396 1255
1397 ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns); 1256 ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns);
1398 if (IS_ERR(ns->mq_mnt)) { 1257 if (IS_ERR(ns->mq_mnt)) {
@@ -1410,7 +1269,7 @@ void mq_clear_sbinfo(struct ipc_namespace *ns)
1410 1269
1411void mq_put_mnt(struct ipc_namespace *ns) 1270void mq_put_mnt(struct ipc_namespace *ns)
1412{ 1271{
1413 kern_unmount(ns->mq_mnt); 1272 mntput(ns->mq_mnt);
1414} 1273}
1415 1274
1416static int __init init_mqueue_fs(void) 1275static int __init init_mqueue_fs(void)
@@ -1432,9 +1291,11 @@ static int __init init_mqueue_fs(void)
1432 1291
1433 spin_lock_init(&mq_lock); 1292 spin_lock_init(&mq_lock);
1434 1293
1435 error = mq_init_ns(&init_ipc_ns); 1294 init_ipc_ns.mq_mnt = kern_mount_data(&mqueue_fs_type, &init_ipc_ns);
1436 if (error) 1295 if (IS_ERR(init_ipc_ns.mq_mnt)) {
1296 error = PTR_ERR(init_ipc_ns.mq_mnt);
1437 goto out_filesystem; 1297 goto out_filesystem;
1298 }
1438 1299
1439 return 0; 1300 return 0;
1440 1301
diff --git a/ipc/msg.c b/ipc/msg.c
index 950572f9d79..7385de25788 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -443,12 +443,9 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
443 goto out_unlock; 443 goto out_unlock;
444 } 444 }
445 445
446 err = ipc_update_perm(&msqid64.msg_perm, ipcp);
447 if (err)
448 goto out_unlock;
449
450 msq->q_qbytes = msqid64.msg_qbytes; 446 msq->q_qbytes = msqid64.msg_qbytes;
451 447
448 ipc_update_perm(&msqid64.msg_perm, ipcp);
452 msq->q_ctime = get_seconds(); 449 msq->q_ctime = get_seconds();
453 /* sleeping receivers might be excluded by 450 /* sleeping receivers might be excluded by
454 * stricter permissions. 451 * stricter permissions.
@@ -755,91 +752,26 @@ static inline int convert_mode(long *msgtyp, int msgflg)
755 return SEARCH_EQUAL; 752 return SEARCH_EQUAL;
756} 753}
757 754
758static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) 755long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
759{ 756 size_t msgsz, long msgtyp, int msgflg)
760 struct msgbuf __user *msgp = dest;
761 size_t msgsz;
762
763 if (put_user(msg->m_type, &msgp->mtype))
764 return -EFAULT;
765
766 msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
767 if (store_msg(msgp->mtext, msg, msgsz))
768 return -EFAULT;
769 return msgsz;
770}
771
772#ifdef CONFIG_CHECKPOINT_RESTORE
773/*
774 * This function creates new kernel message structure, large enough to store
775 * bufsz message bytes.
776 */
777static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz,
778 int msgflg, long *msgtyp,
779 unsigned long *copy_number)
780{
781 struct msg_msg *copy;
782
783 *copy_number = *msgtyp;
784 *msgtyp = 0;
785 /*
786 * Create dummy message to copy real message to.
787 */
788 copy = load_msg(buf, bufsz);
789 if (!IS_ERR(copy))
790 copy->m_ts = bufsz;
791 return copy;
792}
793
794static inline void free_copy(struct msg_msg *copy)
795{
796 if (copy)
797 free_msg(copy);
798}
799#else
800static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz,
801 int msgflg, long *msgtyp,
802 unsigned long *copy_number)
803{
804 return ERR_PTR(-ENOSYS);
805}
806
807static inline void free_copy(struct msg_msg *copy)
808{
809}
810#endif
811
812long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
813 int msgflg,
814 long (*msg_handler)(void __user *, struct msg_msg *, size_t))
815{ 757{
816 struct msg_queue *msq; 758 struct msg_queue *msq;
817 struct msg_msg *msg; 759 struct msg_msg *msg;
818 int mode; 760 int mode;
819 struct ipc_namespace *ns; 761 struct ipc_namespace *ns;
820 struct msg_msg *copy = NULL;
821 unsigned long copy_number = 0;
822 762
823 if (msqid < 0 || (long) bufsz < 0) 763 if (msqid < 0 || (long) msgsz < 0)
824 return -EINVAL; 764 return -EINVAL;
825 if (msgflg & MSG_COPY) {
826 copy = prepare_copy(buf, bufsz, msgflg, &msgtyp, &copy_number);
827 if (IS_ERR(copy))
828 return PTR_ERR(copy);
829 }
830 mode = convert_mode(&msgtyp, msgflg); 765 mode = convert_mode(&msgtyp, msgflg);
831 ns = current->nsproxy->ipc_ns; 766 ns = current->nsproxy->ipc_ns;
832 767
833 msq = msg_lock_check(ns, msqid); 768 msq = msg_lock_check(ns, msqid);
834 if (IS_ERR(msq)) { 769 if (IS_ERR(msq))
835 free_copy(copy);
836 return PTR_ERR(msq); 770 return PTR_ERR(msq);
837 }
838 771
839 for (;;) { 772 for (;;) {
840 struct msg_receiver msr_d; 773 struct msg_receiver msr_d;
841 struct list_head *tmp; 774 struct list_head *tmp;
842 long msg_counter = 0;
843 775
844 msg = ERR_PTR(-EACCES); 776 msg = ERR_PTR(-EACCES);
845 if (ipcperms(ns, &msq->q_perm, S_IRUGO)) 777 if (ipcperms(ns, &msq->q_perm, S_IRUGO))
@@ -858,21 +790,12 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
858 msg = walk_msg; 790 msg = walk_msg;
859 if (mode == SEARCH_LESSEQUAL && 791 if (mode == SEARCH_LESSEQUAL &&
860 walk_msg->m_type != 1) { 792 walk_msg->m_type != 1) {
793 msg = walk_msg;
861 msgtyp = walk_msg->m_type - 1; 794 msgtyp = walk_msg->m_type - 1;
862 } else if (msgflg & MSG_COPY) { 795 } else {
863 if (copy_number == msg_counter) { 796 msg = walk_msg;
864 /*
865 * Found requested message.
866 * Copy it.
867 */
868 msg = copy_msg(msg, copy);
869 if (IS_ERR(msg))
870 goto out_unlock;
871 break;
872 }
873 } else
874 break; 797 break;
875 msg_counter++; 798 }
876 } 799 }
877 tmp = tmp->next; 800 tmp = tmp->next;
878 } 801 }
@@ -881,16 +804,10 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
881 * Found a suitable message. 804 * Found a suitable message.
882 * Unlink it from the queue. 805 * Unlink it from the queue.
883 */ 806 */
884 if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { 807 if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
885 msg = ERR_PTR(-E2BIG); 808 msg = ERR_PTR(-E2BIG);
886 goto out_unlock; 809 goto out_unlock;
887 } 810 }
888 /*
889 * If we are copying, then do not unlink message and do
890 * not update queue parameters.
891 */
892 if (msgflg & MSG_COPY)
893 goto out_unlock;
894 list_del(&msg->m_list); 811 list_del(&msg->m_list);
895 msq->q_qnum--; 812 msq->q_qnum--;
896 msq->q_rtime = get_seconds(); 813 msq->q_rtime = get_seconds();
@@ -914,7 +831,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
914 if (msgflg & MSG_NOERROR) 831 if (msgflg & MSG_NOERROR)
915 msr_d.r_maxsize = INT_MAX; 832 msr_d.r_maxsize = INT_MAX;
916 else 833 else
917 msr_d.r_maxsize = bufsz; 834 msr_d.r_maxsize = msgsz;
918 msr_d.r_msg = ERR_PTR(-EAGAIN); 835 msr_d.r_msg = ERR_PTR(-EAGAIN);
919 current->state = TASK_INTERRUPTIBLE; 836 current->state = TASK_INTERRUPTIBLE;
920 msg_unlock(msq); 837 msg_unlock(msq);
@@ -974,27 +891,37 @@ out_unlock:
974 break; 891 break;
975 } 892 }
976 } 893 }
977 if (IS_ERR(msg)) { 894 if (IS_ERR(msg))
978 free_copy(copy);
979 return PTR_ERR(msg); 895 return PTR_ERR(msg);
980 }
981 896
982 bufsz = msg_handler(buf, msg, bufsz); 897 msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz;
898 *pmtype = msg->m_type;
899 if (store_msg(mtext, msg, msgsz))
900 msgsz = -EFAULT;
901
983 free_msg(msg); 902 free_msg(msg);
984 903
985 return bufsz; 904 return msgsz;
986} 905}
987 906
988SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, 907SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
989 long, msgtyp, int, msgflg) 908 long, msgtyp, int, msgflg)
990{ 909{
991 return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill); 910 long err, mtype;
911
912 err = do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg);
913 if (err < 0)
914 goto out;
915
916 if (put_user(mtype, &msgp->mtype))
917 err = -EFAULT;
918out:
919 return err;
992} 920}
993 921
994#ifdef CONFIG_PROC_FS 922#ifdef CONFIG_PROC_FS
995static int sysvipc_msg_proc_show(struct seq_file *s, void *it) 923static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
996{ 924{
997 struct user_namespace *user_ns = seq_user_ns(s);
998 struct msg_queue *msq = it; 925 struct msg_queue *msq = it;
999 926
1000 return seq_printf(s, 927 return seq_printf(s,
@@ -1006,10 +933,10 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
1006 msq->q_qnum, 933 msq->q_qnum,
1007 msq->q_lspid, 934 msq->q_lspid,
1008 msq->q_lrpid, 935 msq->q_lrpid,
1009 from_kuid_munged(user_ns, msq->q_perm.uid), 936 msq->q_perm.uid,
1010 from_kgid_munged(user_ns, msq->q_perm.gid), 937 msq->q_perm.gid,
1011 from_kuid_munged(user_ns, msq->q_perm.cuid), 938 msq->q_perm.cuid,
1012 from_kgid_munged(user_ns, msq->q_perm.cgid), 939 msq->q_perm.cgid,
1013 msq->q_stime, 940 msq->q_stime,
1014 msq->q_rtime, 941 msq->q_rtime,
1015 msq->q_ctime); 942 msq->q_ctime);
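
For reference, the userspace path these do_msgrcv() hunks service looks roughly like the sketch below (not part of the patch; the struct name and buffer size are arbitrary): the kernel writes msg->m_type into mtype and copies at most msgsz bytes of payload, failing with E2BIG unless MSG_NOERROR is set.

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/msg.h>

struct msgbuf_ex {                      /* hypothetical buffer layout */
    long mtype;                         /* message type, must be > 0 */
    char mtext[128];                    /* payload */
};

int main(void)
{
    struct msgbuf_ex buf = { .mtype = 1 };
    int msqid = msgget(IPC_PRIVATE, IPC_CREAT | 0600);

    if (msqid < 0)
        return 1;

    strcpy(buf.mtext, "hello");
    msgsnd(msqid, &buf, strlen(buf.mtext) + 1, 0);

    /* msgtyp == 0: take the first message; payload truncated to msgsz */
    ssize_t n = msgrcv(msqid, &buf, sizeof(buf.mtext), 0, MSG_NOERROR);
    if (n >= 0)
        printf("type %ld, %zd bytes: %s\n", buf.mtype, n, buf.mtext);

    msgctl(msqid, IPC_RMID, NULL);
    return 0;
}
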
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index ebfcbfa8b7f..8b5ce5d3f3e 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -13,10 +13,7 @@
13#include <linux/security.h> 13#include <linux/security.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/ipc.h> 15#include <linux/ipc.h>
16#include <linux/msg.h>
17#include <linux/ipc_namespace.h> 16#include <linux/ipc_namespace.h>
18#include <linux/utsname.h>
19#include <linux/proc_fs.h>
20#include <asm/uaccess.h> 17#include <asm/uaccess.h>
21 18
22#include "util.h" 19#include "util.h"
@@ -30,8 +27,12 @@ DEFINE_SPINLOCK(mq_lock);
30 */ 27 */
31struct ipc_namespace init_ipc_ns = { 28struct ipc_namespace init_ipc_ns = {
32 .count = ATOMIC_INIT(1), 29 .count = ATOMIC_INIT(1),
30#ifdef CONFIG_POSIX_MQUEUE
31 .mq_queues_max = DFLT_QUEUESMAX,
32 .mq_msg_max = DFLT_MSGMAX,
33 .mq_msgsize_max = DFLT_MSGSIZEMAX,
34#endif
33 .user_ns = &init_user_ns, 35 .user_ns = &init_user_ns,
34 .proc_inum = PROC_IPC_INIT_INO,
35}; 36};
36 37
37atomic_t nr_ipc_ns = ATOMIC_INIT(1); 38atomic_t nr_ipc_ns = ATOMIC_INIT(1);
@@ -102,50 +103,7 @@ out_err:
102 free_msg(msg); 103 free_msg(msg);
103 return ERR_PTR(err); 104 return ERR_PTR(err);
104} 105}
105#ifdef CONFIG_CHECKPOINT_RESTORE
106struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst)
107{
108 struct msg_msgseg *dst_pseg, *src_pseg;
109 int len = src->m_ts;
110 int alen;
111
112 BUG_ON(dst == NULL);
113 if (src->m_ts > dst->m_ts)
114 return ERR_PTR(-EINVAL);
115
116 alen = len;
117 if (alen > DATALEN_MSG)
118 alen = DATALEN_MSG;
119
120 dst->next = NULL;
121 dst->security = NULL;
122 106
123 memcpy(dst + 1, src + 1, alen);
124
125 len -= alen;
126 dst_pseg = dst->next;
127 src_pseg = src->next;
128 while (len > 0) {
129 alen = len;
130 if (alen > DATALEN_SEG)
131 alen = DATALEN_SEG;
132 memcpy(dst_pseg + 1, src_pseg + 1, alen);
133 dst_pseg = dst_pseg->next;
134 len -= alen;
135 src_pseg = src_pseg->next;
136 }
137
138 dst->m_type = src->m_type;
139 dst->m_ts = src->m_ts;
140
141 return dst;
142}
143#else
144struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst)
145{
146 return ERR_PTR(-ENOSYS);
147}
148#endif
149int store_msg(void __user *dest, struct msg_msg *msg, int len) 107int store_msg(void __user *dest, struct msg_msg *msg, int len)
150{ 108{
151 int alen; 109 int alen;
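
The copy_msg() block removed here backs msgrcv(2)'s MSG_COPY peek mode (used by checkpoint/restore to dump queues without draining them), so this tree drops that mode entirely. As a rough, hedged sketch of how a kernel that does support it is driven from userspace; MSG_COPY's value is assumed from <linux/msg.h> since glibc headers may not define it:

#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/msg.h>

#ifndef MSG_COPY
#define MSG_COPY 040000     /* assumed value from <linux/msg.h> */
#endif

struct msgbuf_ex {
    long mtype;
    char mtext[128];
};

/* Copy out the index-th message without removing it from the queue.
 * Requires a kernel built with CONFIG_CHECKPOINT_RESTORE; this tree
 * drops the feature entirely. */
static ssize_t peek_message(int msqid, unsigned long index, struct msgbuf_ex *buf)
{
    /* MSG_COPY must be paired with IPC_NOWAIT */
    return msgrcv(msqid, buf, sizeof(buf->mtext), (long) index,
                  MSG_COPY | IPC_NOWAIT | MSG_NOERROR);
}
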
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 7c1fa451b0b..ce0a647869b 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -16,7 +16,7 @@
16 16
17#include "util.h" 17#include "util.h"
18 18
19static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, 19static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk,
20 struct ipc_namespace *old_ns) 20 struct ipc_namespace *old_ns)
21{ 21{
22 struct ipc_namespace *ns; 22 struct ipc_namespace *ns;
@@ -26,16 +26,9 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
26 if (ns == NULL) 26 if (ns == NULL)
27 return ERR_PTR(-ENOMEM); 27 return ERR_PTR(-ENOMEM);
28 28
29 err = proc_alloc_inum(&ns->proc_inum);
30 if (err) {
31 kfree(ns);
32 return ERR_PTR(err);
33 }
34
35 atomic_set(&ns->count, 1); 29 atomic_set(&ns->count, 1);
36 err = mq_init_ns(ns); 30 err = mq_init_ns(ns);
37 if (err) { 31 if (err) {
38 proc_free_inum(ns->proc_inum);
39 kfree(ns); 32 kfree(ns);
40 return ERR_PTR(err); 33 return ERR_PTR(err);
41 } 34 }
@@ -53,17 +46,19 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
53 ipcns_notify(IPCNS_CREATED); 46 ipcns_notify(IPCNS_CREATED);
54 register_ipcns_notifier(ns); 47 register_ipcns_notifier(ns);
55 48
56 ns->user_ns = get_user_ns(user_ns); 49 ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns);
57 50
58 return ns; 51 return ns;
59} 52}
60 53
61struct ipc_namespace *copy_ipcs(unsigned long flags, 54struct ipc_namespace *copy_ipcs(unsigned long flags,
62 struct user_namespace *user_ns, struct ipc_namespace *ns) 55 struct task_struct *tsk)
63{ 56{
57 struct ipc_namespace *ns = tsk->nsproxy->ipc_ns;
58
64 if (!(flags & CLONE_NEWIPC)) 59 if (!(flags & CLONE_NEWIPC))
65 return get_ipc_ns(ns); 60 return get_ipc_ns(ns);
66 return create_ipc_ns(user_ns, ns); 61 return create_ipc_ns(tsk, ns);
67} 62}
68 63
69/* 64/*
@@ -118,7 +113,6 @@ static void free_ipc_ns(struct ipc_namespace *ns)
118 */ 113 */
119 ipcns_notify(IPCNS_REMOVED); 114 ipcns_notify(IPCNS_REMOVED);
120 put_user_ns(ns->user_ns); 115 put_user_ns(ns->user_ns);
121 proc_free_inum(ns->proc_inum);
122 kfree(ns); 116 kfree(ns);
123} 117}
124 118
@@ -167,13 +161,8 @@ static void ipcns_put(void *ns)
167 return put_ipc_ns(ns); 161 return put_ipc_ns(ns);
168} 162}
169 163
170static int ipcns_install(struct nsproxy *nsproxy, void *new) 164static int ipcns_install(struct nsproxy *nsproxy, void *ns)
171{ 165{
172 struct ipc_namespace *ns = new;
173 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
174 !nsown_capable(CAP_SYS_ADMIN))
175 return -EPERM;
176
177 /* Ditch state from the old ipc namespace */ 166 /* Ditch state from the old ipc namespace */
178 exit_sem(current); 167 exit_sem(current);
179 put_ipc_ns(nsproxy->ipc_ns); 168 put_ipc_ns(nsproxy->ipc_ns);
@@ -181,18 +170,10 @@ static int ipcns_install(struct nsproxy *nsproxy, void *new)
181 return 0; 170 return 0;
182} 171}
183 172
184static unsigned int ipcns_inum(void *vp)
185{
186 struct ipc_namespace *ns = vp;
187
188 return ns->proc_inum;
189}
190
191const struct proc_ns_operations ipcns_operations = { 173const struct proc_ns_operations ipcns_operations = {
192 .name = "ipc", 174 .name = "ipc",
193 .type = CLONE_NEWIPC, 175 .type = CLONE_NEWIPC,
194 .get = ipcns_get, 176 .get = ipcns_get,
195 .put = ipcns_put, 177 .put = ipcns_put,
196 .install = ipcns_install, 178 .install = ipcns_install,
197 .inum = ipcns_inum,
198}; 179};
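
These namespace.c hunks drop the proc_inum bookkeeping and the capability checks in ipcns_install(), i.e. the pieces behind /proc/<pid>/ns/ipc and setns(2). A minimal, hedged userspace sketch of that interface (not part of the patch):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    char link[64];
    ssize_t n;

    if (unshare(CLONE_NEWIPC)) {            /* needs CAP_SYS_ADMIN */
        perror("unshare");
        return 1;
    }

    /* The symlink target ("ipc:[<inum>]") is what the removed proc_inum
     * and .inum hook provided. */
    n = readlink("/proc/self/ns/ipc", link, sizeof(link) - 1);
    if (n > 0) {
        link[n] = '\0';
        printf("now in %s\n", link);
    }
    return 0;
}
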
diff --git a/ipc/sem.c b/ipc/sem.c
index 58d31f1c1eb..c8e00f8b4be 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -90,52 +90,6 @@
90#include <asm/uaccess.h> 90#include <asm/uaccess.h>
91#include "util.h" 91#include "util.h"
92 92
93/* One semaphore structure for each semaphore in the system. */
94struct sem {
95 int semval; /* current value */
96 int sempid; /* pid of last operation */
97 struct list_head sem_pending; /* pending single-sop operations */
98};
99
100/* One queue for each sleeping process in the system. */
101struct sem_queue {
102 struct list_head simple_list; /* queue of pending operations */
103 struct list_head list; /* queue of pending operations */
104 struct task_struct *sleeper; /* this process */
105 struct sem_undo *undo; /* undo structure */
106 int pid; /* process id of requesting process */
107 int status; /* completion status of operation */
108 struct sembuf *sops; /* array of pending operations */
109 int nsops; /* number of operations */
110 int alter; /* does *sops alter the array? */
111};
112
113/* Each task has a list of undo requests. They are executed automatically
114 * when the process exits.
115 */
116struct sem_undo {
117 struct list_head list_proc; /* per-process list: *
118 * all undos from one process
119 * rcu protected */
120 struct rcu_head rcu; /* rcu struct for sem_undo */
121 struct sem_undo_list *ulp; /* back ptr to sem_undo_list */
122 struct list_head list_id; /* per semaphore array list:
123 * all undos for one array */
124 int semid; /* semaphore set identifier */
125 short *semadj; /* array of adjustments */
126 /* one per semaphore */
127};
128
129/* sem_undo_list controls shared access to the list of sem_undo structures
130 * that may be shared among all a CLONE_SYSVSEM task group.
131 */
132struct sem_undo_list {
133 atomic_t refcnt;
134 spinlock_t lock;
135 struct list_head list_proc;
136};
137
138
139#define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS]) 93#define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS])
140 94
141#define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm) 95#define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm)
@@ -1104,9 +1058,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid,
1104 freeary(ns, ipcp); 1058 freeary(ns, ipcp);
1105 goto out_up; 1059 goto out_up;
1106 case IPC_SET: 1060 case IPC_SET:
1107 err = ipc_update_perm(&semid64.sem_perm, ipcp); 1061 ipc_update_perm(&semid64.sem_perm, ipcp);
1108 if (err)
1109 goto out_unlock;
1110 sma->sem_ctime = get_seconds(); 1062 sma->sem_ctime = get_seconds();
1111 break; 1063 break;
1112 default: 1064 default:
@@ -1474,8 +1426,6 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
1474 1426
1475 queue.status = -EINTR; 1427 queue.status = -EINTR;
1476 queue.sleeper = current; 1428 queue.sleeper = current;
1477
1478sleep_again:
1479 current->state = TASK_INTERRUPTIBLE; 1429 current->state = TASK_INTERRUPTIBLE;
1480 sem_unlock(sma); 1430 sem_unlock(sma);
1481 1431
@@ -1510,6 +1460,7 @@ sleep_again:
1510 * Array removed? If yes, leave without sem_unlock(). 1460 * Array removed? If yes, leave without sem_unlock().
1511 */ 1461 */
1512 if (IS_ERR(sma)) { 1462 if (IS_ERR(sma)) {
1463 error = -EIDRM;
1513 goto out_free; 1464 goto out_free;
1514 } 1465 }
1515 1466
@@ -1528,13 +1479,6 @@ sleep_again:
1528 */ 1479 */
1529 if (timeout && jiffies_left == 0) 1480 if (timeout && jiffies_left == 0)
1530 error = -EAGAIN; 1481 error = -EAGAIN;
1531
1532 /*
1533 * If the wakeup was spurious, just retry
1534 */
1535 if (error == -EINTR && !signal_pending(current))
1536 goto sleep_again;
1537
1538 unlink_queue(sma, &queue); 1482 unlink_queue(sma, &queue);
1539 1483
1540out_unlock_free: 1484out_unlock_free:
@@ -1679,7 +1623,6 @@ void exit_sem(struct task_struct *tsk)
1679#ifdef CONFIG_PROC_FS 1623#ifdef CONFIG_PROC_FS
1680static int sysvipc_sem_proc_show(struct seq_file *s, void *it) 1624static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
1681{ 1625{
1682 struct user_namespace *user_ns = seq_user_ns(s);
1683 struct sem_array *sma = it; 1626 struct sem_array *sma = it;
1684 1627
1685 return seq_printf(s, 1628 return seq_printf(s,
@@ -1688,10 +1631,10 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
1688 sma->sem_perm.id, 1631 sma->sem_perm.id,
1689 sma->sem_perm.mode, 1632 sma->sem_perm.mode,
1690 sma->sem_nsems, 1633 sma->sem_nsems,
1691 from_kuid_munged(user_ns, sma->sem_perm.uid), 1634 sma->sem_perm.uid,
1692 from_kgid_munged(user_ns, sma->sem_perm.gid), 1635 sma->sem_perm.gid,
1693 from_kuid_munged(user_ns, sma->sem_perm.cuid), 1636 sma->sem_perm.cuid,
1694 from_kgid_munged(user_ns, sma->sem_perm.cgid), 1637 sma->sem_perm.cgid,
1695 sma->sem_otime, 1638 sma->sem_otime,
1696 sma->sem_ctime); 1639 sma->sem_ctime);
1697} 1640}
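
The sem.c changes above remove the spurious-wakeup retry loop from semtimedop() and restore -EIDRM when the array disappears while the caller sleeps. For orientation, the syscall being serviced can be exercised from userspace roughly as follows (a sketch, not part of the patch; semtimedop() needs _GNU_SOURCE with glibc):

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/ipc.h>
#include <sys/sem.h>
#include <time.h>

int main(void)
{
    int semid = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
    struct sembuf op = { .sem_num = 0, .sem_op = -1, .sem_flg = SEM_UNDO };
    struct timespec timeout = { .tv_sec = 1, .tv_nsec = 0 };

    if (semid < 0)
        return 1;

    /* The semaphore starts at 0, so this "P" operation sleeps in
     * sys_semtimedop() and returns EAGAIN after roughly one second. */
    if (semtimedop(semid, &op, 1, &timeout))
        perror("semtimedop");

    semctl(semid, 0, IPC_RMID);
    return 0;
}
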
diff --git a/ipc/shm.c b/ipc/shm.c
index 4fa6d8fee73..02ecf2c078f 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -393,16 +393,6 @@ static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
393 return sfd->file->f_op->fsync(sfd->file, start, end, datasync); 393 return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
394} 394}
395 395
396static long shm_fallocate(struct file *file, int mode, loff_t offset,
397 loff_t len)
398{
399 struct shm_file_data *sfd = shm_file_data(file);
400
401 if (!sfd->file->f_op->fallocate)
402 return -EOPNOTSUPP;
403 return sfd->file->f_op->fallocate(file, mode, offset, len);
404}
405
406static unsigned long shm_get_unmapped_area(struct file *file, 396static unsigned long shm_get_unmapped_area(struct file *file,
407 unsigned long addr, unsigned long len, unsigned long pgoff, 397 unsigned long addr, unsigned long len, unsigned long pgoff,
408 unsigned long flags) 398 unsigned long flags)
@@ -420,7 +410,6 @@ static const struct file_operations shm_file_operations = {
420 .get_unmapped_area = shm_get_unmapped_area, 410 .get_unmapped_area = shm_get_unmapped_area,
421#endif 411#endif
422 .llseek = noop_llseek, 412 .llseek = noop_llseek,
423 .fallocate = shm_fallocate,
424}; 413};
425 414
426static const struct file_operations shm_file_operations_huge = { 415static const struct file_operations shm_file_operations_huge = {
@@ -429,7 +418,6 @@ static const struct file_operations shm_file_operations_huge = {
429 .release = shm_release, 418 .release = shm_release,
430 .get_unmapped_area = shm_get_unmapped_area, 419 .get_unmapped_area = shm_get_unmapped_area,
431 .llseek = noop_llseek, 420 .llseek = noop_llseek,
432 .fallocate = shm_fallocate,
433}; 421};
434 422
435int is_file_shm_hugepages(struct file *file) 423int is_file_shm_hugepages(struct file *file)
@@ -494,9 +482,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
494 /* hugetlb_file_setup applies strict accounting */ 482 /* hugetlb_file_setup applies strict accounting */
495 if (shmflg & SHM_NORESERVE) 483 if (shmflg & SHM_NORESERVE)
496 acctflag = VM_NORESERVE; 484 acctflag = VM_NORESERVE;
497 file = hugetlb_file_setup(name, 0, size, acctflag, 485 file = hugetlb_file_setup(name, size, acctflag,
498 &shp->mlock_user, HUGETLB_SHMFS_INODE, 486 &shp->mlock_user, HUGETLB_SHMFS_INODE);
499 (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
500 } else { 487 } else {
501 /* 488 /*
502 * Do not allow no accounting for OVERCOMMIT_NEVER, even 489 * Do not allow no accounting for OVERCOMMIT_NEVER, even
@@ -759,9 +746,7 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
759 do_shm_rmid(ns, ipcp); 746 do_shm_rmid(ns, ipcp);
760 goto out_up; 747 goto out_up;
761 case IPC_SET: 748 case IPC_SET:
762 err = ipc_update_perm(&shmid64.shm_perm, ipcp); 749 ipc_update_perm(&shmid64.shm_perm, ipcp);
763 if (err)
764 goto out_unlock;
765 shp->shm_ctim = get_seconds(); 750 shp->shm_ctim = get_seconds();
766 break; 751 break;
767 default: 752 default:
@@ -885,7 +870,9 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
885 case SHM_LOCK: 870 case SHM_LOCK:
886 case SHM_UNLOCK: 871 case SHM_UNLOCK:
887 { 872 {
888 struct file *shm_file; 873 struct file *uninitialized_var(shm_file);
874
875 lru_add_drain_all(); /* drain pagevecs to lru lists */
889 876
890 shp = shm_lock_check(ns, shmid); 877 shp = shm_lock_check(ns, shmid);
891 if (IS_ERR(shp)) { 878 if (IS_ERR(shp)) {
@@ -896,10 +883,10 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
896 audit_ipc_obj(&(shp->shm_perm)); 883 audit_ipc_obj(&(shp->shm_perm));
897 884
898 if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) { 885 if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
899 kuid_t euid = current_euid(); 886 uid_t euid = current_euid();
900 err = -EPERM; 887 err = -EPERM;
901 if (!uid_eq(euid, shp->shm_perm.uid) && 888 if (euid != shp->shm_perm.uid &&
902 !uid_eq(euid, shp->shm_perm.cuid)) 889 euid != shp->shm_perm.cuid)
903 goto out_unlock; 890 goto out_unlock;
904 if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) 891 if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK))
905 goto out_unlock; 892 goto out_unlock;
@@ -908,31 +895,22 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
908 err = security_shm_shmctl(shp, cmd); 895 err = security_shm_shmctl(shp, cmd);
909 if (err) 896 if (err)
910 goto out_unlock; 897 goto out_unlock;
911 898
912 shm_file = shp->shm_file; 899 if(cmd==SHM_LOCK) {
913 if (is_file_hugepages(shm_file))
914 goto out_unlock;
915
916 if (cmd == SHM_LOCK) {
917 struct user_struct *user = current_user(); 900 struct user_struct *user = current_user();
918 err = shmem_lock(shm_file, 1, user); 901 if (!is_file_hugepages(shp->shm_file)) {
919 if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) { 902 err = shmem_lock(shp->shm_file, 1, user);
920 shp->shm_perm.mode |= SHM_LOCKED; 903 if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){
921 shp->mlock_user = user; 904 shp->shm_perm.mode |= SHM_LOCKED;
905 shp->mlock_user = user;
906 }
922 } 907 }
923 goto out_unlock; 908 } else if (!is_file_hugepages(shp->shm_file)) {
909 shmem_lock(shp->shm_file, 0, shp->mlock_user);
910 shp->shm_perm.mode &= ~SHM_LOCKED;
911 shp->mlock_user = NULL;
924 } 912 }
925
926 /* SHM_UNLOCK */
927 if (!(shp->shm_perm.mode & SHM_LOCKED))
928 goto out_unlock;
929 shmem_lock(shm_file, 0, shp->mlock_user);
930 shp->shm_perm.mode &= ~SHM_LOCKED;
931 shp->mlock_user = NULL;
932 get_file(shm_file);
933 shm_unlock(shp); 913 shm_unlock(shp);
934 shmem_unlock_mapping(shm_file->f_mapping);
935 fput(shm_file);
936 goto out; 914 goto out;
937 } 915 }
938 case IPC_RMID: 916 case IPC_RMID:
@@ -956,8 +934,7 @@ out:
956 * "raddr" thing points to kernel space, and there has to be a wrapper around 934 * "raddr" thing points to kernel space, and there has to be a wrapper around
957 * this. 935 * this.
958 */ 936 */
959long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, 937long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
960 unsigned long shmlba)
961{ 938{
962 struct shmid_kernel *shp; 939 struct shmid_kernel *shp;
963 unsigned long addr; 940 unsigned long addr;
@@ -977,9 +954,9 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
977 if (shmid < 0) 954 if (shmid < 0)
978 goto out; 955 goto out;
979 else if ((addr = (ulong)shmaddr)) { 956 else if ((addr = (ulong)shmaddr)) {
980 if (addr & (shmlba - 1)) { 957 if (addr & (SHMLBA-1)) {
981 if (shmflg & SHM_RND) 958 if (shmflg & SHM_RND)
982 addr &= ~(shmlba - 1); /* round down */ 959 addr &= ~(SHMLBA-1); /* round down */
983 else 960 else
984#ifndef __ARCH_FORCE_SHMLBA 961#ifndef __ARCH_FORCE_SHMLBA
985 if (addr & ~PAGE_MASK) 962 if (addr & ~PAGE_MASK)
@@ -1052,10 +1029,6 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
1052 sfd->file = shp->shm_file; 1029 sfd->file = shp->shm_file;
1053 sfd->vm_ops = NULL; 1030 sfd->vm_ops = NULL;
1054 1031
1055 err = security_mmap_file(file, prot, flags);
1056 if (err)
1057 goto out_fput;
1058
1059 down_write(&current->mm->mmap_sem); 1032 down_write(&current->mm->mmap_sem);
1060 if (addr && !(shmflg & SHM_REMAP)) { 1033 if (addr && !(shmflg & SHM_REMAP)) {
1061 err = -EINVAL; 1034 err = -EINVAL;
@@ -1070,7 +1043,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
1070 goto invalid; 1043 goto invalid;
1071 } 1044 }
1072 1045
1073 user_addr = do_mmap_pgoff(file, addr, size, prot, flags, 0); 1046 user_addr = do_mmap (file, addr, size, prot, flags, 0);
1074 *raddr = user_addr; 1047 *raddr = user_addr;
1075 err = 0; 1048 err = 0;
1076 if (IS_ERR_VALUE(user_addr)) 1049 if (IS_ERR_VALUE(user_addr))
@@ -1078,7 +1051,6 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
1078invalid: 1051invalid:
1079 up_write(&current->mm->mmap_sem); 1052 up_write(&current->mm->mmap_sem);
1080 1053
1081out_fput:
1082 fput(file); 1054 fput(file);
1083 1055
1084out_nattch: 1056out_nattch:
@@ -1111,7 +1083,7 @@ SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
1111 unsigned long ret; 1083 unsigned long ret;
1112 long err; 1084 long err;
1113 1085
1114 err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA); 1086 err = do_shmat(shmid, shmaddr, shmflg, &ret);
1115 if (err) 1087 if (err)
1116 return err; 1088 return err;
1117 force_successful_syscall_return(); 1089 force_successful_syscall_return();
@@ -1223,7 +1195,6 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
1223#ifdef CONFIG_PROC_FS 1195#ifdef CONFIG_PROC_FS
1224static int sysvipc_shm_proc_show(struct seq_file *s, void *it) 1196static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
1225{ 1197{
1226 struct user_namespace *user_ns = seq_user_ns(s);
1227 struct shmid_kernel *shp = it; 1198 struct shmid_kernel *shp = it;
1228 unsigned long rss = 0, swp = 0; 1199 unsigned long rss = 0, swp = 0;
1229 1200
@@ -1246,10 +1217,10 @@ static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
1246 shp->shm_cprid, 1217 shp->shm_cprid,
1247 shp->shm_lprid, 1218 shp->shm_lprid,
1248 shp->shm_nattch, 1219 shp->shm_nattch,
1249 from_kuid_munged(user_ns, shp->shm_perm.uid), 1220 shp->shm_perm.uid,
1250 from_kgid_munged(user_ns, shp->shm_perm.gid), 1221 shp->shm_perm.gid,
1251 from_kuid_munged(user_ns, shp->shm_perm.cuid), 1222 shp->shm_perm.cuid,
1252 from_kgid_munged(user_ns, shp->shm_perm.cgid), 1223 shp->shm_perm.cgid,
1253 shp->shm_atim, 1224 shp->shm_atim,
1254 shp->shm_dtim, 1225 shp->shm_dtim,
1255 shp->shm_ctim, 1226 shp->shm_ctim,
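
The shm.c hunks revert do_shmat() to the global SHMLBA alignment and fold the SHM_LOCK/SHM_UNLOCK handling back into shmctl() without the separate shmem_unlock_mapping() step. A minimal, hedged userspace sketch of the calls involved (not part of the patch):

#include <stdio.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(void)
{
    int shmid = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
    void *p;

    if (shmid < 0)
        return 1;

    /* NULL address: do_shmat() picks an SHMLBA-aligned mapping itself;
     * with an explicit address, SHM_RND rounds it down to SHMLBA. */
    p = shmat(shmid, NULL, 0);
    if (p == (void *) -1)
        perror("shmat");

    /* SHM_LOCK pins the segment; needs CAP_IPC_LOCK, or ownership plus
     * RLIMIT_MEMLOCK headroom, as checked in the hunk above. */
    if (shmctl(shmid, SHM_LOCK, NULL))
        perror("shmctl(SHM_LOCK)");

    if (p != (void *) -1)
        shmdt(p);
    shmctl(shmid, IPC_RMID, NULL);
    return 0;
}
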
diff --git a/ipc/syscall.c b/ipc/syscall.c
index 0d1e32ce048..1d6f53f6b56 100644
--- a/ipc/syscall.c
+++ b/ipc/syscall.c
@@ -73,7 +73,7 @@ SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second,
73 default: { 73 default: {
74 unsigned long raddr; 74 unsigned long raddr;
75 ret = do_shmat(first, (char __user *)ptr, 75 ret = do_shmat(first, (char __user *)ptr,
76 second, &raddr, SHMLBA); 76 second, &raddr);
77 if (ret) 77 if (ret)
78 return ret; 78 return ret;
79 return put_user(raddr, (unsigned long __user *) third); 79 return put_user(raddr, (unsigned long __user *) third);
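
This keeps the legacy ipc(2) multiplexer calling do_shmat() without the SHMLBA argument. On architectures that still expose that multiplexer, the SHMAT sub-call unpacks its arguments exactly as shown above; a hedged sketch of that calling convention (not part of the patch; SYS_ipc and the SHMAT sub-call number are arch-dependent, so the constant is hard-coded here as an assumption):

#define _GNU_SOURCE
#include <errno.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/syscall.h>
#include <unistd.h>

#define SHMAT_CALL 21   /* assumed sub-call number from <linux/ipc.h> */

static void *shmat_via_ipc(int shmid, const void *shmaddr, int shmflg)
{
#ifdef SYS_ipc
    unsigned long raddr;

    /* call=SHMAT, first=shmid, second=shmflg, third=&raddr, ptr=shmaddr,
     * matching the unpacking in the hunk above. */
    if (syscall(SYS_ipc, SHMAT_CALL, shmid, shmflg, &raddr, shmaddr) < 0)
        return (void *) -1;
    return (void *) raddr;
#else
    (void) shmid; (void) shmaddr; (void) shmflg;
    errno = ENOSYS;     /* this architecture only has the direct shmat(2) */
    return (void *) -1;
#endif
}
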
diff --git a/ipc/util.c b/ipc/util.c
index 74e1d9c7a98..75261a31d48 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -122,7 +122,6 @@ void ipc_init_ids(struct ipc_ids *ids)
122 122
123 ids->in_use = 0; 123 ids->in_use = 0;
124 ids->seq = 0; 124 ids->seq = 0;
125 ids->next_id = -1;
126 { 125 {
127 int seq_limit = INT_MAX/SEQ_MULTIPLIER; 126 int seq_limit = INT_MAX/SEQ_MULTIPLIER;
128 if (seq_limit > USHRT_MAX) 127 if (seq_limit > USHRT_MAX)
@@ -250,10 +249,9 @@ int ipc_get_maxid(struct ipc_ids *ids)
250 249
251int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) 250int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
252{ 251{
253 kuid_t euid; 252 uid_t euid;
254 kgid_t egid; 253 gid_t egid;
255 int id, err; 254 int id, err;
256 int next_id = ids->next_id;
257 255
258 if (size > IPCMNI) 256 if (size > IPCMNI)
259 size = IPCMNI; 257 size = IPCMNI;
@@ -266,8 +264,7 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
266 rcu_read_lock(); 264 rcu_read_lock();
267 spin_lock(&new->lock); 265 spin_lock(&new->lock);
268 266
269 err = idr_get_new_above(&ids->ipcs_idr, new, 267 err = idr_get_new(&ids->ipcs_idr, new, &id);
270 (next_id < 0) ? 0 : ipcid_to_idx(next_id), &id);
271 if (err) { 268 if (err) {
272 spin_unlock(&new->lock); 269 spin_unlock(&new->lock);
273 rcu_read_unlock(); 270 rcu_read_unlock();
@@ -280,14 +277,9 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
280 new->cuid = new->uid = euid; 277 new->cuid = new->uid = euid;
281 new->gid = new->cgid = egid; 278 new->gid = new->cgid = egid;
282 279
283 if (next_id < 0) { 280 new->seq = ids->seq++;
284 new->seq = ids->seq++; 281 if(ids->seq > ids->seq_max)
285 if (ids->seq > ids->seq_max) 282 ids->seq = 0;
286 ids->seq = 0;
287 } else {
288 new->seq = ipcid_to_seqx(next_id);
289 ids->next_id = -1;
290 }
291 283
292 new->id = ipc_buildid(id, new->seq); 284 new->id = ipc_buildid(id, new->seq);
293 return id; 285 return id;
@@ -614,14 +606,14 @@ void ipc_rcu_putref(void *ptr)
614 606
615int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flag) 607int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flag)
616{ 608{
617 kuid_t euid = current_euid(); 609 uid_t euid = current_euid();
618 int requested_mode, granted_mode; 610 int requested_mode, granted_mode;
619 611
620 audit_ipc_obj(ipcp); 612 audit_ipc_obj(ipcp);
621 requested_mode = (flag >> 6) | (flag >> 3) | flag; 613 requested_mode = (flag >> 6) | (flag >> 3) | flag;
622 granted_mode = ipcp->mode; 614 granted_mode = ipcp->mode;
623 if (uid_eq(euid, ipcp->cuid) || 615 if (euid == ipcp->cuid ||
624 uid_eq(euid, ipcp->uid)) 616 euid == ipcp->uid)
625 granted_mode >>= 6; 617 granted_mode >>= 6;
626 else if (in_group_p(ipcp->cgid) || in_group_p(ipcp->gid)) 618 else if (in_group_p(ipcp->cgid) || in_group_p(ipcp->gid))
627 granted_mode >>= 3; 619 granted_mode >>= 3;
@@ -651,10 +643,10 @@ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flag)
651void kernel_to_ipc64_perm (struct kern_ipc_perm *in, struct ipc64_perm *out) 643void kernel_to_ipc64_perm (struct kern_ipc_perm *in, struct ipc64_perm *out)
652{ 644{
653 out->key = in->key; 645 out->key = in->key;
654 out->uid = from_kuid_munged(current_user_ns(), in->uid); 646 out->uid = in->uid;
655 out->gid = from_kgid_munged(current_user_ns(), in->gid); 647 out->gid = in->gid;
656 out->cuid = from_kuid_munged(current_user_ns(), in->cuid); 648 out->cuid = in->cuid;
657 out->cgid = from_kgid_munged(current_user_ns(), in->cgid); 649 out->cgid = in->cgid;
658 out->mode = in->mode; 650 out->mode = in->mode;
659 out->seq = in->seq; 651 out->seq = in->seq;
660} 652}
@@ -755,19 +747,12 @@ int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
755 * @in: the permission given as input. 747 * @in: the permission given as input.
756 * @out: the permission of the ipc to set. 748 * @out: the permission of the ipc to set.
757 */ 749 */
758int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out) 750void ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out)
759{ 751{
760 kuid_t uid = make_kuid(current_user_ns(), in->uid); 752 out->uid = in->uid;
761 kgid_t gid = make_kgid(current_user_ns(), in->gid); 753 out->gid = in->gid;
762 if (!uid_valid(uid) || !gid_valid(gid))
763 return -EINVAL;
764
765 out->uid = uid;
766 out->gid = gid;
767 out->mode = (out->mode & ~S_IRWXUGO) 754 out->mode = (out->mode & ~S_IRWXUGO)
768 | (in->mode & S_IRWXUGO); 755 | (in->mode & S_IRWXUGO);
769
770 return 0;
771} 756}
772 757
773/** 758/**
@@ -792,7 +777,7 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
792 struct ipc64_perm *perm, int extra_perm) 777 struct ipc64_perm *perm, int extra_perm)
793{ 778{
794 struct kern_ipc_perm *ipcp; 779 struct kern_ipc_perm *ipcp;
795 kuid_t euid; 780 uid_t euid;
796 int err; 781 int err;
797 782
798 down_write(&ids->rw_mutex); 783 down_write(&ids->rw_mutex);
@@ -808,7 +793,7 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
808 perm->gid, perm->mode); 793 perm->gid, perm->mode);
809 794
810 euid = current_euid(); 795 euid = current_euid();
811 if (uid_eq(euid, ipcp->cuid) || uid_eq(euid, ipcp->uid) || 796 if (euid == ipcp->cuid || euid == ipcp->uid ||
812 ns_capable(ns->user_ns, CAP_SYS_ADMIN)) 797 ns_capable(ns->user_ns, CAP_SYS_ADMIN))
813 return ipcp; 798 return ipcp;
814 799
@@ -819,7 +804,7 @@ out_up:
819 return ERR_PTR(err); 804 return ERR_PTR(err);
820} 805}
821 806
822#ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION 807#ifdef __ARCH_WANT_IPC_PARSE_VERSION
823 808
824 809
825/** 810/**
@@ -841,7 +826,7 @@ int ipc_parse_version (int *cmd)
841 } 826 }
842} 827}
843 828
844#endif /* CONFIG_ARCH_WANT_IPC_PARSE_VERSION */ 829#endif /* __ARCH_WANT_IPC_PARSE_VERSION */
845 830
846#ifdef CONFIG_PROC_FS 831#ifdef CONFIG_PROC_FS
847struct ipc_proc_iter { 832struct ipc_proc_iter {
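
The ipcperms() hunk above keeps the octal-shift permission check but goes back to plain uid_t comparisons. An illustrative restatement of that arithmetic (not part of the patch; the final mask-and-compare step is paraphrased from the surrounding kernel code rather than shown in the hunk):

/* flag is the requested access mask (e.g. S_IRUGO for a read), mode is
 * ipcp->mode; is_owner / is_group stand in for the uid/gid comparisons
 * in the hunk above. */
static int ipc_would_deny(unsigned short mode, short flag, int is_owner, int is_group)
{
    int requested_mode = (flag >> 6) | (flag >> 3) | flag;
    int granted_mode = mode;

    if (is_owner)
        granted_mode >>= 6;         /* owner rwx bits */
    else if (is_group)
        granted_mode >>= 3;         /* group rwx bits */

    /* e.g. flag = 0444, mode = 0640, "other" caller:
     * 0444 & ~0640 & 0007 == 0004, so access is denied. */
    return requested_mode & ~granted_mode & 0007;
}
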
diff --git a/ipc/util.h b/ipc/util.h
index eeb79a1fbd8..6f5c20bedaa 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -92,7 +92,6 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
92#define IPC_SHM_IDS 2 92#define IPC_SHM_IDS 2
93 93
94#define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER) 94#define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER)
95#define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER)
96 95
97/* must be called with ids->rw_mutex acquired for writing */ 96/* must be called with ids->rw_mutex acquired for writing */
98int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int); 97int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);
@@ -126,12 +125,12 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
126 125
127void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out); 126void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
128void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out); 127void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out);
129int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out); 128void ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out);
130struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns, 129struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
131 struct ipc_ids *ids, int id, int cmd, 130 struct ipc_ids *ids, int id, int cmd,
132 struct ipc64_perm *perm, int extra_perm); 131 struct ipc64_perm *perm, int extra_perm);
133 132
134#ifndef CONFIG_ARCH_WANT_IPC_PARSE_VERSION 133#ifndef __ARCH_WANT_IPC_PARSE_VERSION
135 /* On IA-64, we always use the "64-bit version" of the IPC structures. */ 134 /* On IA-64, we always use the "64-bit version" of the IPC structures. */
136# define ipc_parse_version(cmd) IPC_64 135# define ipc_parse_version(cmd) IPC_64
137#else 136#else
@@ -140,7 +139,6 @@ int ipc_parse_version (int *cmd);
140 139
141extern void free_msg(struct msg_msg *msg); 140extern void free_msg(struct msg_msg *msg);
142extern struct msg_msg *load_msg(const void __user *src, int len); 141extern struct msg_msg *load_msg(const void __user *src, int len);
143extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst);
144extern int store_msg(void __user *dest, struct msg_msg *msg, int len); 142extern int store_msg(void __user *dest, struct msg_msg *msg, int len);
145 143
146extern void recompute_msgmni(struct ipc_namespace *); 144extern void recompute_msgmni(struct ipc_namespace *);
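
With ipcid_to_seqx() and the next_id hint gone, identifiers are always built from the incrementing per-ids sequence counter. Assuming the usual SEQ_MULTIPLIER == IPCMNI (32768) from ipc/util.h, the id arithmetic used by ipc_addid() and the ipcid_to_idx() macro kept above composes like this (a sketch, not part of the patch):

#define SEQ_MULTIPLIER_EX 32768     /* assumed: SEQ_MULTIPLIER == IPCMNI */

/* What ipc_buildid() does with the idr slot handed back to ipc_addid(). */
static int ipc_buildid_ex(int idr_slot, int seq)
{
    return SEQ_MULTIPLIER_EX * seq + idr_slot;  /* user-visible id (shmid, msqid, ...) */
}

/* Recover the idr slot from a user-visible id, as ipcid_to_idx() does.
 * The removed ipcid_to_seqx() was simply id / SEQ_MULTIPLIER. */
static int ipcid_to_idx_ex(int id)
{
    return id % SEQ_MULTIPLIER_EX;
}
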