path: root/ipc
author     Linus Torvalds <torvalds@ppc970.osdl.org>   2005-04-16 18:20:36 -0400
committer  Linus Torvalds <torvalds@ppc970.osdl.org>   2005-04-16 18:20:36 -0400
commit     1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree       0bba044c4ce775e45a88a51686b5d9f90697ea9d /ipc
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'ipc')
-rw-r--r--   ipc/Makefile     |    9
-rw-r--r--   ipc/compat.c     |  687
-rw-r--r--   ipc/compat_mq.c  |  146
-rw-r--r--   ipc/mqueue.c     | 1252
-rw-r--r--   ipc/msg.c        |  862
-rw-r--r--   ipc/msgutil.c    |  127
-rw-r--r--   ipc/sem.c        | 1384
-rw-r--r--   ipc/shm.c        |  917
-rw-r--r--   ipc/util.c       |  580
-rw-r--r--   ipc/util.h       |   81
10 files changed, 6045 insertions(+), 0 deletions(-)
diff --git a/ipc/Makefile b/ipc/Makefile
new file mode 100644
index 000000000000..0a6d626cd794
--- /dev/null
+++ b/ipc/Makefile
@@ -0,0 +1,9 @@
1#
2# Makefile for the linux ipc.
3#
4
5obj-$(CONFIG_SYSVIPC_COMPAT) += compat.o
6obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o
7obj_mq-$(CONFIG_COMPAT) += compat_mq.o
8obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y)
9
diff --git a/ipc/compat.c b/ipc/compat.c
new file mode 100644
index 000000000000..70e4e4e10fd1
--- /dev/null
+++ b/ipc/compat.c
@@ -0,0 +1,687 @@
1/*
2 * 32 bit compatibility code for System V IPC
3 *
4 * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
5 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
6 * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
7 * Copyright (C) 2000 VA Linux Co
8 * Copyright (C) 2000 Don Dugger <n0ano@valinux.com>
9 * Copyright (C) 2000 Hewlett-Packard Co.
10 * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
11 * Copyright (C) 2000 Gerhard Tonn (ton@de.ibm.com)
12 * Copyright (C) 2000-2002 Andi Kleen, SuSE Labs (x86-64 port)
13 * Copyright (C) 2000 Silicon Graphics, Inc.
14 * Copyright (C) 2001 IBM
15 * Copyright (C) 2004 IBM Deutschland Entwicklung GmbH, IBM Corporation
16 * Copyright (C) 2004 Arnd Bergmann (arnd@arndb.de)
17 *
18 * This code is collected from the versions for sparc64, mips64, s390x, ia64,
19 * ppc64 and x86_64, all of which are based on the original sparc64 version
20 * by Jakub Jelinek.
21 *
22 */
23#include <linux/compat.h>
24#include <linux/config.h>
25#include <linux/errno.h>
26#include <linux/highuid.h>
27#include <linux/init.h>
28#include <linux/msg.h>
29#include <linux/shm.h>
30#include <linux/slab.h>
31#include <linux/syscalls.h>
32
33#include <asm/semaphore.h>
34#include <asm/uaccess.h>
35
36#include "util.h"
37
38struct compat_msgbuf {
39 compat_long_t mtype;
40 char mtext[1];
41};
42
43struct compat_ipc_perm {
44 key_t key;
45 compat_uid_t uid;
46 compat_gid_t gid;
47 compat_uid_t cuid;
48 compat_gid_t cgid;
49 compat_mode_t mode;
50 unsigned short seq;
51};
52
53struct compat_semid_ds {
54 struct compat_ipc_perm sem_perm;
55 compat_time_t sem_otime;
56 compat_time_t sem_ctime;
57 compat_uptr_t sem_base;
58 compat_uptr_t sem_pending;
59 compat_uptr_t sem_pending_last;
60 compat_uptr_t undo;
61 unsigned short sem_nsems;
62};
63
64struct compat_msqid_ds {
65 struct compat_ipc_perm msg_perm;
66 compat_uptr_t msg_first;
67 compat_uptr_t msg_last;
68 compat_time_t msg_stime;
69 compat_time_t msg_rtime;
70 compat_time_t msg_ctime;
71 compat_ulong_t msg_lcbytes;
72 compat_ulong_t msg_lqbytes;
73 unsigned short msg_cbytes;
74 unsigned short msg_qnum;
75 unsigned short msg_qbytes;
76 compat_ipc_pid_t msg_lspid;
77 compat_ipc_pid_t msg_lrpid;
78};
79
80struct compat_shmid_ds {
81 struct compat_ipc_perm shm_perm;
82 int shm_segsz;
83 compat_time_t shm_atime;
84 compat_time_t shm_dtime;
85 compat_time_t shm_ctime;
86 compat_ipc_pid_t shm_cpid;
87 compat_ipc_pid_t shm_lpid;
88 unsigned short shm_nattch;
89 unsigned short shm_unused;
90 compat_uptr_t shm_unused2;
91 compat_uptr_t shm_unused3;
92};
93
94struct compat_ipc_kludge {
95 compat_uptr_t msgp;
96 compat_long_t msgtyp;
97};
98
99struct compat_shminfo64 {
100 compat_ulong_t shmmax;
101 compat_ulong_t shmmin;
102 compat_ulong_t shmmni;
103 compat_ulong_t shmseg;
104 compat_ulong_t shmall;
105 compat_ulong_t __unused1;
106 compat_ulong_t __unused2;
107 compat_ulong_t __unused3;
108 compat_ulong_t __unused4;
109};
110
111struct compat_shm_info {
112 compat_int_t used_ids;
113 compat_ulong_t shm_tot, shm_rss, shm_swp;
114 compat_ulong_t swap_attempts, swap_successes;
115};
116
117extern int sem_ctls[];
118#define sc_semopm (sem_ctls[2])
119#define MAXBUF (64*1024)
120
121static inline int compat_ipc_parse_version(int *cmd)
122{
123 int version = *cmd & IPC_64;
124
125 /* this is tricky: architectures that have support for the old
126 * ipc structures in 64 bit binaries need to have IPC_64 set
127 * in cmd, the others need to have it cleared */
128#ifndef ipc_parse_version
129 *cmd |= IPC_64;
130#else
131 *cmd &= ~IPC_64;
132#endif
133 return version;
134}
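/*
 * Illustrative sketch (editorial, not part of this patch) of how the helper
 * above is used; it assumes IPC_64 is the 0x0100 flag from <linux/ipc.h>.
 */
static inline void compat_parse_version_example(void)
{
	int cmd = IPC_STAT | IPC_64;	/* 32-bit caller asked for the 64-bit layout */
	int version = compat_ipc_parse_version(&cmd);

	/*
	 * version == IPC_64 here, so the new-style structure is copied out;
	 * cmd now has IPC_64 set or cleared to match what the native
	 * sys_*ctl() expects on this architecture.
	 */
	(void)version;
}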
135
136static inline int __get_compat_ipc64_perm(struct ipc64_perm *p64,
137 struct compat_ipc64_perm __user *up64)
138{
139 int err;
140
141 err = __get_user(p64->uid, &up64->uid);
142 err |= __get_user(p64->gid, &up64->gid);
143 err |= __get_user(p64->mode, &up64->mode);
144 return err;
145}
146
147static inline int __get_compat_ipc_perm(struct ipc64_perm *p,
148 struct compat_ipc_perm __user *up)
149{
150 int err;
151
152 err = __get_user(p->uid, &up->uid);
153 err |= __get_user(p->gid, &up->gid);
154 err |= __get_user(p->mode, &up->mode);
155 return err;
156}
157
158static inline int __put_compat_ipc64_perm(struct ipc64_perm *p64,
159 struct compat_ipc64_perm __user *up64)
160{
161 int err;
162
163 err = __put_user(p64->key, &up64->key);
164 err |= __put_user(p64->uid, &up64->uid);
165 err |= __put_user(p64->gid, &up64->gid);
166 err |= __put_user(p64->cuid, &up64->cuid);
167 err |= __put_user(p64->cgid, &up64->cgid);
168 err |= __put_user(p64->mode, &up64->mode);
169 err |= __put_user(p64->seq, &up64->seq);
170 return err;
171}
172
173static inline int __put_compat_ipc_perm(struct ipc64_perm *p,
174 struct compat_ipc_perm __user *up)
175{
176 int err;
177 compat_uid_t u;
178 compat_gid_t g;
179
180 err = __put_user(p->key, &up->key);
181 SET_UID(u, p->uid);
182 err |= __put_user(u, &up->uid);
183 SET_GID(g, p->gid);
184 err |= __put_user(g, &up->gid);
185 SET_UID(u, p->cuid);
186 err |= __put_user(u, &up->cuid);
187 SET_GID(g, p->cgid);
188 err |= __put_user(g, &up->cgid);
189 err |= __put_user(p->mode, &up->mode);
190 err |= __put_user(p->seq, &up->seq);
191 return err;
192}
193
194static inline int get_compat_semid64_ds(struct semid64_ds *s64,
195 struct compat_semid64_ds __user *up64)
196{
197 if (!access_ok (VERIFY_READ, up64, sizeof(*up64)))
198 return -EFAULT;
199 return __get_compat_ipc64_perm(&s64->sem_perm, &up64->sem_perm);
200}
201
202static inline int get_compat_semid_ds(struct semid64_ds *s,
203 struct compat_semid_ds __user *up)
204{
205 if (!access_ok (VERIFY_READ, up, sizeof(*up)))
206 return -EFAULT;
207 return __get_compat_ipc_perm(&s->sem_perm, &up->sem_perm);
208}
209
210static inline int put_compat_semid64_ds(struct semid64_ds *s64,
211 struct compat_semid64_ds __user *up64)
212{
213 int err;
214
215 if (!access_ok (VERIFY_WRITE, up64, sizeof(*up64)))
216 return -EFAULT;
217 err = __put_compat_ipc64_perm(&s64->sem_perm, &up64->sem_perm);
218 err |= __put_user(s64->sem_otime, &up64->sem_otime);
219 err |= __put_user(s64->sem_ctime, &up64->sem_ctime);
220 err |= __put_user(s64->sem_nsems, &up64->sem_nsems);
221 return err;
222}
223
224static inline int put_compat_semid_ds(struct semid64_ds *s,
225 struct compat_semid_ds __user *up)
226{
227 int err;
228
229 if (!access_ok (VERIFY_WRITE, up, sizeof(*up)))
230 return -EFAULT;
231 err = __put_compat_ipc_perm(&s->sem_perm, &up->sem_perm);
232 err |= __put_user(s->sem_otime, &up->sem_otime);
233 err |= __put_user(s->sem_ctime, &up->sem_ctime);
234 err |= __put_user(s->sem_nsems, &up->sem_nsems);
235 return err;
236}
237
238long compat_sys_semctl(int first, int second, int third, void __user *uptr)
239{
240 union semun fourth;
241 u32 pad;
242 int err, err2;
243 struct semid64_ds s64;
244 struct semid64_ds __user *up64;
245 int version = compat_ipc_parse_version(&third);
246
247 if (!uptr)
248 return -EINVAL;
249 if (get_user(pad, (u32 __user *) uptr))
250 return -EFAULT;
251 if ((third & (~IPC_64)) == SETVAL)
252 fourth.val = (int) pad;
253 else
254 fourth.__pad = compat_ptr(pad);
255 switch (third & (~IPC_64)) {
256 case IPC_INFO:
257 case IPC_RMID:
258 case SEM_INFO:
259 case GETVAL:
260 case GETPID:
261 case GETNCNT:
262 case GETZCNT:
263 case GETALL:
264 case SETVAL:
265 case SETALL:
266 err = sys_semctl(first, second, third, fourth);
267 break;
268
269 case IPC_STAT:
270 case SEM_STAT:
271 up64 = compat_alloc_user_space(sizeof(s64));
272 fourth.__pad = up64;
273 err = sys_semctl(first, second, third, fourth);
274 if (err < 0)
275 break;
276 if (copy_from_user(&s64, up64, sizeof(s64)))
277 err2 = -EFAULT;
278 else if (version == IPC_64)
279 err2 = put_compat_semid64_ds(&s64, compat_ptr(pad));
280 else
281 err2 = put_compat_semid_ds(&s64, compat_ptr(pad));
282 if (err2)
283 err = -EFAULT;
284 break;
285
286 case IPC_SET:
287 if (version == IPC_64) {
288 err = get_compat_semid64_ds(&s64, compat_ptr(pad));
289 } else {
290 err = get_compat_semid_ds(&s64, compat_ptr(pad));
291 }
292 up64 = compat_alloc_user_space(sizeof(s64));
293 if (copy_to_user(up64, &s64, sizeof(s64)))
294 err = -EFAULT;
295 if (err)
296 break;
297
298 fourth.__pad = up64;
299 err = sys_semctl(first, second, third, fourth);
300 break;
301
302 default:
303 err = -EINVAL;
304 break;
305 }
306 return err;
307}
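/*
 * Editorial user-space example (not from this patch): a 32-bit program doing
 * semctl(IPC_STAT) on a 64-bit kernel is routed through compat_sys_semctl()
 * above.  The union semun definition follows semctl(2); the rest is
 * illustrative.
 */
#include <stdio.h>
#include <sys/ipc.h>
#include <sys/sem.h>

union semun {
	int val;
	struct semid_ds *buf;
	unsigned short *array;
};

int main(void)
{
	struct semid_ds ds;
	union semun arg = { .buf = &ds };
	int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);

	if (id >= 0 && semctl(id, 0, IPC_STAT, arg) == 0)
		printf("nsems = %lu\n", (unsigned long)ds.sem_nsems);
	if (id >= 0)
		semctl(id, 0, IPC_RMID);
	return 0;
}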
308
309long compat_sys_msgsnd(int first, int second, int third, void __user *uptr)
310{
311 struct msgbuf __user *p;
312 struct compat_msgbuf __user *up = uptr;
313 long type;
314
315 if (first < 0)
316 return -EINVAL;
317 if (second < 0 || (second >= MAXBUF - sizeof(struct msgbuf)))
318 return -EINVAL;
319
320 p = compat_alloc_user_space(second + sizeof(struct msgbuf));
321 if (get_user(type, &up->mtype) ||
322 put_user(type, &p->mtype) ||
323 copy_in_user(p->mtext, up->mtext, second))
324 return -EFAULT;
325
326 return sys_msgsnd(first, p, second, third);
327}
328
329long compat_sys_msgrcv(int first, int second, int msgtyp, int third,
330 int version, void __user *uptr)
331{
332 struct msgbuf __user *p;
333 struct compat_msgbuf __user *up;
334 long type;
335 int err;
336
337 if (first < 0)
338 return -EINVAL;
339 if (second < 0 || (second >= MAXBUF - sizeof(struct msgbuf)))
340 return -EINVAL;
341
342 if (!version) {
343 struct compat_ipc_kludge ipck;
344 err = -EINVAL;
345 if (!uptr)
346 goto out;
347 err = -EFAULT;
348 if (copy_from_user (&ipck, uptr, sizeof(ipck)))
349 goto out;
350 uptr = compat_ptr(ipck.msgp);
351 msgtyp = ipck.msgtyp;
352 }
353 p = compat_alloc_user_space(second + sizeof(struct msgbuf));
354 err = sys_msgrcv(first, p, second, msgtyp, third);
355 if (err < 0)
356 goto out;
357 up = uptr;
358 if (get_user(type, &p->mtype) ||
359 put_user(type, &up->mtype) ||
360 copy_in_user(up->mtext, p->mtext, err))
361 err = -EFAULT;
362out:
363 return err;
364}
365
366static inline int get_compat_msqid64(struct msqid64_ds *m64,
367 struct compat_msqid64_ds __user *up64)
368{
369 int err;
370
371 if (!access_ok(VERIFY_READ, up64, sizeof(*up64)))
372 return -EFAULT;
373 err = __get_compat_ipc64_perm(&m64->msg_perm, &up64->msg_perm);
374 err |= __get_user(m64->msg_qbytes, &up64->msg_qbytes);
375 return err;
376}
377
378static inline int get_compat_msqid(struct msqid64_ds *m,
379 struct compat_msqid_ds __user *up)
380{
381 int err;
382
383 if (!access_ok(VERIFY_READ, up, sizeof(*up)))
384 return -EFAULT;
385 err = __get_compat_ipc_perm(&m->msg_perm, &up->msg_perm);
386 err |= __get_user(m->msg_qbytes, &up->msg_qbytes);
387 return err;
388}
389
390static inline int put_compat_msqid64_ds(struct msqid64_ds *m64,
391 struct compat_msqid64_ds __user *up64)
392{
393 int err;
394
395 if (!access_ok(VERIFY_WRITE, up64, sizeof(*up64)))
396 return -EFAULT;
397 err = __put_compat_ipc64_perm(&m64->msg_perm, &up64->msg_perm);
398 err |= __put_user(m64->msg_stime, &up64->msg_stime);
399 err |= __put_user(m64->msg_rtime, &up64->msg_rtime);
400 err |= __put_user(m64->msg_ctime, &up64->msg_ctime);
401 err |= __put_user(m64->msg_cbytes, &up64->msg_cbytes);
402 err |= __put_user(m64->msg_qnum, &up64->msg_qnum);
403 err |= __put_user(m64->msg_qbytes, &up64->msg_qbytes);
404 err |= __put_user(m64->msg_lspid, &up64->msg_lspid);
405 err |= __put_user(m64->msg_lrpid, &up64->msg_lrpid);
406 return err;
407}
408
409static inline int put_compat_msqid_ds(struct msqid64_ds *m,
410 struct compat_msqid_ds __user *up)
411{
412 int err;
413
414 if (!access_ok(VERIFY_WRITE, up, sizeof(*up)))
415 return -EFAULT;
416 err = __put_compat_ipc_perm(&m->msg_perm, &up->msg_perm);
417 err |= __put_user(m->msg_stime, &up->msg_stime);
418 err |= __put_user(m->msg_rtime, &up->msg_rtime);
419 err |= __put_user(m->msg_ctime, &up->msg_ctime);
420 err |= __put_user(m->msg_cbytes, &up->msg_cbytes);
421 err |= __put_user(m->msg_qnum, &up->msg_qnum);
422 err |= __put_user(m->msg_qbytes, &up->msg_qbytes);
423 err |= __put_user(m->msg_lspid, &up->msg_lspid);
424 err |= __put_user(m->msg_lrpid, &up->msg_lrpid);
425 return err;
426}
427
428long compat_sys_msgctl(int first, int second, void __user *uptr)
429{
430 int err, err2;
431 struct msqid64_ds m64;
432 int version = compat_ipc_parse_version(&second);
433 void __user *p;
434
435 switch (second & (~IPC_64)) {
436 case IPC_INFO:
437 case IPC_RMID:
438 case MSG_INFO:
439 err = sys_msgctl(first, second, uptr);
440 break;
441
442 case IPC_SET:
443 if (version == IPC_64) {
444 err = get_compat_msqid64(&m64, uptr);
445 } else {
446 err = get_compat_msqid(&m64, uptr);
447 }
448 if (err)
449 break;
450 p = compat_alloc_user_space(sizeof(m64));
451 if (copy_to_user(p, &m64, sizeof(m64)))
452 err = -EFAULT;
453 else
454 err = sys_msgctl(first, second, p);
455 break;
456
457 case IPC_STAT:
458 case MSG_STAT:
459 p = compat_alloc_user_space(sizeof(m64));
460 err = sys_msgctl(first, second, p);
461 if (err < 0)
462 break;
463 if (copy_from_user(&m64, p, sizeof(m64)))
464 err2 = -EFAULT;
465 else if (version == IPC_64)
466 err2 = put_compat_msqid64_ds(&m64, uptr);
467 else
468 err2 = put_compat_msqid_ds(&m64, uptr);
469 if (err2)
470 err = -EFAULT;
471 break;
472
473 default:
474 err = -EINVAL;
475 break;
476 }
477 return err;
478}
479
480long compat_sys_shmat(int first, int second, compat_uptr_t third, int version,
481 void __user *uptr)
482{
483 int err;
484 unsigned long raddr;
485 compat_ulong_t __user *uaddr;
486
487 if (version == 1)
488 return -EINVAL;
489 err = do_shmat(first, uptr, second, &raddr);
490 if (err < 0)
491 return err;
492 uaddr = compat_ptr(third);
493 return put_user(raddr, uaddr);
494}
495
496static inline int get_compat_shmid64_ds(struct shmid64_ds *s64,
497 struct compat_shmid64_ds __user *up64)
498{
499 if (!access_ok(VERIFY_READ, up64, sizeof(*up64)))
500 return -EFAULT;
501 return __get_compat_ipc64_perm(&s64->shm_perm, &up64->shm_perm);
502}
503
504static inline int get_compat_shmid_ds(struct shmid64_ds *s,
505 struct compat_shmid_ds __user *up)
506{
507 if (!access_ok(VERIFY_READ, up, sizeof(*up)))
508 return -EFAULT;
509 return __get_compat_ipc_perm(&s->shm_perm, &up->shm_perm);
510}
511
512static inline int put_compat_shmid64_ds(struct shmid64_ds *s64,
513 struct compat_shmid64_ds __user *up64)
514{
515 int err;
516
517 if (!access_ok(VERIFY_WRITE, up64, sizeof(*up64)))
518 return -EFAULT;
519 err = __put_compat_ipc64_perm(&s64->shm_perm, &up64->shm_perm);
520 err |= __put_user(s64->shm_atime, &up64->shm_atime);
521 err |= __put_user(s64->shm_dtime, &up64->shm_dtime);
522 err |= __put_user(s64->shm_ctime, &up64->shm_ctime);
523 err |= __put_user(s64->shm_segsz, &up64->shm_segsz);
524 err |= __put_user(s64->shm_nattch, &up64->shm_nattch);
525 err |= __put_user(s64->shm_cpid, &up64->shm_cpid);
526 err |= __put_user(s64->shm_lpid, &up64->shm_lpid);
527 return err;
528}
529
530static inline int put_compat_shmid_ds(struct shmid64_ds *s,
531 struct compat_shmid_ds __user *up)
532{
533 int err;
534
535 if (!access_ok(VERIFY_WRITE, up, sizeof(*up)))
536 return -EFAULT;
537 err = __put_compat_ipc_perm(&s->shm_perm, &up->shm_perm);
538 err |= __put_user(s->shm_atime, &up->shm_atime);
539 err |= __put_user(s->shm_dtime, &up->shm_dtime);
540 err |= __put_user(s->shm_ctime, &up->shm_ctime);
541 err |= __put_user(s->shm_segsz, &up->shm_segsz);
542 err |= __put_user(s->shm_nattch, &up->shm_nattch);
543 err |= __put_user(s->shm_cpid, &up->shm_cpid);
544 err |= __put_user(s->shm_lpid, &up->shm_lpid);
545 return err;
546}
547
548static inline int put_compat_shminfo64(struct shminfo64 *smi,
549 struct compat_shminfo64 __user *up64)
550{
551 int err;
552
553 if (!access_ok(VERIFY_WRITE, up64, sizeof(*up64)))
554 return -EFAULT;
555 err = __put_user(smi->shmmax, &up64->shmmax);
556 err |= __put_user(smi->shmmin, &up64->shmmin);
557 err |= __put_user(smi->shmmni, &up64->shmmni);
558 err |= __put_user(smi->shmseg, &up64->shmseg);
559 err |= __put_user(smi->shmall, &up64->shmall);
560 return err;
561}
562
563static inline int put_compat_shminfo(struct shminfo64 *smi,
564 struct shminfo __user *up)
565{
566 int err;
567
568 if (!access_ok(VERIFY_WRITE, up, sizeof(*up)))
569 return -EFAULT;
570 err = __put_user(smi->shmmax, &up->shmmax);
571 err |= __put_user(smi->shmmin, &up->shmmin);
572 err |= __put_user(smi->shmmni, &up->shmmni);
573 err |= __put_user(smi->shmseg, &up->shmseg);
574 err |= __put_user(smi->shmall, &up->shmall);
575 return err;
576}
576
577static inline int put_compat_shm_info(struct shm_info __user *ip,
578 struct compat_shm_info __user *uip)
579{
580 int err;
581 struct shm_info si;
582
583 if (!access_ok(VERIFY_WRITE, uip, sizeof(*uip)) ||
584 copy_from_user(&si, ip, sizeof(si)))
585 return -EFAULT;
586 err = __put_user(si.used_ids, &uip->used_ids);
587 err |= __put_user(si.shm_tot, &uip->shm_tot);
588 err |= __put_user(si.shm_rss, &uip->shm_rss);
589 err |= __put_user(si.shm_swp, &uip->shm_swp);
590 err |= __put_user(si.swap_attempts, &uip->swap_attempts);
591 err |= __put_user(si.swap_successes, &uip->swap_successes);
592 return err;
593}
594
595long compat_sys_shmctl(int first, int second, void __user *uptr)
596{
597 void __user *p;
598 struct shmid64_ds s64;
599 struct shminfo64 smi;
600 int err, err2;
601 int version = compat_ipc_parse_version(&second);
602
603 switch (second & (~IPC_64)) {
604 case IPC_RMID:
605 case SHM_LOCK:
606 case SHM_UNLOCK:
607 err = sys_shmctl(first, second, uptr);
608 break;
609
610 case IPC_INFO:
611 p = compat_alloc_user_space(sizeof(smi));
612 err = sys_shmctl(first, second, p);
613 if (err < 0)
614 break;
615 if (copy_from_user(&smi, p, sizeof(smi)))
616 err2 = -EFAULT;
617 else if (version == IPC_64)
618 err2 = put_compat_shminfo64(&smi, uptr);
619 else
620 err2 = put_compat_shminfo(&smi, uptr);
621 if (err2)
622 err = -EFAULT;
623 break;
624
625
626 case IPC_SET:
627 if (version == IPC_64) {
628 err = get_compat_shmid64_ds(&s64, uptr);
629 } else {
630 err = get_compat_shmid_ds(&s64, uptr);
631 }
632 if (err)
633 break;
634 p = compat_alloc_user_space(sizeof(s64));
635 if (copy_to_user(p, &s64, sizeof(s64)))
636 err = -EFAULT;
637 else
638 err = sys_shmctl(first, second, p);
639 break;
640
641 case IPC_STAT:
642 case SHM_STAT:
643 p = compat_alloc_user_space(sizeof(s64));
644 err = sys_shmctl(first, second, p);
645 if (err < 0)
646 break;
647 if (copy_from_user(&s64, p, sizeof(s64)))
648 err2 = -EFAULT;
649 else if (version == IPC_64)
650 err2 = put_compat_shmid64_ds(&s64, uptr);
651 else
652 err2 = put_compat_shmid_ds(&s64, uptr);
653 if (err2)
654 err = -EFAULT;
655 break;
656
657 case SHM_INFO:
658 p = compat_alloc_user_space(sizeof(struct shm_info));
659 err = sys_shmctl(first, second, p);
660 if (err < 0)
661 break;
662 err2 = put_compat_shm_info(p, uptr);
663 if (err2)
664 err = -EFAULT;
665 break;
666
667 default:
668 err = -EINVAL;
669 break;
670 }
671 return err;
672}
673
674long compat_sys_semtimedop(int semid, struct sembuf __user *tsems,
675 unsigned nsops, const struct compat_timespec __user *timeout)
676{
677 struct timespec __user *ts64 = NULL;
678 if (timeout) {
679 struct timespec ts;
680 ts64 = compat_alloc_user_space(sizeof(*ts64));
681 if (get_compat_timespec(&ts, timeout))
682 return -EFAULT;
683 if (copy_to_user(ts64, &ts, sizeof(ts)))
684 return -EFAULT;
685 }
686 return sys_semtimedop(semid, tsems, nsops, ts64);
687}
diff --git a/ipc/compat_mq.c b/ipc/compat_mq.c
new file mode 100644
index 000000000000..d8d1e9ff4e88
--- /dev/null
+++ b/ipc/compat_mq.c
@@ -0,0 +1,146 @@
1/*
2 * ipc/compat_mq.c
3 * 32 bit emulation for POSIX message queue system calls
4 *
5 * Copyright (C) 2004 IBM Deutschland Entwicklung GmbH, IBM Corporation
6 * Author: Arnd Bergmann <arnd@arndb.de>
7 */
8
9#include <linux/compat.h>
10#include <linux/fs.h>
11#include <linux/kernel.h>
12#include <linux/mqueue.h>
13#include <linux/syscalls.h>
14
15#include <asm/uaccess.h>
16
17struct compat_mq_attr {
18 compat_long_t mq_flags; /* message queue flags */
19 compat_long_t mq_maxmsg; /* maximum number of messages */
20 compat_long_t mq_msgsize; /* maximum message size */
21 compat_long_t mq_curmsgs; /* number of messages currently queued */
22 compat_long_t __reserved[4]; /* ignored for input, zeroed for output */
23};
24
25static inline int get_compat_mq_attr(struct mq_attr *attr,
26 const struct compat_mq_attr __user *uattr)
27{
28 if (!access_ok(VERIFY_READ, uattr, sizeof *uattr))
29 return -EFAULT;
30
31 return __get_user(attr->mq_flags, &uattr->mq_flags)
32 | __get_user(attr->mq_maxmsg, &uattr->mq_maxmsg)
33 | __get_user(attr->mq_msgsize, &uattr->mq_msgsize)
34 | __get_user(attr->mq_curmsgs, &uattr->mq_curmsgs);
35}
36
37static inline int put_compat_mq_attr(const struct mq_attr *attr,
38 struct compat_mq_attr __user *uattr)
39{
40 if (clear_user(uattr, sizeof *uattr))
41 return -EFAULT;
42
43 return __put_user(attr->mq_flags, &uattr->mq_flags)
44 | __put_user(attr->mq_maxmsg, &uattr->mq_maxmsg)
45 | __put_user(attr->mq_msgsize, &uattr->mq_msgsize)
46 | __put_user(attr->mq_curmsgs, &uattr->mq_curmsgs);
47}
48
49asmlinkage long compat_sys_mq_open(const char __user *u_name,
50 int oflag, compat_mode_t mode,
51 struct compat_mq_attr __user *u_attr)
52{
53 void __user *p = NULL;
54 if (u_attr && oflag & O_CREAT) {
55 struct mq_attr attr;
56 p = compat_alloc_user_space(sizeof(attr));
57 if (get_compat_mq_attr(&attr, u_attr) ||
58 copy_to_user(p, &attr, sizeof(attr)))
59 return -EFAULT;
60 }
61 return sys_mq_open(u_name, oflag, mode, p);
62}
63
64static int compat_prepare_timeout(struct timespec __user * *p,
65 const struct compat_timespec __user *u)
66{
67 struct timespec ts;
68 if (!u) {
69 *p = NULL;
70 return 0;
71 }
72 *p = compat_alloc_user_space(sizeof(ts));
73 if (get_compat_timespec(&ts, u) || copy_to_user(*p, &ts, sizeof(ts)))
74 return -EFAULT;
75 return 0;
76}
77
78asmlinkage long compat_sys_mq_timedsend(mqd_t mqdes,
79 const char __user *u_msg_ptr,
80 size_t msg_len, unsigned int msg_prio,
81 const struct compat_timespec __user *u_abs_timeout)
82{
83 struct timespec __user *u_ts;
84
85 if (compat_prepare_timeout(&u_ts, u_abs_timeout))
86 return -EFAULT;
87
88 return sys_mq_timedsend(mqdes, u_msg_ptr, msg_len,
89 msg_prio, u_ts);
90}
91
92asmlinkage ssize_t compat_sys_mq_timedreceive(mqd_t mqdes,
93 char __user *u_msg_ptr,
94 size_t msg_len, unsigned int __user *u_msg_prio,
95 const struct compat_timespec __user *u_abs_timeout)
96{
97 struct timespec __user *u_ts;
98 if (compat_prepare_timeout(&u_ts, u_abs_timeout))
99 return -EFAULT;
100
101 return sys_mq_timedreceive(mqdes, u_msg_ptr, msg_len,
102 u_msg_prio, u_ts);
103}
104
105asmlinkage long compat_sys_mq_notify(mqd_t mqdes,
106 const struct compat_sigevent __user *u_notification)
107{
108 struct sigevent __user *p = NULL;
109 if (u_notification) {
110 struct sigevent n;
111 p = compat_alloc_user_space(sizeof(*p));
112 if (get_compat_sigevent(&n, u_notification))
113 return -EFAULT;
114 if (n.sigev_notify == SIGEV_THREAD)
115 n.sigev_value.sival_ptr = compat_ptr(n.sigev_value.sival_int);
116 if (copy_to_user(p, &n, sizeof(*p)))
117 return -EFAULT;
118 }
119 return sys_mq_notify(mqdes, p);
120}
121
122asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes,
123 const struct compat_mq_attr __user *u_mqstat,
124 struct compat_mq_attr __user *u_omqstat)
125{
126 struct mq_attr mqstat;
127 struct mq_attr __user *p = compat_alloc_user_space(2 * sizeof(*p));
128 long ret;
129
130 if (u_mqstat) {
131 if (get_compat_mq_attr(&mqstat, u_mqstat) ||
132 copy_to_user(p, &mqstat, sizeof(mqstat)))
133 return -EFAULT;
134 }
135 ret = sys_mq_getsetattr(mqdes,
136 u_mqstat ? p : NULL,
137 u_omqstat ? p + 1 : NULL);
138 if (ret)
139 return ret;
140 if (u_omqstat) {
141 if (copy_from_user(&mqstat, p + 1, sizeof(mqstat)) ||
142 put_compat_mq_attr(&mqstat, u_omqstat))
143 return -EFAULT;
144 }
145 return 0;
146}
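/*
 * Editorial user-space sketch (not part of this patch): a 32-bit process
 * calling mq_open() and mq_getattr() on a 64-bit kernel goes through
 * compat_sys_mq_open() and compat_sys_mq_getsetattr() above, which bounce
 * the attribute structures through compat_alloc_user_space().  The queue
 * name is an assumption; link with -lrt on glibc of this era.
 */
#include <fcntl.h>
#include <mqueue.h>
#include <stdio.h>

int main(void)
{
	struct mq_attr attr = { .mq_maxmsg = 10, .mq_msgsize = 128 };
	mqd_t q = mq_open("/compat-demo", O_CREAT | O_RDWR, 0600, &attr);

	if (q == (mqd_t)-1)
		return 1;
	if (mq_getattr(q, &attr) == 0)	/* 32-bit callers land in compat_sys_mq_getsetattr() */
		printf("maxmsg=%ld msgsize=%ld\n", attr.mq_maxmsg, attr.mq_msgsize);
	mq_close(q);
	mq_unlink("/compat-demo");
	return 0;
}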
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
new file mode 100644
index 000000000000..cb0cd3cf3b5a
--- /dev/null
+++ b/ipc/mqueue.c
@@ -0,0 +1,1252 @@
1/*
2 * POSIX message queues filesystem for Linux.
3 *
4 * Copyright (C) 2003,2004 Krzysztof Benedyczak (golbi@mat.uni.torun.pl)
5 * Michal Wronski (wrona@mat.uni.torun.pl)
6 *
7 * Spinlocks: Mohamed Abbas (abbas.mohamed@intel.com)
8 * Lockless receive & send, fd based notify:
9 * Manfred Spraul (manfred@colorfullife.com)
10 *
11 * This file is released under the GPL.
12 */
13
14#include <linux/init.h>
15#include <linux/pagemap.h>
16#include <linux/file.h>
17#include <linux/mount.h>
18#include <linux/namei.h>
19#include <linux/sysctl.h>
20#include <linux/poll.h>
21#include <linux/mqueue.h>
22#include <linux/msg.h>
23#include <linux/skbuff.h>
24#include <linux/netlink.h>
25#include <linux/syscalls.h>
26#include <net/sock.h>
27#include "util.h"
28
29#define MQUEUE_MAGIC 0x19800202
30#define DIRENT_SIZE 20
31#define FILENT_SIZE 80
32
33#define SEND 0
34#define RECV 1
35
36#define STATE_NONE 0
37#define STATE_PENDING 1
38#define STATE_READY 2
39
40/* used by sysctl */
41#define FS_MQUEUE 1
42#define CTL_QUEUESMAX 2
43#define CTL_MSGMAX 3
44#define CTL_MSGSIZEMAX 4
45
46/* default values */
47#define DFLT_QUEUESMAX 256 /* max number of message queues */
48#define DFLT_MSGMAX 10 /* max number of messages in each queue */
49#define HARD_MSGMAX (131072/sizeof(void*))
50#define DFLT_MSGSIZEMAX 8192 /* max message size */
51
52#define NOTIFY_COOKIE_LEN 32
53
54struct ext_wait_queue { /* queue of sleeping tasks */
55 struct task_struct *task;
56 struct list_head list;
57 struct msg_msg *msg; /* ptr of loaded message */
58 int state; /* one of STATE_* values */
59};
60
61struct mqueue_inode_info {
62 spinlock_t lock;
63 struct inode vfs_inode;
64 wait_queue_head_t wait_q;
65
66 struct msg_msg **messages;
67 struct mq_attr attr;
68
69 struct sigevent notify;
70 pid_t notify_owner;
71 struct user_struct *user; /* user who created the queue, for accounting */
72 struct sock *notify_sock;
73 struct sk_buff *notify_cookie;
74
75 /* for tasks waiting for free space and messages, respectively */
76 struct ext_wait_queue e_wait_q[2];
77
78 unsigned long qsize; /* size of queue in memory (sum of all msgs) */
79};
80
81static struct inode_operations mqueue_dir_inode_operations;
82static struct file_operations mqueue_file_operations;
83static struct super_operations mqueue_super_ops;
84static void remove_notification(struct mqueue_inode_info *info);
85
86static spinlock_t mq_lock;
87static kmem_cache_t *mqueue_inode_cachep;
88static struct vfsmount *mqueue_mnt;
89
90static unsigned int queues_count;
91static unsigned int queues_max = DFLT_QUEUESMAX;
92static unsigned int msg_max = DFLT_MSGMAX;
93static unsigned int msgsize_max = DFLT_MSGSIZEMAX;
94
95static struct ctl_table_header * mq_sysctl_table;
96
97static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode)
98{
99 return container_of(inode, struct mqueue_inode_info, vfs_inode);
100}
101
102static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
103 struct mq_attr *attr)
104{
105 struct inode *inode;
106
107 inode = new_inode(sb);
108 if (inode) {
109 inode->i_mode = mode;
110 inode->i_uid = current->fsuid;
111 inode->i_gid = current->fsgid;
112 inode->i_blksize = PAGE_CACHE_SIZE;
113 inode->i_blocks = 0;
114 inode->i_mtime = inode->i_ctime = inode->i_atime =
115 CURRENT_TIME;
116
117 if (S_ISREG(mode)) {
118 struct mqueue_inode_info *info;
119 struct task_struct *p = current;
120 struct user_struct *u = p->user;
121 unsigned long mq_bytes, mq_msg_tblsz;
122
123 inode->i_fop = &mqueue_file_operations;
124 inode->i_size = FILENT_SIZE;
125 /* mqueue specific info */
126 info = MQUEUE_I(inode);
127 spin_lock_init(&info->lock);
128 init_waitqueue_head(&info->wait_q);
129 INIT_LIST_HEAD(&info->e_wait_q[0].list);
130 INIT_LIST_HEAD(&info->e_wait_q[1].list);
131 info->messages = NULL;
132 info->notify_owner = 0;
133 info->qsize = 0;
134 info->user = NULL; /* set when all is ok */
135 memset(&info->attr, 0, sizeof(info->attr));
136 info->attr.mq_maxmsg = DFLT_MSGMAX;
137 info->attr.mq_msgsize = DFLT_MSGSIZEMAX;
138 if (attr) {
139 info->attr.mq_maxmsg = attr->mq_maxmsg;
140 info->attr.mq_msgsize = attr->mq_msgsize;
141 }
142 mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *);
143 mq_bytes = (mq_msg_tblsz +
144 (info->attr.mq_maxmsg * info->attr.mq_msgsize));
145
146 spin_lock(&mq_lock);
147 if (u->mq_bytes + mq_bytes < u->mq_bytes ||
148 u->mq_bytes + mq_bytes >
149 p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur) {
150 spin_unlock(&mq_lock);
151 goto out_inode;
152 }
153 u->mq_bytes += mq_bytes;
154 spin_unlock(&mq_lock);
155
156 info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
157 if (!info->messages) {
158 spin_lock(&mq_lock);
159 u->mq_bytes -= mq_bytes;
160 spin_unlock(&mq_lock);
161 goto out_inode;
162 }
163 /* all is ok */
164 info->user = get_uid(u);
165 } else if (S_ISDIR(mode)) {
166 inode->i_nlink++;
167 /* Some things misbehave if size == 0 on a directory */
168 inode->i_size = 2 * DIRENT_SIZE;
169 inode->i_op = &mqueue_dir_inode_operations;
170 inode->i_fop = &simple_dir_operations;
171 }
172 }
173 return inode;
174out_inode:
175 make_bad_inode(inode);
176 iput(inode);
177 return NULL;
178}
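/*
 * Worked example (editorial, not part of this patch): with the defaults used
 * above, mq_maxmsg = 10 and mq_msgsize = 8192, the charge accounted against
 * the owner's RLIMIT_MSGQUEUE on a 64-bit kernel is
 *
 *	mq_msg_tblsz = 10 * sizeof(struct msg_msg *) = 10 * 8     =    80 bytes
 *	mq_bytes     = mq_msg_tblsz + 10 * 8192      = 80 + 81920 = 82000 bytes
 *
 * i.e. roughly 82 kB per queue created with default attributes.
 */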
179
180static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
181{
182 struct inode *inode;
183
184 sb->s_blocksize = PAGE_CACHE_SIZE;
185 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
186 sb->s_magic = MQUEUE_MAGIC;
187 sb->s_op = &mqueue_super_ops;
188
189 inode = mqueue_get_inode(sb, S_IFDIR | S_ISVTX | S_IRWXUGO, NULL);
190 if (!inode)
191 return -ENOMEM;
192
193 sb->s_root = d_alloc_root(inode);
194 if (!sb->s_root) {
195 iput(inode);
196 return -ENOMEM;
197 }
198
199 return 0;
200}
201
202static struct super_block *mqueue_get_sb(struct file_system_type *fs_type,
203 int flags, const char *dev_name,
204 void *data)
205{
206 return get_sb_single(fs_type, flags, data, mqueue_fill_super);
207}
208
209static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
210{
211 struct mqueue_inode_info *p = (struct mqueue_inode_info *) foo;
212
213 if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
214 SLAB_CTOR_CONSTRUCTOR)
215 inode_init_once(&p->vfs_inode);
216}
217
218static struct inode *mqueue_alloc_inode(struct super_block *sb)
219{
220 struct mqueue_inode_info *ei;
221
222 ei = kmem_cache_alloc(mqueue_inode_cachep, SLAB_KERNEL);
223 if (!ei)
224 return NULL;
225 return &ei->vfs_inode;
226}
227
228static void mqueue_destroy_inode(struct inode *inode)
229{
230 kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode));
231}
232
233static void mqueue_delete_inode(struct inode *inode)
234{
235 struct mqueue_inode_info *info;
236 struct user_struct *user;
237 unsigned long mq_bytes;
238 int i;
239
240 if (S_ISDIR(inode->i_mode)) {
241 clear_inode(inode);
242 return;
243 }
244 info = MQUEUE_I(inode);
245 spin_lock(&info->lock);
246 for (i = 0; i < info->attr.mq_curmsgs; i++)
247 free_msg(info->messages[i]);
248 kfree(info->messages);
249 spin_unlock(&info->lock);
250
251 clear_inode(inode);
252
253 mq_bytes = (info->attr.mq_maxmsg * sizeof(struct msg_msg *) +
254 (info->attr.mq_maxmsg * info->attr.mq_msgsize));
255 user = info->user;
256 if (user) {
257 spin_lock(&mq_lock);
258 user->mq_bytes -= mq_bytes;
259 queues_count--;
260 spin_unlock(&mq_lock);
261 free_uid(user);
262 }
263}
264
265static int mqueue_create(struct inode *dir, struct dentry *dentry,
266 int mode, struct nameidata *nd)
267{
268 struct inode *inode;
269 struct mq_attr *attr = dentry->d_fsdata;
270 int error;
271
272 spin_lock(&mq_lock);
273 if (queues_count >= queues_max && !capable(CAP_SYS_RESOURCE)) {
274 error = -ENOSPC;
275 goto out_lock;
276 }
277 queues_count++;
278 spin_unlock(&mq_lock);
279
280 inode = mqueue_get_inode(dir->i_sb, mode, attr);
281 if (!inode) {
282 error = -ENOMEM;
283 spin_lock(&mq_lock);
284 queues_count--;
285 goto out_lock;
286 }
287
288 dir->i_size += DIRENT_SIZE;
289 dir->i_ctime = dir->i_mtime = dir->i_atime = CURRENT_TIME;
290
291 d_instantiate(dentry, inode);
292 dget(dentry);
293 return 0;
294out_lock:
295 spin_unlock(&mq_lock);
296 return error;
297}
298
299static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
300{
301 struct inode *inode = dentry->d_inode;
302
303 dir->i_ctime = dir->i_mtime = dir->i_atime = CURRENT_TIME;
304 dir->i_size -= DIRENT_SIZE;
305 inode->i_nlink--;
306 dput(dentry);
307 return 0;
308}
309
310/*
311* Routine implementing read() on a queue file.
312* Rather than emulating some form of mq_receive() here, we only expose
313* the queue size and notification info: the only values that are
314* interesting from the user's point of view and are not accessible
315* through the standard mq_* routines.
316*/
317static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
318 size_t count, loff_t * off)
319{
320 struct mqueue_inode_info *info = MQUEUE_I(filp->f_dentry->d_inode);
321 char buffer[FILENT_SIZE];
322 size_t slen;
323 loff_t o;
324
325 if (!count)
326 return 0;
327
328 spin_lock(&info->lock);
329 snprintf(buffer, sizeof(buffer),
330 "QSIZE:%-10lu NOTIFY:%-5d SIGNO:%-5d NOTIFY_PID:%-6d\n",
331 info->qsize,
332 info->notify_owner ? info->notify.sigev_notify : 0,
333 (info->notify_owner &&
334 info->notify.sigev_notify == SIGEV_SIGNAL) ?
335 info->notify.sigev_signo : 0,
336 info->notify_owner);
337 spin_unlock(&info->lock);
338 buffer[sizeof(buffer)-1] = '\0';
339 slen = strlen(buffer)+1;
340
341 o = *off;
342 if (o > slen)
343 return 0;
344
345 if (o + count > slen)
346 count = slen - o;
347
348 if (copy_to_user(u_data, buffer + o, count))
349 return -EFAULT;
350
351 *off = o + count;
352 filp->f_dentry->d_inode->i_atime = filp->f_dentry->d_inode->i_ctime = CURRENT_TIME;
353 return count;
354}
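/*
 * Editorial example (not part of this patch): with an mqueue filesystem
 * mounted, e.g. on /dev/mqueue, the per-queue file can simply be read to see
 * the state formatted by mqueue_read_file() above.  Mount point and queue
 * name are assumptions.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[128];
	int fd = open("/dev/mqueue/demo", O_RDONLY);
	ssize_t n;

	if (fd < 0)
		return 1;
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);	/* e.g. "QSIZE:0 NOTIFY:0 SIGNO:0 NOTIFY_PID:0" */
	}
	close(fd);
	return 0;
}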
355
356static int mqueue_flush_file(struct file *filp)
357{
358 struct mqueue_inode_info *info = MQUEUE_I(filp->f_dentry->d_inode);
359
360 spin_lock(&info->lock);
361 if (current->tgid == info->notify_owner)
362 remove_notification(info);
363
364 spin_unlock(&info->lock);
365 return 0;
366}
367
368static unsigned int mqueue_poll_file(struct file *filp, struct poll_table_struct *poll_tab)
369{
370 struct mqueue_inode_info *info = MQUEUE_I(filp->f_dentry->d_inode);
371 int retval = 0;
372
373 poll_wait(filp, &info->wait_q, poll_tab);
374
375 spin_lock(&info->lock);
376 if (info->attr.mq_curmsgs)
377 retval = POLLIN | POLLRDNORM;
378
379 if (info->attr.mq_curmsgs < info->attr.mq_maxmsg)
380 retval |= POLLOUT | POLLWRNORM;
381 spin_unlock(&info->lock);
382
383 return retval;
384}
385
386/* Adds current to info->e_wait_q[sr] before element with smaller prio */
387static void wq_add(struct mqueue_inode_info *info, int sr,
388 struct ext_wait_queue *ewp)
389{
390 struct ext_wait_queue *walk;
391
392 ewp->task = current;
393
394 list_for_each_entry(walk, &info->e_wait_q[sr].list, list) {
395 if (walk->task->static_prio <= current->static_prio) {
396 list_add_tail(&ewp->list, &walk->list);
397 return;
398 }
399 }
400 list_add_tail(&ewp->list, &info->e_wait_q[sr].list);
401}
402
403/*
404 * Puts the current task to sleep. The caller must hold the queue lock;
405 * it is no longer held when this function returns.
406 * sr: SEND or RECV
407 */
408static int wq_sleep(struct mqueue_inode_info *info, int sr,
409 long timeout, struct ext_wait_queue *ewp)
410{
411 int retval;
412 signed long time;
413
414 wq_add(info, sr, ewp);
415
416 for (;;) {
417 set_current_state(TASK_INTERRUPTIBLE);
418
419 spin_unlock(&info->lock);
420 time = schedule_timeout(timeout);
421
422 while (ewp->state == STATE_PENDING)
423 cpu_relax();
424
425 if (ewp->state == STATE_READY) {
426 retval = 0;
427 goto out;
428 }
429 spin_lock(&info->lock);
430 if (ewp->state == STATE_READY) {
431 retval = 0;
432 goto out_unlock;
433 }
434 if (signal_pending(current)) {
435 retval = -ERESTARTSYS;
436 break;
437 }
438 if (time == 0) {
439 retval = -ETIMEDOUT;
440 break;
441 }
442 }
443 list_del(&ewp->list);
444out_unlock:
445 spin_unlock(&info->lock);
446out:
447 return retval;
448}
449
450/*
451 * Returns waiting task that should be serviced first or NULL if none exists
452 */
453static struct ext_wait_queue *wq_get_first_waiter(
454 struct mqueue_inode_info *info, int sr)
455{
456 struct list_head *ptr;
457
458 ptr = info->e_wait_q[sr].list.prev;
459 if (ptr == &info->e_wait_q[sr].list)
460 return NULL;
461 return list_entry(ptr, struct ext_wait_queue, list);
462}
463
464/* Auxiliary functions to manipulate messages' list */
465static void msg_insert(struct msg_msg *ptr, struct mqueue_inode_info *info)
466{
467 int k;
468
469 k = info->attr.mq_curmsgs - 1;
470 while (k >= 0 && info->messages[k]->m_type >= ptr->m_type) {
471 info->messages[k + 1] = info->messages[k];
472 k--;
473 }
474 info->attr.mq_curmsgs++;
475 info->qsize += ptr->m_ts;
476 info->messages[k + 1] = ptr;
477}
478
479static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
480{
481 info->qsize -= info->messages[--info->attr.mq_curmsgs]->m_ts;
482 return info->messages[info->attr.mq_curmsgs];
483}
484
485static inline void set_cookie(struct sk_buff *skb, char code)
486{
487 ((char*)skb->data)[NOTIFY_COOKIE_LEN-1] = code;
488}
489
490/*
491 * This helper exists only to keep sys_mq_timedsend() from growing too long.
492 */
493static void __do_notify(struct mqueue_inode_info *info)
494{
495 /* Notification:
496 * sent when a process has registered for it, no process is waiting
497 * synchronously for a message, and the queue has just changed from
498 * empty to non-empty. At this point we are sure that no one is
499 * waiting synchronously. */
500 if (info->notify_owner &&
501 info->attr.mq_curmsgs == 1) {
502 struct siginfo sig_i;
503 switch (info->notify.sigev_notify) {
504 case SIGEV_NONE:
505 break;
506 case SIGEV_SIGNAL:
507 /* sends signal */
508
509 sig_i.si_signo = info->notify.sigev_signo;
510 sig_i.si_errno = 0;
511 sig_i.si_code = SI_MESGQ;
512 sig_i.si_value = info->notify.sigev_value;
513 sig_i.si_pid = current->tgid;
514 sig_i.si_uid = current->uid;
515
516 kill_proc_info(info->notify.sigev_signo,
517 &sig_i, info->notify_owner);
518 break;
519 case SIGEV_THREAD:
520 set_cookie(info->notify_cookie, NOTIFY_WOKENUP);
521 netlink_sendskb(info->notify_sock,
522 info->notify_cookie, 0);
523 break;
524 }
525 /* after delivering the notification, unregister the process */
526 info->notify_owner = 0;
527 }
528 wake_up(&info->wait_q);
529}
530
531static long prepare_timeout(const struct timespec __user *u_arg)
532{
533 struct timespec ts, nowts;
534 long timeout;
535
536 if (u_arg) {
537 if (unlikely(copy_from_user(&ts, u_arg,
538 sizeof(struct timespec))))
539 return -EFAULT;
540
541 if (unlikely(ts.tv_nsec < 0 || ts.tv_sec < 0
542 || ts.tv_nsec >= NSEC_PER_SEC))
543 return -EINVAL;
544 nowts = CURRENT_TIME;
545 /* compute the relative time first, as the absolute value may not fit in jiffies */
546 ts.tv_sec -= nowts.tv_sec;
547 if (ts.tv_nsec < nowts.tv_nsec) {
548 ts.tv_nsec += NSEC_PER_SEC;
549 ts.tv_sec--;
550 }
551 ts.tv_nsec -= nowts.tv_nsec;
552 if (ts.tv_sec < 0)
553 return 0;
554
555 timeout = timespec_to_jiffies(&ts) + 1;
556 } else
557 return MAX_SCHEDULE_TIMEOUT;
558
559 return timeout;
560}
561
562static void remove_notification(struct mqueue_inode_info *info)
563{
564 if (info->notify_owner != 0 &&
565 info->notify.sigev_notify == SIGEV_THREAD) {
566 set_cookie(info->notify_cookie, NOTIFY_REMOVED);
567 netlink_sendskb(info->notify_sock, info->notify_cookie, 0);
568 }
569 info->notify_owner = 0;
570}
571
572static int mq_attr_ok(struct mq_attr *attr)
573{
574 if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0)
575 return 0;
576 if (capable(CAP_SYS_RESOURCE)) {
577 if (attr->mq_maxmsg > HARD_MSGMAX)
578 return 0;
579 } else {
580 if (attr->mq_maxmsg > msg_max ||
581 attr->mq_msgsize > msgsize_max)
582 return 0;
583 }
584 /* check for overflow */
585 if (attr->mq_msgsize > ULONG_MAX/attr->mq_maxmsg)
586 return 0;
587 if ((unsigned long)(attr->mq_maxmsg * attr->mq_msgsize) +
588 (attr->mq_maxmsg * sizeof (struct msg_msg *)) <
589 (unsigned long)(attr->mq_maxmsg * attr->mq_msgsize))
590 return 0;
591 return 1;
592}
593
594/*
595 * Invoked when creating a new queue via sys_mq_open
596 */
597static struct file *do_create(struct dentry *dir, struct dentry *dentry,
598 int oflag, mode_t mode, struct mq_attr __user *u_attr)
599{
600 struct file *filp;
601 struct mq_attr attr;
602 int ret;
603
604 if (u_attr != NULL) {
605 if (copy_from_user(&attr, u_attr, sizeof(attr)))
606 return ERR_PTR(-EFAULT);
607 if (!mq_attr_ok(&attr))
608 return ERR_PTR(-EINVAL);
609 /* store for use during create */
610 dentry->d_fsdata = &attr;
611 }
612
613 ret = vfs_create(dir->d_inode, dentry, mode, NULL);
614 dentry->d_fsdata = NULL;
615 if (ret)
616 return ERR_PTR(ret);
617
618 filp = dentry_open(dentry, mqueue_mnt, oflag);
619 if (!IS_ERR(filp))
620 dget(dentry);
621
622 return filp;
623}
624
625/* Opens existing queue */
626static struct file *do_open(struct dentry *dentry, int oflag)
627{
628static int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
629 MAY_READ | MAY_WRITE };
630 struct file *filp;
631
632 if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
633 return ERR_PTR(-EINVAL);
634
635 if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL))
636 return ERR_PTR(-EACCES);
637
638 filp = dentry_open(dentry, mqueue_mnt, oflag);
639
640 if (!IS_ERR(filp))
641 dget(dentry);
642
643 return filp;
644}
645
646asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
647 struct mq_attr __user *u_attr)
648{
649 struct dentry *dentry;
650 struct file *filp;
651 char *name;
652 int fd, error;
653
654 if (IS_ERR(name = getname(u_name)))
655 return PTR_ERR(name);
656
657 fd = get_unused_fd();
658 if (fd < 0)
659 goto out_putname;
660
661 down(&mqueue_mnt->mnt_root->d_inode->i_sem);
662 dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
663 if (IS_ERR(dentry)) {
664 error = PTR_ERR(dentry);
665 goto out_err;
666 }
667 mntget(mqueue_mnt);
668
669 if (oflag & O_CREAT) {
670 if (dentry->d_inode) { /* entry already exists */
671 filp = (oflag & O_EXCL) ? ERR_PTR(-EEXIST) :
672 do_open(dentry, oflag);
673 } else {
674 filp = do_create(mqueue_mnt->mnt_root, dentry,
675 oflag, mode, u_attr);
676 }
677 } else
678 filp = (dentry->d_inode) ? do_open(dentry, oflag) :
679 ERR_PTR(-ENOENT);
680
681 dput(dentry);
682
683 if (IS_ERR(filp)) {
684 error = PTR_ERR(filp);
685 goto out_putfd;
686 }
687
688 set_close_on_exec(fd, 1);
689 fd_install(fd, filp);
690 goto out_upsem;
691
692out_putfd:
693 mntput(mqueue_mnt);
694 put_unused_fd(fd);
695out_err:
696 fd = error;
697out_upsem:
698 up(&mqueue_mnt->mnt_root->d_inode->i_sem);
699out_putname:
700 putname(name);
701 return fd;
702}
703
704asmlinkage long sys_mq_unlink(const char __user *u_name)
705{
706 int err;
707 char *name;
708 struct dentry *dentry;
709 struct inode *inode = NULL;
710
711 name = getname(u_name);
712 if (IS_ERR(name))
713 return PTR_ERR(name);
714
715 down(&mqueue_mnt->mnt_root->d_inode->i_sem);
716 dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
717 if (IS_ERR(dentry)) {
718 err = PTR_ERR(dentry);
719 goto out_unlock;
720 }
721
722 if (!dentry->d_inode) {
723 err = -ENOENT;
724 goto out_err;
725 }
726
727 inode = dentry->d_inode;
728 if (inode)
729 atomic_inc(&inode->i_count);
730
731 err = vfs_unlink(dentry->d_parent->d_inode, dentry);
732out_err:
733 dput(dentry);
734
735out_unlock:
736 up(&mqueue_mnt->mnt_root->d_inode->i_sem);
737 putname(name);
738 if (inode)
739 iput(inode);
740
741 return err;
742}
743
744/* Pipelined send and receive functions.
745 *
746 * If a receiver finds no waiting message, then it registers itself in the
747 * list of waiting receivers. A sender checks that list before adding the new
748 * message into the message array. If there is a waiting receiver, then it
749 * bypasses the message array and directly hands the message over to the
750 * receiver.
751 * The receiver accepts the message and returns without grabbing the queue
752 * spinlock. Therefore an intermediate STATE_PENDING state and memory barriers
753 * are necessary. The same algorithm is used for sysv semaphores, see
754 * ipc/sem.c for more details.
755 *
756 * The same algorithm is used for senders.
757 */
758
759/* pipelined_send() - send a message directly to the task waiting in
760 * sys_mq_timedreceive() (without inserting the message into the queue).
761 */
762static inline void pipelined_send(struct mqueue_inode_info *info,
763 struct msg_msg *message,
764 struct ext_wait_queue *receiver)
765{
766 receiver->msg = message;
767 list_del(&receiver->list);
768 receiver->state = STATE_PENDING;
769 wake_up_process(receiver->task);
770 wmb();
771 receiver->state = STATE_READY;
772}
773
774/* pipelined_receive() - if a task is waiting in sys_mq_timedsend(),
775 * take its message and put it into the queue (a free slot is guaranteed). */
776static inline void pipelined_receive(struct mqueue_inode_info *info)
777{
778 struct ext_wait_queue *sender = wq_get_first_waiter(info, SEND);
779
780 if (!sender) {
781 /* for poll */
782 wake_up_interruptible(&info->wait_q);
783 return;
784 }
785 msg_insert(sender->msg, info);
786 list_del(&sender->list);
787 sender->state = STATE_PENDING;
788 wake_up_process(sender->task);
789 wmb();
790 sender->state = STATE_READY;
791}
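/*
 * Editorial illustration of the handoff used by the two helpers above (the
 * ordering relies on the wmb() exactly as written in the code):
 *
 *	sender (pipelined_send)			receiver (wq_sleep)
 *	-----------------------			-------------------
 *	receiver->msg = message;
 *	receiver->state = STATE_PENDING;
 *	wake_up_process(receiver->task);
 *	wmb();					while (ewp->state == STATE_PENDING)
 *	receiver->state = STATE_READY;			cpu_relax();
 *						once STATE_READY is observed the
 *						receiver uses ewp->msg without
 *						retaking info->lock.
 */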
792
793asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
794 size_t msg_len, unsigned int msg_prio,
795 const struct timespec __user *u_abs_timeout)
796{
797 struct file *filp;
798 struct inode *inode;
799 struct ext_wait_queue wait;
800 struct ext_wait_queue *receiver;
801 struct msg_msg *msg_ptr;
802 struct mqueue_inode_info *info;
803 long timeout;
804 int ret;
805
806 if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX))
807 return -EINVAL;
808
809 timeout = prepare_timeout(u_abs_timeout);
810
811 ret = -EBADF;
812 filp = fget(mqdes);
813 if (unlikely(!filp))
814 goto out;
815
816 inode = filp->f_dentry->d_inode;
817 if (unlikely(filp->f_op != &mqueue_file_operations))
818 goto out_fput;
819 info = MQUEUE_I(inode);
820
821 if (unlikely(!(filp->f_mode & FMODE_WRITE)))
822 goto out_fput;
823
824 if (unlikely(msg_len > info->attr.mq_msgsize)) {
825 ret = -EMSGSIZE;
826 goto out_fput;
827 }
828
829 /* First try to allocate memory, before doing anything with
830 * existing queues. */
831 msg_ptr = load_msg(u_msg_ptr, msg_len);
832 if (IS_ERR(msg_ptr)) {
833 ret = PTR_ERR(msg_ptr);
834 goto out_fput;
835 }
836 msg_ptr->m_ts = msg_len;
837 msg_ptr->m_type = msg_prio;
838
839 spin_lock(&info->lock);
840
841 if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) {
842 if (filp->f_flags & O_NONBLOCK) {
843 spin_unlock(&info->lock);
844 ret = -EAGAIN;
845 } else if (unlikely(timeout < 0)) {
846 spin_unlock(&info->lock);
847 ret = timeout;
848 } else {
849 wait.task = current;
850 wait.msg = (void *) msg_ptr;
851 wait.state = STATE_NONE;
852 ret = wq_sleep(info, SEND, timeout, &wait);
853 }
854 if (ret < 0)
855 free_msg(msg_ptr);
856 } else {
857 receiver = wq_get_first_waiter(info, RECV);
858 if (receiver) {
859 pipelined_send(info, msg_ptr, receiver);
860 } else {
861 /* adds message to the queue */
862 msg_insert(msg_ptr, info);
863 __do_notify(info);
864 }
865 inode->i_atime = inode->i_mtime = inode->i_ctime =
866 CURRENT_TIME;
867 spin_unlock(&info->lock);
868 ret = 0;
869 }
870out_fput:
871 fput(filp);
872out:
873 return ret;
874}
875
876asmlinkage ssize_t sys_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr,
877 size_t msg_len, unsigned int __user *u_msg_prio,
878 const struct timespec __user *u_abs_timeout)
879{
880 long timeout;
881 ssize_t ret;
882 struct msg_msg *msg_ptr;
883 struct file *filp;
884 struct inode *inode;
885 struct mqueue_inode_info *info;
886 struct ext_wait_queue wait;
887
888 timeout = prepare_timeout(u_abs_timeout);
889
890 ret = -EBADF;
891 filp = fget(mqdes);
892 if (unlikely(!filp))
893 goto out;
894
895 inode = filp->f_dentry->d_inode;
896 if (unlikely(filp->f_op != &mqueue_file_operations))
897 goto out_fput;
898 info = MQUEUE_I(inode);
899
900 if (unlikely(!(filp->f_mode & FMODE_READ)))
901 goto out_fput;
902
903 /* checks if buffer is big enough */
904 if (unlikely(msg_len < info->attr.mq_msgsize)) {
905 ret = -EMSGSIZE;
906 goto out_fput;
907 }
908
909 spin_lock(&info->lock);
910 if (info->attr.mq_curmsgs == 0) {
911 if (filp->f_flags & O_NONBLOCK) {
912 spin_unlock(&info->lock);
913 ret = -EAGAIN;
914 msg_ptr = NULL;
915 } else if (unlikely(timeout < 0)) {
916 spin_unlock(&info->lock);
917 ret = timeout;
918 msg_ptr = NULL;
919 } else {
920 wait.task = current;
921 wait.state = STATE_NONE;
922 ret = wq_sleep(info, RECV, timeout, &wait);
923 msg_ptr = wait.msg;
924 }
925 } else {
926 msg_ptr = msg_get(info);
927
928 inode->i_atime = inode->i_mtime = inode->i_ctime =
929 CURRENT_TIME;
930
931 /* There is now free space in queue. */
932 pipelined_receive(info);
933 spin_unlock(&info->lock);
934 ret = 0;
935 }
936 if (ret == 0) {
937 ret = msg_ptr->m_ts;
938
939 if ((u_msg_prio && put_user(msg_ptr->m_type, u_msg_prio)) ||
940 store_msg(u_msg_ptr, msg_ptr, msg_ptr->m_ts)) {
941 ret = -EFAULT;
942 }
943 free_msg(msg_ptr);
944 }
945out_fput:
946 fput(filp);
947out:
948 return ret;
949}
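/*
 * Editorial user-space sketch exercising the two syscalls above through the
 * POSIX wrappers (mq_send()/mq_receive() are the NULL-timeout forms of
 * mq_timedsend()/mq_timedreceive()).  Queue name and attributes are
 * illustrative; link with -lrt on glibc of this era.
 */
#include <fcntl.h>
#include <mqueue.h>
#include <stdio.h>

int main(void)
{
	struct mq_attr attr = { .mq_maxmsg = 4, .mq_msgsize = 64 };
	char buf[64];		/* receive buffer must be at least mq_msgsize */
	unsigned int prio;
	mqd_t q = mq_open("/demo", O_CREAT | O_RDWR, 0600, &attr);

	if (q == (mqd_t)-1)
		return 1;
	mq_send(q, "hello", 5, 3);			/* priority 3 */
	if (mq_receive(q, buf, sizeof(buf), &prio) == 5)
		printf("got %.5s at priority %u\n", buf, prio);
	mq_close(q);
	mq_unlink("/demo");
	return 0;
}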
950
951/*
952 * Notes: if the caller asks to deregister (by passing a NULL pointer) but
953 * is not the current owner of the notification, the request is silently
954 * ignored. POSIX does not explicitly define this case.
955 */
956asmlinkage long sys_mq_notify(mqd_t mqdes,
957 const struct sigevent __user *u_notification)
958{
959 int ret;
960 struct file *filp;
961 struct sock *sock;
962 struct inode *inode;
963 struct sigevent notification;
964 struct mqueue_inode_info *info;
965 struct sk_buff *nc;
966
967 nc = NULL;
968 sock = NULL;
969 if (u_notification != NULL) {
970 if (copy_from_user(&notification, u_notification,
971 sizeof(struct sigevent)))
972 return -EFAULT;
973
974 if (unlikely(notification.sigev_notify != SIGEV_NONE &&
975 notification.sigev_notify != SIGEV_SIGNAL &&
976 notification.sigev_notify != SIGEV_THREAD))
977 return -EINVAL;
978 if (notification.sigev_notify == SIGEV_SIGNAL &&
979 (notification.sigev_signo < 0 ||
980 notification.sigev_signo > _NSIG)) {
981 return -EINVAL;
982 }
983 if (notification.sigev_notify == SIGEV_THREAD) {
984 /* create the notify skb */
985 nc = alloc_skb(NOTIFY_COOKIE_LEN, GFP_KERNEL);
986 ret = -ENOMEM;
987 if (!nc)
988 goto out;
989 ret = -EFAULT;
990 if (copy_from_user(nc->data,
991 notification.sigev_value.sival_ptr,
992 NOTIFY_COOKIE_LEN)) {
993 goto out;
994 }
995
996 /* TODO: add a header? */
997 skb_put(nc, NOTIFY_COOKIE_LEN);
998 /* and attach it to the socket */
999retry:
1000 filp = fget(notification.sigev_signo);
1001 ret = -EBADF;
1002 if (!filp)
1003 goto out;
1004 sock = netlink_getsockbyfilp(filp);
1005 fput(filp);
1006 if (IS_ERR(sock)) {
1007 ret = PTR_ERR(sock);
1008 sock = NULL;
1009 goto out;
1010 }
1011
1012 ret = netlink_attachskb(sock, nc, 0, MAX_SCHEDULE_TIMEOUT);
1013 if (ret == 1)
1014 goto retry;
1015 if (ret) {
1016 sock = NULL;
1017 nc = NULL;
1018 goto out;
1019 }
1020 }
1021 }
1022
1023 ret = -EBADF;
1024 filp = fget(mqdes);
1025 if (!filp)
1026 goto out;
1027
1028 inode = filp->f_dentry->d_inode;
1029 if (unlikely(filp->f_op != &mqueue_file_operations))
1030 goto out_fput;
1031 info = MQUEUE_I(inode);
1032
1033 ret = 0;
1034 spin_lock(&info->lock);
1035 if (u_notification == NULL) {
1036 if (info->notify_owner == current->tgid) {
1037 remove_notification(info);
1038 inode->i_atime = inode->i_ctime = CURRENT_TIME;
1039 }
1040 } else if (info->notify_owner != 0) {
1041 ret = -EBUSY;
1042 } else {
1043 switch (notification.sigev_notify) {
1044 case SIGEV_NONE:
1045 info->notify.sigev_notify = SIGEV_NONE;
1046 break;
1047 case SIGEV_THREAD:
1048 info->notify_sock = sock;
1049 info->notify_cookie = nc;
1050 sock = NULL;
1051 nc = NULL;
1052 info->notify.sigev_notify = SIGEV_THREAD;
1053 break;
1054 case SIGEV_SIGNAL:
1055 info->notify.sigev_signo = notification.sigev_signo;
1056 info->notify.sigev_value = notification.sigev_value;
1057 info->notify.sigev_notify = SIGEV_SIGNAL;
1058 break;
1059 }
1060 info->notify_owner = current->tgid;
1061 inode->i_atime = inode->i_ctime = CURRENT_TIME;
1062 }
1063 spin_unlock(&info->lock);
1064out_fput:
1065 fput(filp);
1066out:
1067 if (sock) {
1068 netlink_detachskb(sock, nc);
1069 } else if (nc) {
1070 dev_kfree_skb(nc);
1071 }
1072 return ret;
1073}
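/*
 * Editorial sketch (not part of this patch): registering for SIGEV_SIGNAL
 * notification from user space, which takes the SIGEV_SIGNAL branch of
 * sys_mq_notify() above.  Function name and signal choice are illustrative.
 */
#include <mqueue.h>
#include <signal.h>

static void on_msg(int sig)
{
	(void)sig;	/* a real handler would go on to drain the queue */
}

int register_notification(mqd_t q)
{
	struct sigevent sev = {
		.sigev_notify = SIGEV_SIGNAL,
		.sigev_signo  = SIGUSR1,
	};

	signal(SIGUSR1, on_msg);
	/* returns 0 on success, or -1 with errno EBUSY if another owner exists */
	return mq_notify(q, &sev);
}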
1074
1075asmlinkage long sys_mq_getsetattr(mqd_t mqdes,
1076 const struct mq_attr __user *u_mqstat,
1077 struct mq_attr __user *u_omqstat)
1078{
1079 int ret;
1080 struct mq_attr mqstat, omqstat;
1081 struct file *filp;
1082 struct inode *inode;
1083 struct mqueue_inode_info *info;
1084
1085 if (u_mqstat != NULL) {
1086 if (copy_from_user(&mqstat, u_mqstat, sizeof(struct mq_attr)))
1087 return -EFAULT;
1088 if (mqstat.mq_flags & (~O_NONBLOCK))
1089 return -EINVAL;
1090 }
1091
1092 ret = -EBADF;
1093 filp = fget(mqdes);
1094 if (!filp)
1095 goto out;
1096
1097 inode = filp->f_dentry->d_inode;
1098 if (unlikely(filp->f_op != &mqueue_file_operations))
1099 goto out_fput;
1100 info = MQUEUE_I(inode);
1101
1102 spin_lock(&info->lock);
1103
1104 omqstat = info->attr;
1105 omqstat.mq_flags = filp->f_flags & O_NONBLOCK;
1106 if (u_mqstat) {
1107 if (mqstat.mq_flags & O_NONBLOCK)
1108 filp->f_flags |= O_NONBLOCK;
1109 else
1110 filp->f_flags &= ~O_NONBLOCK;
1111
1112 inode->i_atime = inode->i_ctime = CURRENT_TIME;
1113 }
1114
1115 spin_unlock(&info->lock);
1116
1117 ret = 0;
1118 if (u_omqstat != NULL && copy_to_user(u_omqstat, &omqstat,
1119 sizeof(struct mq_attr)))
1120 ret = -EFAULT;
1121
1122out_fput:
1123 fput(filp);
1124out:
1125 return ret;
1126}
1127
1128static struct inode_operations mqueue_dir_inode_operations = {
1129 .lookup = simple_lookup,
1130 .create = mqueue_create,
1131 .unlink = mqueue_unlink,
1132};
1133
1134static struct file_operations mqueue_file_operations = {
1135 .flush = mqueue_flush_file,
1136 .poll = mqueue_poll_file,
1137 .read = mqueue_read_file,
1138};
1139
1140static struct super_operations mqueue_super_ops = {
1141 .alloc_inode = mqueue_alloc_inode,
1142 .destroy_inode = mqueue_destroy_inode,
1143 .statfs = simple_statfs,
1144 .delete_inode = mqueue_delete_inode,
1145 .drop_inode = generic_delete_inode,
1146};
1147
1148static struct file_system_type mqueue_fs_type = {
1149 .name = "mqueue",
1150 .get_sb = mqueue_get_sb,
1151 .kill_sb = kill_litter_super,
1152};
1153
1154static int msg_max_limit_min = DFLT_MSGMAX;
1155static int msg_max_limit_max = HARD_MSGMAX;
1156
1157static int msg_maxsize_limit_min = DFLT_MSGSIZEMAX;
1158static int msg_maxsize_limit_max = INT_MAX;
1159
1160static ctl_table mq_sysctls[] = {
1161 {
1162 .ctl_name = CTL_QUEUESMAX,
1163 .procname = "queues_max",
1164 .data = &queues_max,
1165 .maxlen = sizeof(int),
1166 .mode = 0644,
1167 .proc_handler = &proc_dointvec,
1168 },
1169 {
1170 .ctl_name = CTL_MSGMAX,
1171 .procname = "msg_max",
1172 .data = &msg_max,
1173 .maxlen = sizeof(int),
1174 .mode = 0644,
1175 .proc_handler = &proc_dointvec_minmax,
1176 .extra1 = &msg_max_limit_min,
1177 .extra2 = &msg_max_limit_max,
1178 },
1179 {
1180 .ctl_name = CTL_MSGSIZEMAX,
1181 .procname = "msgsize_max",
1182 .data = &msgsize_max,
1183 .maxlen = sizeof(int),
1184 .mode = 0644,
1185 .proc_handler = &proc_dointvec_minmax,
1186 .extra1 = &msg_maxsize_limit_min,
1187 .extra2 = &msg_maxsize_limit_max,
1188 },
1189 { .ctl_name = 0 }
1190};
1191
1192static ctl_table mq_sysctl_dir[] = {
1193 {
1194 .ctl_name = FS_MQUEUE,
1195 .procname = "mqueue",
1196 .mode = 0555,
1197 .child = mq_sysctls,
1198 },
1199 { .ctl_name = 0 }
1200};
1201
1202static ctl_table mq_sysctl_root[] = {
1203 {
1204 .ctl_name = CTL_FS,
1205 .procname = "fs",
1206 .mode = 0555,
1207 .child = mq_sysctl_dir,
1208 },
1209 { .ctl_name = 0 }
1210};
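/*
 * Example (not part of this file): how the tables above surface to
 * userspace, assuming the usual procfs mount. They appear as
 * /proc/sys/fs/mqueue/{queues_max,msg_max,msgsize_max}; msg_max and
 * msgsize_max are clamped by proc_dointvec_minmax to the limits defined
 * above. A minimal sketch that reads one of them:
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/fs/mqueue/msg_max", "r");
	int msg_max;

	if (f && fscanf(f, "%d", &msg_max) == 1)
		printf("msg_max = %d\n", msg_max);
	if (f)
		fclose(f);
	return 0;
}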
1211
1212static int __init init_mqueue_fs(void)
1213{
1214 int error;
1215
1216 mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache",
1217 sizeof(struct mqueue_inode_info), 0,
1218 SLAB_HWCACHE_ALIGN, init_once, NULL);
1219 if (mqueue_inode_cachep == NULL)
1220 return -ENOMEM;
1221
1222	/* ignore failures - they are not fatal */
1223 mq_sysctl_table = register_sysctl_table(mq_sysctl_root, 0);
1224
1225 error = register_filesystem(&mqueue_fs_type);
1226 if (error)
1227 goto out_sysctl;
1228
1229 if (IS_ERR(mqueue_mnt = kern_mount(&mqueue_fs_type))) {
1230 error = PTR_ERR(mqueue_mnt);
1231 goto out_filesystem;
1232 }
1233
1234 /* internal initialization - not common for vfs */
1235 queues_count = 0;
1236 spin_lock_init(&mq_lock);
1237
1238 return 0;
1239
1240out_filesystem:
1241 unregister_filesystem(&mqueue_fs_type);
1242out_sysctl:
1243 if (mq_sysctl_table)
1244 unregister_sysctl_table(mq_sysctl_table);
1245 if (kmem_cache_destroy(mqueue_inode_cachep)) {
1246 printk(KERN_INFO
1247 "mqueue_inode_cache: not all structures were freed\n");
1248 }
1249 return error;
1250}
1251
1252__initcall(init_mqueue_fs);
diff --git a/ipc/msg.c b/ipc/msg.c
new file mode 100644
index 000000000000..27e516f96cdc
--- /dev/null
+++ b/ipc/msg.c
@@ -0,0 +1,862 @@
1/*
2 * linux/ipc/msg.c
3 * Copyright (C) 1992 Krishna Balasubramanian
4 *
5 * Removed all the remaining kerneld mess
6 * Catch the -EFAULT stuff properly
7 * Use GFP_KERNEL for messages as in 1.2
8 * Fixed up the unchecked user space derefs
9 * Copyright (C) 1998 Alan Cox & Andi Kleen
10 *
11 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
12 *
13 * mostly rewritten, threaded and wake-one semantics added
14 * MSGMAX limit removed, sysctl's added
15 * (c) 1999 Manfred Spraul <manfreds@colorfullife.com>
16 */
17
18#include <linux/config.h>
19#include <linux/slab.h>
20#include <linux/msg.h>
21#include <linux/spinlock.h>
22#include <linux/init.h>
23#include <linux/proc_fs.h>
24#include <linux/list.h>
25#include <linux/security.h>
26#include <linux/sched.h>
27#include <linux/syscalls.h>
28#include <linux/audit.h>
29#include <asm/current.h>
30#include <asm/uaccess.h>
31#include "util.h"
32
33/* sysctl: */
34int msg_ctlmax = MSGMAX;
35int msg_ctlmnb = MSGMNB;
36int msg_ctlmni = MSGMNI;
37
38/* one msg_receiver structure for each sleeping receiver */
39struct msg_receiver {
40 struct list_head r_list;
41 struct task_struct* r_tsk;
42
43 int r_mode;
44 long r_msgtype;
45 long r_maxsize;
46
47 struct msg_msg* volatile r_msg;
48};
49
50/* one msg_sender for each sleeping sender */
51struct msg_sender {
52 struct list_head list;
53 struct task_struct* tsk;
54};
55
56#define SEARCH_ANY 1
57#define SEARCH_EQUAL 2
58#define SEARCH_NOTEQUAL 3
59#define SEARCH_LESSEQUAL 4
60
61static atomic_t msg_bytes = ATOMIC_INIT(0);
62static atomic_t msg_hdrs = ATOMIC_INIT(0);
63
64static struct ipc_ids msg_ids;
65
66#define msg_lock(id) ((struct msg_queue*)ipc_lock(&msg_ids,id))
67#define msg_unlock(msq) ipc_unlock(&(msq)->q_perm)
68#define msg_rmid(id) ((struct msg_queue*)ipc_rmid(&msg_ids,id))
69#define msg_checkid(msq, msgid) \
70 ipc_checkid(&msg_ids,&msq->q_perm,msgid)
71#define msg_buildid(id, seq) \
72 ipc_buildid(&msg_ids, id, seq)
73
74static void freeque (struct msg_queue *msq, int id);
75static int newque (key_t key, int msgflg);
76#ifdef CONFIG_PROC_FS
77static int sysvipc_msg_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
78#endif
79
80void __init msg_init (void)
81{
82 ipc_init_ids(&msg_ids,msg_ctlmni);
83
84#ifdef CONFIG_PROC_FS
85 create_proc_read_entry("sysvipc/msg", 0, NULL, sysvipc_msg_read_proc, NULL);
86#endif
87}
88
89static int newque (key_t key, int msgflg)
90{
91 int id;
92 int retval;
93 struct msg_queue *msq;
94
95 msq = ipc_rcu_alloc(sizeof(*msq));
96 if (!msq)
97 return -ENOMEM;
98
99 msq->q_perm.mode = (msgflg & S_IRWXUGO);
100 msq->q_perm.key = key;
101
102 msq->q_perm.security = NULL;
103 retval = security_msg_queue_alloc(msq);
104 if (retval) {
105 ipc_rcu_putref(msq);
106 return retval;
107 }
108
109 id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni);
110 if(id == -1) {
111 security_msg_queue_free(msq);
112 ipc_rcu_putref(msq);
113 return -ENOSPC;
114 }
115
116 msq->q_stime = msq->q_rtime = 0;
117 msq->q_ctime = get_seconds();
118 msq->q_cbytes = msq->q_qnum = 0;
119 msq->q_qbytes = msg_ctlmnb;
120 msq->q_lspid = msq->q_lrpid = 0;
121 INIT_LIST_HEAD(&msq->q_messages);
122 INIT_LIST_HEAD(&msq->q_receivers);
123 INIT_LIST_HEAD(&msq->q_senders);
124 msg_unlock(msq);
125
126 return msg_buildid(id,msq->q_perm.seq);
127}
128
129static inline void ss_add(struct msg_queue* msq, struct msg_sender* mss)
130{
131 mss->tsk=current;
132 current->state=TASK_INTERRUPTIBLE;
133 list_add_tail(&mss->list,&msq->q_senders);
134}
135
136static inline void ss_del(struct msg_sender* mss)
137{
138 if(mss->list.next != NULL)
139 list_del(&mss->list);
140}
141
142static void ss_wakeup(struct list_head* h, int kill)
143{
144 struct list_head *tmp;
145
146 tmp = h->next;
147 while (tmp != h) {
148 struct msg_sender* mss;
149
150 mss = list_entry(tmp,struct msg_sender,list);
151 tmp = tmp->next;
152 if(kill)
153 mss->list.next=NULL;
154 wake_up_process(mss->tsk);
155 }
156}
157
158static void expunge_all(struct msg_queue* msq, int res)
159{
160 struct list_head *tmp;
161
162 tmp = msq->q_receivers.next;
163 while (tmp != &msq->q_receivers) {
164 struct msg_receiver* msr;
165
166 msr = list_entry(tmp,struct msg_receiver,r_list);
167 tmp = tmp->next;
168 msr->r_msg = NULL;
169 wake_up_process(msr->r_tsk);
170 smp_mb();
171 msr->r_msg = ERR_PTR(res);
172 }
173}
174/*
175 * freeque() wakes up waiters on the sender and receiver waiting queues,
176 * removes the message queue from the message queue ID array, and cleans
177 * up all the messages associated with this queue.
178 *
179 * msg_ids.sem and the spinlock for this message queue are held
180 * before freeque() is called. msg_ids.sem remains locked on exit.
181 */
182static void freeque (struct msg_queue *msq, int id)
183{
184 struct list_head *tmp;
185
186 expunge_all(msq,-EIDRM);
187 ss_wakeup(&msq->q_senders,1);
188 msq = msg_rmid(id);
189 msg_unlock(msq);
190
191 tmp = msq->q_messages.next;
192 while(tmp != &msq->q_messages) {
193 struct msg_msg* msg = list_entry(tmp,struct msg_msg,m_list);
194 tmp = tmp->next;
195 atomic_dec(&msg_hdrs);
196 free_msg(msg);
197 }
198 atomic_sub(msq->q_cbytes, &msg_bytes);
199 security_msg_queue_free(msq);
200 ipc_rcu_putref(msq);
201}
202
203asmlinkage long sys_msgget (key_t key, int msgflg)
204{
205 int id, ret = -EPERM;
206 struct msg_queue *msq;
207
208 down(&msg_ids.sem);
209 if (key == IPC_PRIVATE)
210 ret = newque(key, msgflg);
211 else if ((id = ipc_findkey(&msg_ids, key)) == -1) { /* key not used */
212 if (!(msgflg & IPC_CREAT))
213 ret = -ENOENT;
214 else
215 ret = newque(key, msgflg);
216 } else if (msgflg & IPC_CREAT && msgflg & IPC_EXCL) {
217 ret = -EEXIST;
218 } else {
219 msq = msg_lock(id);
220 if(msq==NULL)
221 BUG();
222 if (ipcperms(&msq->q_perm, msgflg))
223 ret = -EACCES;
224 else {
225 int qid = msg_buildid(id, msq->q_perm.seq);
226 ret = security_msg_queue_associate(msq, msgflg);
227 if (!ret)
228 ret = qid;
229 }
230 msg_unlock(msq);
231 }
232 up(&msg_ids.sem);
233 return ret;
234}
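/*
 * Example (not part of ipc/msg.c): a userspace sketch of the three paths in
 * sys_msgget() above - IPC_PRIVATE always creates a queue, an unused key
 * creates one only with IPC_CREAT, and IPC_CREAT|IPC_EXCL fails with EEXIST
 * if the key is already in use. The path "/tmp/example" is illustrative and
 * must exist for ftok() to work.
 */
#include <stdio.h>
#include <sys/ipc.h>
#include <sys/msg.h>

int main(void)
{
	key_t key = ftok("/tmp/example", 'M');
	int id = msgget(key, IPC_CREAT | 0600);		/* create or attach */

	if (id < 0) {
		perror("msgget");
		return 1;
	}
	if (msgget(key, IPC_CREAT | IPC_EXCL | 0600) < 0)
		perror("msgget IPC_EXCL");		/* expected: EEXIST */

	msgctl(id, IPC_RMID, NULL);
	return 0;
}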
235
236static inline unsigned long copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
237{
238 switch(version) {
239 case IPC_64:
240 return copy_to_user (buf, in, sizeof(*in));
241 case IPC_OLD:
242 {
243 struct msqid_ds out;
244
245 memset(&out,0,sizeof(out));
246
247 ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm);
248
249 out.msg_stime = in->msg_stime;
250 out.msg_rtime = in->msg_rtime;
251 out.msg_ctime = in->msg_ctime;
252
253 if(in->msg_cbytes > USHRT_MAX)
254 out.msg_cbytes = USHRT_MAX;
255 else
256 out.msg_cbytes = in->msg_cbytes;
257 out.msg_lcbytes = in->msg_cbytes;
258
259 if(in->msg_qnum > USHRT_MAX)
260 out.msg_qnum = USHRT_MAX;
261 else
262 out.msg_qnum = in->msg_qnum;
263
264 if(in->msg_qbytes > USHRT_MAX)
265 out.msg_qbytes = USHRT_MAX;
266 else
267 out.msg_qbytes = in->msg_qbytes;
268 out.msg_lqbytes = in->msg_qbytes;
269
270 out.msg_lspid = in->msg_lspid;
271 out.msg_lrpid = in->msg_lrpid;
272
273 return copy_to_user (buf, &out, sizeof(out));
274 }
275 default:
276 return -EINVAL;
277 }
278}
279
280struct msq_setbuf {
281 unsigned long qbytes;
282 uid_t uid;
283 gid_t gid;
284 mode_t mode;
285};
286
287static inline unsigned long copy_msqid_from_user(struct msq_setbuf *out, void __user *buf, int version)
288{
289 switch(version) {
290 case IPC_64:
291 {
292 struct msqid64_ds tbuf;
293
294 if (copy_from_user (&tbuf, buf, sizeof (tbuf)))
295 return -EFAULT;
296
297 out->qbytes = tbuf.msg_qbytes;
298 out->uid = tbuf.msg_perm.uid;
299 out->gid = tbuf.msg_perm.gid;
300 out->mode = tbuf.msg_perm.mode;
301
302 return 0;
303 }
304 case IPC_OLD:
305 {
306 struct msqid_ds tbuf_old;
307
308 if (copy_from_user (&tbuf_old, buf, sizeof (tbuf_old)))
309 return -EFAULT;
310
311 out->uid = tbuf_old.msg_perm.uid;
312 out->gid = tbuf_old.msg_perm.gid;
313 out->mode = tbuf_old.msg_perm.mode;
314
315 if(tbuf_old.msg_qbytes == 0)
316 out->qbytes = tbuf_old.msg_lqbytes;
317 else
318 out->qbytes = tbuf_old.msg_qbytes;
319
320 return 0;
321 }
322 default:
323 return -EINVAL;
324 }
325}
326
327asmlinkage long sys_msgctl (int msqid, int cmd, struct msqid_ds __user *buf)
328{
329 int err, version;
330 struct msg_queue *msq;
331 struct msq_setbuf setbuf;
332 struct kern_ipc_perm *ipcp;
333
334 if (msqid < 0 || cmd < 0)
335 return -EINVAL;
336
337 version = ipc_parse_version(&cmd);
338
339 switch (cmd) {
340 case IPC_INFO:
341 case MSG_INFO:
342 {
343 struct msginfo msginfo;
344 int max_id;
345 if (!buf)
346 return -EFAULT;
347 /* We must not return kernel stack data.
348 * due to padding, it's not enough
349 * to set all member fields.
350 */
351
352 err = security_msg_queue_msgctl(NULL, cmd);
353 if (err)
354 return err;
355
356 memset(&msginfo,0,sizeof(msginfo));
357 msginfo.msgmni = msg_ctlmni;
358 msginfo.msgmax = msg_ctlmax;
359 msginfo.msgmnb = msg_ctlmnb;
360 msginfo.msgssz = MSGSSZ;
361 msginfo.msgseg = MSGSEG;
362 down(&msg_ids.sem);
363 if (cmd == MSG_INFO) {
364 msginfo.msgpool = msg_ids.in_use;
365 msginfo.msgmap = atomic_read(&msg_hdrs);
366 msginfo.msgtql = atomic_read(&msg_bytes);
367 } else {
368 msginfo.msgmap = MSGMAP;
369 msginfo.msgpool = MSGPOOL;
370 msginfo.msgtql = MSGTQL;
371 }
372 max_id = msg_ids.max_id;
373 up(&msg_ids.sem);
374 if (copy_to_user (buf, &msginfo, sizeof(struct msginfo)))
375 return -EFAULT;
376 return (max_id < 0) ? 0: max_id;
377 }
378 case MSG_STAT:
379 case IPC_STAT:
380 {
381 struct msqid64_ds tbuf;
382 int success_return;
383 if (!buf)
384 return -EFAULT;
385 if(cmd == MSG_STAT && msqid >= msg_ids.entries->size)
386 return -EINVAL;
387
388 memset(&tbuf,0,sizeof(tbuf));
389
390 msq = msg_lock(msqid);
391 if (msq == NULL)
392 return -EINVAL;
393
394 if(cmd == MSG_STAT) {
395 success_return = msg_buildid(msqid, msq->q_perm.seq);
396 } else {
397 err = -EIDRM;
398 if (msg_checkid(msq,msqid))
399 goto out_unlock;
400 success_return = 0;
401 }
402 err = -EACCES;
403 if (ipcperms (&msq->q_perm, S_IRUGO))
404 goto out_unlock;
405
406 err = security_msg_queue_msgctl(msq, cmd);
407 if (err)
408 goto out_unlock;
409
410 kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm);
411 tbuf.msg_stime = msq->q_stime;
412 tbuf.msg_rtime = msq->q_rtime;
413 tbuf.msg_ctime = msq->q_ctime;
414 tbuf.msg_cbytes = msq->q_cbytes;
415 tbuf.msg_qnum = msq->q_qnum;
416 tbuf.msg_qbytes = msq->q_qbytes;
417 tbuf.msg_lspid = msq->q_lspid;
418 tbuf.msg_lrpid = msq->q_lrpid;
419 msg_unlock(msq);
420 if (copy_msqid_to_user(buf, &tbuf, version))
421 return -EFAULT;
422 return success_return;
423 }
424 case IPC_SET:
425 if (!buf)
426 return -EFAULT;
427 if (copy_msqid_from_user (&setbuf, buf, version))
428 return -EFAULT;
429 if ((err = audit_ipc_perms(setbuf.qbytes, setbuf.uid, setbuf.gid, setbuf.mode)))
430 return err;
431 break;
432 case IPC_RMID:
433 break;
434 default:
435 return -EINVAL;
436 }
437
438 down(&msg_ids.sem);
439 msq = msg_lock(msqid);
440 err=-EINVAL;
441 if (msq == NULL)
442 goto out_up;
443
444 err = -EIDRM;
445 if (msg_checkid(msq,msqid))
446 goto out_unlock_up;
447 ipcp = &msq->q_perm;
448 err = -EPERM;
449 if (current->euid != ipcp->cuid &&
450 current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN))
451 /* We _could_ check for CAP_CHOWN above, but we don't */
452 goto out_unlock_up;
453
454 err = security_msg_queue_msgctl(msq, cmd);
455 if (err)
456 goto out_unlock_up;
457
458 switch (cmd) {
459 case IPC_SET:
460 {
461 err = -EPERM;
462 if (setbuf.qbytes > msg_ctlmnb && !capable(CAP_SYS_RESOURCE))
463 goto out_unlock_up;
464
465 msq->q_qbytes = setbuf.qbytes;
466
467 ipcp->uid = setbuf.uid;
468 ipcp->gid = setbuf.gid;
469 ipcp->mode = (ipcp->mode & ~S_IRWXUGO) |
470 (S_IRWXUGO & setbuf.mode);
471 msq->q_ctime = get_seconds();
472 /* sleeping receivers might be excluded by
473 * stricter permissions.
474 */
475 expunge_all(msq,-EAGAIN);
476 /* sleeping senders might be able to send
477 * due to a larger queue size.
478 */
479 ss_wakeup(&msq->q_senders,0);
480 msg_unlock(msq);
481 break;
482 }
483 case IPC_RMID:
484 freeque (msq, msqid);
485 break;
486 }
487 err = 0;
488out_up:
489 up(&msg_ids.sem);
490 return err;
491out_unlock_up:
492 msg_unlock(msq);
493 goto out_up;
494out_unlock:
495 msg_unlock(msq);
496 return err;
497}
498
499static int testmsg(struct msg_msg* msg,long type,int mode)
500{
501 switch(mode)
502 {
503 case SEARCH_ANY:
504 return 1;
505 case SEARCH_LESSEQUAL:
506 if(msg->m_type <=type)
507 return 1;
508 break;
509 case SEARCH_EQUAL:
510 if(msg->m_type == type)
511 return 1;
512 break;
513 case SEARCH_NOTEQUAL:
514 if(msg->m_type != type)
515 return 1;
516 break;
517 }
518 return 0;
519}
520
521static inline int pipelined_send(struct msg_queue* msq, struct msg_msg* msg)
522{
523 struct list_head* tmp;
524
525 tmp = msq->q_receivers.next;
526 while (tmp != &msq->q_receivers) {
527 struct msg_receiver* msr;
528 msr = list_entry(tmp,struct msg_receiver,r_list);
529 tmp = tmp->next;
530 if(testmsg(msg,msr->r_msgtype,msr->r_mode) &&
531 !security_msg_queue_msgrcv(msq, msg, msr->r_tsk, msr->r_msgtype, msr->r_mode)) {
532 list_del(&msr->r_list);
533 if(msr->r_maxsize < msg->m_ts) {
534 msr->r_msg = NULL;
535 wake_up_process(msr->r_tsk);
536 smp_mb();
537 msr->r_msg = ERR_PTR(-E2BIG);
538 } else {
539 msr->r_msg = NULL;
540 msq->q_lrpid = msr->r_tsk->pid;
541 msq->q_rtime = get_seconds();
542 wake_up_process(msr->r_tsk);
543 smp_mb();
544 msr->r_msg = msg;
545 return 1;
546 }
547 }
548 }
549 return 0;
550}
551
552asmlinkage long sys_msgsnd (int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg)
553{
554 struct msg_queue *msq;
555 struct msg_msg *msg;
556 long mtype;
557 int err;
558
559 if (msgsz > msg_ctlmax || (long) msgsz < 0 || msqid < 0)
560 return -EINVAL;
561 if (get_user(mtype, &msgp->mtype))
562 return -EFAULT;
563 if (mtype < 1)
564 return -EINVAL;
565
566 msg = load_msg(msgp->mtext, msgsz);
567 if(IS_ERR(msg))
568 return PTR_ERR(msg);
569
570 msg->m_type = mtype;
571 msg->m_ts = msgsz;
572
573 msq = msg_lock(msqid);
574 err=-EINVAL;
575 if(msq==NULL)
576 goto out_free;
577
578 err= -EIDRM;
579 if (msg_checkid(msq,msqid))
580 goto out_unlock_free;
581
582 for (;;) {
583 struct msg_sender s;
584
585 err=-EACCES;
586 if (ipcperms(&msq->q_perm, S_IWUGO))
587 goto out_unlock_free;
588
589 err = security_msg_queue_msgsnd(msq, msg, msgflg);
590 if (err)
591 goto out_unlock_free;
592
593 if(msgsz + msq->q_cbytes <= msq->q_qbytes &&
594 1 + msq->q_qnum <= msq->q_qbytes) {
595 break;
596 }
597
598 /* queue full, wait: */
599 if(msgflg&IPC_NOWAIT) {
600 err=-EAGAIN;
601 goto out_unlock_free;
602 }
603 ss_add(msq, &s);
604 ipc_rcu_getref(msq);
605 msg_unlock(msq);
606 schedule();
607
608 ipc_lock_by_ptr(&msq->q_perm);
609 ipc_rcu_putref(msq);
610 if (msq->q_perm.deleted) {
611 err = -EIDRM;
612 goto out_unlock_free;
613 }
614 ss_del(&s);
615
616 if (signal_pending(current)) {
617 err=-ERESTARTNOHAND;
618 goto out_unlock_free;
619 }
620 }
621
622 msq->q_lspid = current->tgid;
623 msq->q_stime = get_seconds();
624
625 if(!pipelined_send(msq,msg)) {
626		/* no one is waiting for this message, enqueue it */
627 list_add_tail(&msg->m_list,&msq->q_messages);
628 msq->q_cbytes += msgsz;
629 msq->q_qnum++;
630 atomic_add(msgsz,&msg_bytes);
631 atomic_inc(&msg_hdrs);
632 }
633
634 err = 0;
635 msg = NULL;
636
637out_unlock_free:
638 msg_unlock(msq);
639out_free:
640 if(msg!=NULL)
641 free_msg(msg);
642 return err;
643}
644
645static inline int convert_mode(long* msgtyp, int msgflg)
646{
647 /*
648 * find message of correct type.
649 * msgtyp = 0 => get first.
650 * msgtyp > 0 => get first message of matching type.
651 * msgtyp < 0 => get the message with the lowest type that is <= abs(msgtyp).
652 */
653 if(*msgtyp==0)
654 return SEARCH_ANY;
655 if(*msgtyp<0) {
656 *msgtyp=-(*msgtyp);
657 return SEARCH_LESSEQUAL;
658 }
659 if(msgflg & MSG_EXCEPT)
660 return SEARCH_NOTEQUAL;
661 return SEARCH_EQUAL;
662}
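/*
 * Example (not part of ipc/msg.c): how the msgtyp argument of msgrcv() maps
 * onto the SEARCH_* modes computed by convert_mode() above. The queue id
 * passed in is assumed to be valid; MSG_EXCEPT is Linux-specific and needs
 * _GNU_SOURCE.
 */
#define _GNU_SOURCE
#include <sys/ipc.h>
#include <sys/msg.h>

struct mybuf { long mtype; char mtext[64]; };

static void receive_examples(int id)
{
	struct mybuf b;

	msgrcv(id, &b, sizeof(b.mtext), 0, IPC_NOWAIT);  /* SEARCH_ANY: first message */
	msgrcv(id, &b, sizeof(b.mtext), 3, IPC_NOWAIT);  /* SEARCH_EQUAL: first of type 3 */
	msgrcv(id, &b, sizeof(b.mtext), -3, IPC_NOWAIT); /* SEARCH_LESSEQUAL: lowest type <= 3 */
	msgrcv(id, &b, sizeof(b.mtext), 3,
	       MSG_EXCEPT | IPC_NOWAIT);                 /* SEARCH_NOTEQUAL: first of type != 3 */
}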
663
664asmlinkage long sys_msgrcv (int msqid, struct msgbuf __user *msgp, size_t msgsz,
665 long msgtyp, int msgflg)
666{
667 struct msg_queue *msq;
668 struct msg_msg *msg;
669 int mode;
670
671 if (msqid < 0 || (long) msgsz < 0)
672 return -EINVAL;
673 mode = convert_mode(&msgtyp,msgflg);
674
675 msq = msg_lock(msqid);
676 if(msq==NULL)
677 return -EINVAL;
678
679 msg = ERR_PTR(-EIDRM);
680 if (msg_checkid(msq,msqid))
681 goto out_unlock;
682
683 for (;;) {
684 struct msg_receiver msr_d;
685 struct list_head* tmp;
686
687 msg = ERR_PTR(-EACCES);
688 if (ipcperms (&msq->q_perm, S_IRUGO))
689 goto out_unlock;
690
691 msg = ERR_PTR(-EAGAIN);
692 tmp = msq->q_messages.next;
693 while (tmp != &msq->q_messages) {
694 struct msg_msg *walk_msg;
695 walk_msg = list_entry(tmp,struct msg_msg,m_list);
696 if(testmsg(walk_msg,msgtyp,mode) &&
697 !security_msg_queue_msgrcv(msq, walk_msg, current, msgtyp, mode)) {
698 msg = walk_msg;
699 if(mode == SEARCH_LESSEQUAL && walk_msg->m_type != 1) {
700 msg=walk_msg;
701 msgtyp=walk_msg->m_type-1;
702 } else {
703 msg=walk_msg;
704 break;
705 }
706 }
707 tmp = tmp->next;
708 }
709 if(!IS_ERR(msg)) {
710 /* Found a suitable message. Unlink it from the queue. */
711 if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
712 msg = ERR_PTR(-E2BIG);
713 goto out_unlock;
714 }
715 list_del(&msg->m_list);
716 msq->q_qnum--;
717 msq->q_rtime = get_seconds();
718 msq->q_lrpid = current->tgid;
719 msq->q_cbytes -= msg->m_ts;
720 atomic_sub(msg->m_ts,&msg_bytes);
721 atomic_dec(&msg_hdrs);
722 ss_wakeup(&msq->q_senders,0);
723 msg_unlock(msq);
724 break;
725 }
726 /* No message waiting. Wait for a message */
727 if (msgflg & IPC_NOWAIT) {
728 msg = ERR_PTR(-ENOMSG);
729 goto out_unlock;
730 }
731 list_add_tail(&msr_d.r_list,&msq->q_receivers);
732 msr_d.r_tsk = current;
733 msr_d.r_msgtype = msgtyp;
734 msr_d.r_mode = mode;
735 if(msgflg & MSG_NOERROR)
736 msr_d.r_maxsize = INT_MAX;
737 else
738 msr_d.r_maxsize = msgsz;
739 msr_d.r_msg = ERR_PTR(-EAGAIN);
740 current->state = TASK_INTERRUPTIBLE;
741 msg_unlock(msq);
742
743 schedule();
744
745 /* Lockless receive, part 1:
746 * Disable preemption. We don't hold a reference to the queue
747 * and getting a reference would defeat the idea of a lockless
748 * operation, thus the code relies on rcu to guarantee the
749		 * existence of msq:
750		 * Prior to destruction, expunge_all(-EIDRM) changes r_msg.
751		 * Thus if r_msg is -EAGAIN, then the queue has not yet been destroyed.
752 * rcu_read_lock() prevents preemption between reading r_msg
753 * and the spin_lock() inside ipc_lock_by_ptr().
754 */
755 rcu_read_lock();
756
757 /* Lockless receive, part 2:
758 * Wait until pipelined_send or expunge_all are outside of
759 * wake_up_process(). There is a race with exit(), see
760 * ipc/mqueue.c for the details.
761 */
762 msg = (struct msg_msg*) msr_d.r_msg;
763 while (msg == NULL) {
764 cpu_relax();
765 msg = (struct msg_msg*) msr_d.r_msg;
766 }
767
768 /* Lockless receive, part 3:
769 * If there is a message or an error then accept it without
770 * locking.
771 */
772 if(msg != ERR_PTR(-EAGAIN)) {
773 rcu_read_unlock();
774 break;
775 }
776
777		/* Lockless receive, part 4:
778 * Acquire the queue spinlock.
779 */
780 ipc_lock_by_ptr(&msq->q_perm);
781 rcu_read_unlock();
782
783		/* Lockless receive, part 5:
784 * Repeat test after acquiring the spinlock.
785 */
786 msg = (struct msg_msg*)msr_d.r_msg;
787 if(msg != ERR_PTR(-EAGAIN))
788 goto out_unlock;
789
790 list_del(&msr_d.r_list);
791 if (signal_pending(current)) {
792 msg = ERR_PTR(-ERESTARTNOHAND);
793out_unlock:
794 msg_unlock(msq);
795 break;
796 }
797 }
798 if (IS_ERR(msg))
799 return PTR_ERR(msg);
800
801 msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz;
802 if (put_user (msg->m_type, &msgp->mtype) ||
803 store_msg(msgp->mtext, msg, msgsz)) {
804 msgsz = -EFAULT;
805 }
806 free_msg(msg);
807 return msgsz;
808}
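/*
 * Example (not part of ipc/msg.c): a userspace sketch of the send/receive
 * pair implemented above, including the size handling at the end of
 * sys_msgrcv() - without MSG_NOERROR a message longer than msgsz fails with
 * E2BIG; with MSG_NOERROR it is silently truncated to msgsz.
 */
#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/msg.h>

struct mybuf { long mtype; char mtext[16]; };

int main(void)
{
	int id = msgget(IPC_PRIVATE, 0600);
	struct mybuf snd = { .mtype = 1 }, rcv;

	strcpy(snd.mtext, "hello world");
	msgsnd(id, &snd, strlen(snd.mtext) + 1, 0);

	if (msgrcv(id, &rcv, 4, 1, IPC_NOWAIT) < 0)
		perror("msgrcv");			/* expected: E2BIG */
	if (msgrcv(id, &rcv, 4, 1, MSG_NOERROR) == 4)
		printf("truncated to 4 bytes\n");

	msgctl(id, IPC_RMID, NULL);
	return 0;
}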
809
810#ifdef CONFIG_PROC_FS
811static int sysvipc_msg_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
812{
813 off_t pos = 0;
814 off_t begin = 0;
815 int i, len = 0;
816
817 down(&msg_ids.sem);
818 len += sprintf(buffer, " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n");
819
820 for(i = 0; i <= msg_ids.max_id; i++) {
821 struct msg_queue * msq;
822 msq = msg_lock(i);
823 if(msq != NULL) {
824 len += sprintf(buffer + len, "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
825 msq->q_perm.key,
826 msg_buildid(i,msq->q_perm.seq),
827 msq->q_perm.mode,
828 msq->q_cbytes,
829 msq->q_qnum,
830 msq->q_lspid,
831 msq->q_lrpid,
832 msq->q_perm.uid,
833 msq->q_perm.gid,
834 msq->q_perm.cuid,
835 msq->q_perm.cgid,
836 msq->q_stime,
837 msq->q_rtime,
838 msq->q_ctime);
839 msg_unlock(msq);
840
841 pos += len;
842 if(pos < offset) {
843 len = 0;
844 begin = pos;
845 }
846 if(pos > offset + length)
847 goto done;
848 }
849
850 }
851 *eof = 1;
852done:
853 up(&msg_ids.sem);
854 *start = buffer + (offset - begin);
855 len -= (offset - begin);
856 if(len > length)
857 len = length;
858 if(len < 0)
859 len = 0;
860 return len;
861}
862#endif
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
new file mode 100644
index 000000000000..66cfb87646eb
--- /dev/null
+++ b/ipc/msgutil.c
@@ -0,0 +1,127 @@
1/*
2 * linux/ipc/msgutil.c
3 * Copyright (C) 1999, 2004 Manfred Spraul
4 *
5 * This file is released under GNU General Public Licence version 2 or
6 * (at your option) any later version.
7 *
8 * See the file COPYING for more details.
9 */
10
11#include <linux/spinlock.h>
12#include <linux/init.h>
13#include <linux/security.h>
14#include <linux/slab.h>
15#include <linux/ipc.h>
16#include <asm/uaccess.h>
17
18#include "util.h"
19
20struct msg_msgseg {
21 struct msg_msgseg* next;
22 /* the next part of the message follows immediately */
23};
24
25#define DATALEN_MSG (PAGE_SIZE-sizeof(struct msg_msg))
26#define DATALEN_SEG (PAGE_SIZE-sizeof(struct msg_msgseg))
27
28struct msg_msg *load_msg(const void __user *src, int len)
29{
30 struct msg_msg *msg;
31 struct msg_msgseg **pseg;
32 int err;
33 int alen;
34
35 alen = len;
36 if (alen > DATALEN_MSG)
37 alen = DATALEN_MSG;
38
39 msg = (struct msg_msg *)kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
40 if (msg == NULL)
41 return ERR_PTR(-ENOMEM);
42
43 msg->next = NULL;
44 msg->security = NULL;
45
46 if (copy_from_user(msg + 1, src, alen)) {
47 err = -EFAULT;
48 goto out_err;
49 }
50
51 len -= alen;
52 src = ((char __user *)src) + alen;
53 pseg = &msg->next;
54 while (len > 0) {
55 struct msg_msgseg *seg;
56 alen = len;
57 if (alen > DATALEN_SEG)
58 alen = DATALEN_SEG;
59 seg = (struct msg_msgseg *)kmalloc(sizeof(*seg) + alen,
60 GFP_KERNEL);
61 if (seg == NULL) {
62 err = -ENOMEM;
63 goto out_err;
64 }
65 *pseg = seg;
66 seg->next = NULL;
67 if (copy_from_user(seg + 1, src, alen)) {
68 err = -EFAULT;
69 goto out_err;
70 }
71 pseg = &seg->next;
72 len -= alen;
73 src = ((char __user *)src) + alen;
74 }
75
76 err = security_msg_msg_alloc(msg);
77 if (err)
78 goto out_err;
79
80 return msg;
81
82out_err:
83 free_msg(msg);
84 return ERR_PTR(err);
85}
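/*
 * Worked example (illustration only, not code from this file): a message of
 * `len` bytes is split into one msg_msg head holding up to DATALEN_MSG bytes
 * plus as many msg_msgseg allocations as needed for the rest. The helper
 * below repeats the arithmetic of the loop in load_msg(); with 4 KiB pages
 * (roughly 4 KiB of payload per allocation) a 10000-byte message needs the
 * head plus two extra segments.
 */
static int nr_segments(int len, int datalen_msg, int datalen_seg)
{
	int segs = 0;

	len -= (len > datalen_msg) ? datalen_msg : len;	/* head absorbs the first chunk */
	while (len > 0) {
		segs++;
		len -= (len > datalen_seg) ? datalen_seg : len;
	}
	return segs;
}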
86
87int store_msg(void __user *dest, struct msg_msg *msg, int len)
88{
89 int alen;
90 struct msg_msgseg *seg;
91
92 alen = len;
93 if (alen > DATALEN_MSG)
94 alen = DATALEN_MSG;
95 if (copy_to_user(dest, msg + 1, alen))
96 return -1;
97
98 len -= alen;
99 dest = ((char __user *)dest) + alen;
100 seg = msg->next;
101 while (len > 0) {
102 alen = len;
103 if (alen > DATALEN_SEG)
104 alen = DATALEN_SEG;
105 if (copy_to_user(dest, seg + 1, alen))
106 return -1;
107 len -= alen;
108 dest = ((char __user *)dest) + alen;
109 seg = seg->next;
110 }
111 return 0;
112}
113
114void free_msg(struct msg_msg *msg)
115{
116 struct msg_msgseg *seg;
117
118 security_msg_msg_free(msg);
119
120 seg = msg->next;
121 kfree(msg);
122 while (seg != NULL) {
123 struct msg_msgseg *tmp = seg->next;
124 kfree(seg);
125 seg = tmp;
126 }
127}
diff --git a/ipc/sem.c b/ipc/sem.c
new file mode 100644
index 000000000000..5ad7ac0ed60d
--- /dev/null
+++ b/ipc/sem.c
@@ -0,0 +1,1384 @@
1/*
2 * linux/ipc/sem.c
3 * Copyright (C) 1992 Krishna Balasubramanian
4 * Copyright (C) 1995 Eric Schenk, Bruno Haible
5 *
6 * IMPLEMENTATION NOTES ON CODE REWRITE (Eric Schenk, January 1995):
7 * This code underwent a massive rewrite in order to solve some problems
8 * with the original code. In particular the original code failed to
9 * wake up processes that were waiting for semval to go to 0 if the
10 * value went to 0 and was then incremented rapidly enough. In solving
11 * this problem I have also modified the implementation so that it
12 * processes pending operations in a FIFO manner, thus give a guarantee
13 * that processes waiting for a lock on the semaphore won't starve
14 * unless another locking process fails to unlock.
15 * In addition the following two changes in behavior have been introduced:
16 * - The original implementation of semop returned the value
17 * last semaphore element examined on success. This does not
18 * match the manual page specifications, and effectively
19 * allows the user to read the semaphore even if they do not
20 * have read permissions. The implementation now returns 0
21 * on success as stated in the manual page.
22 * - There is some confusion over whether the set of undo adjustments
23 * to be performed at exit should be done in an atomic manner.
24 * That is, if we are attempting to decrement the semval should we queue
25 * up and wait until we can do so legally?
26 * The original implementation attempted to do this.
27 * The current implementation does not do so. This is because I don't
28 * think it is the right thing (TM) to do, and because I couldn't
29 * see a clean way to get the old behavior with the new design.
30 * The POSIX standard and SVID should be consulted to determine
31 * what behavior is mandated.
32 *
33 * Further notes on refinement (Christoph Rohland, December 1998):
34 * - The POSIX standard says that the undo adjustments simply should be
35 * redone. So the current implementation is O.K.
36 * - The previous code had two flaws:
37 * 1) It actively gave the semaphore to the next waiting process
38 * sleeping on the semaphore. Since this process did not have the
39 * cpu this led to many unnecessary context switches and bad
40 * performance. Now we only check which process should be able to
41 * get the semaphore and if this process wants to reduce some
42 * semaphore value we simply wake it up without doing the
43 * operation. So it has to try to get it later. Thus e.g. the
44 * running process may reacquire the semaphore during the current
45 * time slice. If it only waits for zero or increases the semaphore,
46 * we do the operation in advance and wake it up.
47 * 2) It did not wake up all zero waiting processes. We try to do
48 * better but only get the semops right which only wait for zero or
49 * increase. If there are decrement operations in the operations
50 * array we do the same as before.
51 *
52 * With the incarnation of the O(1) scheduler, it became unnecessary to perform
53 * the check/retry algorithm for waking up blocked processes, as the new scheduler
54 * handles thread switches better than the old one.
55 *
56 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
57 *
58 * SMP-threaded, sysctl's added
59 * (c) 1999 Manfred Spraul <manfreds@colorfullife.com>
60 * Enforced range limit on SEM_UNDO
61 * (c) 2001 Red Hat Inc <alan@redhat.com>
62 * Lockless wakeup
63 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
64 */
65
66#include <linux/config.h>
67#include <linux/slab.h>
68#include <linux/spinlock.h>
69#include <linux/init.h>
70#include <linux/proc_fs.h>
71#include <linux/time.h>
72#include <linux/smp_lock.h>
73#include <linux/security.h>
74#include <linux/syscalls.h>
75#include <linux/audit.h>
76#include <asm/uaccess.h>
77#include "util.h"
78
79
80#define sem_lock(id) ((struct sem_array*)ipc_lock(&sem_ids,id))
81#define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm)
82#define sem_rmid(id) ((struct sem_array*)ipc_rmid(&sem_ids,id))
83#define sem_checkid(sma, semid) \
84 ipc_checkid(&sem_ids,&sma->sem_perm,semid)
85#define sem_buildid(id, seq) \
86 ipc_buildid(&sem_ids, id, seq)
87static struct ipc_ids sem_ids;
88
89static int newary (key_t, int, int);
90static void freeary (struct sem_array *sma, int id);
91#ifdef CONFIG_PROC_FS
92static int sysvipc_sem_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
93#endif
94
95#define SEMMSL_FAST 256 /* 512 bytes on stack */
96#define SEMOPM_FAST 64 /* ~ 372 bytes on stack */
97
98/*
99 * linked list protection:
100 * sem_undo.id_next,
101 * sem_array.sem_pending{,last},
102 * sem_array.sem_undo: sem_lock() for read/write
103 * sem_undo.proc_next: only "current" is allowed to read/write that field.
104 *
105 */
106
107int sem_ctls[4] = {SEMMSL, SEMMNS, SEMOPM, SEMMNI};
108#define sc_semmsl (sem_ctls[0])
109#define sc_semmns (sem_ctls[1])
110#define sc_semopm (sem_ctls[2])
111#define sc_semmni (sem_ctls[3])
112
113static int used_sems;
114
115void __init sem_init (void)
116{
117 used_sems = 0;
118 ipc_init_ids(&sem_ids,sc_semmni);
119
120#ifdef CONFIG_PROC_FS
121 create_proc_read_entry("sysvipc/sem", 0, NULL, sysvipc_sem_read_proc, NULL);
122#endif
123}
124
125/*
126 * Lockless wakeup algorithm:
127 * Without the check/retry algorithm a lockless wakeup is possible:
128 * - queue.status is initialized to -EINTR before blocking.
129 * - wakeup is performed by
130 * * unlinking the queue entry from sma->sem_pending
131 * * setting queue.status to IN_WAKEUP
132 * This is the notification for the blocked thread that a
133 * result value is imminent.
134 * * call wake_up_process
135 * * set queue.status to the final value.
136 * - the previously blocked thread checks queue.status:
137 * * if it's IN_WAKEUP, then it must wait until the value changes
138 * * if it's not -EINTR, then the operation was completed by
139 * update_queue. semtimedop can return queue.status without
140 * performing any operation on the semaphore array.
141 * * otherwise it must acquire the spinlock and check what's up.
142 *
143 * The two-stage algorithm is necessary to protect against the following
144 * races:
145 * - if queue.status is set after wake_up_process, then the woken up idle
146 * thread could race forward and try (and fail) to acquire sma->lock
147 * before update_queue had a chance to set queue.status
148 * - if queue.status is written before wake_up_process and if the
149 * blocked process is woken up by a signal between writing
150 * queue.status and the wake_up_process, then the woken up
151 * process could return from semtimedop and die by calling
152 * sys_exit before wake_up_process is called. Then wake_up_process
153 * will oops, because the task structure is already invalid.
154 * (yes, this happened on s390 with sysv msg).
155 *
156 */
157#define IN_WAKEUP 1
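/*
 * Condensed sketch of the handshake described above (illustration only; the
 * real code is in update_queue() and sys_semtimedop() below):
 *
 *	waker, under the array lock:	sleeper, after schedule():
 *	  remove_from_queue(sma, q);	  error = queue.status;
 *	  q->status = IN_WAKEUP;	  while (error == IN_WAKEUP) {
 *	  wake_up_process(q->sleeper);		cpu_relax();
 *	  q->status = error;			error = queue.status;
 *					  }
 *
 * The sleeper does not touch its queue entry until status has moved past
 * IN_WAKEUP, so the waker can safely store the final status after
 * wake_up_process() without the entry disappearing under it.
 */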
158
159static int newary (key_t key, int nsems, int semflg)
160{
161 int id;
162 int retval;
163 struct sem_array *sma;
164 int size;
165
166 if (!nsems)
167 return -EINVAL;
168 if (used_sems + nsems > sc_semmns)
169 return -ENOSPC;
170
171 size = sizeof (*sma) + nsems * sizeof (struct sem);
172 sma = ipc_rcu_alloc(size);
173 if (!sma) {
174 return -ENOMEM;
175 }
176 memset (sma, 0, size);
177
178 sma->sem_perm.mode = (semflg & S_IRWXUGO);
179 sma->sem_perm.key = key;
180
181 sma->sem_perm.security = NULL;
182 retval = security_sem_alloc(sma);
183 if (retval) {
184 ipc_rcu_putref(sma);
185 return retval;
186 }
187
188 id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni);
189 if(id == -1) {
190 security_sem_free(sma);
191 ipc_rcu_putref(sma);
192 return -ENOSPC;
193 }
194 used_sems += nsems;
195
196 sma->sem_base = (struct sem *) &sma[1];
197 /* sma->sem_pending = NULL; */
198 sma->sem_pending_last = &sma->sem_pending;
199 /* sma->undo = NULL; */
200 sma->sem_nsems = nsems;
201 sma->sem_ctime = get_seconds();
202 sem_unlock(sma);
203
204 return sem_buildid(id, sma->sem_perm.seq);
205}
206
207asmlinkage long sys_semget (key_t key, int nsems, int semflg)
208{
209 int id, err = -EINVAL;
210 struct sem_array *sma;
211
212 if (nsems < 0 || nsems > sc_semmsl)
213 return -EINVAL;
214 down(&sem_ids.sem);
215
216 if (key == IPC_PRIVATE) {
217 err = newary(key, nsems, semflg);
218 } else if ((id = ipc_findkey(&sem_ids, key)) == -1) { /* key not used */
219 if (!(semflg & IPC_CREAT))
220 err = -ENOENT;
221 else
222 err = newary(key, nsems, semflg);
223 } else if (semflg & IPC_CREAT && semflg & IPC_EXCL) {
224 err = -EEXIST;
225 } else {
226 sma = sem_lock(id);
227 if(sma==NULL)
228 BUG();
229 if (nsems > sma->sem_nsems)
230 err = -EINVAL;
231 else if (ipcperms(&sma->sem_perm, semflg))
232 err = -EACCES;
233 else {
234 int semid = sem_buildid(id, sma->sem_perm.seq);
235 err = security_sem_associate(sma, semflg);
236 if (!err)
237 err = semid;
238 }
239 sem_unlock(sma);
240 }
241
242 up(&sem_ids.sem);
243 return err;
244}
245
246/* Manage the doubly linked list sma->sem_pending as a FIFO:
247 * insert new queue elements at the tail sma->sem_pending_last.
248 */
249static inline void append_to_queue (struct sem_array * sma,
250 struct sem_queue * q)
251{
252 *(q->prev = sma->sem_pending_last) = q;
253 *(sma->sem_pending_last = &q->next) = NULL;
254}
255
256static inline void prepend_to_queue (struct sem_array * sma,
257 struct sem_queue * q)
258{
259 q->next = sma->sem_pending;
260 *(q->prev = &sma->sem_pending) = q;
261 if (q->next)
262 q->next->prev = &q->next;
263 else /* sma->sem_pending_last == &sma->sem_pending */
264 sma->sem_pending_last = &q->next;
265}
266
267static inline void remove_from_queue (struct sem_array * sma,
268 struct sem_queue * q)
269{
270 *(q->prev) = q->next;
271 if (q->next)
272 q->next->prev = q->prev;
273 else /* sma->sem_pending_last == &q->next */
274 sma->sem_pending_last = q->prev;
275 q->prev = NULL; /* mark as removed */
276}
277
278/*
279 * Determine whether a sequence of semaphore operations would succeed
280 * all at once. Return 0 if yes, 1 if need to sleep, else return error code.
281 */
282
283static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops,
284 int nsops, struct sem_undo *un, int pid)
285{
286 int result, sem_op;
287 struct sembuf *sop;
288 struct sem * curr;
289
290 for (sop = sops; sop < sops + nsops; sop++) {
291 curr = sma->sem_base + sop->sem_num;
292 sem_op = sop->sem_op;
293 result = curr->semval;
294
295 if (!sem_op && result)
296 goto would_block;
297
298 result += sem_op;
299 if (result < 0)
300 goto would_block;
301 if (result > SEMVMX)
302 goto out_of_range;
303 if (sop->sem_flg & SEM_UNDO) {
304 int undo = un->semadj[sop->sem_num] - sem_op;
305 /*
306 * Exceeding the undo range is an error.
307 */
308 if (undo < (-SEMAEM - 1) || undo > SEMAEM)
309 goto out_of_range;
310 }
311 curr->semval = result;
312 }
313
314 sop--;
315 while (sop >= sops) {
316 sma->sem_base[sop->sem_num].sempid = pid;
317 if (sop->sem_flg & SEM_UNDO)
318 un->semadj[sop->sem_num] -= sop->sem_op;
319 sop--;
320 }
321
322 sma->sem_otime = get_seconds();
323 return 0;
324
325out_of_range:
326 result = -ERANGE;
327 goto undo;
328
329would_block:
330 if (sop->sem_flg & IPC_NOWAIT)
331 result = -EAGAIN;
332 else
333 result = 1;
334
335undo:
336 sop--;
337 while (sop >= sops) {
338 sma->sem_base[sop->sem_num].semval -= sop->sem_op;
339 sop--;
340 }
341
342 return result;
343}
344
345/* Go through the pending queue for the indicated semaphore
346 * looking for tasks that can be completed.
347 */
348static void update_queue (struct sem_array * sma)
349{
350 int error;
351 struct sem_queue * q;
352
353 q = sma->sem_pending;
354 while(q) {
355 error = try_atomic_semop(sma, q->sops, q->nsops,
356 q->undo, q->pid);
357
358 /* Does q->sleeper still need to sleep? */
359 if (error <= 0) {
360 struct sem_queue *n;
361 remove_from_queue(sma,q);
362 q->status = IN_WAKEUP;
363 /*
364 * Continue scanning. The next operation
365 * that must be checked depends on the type of the
366 * completed operation:
367 * - if the operation modified the array, then
368 * restart from the head of the queue and
369 * check for threads that might be waiting
370 * for semaphore values to become 0.
371 * - if the operation didn't modify the array,
372 * then just continue.
373 */
374 if (q->alter)
375 n = sma->sem_pending;
376 else
377 n = q->next;
378 wake_up_process(q->sleeper);
379 /* hands-off: q will disappear immediately after
380 * writing q->status.
381 */
382 q->status = error;
383 q = n;
384 } else {
385 q = q->next;
386 }
387 }
388}
389
390/* The following counts are associated with each semaphore:
391 * semncnt number of tasks waiting on semval being nonzero
392 * semzcnt number of tasks waiting on semval being zero
393 * This model assumes that a task waits on exactly one semaphore.
394 * Since semaphore operations are to be performed atomically, tasks actually
395 * wait on a whole sequence of semaphores simultaneously.
396 * The counts we return here are a rough approximation, but still
397 * guarantee that semncnt+semzcnt>0 if the task is on the pending queue.
398 */
399static int count_semncnt (struct sem_array * sma, ushort semnum)
400{
401 int semncnt;
402 struct sem_queue * q;
403
404 semncnt = 0;
405 for (q = sma->sem_pending; q; q = q->next) {
406 struct sembuf * sops = q->sops;
407 int nsops = q->nsops;
408 int i;
409 for (i = 0; i < nsops; i++)
410 if (sops[i].sem_num == semnum
411 && (sops[i].sem_op < 0)
412 && !(sops[i].sem_flg & IPC_NOWAIT))
413 semncnt++;
414 }
415 return semncnt;
416}
417static int count_semzcnt (struct sem_array * sma, ushort semnum)
418{
419 int semzcnt;
420 struct sem_queue * q;
421
422 semzcnt = 0;
423 for (q = sma->sem_pending; q; q = q->next) {
424 struct sembuf * sops = q->sops;
425 int nsops = q->nsops;
426 int i;
427 for (i = 0; i < nsops; i++)
428 if (sops[i].sem_num == semnum
429 && (sops[i].sem_op == 0)
430 && !(sops[i].sem_flg & IPC_NOWAIT))
431 semzcnt++;
432 }
433 return semzcnt;
434}
435
436/* Free a semaphore set. freeary() is called with sem_ids.sem down and
437 * the spinlock for this semaphore set held. sem_ids.sem remains locked
438 * on exit.
439 */
440static void freeary (struct sem_array *sma, int id)
441{
442 struct sem_undo *un;
443 struct sem_queue *q;
444 int size;
445
446 /* Invalidate the existing undo structures for this semaphore set.
447 * (They will be freed without any further action in exit_sem()
448 * or during the next semop.)
449 */
450 for (un = sma->undo; un; un = un->id_next)
451 un->semid = -1;
452
453 /* Wake up all pending processes and let them fail with EIDRM. */
454 q = sma->sem_pending;
455 while(q) {
456 struct sem_queue *n;
457 /* lazy remove_from_queue: we are killing the whole queue */
458 q->prev = NULL;
459 n = q->next;
460 q->status = IN_WAKEUP;
461 wake_up_process(q->sleeper); /* doesn't sleep */
462 q->status = -EIDRM; /* hands-off q */
463 q = n;
464 }
465
466 /* Remove the semaphore set from the ID array*/
467 sma = sem_rmid(id);
468 sem_unlock(sma);
469
470 used_sems -= sma->sem_nsems;
471 size = sizeof (*sma) + sma->sem_nsems * sizeof (struct sem);
472 security_sem_free(sma);
473 ipc_rcu_putref(sma);
474}
475
476static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
477{
478 switch(version) {
479 case IPC_64:
480 return copy_to_user(buf, in, sizeof(*in));
481 case IPC_OLD:
482 {
483 struct semid_ds out;
484
485 ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);
486
487 out.sem_otime = in->sem_otime;
488 out.sem_ctime = in->sem_ctime;
489 out.sem_nsems = in->sem_nsems;
490
491 return copy_to_user(buf, &out, sizeof(out));
492 }
493 default:
494 return -EINVAL;
495 }
496}
497
498static int semctl_nolock(int semid, int semnum, int cmd, int version, union semun arg)
499{
500 int err = -EINVAL;
501 struct sem_array *sma;
502
503 switch(cmd) {
504 case IPC_INFO:
505 case SEM_INFO:
506 {
507 struct seminfo seminfo;
508 int max_id;
509
510 err = security_sem_semctl(NULL, cmd);
511 if (err)
512 return err;
513
514 memset(&seminfo,0,sizeof(seminfo));
515 seminfo.semmni = sc_semmni;
516 seminfo.semmns = sc_semmns;
517 seminfo.semmsl = sc_semmsl;
518 seminfo.semopm = sc_semopm;
519 seminfo.semvmx = SEMVMX;
520 seminfo.semmnu = SEMMNU;
521 seminfo.semmap = SEMMAP;
522 seminfo.semume = SEMUME;
523 down(&sem_ids.sem);
524 if (cmd == SEM_INFO) {
525 seminfo.semusz = sem_ids.in_use;
526 seminfo.semaem = used_sems;
527 } else {
528 seminfo.semusz = SEMUSZ;
529 seminfo.semaem = SEMAEM;
530 }
531 max_id = sem_ids.max_id;
532 up(&sem_ids.sem);
533 if (copy_to_user (arg.__buf, &seminfo, sizeof(struct seminfo)))
534 return -EFAULT;
535 return (max_id < 0) ? 0: max_id;
536 }
537 case SEM_STAT:
538 {
539 struct semid64_ds tbuf;
540 int id;
541
542 if(semid >= sem_ids.entries->size)
543 return -EINVAL;
544
545 memset(&tbuf,0,sizeof(tbuf));
546
547 sma = sem_lock(semid);
548 if(sma == NULL)
549 return -EINVAL;
550
551 err = -EACCES;
552 if (ipcperms (&sma->sem_perm, S_IRUGO))
553 goto out_unlock;
554
555 err = security_sem_semctl(sma, cmd);
556 if (err)
557 goto out_unlock;
558
559 id = sem_buildid(semid, sma->sem_perm.seq);
560
561 kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
562 tbuf.sem_otime = sma->sem_otime;
563 tbuf.sem_ctime = sma->sem_ctime;
564 tbuf.sem_nsems = sma->sem_nsems;
565 sem_unlock(sma);
566 if (copy_semid_to_user (arg.buf, &tbuf, version))
567 return -EFAULT;
568 return id;
569 }
570 default:
571 return -EINVAL;
572 }
573 return err;
574out_unlock:
575 sem_unlock(sma);
576 return err;
577}
578
579static int semctl_main(int semid, int semnum, int cmd, int version, union semun arg)
580{
581 struct sem_array *sma;
582 struct sem* curr;
583 int err;
584 ushort fast_sem_io[SEMMSL_FAST];
585 ushort* sem_io = fast_sem_io;
586 int nsems;
587
588 sma = sem_lock(semid);
589 if(sma==NULL)
590 return -EINVAL;
591
592 nsems = sma->sem_nsems;
593
594 err=-EIDRM;
595 if (sem_checkid(sma,semid))
596 goto out_unlock;
597
598 err = -EACCES;
599 if (ipcperms (&sma->sem_perm, (cmd==SETVAL||cmd==SETALL)?S_IWUGO:S_IRUGO))
600 goto out_unlock;
601
602 err = security_sem_semctl(sma, cmd);
603 if (err)
604 goto out_unlock;
605
606 err = -EACCES;
607 switch (cmd) {
608 case GETALL:
609 {
610 ushort __user *array = arg.array;
611 int i;
612
613 if(nsems > SEMMSL_FAST) {
614 ipc_rcu_getref(sma);
615 sem_unlock(sma);
616
617 sem_io = ipc_alloc(sizeof(ushort)*nsems);
618 if(sem_io == NULL) {
619 ipc_lock_by_ptr(&sma->sem_perm);
620 ipc_rcu_putref(sma);
621 sem_unlock(sma);
622 return -ENOMEM;
623 }
624
625 ipc_lock_by_ptr(&sma->sem_perm);
626 ipc_rcu_putref(sma);
627 if (sma->sem_perm.deleted) {
628 sem_unlock(sma);
629 err = -EIDRM;
630 goto out_free;
631 }
632 }
633
634 for (i = 0; i < sma->sem_nsems; i++)
635 sem_io[i] = sma->sem_base[i].semval;
636 sem_unlock(sma);
637 err = 0;
638 if(copy_to_user(array, sem_io, nsems*sizeof(ushort)))
639 err = -EFAULT;
640 goto out_free;
641 }
642 case SETALL:
643 {
644 int i;
645 struct sem_undo *un;
646
647 ipc_rcu_getref(sma);
648 sem_unlock(sma);
649
650 if(nsems > SEMMSL_FAST) {
651 sem_io = ipc_alloc(sizeof(ushort)*nsems);
652 if(sem_io == NULL) {
653 ipc_lock_by_ptr(&sma->sem_perm);
654 ipc_rcu_putref(sma);
655 sem_unlock(sma);
656 return -ENOMEM;
657 }
658 }
659
660 if (copy_from_user (sem_io, arg.array, nsems*sizeof(ushort))) {
661 ipc_lock_by_ptr(&sma->sem_perm);
662 ipc_rcu_putref(sma);
663 sem_unlock(sma);
664 err = -EFAULT;
665 goto out_free;
666 }
667
668 for (i = 0; i < nsems; i++) {
669 if (sem_io[i] > SEMVMX) {
670 ipc_lock_by_ptr(&sma->sem_perm);
671 ipc_rcu_putref(sma);
672 sem_unlock(sma);
673 err = -ERANGE;
674 goto out_free;
675 }
676 }
677 ipc_lock_by_ptr(&sma->sem_perm);
678 ipc_rcu_putref(sma);
679 if (sma->sem_perm.deleted) {
680 sem_unlock(sma);
681 err = -EIDRM;
682 goto out_free;
683 }
684
685 for (i = 0; i < nsems; i++)
686 sma->sem_base[i].semval = sem_io[i];
687 for (un = sma->undo; un; un = un->id_next)
688 for (i = 0; i < nsems; i++)
689 un->semadj[i] = 0;
690 sma->sem_ctime = get_seconds();
691 /* maybe some queued-up processes were waiting for this */
692 update_queue(sma);
693 err = 0;
694 goto out_unlock;
695 }
696 case IPC_STAT:
697 {
698 struct semid64_ds tbuf;
699 memset(&tbuf,0,sizeof(tbuf));
700 kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
701 tbuf.sem_otime = sma->sem_otime;
702 tbuf.sem_ctime = sma->sem_ctime;
703 tbuf.sem_nsems = sma->sem_nsems;
704 sem_unlock(sma);
705 if (copy_semid_to_user (arg.buf, &tbuf, version))
706 return -EFAULT;
707 return 0;
708 }
709	/* GETVAL, GETPID, GETNCNT, GETZCNT, SETVAL: fall-through */
710 }
711 err = -EINVAL;
712 if(semnum < 0 || semnum >= nsems)
713 goto out_unlock;
714
715 curr = &sma->sem_base[semnum];
716
717 switch (cmd) {
718 case GETVAL:
719 err = curr->semval;
720 goto out_unlock;
721 case GETPID:
722 err = curr->sempid;
723 goto out_unlock;
724 case GETNCNT:
725 err = count_semncnt(sma,semnum);
726 goto out_unlock;
727 case GETZCNT:
728 err = count_semzcnt(sma,semnum);
729 goto out_unlock;
730 case SETVAL:
731 {
732 int val = arg.val;
733 struct sem_undo *un;
734 err = -ERANGE;
735 if (val > SEMVMX || val < 0)
736 goto out_unlock;
737
738 for (un = sma->undo; un; un = un->id_next)
739 un->semadj[semnum] = 0;
740 curr->semval = val;
741 curr->sempid = current->tgid;
742 sma->sem_ctime = get_seconds();
743 /* maybe some queued-up processes were waiting for this */
744 update_queue(sma);
745 err = 0;
746 goto out_unlock;
747 }
748 }
749out_unlock:
750 sem_unlock(sma);
751out_free:
752 if(sem_io != fast_sem_io)
753 ipc_free(sem_io, sizeof(ushort)*nsems);
754 return err;
755}
756
757struct sem_setbuf {
758 uid_t uid;
759 gid_t gid;
760 mode_t mode;
761};
762
763static inline unsigned long copy_semid_from_user(struct sem_setbuf *out, void __user *buf, int version)
764{
765 switch(version) {
766 case IPC_64:
767 {
768 struct semid64_ds tbuf;
769
770 if(copy_from_user(&tbuf, buf, sizeof(tbuf)))
771 return -EFAULT;
772
773 out->uid = tbuf.sem_perm.uid;
774 out->gid = tbuf.sem_perm.gid;
775 out->mode = tbuf.sem_perm.mode;
776
777 return 0;
778 }
779 case IPC_OLD:
780 {
781 struct semid_ds tbuf_old;
782
783 if(copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
784 return -EFAULT;
785
786 out->uid = tbuf_old.sem_perm.uid;
787 out->gid = tbuf_old.sem_perm.gid;
788 out->mode = tbuf_old.sem_perm.mode;
789
790 return 0;
791 }
792 default:
793 return -EINVAL;
794 }
795}
796
797static int semctl_down(int semid, int semnum, int cmd, int version, union semun arg)
798{
799 struct sem_array *sma;
800 int err;
801 struct sem_setbuf setbuf;
802 struct kern_ipc_perm *ipcp;
803
804 if(cmd == IPC_SET) {
805 if(copy_semid_from_user (&setbuf, arg.buf, version))
806 return -EFAULT;
807 if ((err = audit_ipc_perms(0, setbuf.uid, setbuf.gid, setbuf.mode)))
808 return err;
809 }
810 sma = sem_lock(semid);
811 if(sma==NULL)
812 return -EINVAL;
813
814 if (sem_checkid(sma,semid)) {
815 err=-EIDRM;
816 goto out_unlock;
817 }
818 ipcp = &sma->sem_perm;
819
820 if (current->euid != ipcp->cuid &&
821 current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) {
822 err=-EPERM;
823 goto out_unlock;
824 }
825
826 err = security_sem_semctl(sma, cmd);
827 if (err)
828 goto out_unlock;
829
830 switch(cmd){
831 case IPC_RMID:
832 freeary(sma, semid);
833 err = 0;
834 break;
835 case IPC_SET:
836 ipcp->uid = setbuf.uid;
837 ipcp->gid = setbuf.gid;
838 ipcp->mode = (ipcp->mode & ~S_IRWXUGO)
839 | (setbuf.mode & S_IRWXUGO);
840 sma->sem_ctime = get_seconds();
841 sem_unlock(sma);
842 err = 0;
843 break;
844 default:
845 sem_unlock(sma);
846 err = -EINVAL;
847 break;
848 }
849 return err;
850
851out_unlock:
852 sem_unlock(sma);
853 return err;
854}
855
856asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg)
857{
858 int err = -EINVAL;
859 int version;
860
861 if (semid < 0)
862 return -EINVAL;
863
864 version = ipc_parse_version(&cmd);
865
866 switch(cmd) {
867 case IPC_INFO:
868 case SEM_INFO:
869 case SEM_STAT:
870 err = semctl_nolock(semid,semnum,cmd,version,arg);
871 return err;
872 case GETALL:
873 case GETVAL:
874 case GETPID:
875 case GETNCNT:
876 case GETZCNT:
877 case IPC_STAT:
878 case SETVAL:
879 case SETALL:
880 err = semctl_main(semid,semnum,cmd,version,arg);
881 return err;
882 case IPC_RMID:
883 case IPC_SET:
884 down(&sem_ids.sem);
885 err = semctl_down(semid,semnum,cmd,version,arg);
886 up(&sem_ids.sem);
887 return err;
888 default:
889 return -EINVAL;
890 }
891}
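/*
 * Example (not part of ipc/sem.c): userspace dispatch into sys_semctl()
 * above. Note that on Linux the calling program has to define union semun
 * itself (see semctl(2)); the definition below follows the manual page. The
 * set id passed in is assumed to be valid.
 */
#define _GNU_SOURCE
#include <sys/ipc.h>
#include <sys/sem.h>

union semun {
	int val;
	struct semid_ds *buf;
	unsigned short *array;
	struct seminfo *__buf;		/* Linux-specific, for IPC_INFO */
};

static int set_first_sem_to_one(int id)
{
	union semun arg;

	arg.val = 1;
	return semctl(id, 0, SETVAL, arg);	/* handled by semctl_main() above */
}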
892
893static inline void lock_semundo(void)
894{
895 struct sem_undo_list *undo_list;
896
897 undo_list = current->sysvsem.undo_list;
898 if ((undo_list != NULL) && (atomic_read(&undo_list->refcnt) != 1))
899 spin_lock(&undo_list->lock);
900}
901
902/* This code has an interaction with copy_semundo().
903 * Consider: two tasks are sharing the undo_list. task1
904 * acquires the undo_list lock in lock_semundo(). If task2 now
905 * exits before task1 releases the lock (by calling
906 * unlock_semundo()), then task1 will never call spin_unlock().
907 * This leaves the sem_undo_list in a locked state. If task1 now creates task3
908 * and once again shares the sem_undo_list, the sem_undo_list will still be
909 * locked, and future SEM_UNDO operations will deadlock. This case is
910 * dealt with in copy_semundo() by having it reinitialize the spin lock when
911 * the refcnt goes from 1 to 2.
912 */
913static inline void unlock_semundo(void)
914{
915 struct sem_undo_list *undo_list;
916
917 undo_list = current->sysvsem.undo_list;
918 if ((undo_list != NULL) && (atomic_read(&undo_list->refcnt) != 1))
919 spin_unlock(&undo_list->lock);
920}
921
922
923/* If the task doesn't already have an undo_list, then allocate one
924 * here. We guarantee there is only one thread using this undo list,
925 * and current is THE ONE
926 *
927 * If this allocation and assignment succeeds, but later
928 * portions of this code fail, there is no need to free the sem_undo_list.
929 * Just let it stay associated with the task, and it'll be freed later
930 * at exit time.
931 *
932 * This can block, so callers must hold no locks.
933 */
934static inline int get_undo_list(struct sem_undo_list **undo_listp)
935{
936 struct sem_undo_list *undo_list;
937 int size;
938
939 undo_list = current->sysvsem.undo_list;
940 if (!undo_list) {
941 size = sizeof(struct sem_undo_list);
942 undo_list = (struct sem_undo_list *) kmalloc(size, GFP_KERNEL);
943 if (undo_list == NULL)
944 return -ENOMEM;
945 memset(undo_list, 0, size);
946		/* don't initialize undo_list->lock here. It's done
947 * in copy_semundo() instead.
948 */
949 atomic_set(&undo_list->refcnt, 1);
950 current->sysvsem.undo_list = undo_list;
951 }
952 *undo_listp = undo_list;
953 return 0;
954}
955
956static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
957{
958 struct sem_undo **last, *un;
959
960 last = &ulp->proc_list;
961 un = *last;
962 while(un != NULL) {
963 if(un->semid==semid)
964 break;
965 if(un->semid==-1) {
966 *last=un->proc_next;
967 kfree(un);
968 } else {
969 last=&un->proc_next;
970 }
971 un=*last;
972 }
973 return un;
974}
975
976static struct sem_undo *find_undo(int semid)
977{
978 struct sem_array *sma;
979 struct sem_undo_list *ulp;
980 struct sem_undo *un, *new;
981 int nsems;
982 int error;
983
984 error = get_undo_list(&ulp);
985 if (error)
986 return ERR_PTR(error);
987
988 lock_semundo();
989 un = lookup_undo(ulp, semid);
990 unlock_semundo();
991 if (likely(un!=NULL))
992 goto out;
993
994 /* no undo structure around - allocate one. */
995 sma = sem_lock(semid);
996 un = ERR_PTR(-EINVAL);
997 if(sma==NULL)
998 goto out;
999 un = ERR_PTR(-EIDRM);
1000 if (sem_checkid(sma,semid)) {
1001 sem_unlock(sma);
1002 goto out;
1003 }
1004 nsems = sma->sem_nsems;
1005 ipc_rcu_getref(sma);
1006 sem_unlock(sma);
1007
1008 new = (struct sem_undo *) kmalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
1009 if (!new) {
1010 ipc_lock_by_ptr(&sma->sem_perm);
1011 ipc_rcu_putref(sma);
1012 sem_unlock(sma);
1013 return ERR_PTR(-ENOMEM);
1014 }
1015 memset(new, 0, sizeof(struct sem_undo) + sizeof(short)*nsems);
1016 new->semadj = (short *) &new[1];
1017 new->semid = semid;
1018
1019 lock_semundo();
1020 un = lookup_undo(ulp, semid);
1021 if (un) {
1022 unlock_semundo();
1023 kfree(new);
1024 ipc_lock_by_ptr(&sma->sem_perm);
1025 ipc_rcu_putref(sma);
1026 sem_unlock(sma);
1027 goto out;
1028 }
1029 ipc_lock_by_ptr(&sma->sem_perm);
1030 ipc_rcu_putref(sma);
1031 if (sma->sem_perm.deleted) {
1032 sem_unlock(sma);
1033 unlock_semundo();
1034 kfree(new);
1035 un = ERR_PTR(-EIDRM);
1036 goto out;
1037 }
1038 new->proc_next = ulp->proc_list;
1039 ulp->proc_list = new;
1040 new->id_next = sma->undo;
1041 sma->undo = new;
1042 sem_unlock(sma);
1043 un = new;
1044 unlock_semundo();
1045out:
1046 return un;
1047}
1048
1049asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
1050 unsigned nsops, const struct timespec __user *timeout)
1051{
1052 int error = -EINVAL;
1053 struct sem_array *sma;
1054 struct sembuf fast_sops[SEMOPM_FAST];
1055 struct sembuf* sops = fast_sops, *sop;
1056 struct sem_undo *un;
1057 int undos = 0, decrease = 0, alter = 0, max;
1058 struct sem_queue queue;
1059 unsigned long jiffies_left = 0;
1060
1061 if (nsops < 1 || semid < 0)
1062 return -EINVAL;
1063 if (nsops > sc_semopm)
1064 return -E2BIG;
1065 if(nsops > SEMOPM_FAST) {
1066 sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
1067 if(sops==NULL)
1068 return -ENOMEM;
1069 }
1070 if (copy_from_user (sops, tsops, nsops * sizeof(*tsops))) {
1071 error=-EFAULT;
1072 goto out_free;
1073 }
1074 if (timeout) {
1075 struct timespec _timeout;
1076 if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) {
1077 error = -EFAULT;
1078 goto out_free;
1079 }
1080 if (_timeout.tv_sec < 0 || _timeout.tv_nsec < 0 ||
1081 _timeout.tv_nsec >= 1000000000L) {
1082 error = -EINVAL;
1083 goto out_free;
1084 }
1085 jiffies_left = timespec_to_jiffies(&_timeout);
1086 }
1087 max = 0;
1088 for (sop = sops; sop < sops + nsops; sop++) {
1089 if (sop->sem_num >= max)
1090 max = sop->sem_num;
1091 if (sop->sem_flg & SEM_UNDO)
1092 undos++;
1093 if (sop->sem_op < 0)
1094 decrease = 1;
1095 if (sop->sem_op > 0)
1096 alter = 1;
1097 }
1098 alter |= decrease;
1099
1100retry_undos:
1101 if (undos) {
1102 un = find_undo(semid);
1103 if (IS_ERR(un)) {
1104 error = PTR_ERR(un);
1105 goto out_free;
1106 }
1107 } else
1108 un = NULL;
1109
1110 sma = sem_lock(semid);
1111 error=-EINVAL;
1112 if(sma==NULL)
1113 goto out_free;
1114 error = -EIDRM;
1115 if (sem_checkid(sma,semid))
1116 goto out_unlock_free;
1117 /*
1118	 * semid identifiers are not unique - find_undo may have
1119	 * allocated an undo structure, it was invalidated by an RMID,
1120	 * and now a new array has received the same id. Check and retry.
1121 */
1122 if (un && un->semid == -1) {
1123 sem_unlock(sma);
1124 goto retry_undos;
1125 }
1126 error = -EFBIG;
1127 if (max >= sma->sem_nsems)
1128 goto out_unlock_free;
1129
1130 error = -EACCES;
1131 if (ipcperms(&sma->sem_perm, alter ? S_IWUGO : S_IRUGO))
1132 goto out_unlock_free;
1133
1134 error = security_sem_semop(sma, sops, nsops, alter);
1135 if (error)
1136 goto out_unlock_free;
1137
1138 error = try_atomic_semop (sma, sops, nsops, un, current->tgid);
1139 if (error <= 0) {
1140 if (alter && error == 0)
1141 update_queue (sma);
1142 goto out_unlock_free;
1143 }
1144
1145 /* We need to sleep on this operation, so we put the current
1146 * task into the pending queue and go to sleep.
1147 */
1148
1149 queue.sma = sma;
1150 queue.sops = sops;
1151 queue.nsops = nsops;
1152 queue.undo = un;
1153 queue.pid = current->tgid;
1154 queue.id = semid;
1155 queue.alter = alter;
1156 if (alter)
1157 append_to_queue(sma ,&queue);
1158 else
1159 prepend_to_queue(sma ,&queue);
1160
1161 queue.status = -EINTR;
1162 queue.sleeper = current;
1163 current->state = TASK_INTERRUPTIBLE;
1164 sem_unlock(sma);
1165
1166 if (timeout)
1167 jiffies_left = schedule_timeout(jiffies_left);
1168 else
1169 schedule();
1170
1171 error = queue.status;
1172 while(unlikely(error == IN_WAKEUP)) {
1173 cpu_relax();
1174 error = queue.status;
1175 }
1176
1177 if (error != -EINTR) {
1178 /* fast path: update_queue already obtained all requested
1179 * resources */
1180 goto out_free;
1181 }
1182
1183 sma = sem_lock(semid);
1184 if(sma==NULL) {
1185 if(queue.prev != NULL)
1186 BUG();
1187 error = -EIDRM;
1188 goto out_free;
1189 }
1190
1191 /*
1192 * If queue.status != -EINTR we are woken up by another process
1193 */
1194 error = queue.status;
1195 if (error != -EINTR) {
1196 goto out_unlock_free;
1197 }
1198
1199 /*
1200 * If an interrupt occurred we have to clean up the queue
1201 */
1202 if (timeout && jiffies_left == 0)
1203 error = -EAGAIN;
1204 remove_from_queue(sma,&queue);
1205 goto out_unlock_free;
1206
1207out_unlock_free:
1208 sem_unlock(sma);
1209out_free:
1210 if(sops != fast_sops)
1211 kfree(sops);
1212 return error;
1213}
1214
1215asmlinkage long sys_semop (int semid, struct sembuf __user *tsops, unsigned nsops)
1216{
1217 return sys_semtimedop(semid, tsops, nsops, NULL);
1218}
1219
1220/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
1221 * parent and child tasks.
1222 *
1223 * See the notes above unlock_semundo() regarding the spin_lock_init()
1224 * in this code. Initialize the undo_list->lock here instead of get_undo_list()
1225 * because of the reasoning in the comment above unlock_semundo.
1226 */
1227
1228int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
1229{
1230 struct sem_undo_list *undo_list;
1231 int error;
1232
1233 if (clone_flags & CLONE_SYSVSEM) {
1234 error = get_undo_list(&undo_list);
1235 if (error)
1236 return error;
1237 if (atomic_read(&undo_list->refcnt) == 1)
1238 spin_lock_init(&undo_list->lock);
1239 atomic_inc(&undo_list->refcnt);
1240 tsk->sysvsem.undo_list = undo_list;
1241 } else
1242 tsk->sysvsem.undo_list = NULL;
1243
1244 return 0;
1245}
1246
1247/*
1248 * add semadj values to semaphores, free undo structures.
1249 * undo structures are not freed when semaphore arrays are destroyed
1250 * so some of them may be out of date.
1251 * IMPLEMENTATION NOTE: There is some confusion over whether the
1252 * set of adjustments that needs to be done should be done in an atomic
1253 * manner or not. That is, if we are attempting to decrement the semval
1254 * should we queue up and wait until we can do so legally?
1255 * The original implementation attempted to do this (queue and wait).
1256 * The current implementation does not do so. The POSIX standard
1257 * and SVID should be consulted to determine what behavior is mandated.
1258 */
1259void exit_sem(struct task_struct *tsk)
1260{
1261 struct sem_undo_list *undo_list;
1262 struct sem_undo *u, **up;
1263
1264 undo_list = tsk->sysvsem.undo_list;
1265 if (!undo_list)
1266 return;
1267
1268 if (!atomic_dec_and_test(&undo_list->refcnt))
1269 return;
1270
1271 /* There's no need to hold the semundo list lock, as current
1272 * is the last task exiting for this undo list.
1273 */
1274 for (up = &undo_list->proc_list; (u = *up); *up = u->proc_next, kfree(u)) {
1275 struct sem_array *sma;
1276 int nsems, i;
1277 struct sem_undo *un, **unp;
1278 int semid;
1279
1280 semid = u->semid;
1281
1282 if(semid == -1)
1283 continue;
1284 sma = sem_lock(semid);
1285 if (sma == NULL)
1286 continue;
1287
1288 if (u->semid == -1)
1289 goto next_entry;
1290
1291 BUG_ON(sem_checkid(sma,u->semid));
1292
1293 /* remove u from the sma->undo list */
1294 for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
1295 if (u == un)
1296 goto found;
1297 }
1298 printk ("exit_sem undo list error id=%d\n", u->semid);
1299 goto next_entry;
1300found:
1301 *unp = un->id_next;
1302 /* perform adjustments registered in u */
1303 nsems = sma->sem_nsems;
1304 for (i = 0; i < nsems; i++) {
1305 struct sem * sem = &sma->sem_base[i];
1306 if (u->semadj[i]) {
1307 sem->semval += u->semadj[i];
1308 /*
1309 * Range checks of the new semaphore value,
1310 * not defined by sus:
1311 * - Some unices ignore the undo entirely
1312 * (e.g. HP UX 11i 11.22, Tru64 V5.1)
1313 * - some cap the value (e.g. FreeBSD caps
1314 * at 0, but doesn't enforce SEMVMX)
1315 *
1316 * Linux caps the semaphore value, both at 0
1317 * and at SEMVMX.
1318 *
1319 * Manfred <manfred@colorfullife.com>
1320 */
1321 if (sem->semval < 0)
1322 sem->semval = 0;
1323 if (sem->semval > SEMVMX)
1324 sem->semval = SEMVMX;
1325 sem->sempid = current->tgid;
1326 }
1327 }
1328 sma->sem_otime = get_seconds();
1329 /* maybe some queued-up processes were waiting for this */
1330 update_queue(sma);
1331next_entry:
1332 sem_unlock(sma);
1333 }
1334 kfree(undo_list);
1335}
1336
1337#ifdef CONFIG_PROC_FS
1338static int sysvipc_sem_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
1339{
1340 off_t pos = 0;
1341 off_t begin = 0;
1342 int i, len = 0;
1343
1344 len += sprintf(buffer, " key semid perms nsems uid gid cuid cgid otime ctime\n");
1345 down(&sem_ids.sem);
1346
1347 for(i = 0; i <= sem_ids.max_id; i++) {
1348 struct sem_array *sma;
1349 sma = sem_lock(i);
1350 if(sma) {
1351 len += sprintf(buffer + len, "%10d %10d %4o %10lu %5u %5u %5u %5u %10lu %10lu\n",
1352 sma->sem_perm.key,
1353 sem_buildid(i,sma->sem_perm.seq),
1354 sma->sem_perm.mode,
1355 sma->sem_nsems,
1356 sma->sem_perm.uid,
1357 sma->sem_perm.gid,
1358 sma->sem_perm.cuid,
1359 sma->sem_perm.cgid,
1360 sma->sem_otime,
1361 sma->sem_ctime);
1362 sem_unlock(sma);
1363
1364 pos += len;
1365 if(pos < offset) {
1366 len = 0;
1367 begin = pos;
1368 }
1369 if(pos > offset + length)
1370 goto done;
1371 }
1372 }
1373 *eof = 1;
1374done:
1375 up(&sem_ids.sem);
1376 *start = buffer + (offset - begin);
1377 len -= (offset - begin);
1378 if(len > length)
1379 len = length;
1380 if(len < 0)
1381 len = 0;
1382 return len;
1383}
1384#endif
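The SEM_UNDO bookkeeping above (find_undo(), copy_semundo(), exit_sem()) exists so that adjustments recorded at semop() time are replayed when a task dies. A minimal user-space sketch of the behaviour it implements follows; the key, mode and values are illustrative only, and error handling is omitted:

#include <stdio.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>
#include <sys/wait.h>
#include <unistd.h>

union semun { int val; struct semid_ds *buf; unsigned short *array; };

int main(void)
{
	/* illustrative private set with a single semaphore, initially 0 */
	int semid = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
	union semun arg = { .val = 0 };
	struct sembuf up = { .sem_num = 0, .sem_op = 1, .sem_flg = SEM_UNDO };

	semctl(semid, 0, SETVAL, arg);

	if (fork() == 0) {
		/* child raises the semaphore with SEM_UNDO and exits without
		 * lowering it; exit_sem() replays the recorded -1 adjustment */
		semop(semid, &up, 1);
		_exit(0);
	}
	wait(NULL);

	/* back to 0: the child's undo entry was applied on exit */
	printf("semval after child exit: %d\n", semctl(semid, 0, GETVAL));
	semctl(semid, 0, IPC_RMID, arg);
	return 0;
}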
diff --git a/ipc/shm.c b/ipc/shm.c
new file mode 100644
index 000000000000..06cd5c91056f
--- /dev/null
+++ b/ipc/shm.c
@@ -0,0 +1,917 @@
1/*
2 * linux/ipc/shm.c
3 * Copyright (C) 1992, 1993 Krishna Balasubramanian
4 * Many improvements/fixes by Bruno Haible.
5 * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
6 * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
7 *
8 * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
9 * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
10 * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
11 * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
12 * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
13 * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
14 * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
15 *
16 */
17
18#include <linux/config.h>
19#include <linux/slab.h>
20#include <linux/mm.h>
21#include <linux/hugetlb.h>
22#include <linux/shm.h>
23#include <linux/init.h>
24#include <linux/file.h>
25#include <linux/mman.h>
26#include <linux/proc_fs.h>
27#include <linux/shmem_fs.h>
28#include <linux/security.h>
29#include <linux/syscalls.h>
30#include <linux/audit.h>
31#include <asm/uaccess.h>
32
33#include "util.h"
34
35#define shm_flags shm_perm.mode
36
37static struct file_operations shm_file_operations;
38static struct vm_operations_struct shm_vm_ops;
39
40static struct ipc_ids shm_ids;
41
42#define shm_lock(id) ((struct shmid_kernel*)ipc_lock(&shm_ids,id))
43#define shm_unlock(shp) ipc_unlock(&(shp)->shm_perm)
44#define shm_get(id) ((struct shmid_kernel*)ipc_get(&shm_ids,id))
45#define shm_buildid(id, seq) \
46 ipc_buildid(&shm_ids, id, seq)
47
48static int newseg (key_t key, int shmflg, size_t size);
49static void shm_open (struct vm_area_struct *shmd);
50static void shm_close (struct vm_area_struct *shmd);
51#ifdef CONFIG_PROC_FS
52static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
53#endif
54
55size_t shm_ctlmax = SHMMAX;
56size_t shm_ctlall = SHMALL;
57int shm_ctlmni = SHMMNI;
58
59static int shm_tot; /* total number of shared memory pages */
60
61void __init shm_init (void)
62{
63 ipc_init_ids(&shm_ids, 1);
64#ifdef CONFIG_PROC_FS
65 create_proc_read_entry("sysvipc/shm", 0, NULL, sysvipc_shm_read_proc, NULL);
66#endif
67}
68
69static inline int shm_checkid(struct shmid_kernel *s, int id)
70{
71 if (ipc_checkid(&shm_ids,&s->shm_perm,id))
72 return -EIDRM;
73 return 0;
74}
75
76static inline struct shmid_kernel *shm_rmid(int id)
77{
78 return (struct shmid_kernel *)ipc_rmid(&shm_ids,id);
79}
80
81static inline int shm_addid(struct shmid_kernel *shp)
82{
83 return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni);
84}
85
86
87
88static inline void shm_inc (int id) {
89 struct shmid_kernel *shp;
90
91 if(!(shp = shm_lock(id)))
92 BUG();
93 shp->shm_atim = get_seconds();
94 shp->shm_lprid = current->tgid;
95 shp->shm_nattch++;
96 shm_unlock(shp);
97}
98
99/* This is called by fork, once for every shm attach. */
100static void shm_open (struct vm_area_struct *shmd)
101{
102 shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino);
103}
104
105/*
106 * shm_destroy - free the struct shmid_kernel
107 *
108 * @shp: struct to free
109 *
110 * It has to be called with shp and shm_ids.sem locked,
111 * but returns with shp unlocked and freed.
112 */
113static void shm_destroy (struct shmid_kernel *shp)
114{
115 shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
116 shm_rmid (shp->id);
117 shm_unlock(shp);
118 if (!is_file_hugepages(shp->shm_file))
119 shmem_lock(shp->shm_file, 0, shp->mlock_user);
120 else
121 user_shm_unlock(shp->shm_file->f_dentry->d_inode->i_size,
122 shp->mlock_user);
123 fput (shp->shm_file);
124 security_shm_free(shp);
125 ipc_rcu_putref(shp);
126}
127
128/*
129 * remove the attach descriptor shmd.
130 * free memory for segment if it is marked destroyed.
131 * The descriptor has already been removed from the current->mm->mmap list
132 * and will later be kfree()d.
133 */
134static void shm_close (struct vm_area_struct *shmd)
135{
136 struct file * file = shmd->vm_file;
137 int id = file->f_dentry->d_inode->i_ino;
138 struct shmid_kernel *shp;
139
140 down (&shm_ids.sem);
141 /* remove from the list of attaches of the shm segment */
142 if(!(shp = shm_lock(id)))
143 BUG();
144 shp->shm_lprid = current->tgid;
145 shp->shm_dtim = get_seconds();
146 shp->shm_nattch--;
147 if(shp->shm_nattch == 0 &&
148 shp->shm_flags & SHM_DEST)
149 shm_destroy (shp);
150 else
151 shm_unlock(shp);
152 up (&shm_ids.sem);
153}
154
155static int shm_mmap(struct file * file, struct vm_area_struct * vma)
156{
157 file_accessed(file);
158 vma->vm_ops = &shm_vm_ops;
159 shm_inc(file->f_dentry->d_inode->i_ino);
160 return 0;
161}
162
163static struct file_operations shm_file_operations = {
164 .mmap = shm_mmap
165};
166
167static struct vm_operations_struct shm_vm_ops = {
168 .open = shm_open, /* callback for a new vm-area open */
169 .close = shm_close, /* callback for when the vm-area is released */
170 .nopage = shmem_nopage,
171#ifdef CONFIG_NUMA
172 .set_policy = shmem_set_policy,
173 .get_policy = shmem_get_policy,
174#endif
175};
176
177static int newseg (key_t key, int shmflg, size_t size)
178{
179 int error;
180 struct shmid_kernel *shp;
181 int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
182 struct file * file;
183 char name[13];
184 int id;
185
186 if (size < SHMMIN || size > shm_ctlmax)
187 return -EINVAL;
188
189 if (shm_tot + numpages >= shm_ctlall)
190 return -ENOSPC;
191
192 shp = ipc_rcu_alloc(sizeof(*shp));
193 if (!shp)
194 return -ENOMEM;
195
196 shp->shm_perm.key = key;
197 shp->shm_flags = (shmflg & S_IRWXUGO);
198 shp->mlock_user = NULL;
199
200 shp->shm_perm.security = NULL;
201 error = security_shm_alloc(shp);
202 if (error) {
203 ipc_rcu_putref(shp);
204 return error;
205 }
206
207 if (shmflg & SHM_HUGETLB) {
208 /* hugetlb_zero_setup takes care of mlock user accounting */
209 file = hugetlb_zero_setup(size);
210 shp->mlock_user = current->user;
211 } else {
212 sprintf (name, "SYSV%08x", key);
213 file = shmem_file_setup(name, size, VM_ACCOUNT);
214 }
215 error = PTR_ERR(file);
216 if (IS_ERR(file))
217 goto no_file;
218
219 error = -ENOSPC;
220 id = shm_addid(shp);
221 if(id == -1)
222 goto no_id;
223
224 shp->shm_cprid = current->tgid;
225 shp->shm_lprid = 0;
226 shp->shm_atim = shp->shm_dtim = 0;
227 shp->shm_ctim = get_seconds();
228 shp->shm_segsz = size;
229 shp->shm_nattch = 0;
230 shp->id = shm_buildid(id,shp->shm_perm.seq);
231 shp->shm_file = file;
232 file->f_dentry->d_inode->i_ino = shp->id;
233 if (shmflg & SHM_HUGETLB)
234 set_file_hugepages(file);
235 else
236 file->f_op = &shm_file_operations;
237 shm_tot += numpages;
238 shm_unlock(shp);
239 return shp->id;
240
241no_id:
242 fput(file);
243no_file:
244 security_shm_free(shp);
245 ipc_rcu_putref(shp);
246 return error;
247}
248
249asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
250{
251 struct shmid_kernel *shp;
252 int err, id = 0;
253
254 down(&shm_ids.sem);
255 if (key == IPC_PRIVATE) {
256 err = newseg(key, shmflg, size);
257 } else if ((id = ipc_findkey(&shm_ids, key)) == -1) {
258 if (!(shmflg & IPC_CREAT))
259 err = -ENOENT;
260 else
261 err = newseg(key, shmflg, size);
262 } else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
263 err = -EEXIST;
264 } else {
265 shp = shm_lock(id);
266 if(shp==NULL)
267 BUG();
268 if (shp->shm_segsz < size)
269 err = -EINVAL;
270 else if (ipcperms(&shp->shm_perm, shmflg))
271 err = -EACCES;
272 else {
273 int shmid = shm_buildid(id, shp->shm_perm.seq);
274 err = security_shm_associate(shp, shmflg);
275 if (!err)
276 err = shmid;
277 }
278 shm_unlock(shp);
279 }
280 up(&shm_ids.sem);
281
282 return err;
283}
284
285static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
286{
287 switch(version) {
288 case IPC_64:
289 return copy_to_user(buf, in, sizeof(*in));
290 case IPC_OLD:
291 {
292 struct shmid_ds out;
293
294 ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
295 out.shm_segsz = in->shm_segsz;
296 out.shm_atime = in->shm_atime;
297 out.shm_dtime = in->shm_dtime;
298 out.shm_ctime = in->shm_ctime;
299 out.shm_cpid = in->shm_cpid;
300 out.shm_lpid = in->shm_lpid;
301 out.shm_nattch = in->shm_nattch;
302
303 return copy_to_user(buf, &out, sizeof(out));
304 }
305 default:
306 return -EINVAL;
307 }
308}
309
310struct shm_setbuf {
311 uid_t uid;
312 gid_t gid;
313 mode_t mode;
314};
315
316static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __user *buf, int version)
317{
318 switch(version) {
319 case IPC_64:
320 {
321 struct shmid64_ds tbuf;
322
323 if (copy_from_user(&tbuf, buf, sizeof(tbuf)))
324 return -EFAULT;
325
326 out->uid = tbuf.shm_perm.uid;
327 out->gid = tbuf.shm_perm.gid;
328 out->mode = tbuf.shm_flags;
329
330 return 0;
331 }
332 case IPC_OLD:
333 {
334 struct shmid_ds tbuf_old;
335
336 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
337 return -EFAULT;
338
339 out->uid = tbuf_old.shm_perm.uid;
340 out->gid = tbuf_old.shm_perm.gid;
341 out->mode = tbuf_old.shm_flags;
342
343 return 0;
344 }
345 default:
346 return -EINVAL;
347 }
348}
349
350static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
351{
352 switch(version) {
353 case IPC_64:
354 return copy_to_user(buf, in, sizeof(*in));
355 case IPC_OLD:
356 {
357 struct shminfo out;
358
359 if(in->shmmax > INT_MAX)
360 out.shmmax = INT_MAX;
361 else
362 out.shmmax = (int)in->shmmax;
363
364 out.shmmin = in->shmmin;
365 out.shmmni = in->shmmni;
366 out.shmseg = in->shmseg;
367 out.shmall = in->shmall;
368
369 return copy_to_user(buf, &out, sizeof(out));
370 }
371 default:
372 return -EINVAL;
373 }
374}
375
376static void shm_get_stat(unsigned long *rss, unsigned long *swp)
377{
378 int i;
379
380 *rss = 0;
381 *swp = 0;
382
383 for (i = 0; i <= shm_ids.max_id; i++) {
384 struct shmid_kernel *shp;
385 struct inode *inode;
386
387 shp = shm_get(i);
388 if(!shp)
389 continue;
390
391 inode = shp->shm_file->f_dentry->d_inode;
392
393 if (is_file_hugepages(shp->shm_file)) {
394 struct address_space *mapping = inode->i_mapping;
395 *rss += (HPAGE_SIZE/PAGE_SIZE)*mapping->nrpages;
396 } else {
397 struct shmem_inode_info *info = SHMEM_I(inode);
398 spin_lock(&info->lock);
399 *rss += inode->i_mapping->nrpages;
400 *swp += info->swapped;
401 spin_unlock(&info->lock);
402 }
403 }
404}
405
406asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
407{
408 struct shm_setbuf setbuf;
409 struct shmid_kernel *shp;
410 int err, version;
411
412 if (cmd < 0 || shmid < 0) {
413 err = -EINVAL;
414 goto out;
415 }
416
417 version = ipc_parse_version(&cmd);
418
419 switch (cmd) { /* replace with proc interface ? */
420 case IPC_INFO:
421 {
422 struct shminfo64 shminfo;
423
424 err = security_shm_shmctl(NULL, cmd);
425 if (err)
426 return err;
427
428 memset(&shminfo,0,sizeof(shminfo));
429 shminfo.shmmni = shminfo.shmseg = shm_ctlmni;
430 shminfo.shmmax = shm_ctlmax;
431 shminfo.shmall = shm_ctlall;
432
433 shminfo.shmmin = SHMMIN;
434 if(copy_shminfo_to_user (buf, &shminfo, version))
435 return -EFAULT;
436 /* reading an integer is always atomic */
437 err= shm_ids.max_id;
438 if(err<0)
439 err = 0;
440 goto out;
441 }
442 case SHM_INFO:
443 {
444 struct shm_info shm_info;
445
446 err = security_shm_shmctl(NULL, cmd);
447 if (err)
448 return err;
449
450 memset(&shm_info,0,sizeof(shm_info));
451 down(&shm_ids.sem);
452 shm_info.used_ids = shm_ids.in_use;
453 shm_get_stat (&shm_info.shm_rss, &shm_info.shm_swp);
454 shm_info.shm_tot = shm_tot;
455 shm_info.swap_attempts = 0;
456 shm_info.swap_successes = 0;
457 err = shm_ids.max_id;
458 up(&shm_ids.sem);
459 if(copy_to_user (buf, &shm_info, sizeof(shm_info))) {
460 err = -EFAULT;
461 goto out;
462 }
463
464 err = err < 0 ? 0 : err;
465 goto out;
466 }
467 case SHM_STAT:
468 case IPC_STAT:
469 {
470 struct shmid64_ds tbuf;
471 int result;
472 memset(&tbuf, 0, sizeof(tbuf));
473 shp = shm_lock(shmid);
474 if(shp==NULL) {
475 err = -EINVAL;
476 goto out;
477 } else if(cmd==SHM_STAT) {
478 err = -EINVAL;
479 if (shmid > shm_ids.max_id)
480 goto out_unlock;
481 result = shm_buildid(shmid, shp->shm_perm.seq);
482 } else {
483 err = shm_checkid(shp,shmid);
484 if(err)
485 goto out_unlock;
486 result = 0;
487 }
488 err=-EACCES;
489 if (ipcperms (&shp->shm_perm, S_IRUGO))
490 goto out_unlock;
491 err = security_shm_shmctl(shp, cmd);
492 if (err)
493 goto out_unlock;
494 kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
495 tbuf.shm_segsz = shp->shm_segsz;
496 tbuf.shm_atime = shp->shm_atim;
497 tbuf.shm_dtime = shp->shm_dtim;
498 tbuf.shm_ctime = shp->shm_ctim;
499 tbuf.shm_cpid = shp->shm_cprid;
500 tbuf.shm_lpid = shp->shm_lprid;
501 if (!is_file_hugepages(shp->shm_file))
502 tbuf.shm_nattch = shp->shm_nattch;
503 else
504 tbuf.shm_nattch = file_count(shp->shm_file) - 1;
505 shm_unlock(shp);
506 if(copy_shmid_to_user (buf, &tbuf, version))
507 err = -EFAULT;
508 else
509 err = result;
510 goto out;
511 }
512 case SHM_LOCK:
513 case SHM_UNLOCK:
514 {
515 shp = shm_lock(shmid);
516 if(shp==NULL) {
517 err = -EINVAL;
518 goto out;
519 }
520 err = shm_checkid(shp,shmid);
521 if(err)
522 goto out_unlock;
523
524 if (!capable(CAP_IPC_LOCK)) {
525 err = -EPERM;
526 if (current->euid != shp->shm_perm.uid &&
527 current->euid != shp->shm_perm.cuid)
528 goto out_unlock;
529 if (cmd == SHM_LOCK &&
530 !current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
531 goto out_unlock;
532 }
533
534 err = security_shm_shmctl(shp, cmd);
535 if (err)
536 goto out_unlock;
537
538 if(cmd==SHM_LOCK) {
539 struct user_struct * user = current->user;
540 if (!is_file_hugepages(shp->shm_file)) {
541 err = shmem_lock(shp->shm_file, 1, user);
542 if (!err) {
543 shp->shm_flags |= SHM_LOCKED;
544 shp->mlock_user = user;
545 }
546 }
547 } else if (!is_file_hugepages(shp->shm_file)) {
548 shmem_lock(shp->shm_file, 0, shp->mlock_user);
549 shp->shm_flags &= ~SHM_LOCKED;
550 shp->mlock_user = NULL;
551 }
552 shm_unlock(shp);
553 goto out;
554 }
555 case IPC_RMID:
556 {
557 /*
558 * We cannot simply remove the file. The SVID states
559 * that the block remains until the last person
560 * detaches from it, then is deleted. A shmat() on
561 * an RMID segment is legal in older Linux and if
562 * we change it apps break...
563 *
564 * Instead we set a destroyed flag, and then blow
565 * the name away when the usage hits zero.
566 */
567 down(&shm_ids.sem);
568 shp = shm_lock(shmid);
569 err = -EINVAL;
570 if (shp == NULL)
571 goto out_up;
572 err = shm_checkid(shp, shmid);
573 if(err)
574 goto out_unlock_up;
575
576 if (current->euid != shp->shm_perm.uid &&
577 current->euid != shp->shm_perm.cuid &&
578 !capable(CAP_SYS_ADMIN)) {
579 err=-EPERM;
580 goto out_unlock_up;
581 }
582
583 err = security_shm_shmctl(shp, cmd);
584 if (err)
585 goto out_unlock_up;
586
587 if (shp->shm_nattch){
588 shp->shm_flags |= SHM_DEST;
589 /* Do not find it any more */
590 shp->shm_perm.key = IPC_PRIVATE;
591 shm_unlock(shp);
592 } else
593 shm_destroy (shp);
594 up(&shm_ids.sem);
595 goto out;
596 }
597
598 case IPC_SET:
599 {
600 if (copy_shmid_from_user (&setbuf, buf, version)) {
601 err = -EFAULT;
602 goto out;
603 }
604 if ((err = audit_ipc_perms(0, setbuf.uid, setbuf.gid, setbuf.mode)))
605 return err;
606 down(&shm_ids.sem);
607 shp = shm_lock(shmid);
608 err=-EINVAL;
609 if(shp==NULL)
610 goto out_up;
611 err = shm_checkid(shp,shmid);
612 if(err)
613 goto out_unlock_up;
614 err=-EPERM;
615 if (current->euid != shp->shm_perm.uid &&
616 current->euid != shp->shm_perm.cuid &&
617 !capable(CAP_SYS_ADMIN)) {
618 goto out_unlock_up;
619 }
620
621 err = security_shm_shmctl(shp, cmd);
622 if (err)
623 goto out_unlock_up;
624
625 shp->shm_perm.uid = setbuf.uid;
626 shp->shm_perm.gid = setbuf.gid;
627 shp->shm_flags = (shp->shm_flags & ~S_IRWXUGO)
628 | (setbuf.mode & S_IRWXUGO);
629 shp->shm_ctim = get_seconds();
630 break;
631 }
632
633 default:
634 err = -EINVAL;
635 goto out;
636 }
637
638 err = 0;
639out_unlock_up:
640 shm_unlock(shp);
641out_up:
642 up(&shm_ids.sem);
643 goto out;
644out_unlock:
645 shm_unlock(shp);
646out:
647 return err;
648}
649
650/*
651 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
652 *
653 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
654 * "raddr" thing points to kernel space, and there has to be a wrapper around
655 * this.
656 */
657long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
658{
659 struct shmid_kernel *shp;
660 unsigned long addr;
661 unsigned long size;
662 struct file * file;
663 int err;
664 unsigned long flags;
665 unsigned long prot;
666 unsigned long o_flags;
667 int acc_mode;
668 void *user_addr;
669
670 if (shmid < 0) {
671 err = -EINVAL;
672 goto out;
673 } else if ((addr = (ulong)shmaddr)) {
674 if (addr & (SHMLBA-1)) {
675 if (shmflg & SHM_RND)
676 addr &= ~(SHMLBA-1); /* round down */
677 else
678#ifndef __ARCH_FORCE_SHMLBA
679 if (addr & ~PAGE_MASK)
680#endif
681 return -EINVAL;
682 }
683 flags = MAP_SHARED | MAP_FIXED;
684 } else {
685 if ((shmflg & SHM_REMAP))
686 return -EINVAL;
687
688 flags = MAP_SHARED;
689 }
690
691 if (shmflg & SHM_RDONLY) {
692 prot = PROT_READ;
693 o_flags = O_RDONLY;
694 acc_mode = S_IRUGO;
695 } else {
696 prot = PROT_READ | PROT_WRITE;
697 o_flags = O_RDWR;
698 acc_mode = S_IRUGO | S_IWUGO;
699 }
700 if (shmflg & SHM_EXEC) {
701 prot |= PROT_EXEC;
702 acc_mode |= S_IXUGO;
703 }
704
705 /*
706 * We cannot rely on the fs check since SYSV IPC does have an
707 * additional creator id...
708 */
709 shp = shm_lock(shmid);
710 if(shp == NULL) {
711 err = -EINVAL;
712 goto out;
713 }
714 err = shm_checkid(shp,shmid);
715 if (err) {
716 shm_unlock(shp);
717 goto out;
718 }
719 if (ipcperms(&shp->shm_perm, acc_mode)) {
720 shm_unlock(shp);
721 err = -EACCES;
722 goto out;
723 }
724
725 err = security_shm_shmat(shp, shmaddr, shmflg);
726 if (err) {
727 shm_unlock(shp);
728 return err;
729 }
730
731 file = shp->shm_file;
732 size = i_size_read(file->f_dentry->d_inode);
733 shp->shm_nattch++;
734 shm_unlock(shp);
735
736 down_write(&current->mm->mmap_sem);
737 if (addr && !(shmflg & SHM_REMAP)) {
738 user_addr = ERR_PTR(-EINVAL);
739 if (find_vma_intersection(current->mm, addr, addr + size))
740 goto invalid;
741 /*
742 * If shm segment goes below stack, make sure there is some
743 * space left for the stack to grow (at least 4 pages).
744 */
745 if (addr < current->mm->start_stack &&
746 addr > current->mm->start_stack - size - PAGE_SIZE * 5)
747 goto invalid;
748 }
749
750 user_addr = (void*) do_mmap (file, addr, size, prot, flags, 0);
751
752invalid:
753 up_write(&current->mm->mmap_sem);
754
755 down (&shm_ids.sem);
756 if(!(shp = shm_lock(shmid)))
757 BUG();
758 shp->shm_nattch--;
759 if(shp->shm_nattch == 0 &&
760 shp->shm_flags & SHM_DEST)
761 shm_destroy (shp);
762 else
763 shm_unlock(shp);
764 up (&shm_ids.sem);
765
766 *raddr = (unsigned long) user_addr;
767 err = 0;
768 if (IS_ERR(user_addr))
769 err = PTR_ERR(user_addr);
770out:
771 return err;
772}
773
774/*
775 * detach and kill segment if marked destroyed.
776 * The work is done in shm_close.
777 */
778asmlinkage long sys_shmdt(char __user *shmaddr)
779{
780 struct mm_struct *mm = current->mm;
781 struct vm_area_struct *vma, *next;
782 unsigned long addr = (unsigned long)shmaddr;
783 loff_t size = 0;
784 int retval = -EINVAL;
785
786 down_write(&mm->mmap_sem);
787
788 /*
789 * This function tries to be smart and unmap shm segments that
790 * were modified by partial mlock or munmap calls:
791 * - It first determines the size of the shm segment that should be
792 * unmapped: It searches for a vma that is backed by shm and that
793 * started at address shmaddr. It records its size and then unmaps
794 * it.
795 * - Then it unmaps all shm vmas that started at shmaddr and that
796 * are within the initially determined size.
797 * Errors from do_munmap are ignored: the function only fails if
798 * it's called with invalid parameters or if it's called to unmap
799 * a part of a vma. Both calls in this function are for full vmas,
800 * the parameters are directly copied from the vma itself and always
801 * valid - therefore do_munmap cannot fail. (famous last words?)
802 */
803 /*
804 * If it had been mremap()'d, the starting address would not
805 * match the usual checks anyway. So assume all vma's are
806 * above the starting address given.
807 */
808 vma = find_vma(mm, addr);
809
810 while (vma) {
811 next = vma->vm_next;
812
813 /*
814 * Check if the starting address would match, i.e. it's
815 * a fragment created by mprotect() and/or munmap(), or
816 * otherwise it starts at this address with no hassles.
817 */
818 if ((vma->vm_ops == &shm_vm_ops || is_vm_hugetlb_page(vma)) &&
819 (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {
820
821
822 size = vma->vm_file->f_dentry->d_inode->i_size;
823 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
824 /*
825 * We discovered the size of the shm segment, so
826 * break out of here and fall through to the next
827 * loop that uses the size information to stop
828 * searching for matching vma's.
829 */
830 retval = 0;
831 vma = next;
832 break;
833 }
834 vma = next;
835 }
836
837 /*
838 * We need look no further than the maximum address a fragment
839 * could possibly have landed at. Also cast things to loff_t to
840 * prevent overflows and make comparisons vs. equal-width types.
841 */
842 while (vma && (loff_t)(vma->vm_end - addr) <= size) {
843 next = vma->vm_next;
844
845 /* finding a matching vma now does not alter retval */
846 if ((vma->vm_ops == &shm_vm_ops || is_vm_hugetlb_page(vma)) &&
847 (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff)
848
849 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
850 vma = next;
851 }
852
853 up_write(&mm->mmap_sem);
854 return retval;
855}
856
857#ifdef CONFIG_PROC_FS
858static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
859{
860 off_t pos = 0;
861 off_t begin = 0;
862 int i, len = 0;
863
864 down(&shm_ids.sem);
865 len += sprintf(buffer, " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime\n");
866
867 for(i = 0; i <= shm_ids.max_id; i++) {
868 struct shmid_kernel* shp;
869
870 shp = shm_lock(i);
871 if(shp!=NULL) {
872#define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
873#define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
874 char *format;
875
876 if (sizeof(size_t) <= sizeof(int))
877 format = SMALL_STRING;
878 else
879 format = BIG_STRING;
880 len += sprintf(buffer + len, format,
881 shp->shm_perm.key,
882 shm_buildid(i, shp->shm_perm.seq),
883 shp->shm_flags,
884 shp->shm_segsz,
885 shp->shm_cprid,
886 shp->shm_lprid,
887 is_file_hugepages(shp->shm_file) ? (file_count(shp->shm_file) - 1) : shp->shm_nattch,
888 shp->shm_perm.uid,
889 shp->shm_perm.gid,
890 shp->shm_perm.cuid,
891 shp->shm_perm.cgid,
892 shp->shm_atim,
893 shp->shm_dtim,
894 shp->shm_ctim);
895 shm_unlock(shp);
896
897 pos += len;
898 if(pos < offset) {
899 len = 0;
900 begin = pos;
901 }
902 if(pos > offset + length)
903 goto done;
904 }
905 }
906 *eof = 1;
907done:
908 up(&shm_ids.sem);
909 *start = buffer + (offset - begin);
910 len -= (offset - begin);
911 if(len > length)
912 len = length;
913 if(len < 0)
914 len = 0;
915 return len;
916}
917#endif
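As the IPC_RMID comment in sys_shmctl() above explains, removal only marks the segment SHM_DEST and hides its key; the memory stays usable until the last attach goes away and shm_close()/shm_destroy() tear it down. A small user-space sketch of that life cycle (size and mode are arbitrary, error handling omitted):

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(void)
{
	/* illustrative 4 KiB private segment */
	int shmid = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
	char *p = shmat(shmid, NULL, 0);          /* shm_nattch becomes 1 */

	strcpy(p, "still mapped after IPC_RMID");

	/* sets SHM_DEST; nothing is freed yet because we are still attached */
	shmctl(shmid, IPC_RMID, NULL);
	printf("%s\n", p);

	/* last detach: nattch drops to 0 and the SHM_DEST segment is destroyed */
	shmdt(p);
	return 0;
}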
diff --git a/ipc/util.c b/ipc/util.c
new file mode 100644
index 000000000000..e00c35f7b2b8
--- /dev/null
+++ b/ipc/util.c
@@ -0,0 +1,580 @@
1/*
2 * linux/ipc/util.c
3 * Copyright (C) 1992 Krishna Balasubramanian
4 *
5 * Sep 1997 - Call suser() last after "normal" permission checks so we
6 * get BSD style process accounting right.
7 * Occurs in several places in the IPC code.
8 * Chris Evans, <chris@ferret.lmh.ox.ac.uk>
9 * Nov 1999 - ipc helper functions, unified SMP locking
10 * Manfred Spraul <manfreds@colorfullife.com>
11 * Oct 2002 - One lock per IPC id. RCU ipc_free for lock-free grow_ary().
12 * Mingming Cao <cmm@us.ibm.com>
13 */
14
15#include <linux/config.h>
16#include <linux/mm.h>
17#include <linux/shm.h>
18#include <linux/init.h>
19#include <linux/msg.h>
20#include <linux/smp_lock.h>
21#include <linux/vmalloc.h>
22#include <linux/slab.h>
23#include <linux/highuid.h>
24#include <linux/security.h>
25#include <linux/rcupdate.h>
26#include <linux/workqueue.h>
27
28#include <asm/unistd.h>
29
30#include "util.h"
31
32/**
33 * ipc_init - initialise IPC subsystem
34 *
35 * The various System V IPC resources (semaphores, messages and shared
36 * memory) are initialised.
37 */
38
39static int __init ipc_init(void)
40{
41 sem_init();
42 msg_init();
43 shm_init();
44 return 0;
45}
46__initcall(ipc_init);
47
48/**
49 * ipc_init_ids - initialise IPC identifiers
50 * @ids: Identifier set
51 * @size: Number of identifiers
52 *
53 * Given a size for the ipc identifier range (limited below IPCMNI)
54 * set up the sequence range to use then allocate and initialise the
55 * array itself.
56 */
57
58void __init ipc_init_ids(struct ipc_ids* ids, int size)
59{
60 int i;
61 sema_init(&ids->sem,1);
62
63 if(size > IPCMNI)
64 size = IPCMNI;
65 ids->in_use = 0;
66 ids->max_id = -1;
67 ids->seq = 0;
68 {
69 int seq_limit = INT_MAX/SEQ_MULTIPLIER;
70 if(seq_limit > USHRT_MAX)
71 ids->seq_max = USHRT_MAX;
72 else
73 ids->seq_max = seq_limit;
74 }
75
76 ids->entries = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*size +
77 sizeof(struct ipc_id_ary));
78
79 if(ids->entries == NULL) {
80 printk(KERN_ERR "ipc_init_ids() failed, ipc service disabled.\n");
81 size = 0;
82 ids->entries = &ids->nullentry;
83 }
84 ids->entries->size = size;
85 for(i=0;i<size;i++)
86 ids->entries->p[i] = NULL;
87}
88
89/**
90 * ipc_findkey - find a key in an ipc identifier set
91 * @ids: Identifier set
92 * @key: The key to find
93 *
94 * Requires ipc_ids.sem locked.
95 * Returns the identifier if found or -1 if not.
96 */
97
98int ipc_findkey(struct ipc_ids* ids, key_t key)
99{
100 int id;
101 struct kern_ipc_perm* p;
102 int max_id = ids->max_id;
103
104 /*
105 * rcu_dereference() is not needed here
106 * since ipc_ids.sem is held
107 */
108 for (id = 0; id <= max_id; id++) {
109 p = ids->entries->p[id];
110 if(p==NULL)
111 continue;
112 if (key == p->key)
113 return id;
114 }
115 return -1;
116}
117
118/*
119 * Requires ipc_ids.sem locked
120 */
121static int grow_ary(struct ipc_ids* ids, int newsize)
122{
123 struct ipc_id_ary* new;
124 struct ipc_id_ary* old;
125 int i;
126 int size = ids->entries->size;
127
128 if(newsize > IPCMNI)
129 newsize = IPCMNI;
130 if(newsize <= size)
131 return newsize;
132
133 new = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*newsize +
134 sizeof(struct ipc_id_ary));
135 if(new == NULL)
136 return size;
137 new->size = newsize;
138 memcpy(new->p, ids->entries->p, sizeof(struct kern_ipc_perm *)*size +
139 sizeof(struct ipc_id_ary));
140 for(i=size;i<newsize;i++) {
141 new->p[i] = NULL;
142 }
143 old = ids->entries;
144
145 /*
146 * Use rcu_assign_pointer() to make sure the memcpyed contents
147 * of the new array are visible before the new array becomes visible.
148 */
149 rcu_assign_pointer(ids->entries, new);
150
151 ipc_rcu_putref(old);
152 return newsize;
153}
154
155/**
156 * ipc_addid - add an IPC identifier
157 * @ids: IPC identifier set
158 * @new: new IPC permission set
159 * @size: new size limit for the id array
160 *
161 * Add an entry 'new' to the IPC arrays. The permissions object is
162 * initialised and the first free entry is set up and the id assigned
163 * is returned. The list is returned in a locked state on success.
164 * On failure the list is not locked and -1 is returned.
165 *
166 * Called with ipc_ids.sem held.
167 */
168
169int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
170{
171 int id;
172
173 size = grow_ary(ids,size);
174
175 /*
176 * rcu_dereference() is not needed here since
177 * ipc_ids.sem is held
178 */
179 for (id = 0; id < size; id++) {
180 if(ids->entries->p[id] == NULL)
181 goto found;
182 }
183 return -1;
184found:
185 ids->in_use++;
186 if (id > ids->max_id)
187 ids->max_id = id;
188
189 new->cuid = new->uid = current->euid;
190 new->gid = new->cgid = current->egid;
191
192 new->seq = ids->seq++;
193 if(ids->seq > ids->seq_max)
194 ids->seq = 0;
195
196 spin_lock_init(&new->lock);
197 new->deleted = 0;
198 rcu_read_lock();
199 spin_lock(&new->lock);
200 ids->entries->p[id] = new;
201 return id;
202}
203
204/**
205 * ipc_rmid - remove an IPC identifier
206 * @ids: identifier set
207 * @id: Identifier to remove
208 *
209 * The identifier must be valid, and in use. The kernel will panic if
210 * fed an invalid identifier. The entry is removed and internal
211 * variables recomputed. The object associated with the identifier
212 * is returned.
213 * ipc_ids.sem and the spinlock for this ID are held before this function
214 * is called, and remain locked on exit.
215 */
216
217struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id)
218{
219 struct kern_ipc_perm* p;
220 int lid = id % SEQ_MULTIPLIER;
221 if(lid >= ids->entries->size)
222 BUG();
223
224 /*
225 * do not need an rcu_dereference() here to force ordering
226 * on Alpha, since the ipc_ids.sem is held.
227 */
228 p = ids->entries->p[lid];
229 ids->entries->p[lid] = NULL;
230 if(p==NULL)
231 BUG();
232 ids->in_use--;
233
234 if (lid == ids->max_id) {
235 do {
236 lid--;
237 if(lid == -1)
238 break;
239 } while (ids->entries->p[lid] == NULL);
240 ids->max_id = lid;
241 }
242 p->deleted = 1;
243 return p;
244}
245
246/**
247 * ipc_alloc - allocate ipc space
248 * @size: size desired
249 *
250 * Allocate memory from the appropriate pools and return a pointer to it.
251 * NULL is returned if the allocation fails
252 */
253
254void* ipc_alloc(int size)
255{
256 void* out;
257 if(size > PAGE_SIZE)
258 out = vmalloc(size);
259 else
260 out = kmalloc(size, GFP_KERNEL);
261 return out;
262}
263
264/**
265 * ipc_free - free ipc space
266 * @ptr: pointer returned by ipc_alloc
267 * @size: size of block
268 *
269 * Free a block created with ipc_alloc. The caller must know the size
270 * used in the allocation call.
271 */
272
273void ipc_free(void* ptr, int size)
274{
275 if(size > PAGE_SIZE)
276 vfree(ptr);
277 else
278 kfree(ptr);
279}
280
281/*
282 * rcu allocations:
283 * There are three headers that are prepended to the actual allocation:
284 * - during use: ipc_rcu_hdr.
285 * - during the rcu grace period: ipc_rcu_grace.
286 * - [only if vmalloc]: ipc_rcu_sched.
287 * Their lifetimes don't overlap, thus the headers share the same memory.
288 * Unlike a normal union, they are right-aligned, thus some container_of
289 * forward/backward casting is necessary:
290 */
291struct ipc_rcu_hdr
292{
293 int refcount;
294 int is_vmalloc;
295 void *data[0];
296};
297
298
299struct ipc_rcu_grace
300{
301 struct rcu_head rcu;
302 /* "void *" makes sure alignment of following data is sane. */
303 void *data[0];
304};
305
306struct ipc_rcu_sched
307{
308 struct work_struct work;
309 /* "void *" makes sure alignment of following data is sane. */
310 void *data[0];
311};
312
313#define HDRLEN_KMALLOC (sizeof(struct ipc_rcu_grace) > sizeof(struct ipc_rcu_hdr) ? \
314 sizeof(struct ipc_rcu_grace) : sizeof(struct ipc_rcu_hdr))
315#define HDRLEN_VMALLOC (sizeof(struct ipc_rcu_sched) > HDRLEN_KMALLOC ? \
316 sizeof(struct ipc_rcu_sched) : HDRLEN_KMALLOC)
317
318static inline int rcu_use_vmalloc(int size)
319{
320 /* Too big for a single page? */
321 if (HDRLEN_KMALLOC + size > PAGE_SIZE)
322 return 1;
323 return 0;
324}
325
326/**
327 * ipc_rcu_alloc - allocate ipc and rcu space
328 * @size: size desired
329 *
330 * Allocate memory for the rcu header structure + the object.
331 * Returns the pointer to the object.
332 * NULL is returned if the allocation fails.
333 */
334
335void* ipc_rcu_alloc(int size)
336{
337 void* out;
338 /*
339 * We prepend the allocation with the rcu struct, and
340 * workqueue if necessary (for vmalloc).
341 */
342 if (rcu_use_vmalloc(size)) {
343 out = vmalloc(HDRLEN_VMALLOC + size);
344 if (out) {
345 out += HDRLEN_VMALLOC;
346 container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
347 container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
348 }
349 } else {
350 out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
351 if (out) {
352 out += HDRLEN_KMALLOC;
353 container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0;
354 container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
355 }
356 }
357
358 return out;
359}
360
361void ipc_rcu_getref(void *ptr)
362{
363 container_of(ptr, struct ipc_rcu_hdr, data)->refcount++;
364}
365
366/**
367 * ipc_schedule_free - free ipc + rcu space
368 *
369 * Since the RCU callback function is called in bh context,
370 * we need to defer the vfree to schedule_work()
371 */
372static void ipc_schedule_free(struct rcu_head *head)
373{
374 struct ipc_rcu_grace *grace =
375 container_of(head, struct ipc_rcu_grace, rcu);
376 struct ipc_rcu_sched *sched =
377 container_of(&(grace->data[0]), struct ipc_rcu_sched, data[0]);
378
379 INIT_WORK(&sched->work, vfree, sched);
380 schedule_work(&sched->work);
381}
382
383/**
384 * ipc_immediate_free - free ipc + rcu space
385 *
386 * Free from the RCU callback context
387 *
388 */
389static void ipc_immediate_free(struct rcu_head *head)
390{
391 struct ipc_rcu_grace *free =
392 container_of(head, struct ipc_rcu_grace, rcu);
393 kfree(free);
394}
395
396void ipc_rcu_putref(void *ptr)
397{
398 if (--container_of(ptr, struct ipc_rcu_hdr, data)->refcount > 0)
399 return;
400
401 if (container_of(ptr, struct ipc_rcu_hdr, data)->is_vmalloc) {
402 call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu,
403 ipc_schedule_free);
404 } else {
405 call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu,
406 ipc_immediate_free);
407 }
408}
409
410/**
411 * ipcperms - check IPC permissions
412 * @ipcp: IPC permission set
413 * @flag: desired permission set.
414 *
415 * Check user, group, other permissions for access
416 * to ipc resources. return 0 if allowed
417 */
418
419int ipcperms (struct kern_ipc_perm *ipcp, short flag)
420{ /* flag will most probably be 0 or S_...UGO from <linux/stat.h> */
421 int requested_mode, granted_mode;
422
423 requested_mode = (flag >> 6) | (flag >> 3) | flag;
424 granted_mode = ipcp->mode;
425 if (current->euid == ipcp->cuid || current->euid == ipcp->uid)
426 granted_mode >>= 6;
427 else if (in_group_p(ipcp->cgid) || in_group_p(ipcp->gid))
428 granted_mode >>= 3;
429 /* is there some bit set in requested_mode but not in granted_mode? */
430 if ((requested_mode & ~granted_mode & 0007) &&
431 !capable(CAP_IPC_OWNER))
432 return -1;
433
434 return security_ipc_permission(ipcp, flag);
435}
436
437/*
438 * Functions to convert between the kern_ipc_perm structure and the
439 * old/new ipc_perm structures
440 */
441
442/**
443 * kernel_to_ipc64_perm - convert kernel ipc permissions to user
444 * @in: kernel permissions
445 * @out: new style IPC permissions
446 *
447 * Turn the kernel object 'in' into a set of permissions descriptions
448 * for returning to userspace (out).
449 */
450
451
452void kernel_to_ipc64_perm (struct kern_ipc_perm *in, struct ipc64_perm *out)
453{
454 out->key = in->key;
455 out->uid = in->uid;
456 out->gid = in->gid;
457 out->cuid = in->cuid;
458 out->cgid = in->cgid;
459 out->mode = in->mode;
460 out->seq = in->seq;
461}
462
463/**
464 * ipc64_perm_to_ipc_perm - convert new ipc permissions to old
465 * @in: new style IPC permissions
466 * @out: old style IPC permissions
467 *
468 * Turn the new style permissions object 'in' into a compatibility
469 * object and store it into the 'out' pointer.
470 */
471
472void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
473{
474 out->key = in->key;
475 SET_UID(out->uid, in->uid);
476 SET_GID(out->gid, in->gid);
477 SET_UID(out->cuid, in->cuid);
478 SET_GID(out->cgid, in->cgid);
479 out->mode = in->mode;
480 out->seq = in->seq;
481}
482
483/*
484 * So far only shm_get_stat() calls ipc_get() via shm_get(), so ipc_get()
485 * is called with shm_ids.sem locked. Since grow_ary() is also called with
486 * shm_ids.sem down (for Shared Memory), there is no need to add read
487 * barriers here to guarantee the writes in grow_ary() are seen in order
488 * here (for Alpha).
489 *
490 * However ipc_get() itself does not necessarily require ipc_ids.sem down. So
491 * if in the future ipc_get() is used by other places without ipc_ids.sem
492 * down, then ipc_get() needs read memory barriers as ipc_lock() does.
493 */
494struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id)
495{
496 struct kern_ipc_perm* out;
497 int lid = id % SEQ_MULTIPLIER;
498 if(lid >= ids->entries->size)
499 return NULL;
500 out = ids->entries->p[lid];
501 return out;
502}
503
504struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id)
505{
506 struct kern_ipc_perm* out;
507 int lid = id % SEQ_MULTIPLIER;
508 struct ipc_id_ary* entries;
509
510 rcu_read_lock();
511 entries = rcu_dereference(ids->entries);
512 if(lid >= entries->size) {
513 rcu_read_unlock();
514 return NULL;
515 }
516 out = entries->p[lid];
517 if(out == NULL) {
518 rcu_read_unlock();
519 return NULL;
520 }
521 spin_lock(&out->lock);
522
523 /* ipc_rmid() may have already freed the ID while ipc_lock
524 * was spinning: here verify that the structure is still valid
525 */
526 if (out->deleted) {
527 spin_unlock(&out->lock);
528 rcu_read_unlock();
529 return NULL;
530 }
531 return out;
532}
533
534void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
535{
536 rcu_read_lock();
537 spin_lock(&perm->lock);
538}
539
540void ipc_unlock(struct kern_ipc_perm* perm)
541{
542 spin_unlock(&perm->lock);
543 rcu_read_unlock();
544}
545
546int ipc_buildid(struct ipc_ids* ids, int id, int seq)
547{
548 return SEQ_MULTIPLIER*seq + id;
549}
550
551int ipc_checkid(struct ipc_ids* ids, struct kern_ipc_perm* ipcp, int uid)
552{
553 if(uid/SEQ_MULTIPLIER != ipcp->seq)
554 return 1;
555 return 0;
556}
557
558#ifdef __ARCH_WANT_IPC_PARSE_VERSION
559
560
561/**
562 * ipc_parse_version - IPC call version
563 * @cmd: pointer to command
564 *
565 * Return IPC_64 for new style IPC and IPC_OLD for old style IPC.
566 * The cmd value is turned from an encoding command and version into
567 * just the command code.
568 */
569
570int ipc_parse_version (int *cmd)
571{
572 if (*cmd & IPC_64) {
573 *cmd ^= IPC_64;
574 return IPC_64;
575 } else {
576 return IPC_OLD;
577 }
578}
579
580#endif /* __ARCH_WANT_IPC_PARSE_VERSION */
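The id handed back to user space by ipc_buildid() packs the array slot and the slot's sequence counter together, and ipc_checkid() later rejects ids whose sequence number no longer matches. A standalone sketch of that arithmetic, assuming the usual SEQ_MULTIPLIER of IPCMNI = 32768 (the slot and sequence values are made up):

#include <stdio.h>

#define SEQ_MULTIPLIER 32768	/* IPCMNI in this tree; stated assumption for the demo */

static int buildid(int slot, int seq) { return SEQ_MULTIPLIER * seq + slot; }
/* non-zero means the id is stale (the slot has been recycled) */
static int checkid(int id, int seq)   { return id / SEQ_MULTIPLIER != seq; }

int main(void)
{
	int slot = 5, seq = 3;
	int id = buildid(slot, seq);	/* 3 * 32768 + 5 = 98309 */

	printf("id=%d slot=%d stale=%d\n", id, id % SEQ_MULTIPLIER, checkid(id, seq));
	/* once the slot is reused under sequence 4, the old id no longer checks out */
	printf("old id against seq=4: stale=%d\n", checkid(id, 4));
	return 0;
}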
diff --git a/ipc/util.h b/ipc/util.h
new file mode 100644
index 000000000000..07d689452363
--- /dev/null
+++ b/ipc/util.h
@@ -0,0 +1,81 @@
1/*
2 * linux/ipc/util.h
3 * Copyright (C) 1999 Christoph Rohland
4 *
5 * ipc helper functions (c) 1999 Manfred Spraul <manfreds@colorfullife.com>
6 */
7
8#ifndef _IPC_UTIL_H
9#define _IPC_UTIL_H
10
11#define USHRT_MAX 0xffff
12#define SEQ_MULTIPLIER (IPCMNI)
13
14void sem_init (void);
15void msg_init (void);
16void shm_init (void);
17
18struct ipc_id_ary {
19 int size;
20 struct kern_ipc_perm *p[0];
21};
22
23struct ipc_ids {
24 int in_use;
25 int max_id;
26 unsigned short seq;
27 unsigned short seq_max;
28 struct semaphore sem;
29 struct ipc_id_ary nullentry;
30 struct ipc_id_ary* entries;
31};
32
33void __init ipc_init_ids(struct ipc_ids* ids, int size);
34
35/* must be called with ids->sem acquired.*/
36int ipc_findkey(struct ipc_ids* ids, key_t key);
37int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size);
38
39/* must be called with both locks acquired. */
40struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id);
41
42int ipcperms (struct kern_ipc_perm *ipcp, short flg);
43
44/* for rare, potentially huge allocations.
45 * both functions can sleep
46 */
47void* ipc_alloc(int size);
48void ipc_free(void* ptr, int size);
49
50/*
51 * For allocations that need to be freed by RCU.
52 * Objects are reference counted, they start with reference count 1.
53 * getref increases the refcount, the putref call that reduces the refcount
54 * to 0 schedules the rcu destruction. Caller must guarantee locking.
55 */
56void* ipc_rcu_alloc(int size);
57void ipc_rcu_getref(void *ptr);
58void ipc_rcu_putref(void *ptr);
59
60struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id);
61struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id);
62void ipc_lock_by_ptr(struct kern_ipc_perm *ipcp);
63void ipc_unlock(struct kern_ipc_perm* perm);
64int ipc_buildid(struct ipc_ids* ids, int id, int seq);
65int ipc_checkid(struct ipc_ids* ids, struct kern_ipc_perm* ipcp, int uid);
66
67void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
68void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out);
69
70#if defined(__ia64__) || defined(__x86_64__) || defined(__hppa__)
71 /* On these architectures, we always use the "64-bit version" of the IPC structures. */
72# define ipc_parse_version(cmd) IPC_64
73#else
74int ipc_parse_version (int *cmd);
75#endif
76
77extern void free_msg(struct msg_msg *msg);
78extern struct msg_msg *load_msg(const void __user *src, int len);
79extern int store_msg(void __user *dest, struct msg_msg *msg, int len);
80
81#endif
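The ipc_rcu_getref()/ipc_rcu_putref() contract documented above (objects start with a reference count of 1, the putref that drops it to 0 triggers destruction, and the caller serialises the counter) is the pattern find_undo() relies on when it drops the semaphore lock to allocate memory. A plain user-space analogue of just the counting, with the RCU grace period and locking deliberately left out:

#include <stdio.h>
#include <stdlib.h>

struct obj {
	int refcount;		/* starts at 1, mirroring ipc_rcu_alloc() */
	int payload;
};

static struct obj *obj_alloc(int payload)
{
	struct obj *o = malloc(sizeof(*o));
	o->refcount = 1;
	o->payload = payload;
	return o;
}

static void obj_getref(struct obj *o) { o->refcount++; }

static void obj_putref(struct obj *o)
{
	/* caller is responsible for serialising access to the counter */
	if (--o->refcount == 0) {
		printf("freeing payload %d\n", o->payload);
		free(o);	/* the kernel version defers this through RCU */
	}
}

int main(void)
{
	struct obj *o = obj_alloc(42);

	obj_getref(o);	/* keep the object alive across a lock drop */
	obj_putref(o);	/* back to 1: still alive */
	obj_putref(o);	/* reaches 0: destroyed */
	return 0;
}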