aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnton Blanchard <anton@samba.org>2011-05-02 16:21:35 -0400
committerDavid S. Miller <davem@davemloft.net>2011-05-05 14:10:14 -0400
commit228e548e602061b08ee8e8966f567c12aa079682 (patch)
tree4c79ecf071d6174d42da1557812a4646d0aaa5af
parent1c5cae815d19ffe02bdfda1260949ef2b1806171 (diff)
net: Add sendmmsg socket system call
This patch adds a multiple message send syscall and is the send version of the existing recvmmsg syscall. This is heavily based on the patch by Arnaldo that added recvmmsg. I wrote a microbenchmark to test the performance gains of using this new syscall: http://ozlabs.org/~anton/junkcode/sendmmsg_test.c The test was run on a ppc64 box with a 10 Gbit network card. The benchmark can send both UDP and RAW ethernet packets. 64B UDP batch pkts/sec 1 804570 2 872800 (+ 8 %) 4 916556 (+14 %) 8 939712 (+17 %) 16 952688 (+18 %) 32 956448 (+19 %) 64 964800 (+20 %) 64B raw socket batch pkts/sec 1 1201449 2 1350028 (+12 %) 4 1461416 (+22 %) 8 1513080 (+26 %) 16 1541216 (+28 %) 32 1553440 (+29 %) 64 1557888 (+30 %) We see a 20% improvement in throughput on UDP send and 30% on raw socket send. [ Add sparc syscall entries. -DaveM ] Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--arch/powerpc/include/asm/systbl.h1
-rw-r--r--arch/powerpc/include/asm/unistd.h3
-rw-r--r--arch/sparc/include/asm/unistd.h3
-rw-r--r--arch/sparc/kernel/systbls_32.S2
-rw-r--r--arch/sparc/kernel/systbls_64.S4
-rw-r--r--arch/x86/ia32/ia32entry.S1
-rw-r--r--arch/x86/include/asm/unistd_32.h3
-rw-r--r--arch/x86/include/asm/unistd_64.h2
-rw-r--r--arch/x86/kernel/syscall_table_32.S1
-rw-r--r--include/linux/net.h1
-rw-r--r--include/linux/socket.h2
-rw-r--r--include/linux/syscalls.h2
-rw-r--r--include/net/compat.h2
-rw-r--r--kernel/sys_ni.c2
-rw-r--r--net/compat.c16
-rw-r--r--net/socket.c199
16 files changed, 192 insertions, 52 deletions
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 60f64b132bd4..8489d372077f 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -352,3 +352,4 @@ SYSCALL_SPU(name_to_handle_at)
352COMPAT_SYS_SPU(open_by_handle_at) 352COMPAT_SYS_SPU(open_by_handle_at)
353COMPAT_SYS_SPU(clock_adjtime) 353COMPAT_SYS_SPU(clock_adjtime)
354SYSCALL_SPU(syncfs) 354SYSCALL_SPU(syncfs)
355COMPAT_SYS_SPU(sendmmsg)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 3c215648ce6d..6d23c8193caa 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -371,10 +371,11 @@
371#define __NR_open_by_handle_at 346 371#define __NR_open_by_handle_at 346
372#define __NR_clock_adjtime 347 372#define __NR_clock_adjtime 347
373#define __NR_syncfs 348 373#define __NR_syncfs 348
374#define __NR_sendmmsg 349
374 375
375#ifdef __KERNEL__ 376#ifdef __KERNEL__
376 377
377#define __NR_syscalls 349 378#define __NR_syscalls 350
378 379
379#define __NR__exit __NR_exit 380#define __NR__exit __NR_exit
380#define NR_syscalls __NR_syscalls 381#define NR_syscalls __NR_syscalls
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index 9d897b6db983..c5387ed0add8 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -404,8 +404,9 @@
404#define __NR_open_by_handle_at 333 404#define __NR_open_by_handle_at 333
405#define __NR_clock_adjtime 334 405#define __NR_clock_adjtime 334
406#define __NR_syncfs 335 406#define __NR_syncfs 335
407#define __NR_sendmmsg 336
407 408
408#define NR_syscalls 336 409#define NR_syscalls 337
409 410
410#ifdef __32bit_syscall_numbers__ 411#ifdef __32bit_syscall_numbers__
411/* Sparc 32-bit only has the "setresuid32", "getresuid32" variants, 412/* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 47ac73c32e88..332c83ff7701 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -84,4 +84,4 @@ sys_call_table:
84/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv 84/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
85/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init 85/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
86/*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime 86/*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
87/*335*/ .long sys_syncfs 87/*335*/ .long sys_syncfs, sys_sendmmsg
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 4f3170c1ef47..43887ca0be0e 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -85,7 +85,7 @@ sys_call_table32:
85/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv 85/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
86 .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init 86 .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init
87/*330*/ .word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime 87/*330*/ .word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime
88 .word sys_syncfs 88 .word sys_syncfs, compat_sys_sendmmsg
89 89
90#endif /* CONFIG_COMPAT */ 90#endif /* CONFIG_COMPAT */
91 91
@@ -162,4 +162,4 @@ sys_call_table:
162/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv 162/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
163 .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init 163 .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
164/*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime 164/*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
165 .word sys_syncfs 165 .word sys_syncfs, sys_sendmmsg
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 849a9d23c71d..95f5826be458 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -848,4 +848,5 @@ ia32_sys_call_table:
848 .quad compat_sys_open_by_handle_at 848 .quad compat_sys_open_by_handle_at
849 .quad compat_sys_clock_adjtime 849 .quad compat_sys_clock_adjtime
850 .quad sys_syncfs 850 .quad sys_syncfs
851 .quad compat_sys_sendmmsg /* 345 */
851ia32_syscall_end: 852ia32_syscall_end:
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index a755ef5e5977..fb6a625c99bf 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -350,10 +350,11 @@
350#define __NR_open_by_handle_at 342 350#define __NR_open_by_handle_at 342
351#define __NR_clock_adjtime 343 351#define __NR_clock_adjtime 343
352#define __NR_syncfs 344 352#define __NR_syncfs 344
353#define __NR_sendmmsg 345
353 354
354#ifdef __KERNEL__ 355#ifdef __KERNEL__
355 356
356#define NR_syscalls 345 357#define NR_syscalls 346
357 358
358#define __ARCH_WANT_IPC_PARSE_VERSION 359#define __ARCH_WANT_IPC_PARSE_VERSION
359#define __ARCH_WANT_OLD_READDIR 360#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 160fa76bd578..79f90eb15aad 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -677,6 +677,8 @@ __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
677__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) 677__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
678#define __NR_syncfs 306 678#define __NR_syncfs 306
679__SYSCALL(__NR_syncfs, sys_syncfs) 679__SYSCALL(__NR_syncfs, sys_syncfs)
680#define __NR_sendmmsg 307
681__SYSCALL(__NR_sendmmsg, sys_sendmmsg)
680 682
681#ifndef __NO_STUBS 683#ifndef __NO_STUBS
682#define __ARCH_WANT_OLD_READDIR 684#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index abce34d5c79d..32cbffb0c494 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -344,3 +344,4 @@ ENTRY(sys_call_table)
344 .long sys_open_by_handle_at 344 .long sys_open_by_handle_at
345 .long sys_clock_adjtime 345 .long sys_clock_adjtime
346 .long sys_syncfs 346 .long sys_syncfs
347 .long sys_sendmmsg /* 345 */
diff --git a/include/linux/net.h b/include/linux/net.h
index 94de83c0f877..1da55e9b6f01 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -42,6 +42,7 @@
42#define SYS_RECVMSG 17 /* sys_recvmsg(2) */ 42#define SYS_RECVMSG 17 /* sys_recvmsg(2) */
43#define SYS_ACCEPT4 18 /* sys_accept4(2) */ 43#define SYS_ACCEPT4 18 /* sys_accept4(2) */
44#define SYS_RECVMMSG 19 /* sys_recvmmsg(2) */ 44#define SYS_RECVMMSG 19 /* sys_recvmmsg(2) */
45#define SYS_SENDMMSG 20 /* sys_sendmmsg(2) */
45 46
46typedef enum { 47typedef enum {
47 SS_FREE = 0, /* not allocated */ 48 SS_FREE = 0, /* not allocated */
diff --git a/include/linux/socket.h b/include/linux/socket.h
index d2b5e982f079..4ef98e422fde 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -333,5 +333,7 @@ struct timespec;
333 333
334extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, 334extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
335 unsigned int flags, struct timespec *timeout); 335 unsigned int flags, struct timespec *timeout);
336extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
337 unsigned int vlen, unsigned int flags);
336#endif /* not kernel and not glibc */ 338#endif /* not kernel and not glibc */
337#endif /* _LINUX_SOCKET_H */ 339#endif /* _LINUX_SOCKET_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 83ecc1749ef6..ab71447d0c5a 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -610,6 +610,8 @@ asmlinkage long sys_send(int, void __user *, size_t, unsigned);
610asmlinkage long sys_sendto(int, void __user *, size_t, unsigned, 610asmlinkage long sys_sendto(int, void __user *, size_t, unsigned,
611 struct sockaddr __user *, int); 611 struct sockaddr __user *, int);
612asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags); 612asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags);
613asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg,
614 unsigned int vlen, unsigned flags);
613asmlinkage long sys_recv(int, void __user *, size_t, unsigned); 615asmlinkage long sys_recv(int, void __user *, size_t, unsigned);
614asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned, 616asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned,
615 struct sockaddr __user *, int __user *); 617 struct sockaddr __user *, int __user *);
diff --git a/include/net/compat.h b/include/net/compat.h
index 28d5428ec6a2..9ee75edcc295 100644
--- a/include/net/compat.h
+++ b/include/net/compat.h
@@ -43,6 +43,8 @@ extern int compat_sock_get_timestampns(struct sock *, struct timespec __user *);
43extern int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *); 43extern int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *);
44extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int); 44extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int);
45extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned); 45extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned);
46extern asmlinkage long compat_sys_sendmmsg(int, struct compat_mmsghdr __user *,
47 unsigned, unsigned);
46extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned); 48extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned);
47extern asmlinkage long compat_sys_recvmmsg(int, struct compat_mmsghdr __user *, 49extern asmlinkage long compat_sys_recvmmsg(int, struct compat_mmsghdr __user *,
48 unsigned, unsigned, 50 unsigned, unsigned,
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 25cc41cd8f33..97e966f171c6 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -46,7 +46,9 @@ cond_syscall(sys_getsockopt);
46cond_syscall(compat_sys_getsockopt); 46cond_syscall(compat_sys_getsockopt);
47cond_syscall(sys_shutdown); 47cond_syscall(sys_shutdown);
48cond_syscall(sys_sendmsg); 48cond_syscall(sys_sendmsg);
49cond_syscall(sys_sendmmsg);
49cond_syscall(compat_sys_sendmsg); 50cond_syscall(compat_sys_sendmsg);
51cond_syscall(compat_sys_sendmmsg);
50cond_syscall(sys_recvmsg); 52cond_syscall(sys_recvmsg);
51cond_syscall(sys_recvmmsg); 53cond_syscall(sys_recvmmsg);
52cond_syscall(compat_sys_recvmsg); 54cond_syscall(compat_sys_recvmsg);
diff --git a/net/compat.c b/net/compat.c
index 3649d5895361..c578d9382e19 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -722,11 +722,11 @@ EXPORT_SYMBOL(compat_mc_getsockopt);
722 722
723/* Argument list sizes for compat_sys_socketcall */ 723/* Argument list sizes for compat_sys_socketcall */
724#define AL(x) ((x) * sizeof(u32)) 724#define AL(x) ((x) * sizeof(u32))
725static unsigned char nas[20] = { 725static unsigned char nas[21] = {
726 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), 726 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
727 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), 727 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
728 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), 728 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
729 AL(4), AL(5) 729 AL(4), AL(5), AL(4)
730}; 730};
731#undef AL 731#undef AL
732 732
@@ -735,6 +735,13 @@ asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, uns
735 return sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); 735 return sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
736} 736}
737 737
738asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg,
739 unsigned vlen, unsigned int flags)
740{
741 return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
742 flags | MSG_CMSG_COMPAT);
743}
744
738asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) 745asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags)
739{ 746{
740 return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); 747 return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
@@ -780,7 +787,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
780 u32 a[6]; 787 u32 a[6];
781 u32 a0, a1; 788 u32 a0, a1;
782 789
783 if (call < SYS_SOCKET || call > SYS_RECVMMSG) 790 if (call < SYS_SOCKET || call > SYS_SENDMMSG)
784 return -EINVAL; 791 return -EINVAL;
785 if (copy_from_user(a, args, nas[call])) 792 if (copy_from_user(a, args, nas[call]))
786 return -EFAULT; 793 return -EFAULT;
@@ -839,6 +846,9 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
839 case SYS_SENDMSG: 846 case SYS_SENDMSG:
840 ret = compat_sys_sendmsg(a0, compat_ptr(a1), a[2]); 847 ret = compat_sys_sendmsg(a0, compat_ptr(a1), a[2]);
841 break; 848 break;
849 case SYS_SENDMMSG:
850 ret = compat_sys_sendmmsg(a0, compat_ptr(a1), a[2], a[3]);
851 break;
842 case SYS_RECVMSG: 852 case SYS_RECVMSG:
843 ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); 853 ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
844 break; 854 break;
diff --git a/net/socket.c b/net/socket.c
index d25f5a9d6fa2..ed50255143d5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -551,11 +551,10 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
551} 551}
552EXPORT_SYMBOL(sock_tx_timestamp); 552EXPORT_SYMBOL(sock_tx_timestamp);
553 553
554static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, 554static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
555 struct msghdr *msg, size_t size) 555 struct msghdr *msg, size_t size)
556{ 556{
557 struct sock_iocb *si = kiocb_to_siocb(iocb); 557 struct sock_iocb *si = kiocb_to_siocb(iocb);
558 int err;
559 558
560 sock_update_classid(sock->sk); 559 sock_update_classid(sock->sk);
561 560
@@ -564,13 +563,17 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
564 si->msg = msg; 563 si->msg = msg;
565 si->size = size; 564 si->size = size;
566 565
567 err = security_socket_sendmsg(sock, msg, size);
568 if (err)
569 return err;
570
571 return sock->ops->sendmsg(iocb, sock, msg, size); 566 return sock->ops->sendmsg(iocb, sock, msg, size);
572} 567}
573 568
569static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
570 struct msghdr *msg, size_t size)
571{
572 int err = security_socket_sendmsg(sock, msg, size);
573
574 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
575}
576
574int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) 577int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
575{ 578{
576 struct kiocb iocb; 579 struct kiocb iocb;
@@ -586,6 +589,20 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
586} 589}
587EXPORT_SYMBOL(sock_sendmsg); 590EXPORT_SYMBOL(sock_sendmsg);
588 591
592int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
593{
594 struct kiocb iocb;
595 struct sock_iocb siocb;
596 int ret;
597
598 init_sync_kiocb(&iocb, NULL);
599 iocb.private = &siocb;
600 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
601 if (-EIOCBQUEUED == ret)
602 ret = wait_on_sync_kiocb(&iocb);
603 return ret;
604}
605
589int kernel_sendmsg(struct socket *sock, struct msghdr *msg, 606int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
590 struct kvec *vec, size_t num, size_t size) 607 struct kvec *vec, size_t num, size_t size)
591{ 608{
@@ -1863,57 +1880,47 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1863#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) 1880#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
1864#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) 1881#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1865 1882
1866/* 1883static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
1867 * BSD sendmsg interface 1884 struct msghdr *msg_sys, unsigned flags, int nosec)
1868 */
1869
1870SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1871{ 1885{
1872 struct compat_msghdr __user *msg_compat = 1886 struct compat_msghdr __user *msg_compat =
1873 (struct compat_msghdr __user *)msg; 1887 (struct compat_msghdr __user *)msg;
1874 struct socket *sock;
1875 struct sockaddr_storage address; 1888 struct sockaddr_storage address;
1876 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; 1889 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1877 unsigned char ctl[sizeof(struct cmsghdr) + 20] 1890 unsigned char ctl[sizeof(struct cmsghdr) + 20]
1878 __attribute__ ((aligned(sizeof(__kernel_size_t)))); 1891 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1879 /* 20 is size of ipv6_pktinfo */ 1892 /* 20 is size of ipv6_pktinfo */
1880 unsigned char *ctl_buf = ctl; 1893 unsigned char *ctl_buf = ctl;
1881 struct msghdr msg_sys;
1882 int err, ctl_len, iov_size, total_len; 1894 int err, ctl_len, iov_size, total_len;
1883 int fput_needed;
1884 1895
1885 err = -EFAULT; 1896 err = -EFAULT;
1886 if (MSG_CMSG_COMPAT & flags) { 1897 if (MSG_CMSG_COMPAT & flags) {
1887 if (get_compat_msghdr(&msg_sys, msg_compat)) 1898 if (get_compat_msghdr(msg_sys, msg_compat))
1888 return -EFAULT; 1899 return -EFAULT;
1889 } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) 1900 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
1890 return -EFAULT; 1901 return -EFAULT;
1891 1902
1892 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1893 if (!sock)
1894 goto out;
1895
1896 /* do not move before msg_sys is valid */ 1903 /* do not move before msg_sys is valid */
1897 err = -EMSGSIZE; 1904 err = -EMSGSIZE;
1898 if (msg_sys.msg_iovlen > UIO_MAXIOV) 1905 if (msg_sys->msg_iovlen > UIO_MAXIOV)
1899 goto out_put; 1906 goto out;
1900 1907
1901 /* Check whether to allocate the iovec area */ 1908 /* Check whether to allocate the iovec area */
1902 err = -ENOMEM; 1909 err = -ENOMEM;
1903 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); 1910 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
1904 if (msg_sys.msg_iovlen > UIO_FASTIOV) { 1911 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
1905 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); 1912 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1906 if (!iov) 1913 if (!iov)
1907 goto out_put; 1914 goto out;
1908 } 1915 }
1909 1916
1910 /* This will also move the address data into kernel space */ 1917 /* This will also move the address data into kernel space */
1911 if (MSG_CMSG_COMPAT & flags) { 1918 if (MSG_CMSG_COMPAT & flags) {
1912 err = verify_compat_iovec(&msg_sys, iov, 1919 err = verify_compat_iovec(msg_sys, iov,
1913 (struct sockaddr *)&address, 1920 (struct sockaddr *)&address,
1914 VERIFY_READ); 1921 VERIFY_READ);
1915 } else 1922 } else
1916 err = verify_iovec(&msg_sys, iov, 1923 err = verify_iovec(msg_sys, iov,
1917 (struct sockaddr *)&address, 1924 (struct sockaddr *)&address,
1918 VERIFY_READ); 1925 VERIFY_READ);
1919 if (err < 0) 1926 if (err < 0)
@@ -1922,17 +1929,17 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1922 1929
1923 err = -ENOBUFS; 1930 err = -ENOBUFS;
1924 1931
1925 if (msg_sys.msg_controllen > INT_MAX) 1932 if (msg_sys->msg_controllen > INT_MAX)
1926 goto out_freeiov; 1933 goto out_freeiov;
1927 ctl_len = msg_sys.msg_controllen; 1934 ctl_len = msg_sys->msg_controllen;
1928 if ((MSG_CMSG_COMPAT & flags) && ctl_len) { 1935 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
1929 err = 1936 err =
1930 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, 1937 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
1931 sizeof(ctl)); 1938 sizeof(ctl));
1932 if (err) 1939 if (err)
1933 goto out_freeiov; 1940 goto out_freeiov;
1934 ctl_buf = msg_sys.msg_control; 1941 ctl_buf = msg_sys->msg_control;
1935 ctl_len = msg_sys.msg_controllen; 1942 ctl_len = msg_sys->msg_controllen;
1936 } else if (ctl_len) { 1943 } else if (ctl_len) {
1937 if (ctl_len > sizeof(ctl)) { 1944 if (ctl_len > sizeof(ctl)) {
1938 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); 1945 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
@@ -1941,21 +1948,22 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1941 } 1948 }
1942 err = -EFAULT; 1949 err = -EFAULT;
1943 /* 1950 /*
1944 * Careful! Before this, msg_sys.msg_control contains a user pointer. 1951 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1945 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted 1952 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1946 * checking falls down on this. 1953 * checking falls down on this.
1947 */ 1954 */
1948 if (copy_from_user(ctl_buf, 1955 if (copy_from_user(ctl_buf,
1949 (void __user __force *)msg_sys.msg_control, 1956 (void __user __force *)msg_sys->msg_control,
1950 ctl_len)) 1957 ctl_len))
1951 goto out_freectl; 1958 goto out_freectl;
1952 msg_sys.msg_control = ctl_buf; 1959 msg_sys->msg_control = ctl_buf;
1953 } 1960 }
1954 msg_sys.msg_flags = flags; 1961 msg_sys->msg_flags = flags;
1955 1962
1956 if (sock->file->f_flags & O_NONBLOCK) 1963 if (sock->file->f_flags & O_NONBLOCK)
1957 msg_sys.msg_flags |= MSG_DONTWAIT; 1964 msg_sys->msg_flags |= MSG_DONTWAIT;
1958 err = sock_sendmsg(sock, &msg_sys, total_len); 1965 err = (nosec ? sock_sendmsg_nosec : sock_sendmsg)(sock, msg_sys,
1966 total_len);
1959 1967
1960out_freectl: 1968out_freectl:
1961 if (ctl_buf != ctl) 1969 if (ctl_buf != ctl)
@@ -1963,12 +1971,114 @@ out_freectl:
1963out_freeiov: 1971out_freeiov:
1964 if (iov != iovstack) 1972 if (iov != iovstack)
1965 sock_kfree_s(sock->sk, iov, iov_size); 1973 sock_kfree_s(sock->sk, iov, iov_size);
1966out_put: 1974out:
1975 return err;
1976}
1977
1978/*
1979 * BSD sendmsg interface
1980 */
1981
1982SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1983{
1984 int fput_needed, err;
1985 struct msghdr msg_sys;
1986 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
1987
1988 if (!sock)
1989 goto out;
1990
1991 err = __sys_sendmsg(sock, msg, &msg_sys, flags, 0);
1992
1967 fput_light(sock->file, fput_needed); 1993 fput_light(sock->file, fput_needed);
1968out: 1994out:
1969 return err; 1995 return err;
1970} 1996}
1971 1997
1998/*
1999 * Linux sendmmsg interface
2000 */
2001
2002int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2003 unsigned int flags)
2004{
2005 int fput_needed, err, datagrams;
2006 struct socket *sock;
2007 struct mmsghdr __user *entry;
2008 struct compat_mmsghdr __user *compat_entry;
2009 struct msghdr msg_sys;
2010
2011 datagrams = 0;
2012
2013 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2014 if (!sock)
2015 return err;
2016
2017 err = sock_error(sock->sk);
2018 if (err)
2019 goto out_put;
2020
2021 entry = mmsg;
2022 compat_entry = (struct compat_mmsghdr __user *)mmsg;
2023
2024 while (datagrams < vlen) {
2025 /*
2026 * No need to ask LSM for more than the first datagram.
2027 */
2028 if (MSG_CMSG_COMPAT & flags) {
2029 err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
2030 &msg_sys, flags, datagrams);
2031 if (err < 0)
2032 break;
2033 err = __put_user(err, &compat_entry->msg_len);
2034 ++compat_entry;
2035 } else {
2036 err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
2037 &msg_sys, flags, datagrams);
2038 if (err < 0)
2039 break;
2040 err = put_user(err, &entry->msg_len);
2041 ++entry;
2042 }
2043
2044 if (err)
2045 break;
2046 ++datagrams;
2047 }
2048
2049out_put:
2050 fput_light(sock->file, fput_needed);
2051
2052 if (err == 0)
2053 return datagrams;
2054
2055 if (datagrams != 0) {
2056 /*
2057 * We may send less entries than requested (vlen) if the
2058 * sock is non blocking...
2059 */
2060 if (err != -EAGAIN) {
2061 /*
2062 * ... or if sendmsg returns an error after we
2063 * send some datagrams, where we record the
2064 * error to return on the next call or if the
2065 * app asks about it using getsockopt(SO_ERROR).
2066 */
2067 sock->sk->sk_err = -err;
2068 }
2069
2070 return datagrams;
2071 }
2072
2073 return err;
2074}
2075
2076SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2077 unsigned int, vlen, unsigned int, flags)
2078{
2079 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2080}
2081
1972static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, 2082static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
1973 struct msghdr *msg_sys, unsigned flags, int nosec) 2083 struct msghdr *msg_sys, unsigned flags, int nosec)
1974{ 2084{
@@ -2214,11 +2324,11 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2214#ifdef __ARCH_WANT_SYS_SOCKETCALL 2324#ifdef __ARCH_WANT_SYS_SOCKETCALL
2215/* Argument list sizes for sys_socketcall */ 2325/* Argument list sizes for sys_socketcall */
2216#define AL(x) ((x) * sizeof(unsigned long)) 2326#define AL(x) ((x) * sizeof(unsigned long))
2217static const unsigned char nargs[20] = { 2327static const unsigned char nargs[21] = {
2218 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), 2328 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
2219 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), 2329 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
2220 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), 2330 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
2221 AL(4), AL(5) 2331 AL(4), AL(5), AL(4)
2222}; 2332};
2223 2333
2224#undef AL 2334#undef AL
@@ -2238,7 +2348,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
2238 int err; 2348 int err;
2239 unsigned int len; 2349 unsigned int len;
2240 2350
2241 if (call < 1 || call > SYS_RECVMMSG) 2351 if (call < 1 || call > SYS_SENDMMSG)
2242 return -EINVAL; 2352 return -EINVAL;
2243 2353
2244 len = nargs[call]; 2354 len = nargs[call];
@@ -2313,6 +2423,9 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
2313 case SYS_SENDMSG: 2423 case SYS_SENDMSG:
2314 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); 2424 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2315 break; 2425 break;
2426 case SYS_SENDMMSG:
2427 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2428 break;
2316 case SYS_RECVMSG: 2429 case SYS_RECVMSG:
2317 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); 2430 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2318 break; 2431 break;