diff options
author | Anton Blanchard <anton@samba.org> | 2011-05-02 16:21:35 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-05-05 14:10:14 -0400 |
commit | 228e548e602061b08ee8e8966f567c12aa079682 (patch) | |
tree | 4c79ecf071d6174d42da1557812a4646d0aaa5af | |
parent | 1c5cae815d19ffe02bdfda1260949ef2b1806171 (diff) |
net: Add sendmmsg socket system call
This patch adds sendmmsg, a multiple-message send syscall that is the
send counterpart of the existing recvmmsg syscall. It is heavily
based on the patch by Arnaldo that added recvmmsg.
I wrote a microbenchmark to test the performance gains of using
this new syscall:
http://ozlabs.org/~anton/junkcode/sendmmsg_test.c
The test was run on a ppc64 box with a 10 Gbit network card. The
benchmark can send both UDP and RAW ethernet packets.
64B UDP
batch pkts/sec
1 804570
2 872800 (+ 8 %)
4 916556 (+14 %)
8 939712 (+17 %)
16 952688 (+18 %)
32 956448 (+19 %)
64 964800 (+20 %)
64B raw socket
batch pkts/sec
1 1201449
2 1350028 (+12 %)
4 1461416 (+22 %)
8 1513080 (+26 %)
16 1541216 (+28 %)
32 1553440 (+29 %)
64 1557888 (+30 %)
At a batch size of 64 we see a 20% improvement in throughput on UDP send
and 30% on raw socket send, relative to a batch size of 1.
[ Add sparc syscall entries. -DaveM ]
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | arch/powerpc/include/asm/systbl.h | 1 | ||||
-rw-r--r-- | arch/powerpc/include/asm/unistd.h | 3 | ||||
-rw-r--r-- | arch/sparc/include/asm/unistd.h | 3 | ||||
-rw-r--r-- | arch/sparc/kernel/systbls_32.S | 2 | ||||
-rw-r--r-- | arch/sparc/kernel/systbls_64.S | 4 | ||||
-rw-r--r-- | arch/x86/ia32/ia32entry.S | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/unistd_32.h | 3 | ||||
-rw-r--r-- | arch/x86/include/asm/unistd_64.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/syscall_table_32.S | 1 | ||||
-rw-r--r-- | include/linux/net.h | 1 | ||||
-rw-r--r-- | include/linux/socket.h | 2 | ||||
-rw-r--r-- | include/linux/syscalls.h | 2 | ||||
-rw-r--r-- | include/net/compat.h | 2 | ||||
-rw-r--r-- | kernel/sys_ni.c | 2 | ||||
-rw-r--r-- | net/compat.c | 16 | ||||
-rw-r--r-- | net/socket.c | 199 |
16 files changed, 192 insertions, 52 deletions
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 60f64b132bd4..8489d372077f 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h | |||
@@ -352,3 +352,4 @@ SYSCALL_SPU(name_to_handle_at) | |||
352 | COMPAT_SYS_SPU(open_by_handle_at) | 352 | COMPAT_SYS_SPU(open_by_handle_at) |
353 | COMPAT_SYS_SPU(clock_adjtime) | 353 | COMPAT_SYS_SPU(clock_adjtime) |
354 | SYSCALL_SPU(syncfs) | 354 | SYSCALL_SPU(syncfs) |
355 | COMPAT_SYS_SPU(sendmmsg) | ||
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 3c215648ce6d..6d23c8193caa 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h | |||
@@ -371,10 +371,11 @@ | |||
371 | #define __NR_open_by_handle_at 346 | 371 | #define __NR_open_by_handle_at 346 |
372 | #define __NR_clock_adjtime 347 | 372 | #define __NR_clock_adjtime 347 |
373 | #define __NR_syncfs 348 | 373 | #define __NR_syncfs 348 |
374 | #define __NR_sendmmsg 349 | ||
374 | 375 | ||
375 | #ifdef __KERNEL__ | 376 | #ifdef __KERNEL__ |
376 | 377 | ||
377 | #define __NR_syscalls 349 | 378 | #define __NR_syscalls 350 |
378 | 379 | ||
379 | #define __NR__exit __NR_exit | 380 | #define __NR__exit __NR_exit |
380 | #define NR_syscalls __NR_syscalls | 381 | #define NR_syscalls __NR_syscalls |
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index 9d897b6db983..c5387ed0add8 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h | |||
@@ -404,8 +404,9 @@ | |||
404 | #define __NR_open_by_handle_at 333 | 404 | #define __NR_open_by_handle_at 333 |
405 | #define __NR_clock_adjtime 334 | 405 | #define __NR_clock_adjtime 334 |
406 | #define __NR_syncfs 335 | 406 | #define __NR_syncfs 335 |
407 | #define __NR_sendmmsg 336 | ||
407 | 408 | ||
408 | #define NR_syscalls 336 | 409 | #define NR_syscalls 337 |
409 | 410 | ||
410 | #ifdef __32bit_syscall_numbers__ | 411 | #ifdef __32bit_syscall_numbers__ |
411 | /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants, | 412 | /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants, |
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 47ac73c32e88..332c83ff7701 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S | |||
@@ -84,4 +84,4 @@ sys_call_table: | |||
84 | /*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv | 84 | /*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv |
85 | /*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init | 85 | /*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init |
86 | /*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime | 86 | /*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime |
87 | /*335*/ .long sys_syncfs | 87 | /*335*/ .long sys_syncfs, sys_sendmmsg |
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 4f3170c1ef47..43887ca0be0e 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S | |||
@@ -85,7 +85,7 @@ sys_call_table32: | |||
85 | /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv | 85 | /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv |
86 | .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init | 86 | .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init |
87 | /*330*/ .word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime | 87 | /*330*/ .word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime |
88 | .word sys_syncfs | 88 | .word sys_syncfs, compat_sys_sendmmsg |
89 | 89 | ||
90 | #endif /* CONFIG_COMPAT */ | 90 | #endif /* CONFIG_COMPAT */ |
91 | 91 | ||
@@ -162,4 +162,4 @@ sys_call_table: | |||
162 | /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv | 162 | /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv |
163 | .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init | 163 | .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init |
164 | /*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime | 164 | /*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime |
165 | .word sys_syncfs | 165 | .word sys_syncfs, sys_sendmmsg |
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 849a9d23c71d..95f5826be458 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S | |||
@@ -848,4 +848,5 @@ ia32_sys_call_table: | |||
848 | .quad compat_sys_open_by_handle_at | 848 | .quad compat_sys_open_by_handle_at |
849 | .quad compat_sys_clock_adjtime | 849 | .quad compat_sys_clock_adjtime |
850 | .quad sys_syncfs | 850 | .quad sys_syncfs |
851 | .quad compat_sys_sendmmsg /* 345 */ | ||
851 | ia32_syscall_end: | 852 | ia32_syscall_end: |
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index a755ef5e5977..fb6a625c99bf 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h | |||
@@ -350,10 +350,11 @@ | |||
350 | #define __NR_open_by_handle_at 342 | 350 | #define __NR_open_by_handle_at 342 |
351 | #define __NR_clock_adjtime 343 | 351 | #define __NR_clock_adjtime 343 |
352 | #define __NR_syncfs 344 | 352 | #define __NR_syncfs 344 |
353 | #define __NR_sendmmsg 345 | ||
353 | 354 | ||
354 | #ifdef __KERNEL__ | 355 | #ifdef __KERNEL__ |
355 | 356 | ||
356 | #define NR_syscalls 345 | 357 | #define NR_syscalls 346 |
357 | 358 | ||
358 | #define __ARCH_WANT_IPC_PARSE_VERSION | 359 | #define __ARCH_WANT_IPC_PARSE_VERSION |
359 | #define __ARCH_WANT_OLD_READDIR | 360 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 160fa76bd578..79f90eb15aad 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h | |||
@@ -677,6 +677,8 @@ __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) | |||
677 | __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) | 677 | __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) |
678 | #define __NR_syncfs 306 | 678 | #define __NR_syncfs 306 |
679 | __SYSCALL(__NR_syncfs, sys_syncfs) | 679 | __SYSCALL(__NR_syncfs, sys_syncfs) |
680 | #define __NR_sendmmsg 307 | ||
681 | __SYSCALL(__NR_sendmmsg, sys_sendmmsg) | ||
680 | 682 | ||
681 | #ifndef __NO_STUBS | 683 | #ifndef __NO_STUBS |
682 | #define __ARCH_WANT_OLD_READDIR | 684 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index abce34d5c79d..32cbffb0c494 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -344,3 +344,4 @@ ENTRY(sys_call_table) | |||
344 | .long sys_open_by_handle_at | 344 | .long sys_open_by_handle_at |
345 | .long sys_clock_adjtime | 345 | .long sys_clock_adjtime |
346 | .long sys_syncfs | 346 | .long sys_syncfs |
347 | .long sys_sendmmsg /* 345 */ | ||
diff --git a/include/linux/net.h b/include/linux/net.h index 94de83c0f877..1da55e9b6f01 100644 --- a/include/linux/net.h +++ b/include/linux/net.h | |||
@@ -42,6 +42,7 @@ | |||
42 | #define SYS_RECVMSG 17 /* sys_recvmsg(2) */ | 42 | #define SYS_RECVMSG 17 /* sys_recvmsg(2) */ |
43 | #define SYS_ACCEPT4 18 /* sys_accept4(2) */ | 43 | #define SYS_ACCEPT4 18 /* sys_accept4(2) */ |
44 | #define SYS_RECVMMSG 19 /* sys_recvmmsg(2) */ | 44 | #define SYS_RECVMMSG 19 /* sys_recvmmsg(2) */ |
45 | #define SYS_SENDMMSG 20 /* sys_sendmmsg(2) */ | ||
45 | 46 | ||
46 | typedef enum { | 47 | typedef enum { |
47 | SS_FREE = 0, /* not allocated */ | 48 | SS_FREE = 0, /* not allocated */ |
diff --git a/include/linux/socket.h b/include/linux/socket.h index d2b5e982f079..4ef98e422fde 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h | |||
@@ -333,5 +333,7 @@ struct timespec; | |||
333 | 333 | ||
334 | extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, | 334 | extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, |
335 | unsigned int flags, struct timespec *timeout); | 335 | unsigned int flags, struct timespec *timeout); |
336 | extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, | ||
337 | unsigned int vlen, unsigned int flags); | ||
336 | #endif /* not kernel and not glibc */ | 338 | #endif /* not kernel and not glibc */ |
337 | #endif /* _LINUX_SOCKET_H */ | 339 | #endif /* _LINUX_SOCKET_H */ |
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 83ecc1749ef6..ab71447d0c5a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h | |||
@@ -610,6 +610,8 @@ asmlinkage long sys_send(int, void __user *, size_t, unsigned); | |||
610 | asmlinkage long sys_sendto(int, void __user *, size_t, unsigned, | 610 | asmlinkage long sys_sendto(int, void __user *, size_t, unsigned, |
611 | struct sockaddr __user *, int); | 611 | struct sockaddr __user *, int); |
612 | asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags); | 612 | asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags); |
613 | asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg, | ||
614 | unsigned int vlen, unsigned flags); | ||
613 | asmlinkage long sys_recv(int, void __user *, size_t, unsigned); | 615 | asmlinkage long sys_recv(int, void __user *, size_t, unsigned); |
614 | asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned, | 616 | asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned, |
615 | struct sockaddr __user *, int __user *); | 617 | struct sockaddr __user *, int __user *); |
diff --git a/include/net/compat.h b/include/net/compat.h index 28d5428ec6a2..9ee75edcc295 100644 --- a/include/net/compat.h +++ b/include/net/compat.h | |||
@@ -43,6 +43,8 @@ extern int compat_sock_get_timestampns(struct sock *, struct timespec __user *); | |||
43 | extern int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *); | 43 | extern int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *); |
44 | extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int); | 44 | extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int); |
45 | extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned); | 45 | extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned); |
46 | extern asmlinkage long compat_sys_sendmmsg(int, struct compat_mmsghdr __user *, | ||
47 | unsigned, unsigned); | ||
46 | extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned); | 48 | extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned); |
47 | extern asmlinkage long compat_sys_recvmmsg(int, struct compat_mmsghdr __user *, | 49 | extern asmlinkage long compat_sys_recvmmsg(int, struct compat_mmsghdr __user *, |
48 | unsigned, unsigned, | 50 | unsigned, unsigned, |
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 25cc41cd8f33..97e966f171c6 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
@@ -46,7 +46,9 @@ cond_syscall(sys_getsockopt); | |||
46 | cond_syscall(compat_sys_getsockopt); | 46 | cond_syscall(compat_sys_getsockopt); |
47 | cond_syscall(sys_shutdown); | 47 | cond_syscall(sys_shutdown); |
48 | cond_syscall(sys_sendmsg); | 48 | cond_syscall(sys_sendmsg); |
49 | cond_syscall(sys_sendmmsg); | ||
49 | cond_syscall(compat_sys_sendmsg); | 50 | cond_syscall(compat_sys_sendmsg); |
51 | cond_syscall(compat_sys_sendmmsg); | ||
50 | cond_syscall(sys_recvmsg); | 52 | cond_syscall(sys_recvmsg); |
51 | cond_syscall(sys_recvmmsg); | 53 | cond_syscall(sys_recvmmsg); |
52 | cond_syscall(compat_sys_recvmsg); | 54 | cond_syscall(compat_sys_recvmsg); |
diff --git a/net/compat.c b/net/compat.c index 3649d5895361..c578d9382e19 100644 --- a/net/compat.c +++ b/net/compat.c | |||
@@ -722,11 +722,11 @@ EXPORT_SYMBOL(compat_mc_getsockopt); | |||
722 | 722 | ||
723 | /* Argument list sizes for compat_sys_socketcall */ | 723 | /* Argument list sizes for compat_sys_socketcall */ |
724 | #define AL(x) ((x) * sizeof(u32)) | 724 | #define AL(x) ((x) * sizeof(u32)) |
725 | static unsigned char nas[20] = { | 725 | static unsigned char nas[21] = { |
726 | AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), | 726 | AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), |
727 | AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), | 727 | AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), |
728 | AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), | 728 | AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), |
729 | AL(4), AL(5) | 729 | AL(4), AL(5), AL(4) |
730 | }; | 730 | }; |
731 | #undef AL | 731 | #undef AL |
732 | 732 | ||
@@ -735,6 +735,13 @@ asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, uns | |||
735 | return sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); | 735 | return sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); |
736 | } | 736 | } |
737 | 737 | ||
738 | asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, | ||
739 | unsigned vlen, unsigned int flags) | ||
740 | { | ||
741 | return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, | ||
742 | flags | MSG_CMSG_COMPAT); | ||
743 | } | ||
744 | |||
738 | asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) | 745 | asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) |
739 | { | 746 | { |
740 | return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); | 747 | return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); |
@@ -780,7 +787,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) | |||
780 | u32 a[6]; | 787 | u32 a[6]; |
781 | u32 a0, a1; | 788 | u32 a0, a1; |
782 | 789 | ||
783 | if (call < SYS_SOCKET || call > SYS_RECVMMSG) | 790 | if (call < SYS_SOCKET || call > SYS_SENDMMSG) |
784 | return -EINVAL; | 791 | return -EINVAL; |
785 | if (copy_from_user(a, args, nas[call])) | 792 | if (copy_from_user(a, args, nas[call])) |
786 | return -EFAULT; | 793 | return -EFAULT; |
@@ -839,6 +846,9 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) | |||
839 | case SYS_SENDMSG: | 846 | case SYS_SENDMSG: |
840 | ret = compat_sys_sendmsg(a0, compat_ptr(a1), a[2]); | 847 | ret = compat_sys_sendmsg(a0, compat_ptr(a1), a[2]); |
841 | break; | 848 | break; |
849 | case SYS_SENDMMSG: | ||
850 | ret = compat_sys_sendmmsg(a0, compat_ptr(a1), a[2], a[3]); | ||
851 | break; | ||
842 | case SYS_RECVMSG: | 852 | case SYS_RECVMSG: |
843 | ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); | 853 | ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); |
844 | break; | 854 | break; |
diff --git a/net/socket.c b/net/socket.c index d25f5a9d6fa2..ed50255143d5 100644 --- a/net/socket.c +++ b/net/socket.c | |||
@@ -551,11 +551,10 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) | |||
551 | } | 551 | } |
552 | EXPORT_SYMBOL(sock_tx_timestamp); | 552 | EXPORT_SYMBOL(sock_tx_timestamp); |
553 | 553 | ||
554 | static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, | 554 | static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, |
555 | struct msghdr *msg, size_t size) | 555 | struct msghdr *msg, size_t size) |
556 | { | 556 | { |
557 | struct sock_iocb *si = kiocb_to_siocb(iocb); | 557 | struct sock_iocb *si = kiocb_to_siocb(iocb); |
558 | int err; | ||
559 | 558 | ||
560 | sock_update_classid(sock->sk); | 559 | sock_update_classid(sock->sk); |
561 | 560 | ||
@@ -564,13 +563,17 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, | |||
564 | si->msg = msg; | 563 | si->msg = msg; |
565 | si->size = size; | 564 | si->size = size; |
566 | 565 | ||
567 | err = security_socket_sendmsg(sock, msg, size); | ||
568 | if (err) | ||
569 | return err; | ||
570 | |||
571 | return sock->ops->sendmsg(iocb, sock, msg, size); | 566 | return sock->ops->sendmsg(iocb, sock, msg, size); |
572 | } | 567 | } |
573 | 568 | ||
569 | static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, | ||
570 | struct msghdr *msg, size_t size) | ||
571 | { | ||
572 | int err = security_socket_sendmsg(sock, msg, size); | ||
573 | |||
574 | return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size); | ||
575 | } | ||
576 | |||
574 | int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) | 577 | int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) |
575 | { | 578 | { |
576 | struct kiocb iocb; | 579 | struct kiocb iocb; |
@@ -586,6 +589,20 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) | |||
586 | } | 589 | } |
587 | EXPORT_SYMBOL(sock_sendmsg); | 590 | EXPORT_SYMBOL(sock_sendmsg); |
588 | 591 | ||
592 | int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size) | ||
593 | { | ||
594 | struct kiocb iocb; | ||
595 | struct sock_iocb siocb; | ||
596 | int ret; | ||
597 | |||
598 | init_sync_kiocb(&iocb, NULL); | ||
599 | iocb.private = &siocb; | ||
600 | ret = __sock_sendmsg_nosec(&iocb, sock, msg, size); | ||
601 | if (-EIOCBQUEUED == ret) | ||
602 | ret = wait_on_sync_kiocb(&iocb); | ||
603 | return ret; | ||
604 | } | ||
605 | |||
589 | int kernel_sendmsg(struct socket *sock, struct msghdr *msg, | 606 | int kernel_sendmsg(struct socket *sock, struct msghdr *msg, |
590 | struct kvec *vec, size_t num, size_t size) | 607 | struct kvec *vec, size_t num, size_t size) |
591 | { | 608 | { |
@@ -1863,57 +1880,47 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how) | |||
1863 | #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) | 1880 | #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) |
1864 | #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) | 1881 | #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) |
1865 | 1882 | ||
1866 | /* | 1883 | static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, |
1867 | * BSD sendmsg interface | 1884 | struct msghdr *msg_sys, unsigned flags, int nosec) |
1868 | */ | ||
1869 | |||
1870 | SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) | ||
1871 | { | 1885 | { |
1872 | struct compat_msghdr __user *msg_compat = | 1886 | struct compat_msghdr __user *msg_compat = |
1873 | (struct compat_msghdr __user *)msg; | 1887 | (struct compat_msghdr __user *)msg; |
1874 | struct socket *sock; | ||
1875 | struct sockaddr_storage address; | 1888 | struct sockaddr_storage address; |
1876 | struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; | 1889 | struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; |
1877 | unsigned char ctl[sizeof(struct cmsghdr) + 20] | 1890 | unsigned char ctl[sizeof(struct cmsghdr) + 20] |
1878 | __attribute__ ((aligned(sizeof(__kernel_size_t)))); | 1891 | __attribute__ ((aligned(sizeof(__kernel_size_t)))); |
1879 | /* 20 is size of ipv6_pktinfo */ | 1892 | /* 20 is size of ipv6_pktinfo */ |
1880 | unsigned char *ctl_buf = ctl; | 1893 | unsigned char *ctl_buf = ctl; |
1881 | struct msghdr msg_sys; | ||
1882 | int err, ctl_len, iov_size, total_len; | 1894 | int err, ctl_len, iov_size, total_len; |
1883 | int fput_needed; | ||
1884 | 1895 | ||
1885 | err = -EFAULT; | 1896 | err = -EFAULT; |
1886 | if (MSG_CMSG_COMPAT & flags) { | 1897 | if (MSG_CMSG_COMPAT & flags) { |
1887 | if (get_compat_msghdr(&msg_sys, msg_compat)) | 1898 | if (get_compat_msghdr(msg_sys, msg_compat)) |
1888 | return -EFAULT; | 1899 | return -EFAULT; |
1889 | } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) | 1900 | } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) |
1890 | return -EFAULT; | 1901 | return -EFAULT; |
1891 | 1902 | ||
1892 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | ||
1893 | if (!sock) | ||
1894 | goto out; | ||
1895 | |||
1896 | /* do not move before msg_sys is valid */ | 1903 | /* do not move before msg_sys is valid */ |
1897 | err = -EMSGSIZE; | 1904 | err = -EMSGSIZE; |
1898 | if (msg_sys.msg_iovlen > UIO_MAXIOV) | 1905 | if (msg_sys->msg_iovlen > UIO_MAXIOV) |
1899 | goto out_put; | 1906 | goto out; |
1900 | 1907 | ||
1901 | /* Check whether to allocate the iovec area */ | 1908 | /* Check whether to allocate the iovec area */ |
1902 | err = -ENOMEM; | 1909 | err = -ENOMEM; |
1903 | iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); | 1910 | iov_size = msg_sys->msg_iovlen * sizeof(struct iovec); |
1904 | if (msg_sys.msg_iovlen > UIO_FASTIOV) { | 1911 | if (msg_sys->msg_iovlen > UIO_FASTIOV) { |
1905 | iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); | 1912 | iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); |
1906 | if (!iov) | 1913 | if (!iov) |
1907 | goto out_put; | 1914 | goto out; |
1908 | } | 1915 | } |
1909 | 1916 | ||
1910 | /* This will also move the address data into kernel space */ | 1917 | /* This will also move the address data into kernel space */ |
1911 | if (MSG_CMSG_COMPAT & flags) { | 1918 | if (MSG_CMSG_COMPAT & flags) { |
1912 | err = verify_compat_iovec(&msg_sys, iov, | 1919 | err = verify_compat_iovec(msg_sys, iov, |
1913 | (struct sockaddr *)&address, | 1920 | (struct sockaddr *)&address, |
1914 | VERIFY_READ); | 1921 | VERIFY_READ); |
1915 | } else | 1922 | } else |
1916 | err = verify_iovec(&msg_sys, iov, | 1923 | err = verify_iovec(msg_sys, iov, |
1917 | (struct sockaddr *)&address, | 1924 | (struct sockaddr *)&address, |
1918 | VERIFY_READ); | 1925 | VERIFY_READ); |
1919 | if (err < 0) | 1926 | if (err < 0) |
@@ -1922,17 +1929,17 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) | |||
1922 | 1929 | ||
1923 | err = -ENOBUFS; | 1930 | err = -ENOBUFS; |
1924 | 1931 | ||
1925 | if (msg_sys.msg_controllen > INT_MAX) | 1932 | if (msg_sys->msg_controllen > INT_MAX) |
1926 | goto out_freeiov; | 1933 | goto out_freeiov; |
1927 | ctl_len = msg_sys.msg_controllen; | 1934 | ctl_len = msg_sys->msg_controllen; |
1928 | if ((MSG_CMSG_COMPAT & flags) && ctl_len) { | 1935 | if ((MSG_CMSG_COMPAT & flags) && ctl_len) { |
1929 | err = | 1936 | err = |
1930 | cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, | 1937 | cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl, |
1931 | sizeof(ctl)); | 1938 | sizeof(ctl)); |
1932 | if (err) | 1939 | if (err) |
1933 | goto out_freeiov; | 1940 | goto out_freeiov; |
1934 | ctl_buf = msg_sys.msg_control; | 1941 | ctl_buf = msg_sys->msg_control; |
1935 | ctl_len = msg_sys.msg_controllen; | 1942 | ctl_len = msg_sys->msg_controllen; |
1936 | } else if (ctl_len) { | 1943 | } else if (ctl_len) { |
1937 | if (ctl_len > sizeof(ctl)) { | 1944 | if (ctl_len > sizeof(ctl)) { |
1938 | ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); | 1945 | ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); |
@@ -1941,21 +1948,22 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) | |||
1941 | } | 1948 | } |
1942 | err = -EFAULT; | 1949 | err = -EFAULT; |
1943 | /* | 1950 | /* |
1944 | * Careful! Before this, msg_sys.msg_control contains a user pointer. | 1951 | * Careful! Before this, msg_sys->msg_control contains a user pointer. |
1945 | * Afterwards, it will be a kernel pointer. Thus the compiler-assisted | 1952 | * Afterwards, it will be a kernel pointer. Thus the compiler-assisted |
1946 | * checking falls down on this. | 1953 | * checking falls down on this. |
1947 | */ | 1954 | */ |
1948 | if (copy_from_user(ctl_buf, | 1955 | if (copy_from_user(ctl_buf, |
1949 | (void __user __force *)msg_sys.msg_control, | 1956 | (void __user __force *)msg_sys->msg_control, |
1950 | ctl_len)) | 1957 | ctl_len)) |
1951 | goto out_freectl; | 1958 | goto out_freectl; |
1952 | msg_sys.msg_control = ctl_buf; | 1959 | msg_sys->msg_control = ctl_buf; |
1953 | } | 1960 | } |
1954 | msg_sys.msg_flags = flags; | 1961 | msg_sys->msg_flags = flags; |
1955 | 1962 | ||
1956 | if (sock->file->f_flags & O_NONBLOCK) | 1963 | if (sock->file->f_flags & O_NONBLOCK) |
1957 | msg_sys.msg_flags |= MSG_DONTWAIT; | 1964 | msg_sys->msg_flags |= MSG_DONTWAIT; |
1958 | err = sock_sendmsg(sock, &msg_sys, total_len); | 1965 | err = (nosec ? sock_sendmsg_nosec : sock_sendmsg)(sock, msg_sys, |
1966 | total_len); | ||
1959 | 1967 | ||
1960 | out_freectl: | 1968 | out_freectl: |
1961 | if (ctl_buf != ctl) | 1969 | if (ctl_buf != ctl) |
@@ -1963,12 +1971,114 @@ out_freectl: | |||
1963 | out_freeiov: | 1971 | out_freeiov: |
1964 | if (iov != iovstack) | 1972 | if (iov != iovstack) |
1965 | sock_kfree_s(sock->sk, iov, iov_size); | 1973 | sock_kfree_s(sock->sk, iov, iov_size); |
1966 | out_put: | 1974 | out: |
1975 | return err; | ||
1976 | } | ||
1977 | |||
1978 | /* | ||
1979 | * BSD sendmsg interface | ||
1980 | */ | ||
1981 | |||
1982 | SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) | ||
1983 | { | ||
1984 | int fput_needed, err; | ||
1985 | struct msghdr msg_sys; | ||
1986 | struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed); | ||
1987 | |||
1988 | if (!sock) | ||
1989 | goto out; | ||
1990 | |||
1991 | err = __sys_sendmsg(sock, msg, &msg_sys, flags, 0); | ||
1992 | |||
1967 | fput_light(sock->file, fput_needed); | 1993 | fput_light(sock->file, fput_needed); |
1968 | out: | 1994 | out: |
1969 | return err; | 1995 | return err; |
1970 | } | 1996 | } |
1971 | 1997 | ||
1998 | /* | ||
1999 | * Linux sendmmsg interface | ||
2000 | */ | ||
2001 | |||
2002 | int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, | ||
2003 | unsigned int flags) | ||
2004 | { | ||
2005 | int fput_needed, err, datagrams; | ||
2006 | struct socket *sock; | ||
2007 | struct mmsghdr __user *entry; | ||
2008 | struct compat_mmsghdr __user *compat_entry; | ||
2009 | struct msghdr msg_sys; | ||
2010 | |||
2011 | datagrams = 0; | ||
2012 | |||
2013 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | ||
2014 | if (!sock) | ||
2015 | return err; | ||
2016 | |||
2017 | err = sock_error(sock->sk); | ||
2018 | if (err) | ||
2019 | goto out_put; | ||
2020 | |||
2021 | entry = mmsg; | ||
2022 | compat_entry = (struct compat_mmsghdr __user *)mmsg; | ||
2023 | |||
2024 | while (datagrams < vlen) { | ||
2025 | /* | ||
2026 | * No need to ask LSM for more than the first datagram. | ||
2027 | */ | ||
2028 | if (MSG_CMSG_COMPAT & flags) { | ||
2029 | err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry, | ||
2030 | &msg_sys, flags, datagrams); | ||
2031 | if (err < 0) | ||
2032 | break; | ||
2033 | err = __put_user(err, &compat_entry->msg_len); | ||
2034 | ++compat_entry; | ||
2035 | } else { | ||
2036 | err = __sys_sendmsg(sock, (struct msghdr __user *)entry, | ||
2037 | &msg_sys, flags, datagrams); | ||
2038 | if (err < 0) | ||
2039 | break; | ||
2040 | err = put_user(err, &entry->msg_len); | ||
2041 | ++entry; | ||
2042 | } | ||
2043 | |||
2044 | if (err) | ||
2045 | break; | ||
2046 | ++datagrams; | ||
2047 | } | ||
2048 | |||
2049 | out_put: | ||
2050 | fput_light(sock->file, fput_needed); | ||
2051 | |||
2052 | if (err == 0) | ||
2053 | return datagrams; | ||
2054 | |||
2055 | if (datagrams != 0) { | ||
2056 | /* | ||
2057 | * We may send fewer entries than requested (vlen) if the | ||
2058 | * sock is non-blocking... | ||
2059 | */ | ||
2060 | if (err != -EAGAIN) { | ||
2061 | /* | ||
2062 | * ... or if sendmsg returns an error after we | ||
2063 | * send some datagrams, where we record the | ||
2064 | * error to return on the next call or if the | ||
2065 | * app asks about it using getsockopt(SO_ERROR). | ||
2066 | */ | ||
2067 | sock->sk->sk_err = -err; | ||
2068 | } | ||
2069 | |||
2070 | return datagrams; | ||
2071 | } | ||
2072 | |||
2073 | return err; | ||
2074 | } | ||
2075 | |||
2076 | SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg, | ||
2077 | unsigned int, vlen, unsigned int, flags) | ||
2078 | { | ||
2079 | return __sys_sendmmsg(fd, mmsg, vlen, flags); | ||
2080 | } | ||
2081 | |||
1972 | static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, | 2082 | static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, |
1973 | struct msghdr *msg_sys, unsigned flags, int nosec) | 2083 | struct msghdr *msg_sys, unsigned flags, int nosec) |
1974 | { | 2084 | { |
@@ -2214,11 +2324,11 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, | |||
2214 | #ifdef __ARCH_WANT_SYS_SOCKETCALL | 2324 | #ifdef __ARCH_WANT_SYS_SOCKETCALL |
2215 | /* Argument list sizes for sys_socketcall */ | 2325 | /* Argument list sizes for sys_socketcall */ |
2216 | #define AL(x) ((x) * sizeof(unsigned long)) | 2326 | #define AL(x) ((x) * sizeof(unsigned long)) |
2217 | static const unsigned char nargs[20] = { | 2327 | static const unsigned char nargs[21] = { |
2218 | AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), | 2328 | AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), |
2219 | AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), | 2329 | AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), |
2220 | AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), | 2330 | AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), |
2221 | AL(4), AL(5) | 2331 | AL(4), AL(5), AL(4) |
2222 | }; | 2332 | }; |
2223 | 2333 | ||
2224 | #undef AL | 2334 | #undef AL |
@@ -2238,7 +2348,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) | |||
2238 | int err; | 2348 | int err; |
2239 | unsigned int len; | 2349 | unsigned int len; |
2240 | 2350 | ||
2241 | if (call < 1 || call > SYS_RECVMMSG) | 2351 | if (call < 1 || call > SYS_SENDMMSG) |
2242 | return -EINVAL; | 2352 | return -EINVAL; |
2243 | 2353 | ||
2244 | len = nargs[call]; | 2354 | len = nargs[call]; |
@@ -2313,6 +2423,9 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) | |||
2313 | case SYS_SENDMSG: | 2423 | case SYS_SENDMSG: |
2314 | err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); | 2424 | err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); |
2315 | break; | 2425 | break; |
2426 | case SYS_SENDMMSG: | ||
2427 | err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]); | ||
2428 | break; | ||
2316 | case SYS_RECVMSG: | 2429 | case SYS_RECVMSG: |
2317 | err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); | 2430 | err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); |
2318 | break; | 2431 | break; |