aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Arnaldo Carvalho de Melo <acme@redhat.com> 2009-10-13 02:40:10 -0400
committer: David S. Miller <davem@davemloft.net> 2009-10-13 02:40:10 -0400
commit: a2e2725541fad72416326798c2d7fa4dafb7d337 (patch)
tree: 6174be11da607e83eb8efb3775114ad4d6e0ca3a
parent: c05e85a06e376f6b6d59e71e5333d707e956d78b (diff)
net: Introduce recvmmsg socket syscall
Meaning receive multiple messages, reducing the number of syscalls and net stack entry/exit operations. Next patches will introduce mechanisms where protocols that want to optimize this operation will provide an unlocked_recvmsg operation. This takes into account comments made by: . Paul Moore: sock_recvmsg is called only for the first datagram, sock_recvmsg_nosec is used for the rest. . Caitlin Bestler: recvmmsg now has a struct timespec timeout, that works in the same fashion as the ppoll one. If the underlying protocol returns a datagram with MSG_OOB set, this will make recvmmsg return right away with as many datagrams (+ the OOB one) it has received so far. . Rémi Denis-Courmont & Steven Whitehouse: If we receive N < vlen datagrams and then recvmsg returns an error, recvmmsg will return the successfully received datagrams, store the error and return it in the next call. This paves the way for a subsequent optimization, sk_prot->unlocked_recvmsg, where we will be able to acquire the lock only at batch start and end, not at every underlying recvmsg call. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--arch/alpha/kernel/systbls.S1
-rw-r--r--arch/arm/kernel/calls.S1
-rw-r--r--arch/avr32/kernel/syscall_table.S1
-rw-r--r--arch/blackfin/mach-common/entry.S1
-rw-r--r--arch/ia64/kernel/entry.S1
-rw-r--r--arch/microblaze/kernel/syscall_table.S1
-rw-r--r--arch/mips/kernel/scall32-o32.S1
-rw-r--r--arch/mips/kernel/scall64-64.S1
-rw-r--r--arch/mips/kernel/scall64-n32.S1
-rw-r--r--arch/mips/kernel/scall64-o32.S1
-rw-r--r--arch/sh/kernel/syscalls_64.S1
-rw-r--r--arch/sparc/kernel/systbls_32.S2
-rw-r--r--arch/sparc/kernel/systbls_64.S4
-rw-r--r--arch/x86/ia32/ia32entry.S1
-rw-r--r--arch/x86/include/asm/unistd_32.h3
-rw-r--r--arch/x86/include/asm/unistd_64.h2
-rw-r--r--arch/x86/kernel/syscall_table_32.S1
-rw-r--r--arch/xtensa/include/asm/unistd.h4
-rw-r--r--include/linux/net.h1
-rw-r--r--include/linux/socket.h10
-rw-r--r--include/linux/syscalls.h4
-rw-r--r--include/net/compat.h8
-rw-r--r--kernel/sys_ni.c2
-rw-r--r--net/compat.c33
-rw-r--r--net/socket.c225
25 files changed, 261 insertions, 50 deletions
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index 95c9aef1c106..cda6b8b3d573 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -497,6 +497,7 @@ sys_call_table:
497 .quad sys_signalfd 497 .quad sys_signalfd
498 .quad sys_ni_syscall 498 .quad sys_ni_syscall
499 .quad sys_eventfd 499 .quad sys_eventfd
500 .quad sys_recvmmsg
500 501
501 .size sys_call_table, . - sys_call_table 502 .size sys_call_table, . - sys_call_table
502 .type sys_call_table, @object 503 .type sys_call_table, @object
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index fafce1b5c69f..f58c1156e779 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -374,6 +374,7 @@
374 CALL(sys_pwritev) 374 CALL(sys_pwritev)
375 CALL(sys_rt_tgsigqueueinfo) 375 CALL(sys_rt_tgsigqueueinfo)
376 CALL(sys_perf_event_open) 376 CALL(sys_perf_event_open)
377/* 365 */ CALL(sys_recvmmsg)
377#ifndef syscalls_counted 378#ifndef syscalls_counted
378.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls 379.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
379#define syscalls_counted 380#define syscalls_counted
diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S
index 7ee0057613b3..e76bad16b0f0 100644
--- a/arch/avr32/kernel/syscall_table.S
+++ b/arch/avr32/kernel/syscall_table.S
@@ -295,4 +295,5 @@ sys_call_table:
295 .long sys_signalfd 295 .long sys_signalfd
296 .long sys_ni_syscall /* 280, was sys_timerfd */ 296 .long sys_ni_syscall /* 280, was sys_timerfd */
297 .long sys_eventfd 297 .long sys_eventfd
298 .long sys_recvmmsg
298 .long sys_ni_syscall /* r8 is saturated at nr_syscalls */ 299 .long sys_ni_syscall /* r8 is saturated at nr_syscalls */
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index 1e7cac23e25f..48692724b74c 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -1621,6 +1621,7 @@ ENTRY(_sys_call_table)
1621 .long _sys_pwritev 1621 .long _sys_pwritev
1622 .long _sys_rt_tgsigqueueinfo 1622 .long _sys_rt_tgsigqueueinfo
1623 .long _sys_perf_event_open 1623 .long _sys_perf_event_open
1624 .long _sys_recvmmsg /* 370 */
1624 1625
1625 .rept NR_syscalls-(.-_sys_call_table)/4 1626 .rept NR_syscalls-(.-_sys_call_table)/4
1626 .long _sys_ni_syscall 1627 .long _sys_ni_syscall
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index d0e7d37017b4..d75b872ca4dc 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1806,6 +1806,7 @@ sys_call_table:
1806 data8 sys_preadv 1806 data8 sys_preadv
1807 data8 sys_pwritev // 1320 1807 data8 sys_pwritev // 1320
1808 data8 sys_rt_tgsigqueueinfo 1808 data8 sys_rt_tgsigqueueinfo
1809 data8 sys_recvmmsg
1809 1810
1810 .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls 1811 .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
1811#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ 1812#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S
index ecec19155135..c1ab1dc10898 100644
--- a/arch/microblaze/kernel/syscall_table.S
+++ b/arch/microblaze/kernel/syscall_table.S
@@ -371,3 +371,4 @@ ENTRY(sys_call_table)
371 .long sys_ni_syscall 371 .long sys_ni_syscall
372 .long sys_rt_tgsigqueueinfo /* 365 */ 372 .long sys_rt_tgsigqueueinfo /* 365 */
373 .long sys_perf_event_open 373 .long sys_perf_event_open
374 .long sys_recvmmsg
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index fd2a9bb620d6..17202bbe843f 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -583,6 +583,7 @@ einval: li v0, -ENOSYS
583 sys sys_rt_tgsigqueueinfo 4 583 sys sys_rt_tgsigqueueinfo 4
584 sys sys_perf_event_open 5 584 sys sys_perf_event_open 5
585 sys sys_accept4 4 585 sys sys_accept4 4
586 sys sys_recvmmsg 5
586 .endm 587 .endm
587 588
588 /* We pre-compute the number of _instruction_ bytes needed to 589 /* We pre-compute the number of _instruction_ bytes needed to
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 18bf7f32c5e4..a8a6c596eb04 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -420,4 +420,5 @@ sys_call_table:
420 PTR sys_rt_tgsigqueueinfo 420 PTR sys_rt_tgsigqueueinfo
421 PTR sys_perf_event_open 421 PTR sys_perf_event_open
422 PTR sys_accept4 422 PTR sys_accept4
423 PTR sys_recvmmsg
423 .size sys_call_table,.-sys_call_table 424 .size sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 6ebc07976694..5154e64f7cfe 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -418,4 +418,5 @@ EXPORT(sysn32_call_table)
418 PTR compat_sys_rt_tgsigqueueinfo /* 5295 */ 418 PTR compat_sys_rt_tgsigqueueinfo /* 5295 */
419 PTR sys_perf_event_open 419 PTR sys_perf_event_open
420 PTR sys_accept4 420 PTR sys_accept4
421 PTR compat_sys_recvmmsg
421 .size sysn32_call_table,.-sysn32_call_table 422 .size sysn32_call_table,.-sysn32_call_table
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 9bbf9775e0bd..d0eff53d7cb9 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -538,4 +538,5 @@ sys_call_table:
538 PTR compat_sys_rt_tgsigqueueinfo 538 PTR compat_sys_rt_tgsigqueueinfo
539 PTR sys_perf_event_open 539 PTR sys_perf_event_open
540 PTR sys_accept4 540 PTR sys_accept4
541 PTR compat_sys_recvmmsg
541 .size sys_call_table,.-sys_call_table 542 .size sys_call_table,.-sys_call_table
diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S
index 5bfde6c77498..07d2aaea9ae8 100644
--- a/arch/sh/kernel/syscalls_64.S
+++ b/arch/sh/kernel/syscalls_64.S
@@ -391,3 +391,4 @@ sys_call_table:
391 .long sys_pwritev 391 .long sys_pwritev
392 .long sys_rt_tgsigqueueinfo 392 .long sys_rt_tgsigqueueinfo
393 .long sys_perf_event_open 393 .long sys_perf_event_open
394 .long sys_recvmmsg /* 365 */
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 0f1658d37490..ceb1530f8aa6 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -82,5 +82,5 @@ sys_call_table:
82/*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate 82/*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
83/*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 83/*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
84/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv 84/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
85/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open 85/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg
86 86
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 009825f6e73c..f37bef747e60 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -83,7 +83,7 @@ sys_call_table32:
83/*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate 83/*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate
84 .word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1 84 .word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1
85/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv 85/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
86 .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open 86 .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg
87 87
88#endif /* CONFIG_COMPAT */ 88#endif /* CONFIG_COMPAT */
89 89
@@ -158,4 +158,4 @@ sys_call_table:
158/*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate 158/*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
159 .word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 159 .word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
160/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv 160/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
161 .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open 161 .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 74619c4f9fda..11a6c79d5f46 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -832,4 +832,5 @@ ia32_sys_call_table:
832 .quad compat_sys_pwritev 832 .quad compat_sys_pwritev
833 .quad compat_sys_rt_tgsigqueueinfo /* 335 */ 833 .quad compat_sys_rt_tgsigqueueinfo /* 335 */
834 .quad sys_perf_event_open 834 .quad sys_perf_event_open
835 .quad compat_sys_recvmmsg
835ia32_syscall_end: 836ia32_syscall_end:
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 6fb3c209a7e3..3baf379fa840 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -342,10 +342,11 @@
342#define __NR_pwritev 334 342#define __NR_pwritev 334
343#define __NR_rt_tgsigqueueinfo 335 343#define __NR_rt_tgsigqueueinfo 335
344#define __NR_perf_event_open 336 344#define __NR_perf_event_open 336
345#define __NR_recvmmsg 337
345 346
346#ifdef __KERNEL__ 347#ifdef __KERNEL__
347 348
348#define NR_syscalls 337 349#define NR_syscalls 338
349 350
350#define __ARCH_WANT_IPC_PARSE_VERSION 351#define __ARCH_WANT_IPC_PARSE_VERSION
351#define __ARCH_WANT_OLD_READDIR 352#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 8d3ad0adbc68..4843f7ba754a 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -661,6 +661,8 @@ __SYSCALL(__NR_pwritev, sys_pwritev)
661__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) 661__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
662#define __NR_perf_event_open 298 662#define __NR_perf_event_open 298
663__SYSCALL(__NR_perf_event_open, sys_perf_event_open) 663__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
664#define __NR_recvmmsg 299
665__SYSCALL(__NR_recvmmsg, sys_recvmmsg)
664 666
665#ifndef __NO_STUBS 667#ifndef __NO_STUBS
666#define __ARCH_WANT_OLD_READDIR 668#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 0157cd26d7cc..70c2125d55b9 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -336,3 +336,4 @@ ENTRY(sys_call_table)
336 .long sys_pwritev 336 .long sys_pwritev
337 .long sys_rt_tgsigqueueinfo /* 335 */ 337 .long sys_rt_tgsigqueueinfo /* 335 */
338 .long sys_perf_event_open 338 .long sys_perf_event_open
339 .long sys_recvmmsg
diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h
index c092c8fbb2cf..4e55dc763021 100644
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h
@@ -681,8 +681,10 @@ __SYSCALL(304, sys_signalfd, 3)
681__SYSCALL(305, sys_ni_syscall, 0) 681__SYSCALL(305, sys_ni_syscall, 0)
682#define __NR_eventfd 306 682#define __NR_eventfd 306
683__SYSCALL(306, sys_eventfd, 1) 683__SYSCALL(306, sys_eventfd, 1)
684#define __NR_recvmmsg 307
685__SYSCALL(307, sys_recvmmsg, 5)
684 686
685#define __NR_syscall_count 307 687#define __NR_syscall_count 308
686 688
687/* 689/*
688 * sysxtensa syscall handler 690 * sysxtensa syscall handler
diff --git a/include/linux/net.h b/include/linux/net.h
index 529a0931711d..b42bb60fe92f 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -41,6 +41,7 @@
41#define SYS_SENDMSG 16 /* sys_sendmsg(2) */ 41#define SYS_SENDMSG 16 /* sys_sendmsg(2) */
42#define SYS_RECVMSG 17 /* sys_recvmsg(2) */ 42#define SYS_RECVMSG 17 /* sys_recvmsg(2) */
43#define SYS_ACCEPT4 18 /* sys_accept4(2) */ 43#define SYS_ACCEPT4 18 /* sys_accept4(2) */
44#define SYS_RECVMMSG 19 /* sys_recvmmsg(2) */
44 45
45typedef enum { 46typedef enum {
46 SS_FREE = 0, /* not allocated */ 47 SS_FREE = 0, /* not allocated */
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 3273a0c5043b..59966f12990c 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -65,6 +65,12 @@ struct msghdr {
65 unsigned msg_flags; 65 unsigned msg_flags;
66}; 66};
67 67
68/* For recvmmsg/sendmmsg */
69struct mmsghdr {
70 struct msghdr msg_hdr;
71 unsigned msg_len;
72};
73
68/* 74/*
69 * POSIX 1003.1g - ancillary data object information 75 * POSIX 1003.1g - ancillary data object information
70 * Ancillary data consits of a sequence of pairs of 76 * Ancillary data consits of a sequence of pairs of
@@ -312,6 +318,10 @@ extern int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uadd
312extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr); 318extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr);
313extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); 319extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
314 320
321struct timespec;
322
323extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
324 unsigned int flags, struct timespec *timeout);
315#endif 325#endif
316#endif /* not kernel and not glibc */ 326#endif /* not kernel and not glibc */
317#endif /* _LINUX_SOCKET_H */ 327#endif /* _LINUX_SOCKET_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a990ace1a838..714f063a3e6d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -25,6 +25,7 @@ struct linux_dirent64;
25struct list_head; 25struct list_head;
26struct msgbuf; 26struct msgbuf;
27struct msghdr; 27struct msghdr;
28struct mmsghdr;
28struct msqid_ds; 29struct msqid_ds;
29struct new_utsname; 30struct new_utsname;
30struct nfsctl_arg; 31struct nfsctl_arg;
@@ -677,6 +678,9 @@ asmlinkage long sys_recv(int, void __user *, size_t, unsigned);
677asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned, 678asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned,
678 struct sockaddr __user *, int __user *); 679 struct sockaddr __user *, int __user *);
679asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags); 680asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags);
681asmlinkage long sys_recvmmsg(int fd, struct mmsghdr __user *msg,
682 unsigned int vlen, unsigned flags,
683 struct timespec __user *timeout);
680asmlinkage long sys_socket(int, int, int); 684asmlinkage long sys_socket(int, int, int);
681asmlinkage long sys_socketpair(int, int, int, int __user *); 685asmlinkage long sys_socketpair(int, int, int, int __user *);
682asmlinkage long sys_socketcall(int call, unsigned long __user *args); 686asmlinkage long sys_socketcall(int call, unsigned long __user *args);
diff --git a/include/net/compat.h b/include/net/compat.h
index 7c3002832d05..9679f05e9896 100644
--- a/include/net/compat.h
+++ b/include/net/compat.h
@@ -18,6 +18,11 @@ struct compat_msghdr {
18 compat_uint_t msg_flags; 18 compat_uint_t msg_flags;
19}; 19};
20 20
21struct compat_mmsghdr {
22 struct compat_msghdr msg_hdr;
23 compat_uint_t msg_len;
24};
25
21struct compat_cmsghdr { 26struct compat_cmsghdr {
22 compat_size_t cmsg_len; 27 compat_size_t cmsg_len;
23 compat_int_t cmsg_level; 28 compat_int_t cmsg_level;
@@ -35,6 +40,9 @@ extern int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *);
35extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int); 40extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int);
36extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned); 41extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned);
37extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned); 42extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned);
43extern asmlinkage long compat_sys_recvmmsg(int, struct compat_mmsghdr __user *,
44 unsigned, unsigned,
45 struct timespec __user *);
38extern asmlinkage long compat_sys_getsockopt(int, int, int, char __user *, int __user *); 46extern asmlinkage long compat_sys_getsockopt(int, int, int, char __user *, int __user *);
39extern int put_cmsg_compat(struct msghdr*, int, int, int, void *); 47extern int put_cmsg_compat(struct msghdr*, int, int, int, void *);
40 48
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index e06d0b8d1951..f050ba85d420 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -48,8 +48,10 @@ cond_syscall(sys_shutdown);
48cond_syscall(sys_sendmsg); 48cond_syscall(sys_sendmsg);
49cond_syscall(compat_sys_sendmsg); 49cond_syscall(compat_sys_sendmsg);
50cond_syscall(sys_recvmsg); 50cond_syscall(sys_recvmsg);
51cond_syscall(sys_recvmmsg);
51cond_syscall(compat_sys_recvmsg); 52cond_syscall(compat_sys_recvmsg);
52cond_syscall(compat_sys_recvfrom); 53cond_syscall(compat_sys_recvfrom);
54cond_syscall(compat_sys_recvmmsg);
53cond_syscall(sys_socketcall); 55cond_syscall(sys_socketcall);
54cond_syscall(sys_futex); 56cond_syscall(sys_futex);
55cond_syscall(compat_sys_futex); 57cond_syscall(compat_sys_futex);
diff --git a/net/compat.c b/net/compat.c
index a407c3addbae..e13f5256fd20 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -727,10 +727,10 @@ EXPORT_SYMBOL(compat_mc_getsockopt);
727 727
728/* Argument list sizes for compat_sys_socketcall */ 728/* Argument list sizes for compat_sys_socketcall */
729#define AL(x) ((x) * sizeof(u32)) 729#define AL(x) ((x) * sizeof(u32))
730static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), 730static unsigned char nas[20]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
731 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), 731 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
732 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), 732 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
733 AL(4)}; 733 AL(4),AL(5)};
734#undef AL 734#undef AL
735 735
736asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) 736asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
@@ -755,13 +755,36 @@ asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, size_t len,
755 return sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen); 755 return sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen);
756} 756}
757 757
758asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
759 unsigned vlen, unsigned int flags,
760 struct timespec __user *timeout)
761{
762 int datagrams;
763 struct timespec ktspec;
764 struct compat_timespec __user *utspec =
765 (struct compat_timespec __user *)timeout;
766
767 if (get_user(ktspec.tv_sec, &utspec->tv_sec) ||
768 get_user(ktspec.tv_nsec, &utspec->tv_nsec))
769 return -EFAULT;
770
771 datagrams = __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
772 flags | MSG_CMSG_COMPAT, &ktspec);
773 if (datagrams > 0 &&
774 (put_user(ktspec.tv_sec, &utspec->tv_sec) ||
775 put_user(ktspec.tv_nsec, &utspec->tv_nsec)))
776 datagrams = -EFAULT;
777
778 return datagrams;
779}
780
758asmlinkage long compat_sys_socketcall(int call, u32 __user *args) 781asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
759{ 782{
760 int ret; 783 int ret;
761 u32 a[6]; 784 u32 a[6];
762 u32 a0, a1; 785 u32 a0, a1;
763 786
764 if (call < SYS_SOCKET || call > SYS_ACCEPT4) 787 if (call < SYS_SOCKET || call > SYS_RECVMMSG)
765 return -EINVAL; 788 return -EINVAL;
766 if (copy_from_user(a, args, nas[call])) 789 if (copy_from_user(a, args, nas[call]))
767 return -EFAULT; 790 return -EFAULT;
@@ -823,6 +846,10 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
823 case SYS_RECVMSG: 846 case SYS_RECVMSG:
824 ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); 847 ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
825 break; 848 break;
849 case SYS_RECVMMSG:
850 ret = compat_sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3],
851 compat_ptr(a[4]));
852 break;
826 case SYS_ACCEPT4: 853 case SYS_ACCEPT4:
827 ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]); 854 ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]);
828 break; 855 break;
diff --git a/net/socket.c b/net/socket.c
index 807935693846..9dff31c9b799 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -683,10 +683,9 @@ void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
683} 683}
684EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops); 684EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops);
685 685
686static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, 686static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
687 struct msghdr *msg, size_t size, int flags) 687 struct msghdr *msg, size_t size, int flags)
688{ 688{
689 int err;
690 struct sock_iocb *si = kiocb_to_siocb(iocb); 689 struct sock_iocb *si = kiocb_to_siocb(iocb);
691 690
692 si->sock = sock; 691 si->sock = sock;
@@ -695,13 +694,17 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
695 si->size = size; 694 si->size = size;
696 si->flags = flags; 695 si->flags = flags;
697 696
698 err = security_socket_recvmsg(sock, msg, size, flags);
699 if (err)
700 return err;
701
702 return sock->ops->recvmsg(iocb, sock, msg, size, flags); 697 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
703} 698}
704 699
700static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
701 struct msghdr *msg, size_t size, int flags)
702{
703 int err = security_socket_recvmsg(sock, msg, size, flags);
704
705 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
706}
707
705int sock_recvmsg(struct socket *sock, struct msghdr *msg, 708int sock_recvmsg(struct socket *sock, struct msghdr *msg,
706 size_t size, int flags) 709 size_t size, int flags)
707{ 710{
@@ -717,6 +720,21 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg,
717 return ret; 720 return ret;
718} 721}
719 722
723static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
724 size_t size, int flags)
725{
726 struct kiocb iocb;
727 struct sock_iocb siocb;
728 int ret;
729
730 init_sync_kiocb(&iocb, NULL);
731 iocb.private = &siocb;
732 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
733 if (-EIOCBQUEUED == ret)
734 ret = wait_on_sync_kiocb(&iocb);
735 return ret;
736}
737
720int kernel_recvmsg(struct socket *sock, struct msghdr *msg, 738int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
721 struct kvec *vec, size_t num, size_t size, int flags) 739 struct kvec *vec, size_t num, size_t size, int flags)
722{ 740{
@@ -1983,22 +2001,15 @@ out:
1983 return err; 2001 return err;
1984} 2002}
1985 2003
1986/* 2004static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
1987 * BSD recvmsg interface 2005 struct msghdr *msg_sys, unsigned flags, int nosec)
1988 */
1989
1990SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
1991 unsigned int, flags)
1992{ 2006{
1993 struct compat_msghdr __user *msg_compat = 2007 struct compat_msghdr __user *msg_compat =
1994 (struct compat_msghdr __user *)msg; 2008 (struct compat_msghdr __user *)msg;
1995 struct socket *sock;
1996 struct iovec iovstack[UIO_FASTIOV]; 2009 struct iovec iovstack[UIO_FASTIOV];
1997 struct iovec *iov = iovstack; 2010 struct iovec *iov = iovstack;
1998 struct msghdr msg_sys;
1999 unsigned long cmsg_ptr; 2011 unsigned long cmsg_ptr;
2000 int err, iov_size, total_len, len; 2012 int err, iov_size, total_len, len;
2001 int fput_needed;
2002 2013
2003 /* kernel mode address */ 2014 /* kernel mode address */
2004 struct sockaddr_storage addr; 2015 struct sockaddr_storage addr;
@@ -2008,27 +2019,23 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2008 int __user *uaddr_len; 2019 int __user *uaddr_len;
2009 2020
2010 if (MSG_CMSG_COMPAT & flags) { 2021 if (MSG_CMSG_COMPAT & flags) {
2011 if (get_compat_msghdr(&msg_sys, msg_compat)) 2022 if (get_compat_msghdr(msg_sys, msg_compat))
2012 return -EFAULT; 2023 return -EFAULT;
2013 } 2024 }
2014 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) 2025 else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
2015 return -EFAULT; 2026 return -EFAULT;
2016 2027
2017 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2018 if (!sock)
2019 goto out;
2020
2021 err = -EMSGSIZE; 2028 err = -EMSGSIZE;
2022 if (msg_sys.msg_iovlen > UIO_MAXIOV) 2029 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2023 goto out_put; 2030 goto out;
2024 2031
2025 /* Check whether to allocate the iovec area */ 2032 /* Check whether to allocate the iovec area */
2026 err = -ENOMEM; 2033 err = -ENOMEM;
2027 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); 2034 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
2028 if (msg_sys.msg_iovlen > UIO_FASTIOV) { 2035 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
2029 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); 2036 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
2030 if (!iov) 2037 if (!iov)
2031 goto out_put; 2038 goto out;
2032 } 2039 }
2033 2040
2034 /* 2041 /*
@@ -2036,46 +2043,47 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2036 * kernel msghdr to use the kernel address space) 2043 * kernel msghdr to use the kernel address space)
2037 */ 2044 */
2038 2045
2039 uaddr = (__force void __user *)msg_sys.msg_name; 2046 uaddr = (__force void __user *)msg_sys->msg_name;
2040 uaddr_len = COMPAT_NAMELEN(msg); 2047 uaddr_len = COMPAT_NAMELEN(msg);
2041 if (MSG_CMSG_COMPAT & flags) { 2048 if (MSG_CMSG_COMPAT & flags) {
2042 err = verify_compat_iovec(&msg_sys, iov, 2049 err = verify_compat_iovec(msg_sys, iov,
2043 (struct sockaddr *)&addr, 2050 (struct sockaddr *)&addr,
2044 VERIFY_WRITE); 2051 VERIFY_WRITE);
2045 } else 2052 } else
2046 err = verify_iovec(&msg_sys, iov, 2053 err = verify_iovec(msg_sys, iov,
2047 (struct sockaddr *)&addr, 2054 (struct sockaddr *)&addr,
2048 VERIFY_WRITE); 2055 VERIFY_WRITE);
2049 if (err < 0) 2056 if (err < 0)
2050 goto out_freeiov; 2057 goto out_freeiov;
2051 total_len = err; 2058 total_len = err;
2052 2059
2053 cmsg_ptr = (unsigned long)msg_sys.msg_control; 2060 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2054 msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); 2061 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
2055 2062
2056 if (sock->file->f_flags & O_NONBLOCK) 2063 if (sock->file->f_flags & O_NONBLOCK)
2057 flags |= MSG_DONTWAIT; 2064 flags |= MSG_DONTWAIT;
2058 err = sock_recvmsg(sock, &msg_sys, total_len, flags); 2065 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2066 total_len, flags);
2059 if (err < 0) 2067 if (err < 0)
2060 goto out_freeiov; 2068 goto out_freeiov;
2061 len = err; 2069 len = err;
2062 2070
2063 if (uaddr != NULL) { 2071 if (uaddr != NULL) {
2064 err = move_addr_to_user((struct sockaddr *)&addr, 2072 err = move_addr_to_user((struct sockaddr *)&addr,
2065 msg_sys.msg_namelen, uaddr, 2073 msg_sys->msg_namelen, uaddr,
2066 uaddr_len); 2074 uaddr_len);
2067 if (err < 0) 2075 if (err < 0)
2068 goto out_freeiov; 2076 goto out_freeiov;
2069 } 2077 }
2070 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), 2078 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
2071 COMPAT_FLAGS(msg)); 2079 COMPAT_FLAGS(msg));
2072 if (err) 2080 if (err)
2073 goto out_freeiov; 2081 goto out_freeiov;
2074 if (MSG_CMSG_COMPAT & flags) 2082 if (MSG_CMSG_COMPAT & flags)
2075 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, 2083 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
2076 &msg_compat->msg_controllen); 2084 &msg_compat->msg_controllen);
2077 else 2085 else
2078 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, 2086 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
2079 &msg->msg_controllen); 2087 &msg->msg_controllen);
2080 if (err) 2088 if (err)
2081 goto out_freeiov; 2089 goto out_freeiov;
@@ -2084,21 +2092,150 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2084out_freeiov: 2092out_freeiov:
2085 if (iov != iovstack) 2093 if (iov != iovstack)
2086 sock_kfree_s(sock->sk, iov, iov_size); 2094 sock_kfree_s(sock->sk, iov, iov_size);
2087out_put: 2095out:
2096 return err;
2097}
2098
2099/*
2100 * BSD recvmsg interface
2101 */
2102
2103SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2104 unsigned int, flags)
2105{
2106 int fput_needed, err;
2107 struct msghdr msg_sys;
2108 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2109
2110 if (!sock)
2111 goto out;
2112
2113 err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2114
2088 fput_light(sock->file, fput_needed); 2115 fput_light(sock->file, fput_needed);
2089out: 2116out:
2090 return err; 2117 return err;
2091} 2118}
2092 2119
2093#ifdef __ARCH_WANT_SYS_SOCKETCALL 2120/*
2121 * Linux recvmmsg interface
2122 */
2123
2124int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2125 unsigned int flags, struct timespec *timeout)
2126{
2127 int fput_needed, err, datagrams;
2128 struct socket *sock;
2129 struct mmsghdr __user *entry;
2130 struct msghdr msg_sys;
2131 struct timespec end_time;
2132
2133 if (timeout &&
2134 poll_select_set_timeout(&end_time, timeout->tv_sec,
2135 timeout->tv_nsec))
2136 return -EINVAL;
2137
2138 datagrams = 0;
2139
2140 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2141 if (!sock)
2142 return err;
2143
2144 err = sock_error(sock->sk);
2145 if (err)
2146 goto out_put;
2147
2148 entry = mmsg;
2149
2150 while (datagrams < vlen) {
2151 /*
2152 * No need to ask LSM for more than the first datagram.
2153 */
2154 err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
2155 &msg_sys, flags, datagrams);
2156 if (err < 0)
2157 break;
2158 err = put_user(err, &entry->msg_len);
2159 if (err)
2160 break;
2161 ++entry;
2162 ++datagrams;
2163
2164 if (timeout) {
2165 ktime_get_ts(timeout);
2166 *timeout = timespec_sub(end_time, *timeout);
2167 if (timeout->tv_sec < 0) {
2168 timeout->tv_sec = timeout->tv_nsec = 0;
2169 break;
2170 }
2171
2172 /* Timeout, return less than vlen datagrams */
2173 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2174 break;
2175 }
2176
2177 /* Out of band data, return right away */
2178 if (msg_sys.msg_flags & MSG_OOB)
2179 break;
2180 }
2181
2182out_put:
2183 fput_light(sock->file, fput_needed);
2094 2184
2185 if (err == 0)
2186 return datagrams;
2187
2188 if (datagrams != 0) {
2189 /*
2190 * We may return less entries than requested (vlen) if the
2191 * sock is non block and there aren't enough datagrams...
2192 */
2193 if (err != -EAGAIN) {
2194 /*
2195 * ... or if recvmsg returns an error after we
2196 * received some datagrams, where we record the
2197 * error to return on the next call or if the
2198 * app asks about it using getsockopt(SO_ERROR).
2199 */
2200 sock->sk->sk_err = -err;
2201 }
2202
2203 return datagrams;
2204 }
2205
2206 return err;
2207}
2208
2209SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2210 unsigned int, vlen, unsigned int, flags,
2211 struct timespec __user *, timeout)
2212{
2213 int datagrams;
2214 struct timespec timeout_sys;
2215
2216 if (!timeout)
2217 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2218
2219 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2220 return -EFAULT;
2221
2222 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2223
2224 if (datagrams > 0 &&
2225 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2226 datagrams = -EFAULT;
2227
2228 return datagrams;
2229}
2230
2231#ifdef __ARCH_WANT_SYS_SOCKETCALL
2095/* Argument list sizes for sys_socketcall */ 2232/* Argument list sizes for sys_socketcall */
2096#define AL(x) ((x) * sizeof(unsigned long)) 2233#define AL(x) ((x) * sizeof(unsigned long))
2097static const unsigned char nargs[19]={ 2234static const unsigned char nargs[20] = {
2098 AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), 2235 AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
2099 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), 2236 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
2100 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), 2237 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
2101 AL(4) 2238 AL(4),AL(5)
2102}; 2239};
2103 2240
2104#undef AL 2241#undef AL
@@ -2118,7 +2255,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
2118 int err; 2255 int err;
2119 unsigned int len; 2256 unsigned int len;
2120 2257
2121 if (call < 1 || call > SYS_ACCEPT4) 2258 if (call < 1 || call > SYS_RECVMMSG)
2122 return -EINVAL; 2259 return -EINVAL;
2123 2260
2124 len = nargs[call]; 2261 len = nargs[call];
@@ -2196,6 +2333,10 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
2196 case SYS_RECVMSG: 2333 case SYS_RECVMSG:
2197 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); 2334 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2198 break; 2335 break;
2336 case SYS_RECVMMSG:
2337 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2338 (struct timespec __user *)a[4]);
2339 break;
2199 case SYS_ACCEPT4: 2340 case SYS_ACCEPT4:
2200 err = sys_accept4(a0, (struct sockaddr __user *)a1, 2341 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2201 (int __user *)a[2], a[3]); 2342 (int __user *)a[2], a[3]);