diff options
author | Anton Blanchard <anton@samba.org> | 2011-05-02 16:21:35 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-05-05 14:10:14 -0400 |
commit | 228e548e602061b08ee8e8966f567c12aa079682 (patch) | |
tree | 4c79ecf071d6174d42da1557812a4646d0aaa5af /net/socket.c | |
parent | 1c5cae815d19ffe02bdfda1260949ef2b1806171 (diff) |
net: Add sendmmsg socket system call
This patch adds sendmmsg, a system call that sends multiple messages in
a single call; it is the send-side counterpart of the existing recvmmsg
syscall. The implementation is heavily based on the patch by Arnaldo that added recvmmsg.
I wrote a microbenchmark to test the performance gains of using
this new syscall:
http://ozlabs.org/~anton/junkcode/sendmmsg_test.c
The test was run on a ppc64 box with a 10 Gbit network card. The
benchmark can send both UDP and RAW ethernet packets.
64B UDP:

| batch | pkts/sec | change vs. batch 1 |
|---|---|---|
| 1 | 804570 | baseline |
| 2 | 872800 | + 8 % |
| 4 | 916556 | +14 % |
| 8 | 939712 | +17 % |
| 16 | 952688 | +18 % |
| 32 | 956448 | +19 % |
| 64 | 964800 | +20 % |

64B raw socket:

| batch | pkts/sec | change vs. batch 1 |
|---|---|---|
| 1 | 1201449 | baseline |
| 2 | 1350028 | +12 % |
| 4 | 1461416 | +22 % |
| 8 | 1513080 | +26 % |
| 16 | 1541216 | +28 % |
| 32 | 1553440 | +29 % |
| 64 | 1557888 | +30 % |

At a batch size of 64 we see a 20% improvement in throughput on UDP
send and a 30% improvement on raw socket send, relative to sending one
message per call.
[ Add sparc syscall entries. -DaveM ]
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/socket.c')
-rw-r--r-- | net/socket.c | 199 |
1 file changed, 156 insertions, 43 deletions
diff --git a/net/socket.c b/net/socket.c index d25f5a9d6fa2..ed50255143d5 100644 --- a/net/socket.c +++ b/net/socket.c | |||
@@ -551,11 +551,10 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) | |||
551 | } | 551 | } |
552 | EXPORT_SYMBOL(sock_tx_timestamp); | 552 | EXPORT_SYMBOL(sock_tx_timestamp); |
553 | 553 | ||
554 | static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, | 554 | static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, |
555 | struct msghdr *msg, size_t size) | 555 | struct msghdr *msg, size_t size) |
556 | { | 556 | { |
557 | struct sock_iocb *si = kiocb_to_siocb(iocb); | 557 | struct sock_iocb *si = kiocb_to_siocb(iocb); |
558 | int err; | ||
559 | 558 | ||
560 | sock_update_classid(sock->sk); | 559 | sock_update_classid(sock->sk); |
561 | 560 | ||
@@ -564,13 +563,17 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, | |||
564 | si->msg = msg; | 563 | si->msg = msg; |
565 | si->size = size; | 564 | si->size = size; |
566 | 565 | ||
567 | err = security_socket_sendmsg(sock, msg, size); | ||
568 | if (err) | ||
569 | return err; | ||
570 | |||
571 | return sock->ops->sendmsg(iocb, sock, msg, size); | 566 | return sock->ops->sendmsg(iocb, sock, msg, size); |
572 | } | 567 | } |
573 | 568 | ||
569 | static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, | ||
570 | struct msghdr *msg, size_t size) | ||
571 | { | ||
572 | int err = security_socket_sendmsg(sock, msg, size); | ||
573 | |||
574 | return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size); | ||
575 | } | ||
576 | |||
574 | int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) | 577 | int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) |
575 | { | 578 | { |
576 | struct kiocb iocb; | 579 | struct kiocb iocb; |
@@ -586,6 +589,20 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) | |||
586 | } | 589 | } |
587 | EXPORT_SYMBOL(sock_sendmsg); | 590 | EXPORT_SYMBOL(sock_sendmsg); |
588 | 591 | ||
592 | int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size) | ||
593 | { | ||
594 | struct kiocb iocb; | ||
595 | struct sock_iocb siocb; | ||
596 | int ret; | ||
597 | |||
598 | init_sync_kiocb(&iocb, NULL); | ||
599 | iocb.private = &siocb; | ||
600 | ret = __sock_sendmsg_nosec(&iocb, sock, msg, size); | ||
601 | if (-EIOCBQUEUED == ret) | ||
602 | ret = wait_on_sync_kiocb(&iocb); | ||
603 | return ret; | ||
604 | } | ||
605 | |||
589 | int kernel_sendmsg(struct socket *sock, struct msghdr *msg, | 606 | int kernel_sendmsg(struct socket *sock, struct msghdr *msg, |
590 | struct kvec *vec, size_t num, size_t size) | 607 | struct kvec *vec, size_t num, size_t size) |
591 | { | 608 | { |
@@ -1863,57 +1880,47 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how) | |||
1863 | #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) | 1880 | #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) |
1864 | #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) | 1881 | #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) |
1865 | 1882 | ||
1866 | /* | 1883 | static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, |
1867 | * BSD sendmsg interface | 1884 | struct msghdr *msg_sys, unsigned flags, int nosec) |
1868 | */ | ||
1869 | |||
1870 | SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) | ||
1871 | { | 1885 | { |
1872 | struct compat_msghdr __user *msg_compat = | 1886 | struct compat_msghdr __user *msg_compat = |
1873 | (struct compat_msghdr __user *)msg; | 1887 | (struct compat_msghdr __user *)msg; |
1874 | struct socket *sock; | ||
1875 | struct sockaddr_storage address; | 1888 | struct sockaddr_storage address; |
1876 | struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; | 1889 | struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; |
1877 | unsigned char ctl[sizeof(struct cmsghdr) + 20] | 1890 | unsigned char ctl[sizeof(struct cmsghdr) + 20] |
1878 | __attribute__ ((aligned(sizeof(__kernel_size_t)))); | 1891 | __attribute__ ((aligned(sizeof(__kernel_size_t)))); |
1879 | /* 20 is size of ipv6_pktinfo */ | 1892 | /* 20 is size of ipv6_pktinfo */ |
1880 | unsigned char *ctl_buf = ctl; | 1893 | unsigned char *ctl_buf = ctl; |
1881 | struct msghdr msg_sys; | ||
1882 | int err, ctl_len, iov_size, total_len; | 1894 | int err, ctl_len, iov_size, total_len; |
1883 | int fput_needed; | ||
1884 | 1895 | ||
1885 | err = -EFAULT; | 1896 | err = -EFAULT; |
1886 | if (MSG_CMSG_COMPAT & flags) { | 1897 | if (MSG_CMSG_COMPAT & flags) { |
1887 | if (get_compat_msghdr(&msg_sys, msg_compat)) | 1898 | if (get_compat_msghdr(msg_sys, msg_compat)) |
1888 | return -EFAULT; | 1899 | return -EFAULT; |
1889 | } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) | 1900 | } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) |
1890 | return -EFAULT; | 1901 | return -EFAULT; |
1891 | 1902 | ||
1892 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | ||
1893 | if (!sock) | ||
1894 | goto out; | ||
1895 | |||
1896 | /* do not move before msg_sys is valid */ | 1903 | /* do not move before msg_sys is valid */ |
1897 | err = -EMSGSIZE; | 1904 | err = -EMSGSIZE; |
1898 | if (msg_sys.msg_iovlen > UIO_MAXIOV) | 1905 | if (msg_sys->msg_iovlen > UIO_MAXIOV) |
1899 | goto out_put; | 1906 | goto out; |
1900 | 1907 | ||
1901 | /* Check whether to allocate the iovec area */ | 1908 | /* Check whether to allocate the iovec area */ |
1902 | err = -ENOMEM; | 1909 | err = -ENOMEM; |
1903 | iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); | 1910 | iov_size = msg_sys->msg_iovlen * sizeof(struct iovec); |
1904 | if (msg_sys.msg_iovlen > UIO_FASTIOV) { | 1911 | if (msg_sys->msg_iovlen > UIO_FASTIOV) { |
1905 | iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); | 1912 | iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); |
1906 | if (!iov) | 1913 | if (!iov) |
1907 | goto out_put; | 1914 | goto out; |
1908 | } | 1915 | } |
1909 | 1916 | ||
1910 | /* This will also move the address data into kernel space */ | 1917 | /* This will also move the address data into kernel space */ |
1911 | if (MSG_CMSG_COMPAT & flags) { | 1918 | if (MSG_CMSG_COMPAT & flags) { |
1912 | err = verify_compat_iovec(&msg_sys, iov, | 1919 | err = verify_compat_iovec(msg_sys, iov, |
1913 | (struct sockaddr *)&address, | 1920 | (struct sockaddr *)&address, |
1914 | VERIFY_READ); | 1921 | VERIFY_READ); |
1915 | } else | 1922 | } else |
1916 | err = verify_iovec(&msg_sys, iov, | 1923 | err = verify_iovec(msg_sys, iov, |
1917 | (struct sockaddr *)&address, | 1924 | (struct sockaddr *)&address, |
1918 | VERIFY_READ); | 1925 | VERIFY_READ); |
1919 | if (err < 0) | 1926 | if (err < 0) |
@@ -1922,17 +1929,17 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) | |||
1922 | 1929 | ||
1923 | err = -ENOBUFS; | 1930 | err = -ENOBUFS; |
1924 | 1931 | ||
1925 | if (msg_sys.msg_controllen > INT_MAX) | 1932 | if (msg_sys->msg_controllen > INT_MAX) |
1926 | goto out_freeiov; | 1933 | goto out_freeiov; |
1927 | ctl_len = msg_sys.msg_controllen; | 1934 | ctl_len = msg_sys->msg_controllen; |
1928 | if ((MSG_CMSG_COMPAT & flags) && ctl_len) { | 1935 | if ((MSG_CMSG_COMPAT & flags) && ctl_len) { |
1929 | err = | 1936 | err = |
1930 | cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, | 1937 | cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl, |
1931 | sizeof(ctl)); | 1938 | sizeof(ctl)); |
1932 | if (err) | 1939 | if (err) |
1933 | goto out_freeiov; | 1940 | goto out_freeiov; |
1934 | ctl_buf = msg_sys.msg_control; | 1941 | ctl_buf = msg_sys->msg_control; |
1935 | ctl_len = msg_sys.msg_controllen; | 1942 | ctl_len = msg_sys->msg_controllen; |
1936 | } else if (ctl_len) { | 1943 | } else if (ctl_len) { |
1937 | if (ctl_len > sizeof(ctl)) { | 1944 | if (ctl_len > sizeof(ctl)) { |
1938 | ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); | 1945 | ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); |
@@ -1941,21 +1948,22 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) | |||
1941 | } | 1948 | } |
1942 | err = -EFAULT; | 1949 | err = -EFAULT; |
1943 | /* | 1950 | /* |
1944 | * Careful! Before this, msg_sys.msg_control contains a user pointer. | 1951 | * Careful! Before this, msg_sys->msg_control contains a user pointer. |
1945 | * Afterwards, it will be a kernel pointer. Thus the compiler-assisted | 1952 | * Afterwards, it will be a kernel pointer. Thus the compiler-assisted |
1946 | * checking falls down on this. | 1953 | * checking falls down on this. |
1947 | */ | 1954 | */ |
1948 | if (copy_from_user(ctl_buf, | 1955 | if (copy_from_user(ctl_buf, |
1949 | (void __user __force *)msg_sys.msg_control, | 1956 | (void __user __force *)msg_sys->msg_control, |
1950 | ctl_len)) | 1957 | ctl_len)) |
1951 | goto out_freectl; | 1958 | goto out_freectl; |
1952 | msg_sys.msg_control = ctl_buf; | 1959 | msg_sys->msg_control = ctl_buf; |
1953 | } | 1960 | } |
1954 | msg_sys.msg_flags = flags; | 1961 | msg_sys->msg_flags = flags; |
1955 | 1962 | ||
1956 | if (sock->file->f_flags & O_NONBLOCK) | 1963 | if (sock->file->f_flags & O_NONBLOCK) |
1957 | msg_sys.msg_flags |= MSG_DONTWAIT; | 1964 | msg_sys->msg_flags |= MSG_DONTWAIT; |
1958 | err = sock_sendmsg(sock, &msg_sys, total_len); | 1965 | err = (nosec ? sock_sendmsg_nosec : sock_sendmsg)(sock, msg_sys, |
1966 | total_len); | ||
1959 | 1967 | ||
1960 | out_freectl: | 1968 | out_freectl: |
1961 | if (ctl_buf != ctl) | 1969 | if (ctl_buf != ctl) |
@@ -1963,12 +1971,114 @@ out_freectl: | |||
1963 | out_freeiov: | 1971 | out_freeiov: |
1964 | if (iov != iovstack) | 1972 | if (iov != iovstack) |
1965 | sock_kfree_s(sock->sk, iov, iov_size); | 1973 | sock_kfree_s(sock->sk, iov, iov_size); |
1966 | out_put: | 1974 | out: |
1975 | return err; | ||
1976 | } | ||
1977 | |||
1978 | /* | ||
1979 | * BSD sendmsg interface | ||
1980 | */ | ||
1981 | |||
1982 | SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) | ||
1983 | { | ||
1984 | int fput_needed, err; | ||
1985 | struct msghdr msg_sys; | ||
1986 | struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed); | ||
1987 | |||
1988 | if (!sock) | ||
1989 | goto out; | ||
1990 | |||
1991 | err = __sys_sendmsg(sock, msg, &msg_sys, flags, 0); | ||
1992 | |||
1967 | fput_light(sock->file, fput_needed); | 1993 | fput_light(sock->file, fput_needed); |
1968 | out: | 1994 | out: |
1969 | return err; | 1995 | return err; |
1970 | } | 1996 | } |
1971 | 1997 | ||
1998 | /* | ||
1999 | * Linux sendmmsg interface | ||
2000 | */ | ||
2001 | |||
2002 | int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, | ||
2003 | unsigned int flags) | ||
2004 | { | ||
2005 | int fput_needed, err, datagrams; | ||
2006 | struct socket *sock; | ||
2007 | struct mmsghdr __user *entry; | ||
2008 | struct compat_mmsghdr __user *compat_entry; | ||
2009 | struct msghdr msg_sys; | ||
2010 | |||
2011 | datagrams = 0; | ||
2012 | |||
2013 | sock = sockfd_lookup_light(fd, &err, &fput_needed); | ||
2014 | if (!sock) | ||
2015 | return err; | ||
2016 | |||
2017 | err = sock_error(sock->sk); | ||
2018 | if (err) | ||
2019 | goto out_put; | ||
2020 | |||
2021 | entry = mmsg; | ||
2022 | compat_entry = (struct compat_mmsghdr __user *)mmsg; | ||
2023 | |||
2024 | while (datagrams < vlen) { | ||
2025 | /* | ||
2026 | * No need to ask LSM for more than the first datagram. | ||
2027 | */ | ||
2028 | if (MSG_CMSG_COMPAT & flags) { | ||
2029 | err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry, | ||
2030 | &msg_sys, flags, datagrams); | ||
2031 | if (err < 0) | ||
2032 | break; | ||
2033 | err = __put_user(err, &compat_entry->msg_len); | ||
2034 | ++compat_entry; | ||
2035 | } else { | ||
2036 | err = __sys_sendmsg(sock, (struct msghdr __user *)entry, | ||
2037 | &msg_sys, flags, datagrams); | ||
2038 | if (err < 0) | ||
2039 | break; | ||
2040 | err = put_user(err, &entry->msg_len); | ||
2041 | ++entry; | ||
2042 | } | ||
2043 | |||
2044 | if (err) | ||
2045 | break; | ||
2046 | ++datagrams; | ||
2047 | } | ||
2048 | |||
2049 | out_put: | ||
2050 | fput_light(sock->file, fput_needed); | ||
2051 | |||
2052 | if (err == 0) | ||
2053 | return datagrams; | ||
2054 | |||
2055 | if (datagrams != 0) { | ||
2056 | /* | ||
2057 | * We may send fewer entries than requested (vlen) if the | ||
2058 | * sock is non blocking... | ||
2059 | */ | ||
2060 | if (err != -EAGAIN) { | ||
2061 | /* | ||
2062 | * ... or if sendmsg returns an error after we | ||
2063 | * send some datagrams, where we record the | ||
2064 | * error to return on the next call or if the | ||
2065 | * app asks about it using getsockopt(SO_ERROR). | ||
2066 | */ | ||
2067 | sock->sk->sk_err = -err; | ||
2068 | } | ||
2069 | |||
2070 | return datagrams; | ||
2071 | } | ||
2072 | |||
2073 | return err; | ||
2074 | } | ||
2075 | |||
2076 | SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg, | ||
2077 | unsigned int, vlen, unsigned int, flags) | ||
2078 | { | ||
2079 | return __sys_sendmmsg(fd, mmsg, vlen, flags); | ||
2080 | } | ||
2081 | |||
1972 | static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, | 2082 | static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, |
1973 | struct msghdr *msg_sys, unsigned flags, int nosec) | 2083 | struct msghdr *msg_sys, unsigned flags, int nosec) |
1974 | { | 2084 | { |
@@ -2214,11 +2324,11 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, | |||
2214 | #ifdef __ARCH_WANT_SYS_SOCKETCALL | 2324 | #ifdef __ARCH_WANT_SYS_SOCKETCALL |
2215 | /* Argument list sizes for sys_socketcall */ | 2325 | /* Argument list sizes for sys_socketcall */ |
2216 | #define AL(x) ((x) * sizeof(unsigned long)) | 2326 | #define AL(x) ((x) * sizeof(unsigned long)) |
2217 | static const unsigned char nargs[20] = { | 2327 | static const unsigned char nargs[21] = { |
2218 | AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), | 2328 | AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), |
2219 | AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), | 2329 | AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), |
2220 | AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), | 2330 | AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), |
2221 | AL(4), AL(5) | 2331 | AL(4), AL(5), AL(4) |
2222 | }; | 2332 | }; |
2223 | 2333 | ||
2224 | #undef AL | 2334 | #undef AL |
@@ -2238,7 +2348,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) | |||
2238 | int err; | 2348 | int err; |
2239 | unsigned int len; | 2349 | unsigned int len; |
2240 | 2350 | ||
2241 | if (call < 1 || call > SYS_RECVMMSG) | 2351 | if (call < 1 || call > SYS_SENDMMSG) |
2242 | return -EINVAL; | 2352 | return -EINVAL; |
2243 | 2353 | ||
2244 | len = nargs[call]; | 2354 | len = nargs[call]; |
@@ -2313,6 +2423,9 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) | |||
2313 | case SYS_SENDMSG: | 2423 | case SYS_SENDMSG: |
2314 | err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); | 2424 | err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); |
2315 | break; | 2425 | break; |
2426 | case SYS_SENDMMSG: | ||
2427 | err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]); | ||
2428 | break; | ||
2316 | case SYS_RECVMSG: | 2429 | case SYS_RECVMSG: |
2317 | err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); | 2430 | err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); |
2318 | break; | 2431 | break; |