aboutsummaryrefslogtreecommitdiffstats
path: root/net/socket.c
diff options
context:
space:
mode:
author Anton Blanchard <anton@samba.org> 2011-05-02 16:21:35 -0400
committer David S. Miller <davem@davemloft.net> 2011-05-05 14:10:14 -0400
commit 228e548e602061b08ee8e8966f567c12aa079682 (patch)
tree 4c79ecf071d6174d42da1557812a4646d0aaa5af /net/socket.c
parent 1c5cae815d19ffe02bdfda1260949ef2b1806171 (diff)
net: Add sendmmsg socket system call
This patch adds a multiple message send syscall and is the send version of
the existing recvmmsg syscall. This is heavily based on the patch by
Arnaldo that added recvmmsg.

I wrote a microbenchmark to test the performance gains of using this new
syscall:

http://ozlabs.org/~anton/junkcode/sendmmsg_test.c

The test was run on a ppc64 box with a 10 Gbit network card. The benchmark
can send both UDP and RAW ethernet packets.

64B UDP

batch   pkts/sec
1       804570
2       872800 (+ 8 %)
4       916556 (+14 %)
8       939712 (+17 %)
16      952688 (+18 %)
32      956448 (+19 %)
64      964800 (+20 %)

64B raw socket

batch   pkts/sec
1       1201449
2       1350028 (+12 %)
4       1461416 (+22 %)
8       1513080 (+26 %)
16      1541216 (+28 %)
32      1553440 (+29 %)
64      1557888 (+30 %)

We see a 20% improvement in throughput on UDP send and 30% on raw socket
send.

[ Add sparc syscall entries. -DaveM ]

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/socket.c')
-rw-r--r-- net/socket.c 199
1 files changed, 156 insertions, 43 deletions
diff --git a/net/socket.c b/net/socket.c
index d25f5a9d6fa2..ed50255143d5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -551,11 +551,10 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
551} 551}
552EXPORT_SYMBOL(sock_tx_timestamp); 552EXPORT_SYMBOL(sock_tx_timestamp);
553 553
554static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, 554static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
555 struct msghdr *msg, size_t size) 555 struct msghdr *msg, size_t size)
556{ 556{
557 struct sock_iocb *si = kiocb_to_siocb(iocb); 557 struct sock_iocb *si = kiocb_to_siocb(iocb);
558 int err;
559 558
560 sock_update_classid(sock->sk); 559 sock_update_classid(sock->sk);
561 560
@@ -564,13 +563,17 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
564 si->msg = msg; 563 si->msg = msg;
565 si->size = size; 564 si->size = size;
566 565
567 err = security_socket_sendmsg(sock, msg, size);
568 if (err)
569 return err;
570
571 return sock->ops->sendmsg(iocb, sock, msg, size); 566 return sock->ops->sendmsg(iocb, sock, msg, size);
572} 567}
573 568
569static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
570 struct msghdr *msg, size_t size)
571{
572 int err = security_socket_sendmsg(sock, msg, size);
573
574 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
575}
576
574int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) 577int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
575{ 578{
576 struct kiocb iocb; 579 struct kiocb iocb;
@@ -586,6 +589,20 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
586} 589}
587EXPORT_SYMBOL(sock_sendmsg); 590EXPORT_SYMBOL(sock_sendmsg);
588 591
592int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
593{
594 struct kiocb iocb;
595 struct sock_iocb siocb;
596 int ret;
597
598 init_sync_kiocb(&iocb, NULL);
599 iocb.private = &siocb;
600 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
601 if (-EIOCBQUEUED == ret)
602 ret = wait_on_sync_kiocb(&iocb);
603 return ret;
604}
605
589int kernel_sendmsg(struct socket *sock, struct msghdr *msg, 606int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
590 struct kvec *vec, size_t num, size_t size) 607 struct kvec *vec, size_t num, size_t size)
591{ 608{
@@ -1863,57 +1880,47 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1863#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) 1880#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
1864#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) 1881#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1865 1882
1866/* 1883static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
1867 * BSD sendmsg interface 1884 struct msghdr *msg_sys, unsigned flags, int nosec)
1868 */
1869
1870SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1871{ 1885{
1872 struct compat_msghdr __user *msg_compat = 1886 struct compat_msghdr __user *msg_compat =
1873 (struct compat_msghdr __user *)msg; 1887 (struct compat_msghdr __user *)msg;
1874 struct socket *sock;
1875 struct sockaddr_storage address; 1888 struct sockaddr_storage address;
1876 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; 1889 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1877 unsigned char ctl[sizeof(struct cmsghdr) + 20] 1890 unsigned char ctl[sizeof(struct cmsghdr) + 20]
1878 __attribute__ ((aligned(sizeof(__kernel_size_t)))); 1891 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1879 /* 20 is size of ipv6_pktinfo */ 1892 /* 20 is size of ipv6_pktinfo */
1880 unsigned char *ctl_buf = ctl; 1893 unsigned char *ctl_buf = ctl;
1881 struct msghdr msg_sys;
1882 int err, ctl_len, iov_size, total_len; 1894 int err, ctl_len, iov_size, total_len;
1883 int fput_needed;
1884 1895
1885 err = -EFAULT; 1896 err = -EFAULT;
1886 if (MSG_CMSG_COMPAT & flags) { 1897 if (MSG_CMSG_COMPAT & flags) {
1887 if (get_compat_msghdr(&msg_sys, msg_compat)) 1898 if (get_compat_msghdr(msg_sys, msg_compat))
1888 return -EFAULT; 1899 return -EFAULT;
1889 } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) 1900 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
1890 return -EFAULT; 1901 return -EFAULT;
1891 1902
1892 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1893 if (!sock)
1894 goto out;
1895
1896 /* do not move before msg_sys is valid */ 1903 /* do not move before msg_sys is valid */
1897 err = -EMSGSIZE; 1904 err = -EMSGSIZE;
1898 if (msg_sys.msg_iovlen > UIO_MAXIOV) 1905 if (msg_sys->msg_iovlen > UIO_MAXIOV)
1899 goto out_put; 1906 goto out;
1900 1907
1901 /* Check whether to allocate the iovec area */ 1908 /* Check whether to allocate the iovec area */
1902 err = -ENOMEM; 1909 err = -ENOMEM;
1903 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); 1910 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
1904 if (msg_sys.msg_iovlen > UIO_FASTIOV) { 1911 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
1905 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); 1912 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1906 if (!iov) 1913 if (!iov)
1907 goto out_put; 1914 goto out;
1908 } 1915 }
1909 1916
1910 /* This will also move the address data into kernel space */ 1917 /* This will also move the address data into kernel space */
1911 if (MSG_CMSG_COMPAT & flags) { 1918 if (MSG_CMSG_COMPAT & flags) {
1912 err = verify_compat_iovec(&msg_sys, iov, 1919 err = verify_compat_iovec(msg_sys, iov,
1913 (struct sockaddr *)&address, 1920 (struct sockaddr *)&address,
1914 VERIFY_READ); 1921 VERIFY_READ);
1915 } else 1922 } else
1916 err = verify_iovec(&msg_sys, iov, 1923 err = verify_iovec(msg_sys, iov,
1917 (struct sockaddr *)&address, 1924 (struct sockaddr *)&address,
1918 VERIFY_READ); 1925 VERIFY_READ);
1919 if (err < 0) 1926 if (err < 0)
@@ -1922,17 +1929,17 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1922 1929
1923 err = -ENOBUFS; 1930 err = -ENOBUFS;
1924 1931
1925 if (msg_sys.msg_controllen > INT_MAX) 1932 if (msg_sys->msg_controllen > INT_MAX)
1926 goto out_freeiov; 1933 goto out_freeiov;
1927 ctl_len = msg_sys.msg_controllen; 1934 ctl_len = msg_sys->msg_controllen;
1928 if ((MSG_CMSG_COMPAT & flags) && ctl_len) { 1935 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
1929 err = 1936 err =
1930 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, 1937 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
1931 sizeof(ctl)); 1938 sizeof(ctl));
1932 if (err) 1939 if (err)
1933 goto out_freeiov; 1940 goto out_freeiov;
1934 ctl_buf = msg_sys.msg_control; 1941 ctl_buf = msg_sys->msg_control;
1935 ctl_len = msg_sys.msg_controllen; 1942 ctl_len = msg_sys->msg_controllen;
1936 } else if (ctl_len) { 1943 } else if (ctl_len) {
1937 if (ctl_len > sizeof(ctl)) { 1944 if (ctl_len > sizeof(ctl)) {
1938 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); 1945 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
@@ -1941,21 +1948,22 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1941 } 1948 }
1942 err = -EFAULT; 1949 err = -EFAULT;
1943 /* 1950 /*
1944 * Careful! Before this, msg_sys.msg_control contains a user pointer. 1951 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1945 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted 1952 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1946 * checking falls down on this. 1953 * checking falls down on this.
1947 */ 1954 */
1948 if (copy_from_user(ctl_buf, 1955 if (copy_from_user(ctl_buf,
1949 (void __user __force *)msg_sys.msg_control, 1956 (void __user __force *)msg_sys->msg_control,
1950 ctl_len)) 1957 ctl_len))
1951 goto out_freectl; 1958 goto out_freectl;
1952 msg_sys.msg_control = ctl_buf; 1959 msg_sys->msg_control = ctl_buf;
1953 } 1960 }
1954 msg_sys.msg_flags = flags; 1961 msg_sys->msg_flags = flags;
1955 1962
1956 if (sock->file->f_flags & O_NONBLOCK) 1963 if (sock->file->f_flags & O_NONBLOCK)
1957 msg_sys.msg_flags |= MSG_DONTWAIT; 1964 msg_sys->msg_flags |= MSG_DONTWAIT;
1958 err = sock_sendmsg(sock, &msg_sys, total_len); 1965 err = (nosec ? sock_sendmsg_nosec : sock_sendmsg)(sock, msg_sys,
1966 total_len);
1959 1967
1960out_freectl: 1968out_freectl:
1961 if (ctl_buf != ctl) 1969 if (ctl_buf != ctl)
@@ -1963,12 +1971,114 @@ out_freectl:
1963out_freeiov: 1971out_freeiov:
1964 if (iov != iovstack) 1972 if (iov != iovstack)
1965 sock_kfree_s(sock->sk, iov, iov_size); 1973 sock_kfree_s(sock->sk, iov, iov_size);
1966out_put: 1974out:
1975 return err;
1976}
1977
1978/*
1979 * BSD sendmsg interface
1980 */
1981
1982SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1983{
1984 int fput_needed, err;
1985 struct msghdr msg_sys;
1986 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
1987
1988 if (!sock)
1989 goto out;
1990
1991 err = __sys_sendmsg(sock, msg, &msg_sys, flags, 0);
1992
1967 fput_light(sock->file, fput_needed); 1993 fput_light(sock->file, fput_needed);
1968out: 1994out:
1969 return err; 1995 return err;
1970} 1996}
1971 1997
1998/*
1999 * Linux sendmmsg interface
2000 */
2001
2002int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2003 unsigned int flags)
2004{
2005 int fput_needed, err, datagrams;
2006 struct socket *sock;
2007 struct mmsghdr __user *entry;
2008 struct compat_mmsghdr __user *compat_entry;
2009 struct msghdr msg_sys;
2010
2011 datagrams = 0;
2012
2013 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2014 if (!sock)
2015 return err;
2016
2017 err = sock_error(sock->sk);
2018 if (err)
2019 goto out_put;
2020
2021 entry = mmsg;
2022 compat_entry = (struct compat_mmsghdr __user *)mmsg;
2023
2024 while (datagrams < vlen) {
2025 /*
2026 * No need to ask LSM for more than the first datagram.
2027 */
2028 if (MSG_CMSG_COMPAT & flags) {
2029 err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
2030 &msg_sys, flags, datagrams);
2031 if (err < 0)
2032 break;
2033 err = __put_user(err, &compat_entry->msg_len);
2034 ++compat_entry;
2035 } else {
2036 err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
2037 &msg_sys, flags, datagrams);
2038 if (err < 0)
2039 break;
2040 err = put_user(err, &entry->msg_len);
2041 ++entry;
2042 }
2043
2044 if (err)
2045 break;
2046 ++datagrams;
2047 }
2048
2049out_put:
2050 fput_light(sock->file, fput_needed);
2051
2052 if (err == 0)
2053 return datagrams;
2054
2055 if (datagrams != 0) {
2056 /*
2057 * We may send less entries than requested (vlen) if the
2058 * sock is non blocking...
2059 */
2060 if (err != -EAGAIN) {
2061 /*
2062 * ... or if sendmsg returns an error after we
2063 * send some datagrams, where we record the
2064 * error to return on the next call or if the
2065 * app asks about it using getsockopt(SO_ERROR).
2066 */
2067 sock->sk->sk_err = -err;
2068 }
2069
2070 return datagrams;
2071 }
2072
2073 return err;
2074}
2075
2076SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2077 unsigned int, vlen, unsigned int, flags)
2078{
2079 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2080}
2081
1972static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, 2082static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
1973 struct msghdr *msg_sys, unsigned flags, int nosec) 2083 struct msghdr *msg_sys, unsigned flags, int nosec)
1974{ 2084{
@@ -2214,11 +2324,11 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2214#ifdef __ARCH_WANT_SYS_SOCKETCALL 2324#ifdef __ARCH_WANT_SYS_SOCKETCALL
2215/* Argument list sizes for sys_socketcall */ 2325/* Argument list sizes for sys_socketcall */
2216#define AL(x) ((x) * sizeof(unsigned long)) 2326#define AL(x) ((x) * sizeof(unsigned long))
2217static const unsigned char nargs[20] = { 2327static const unsigned char nargs[21] = {
2218 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), 2328 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
2219 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), 2329 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
2220 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), 2330 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
2221 AL(4), AL(5) 2331 AL(4), AL(5), AL(4)
2222}; 2332};
2223 2333
2224#undef AL 2334#undef AL
@@ -2238,7 +2348,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
2238 int err; 2348 int err;
2239 unsigned int len; 2349 unsigned int len;
2240 2350
2241 if (call < 1 || call > SYS_RECVMMSG) 2351 if (call < 1 || call > SYS_SENDMMSG)
2242 return -EINVAL; 2352 return -EINVAL;
2243 2353
2244 len = nargs[call]; 2354 len = nargs[call];
@@ -2313,6 +2423,9 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
2313 case SYS_SENDMSG: 2423 case SYS_SENDMSG:
2314 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); 2424 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2315 break; 2425 break;
2426 case SYS_SENDMMSG:
2427 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2428 break;
2316 case SYS_RECVMSG: 2429 case SYS_RECVMSG:
2317 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); 2430 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2318 break; 2431 break;