aboutsummaryrefslogtreecommitdiffstats
path: root/net/socket.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/socket.c')
-rw-r--r--net/socket.c485
1 files changed, 372 insertions, 113 deletions
diff --git a/net/socket.c b/net/socket.c
index 2270b941bcc7..02dc82db3d23 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -156,7 +156,7 @@ static const struct file_operations socket_file_ops = {
156 */ 156 */
157 157
158static DEFINE_SPINLOCK(net_family_lock); 158static DEFINE_SPINLOCK(net_family_lock);
159static const struct net_proto_family *net_families[NPROTO] __read_mostly; 159static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
160 160
161/* 161/*
162 * Statistics counters of the socket lists 162 * Statistics counters of the socket lists
@@ -209,8 +209,8 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr)
209 * specified. Zero is returned for a success. 209 * specified. Zero is returned for a success.
210 */ 210 */
211 211
212int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr, 212static int move_addr_to_user(struct sockaddr *kaddr, int klen,
213 int __user *ulen) 213 void __user *uaddr, int __user *ulen)
214{ 214{
215 int err; 215 int err;
216 int len; 216 int len;
@@ -240,17 +240,19 @@ static struct kmem_cache *sock_inode_cachep __read_mostly;
240static struct inode *sock_alloc_inode(struct super_block *sb) 240static struct inode *sock_alloc_inode(struct super_block *sb)
241{ 241{
242 struct socket_alloc *ei; 242 struct socket_alloc *ei;
243 struct socket_wq *wq;
243 244
244 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); 245 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
245 if (!ei) 246 if (!ei)
246 return NULL; 247 return NULL;
247 ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL); 248 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
248 if (!ei->socket.wq) { 249 if (!wq) {
249 kmem_cache_free(sock_inode_cachep, ei); 250 kmem_cache_free(sock_inode_cachep, ei);
250 return NULL; 251 return NULL;
251 } 252 }
252 init_waitqueue_head(&ei->socket.wq->wait); 253 init_waitqueue_head(&wq->wait);
253 ei->socket.wq->fasync_list = NULL; 254 wq->fasync_list = NULL;
255 RCU_INIT_POINTER(ei->socket.wq, wq);
254 256
255 ei->socket.state = SS_UNCONNECTED; 257 ei->socket.state = SS_UNCONNECTED;
256 ei->socket.flags = 0; 258 ei->socket.flags = 0;
@@ -261,20 +263,14 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
261 return &ei->vfs_inode; 263 return &ei->vfs_inode;
262} 264}
263 265
264
265static void wq_free_rcu(struct rcu_head *head)
266{
267 struct socket_wq *wq = container_of(head, struct socket_wq, rcu);
268
269 kfree(wq);
270}
271
272static void sock_destroy_inode(struct inode *inode) 266static void sock_destroy_inode(struct inode *inode)
273{ 267{
274 struct socket_alloc *ei; 268 struct socket_alloc *ei;
269 struct socket_wq *wq;
275 270
276 ei = container_of(inode, struct socket_alloc, vfs_inode); 271 ei = container_of(inode, struct socket_alloc, vfs_inode);
277 call_rcu(&ei->socket.wq->rcu, wq_free_rcu); 272 wq = rcu_dereference_protected(ei->socket.wq, 1);
273 kfree_rcu(wq, rcu);
278 kmem_cache_free(sock_inode_cachep, ei); 274 kmem_cache_free(sock_inode_cachep, ei);
279} 275}
280 276
@@ -305,22 +301,6 @@ static const struct super_operations sockfs_ops = {
305 .statfs = simple_statfs, 301 .statfs = simple_statfs,
306}; 302};
307 303
308static int sockfs_get_sb(struct file_system_type *fs_type,
309 int flags, const char *dev_name, void *data,
310 struct vfsmount *mnt)
311{
312 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
313 mnt);
314}
315
316static struct vfsmount *sock_mnt __read_mostly;
317
318static struct file_system_type sock_fs_type = {
319 .name = "sockfs",
320 .get_sb = sockfs_get_sb,
321 .kill_sb = kill_anon_super,
322};
323
324/* 304/*
325 * sockfs_dname() is called from d_path(). 305 * sockfs_dname() is called from d_path().
326 */ 306 */
@@ -334,6 +314,21 @@ static const struct dentry_operations sockfs_dentry_operations = {
334 .d_dname = sockfs_dname, 314 .d_dname = sockfs_dname,
335}; 315};
336 316
317static struct dentry *sockfs_mount(struct file_system_type *fs_type,
318 int flags, const char *dev_name, void *data)
319{
320 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
321 &sockfs_dentry_operations, SOCKFS_MAGIC);
322}
323
324static struct vfsmount *sock_mnt __read_mostly;
325
326static struct file_system_type sock_fs_type = {
327 .name = "sockfs",
328 .mount = sockfs_mount,
329 .kill_sb = kill_anon_super,
330};
331
337/* 332/*
338 * Obtains the first available file descriptor and sets it up for use. 333 * Obtains the first available file descriptor and sets it up for use.
339 * 334 *
@@ -362,14 +357,13 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
362 if (unlikely(fd < 0)) 357 if (unlikely(fd < 0))
363 return fd; 358 return fd;
364 359
365 path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); 360 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
366 if (unlikely(!path.dentry)) { 361 if (unlikely(!path.dentry)) {
367 put_unused_fd(fd); 362 put_unused_fd(fd);
368 return -ENOMEM; 363 return -ENOMEM;
369 } 364 }
370 path.mnt = mntget(sock_mnt); 365 path.mnt = mntget(sock_mnt);
371 366
372 path.dentry->d_op = &sockfs_dentry_operations;
373 d_instantiate(path.dentry, SOCK_INODE(sock)); 367 d_instantiate(path.dentry, SOCK_INODE(sock));
374 SOCK_INODE(sock)->i_fop = &socket_file_ops; 368 SOCK_INODE(sock)->i_fop = &socket_file_ops;
375 369
@@ -377,7 +371,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
377 &socket_file_ops); 371 &socket_file_ops);
378 if (unlikely(!file)) { 372 if (unlikely(!file)) {
379 /* drop dentry, keep inode */ 373 /* drop dentry, keep inode */
380 atomic_inc(&path.dentry->d_inode->i_count); 374 ihold(path.dentry->d_inode);
381 path_put(&path); 375 path_put(&path);
382 put_unused_fd(fd); 376 put_unused_fd(fd);
383 return -ENFILE; 377 return -ENFILE;
@@ -480,6 +474,7 @@ static struct socket *sock_alloc(void)
480 sock = SOCKET_I(inode); 474 sock = SOCKET_I(inode);
481 475
482 kmemcheck_annotate_bitfield(sock, type); 476 kmemcheck_annotate_bitfield(sock, type);
477 inode->i_ino = get_next_ino();
483 inode->i_mode = S_IFSOCK | S_IRWXUGO; 478 inode->i_mode = S_IFSOCK | S_IRWXUGO;
484 inode->i_uid = current_fsuid(); 479 inode->i_uid = current_fsuid();
485 inode->i_gid = current_fsgid(); 480 inode->i_gid = current_fsgid();
@@ -502,6 +497,7 @@ static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
502const struct file_operations bad_sock_fops = { 497const struct file_operations bad_sock_fops = {
503 .owner = THIS_MODULE, 498 .owner = THIS_MODULE,
504 .open = sock_no_open, 499 .open = sock_no_open,
500 .llseek = noop_llseek,
505}; 501};
506 502
507/** 503/**
@@ -523,7 +519,7 @@ void sock_release(struct socket *sock)
523 module_put(owner); 519 module_put(owner);
524 } 520 }
525 521
526 if (sock->wq->fasync_list) 522 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
527 printk(KERN_ERR "sock_release: fasync list not empty!\n"); 523 printk(KERN_ERR "sock_release: fasync list not empty!\n");
528 524
529 percpu_sub(sockets_in_use, 1); 525 percpu_sub(sockets_in_use, 1);
@@ -535,23 +531,21 @@ void sock_release(struct socket *sock)
535} 531}
536EXPORT_SYMBOL(sock_release); 532EXPORT_SYMBOL(sock_release);
537 533
538int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, 534int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
539 union skb_shared_tx *shtx)
540{ 535{
541 shtx->flags = 0; 536 *tx_flags = 0;
542 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) 537 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
543 shtx->hardware = 1; 538 *tx_flags |= SKBTX_HW_TSTAMP;
544 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) 539 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
545 shtx->software = 1; 540 *tx_flags |= SKBTX_SW_TSTAMP;
546 return 0; 541 return 0;
547} 542}
548EXPORT_SYMBOL(sock_tx_timestamp); 543EXPORT_SYMBOL(sock_tx_timestamp);
549 544
550static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, 545static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
551 struct msghdr *msg, size_t size) 546 struct msghdr *msg, size_t size)
552{ 547{
553 struct sock_iocb *si = kiocb_to_siocb(iocb); 548 struct sock_iocb *si = kiocb_to_siocb(iocb);
554 int err;
555 549
556 sock_update_classid(sock->sk); 550 sock_update_classid(sock->sk);
557 551
@@ -560,13 +554,17 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
560 si->msg = msg; 554 si->msg = msg;
561 si->size = size; 555 si->size = size;
562 556
563 err = security_socket_sendmsg(sock, msg, size);
564 if (err)
565 return err;
566
567 return sock->ops->sendmsg(iocb, sock, msg, size); 557 return sock->ops->sendmsg(iocb, sock, msg, size);
568} 558}
569 559
560static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
561 struct msghdr *msg, size_t size)
562{
563 int err = security_socket_sendmsg(sock, msg, size);
564
565 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
566}
567
570int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) 568int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
571{ 569{
572 struct kiocb iocb; 570 struct kiocb iocb;
@@ -582,6 +580,20 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
582} 580}
583EXPORT_SYMBOL(sock_sendmsg); 581EXPORT_SYMBOL(sock_sendmsg);
584 582
583int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
584{
585 struct kiocb iocb;
586 struct sock_iocb siocb;
587 int ret;
588
589 init_sync_kiocb(&iocb, NULL);
590 iocb.private = &siocb;
591 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
592 if (-EIOCBQUEUED == ret)
593 ret = wait_on_sync_kiocb(&iocb);
594 return ret;
595}
596
585int kernel_sendmsg(struct socket *sock, struct msghdr *msg, 597int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
586 struct kvec *vec, size_t num, size_t size) 598 struct kvec *vec, size_t num, size_t size)
587{ 599{
@@ -662,7 +674,8 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
662} 674}
663EXPORT_SYMBOL_GPL(__sock_recv_timestamp); 675EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
664 676
665inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) 677static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
678 struct sk_buff *skb)
666{ 679{
667 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount) 680 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
668 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, 681 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
@@ -732,6 +745,21 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
732 return ret; 745 return ret;
733} 746}
734 747
748/**
749 * kernel_recvmsg - Receive a message from a socket (kernel space)
750 * @sock: The socket to receive the message from
751 * @msg: Received message
752 * @vec: Input s/g array for message data
753 * @num: Size of input s/g array
754 * @size: Number of bytes to read
755 * @flags: Message flags (MSG_DONTWAIT, etc...)
756 *
757 * On return the msg structure contains the scatter/gather array passed in the
758 * vec argument. The array is modified so that it consists of the unfilled
759 * portion of the original array.
760 *
761 * The returned value is the total number of bytes received, or an error.
762 */
735int kernel_recvmsg(struct socket *sock, struct msghdr *msg, 763int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
736 struct kvec *vec, size_t num, size_t size, int flags) 764 struct kvec *vec, size_t num, size_t size, int flags)
737{ 765{
@@ -1092,15 +1120,16 @@ static int sock_fasync(int fd, struct file *filp, int on)
1092{ 1120{
1093 struct socket *sock = filp->private_data; 1121 struct socket *sock = filp->private_data;
1094 struct sock *sk = sock->sk; 1122 struct sock *sk = sock->sk;
1123 struct socket_wq *wq;
1095 1124
1096 if (sk == NULL) 1125 if (sk == NULL)
1097 return -EINVAL; 1126 return -EINVAL;
1098 1127
1099 lock_sock(sk); 1128 lock_sock(sk);
1129 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1130 fasync_helper(fd, filp, on, &wq->fasync_list);
1100 1131
1101 fasync_helper(fd, filp, on, &sock->wq->fasync_list); 1132 if (!wq->fasync_list)
1102
1103 if (!sock->wq->fasync_list)
1104 sock_reset_flag(sk, SOCK_FASYNC); 1133 sock_reset_flag(sk, SOCK_FASYNC);
1105 else 1134 else
1106 sock_set_flag(sk, SOCK_FASYNC); 1135 sock_set_flag(sk, SOCK_FASYNC);
@@ -1144,7 +1173,7 @@ call_kill:
1144} 1173}
1145EXPORT_SYMBOL(sock_wake_async); 1174EXPORT_SYMBOL(sock_wake_async);
1146 1175
1147static int __sock_create(struct net *net, int family, int type, int protocol, 1176int __sock_create(struct net *net, int family, int type, int protocol,
1148 struct socket **res, int kern) 1177 struct socket **res, int kern)
1149{ 1178{
1150 int err; 1179 int err;
@@ -1200,7 +1229,7 @@ static int __sock_create(struct net *net, int family, int type, int protocol,
1200 * requested real, full-featured networking support upon configuration. 1229 * requested real, full-featured networking support upon configuration.
1201 * Otherwise module support will break! 1230 * Otherwise module support will break!
1202 */ 1231 */
1203 if (net_families[family] == NULL) 1232 if (rcu_access_pointer(net_families[family]) == NULL)
1204 request_module("net-pf-%d", family); 1233 request_module("net-pf-%d", family);
1205#endif 1234#endif
1206 1235
@@ -1256,6 +1285,7 @@ out_release:
1256 rcu_read_unlock(); 1285 rcu_read_unlock();
1257 goto out_sock_release; 1286 goto out_sock_release;
1258} 1287}
1288EXPORT_SYMBOL(__sock_create);
1259 1289
1260int sock_create(int family, int type, int protocol, struct socket **res) 1290int sock_create(int family, int type, int protocol, struct socket **res)
1261{ 1291{
@@ -1651,6 +1681,8 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1651 struct iovec iov; 1681 struct iovec iov;
1652 int fput_needed; 1682 int fput_needed;
1653 1683
1684 if (len > INT_MAX)
1685 len = INT_MAX;
1654 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1686 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1655 if (!sock) 1687 if (!sock)
1656 goto out; 1688 goto out;
@@ -1708,6 +1740,8 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
1708 int err, err2; 1740 int err, err2;
1709 int fput_needed; 1741 int fput_needed;
1710 1742
1743 if (size > INT_MAX)
1744 size = INT_MAX;
1711 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1745 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1712 if (!sock) 1746 if (!sock)
1713 goto out; 1747 goto out;
@@ -1837,57 +1871,47 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1837#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) 1871#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
1838#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) 1872#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1839 1873
1840/* 1874static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
1841 * BSD sendmsg interface 1875 struct msghdr *msg_sys, unsigned flags, int nosec)
1842 */
1843
1844SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1845{ 1876{
1846 struct compat_msghdr __user *msg_compat = 1877 struct compat_msghdr __user *msg_compat =
1847 (struct compat_msghdr __user *)msg; 1878 (struct compat_msghdr __user *)msg;
1848 struct socket *sock;
1849 struct sockaddr_storage address; 1879 struct sockaddr_storage address;
1850 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; 1880 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1851 unsigned char ctl[sizeof(struct cmsghdr) + 20] 1881 unsigned char ctl[sizeof(struct cmsghdr) + 20]
1852 __attribute__ ((aligned(sizeof(__kernel_size_t)))); 1882 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1853 /* 20 is size of ipv6_pktinfo */ 1883 /* 20 is size of ipv6_pktinfo */
1854 unsigned char *ctl_buf = ctl; 1884 unsigned char *ctl_buf = ctl;
1855 struct msghdr msg_sys;
1856 int err, ctl_len, iov_size, total_len; 1885 int err, ctl_len, iov_size, total_len;
1857 int fput_needed;
1858 1886
1859 err = -EFAULT; 1887 err = -EFAULT;
1860 if (MSG_CMSG_COMPAT & flags) { 1888 if (MSG_CMSG_COMPAT & flags) {
1861 if (get_compat_msghdr(&msg_sys, msg_compat)) 1889 if (get_compat_msghdr(msg_sys, msg_compat))
1862 return -EFAULT; 1890 return -EFAULT;
1863 } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) 1891 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
1864 return -EFAULT; 1892 return -EFAULT;
1865 1893
1866 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1867 if (!sock)
1868 goto out;
1869
1870 /* do not move before msg_sys is valid */ 1894 /* do not move before msg_sys is valid */
1871 err = -EMSGSIZE; 1895 err = -EMSGSIZE;
1872 if (msg_sys.msg_iovlen > UIO_MAXIOV) 1896 if (msg_sys->msg_iovlen > UIO_MAXIOV)
1873 goto out_put; 1897 goto out;
1874 1898
1875 /* Check whether to allocate the iovec area */ 1899 /* Check whether to allocate the iovec area */
1876 err = -ENOMEM; 1900 err = -ENOMEM;
1877 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); 1901 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
1878 if (msg_sys.msg_iovlen > UIO_FASTIOV) { 1902 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
1879 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); 1903 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1880 if (!iov) 1904 if (!iov)
1881 goto out_put; 1905 goto out;
1882 } 1906 }
1883 1907
1884 /* This will also move the address data into kernel space */ 1908 /* This will also move the address data into kernel space */
1885 if (MSG_CMSG_COMPAT & flags) { 1909 if (MSG_CMSG_COMPAT & flags) {
1886 err = verify_compat_iovec(&msg_sys, iov, 1910 err = verify_compat_iovec(msg_sys, iov,
1887 (struct sockaddr *)&address, 1911 (struct sockaddr *)&address,
1888 VERIFY_READ); 1912 VERIFY_READ);
1889 } else 1913 } else
1890 err = verify_iovec(&msg_sys, iov, 1914 err = verify_iovec(msg_sys, iov,
1891 (struct sockaddr *)&address, 1915 (struct sockaddr *)&address,
1892 VERIFY_READ); 1916 VERIFY_READ);
1893 if (err < 0) 1917 if (err < 0)
@@ -1896,17 +1920,17 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1896 1920
1897 err = -ENOBUFS; 1921 err = -ENOBUFS;
1898 1922
1899 if (msg_sys.msg_controllen > INT_MAX) 1923 if (msg_sys->msg_controllen > INT_MAX)
1900 goto out_freeiov; 1924 goto out_freeiov;
1901 ctl_len = msg_sys.msg_controllen; 1925 ctl_len = msg_sys->msg_controllen;
1902 if ((MSG_CMSG_COMPAT & flags) && ctl_len) { 1926 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
1903 err = 1927 err =
1904 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, 1928 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
1905 sizeof(ctl)); 1929 sizeof(ctl));
1906 if (err) 1930 if (err)
1907 goto out_freeiov; 1931 goto out_freeiov;
1908 ctl_buf = msg_sys.msg_control; 1932 ctl_buf = msg_sys->msg_control;
1909 ctl_len = msg_sys.msg_controllen; 1933 ctl_len = msg_sys->msg_controllen;
1910 } else if (ctl_len) { 1934 } else if (ctl_len) {
1911 if (ctl_len > sizeof(ctl)) { 1935 if (ctl_len > sizeof(ctl)) {
1912 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); 1936 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
@@ -1915,20 +1939,22 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1915 } 1939 }
1916 err = -EFAULT; 1940 err = -EFAULT;
1917 /* 1941 /*
1918 * Careful! Before this, msg_sys.msg_control contains a user pointer. 1942 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1919 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted 1943 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1920 * checking falls down on this. 1944 * checking falls down on this.
1921 */ 1945 */
1922 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, 1946 if (copy_from_user(ctl_buf,
1947 (void __user __force *)msg_sys->msg_control,
1923 ctl_len)) 1948 ctl_len))
1924 goto out_freectl; 1949 goto out_freectl;
1925 msg_sys.msg_control = ctl_buf; 1950 msg_sys->msg_control = ctl_buf;
1926 } 1951 }
1927 msg_sys.msg_flags = flags; 1952 msg_sys->msg_flags = flags;
1928 1953
1929 if (sock->file->f_flags & O_NONBLOCK) 1954 if (sock->file->f_flags & O_NONBLOCK)
1930 msg_sys.msg_flags |= MSG_DONTWAIT; 1955 msg_sys->msg_flags |= MSG_DONTWAIT;
1931 err = sock_sendmsg(sock, &msg_sys, total_len); 1956 err = (nosec ? sock_sendmsg_nosec : sock_sendmsg)(sock, msg_sys,
1957 total_len);
1932 1958
1933out_freectl: 1959out_freectl:
1934 if (ctl_buf != ctl) 1960 if (ctl_buf != ctl)
@@ -1936,12 +1962,114 @@ out_freectl:
1936out_freeiov: 1962out_freeiov:
1937 if (iov != iovstack) 1963 if (iov != iovstack)
1938 sock_kfree_s(sock->sk, iov, iov_size); 1964 sock_kfree_s(sock->sk, iov, iov_size);
1939out_put: 1965out:
1966 return err;
1967}
1968
1969/*
1970 * BSD sendmsg interface
1971 */
1972
1973SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1974{
1975 int fput_needed, err;
1976 struct msghdr msg_sys;
1977 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
1978
1979 if (!sock)
1980 goto out;
1981
1982 err = __sys_sendmsg(sock, msg, &msg_sys, flags, 0);
1983
1940 fput_light(sock->file, fput_needed); 1984 fput_light(sock->file, fput_needed);
1941out: 1985out:
1942 return err; 1986 return err;
1943} 1987}
1944 1988
1989/*
1990 * Linux sendmmsg interface
1991 */
1992
1993int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
1994 unsigned int flags)
1995{
1996 int fput_needed, err, datagrams;
1997 struct socket *sock;
1998 struct mmsghdr __user *entry;
1999 struct compat_mmsghdr __user *compat_entry;
2000 struct msghdr msg_sys;
2001
2002 datagrams = 0;
2003
2004 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2005 if (!sock)
2006 return err;
2007
2008 err = sock_error(sock->sk);
2009 if (err)
2010 goto out_put;
2011
2012 entry = mmsg;
2013 compat_entry = (struct compat_mmsghdr __user *)mmsg;
2014
2015 while (datagrams < vlen) {
2016 /*
2017 * No need to ask LSM for more than the first datagram.
2018 */
2019 if (MSG_CMSG_COMPAT & flags) {
2020 err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
2021 &msg_sys, flags, datagrams);
2022 if (err < 0)
2023 break;
2024 err = __put_user(err, &compat_entry->msg_len);
2025 ++compat_entry;
2026 } else {
2027 err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
2028 &msg_sys, flags, datagrams);
2029 if (err < 0)
2030 break;
2031 err = put_user(err, &entry->msg_len);
2032 ++entry;
2033 }
2034
2035 if (err)
2036 break;
2037 ++datagrams;
2038 }
2039
2040out_put:
2041 fput_light(sock->file, fput_needed);
2042
2043 if (err == 0)
2044 return datagrams;
2045
2046 if (datagrams != 0) {
2047 /*
2048 * We may send less entries than requested (vlen) if the
2049 * sock is non blocking...
2050 */
2051 if (err != -EAGAIN) {
2052 /*
2053 * ... or if sendmsg returns an error after we
2054 * send some datagrams, where we record the
2055 * error to return on the next call or if the
2056 * app asks about it using getsockopt(SO_ERROR).
2057 */
2058 sock->sk->sk_err = -err;
2059 }
2060
2061 return datagrams;
2062 }
2063
2064 return err;
2065}
2066
2067SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2068 unsigned int, vlen, unsigned int, flags)
2069{
2070 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2071}
2072
1945static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, 2073static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
1946 struct msghdr *msg_sys, unsigned flags, int nosec) 2074 struct msghdr *msg_sys, unsigned flags, int nosec)
1947{ 2075{
@@ -2095,14 +2223,16 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2095 */ 2223 */
2096 if (MSG_CMSG_COMPAT & flags) { 2224 if (MSG_CMSG_COMPAT & flags) {
2097 err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry, 2225 err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
2098 &msg_sys, flags, datagrams); 2226 &msg_sys, flags & ~MSG_WAITFORONE,
2227 datagrams);
2099 if (err < 0) 2228 if (err < 0)
2100 break; 2229 break;
2101 err = __put_user(err, &compat_entry->msg_len); 2230 err = __put_user(err, &compat_entry->msg_len);
2102 ++compat_entry; 2231 ++compat_entry;
2103 } else { 2232 } else {
2104 err = __sys_recvmsg(sock, (struct msghdr __user *)entry, 2233 err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
2105 &msg_sys, flags, datagrams); 2234 &msg_sys, flags & ~MSG_WAITFORONE,
2235 datagrams);
2106 if (err < 0) 2236 if (err < 0)
2107 break; 2237 break;
2108 err = put_user(err, &entry->msg_len); 2238 err = put_user(err, &entry->msg_len);
@@ -2187,11 +2317,11 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2187#ifdef __ARCH_WANT_SYS_SOCKETCALL 2317#ifdef __ARCH_WANT_SYS_SOCKETCALL
2188/* Argument list sizes for sys_socketcall */ 2318/* Argument list sizes for sys_socketcall */
2189#define AL(x) ((x) * sizeof(unsigned long)) 2319#define AL(x) ((x) * sizeof(unsigned long))
2190static const unsigned char nargs[20] = { 2320static const unsigned char nargs[21] = {
2191 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), 2321 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
2192 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), 2322 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
2193 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), 2323 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
2194 AL(4), AL(5) 2324 AL(4), AL(5), AL(4)
2195}; 2325};
2196 2326
2197#undef AL 2327#undef AL
@@ -2211,7 +2341,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
2211 int err; 2341 int err;
2212 unsigned int len; 2342 unsigned int len;
2213 2343
2214 if (call < 1 || call > SYS_RECVMMSG) 2344 if (call < 1 || call > SYS_SENDMMSG)
2215 return -EINVAL; 2345 return -EINVAL;
2216 2346
2217 len = nargs[call]; 2347 len = nargs[call];
@@ -2286,6 +2416,9 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
2286 case SYS_SENDMSG: 2416 case SYS_SENDMSG:
2287 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); 2417 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2288 break; 2418 break;
2419 case SYS_SENDMMSG:
2420 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2421 break;
2289 case SYS_RECVMSG: 2422 case SYS_RECVMSG:
2290 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); 2423 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2291 break; 2424 break;
@@ -2326,10 +2459,11 @@ int sock_register(const struct net_proto_family *ops)
2326 } 2459 }
2327 2460
2328 spin_lock(&net_family_lock); 2461 spin_lock(&net_family_lock);
2329 if (net_families[ops->family]) 2462 if (rcu_dereference_protected(net_families[ops->family],
2463 lockdep_is_held(&net_family_lock)))
2330 err = -EEXIST; 2464 err = -EEXIST;
2331 else { 2465 else {
2332 net_families[ops->family] = ops; 2466 rcu_assign_pointer(net_families[ops->family], ops);
2333 err = 0; 2467 err = 0;
2334 } 2468 }
2335 spin_unlock(&net_family_lock); 2469 spin_unlock(&net_family_lock);
@@ -2357,7 +2491,7 @@ void sock_unregister(int family)
2357 BUG_ON(family < 0 || family >= NPROTO); 2491 BUG_ON(family < 0 || family >= NPROTO);
2358 2492
2359 spin_lock(&net_family_lock); 2493 spin_lock(&net_family_lock);
2360 net_families[family] = NULL; 2494 rcu_assign_pointer(net_families[family], NULL);
2361 spin_unlock(&net_family_lock); 2495 spin_unlock(&net_family_lock);
2362 2496
2363 synchronize_rcu(); 2497 synchronize_rcu();
@@ -2368,6 +2502,8 @@ EXPORT_SYMBOL(sock_unregister);
2368 2502
2369static int __init sock_init(void) 2503static int __init sock_init(void)
2370{ 2504{
2505 int err;
2506
2371 /* 2507 /*
2372 * Initialize sock SLAB cache. 2508 * Initialize sock SLAB cache.
2373 */ 2509 */
@@ -2384,8 +2520,15 @@ static int __init sock_init(void)
2384 */ 2520 */
2385 2521
2386 init_inodecache(); 2522 init_inodecache();
2387 register_filesystem(&sock_fs_type); 2523
2524 err = register_filesystem(&sock_fs_type);
2525 if (err)
2526 goto out_fs;
2388 sock_mnt = kern_mount(&sock_fs_type); 2527 sock_mnt = kern_mount(&sock_fs_type);
2528 if (IS_ERR(sock_mnt)) {
2529 err = PTR_ERR(sock_mnt);
2530 goto out_mount;
2531 }
2389 2532
2390 /* The real protocol initialization is performed in later initcalls. 2533 /* The real protocol initialization is performed in later initcalls.
2391 */ 2534 */
@@ -2398,7 +2541,13 @@ static int __init sock_init(void)
2398 skb_timestamping_init(); 2541 skb_timestamping_init();
2399#endif 2542#endif
2400 2543
2401 return 0; 2544out:
2545 return err;
2546
2547out_mount:
2548 unregister_filesystem(&sock_fs_type);
2549out_fs:
2550 goto out;
2402} 2551}
2403 2552
2404core_initcall(sock_init); /* early initcall */ 2553core_initcall(sock_init); /* early initcall */
@@ -2545,23 +2694,123 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
2545 2694
2546static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) 2695static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
2547{ 2696{
2697 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2698 bool convert_in = false, convert_out = false;
2699 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2700 struct ethtool_rxnfc __user *rxnfc;
2548 struct ifreq __user *ifr; 2701 struct ifreq __user *ifr;
2702 u32 rule_cnt = 0, actual_rule_cnt;
2703 u32 ethcmd;
2549 u32 data; 2704 u32 data;
2550 void __user *datap; 2705 int ret;
2551 2706
2552 ifr = compat_alloc_user_space(sizeof(*ifr)); 2707 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2708 return -EFAULT;
2553 2709
2554 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) 2710 compat_rxnfc = compat_ptr(data);
2711
2712 if (get_user(ethcmd, &compat_rxnfc->cmd))
2555 return -EFAULT; 2713 return -EFAULT;
2556 2714
2557 if (get_user(data, &ifr32->ifr_ifru.ifru_data)) 2715 /* Most ethtool structures are defined without padding.
2716 * Unfortunately struct ethtool_rxnfc is an exception.
2717 */
2718 switch (ethcmd) {
2719 default:
2720 break;
2721 case ETHTOOL_GRXCLSRLALL:
2722 /* Buffer size is variable */
2723 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2724 return -EFAULT;
2725 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2726 return -ENOMEM;
2727 buf_size += rule_cnt * sizeof(u32);
2728 /* fall through */
2729 case ETHTOOL_GRXRINGS:
2730 case ETHTOOL_GRXCLSRLCNT:
2731 case ETHTOOL_GRXCLSRULE:
2732 convert_out = true;
2733 /* fall through */
2734 case ETHTOOL_SRXCLSRLDEL:
2735 case ETHTOOL_SRXCLSRLINS:
2736 buf_size += sizeof(struct ethtool_rxnfc);
2737 convert_in = true;
2738 break;
2739 }
2740
2741 ifr = compat_alloc_user_space(buf_size);
2742 rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8);
2743
2744 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2558 return -EFAULT; 2745 return -EFAULT;
2559 2746
2560 datap = compat_ptr(data); 2747 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2561 if (put_user(datap, &ifr->ifr_ifru.ifru_data)) 2748 &ifr->ifr_ifru.ifru_data))
2562 return -EFAULT; 2749 return -EFAULT;
2563 2750
2564 return dev_ioctl(net, SIOCETHTOOL, ifr); 2751 if (convert_in) {
2752 /* We expect there to be holes between fs.m_ext and
2753 * fs.ring_cookie and at the end of fs, but nowhere else.
2754 */
2755 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2756 sizeof(compat_rxnfc->fs.m_ext) !=
2757 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2758 sizeof(rxnfc->fs.m_ext));
2759 BUILD_BUG_ON(
2760 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2761 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2762 offsetof(struct ethtool_rxnfc, fs.location) -
2763 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2764
2765 if (copy_in_user(rxnfc, compat_rxnfc,
2766 (void *)(&rxnfc->fs.m_ext + 1) -
2767 (void *)rxnfc) ||
2768 copy_in_user(&rxnfc->fs.ring_cookie,
2769 &compat_rxnfc->fs.ring_cookie,
2770 (void *)(&rxnfc->fs.location + 1) -
2771 (void *)&rxnfc->fs.ring_cookie) ||
2772 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2773 sizeof(rxnfc->rule_cnt)))
2774 return -EFAULT;
2775 }
2776
2777 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2778 if (ret)
2779 return ret;
2780
2781 if (convert_out) {
2782 if (copy_in_user(compat_rxnfc, rxnfc,
2783 (const void *)(&rxnfc->fs.m_ext + 1) -
2784 (const void *)rxnfc) ||
2785 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2786 &rxnfc->fs.ring_cookie,
2787 (const void *)(&rxnfc->fs.location + 1) -
2788 (const void *)&rxnfc->fs.ring_cookie) ||
2789 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2790 sizeof(rxnfc->rule_cnt)))
2791 return -EFAULT;
2792
2793 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2794 /* As an optimisation, we only copy the actual
2795 * number of rules that the underlying
2796 * function returned. Since Mallory might
2797 * change the rule count in user memory, we
2798 * check that it is less than the rule count
2799 * originally given (as the user buffer size),
2800 * which has been range-checked.
2801 */
2802 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2803 return -EFAULT;
2804 if (actual_rule_cnt < rule_cnt)
2805 rule_cnt = actual_rule_cnt;
2806 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2807 &rxnfc->rule_locs[0],
2808 rule_cnt * sizeof(u32)))
2809 return -EFAULT;
2810 }
2811 }
2812
2813 return 0;
2565} 2814}
2566 2815
2567static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) 2816static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
@@ -2605,7 +2854,8 @@ static int bond_ioctl(struct net *net, unsigned int cmd,
2605 2854
2606 old_fs = get_fs(); 2855 old_fs = get_fs();
2607 set_fs(KERNEL_DS); 2856 set_fs(KERNEL_DS);
2608 err = dev_ioctl(net, cmd, &kifr); 2857 err = dev_ioctl(net, cmd,
2858 (struct ifreq __user __force *) &kifr);
2609 set_fs(old_fs); 2859 set_fs(old_fs);
2610 2860
2611 return err; 2861 return err;
@@ -2714,7 +2964,7 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2714 2964
2715 old_fs = get_fs(); 2965 old_fs = get_fs();
2716 set_fs(KERNEL_DS); 2966 set_fs(KERNEL_DS);
2717 err = dev_ioctl(net, cmd, (void __user *)&ifr); 2967 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
2718 set_fs(old_fs); 2968 set_fs(old_fs);
2719 2969
2720 if (cmd == SIOCGIFMAP && !err) { 2970 if (cmd == SIOCGIFMAP && !err) {
@@ -2819,7 +3069,8 @@ static int routing_ioctl(struct net *net, struct socket *sock,
2819 ret |= __get_user(rtdev, &(ur4->rt_dev)); 3069 ret |= __get_user(rtdev, &(ur4->rt_dev));
2820 if (rtdev) { 3070 if (rtdev) {
2821 ret |= copy_from_user(devname, compat_ptr(rtdev), 15); 3071 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
2822 r4.rt_dev = devname; devname[15] = 0; 3072 r4.rt_dev = (char __user __force *)devname;
3073 devname[15] = 0;
2823 } else 3074 } else
2824 r4.rt_dev = NULL; 3075 r4.rt_dev = NULL;
2825 3076
@@ -2841,7 +3092,7 @@ out:
2841 3092
2842/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE 3093/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
2843 * for some operations; this forces use of the newer bridge-utils that 3094 * for some operations; this forces use of the newer bridge-utils that
2844 * use compatiable ioctls 3095 * use compatible ioctls
2845 */ 3096 */
2846static int old_bridge_ioctl(compat_ulong_t __user *argp) 3097static int old_bridge_ioctl(compat_ulong_t __user *argp)
2847{ 3098{
@@ -3054,14 +3305,19 @@ int kernel_getsockopt(struct socket *sock, int level, int optname,
3054 char *optval, int *optlen) 3305 char *optval, int *optlen)
3055{ 3306{
3056 mm_segment_t oldfs = get_fs(); 3307 mm_segment_t oldfs = get_fs();
3308 char __user *uoptval;
3309 int __user *uoptlen;
3057 int err; 3310 int err;
3058 3311
3312 uoptval = (char __user __force *) optval;
3313 uoptlen = (int __user __force *) optlen;
3314
3059 set_fs(KERNEL_DS); 3315 set_fs(KERNEL_DS);
3060 if (level == SOL_SOCKET) 3316 if (level == SOL_SOCKET)
3061 err = sock_getsockopt(sock, level, optname, optval, optlen); 3317 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
3062 else 3318 else
3063 err = sock->ops->getsockopt(sock, level, optname, optval, 3319 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3064 optlen); 3320 uoptlen);
3065 set_fs(oldfs); 3321 set_fs(oldfs);
3066 return err; 3322 return err;
3067} 3323}
@@ -3071,13 +3327,16 @@ int kernel_setsockopt(struct socket *sock, int level, int optname,
3071 char *optval, unsigned int optlen) 3327 char *optval, unsigned int optlen)
3072{ 3328{
3073 mm_segment_t oldfs = get_fs(); 3329 mm_segment_t oldfs = get_fs();
3330 char __user *uoptval;
3074 int err; 3331 int err;
3075 3332
3333 uoptval = (char __user __force *) optval;
3334
3076 set_fs(KERNEL_DS); 3335 set_fs(KERNEL_DS);
3077 if (level == SOL_SOCKET) 3336 if (level == SOL_SOCKET)
3078 err = sock_setsockopt(sock, level, optname, optval, optlen); 3337 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
3079 else 3338 else
3080 err = sock->ops->setsockopt(sock, level, optname, optval, 3339 err = sock->ops->setsockopt(sock, level, optname, uoptval,
3081 optlen); 3340 optlen);
3082 set_fs(oldfs); 3341 set_fs(oldfs);
3083 return err; 3342 return err;