diff options
Diffstat (limited to 'net/socket.c')
| -rw-r--r-- | net/socket.c | 94 |
1 files changed, 59 insertions, 35 deletions
diff --git a/net/socket.c b/net/socket.c index 088fb3fd45e0..937d0fcf74bc 100644 --- a/net/socket.c +++ b/net/socket.c | |||
| @@ -156,7 +156,7 @@ static const struct file_operations socket_file_ops = { | |||
| 156 | */ | 156 | */ |
| 157 | 157 | ||
| 158 | static DEFINE_SPINLOCK(net_family_lock); | 158 | static DEFINE_SPINLOCK(net_family_lock); |
| 159 | static const struct net_proto_family *net_families[NPROTO] __read_mostly; | 159 | static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly; |
| 160 | 160 | ||
| 161 | /* | 161 | /* |
| 162 | * Statistics counters of the socket lists | 162 | * Statistics counters of the socket lists |
| @@ -240,17 +240,19 @@ static struct kmem_cache *sock_inode_cachep __read_mostly; | |||
| 240 | static struct inode *sock_alloc_inode(struct super_block *sb) | 240 | static struct inode *sock_alloc_inode(struct super_block *sb) |
| 241 | { | 241 | { |
| 242 | struct socket_alloc *ei; | 242 | struct socket_alloc *ei; |
| 243 | struct socket_wq *wq; | ||
| 243 | 244 | ||
| 244 | ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); | 245 | ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); |
| 245 | if (!ei) | 246 | if (!ei) |
| 246 | return NULL; | 247 | return NULL; |
| 247 | ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL); | 248 | wq = kmalloc(sizeof(*wq), GFP_KERNEL); |
| 248 | if (!ei->socket.wq) { | 249 | if (!wq) { |
| 249 | kmem_cache_free(sock_inode_cachep, ei); | 250 | kmem_cache_free(sock_inode_cachep, ei); |
| 250 | return NULL; | 251 | return NULL; |
| 251 | } | 252 | } |
| 252 | init_waitqueue_head(&ei->socket.wq->wait); | 253 | init_waitqueue_head(&wq->wait); |
| 253 | ei->socket.wq->fasync_list = NULL; | 254 | wq->fasync_list = NULL; |
| 255 | RCU_INIT_POINTER(ei->socket.wq, wq); | ||
| 254 | 256 | ||
| 255 | ei->socket.state = SS_UNCONNECTED; | 257 | ei->socket.state = SS_UNCONNECTED; |
| 256 | ei->socket.flags = 0; | 258 | ei->socket.flags = 0; |
| @@ -262,6 +264,7 @@ static struct inode *sock_alloc_inode(struct super_block *sb) | |||
| 262 | } | 264 | } |
| 263 | 265 | ||
| 264 | 266 | ||
| 267 | |||
| 265 | static void wq_free_rcu(struct rcu_head *head) | 268 | static void wq_free_rcu(struct rcu_head *head) |
| 266 | { | 269 | { |
| 267 | struct socket_wq *wq = container_of(head, struct socket_wq, rcu); | 270 | struct socket_wq *wq = container_of(head, struct socket_wq, rcu); |
| @@ -272,9 +275,11 @@ static void wq_free_rcu(struct rcu_head *head) | |||
| 272 | static void sock_destroy_inode(struct inode *inode) | 275 | static void sock_destroy_inode(struct inode *inode) |
| 273 | { | 276 | { |
| 274 | struct socket_alloc *ei; | 277 | struct socket_alloc *ei; |
| 278 | struct socket_wq *wq; | ||
| 275 | 279 | ||
| 276 | ei = container_of(inode, struct socket_alloc, vfs_inode); | 280 | ei = container_of(inode, struct socket_alloc, vfs_inode); |
| 277 | call_rcu(&ei->socket.wq->rcu, wq_free_rcu); | 281 | wq = rcu_dereference_protected(ei->socket.wq, 1); |
| 282 | call_rcu(&wq->rcu, wq_free_rcu); | ||
| 278 | kmem_cache_free(sock_inode_cachep, ei); | 283 | kmem_cache_free(sock_inode_cachep, ei); |
| 279 | } | 284 | } |
| 280 | 285 | ||
| @@ -305,20 +310,6 @@ static const struct super_operations sockfs_ops = { | |||
| 305 | .statfs = simple_statfs, | 310 | .statfs = simple_statfs, |
| 306 | }; | 311 | }; |
| 307 | 312 | ||
| 308 | static struct dentry *sockfs_mount(struct file_system_type *fs_type, | ||
| 309 | int flags, const char *dev_name, void *data) | ||
| 310 | { | ||
| 311 | return mount_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC); | ||
| 312 | } | ||
| 313 | |||
| 314 | static struct vfsmount *sock_mnt __read_mostly; | ||
| 315 | |||
| 316 | static struct file_system_type sock_fs_type = { | ||
| 317 | .name = "sockfs", | ||
| 318 | .mount = sockfs_mount, | ||
| 319 | .kill_sb = kill_anon_super, | ||
| 320 | }; | ||
| 321 | |||
| 322 | /* | 313 | /* |
| 323 | * sockfs_dname() is called from d_path(). | 314 | * sockfs_dname() is called from d_path(). |
| 324 | */ | 315 | */ |
| @@ -332,6 +323,21 @@ static const struct dentry_operations sockfs_dentry_operations = { | |||
| 332 | .d_dname = sockfs_dname, | 323 | .d_dname = sockfs_dname, |
| 333 | }; | 324 | }; |
| 334 | 325 | ||
| 326 | static struct dentry *sockfs_mount(struct file_system_type *fs_type, | ||
| 327 | int flags, const char *dev_name, void *data) | ||
| 328 | { | ||
| 329 | return mount_pseudo(fs_type, "socket:", &sockfs_ops, | ||
| 330 | &sockfs_dentry_operations, SOCKFS_MAGIC); | ||
| 331 | } | ||
| 332 | |||
| 333 | static struct vfsmount *sock_mnt __read_mostly; | ||
| 334 | |||
| 335 | static struct file_system_type sock_fs_type = { | ||
| 336 | .name = "sockfs", | ||
| 337 | .mount = sockfs_mount, | ||
| 338 | .kill_sb = kill_anon_super, | ||
| 339 | }; | ||
| 340 | |||
| 335 | /* | 341 | /* |
| 336 | * Obtains the first available file descriptor and sets it up for use. | 342 | * Obtains the first available file descriptor and sets it up for use. |
| 337 | * | 343 | * |
| @@ -360,14 +366,13 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) | |||
| 360 | if (unlikely(fd < 0)) | 366 | if (unlikely(fd < 0)) |
| 361 | return fd; | 367 | return fd; |
| 362 | 368 | ||
| 363 | path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); | 369 | path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); |
| 364 | if (unlikely(!path.dentry)) { | 370 | if (unlikely(!path.dentry)) { |
| 365 | put_unused_fd(fd); | 371 | put_unused_fd(fd); |
| 366 | return -ENOMEM; | 372 | return -ENOMEM; |
| 367 | } | 373 | } |
| 368 | path.mnt = mntget(sock_mnt); | 374 | path.mnt = mntget(sock_mnt); |
| 369 | 375 | ||
| 370 | path.dentry->d_op = &sockfs_dentry_operations; | ||
| 371 | d_instantiate(path.dentry, SOCK_INODE(sock)); | 376 | d_instantiate(path.dentry, SOCK_INODE(sock)); |
| 372 | SOCK_INODE(sock)->i_fop = &socket_file_ops; | 377 | SOCK_INODE(sock)->i_fop = &socket_file_ops; |
| 373 | 378 | ||
| @@ -523,7 +528,7 @@ void sock_release(struct socket *sock) | |||
| 523 | module_put(owner); | 528 | module_put(owner); |
| 524 | } | 529 | } |
| 525 | 530 | ||
| 526 | if (sock->wq->fasync_list) | 531 | if (rcu_dereference_protected(sock->wq, 1)->fasync_list) |
| 527 | printk(KERN_ERR "sock_release: fasync list not empty!\n"); | 532 | printk(KERN_ERR "sock_release: fasync list not empty!\n"); |
| 528 | 533 | ||
| 529 | percpu_sub(sockets_in_use, 1); | 534 | percpu_sub(sockets_in_use, 1); |
| @@ -1107,15 +1112,16 @@ static int sock_fasync(int fd, struct file *filp, int on) | |||
| 1107 | { | 1112 | { |
| 1108 | struct socket *sock = filp->private_data; | 1113 | struct socket *sock = filp->private_data; |
| 1109 | struct sock *sk = sock->sk; | 1114 | struct sock *sk = sock->sk; |
| 1115 | struct socket_wq *wq; | ||
| 1110 | 1116 | ||
| 1111 | if (sk == NULL) | 1117 | if (sk == NULL) |
| 1112 | return -EINVAL; | 1118 | return -EINVAL; |
| 1113 | 1119 | ||
| 1114 | lock_sock(sk); | 1120 | lock_sock(sk); |
| 1121 | wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk)); | ||
| 1122 | fasync_helper(fd, filp, on, &wq->fasync_list); | ||
| 1115 | 1123 | ||
| 1116 | fasync_helper(fd, filp, on, &sock->wq->fasync_list); | 1124 | if (!wq->fasync_list) |
| 1117 | |||
| 1118 | if (!sock->wq->fasync_list) | ||
| 1119 | sock_reset_flag(sk, SOCK_FASYNC); | 1125 | sock_reset_flag(sk, SOCK_FASYNC); |
| 1120 | else | 1126 | else |
| 1121 | sock_set_flag(sk, SOCK_FASYNC); | 1127 | sock_set_flag(sk, SOCK_FASYNC); |
| @@ -1215,7 +1221,7 @@ int __sock_create(struct net *net, int family, int type, int protocol, | |||
| 1215 | * requested real, full-featured networking support upon configuration. | 1221 | * requested real, full-featured networking support upon configuration. |
| 1216 | * Otherwise module support will break! | 1222 | * Otherwise module support will break! |
| 1217 | */ | 1223 | */ |
| 1218 | if (net_families[family] == NULL) | 1224 | if (rcu_access_pointer(net_families[family]) == NULL) |
| 1219 | request_module("net-pf-%d", family); | 1225 | request_module("net-pf-%d", family); |
| 1220 | #endif | 1226 | #endif |
| 1221 | 1227 | ||
| @@ -2347,10 +2353,11 @@ int sock_register(const struct net_proto_family *ops) | |||
| 2347 | } | 2353 | } |
| 2348 | 2354 | ||
| 2349 | spin_lock(&net_family_lock); | 2355 | spin_lock(&net_family_lock); |
| 2350 | if (net_families[ops->family]) | 2356 | if (rcu_dereference_protected(net_families[ops->family], |
| 2357 | lockdep_is_held(&net_family_lock))) | ||
| 2351 | err = -EEXIST; | 2358 | err = -EEXIST; |
| 2352 | else { | 2359 | else { |
| 2353 | net_families[ops->family] = ops; | 2360 | rcu_assign_pointer(net_families[ops->family], ops); |
| 2354 | err = 0; | 2361 | err = 0; |
| 2355 | } | 2362 | } |
| 2356 | spin_unlock(&net_family_lock); | 2363 | spin_unlock(&net_family_lock); |
| @@ -2378,7 +2385,7 @@ void sock_unregister(int family) | |||
| 2378 | BUG_ON(family < 0 || family >= NPROTO); | 2385 | BUG_ON(family < 0 || family >= NPROTO); |
| 2379 | 2386 | ||
| 2380 | spin_lock(&net_family_lock); | 2387 | spin_lock(&net_family_lock); |
| 2381 | net_families[family] = NULL; | 2388 | rcu_assign_pointer(net_families[family], NULL); |
| 2382 | spin_unlock(&net_family_lock); | 2389 | spin_unlock(&net_family_lock); |
| 2383 | 2390 | ||
| 2384 | synchronize_rcu(); | 2391 | synchronize_rcu(); |
| @@ -2389,6 +2396,8 @@ EXPORT_SYMBOL(sock_unregister); | |||
| 2389 | 2396 | ||
| 2390 | static int __init sock_init(void) | 2397 | static int __init sock_init(void) |
| 2391 | { | 2398 | { |
| 2399 | int err; | ||
| 2400 | |||
| 2392 | /* | 2401 | /* |
| 2393 | * Initialize sock SLAB cache. | 2402 | * Initialize sock SLAB cache. |
| 2394 | */ | 2403 | */ |
| @@ -2405,8 +2414,15 @@ static int __init sock_init(void) | |||
| 2405 | */ | 2414 | */ |
| 2406 | 2415 | ||
| 2407 | init_inodecache(); | 2416 | init_inodecache(); |
| 2408 | register_filesystem(&sock_fs_type); | 2417 | |
| 2418 | err = register_filesystem(&sock_fs_type); | ||
| 2419 | if (err) | ||
| 2420 | goto out_fs; | ||
| 2409 | sock_mnt = kern_mount(&sock_fs_type); | 2421 | sock_mnt = kern_mount(&sock_fs_type); |
| 2422 | if (IS_ERR(sock_mnt)) { | ||
| 2423 | err = PTR_ERR(sock_mnt); | ||
| 2424 | goto out_mount; | ||
| 2425 | } | ||
| 2410 | 2426 | ||
| 2411 | /* The real protocol initialization is performed in later initcalls. | 2427 | /* The real protocol initialization is performed in later initcalls. |
| 2412 | */ | 2428 | */ |
| @@ -2419,7 +2435,13 @@ static int __init sock_init(void) | |||
| 2419 | skb_timestamping_init(); | 2435 | skb_timestamping_init(); |
| 2420 | #endif | 2436 | #endif |
| 2421 | 2437 | ||
| 2422 | return 0; | 2438 | out: |
| 2439 | return err; | ||
| 2440 | |||
| 2441 | out_mount: | ||
| 2442 | unregister_filesystem(&sock_fs_type); | ||
| 2443 | out_fs: | ||
| 2444 | goto out; | ||
| 2423 | } | 2445 | } |
| 2424 | 2446 | ||
| 2425 | core_initcall(sock_init); /* early initcall */ | 2447 | core_initcall(sock_init); /* early initcall */ |
| @@ -2626,7 +2648,8 @@ static int bond_ioctl(struct net *net, unsigned int cmd, | |||
| 2626 | 2648 | ||
| 2627 | old_fs = get_fs(); | 2649 | old_fs = get_fs(); |
| 2628 | set_fs(KERNEL_DS); | 2650 | set_fs(KERNEL_DS); |
| 2629 | err = dev_ioctl(net, cmd, &kifr); | 2651 | err = dev_ioctl(net, cmd, |
| 2652 | (struct ifreq __user __force *) &kifr); | ||
| 2630 | set_fs(old_fs); | 2653 | set_fs(old_fs); |
| 2631 | 2654 | ||
| 2632 | return err; | 2655 | return err; |
| @@ -2735,7 +2758,7 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, | |||
| 2735 | 2758 | ||
| 2736 | old_fs = get_fs(); | 2759 | old_fs = get_fs(); |
| 2737 | set_fs(KERNEL_DS); | 2760 | set_fs(KERNEL_DS); |
| 2738 | err = dev_ioctl(net, cmd, (void __user *)&ifr); | 2761 | err = dev_ioctl(net, cmd, (void __user __force *)&ifr); |
| 2739 | set_fs(old_fs); | 2762 | set_fs(old_fs); |
| 2740 | 2763 | ||
| 2741 | if (cmd == SIOCGIFMAP && !err) { | 2764 | if (cmd == SIOCGIFMAP && !err) { |
| @@ -2840,7 +2863,8 @@ static int routing_ioctl(struct net *net, struct socket *sock, | |||
| 2840 | ret |= __get_user(rtdev, &(ur4->rt_dev)); | 2863 | ret |= __get_user(rtdev, &(ur4->rt_dev)); |
| 2841 | if (rtdev) { | 2864 | if (rtdev) { |
| 2842 | ret |= copy_from_user(devname, compat_ptr(rtdev), 15); | 2865 | ret |= copy_from_user(devname, compat_ptr(rtdev), 15); |
| 2843 | r4.rt_dev = devname; devname[15] = 0; | 2866 | r4.rt_dev = (char __user __force *)devname; |
| 2867 | devname[15] = 0; | ||
| 2844 | } else | 2868 | } else |
| 2845 | r4.rt_dev = NULL; | 2869 | r4.rt_dev = NULL; |
| 2846 | 2870 | ||
