Diffstat (limited to 'net/core/sock.c')
 net/core/sock.c | 157 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 129 insertions(+), 28 deletions(-)
diff --git a/net/core/sock.c b/net/core/sock.c
index 5d820c376653..b77e155cbe6c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -92,7 +92,6 @@
  */

 #include <linux/capability.h>
-#include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/socket.h>
@@ -130,6 +129,53 @@
 #include <net/tcp.h>
 #endif

+/*
+ * Each address family might have different locking rules, so we have
+ * one slock key per address family:
+ */
+static struct lock_class_key af_family_keys[AF_MAX];
+static struct lock_class_key af_family_slock_keys[AF_MAX];
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+/*
+ * Make lock validator output more readable. (we pre-construct these
+ * strings build-time, so that runtime initialization of socket
+ * locks is fast):
+ */
+static const char *af_family_key_strings[AF_MAX+1] = {
+  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
+  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
+  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
+  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
+  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
+  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
+  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
+  "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
+  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
+  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
+  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_MAX"
+};
+static const char *af_family_slock_key_strings[AF_MAX+1] = {
+  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
+  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
+  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
+  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
+  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
+  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
+  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
+  "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
+  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
+  "slock-27"       , "slock-28"          , "slock-29"          ,
+  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_MAX"
+};
+#endif
+
+/*
+ * sk_callback_lock locking rules are per-address-family,
+ * so split the lock classes by using a per-AF key:
+ */
+static struct lock_class_key af_callback_keys[AF_MAX];
+
 /* Take into consideration the size of the struct sk_buff overhead in the
  * determination of these values, since that is non-constant across
  * platforms. This makes socket queueing behavior and performance
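
Background on the block above: lockdep classifies a lock by the static key of its initialization site, so every socket spinlock would otherwise fall into one class and legitimate cross-family nesting (say, an AF_INET socket locked while an AF_UNIX socket is held) would be reported as recursive locking. The per-family key arrays split the sockets into one class per address family. Below is a minimal, runnable userspace sketch of the build-time string trick used for the key names; it is illustrative only and not part of the patch:

#include <stdio.h>

/* Pre-construct per-class lock names at compile time, in the same
 * spirit as af_family_key_strings[] above, so that no runtime string
 * formatting is needed when a lock is initialized. */
#define SK_LOCK_NAME(af)	"sk_lock-" #af

static const char *demo_key_strings[] = {
	SK_LOCK_NAME(AF_UNSPEC),
	SK_LOCK_NAME(AF_UNIX),
	SK_LOCK_NAME(AF_INET),
};

int main(void)
{
	size_t n = sizeof(demo_key_strings) / sizeof(demo_key_strings[0]);

	for (size_t i = 0; i < n; i++)
		printf("%s\n", demo_key_strings[i]);
	return 0;
}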
@@ -141,13 +187,13 @@
 #define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

 /* Run time adjustable parameters. */
-__u32 sysctl_wmem_max = SK_WMEM_MAX;
-__u32 sysctl_rmem_max = SK_RMEM_MAX;
-__u32 sysctl_wmem_default = SK_WMEM_MAX;
-__u32 sysctl_rmem_default = SK_RMEM_MAX;
+__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
+__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
+__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
+__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

 /* Maximal space eaten by iovec or ancilliary data plus some space */
-int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
+int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);

 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 {
@@ -201,11 +247,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		goto out;
 	}

-	/* It would be deadlock, if sock_queue_rcv_skb is used
-	   with socket lock! We assume that users of this
-	   function are lock free.
-	*/
-	err = sk_filter(sk, skb, 1);
+	err = sk_filter(sk, skb);
 	if (err)
 		goto out;

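
The third sk_filter() argument (the needlock flag) disappears here because the filter pointer becomes RCU-protected instead of being guarded by the socket spinlock, so callers no longer need to say whether they hold the lock. A hedged sketch of what the two-argument reader side looks like after this change, reconstructed from the corresponding filter code and not part of this diff:

/* Sketch of the RCU reader side assumed by the new calls above: the
 * filter is looked up and run entirely under rcu_read_lock_bh(), so
 * no socket lock is needed and the needlock argument goes away. */
static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
{
	int err = 0;
	struct sk_filter *filter;

	rcu_read_lock_bh();
	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		unsigned int pkt_len = sk_run_filter(skb, filter->insns,
						     filter->len);
		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
	}
	rcu_read_unlock_bh();

	return err;
}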
@@ -232,15 +274,22 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb)
 {
 	int rc = NET_RX_SUCCESS;

-	if (sk_filter(sk, skb, 0))
+	if (sk_filter(sk, skb))
 		goto discard_and_relse;

 	skb->dev = NULL;

 	bh_lock_sock(sk);
-	if (!sock_owned_by_user(sk))
+	if (!sock_owned_by_user(sk)) {
+		/*
+		 * trylock + unlock semantics:
+		 */
+		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
+
 		rc = sk->sk_backlog_rcv(sk, skb);
-	else
+
+		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
+	} else
 		sk_add_backlog(sk, skb);
 	bh_unlock_sock(sk);
 out:
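
The mutex_acquire()/mutex_release() pair above takes no lock; it only tells the validator that, for the duration of sk_backlog_rcv(), the code logically holds sk_lock with trylock semantics (the owner field is clear and bh_lock_sock() already serializes against the owner). The structure being annotated is the classic owned/backlog split. A self-contained userspace sketch of that split, with simplified stand-in types rather than the kernel's:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct pkt { struct pkt *next; int id; };

/* Stand-ins for sk_lock.slock, sk_lock.owner and the socket backlog. */
struct demo_sock {
	pthread_mutex_t slock;
	int owned;
	struct pkt *backlog;
};

static void backlog_rcv(struct pkt *p)	/* like sk->sk_backlog_rcv() */
{
	printf("processed packet %d\n", p->id);
	free(p);
}

static void demo_receive(struct demo_sock *sk, struct pkt *p)
{
	pthread_mutex_lock(&sk->slock);		/* like bh_lock_sock() */
	if (!sk->owned) {
		/* Fast path: nobody owns the socket, process inline.
		 * This is the region the patch annotates as sk_lock
		 * held with trylock + unlock semantics. */
		backlog_rcv(p);
	} else {
		/* Slow path: the owner drains this queue on release.
		 * (The kernel keeps FIFO order; a LIFO push keeps
		 * this sketch short.) */
		p->next = sk->backlog;
		sk->backlog = p;
	}
	pthread_mutex_unlock(&sk->slock);	/* like bh_unlock_sock() */
}

int main(void)
{
	struct demo_sock sk = { PTHREAD_MUTEX_INITIALIZER, 0, NULL };
	struct pkt *p = malloc(sizeof(*p));

	if (!p)
		return 1;
	p->id = 1;
	demo_receive(&sk, p);	/* uncontended: processed inline */
	return 0;
}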
@@ -553,18 +602,25 @@ set_rcvbuf:
 		break;

 	case SO_DETACH_FILTER:
-		spin_lock_bh(&sk->sk_lock.slock);
-		filter = sk->sk_filter;
+		rcu_read_lock_bh();
+		filter = rcu_dereference(sk->sk_filter);
 		if (filter) {
-			sk->sk_filter = NULL;
-			spin_unlock_bh(&sk->sk_lock.slock);
+			rcu_assign_pointer(sk->sk_filter, NULL);
 			sk_filter_release(sk, filter);
+			rcu_read_unlock_bh();
 			break;
 		}
-		spin_unlock_bh(&sk->sk_lock.slock);
+		rcu_read_unlock_bh();
 		ret = -ENONET;
 		break;

+	case SO_PASSSEC:
+		if (valbool)
+			set_bit(SOCK_PASSSEC, &sock->flags);
+		else
+			clear_bit(SOCK_PASSSEC, &sock->flags);
+		break;
+
 		/* We implement the SO_SNDLOWAT etc to
 		   not be settable (1003.1g 5.3) */
 	default:
@@ -723,6 +779,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_state == TCP_LISTEN;
 		break;

+	case SO_PASSSEC:
+		v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
+		break;
+
 	case SO_PEERSEC:
 		return security_socket_getpeersec_stream(sock, optval, optlen, len);

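
The new SO_PASSSEC option follows the SO_PASSCRED pattern: a boolean socket flag asking the kernel to attach the sender's security context to received datagrams as SCM_SECURITY ancillary data. A hedged userspace sketch of a consumer; the fallback #define values are assumptions for older headers (these are the values on most architectures):

#include <sys/socket.h>
#include <sys/uio.h>
#include <stdio.h>

#ifndef SO_PASSSEC
#define SO_PASSSEC	34
#endif
#ifndef SCM_SECURITY
#define SCM_SECURITY	0x03
#endif

/* After enabling SO_PASSSEC, each received datagram can carry the
 * sender's security context as SCM_SECURITY ancillary data. */
static void print_peer_context(int fd)
{
	char data[256], cbuf[256];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg;
	int on = 1;

	setsockopt(fd, SOL_SOCKET, SO_PASSSEC, &on, sizeof(on));
	if (recvmsg(fd, &msg, 0) < 0)
		return;
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_SECURITY)
			printf("peer context: %.*s\n",
			       (int)(cmsg->cmsg_len - CMSG_LEN(0)),
			       (char *)CMSG_DATA(cmsg));
}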
@@ -739,6 +799,33 @@ lenout:
 	return 0;
 }

+/*
+ * Initialize an sk_lock.
+ *
+ * (We also register the sk_lock with the lock validator.)
+ */
+static void inline sock_lock_init(struct sock *sk)
+{
+	spin_lock_init(&sk->sk_lock.slock);
+	sk->sk_lock.owner = NULL;
+	init_waitqueue_head(&sk->sk_lock.wq);
+	/*
+	 * Make sure we are not reinitializing a held lock:
+	 */
+	debug_check_no_locks_freed((void *)&sk->sk_lock, sizeof(sk->sk_lock));
+
+	/*
+	 * Mark both the sk_lock and the sk_lock.slock as a
+	 * per-address-family lock class:
+	 */
+	lockdep_set_class_and_name(&sk->sk_lock.slock,
+				   af_family_slock_keys + sk->sk_family,
+				   af_family_slock_key_strings[sk->sk_family]);
+	lockdep_init_map(&sk->sk_lock.dep_map,
+			 af_family_key_strings[sk->sk_family],
+			 af_family_keys + sk->sk_family);
+}
+
 /**
  *	sk_alloc - All socket objects are allocated here
  *	@family: protocol family
@@ -793,10 +880,10 @@ void sk_free(struct sock *sk)
 	if (sk->sk_destruct)
 		sk->sk_destruct(sk);

-	filter = sk->sk_filter;
+	filter = rcu_dereference(sk->sk_filter);
 	if (filter) {
 		sk_filter_release(sk, filter);
-		sk->sk_filter = NULL;
+		rcu_assign_pointer(sk->sk_filter, NULL);
 	}

 	sock_disable_timestamp(sk);
@@ -820,7 +907,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 	if (newsk != NULL) {
 		struct sk_filter *filter;

-		memcpy(newsk, sk, sk->sk_prot->obj_size);
+		sock_copy(newsk, sk);

 		/* SANITY */
 		sk_node_init(&newsk->sk_node);
@@ -838,6 +925,8 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)

 		rwlock_init(&newsk->sk_dst_lock);
 		rwlock_init(&newsk->sk_callback_lock);
+		lockdep_set_class(&newsk->sk_callback_lock,
+				   af_callback_keys + newsk->sk_family);

 		newsk->sk_dst_cache	= NULL;
 		newsk->sk_wmem_queued	= 0;
@@ -1412,6 +1501,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)

 	rwlock_init(&sk->sk_dst_lock);
 	rwlock_init(&sk->sk_callback_lock);
+	lockdep_set_class(&sk->sk_callback_lock,
+			   af_callback_keys + sk->sk_family);

 	sk->sk_state_change	=	sock_def_wakeup;
 	sk->sk_data_ready	=	sock_def_readable;
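
lockdep_set_class() reassigns an already initialized lock to a different class, which is what lets each address family keep its own sk_callback_lock class without touching rwlock_init() itself. A protocol with unusual nesting could apply the same idea with its own key; a hedged sketch with hypothetical my_proto names, not from this patch:

/* Hypothetical example: a protocol that nests sk_callback_lock inside
 * other locks in a family-specific way can give its sockets a
 * dedicated lockdep class so the validator tracks it separately. */
static struct lock_class_key my_proto_callback_key;

static void my_proto_init_sock(struct sock *sk)
{
	lockdep_set_class(&sk->sk_callback_lock, &my_proto_callback_key);
}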
@@ -1439,24 +1530,34 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 void fastcall lock_sock(struct sock *sk)
 {
 	might_sleep();
-	spin_lock_bh(&(sk->sk_lock.slock));
+	spin_lock_bh(&sk->sk_lock.slock);
 	if (sk->sk_lock.owner)
 		__lock_sock(sk);
 	sk->sk_lock.owner = (void *)1;
-	spin_unlock_bh(&(sk->sk_lock.slock));
+	spin_unlock(&sk->sk_lock.slock);
+	/*
+	 * The sk_lock has mutex_lock() semantics here:
+	 */
+	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+	local_bh_enable();
 }

 EXPORT_SYMBOL(lock_sock);

 void fastcall release_sock(struct sock *sk)
 {
-	spin_lock_bh(&(sk->sk_lock.slock));
+	/*
+	 * The sk_lock has mutex_unlock() semantics:
+	 */
+	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
+
+	spin_lock_bh(&sk->sk_lock.slock);
 	if (sk->sk_backlog.tail)
 		__release_sock(sk);
 	sk->sk_lock.owner = NULL;
-	if (waitqueue_active(&(sk->sk_lock.wq)))
-		wake_up(&(sk->sk_lock.wq));
-	spin_unlock_bh(&(sk->sk_lock.slock));
+	if (waitqueue_active(&sk->sk_lock.wq))
+		wake_up(&sk->sk_lock.wq);
+	spin_unlock_bh(&sk->sk_lock.slock);
 }
 EXPORT_SYMBOL(release_sock);

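Taken together, lock_sock()/release_sock() now behave like a sleeping mutex as far as the validator is concerned, even though they are built from a spinlock, an owner flag and a wait queue. A hedged sketch of the caller-side contract; demo_set_rcvlowat is a hypothetical helper, with sk_rcvlowat chosen only as an example of owner-protected state:

/* Hypothetical process-context helper: lock_sock() may sleep and is
 * annotated as mutex_lock(); release_sock() drains any backlog that
 * softirqs queued meanwhile and is annotated as mutex_unlock(). */
static int demo_set_rcvlowat(struct sock *sk, int val)
{
	lock_sock(sk);
	sk->sk_rcvlowat = val ? : 1;	/* state protected by the owner */
	release_sock(sk);
	return 0;
}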