diff options
Diffstat (limited to 'net/core/sock.c')
| -rw-r--r-- | net/core/sock.c | 157 |
1 files changed, 129 insertions, 28 deletions
diff --git a/net/core/sock.c b/net/core/sock.c index 5d820c376653..b77e155cbe6c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
| @@ -92,7 +92,6 @@ | |||
| 92 | */ | 92 | */ |
| 93 | 93 | ||
| 94 | #include <linux/capability.h> | 94 | #include <linux/capability.h> |
| 95 | #include <linux/config.h> | ||
| 96 | #include <linux/errno.h> | 95 | #include <linux/errno.h> |
| 97 | #include <linux/types.h> | 96 | #include <linux/types.h> |
| 98 | #include <linux/socket.h> | 97 | #include <linux/socket.h> |
| @@ -130,6 +129,53 @@ | |||
| 130 | #include <net/tcp.h> | 129 | #include <net/tcp.h> |
| 131 | #endif | 130 | #endif |
| 132 | 131 | ||
| 132 | /* | ||
| 133 | * Each address family might have different locking rules, so we have | ||
| 134 | * one sk_lock key and one slock key per address family: | ||
| 135 | */ | ||
| 136 | static struct lock_class_key af_family_keys[AF_MAX]; | ||
| 137 | static struct lock_class_key af_family_slock_keys[AF_MAX]; | ||
| 138 | |||
| 139 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
| 140 | /* | ||
| 141 | * Make lock validator output more readable. (We pre-construct these | ||
| 142 | * strings at build time, so that runtime initialization of socket | ||
| 143 | * locks is fast): | ||
| 144 | */ | ||
| 145 | static const char *af_family_key_strings[AF_MAX+1] = { | ||
| 146 | "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" , | ||
| 147 | "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK", | ||
| 148 | "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" , | ||
| 149 | "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" , | ||
| 150 | "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" , | ||
| 151 | "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" , | ||
| 152 | "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" , | ||
| 153 | "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" , | ||
| 154 | "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , | ||
| 155 | "sk_lock-27" , "sk_lock-28" , "sk_lock-29" , | ||
| 156 | "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_MAX" | ||
| 157 | }; | ||
| 158 | static const char *af_family_slock_key_strings[AF_MAX+1] = { | ||
| 159 | "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , | ||
| 160 | "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK", | ||
| 161 | "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" , | ||
| 162 | "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" , | ||
| 163 | "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" , | ||
| 164 | "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" , | ||
| 165 | "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" , | ||
| 166 | "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" , | ||
| 167 | "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , | ||
| 168 | "slock-27" , "slock-28" , "slock-29" , | ||
| 169 | "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_MAX" | ||
| 170 | }; | ||
| 171 | #endif | ||
| 172 | |||
| 173 | /* | ||
| 174 | * sk_callback_lock locking rules are per-address-family, | ||
| 175 | * so split the lock classes by using a per-AF key: | ||
| 176 | */ | ||
| 177 | static struct lock_class_key af_callback_keys[AF_MAX]; | ||
| 178 | |||
| 133 | /* Take into consideration the size of the struct sk_buff overhead in the | 179 | /* Take into consideration the size of the struct sk_buff overhead in the |
| 134 | * determination of these values, since that is non-constant across | 180 | * determination of these values, since that is non-constant across |
| 135 | * platforms. This makes socket queueing behavior and performance | 181 | * platforms. This makes socket queueing behavior and performance |
| @@ -141,13 +187,13 @@ | |||
| 141 | #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) | 187 | #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) |
| 142 | 188 | ||
| 143 | /* Run time adjustable parameters. */ | 189 | /* Run time adjustable parameters. */ |
| 144 | __u32 sysctl_wmem_max = SK_WMEM_MAX; | 190 | __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX; |
| 145 | __u32 sysctl_rmem_max = SK_RMEM_MAX; | 191 | __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; |
| 146 | __u32 sysctl_wmem_default = SK_WMEM_MAX; | 192 | __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; |
| 147 | __u32 sysctl_rmem_default = SK_RMEM_MAX; | 193 | __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; |
| 148 | 194 | ||
| 149 | /* Maximal space eaten by iovec or ancillary data plus some space */ | 195 | /* Maximal space eaten by iovec or ancillary data plus some space */ |
| 150 | int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512); | 196 | int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); |
| 151 | 197 | ||
| 152 | static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) | 198 | static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) |
| 153 | { | 199 | { |
| @@ -201,11 +247,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
| 201 | goto out; | 247 | goto out; |
| 202 | } | 248 | } |
| 203 | 249 | ||
| 204 | /* It would be deadlock, if sock_queue_rcv_skb is used | 250 | err = sk_filter(sk, skb); |
| 205 | with socket lock! We assume that users of this | ||
| 206 | function are lock free. | ||
| 207 | */ | ||
| 208 | err = sk_filter(sk, skb, 1); | ||
| 209 | if (err) | 251 | if (err) |
| 210 | goto out; | 252 | goto out; |
| 211 | 253 | ||
| @@ -232,15 +274,22 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb) | |||
| 232 | { | 274 | { |
| 233 | int rc = NET_RX_SUCCESS; | 275 | int rc = NET_RX_SUCCESS; |
| 234 | 276 | ||
| 235 | if (sk_filter(sk, skb, 0)) | 277 | if (sk_filter(sk, skb)) |
| 236 | goto discard_and_relse; | 278 | goto discard_and_relse; |
| 237 | 279 | ||
| 238 | skb->dev = NULL; | 280 | skb->dev = NULL; |
| 239 | 281 | ||
| 240 | bh_lock_sock(sk); | 282 | bh_lock_sock(sk); |
| 241 | if (!sock_owned_by_user(sk)) | 283 | if (!sock_owned_by_user(sk)) { |
| 284 | /* | ||
| 285 | * trylock + unlock semantics: | ||
| 286 | */ | ||
| 287 | mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_); | ||
| 288 | |||
| 242 | rc = sk->sk_backlog_rcv(sk, skb); | 289 | rc = sk->sk_backlog_rcv(sk, skb); |
| 243 | else | 290 | |
| 291 | mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); | ||
| 292 | } else | ||
| 244 | sk_add_backlog(sk, skb); | 293 | sk_add_backlog(sk, skb); |
| 245 | bh_unlock_sock(sk); | 294 | bh_unlock_sock(sk); |
| 246 | out: | 295 | out: |
| @@ -553,18 +602,25 @@ set_rcvbuf: | |||
| 553 | break; | 602 | break; |
| 554 | 603 | ||
| 555 | case SO_DETACH_FILTER: | 604 | case SO_DETACH_FILTER: |
| 556 | spin_lock_bh(&sk->sk_lock.slock); | 605 | rcu_read_lock_bh(); |
| 557 | filter = sk->sk_filter; | 606 | filter = rcu_dereference(sk->sk_filter); |
| 558 | if (filter) { | 607 | if (filter) { |
| 559 | sk->sk_filter = NULL; | 608 | rcu_assign_pointer(sk->sk_filter, NULL); |
| 560 | spin_unlock_bh(&sk->sk_lock.slock); | ||
| 561 | sk_filter_release(sk, filter); | 609 | sk_filter_release(sk, filter); |
| 610 | rcu_read_unlock_bh(); | ||
| 562 | break; | 611 | break; |
| 563 | } | 612 | } |
| 564 | spin_unlock_bh(&sk->sk_lock.slock); | 613 | rcu_read_unlock_bh(); |
| 565 | ret = -ENONET; | 614 | ret = -ENONET; |
| 566 | break; | 615 | break; |
| 567 | 616 | ||
| 617 | case SO_PASSSEC: | ||
| 618 | if (valbool) | ||
| 619 | set_bit(SOCK_PASSSEC, &sock->flags); | ||
| 620 | else | ||
| 621 | clear_bit(SOCK_PASSSEC, &sock->flags); | ||
| 622 | break; | ||
| 623 | |||
| 568 | /* We implement the SO_SNDLOWAT etc to | 624 | /* We implement the SO_SNDLOWAT etc to |
| 569 | not be settable (1003.1g 5.3) */ | 625 | not be settable (1003.1g 5.3) */ |
| 570 | default: | 626 | default: |
| @@ -723,6 +779,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, | |||
| 723 | v.val = sk->sk_state == TCP_LISTEN; | 779 | v.val = sk->sk_state == TCP_LISTEN; |
| 724 | break; | 780 | break; |
| 725 | 781 | ||
| 782 | case SO_PASSSEC: | ||
| 783 | v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0; | ||
| 784 | break; | ||
| 785 | |||
| 726 | case SO_PEERSEC: | 786 | case SO_PEERSEC: |
| 727 | return security_socket_getpeersec_stream(sock, optval, optlen, len); | 787 | return security_socket_getpeersec_stream(sock, optval, optlen, len); |
| 728 | 788 | ||
| @@ -739,6 +799,33 @@ lenout: | |||
| 739 | return 0; | 799 | return 0; |
| 740 | } | 800 | } |
| 741 | 801 | ||
| 802 | /* | ||
| 803 | * Initialize an sk_lock: its slock spinlock, owner field and wait queue. | ||
| 804 | * | ||
| 805 | * (We also register the sk_lock and its slock with the lock validator.) | ||
| 806 | */ | ||
| 807 | static void inline sock_lock_init(struct sock *sk) | ||
| 808 | { | ||
| 809 | spin_lock_init(&sk->sk_lock.slock); | ||
| 810 | sk->sk_lock.owner = NULL; | ||
| 811 | init_waitqueue_head(&sk->sk_lock.wq); | ||
| 812 | /* | ||
| 813 | * Make sure we are not reinitializing a held lock: | ||
| 814 | */ | ||
| 815 | debug_check_no_locks_freed((void *)&sk->sk_lock, sizeof(sk->sk_lock)); | ||
| 816 | | ||
| 817 | /* | ||
| 818 | * Mark both the sk_lock and the sk_lock.slock as a | ||
| 819 | * per-address-family lock class: | ||
| 820 | */ | ||
| 821 | lockdep_set_class_and_name(&sk->sk_lock.slock, | ||
| 822 | af_family_slock_keys + sk->sk_family, | ||
| 823 | af_family_slock_key_strings[sk->sk_family]); | ||
| 824 | lockdep_init_map(&sk->sk_lock.dep_map, | ||
| 825 | af_family_key_strings[sk->sk_family], | ||
| 826 | af_family_keys + sk->sk_family); | ||
| 827 | } | ||
| 828 | |||
| 742 | /** | 829 | /** |
| 743 | * sk_alloc - All socket objects are allocated here | 830 | * sk_alloc - All socket objects are allocated here |
| 744 | * @family: protocol family | 831 | * @family: protocol family |
| @@ -793,10 +880,10 @@ void sk_free(struct sock *sk) | |||
| 793 | if (sk->sk_destruct) | 880 | if (sk->sk_destruct) |
| 794 | sk->sk_destruct(sk); | 881 | sk->sk_destruct(sk); |
| 795 | 882 | ||
| 796 | filter = sk->sk_filter; | 883 | filter = rcu_dereference(sk->sk_filter); |
| 797 | if (filter) { | 884 | if (filter) { |
| 798 | sk_filter_release(sk, filter); | 885 | sk_filter_release(sk, filter); |
| 799 | sk->sk_filter = NULL; | 886 | rcu_assign_pointer(sk->sk_filter, NULL); |
| 800 | } | 887 | } |
| 801 | 888 | ||
| 802 | sock_disable_timestamp(sk); | 889 | sock_disable_timestamp(sk); |
| @@ -820,7 +907,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) | |||
| 820 | if (newsk != NULL) { | 907 | if (newsk != NULL) { |
| 821 | struct sk_filter *filter; | 908 | struct sk_filter *filter; |
| 822 | 909 | ||
| 823 | memcpy(newsk, sk, sk->sk_prot->obj_size); | 910 | sock_copy(newsk, sk); |
| 824 | 911 | ||
| 825 | /* SANITY */ | 912 | /* SANITY */ |
| 826 | sk_node_init(&newsk->sk_node); | 913 | sk_node_init(&newsk->sk_node); |
| @@ -838,6 +925,8 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) | |||
| 838 | 925 | ||
| 839 | rwlock_init(&newsk->sk_dst_lock); | 926 | rwlock_init(&newsk->sk_dst_lock); |
| 840 | rwlock_init(&newsk->sk_callback_lock); | 927 | rwlock_init(&newsk->sk_callback_lock); |
| 928 | lockdep_set_class(&newsk->sk_callback_lock, | ||
| 929 | af_callback_keys + newsk->sk_family); | ||
| 841 | 930 | ||
| 842 | newsk->sk_dst_cache = NULL; | 931 | newsk->sk_dst_cache = NULL; |
| 843 | newsk->sk_wmem_queued = 0; | 932 | newsk->sk_wmem_queued = 0; |
| @@ -1412,6 +1501,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) | |||
| 1412 | 1501 | ||
| 1413 | rwlock_init(&sk->sk_dst_lock); | 1502 | rwlock_init(&sk->sk_dst_lock); |
| 1414 | rwlock_init(&sk->sk_callback_lock); | 1503 | rwlock_init(&sk->sk_callback_lock); |
| 1504 | lockdep_set_class(&sk->sk_callback_lock, | ||
| 1505 | af_callback_keys + sk->sk_family); | ||
| 1415 | 1506 | ||
| 1416 | sk->sk_state_change = sock_def_wakeup; | 1507 | sk->sk_state_change = sock_def_wakeup; |
| 1417 | sk->sk_data_ready = sock_def_readable; | 1508 | sk->sk_data_ready = sock_def_readable; |
| @@ -1439,24 +1530,34 @@ void sock_init_data(struct socket *sock, struct sock *sk) | |||
| 1439 | void fastcall lock_sock(struct sock *sk) | 1530 | void fastcall lock_sock(struct sock *sk) |
| 1440 | { | 1531 | { |
| 1441 | might_sleep(); | 1532 | might_sleep(); |
| 1442 | spin_lock_bh(&(sk->sk_lock.slock)); | 1533 | spin_lock_bh(&sk->sk_lock.slock); |
| 1443 | if (sk->sk_lock.owner) | 1534 | if (sk->sk_lock.owner) |
| 1444 | __lock_sock(sk); | 1535 | __lock_sock(sk); |
| 1445 | sk->sk_lock.owner = (void *)1; | 1536 | sk->sk_lock.owner = (void *)1; |
| 1446 | spin_unlock_bh(&(sk->sk_lock.slock)); | 1537 | spin_unlock(&sk->sk_lock.slock); |
| 1538 | /* | ||
| 1539 | * The sk_lock has mutex_lock() semantics here: | ||
| 1540 | */ | ||
| 1541 | mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); | ||
| 1542 | local_bh_enable(); | ||
| 1447 | } | 1543 | } |
| 1448 | 1544 | ||
| 1449 | EXPORT_SYMBOL(lock_sock); | 1545 | EXPORT_SYMBOL(lock_sock); |
| 1450 | 1546 | ||
| 1451 | void fastcall release_sock(struct sock *sk) | 1547 | void fastcall release_sock(struct sock *sk) |
| 1452 | { | 1548 | { |
| 1453 | spin_lock_bh(&(sk->sk_lock.slock)); | 1549 | /* |
| 1550 | * The sk_lock has mutex_unlock() semantics: | ||
| 1551 | */ | ||
| 1552 | mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); | ||
| 1553 | |||
| 1554 | spin_lock_bh(&sk->sk_lock.slock); | ||
| 1454 | if (sk->sk_backlog.tail) | 1555 | if (sk->sk_backlog.tail) |
| 1455 | __release_sock(sk); | 1556 | __release_sock(sk); |
| 1456 | sk->sk_lock.owner = NULL; | 1557 | sk->sk_lock.owner = NULL; |
| 1457 | if (waitqueue_active(&(sk->sk_lock.wq))) | 1558 | if (waitqueue_active(&sk->sk_lock.wq)) |
| 1458 | wake_up(&(sk->sk_lock.wq)); | 1559 | wake_up(&sk->sk_lock.wq); |
| 1459 | spin_unlock_bh(&(sk->sk_lock.slock)); | 1560 | spin_unlock_bh(&sk->sk_lock.slock); |
| 1460 | } | 1561 | } |
| 1461 | EXPORT_SYMBOL(release_sock); | 1562 | EXPORT_SYMBOL(release_sock); |
| 1462 | 1563 | ||
