author		Eric Dumazet <eric.dumazet@gmail.com>		2010-05-26 15:20:18 -0400
committer	David S. Miller <davem@davemloft.net>		2010-05-27 03:30:53 -0400
commit		8a74ad60a546b13bd1096b2a61a7a5c6fd9ae17c (patch)
tree		3110e7e59883597b5d0f617e8507e15b8f965f3f
parent		a56635a56f2afb3d22d9ce07e8f8d69537416b2d (diff)
net: fix lock_sock_bh/unlock_sock_bh
This new sock lock primitive was introduced to speed up some user-context
socket manipulation. But it is unsafe when two threads must be protected
from each other: one using regular lock_sock()/release_sock(), the other
using lock_sock_bh()/unlock_sock_bh().

This patch changes lock_sock_bh() to check the 'owned' state: if owned is
found to be set, we must take the slow path. lock_sock_bh() now returns a
boolean saying whether the slow path was taken, and this boolean is used
at unlock_sock_bh() time to call the appropriate unlock function.

After this change, BHs may be either disabled or enabled during the
protected section, depending on which path was taken, so the '_bh' name
would be misleading. We therefore rename these functions to
lock_sock_fast()/unlock_sock_fast().
Reported-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Tested-by: Anton Blanchard <anton@samba.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
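
To make the race concrete: lock_sock() marks the socket owned and then releases
sk_lock.slock, while the old lock_sock_bh() only took that spinlock and never
consulted the owned flag. A sketch of the interleaving this allowed,
reconstructed from the description above (illustrative only, not part of the
patch):

/*
 * CPU 0: process context (lock_sock)     CPU 1: old lock_sock_bh() user
 * -----------------------------------    ------------------------------
 * spin_lock_bh(&sk->sk_lock.slock);
 * sk->sk_lock.owned = 1;
 * spin_unlock_bh(&sk->sk_lock.slock);
 *                                        spin_lock_bh(&sk->sk_lock.slock);
 *                                        // succeeds: 'owned' never checked
 * ...updates socket state...             ...updates socket state...  // races
 *
 * lock_sock_fast() closes the hole by testing sk_lock.owned while still
 * holding slock, and falling back to __lock_sock() when it is set.
 */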
-rw-r--r--	include/net/sock.h	|	20
-rw-r--r--	net/core/datagram.c	|	 6
-rw-r--r--	net/core/sock.c		|	33
-rw-r--r--	net/ipv4/udp.c		|	14
-rw-r--r--	net/ipv6/udp.c		|	 5
5 files changed, 62 insertions, 16 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index d2a71b04a5ae..ca241ea14875 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1026,15 +1026,23 @@ extern void release_sock(struct sock *sk);
 				SINGLE_DEPTH_NESTING)
 #define bh_unlock_sock(__sk)	spin_unlock(&((__sk)->sk_lock.slock))
 
-static inline void lock_sock_bh(struct sock *sk)
+extern bool lock_sock_fast(struct sock *sk);
+/**
+ * unlock_sock_fast - complement of lock_sock_fast
+ * @sk: socket
+ * @slow: slow mode
+ *
+ * fast unlock socket for user context.
+ * If slow mode is on, we call regular release_sock()
+ */
+static inline void unlock_sock_fast(struct sock *sk, bool slow)
 {
-	spin_lock_bh(&sk->sk_lock.slock);
+	if (slow)
+		release_sock(sk);
+	else
+		spin_unlock_bh(&sk->sk_lock.slock);
 }
 
-static inline void unlock_sock_bh(struct sock *sk)
-{
-	spin_unlock_bh(&sk->sk_lock.slock);
-}
 
 extern struct sock		*sk_alloc(struct net *net, int family,
 					  gfp_t priority,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index e0097531417a..f5b6f43a4c2e 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -229,15 +229,17 @@ EXPORT_SYMBOL(skb_free_datagram);
 
 void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
 {
+	bool slow;
+
 	if (likely(atomic_read(&skb->users) == 1))
 		smp_rmb();
 	else if (likely(!atomic_dec_and_test(&skb->users)))
 		return;
 
-	lock_sock_bh(sk);
+	slow = lock_sock_fast(sk);
 	skb_orphan(skb);
 	sk_mem_reclaim_partial(sk);
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 
 	/* skb is now orphaned, can be freed outside of locked section */
 	__kfree_skb(skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index 37fe9b6adade..2cf7f9f7e775 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2007,6 +2007,39 @@ void release_sock(struct sock *sk)
 }
 EXPORT_SYMBOL(release_sock);
 
+/**
+ * lock_sock_fast - fast version of lock_sock
+ * @sk: socket
+ *
+ * This version should be used for very small section, where process wont block
+ * return false if fast path is taken
+ *   sk_lock.slock locked, owned = 0, BH disabled
+ * return true if slow path is taken
+ *   sk_lock.slock unlocked, owned = 1, BH enabled
+ */
+bool lock_sock_fast(struct sock *sk)
+{
+	might_sleep();
+	spin_lock_bh(&sk->sk_lock.slock);
+
+	if (!sk->sk_lock.owned)
+		/*
+		 * Note : We must disable BH
+		 */
+		return false;
+
+	__lock_sock(sk);
+	sk->sk_lock.owned = 1;
+	spin_unlock(&sk->sk_lock.slock);
+	/*
+	 * The sk_lock has mutex_lock() semantics here:
+	 */
+	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+	local_bh_enable();
+	return true;
+}
+EXPORT_SYMBOL(lock_sock_fast);
+
 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
 {
 	struct timeval tv;
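
With lock_sock_fast() above and unlock_sock_fast() from include/net/sock.h,
the caller-side contract looks as follows; this is a minimal hypothetical
caller (example_touch_sk() is not part of the patch), mirroring the
conversions in udp.c below:

static void example_touch_sk(struct sock *sk)
{
	bool slow = lock_sock_fast(sk);

	/*
	 * Fast path (slow == false): sk_lock.slock is held and BHs are
	 * disabled, so the section must stay short and must not sleep.
	 * Slow path (slow == true): the socket is owned as after
	 * lock_sock(), and BHs are enabled.
	 */
	sk_mem_reclaim_partial(sk);

	unlock_sock_fast(sk, slow);	/* calls release_sock() iff slow */
}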
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9de6a698f91d..b9d0d409516f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1063,10 +1063,11 @@ static unsigned int first_packet_length(struct sock *sk)
 	spin_unlock_bh(&rcvq->lock);
 
 	if (!skb_queue_empty(&list_kill)) {
-		lock_sock_bh(sk);
+		bool slow = lock_sock_fast(sk);
+
 		__skb_queue_purge(&list_kill);
 		sk_mem_reclaim_partial(sk);
-		unlock_sock_bh(sk);
+		unlock_sock_fast(sk, slow);
 	}
 	return res;
 }
@@ -1123,6 +1124,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	int peeked;
 	int err;
 	int is_udplite = IS_UDPLITE(sk);
+	bool slow;
 
 	/*
 	 *	Check any passed addresses
@@ -1197,10 +1199,10 @@ out:
 	return err;
 
 csum_copy_err:
-	lock_sock_bh(sk);
+	slow = lock_sock_fast(sk);
 	if (!skb_kill_datagram(sk, skb, flags))
 		UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 
 	if (noblock)
 		return -EAGAIN;
@@ -1625,9 +1627,9 @@ int udp_rcv(struct sk_buff *skb)
 
 void udp_destroy_sock(struct sock *sk)
 {
-	lock_sock_bh(sk);
+	bool slow = lock_sock_fast(sk);
 	udp_flush_pending_frames(sk);
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 }
 
 /*
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3d7a2c0b836a..87be58673b55 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -328,6 +328,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	int err;
 	int is_udplite = IS_UDPLITE(sk);
 	int is_udp4;
+	bool slow;
 
 	if (addr_len)
 		*addr_len=sizeof(struct sockaddr_in6);
@@ -424,7 +425,7 @@ out:
 	return err;
 
 csum_copy_err:
-	lock_sock_bh(sk);
+	slow = lock_sock_fast(sk);
 	if (!skb_kill_datagram(sk, skb, flags)) {
 		if (is_udp4)
 			UDP_INC_STATS_USER(sock_net(sk),
@@ -433,7 +434,7 @@ csum_copy_err:
 			UDP6_INC_STATS_USER(sock_net(sk),
 					    UDP_MIB_INERRORS, is_udplite);
 	}
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 
 	if (flags & MSG_DONTWAIT)
 		return -EAGAIN;