diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-04-08 19:03:29 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-04-13 04:41:33 -0400 |
commit | b6c6712a42ca3f9fa7f4a3d7c40e3a9dd1fd9e03 (patch) | |
tree | 42032b4978874e8ffcf6c851d13324b8c8c7c113 /net/ipv4 | |
parent | 7a161ea92471087a1579239d7a58dd06eaa5601c (diff) |
net: sk_dst_cache RCUification
With latest CONFIG_PROVE_RCU stuff, I felt more comfortable to make this
work.
sk->sk_dst_cache is currently protected by a rwlock (sk_dst_lock)
This rwlock is readlocked for a very small amount of time, and dst
entries are already freed after RCU grace period. This calls for RCU
again :)
This patch converts sk_dst_lock to a spinlock, and use RCU for readers.
__sk_dst_get() is supposed to be called with rcu_read_lock() or if
socket locked by user, so use appropriate rcu_dereference_check()
condition (rcu_read_lock_held() || sock_owned_by_user(sk))
This patch avoids two atomic ops per tx packet on UDP connected sockets,
for example, and permits sk_dst_lock to be much less dirtied.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 4 |
3 files changed, 5 insertions, 5 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a0beb32beaa3..193dcd6ed64f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -154,7 +154,7 @@ void inet_sock_destruct(struct sock *sk) | |||
154 | WARN_ON(sk->sk_forward_alloc); | 154 | WARN_ON(sk->sk_forward_alloc); |
155 | 155 | ||
156 | kfree(inet->opt); | 156 | kfree(inet->opt); |
157 | dst_release(sk->sk_dst_cache); | 157 | dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); |
158 | sk_refcnt_debug_dec(sk); | 158 | sk_refcnt_debug_dec(sk); |
159 | } | 159 | } |
160 | EXPORT_SYMBOL(inet_sock_destruct); | 160 | EXPORT_SYMBOL(inet_sock_destruct); |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4000b10610b7..ae3ec15fb630 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -3710,7 +3710,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
3710 | } | 3710 | } |
3711 | 3711 | ||
3712 | if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) | 3712 | if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) |
3713 | dst_confirm(sk->sk_dst_cache); | 3713 | dst_confirm(__sk_dst_get(sk)); |
3714 | 3714 | ||
3715 | return 1; | 3715 | return 1; |
3716 | 3716 | ||
@@ -5833,7 +5833,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5833 | if (tp->snd_una == tp->write_seq) { | 5833 | if (tp->snd_una == tp->write_seq) { |
5834 | tcp_set_state(sk, TCP_FIN_WAIT2); | 5834 | tcp_set_state(sk, TCP_FIN_WAIT2); |
5835 | sk->sk_shutdown |= SEND_SHUTDOWN; | 5835 | sk->sk_shutdown |= SEND_SHUTDOWN; |
5836 | dst_confirm(sk->sk_dst_cache); | 5836 | dst_confirm(__sk_dst_get(sk)); |
5837 | 5837 | ||
5838 | if (!sock_flag(sk, SOCK_DEAD)) | 5838 | if (!sock_flag(sk, SOCK_DEAD)) |
5839 | /* Wake up lingering close() */ | 5839 | /* Wake up lingering close() */ |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 8a0ab2977f1f..c732be00606b 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -172,14 +172,14 @@ static int tcp_write_timeout(struct sock *sk) | |||
172 | 172 | ||
173 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { | 173 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { |
174 | if (icsk->icsk_retransmits) | 174 | if (icsk->icsk_retransmits) |
175 | dst_negative_advice(&sk->sk_dst_cache, sk); | 175 | dst_negative_advice(sk); |
176 | retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; | 176 | retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; |
177 | } else { | 177 | } else { |
178 | if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { | 178 | if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { |
179 | /* Black hole detection */ | 179 | /* Black hole detection */ |
180 | tcp_mtu_probing(icsk, sk); | 180 | tcp_mtu_probing(icsk, sk); |
181 | 181 | ||
182 | dst_negative_advice(&sk->sk_dst_cache, sk); | 182 | dst_negative_advice(sk); |
183 | } | 183 | } |
184 | 184 | ||
185 | retry_until = sysctl_tcp_retries2; | 185 | retry_until = sysctl_tcp_retries2; |