aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2010-04-08 19:03:29 -0400
committerDavid S. Miller <davem@davemloft.net>2010-04-13 04:41:33 -0400
commitb6c6712a42ca3f9fa7f4a3d7c40e3a9dd1fd9e03 (patch)
tree42032b4978874e8ffcf6c851d13324b8c8c7c113 /net/ipv4
parent7a161ea92471087a1579239d7a58dd06eaa5601c (diff)
net: sk_dst_cache RCUification
With latest CONFIG_PROVE_RCU stuff, I felt more comfortable to make this work. sk->sk_dst_cache is currently protected by a rwlock (sk_dst_lock) This rwlock is readlocked for a very small amount of time, and dst entries are already freed after RCU grace period. This calls for RCU again :) This patch converts sk_dst_lock to a spinlock, and use RCU for readers. __sk_dst_get() is supposed to be called with rcu_read_lock() or if socket locked by user, so use appropriate rcu_dereference_check() condition (rcu_read_lock_held() || sock_owned_by_user(sk)) This patch avoids two atomic ops per tx packet on UDP connected sockets, for example, and permits sk_dst_lock to be much less dirtied. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/tcp_input.c4
-rw-r--r--net/ipv4/tcp_timer.c4
3 files changed, 5 insertions, 5 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index a0beb32beaa3..193dcd6ed64f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -154,7 +154,7 @@ void inet_sock_destruct(struct sock *sk)
154 WARN_ON(sk->sk_forward_alloc); 154 WARN_ON(sk->sk_forward_alloc);
155 155
156 kfree(inet->opt); 156 kfree(inet->opt);
157 dst_release(sk->sk_dst_cache); 157 dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
158 sk_refcnt_debug_dec(sk); 158 sk_refcnt_debug_dec(sk);
159} 159}
160EXPORT_SYMBOL(inet_sock_destruct); 160EXPORT_SYMBOL(inet_sock_destruct);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4000b10610b7..ae3ec15fb630 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3710,7 +3710,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3710 } 3710 }
3711 3711
3712 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) 3712 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3713 dst_confirm(sk->sk_dst_cache); 3713 dst_confirm(__sk_dst_get(sk));
3714 3714
3715 return 1; 3715 return 1;
3716 3716
@@ -5833,7 +5833,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5833 if (tp->snd_una == tp->write_seq) { 5833 if (tp->snd_una == tp->write_seq) {
5834 tcp_set_state(sk, TCP_FIN_WAIT2); 5834 tcp_set_state(sk, TCP_FIN_WAIT2);
5835 sk->sk_shutdown |= SEND_SHUTDOWN; 5835 sk->sk_shutdown |= SEND_SHUTDOWN;
5836 dst_confirm(sk->sk_dst_cache); 5836 dst_confirm(__sk_dst_get(sk));
5837 5837
5838 if (!sock_flag(sk, SOCK_DEAD)) 5838 if (!sock_flag(sk, SOCK_DEAD))
5839 /* Wake up lingering close() */ 5839 /* Wake up lingering close() */
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 8a0ab2977f1f..c732be00606b 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -172,14 +172,14 @@ static int tcp_write_timeout(struct sock *sk)
172 172
173 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { 173 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
174 if (icsk->icsk_retransmits) 174 if (icsk->icsk_retransmits)
175 dst_negative_advice(&sk->sk_dst_cache, sk); 175 dst_negative_advice(sk);
176 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; 176 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
177 } else { 177 } else {
178 if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { 178 if (retransmits_timed_out(sk, sysctl_tcp_retries1)) {
179 /* Black hole detection */ 179 /* Black hole detection */
180 tcp_mtu_probing(icsk, sk); 180 tcp_mtu_probing(icsk, sk);
181 181
182 dst_negative_advice(&sk->sk_dst_cache, sk); 182 dst_negative_advice(sk);
183 } 183 }
184 184
185 retry_until = sysctl_tcp_retries2; 185 retry_until = sysctl_tcp_retries2;