diff options
author | Christoph Paasch <christoph.paasch@uclouvain.be> | 2014-01-16 14:01:21 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-01-17 21:05:34 -0500 |
commit | 77f99ad16a07aa062c2d30fae57b1fee456f6ef6 (patch) | |
tree | d95bb59a1fdcab37cd1eef6d909a4e75fc75d4c5 /net | |
parent | c196403b79aa241c3fefb3ee5bb328aa7c5cc860 (diff) |
tcp: metrics: Avoid duplicate entries with the same destination-IP
Because the tcp-metrics is an RCU-list, it may be that two
soft-interrupts are inside __tcp_get_metrics() for the same
destination-IP at the same time. If this destination-IP is not yet part of
the tcp-metrics, both soft-interrupts will end up in tcpm_new and create
a new entry for this IP.
So, we will have two tcp-metrics with the same destination-IP in the list.
This patch checks twice __tcp_get_metrics(). First without holding the
lock, then while holding the lock. The second one is there to confirm
that the entry has not been added by another soft-irq while waiting for
the spin-lock.
Fixes: 51c5d0c4b169b (tcp: Maintain dynamic metrics in local cache.)
Signed-off-by: Christoph Paasch <christoph.paasch@uclouvain.be>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/tcp_metrics.c | 51 |
1 files changed, 32 insertions, 19 deletions
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 06493736fbc8..098b3a29f6f3 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c | |||
@@ -22,6 +22,9 @@ | |||
22 | 22 | ||
23 | int sysctl_tcp_nometrics_save __read_mostly; | 23 | int sysctl_tcp_nometrics_save __read_mostly; |
24 | 24 | ||
25 | static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *addr, | ||
26 | struct net *net, unsigned int hash); | ||
27 | |||
25 | struct tcp_fastopen_metrics { | 28 | struct tcp_fastopen_metrics { |
26 | u16 mss; | 29 | u16 mss; |
27 | u16 syn_loss:10; /* Recurring Fast Open SYN losses */ | 30 | u16 syn_loss:10; /* Recurring Fast Open SYN losses */ |
@@ -130,16 +133,41 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst, | |||
130 | } | 133 | } |
131 | } | 134 | } |
132 | 135 | ||
136 | #define TCP_METRICS_TIMEOUT (60 * 60 * HZ) | ||
137 | |||
138 | static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst) | ||
139 | { | ||
140 | if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT))) | ||
141 | tcpm_suck_dst(tm, dst, false); | ||
142 | } | ||
143 | |||
144 | #define TCP_METRICS_RECLAIM_DEPTH 5 | ||
145 | #define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL | ||
146 | |||
133 | static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, | 147 | static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, |
134 | struct inetpeer_addr *addr, | 148 | struct inetpeer_addr *addr, |
135 | unsigned int hash, | 149 | unsigned int hash) |
136 | bool reclaim) | ||
137 | { | 150 | { |
138 | struct tcp_metrics_block *tm; | 151 | struct tcp_metrics_block *tm; |
139 | struct net *net; | 152 | struct net *net; |
153 | bool reclaim = false; | ||
140 | 154 | ||
141 | spin_lock_bh(&tcp_metrics_lock); | 155 | spin_lock_bh(&tcp_metrics_lock); |
142 | net = dev_net(dst->dev); | 156 | net = dev_net(dst->dev); |
157 | |||
158 | /* While waiting for the spin-lock the cache might have been populated | ||
159 | * with this entry and so we have to check again. | ||
160 | */ | ||
161 | tm = __tcp_get_metrics(addr, net, hash); | ||
162 | if (tm == TCP_METRICS_RECLAIM_PTR) { | ||
163 | reclaim = true; | ||
164 | tm = NULL; | ||
165 | } | ||
166 | if (tm) { | ||
167 | tcpm_check_stamp(tm, dst); | ||
168 | goto out_unlock; | ||
169 | } | ||
170 | |||
143 | if (unlikely(reclaim)) { | 171 | if (unlikely(reclaim)) { |
144 | struct tcp_metrics_block *oldest; | 172 | struct tcp_metrics_block *oldest; |
145 | 173 | ||
@@ -169,17 +197,6 @@ out_unlock: | |||
169 | return tm; | 197 | return tm; |
170 | } | 198 | } |
171 | 199 | ||
172 | #define TCP_METRICS_TIMEOUT (60 * 60 * HZ) | ||
173 | |||
174 | static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst) | ||
175 | { | ||
176 | if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT))) | ||
177 | tcpm_suck_dst(tm, dst, false); | ||
178 | } | ||
179 | |||
180 | #define TCP_METRICS_RECLAIM_DEPTH 5 | ||
181 | #define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL | ||
182 | |||
183 | static struct tcp_metrics_block *tcp_get_encode(struct tcp_metrics_block *tm, int depth) | 200 | static struct tcp_metrics_block *tcp_get_encode(struct tcp_metrics_block *tm, int depth) |
184 | { | 201 | { |
185 | if (tm) | 202 | if (tm) |
@@ -282,7 +299,6 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, | |||
282 | struct inetpeer_addr addr; | 299 | struct inetpeer_addr addr; |
283 | unsigned int hash; | 300 | unsigned int hash; |
284 | struct net *net; | 301 | struct net *net; |
285 | bool reclaim; | ||
286 | 302 | ||
287 | addr.family = sk->sk_family; | 303 | addr.family = sk->sk_family; |
288 | switch (addr.family) { | 304 | switch (addr.family) { |
@@ -304,13 +320,10 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, | |||
304 | hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); | 320 | hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); |
305 | 321 | ||
306 | tm = __tcp_get_metrics(&addr, net, hash); | 322 | tm = __tcp_get_metrics(&addr, net, hash); |
307 | reclaim = false; | 323 | if (tm == TCP_METRICS_RECLAIM_PTR) |
308 | if (tm == TCP_METRICS_RECLAIM_PTR) { | ||
309 | reclaim = true; | ||
310 | tm = NULL; | 324 | tm = NULL; |
311 | } | ||
312 | if (!tm && create) | 325 | if (!tm && create) |
313 | tm = tcpm_new(dst, &addr, hash, reclaim); | 326 | tm = tcpm_new(dst, &addr, hash); |
314 | else | 327 | else |
315 | tcpm_check_stamp(tm, dst); | 328 | tcpm_check_stamp(tm, dst); |
316 | 329 | ||