author    Eric Dumazet <edumazet@google.com>     2014-06-02 08:26:03 -0400
committer David S. Miller <davem@davemloft.net>  2014-06-02 14:00:41 -0400
commit    73f156a6e8c1074ac6327e0abd1169e95eb66463
tree      2c8b222f21784e738c397ba95dee70a8f256ea64  /include/net/ip.h
parent    e067ee336a9d3f038ffa9699c59f2abec3376bf7
inetpeer: get rid of ip_id_count
Ideally, we would need to generate the IP ID using a per-destination-IP
generator. Linux kernels used the inet_peer cache for this purpose, but this
had a huge cost on servers with MTU discovery disabled:

1) each inet_peer struct consumes 192 bytes

2) the inetpeer cache uses a binary tree of inet_peer structs, with a
   nominal size of ~66000 elements under load

3) lookups in this tree hit a lot of cache lines, as tree depth is about 20

4) if the server handles many TCP flows, there is a high probability of not
   finding the inet_peer, allocating a fresh one, and inserting it in the
   tree with the same initial ip_id_count (cf. secure_ip_id())

5) inet_peer entries are garbage collected aggressively

IP ID generation does not have to be 'perfect'. The goal is to avoid
duplicates over a short period of time, so that reassembly units have a
chance to complete reassembly of fragments belonging to one message before
receiving other fragments with a recycled ID.

We simply use an array of generators, and a Jenkins hash using the dst IP
as a key.

ipv6_select_ident() is put back into net/ipv6/ip6_output.c where it belongs
(it is only used from that file).

secure_ip_id() and secure_ipv6_id() are no longer needed.

ip_select_ident_more() is renamed to ip_select_ident_segs() to avoid an
unnecessary decrement/increment of the number of segments.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
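The generator side of this scheme lives on the IPv4 routing side
(net/ipv4/route.c), not in this header; the hunks below only declare the
ip_idents array and the ip_idents_reserve() helper. A minimal sketch of what
__ip_select_ident() does under this design is shown next; the seed handling
and the exact hashing details are assumptions for illustration, not a quote
of the patch:

#include <linux/jhash.h>
#include <linux/net.h>
#include <net/ip.h>

/* Sketch only: one of IP_IDENTS_SZ atomic counters is picked by hashing
 * the destination address, and 'segs' consecutive IDs are reserved from it.
 * The array itself is assumed to be allocated once at boot.
 */
atomic_t *ip_idents __read_mostly;

void __ip_select_ident(struct iphdr *iph, int segs)
{
	static u32 ip_idents_hashrnd __read_mostly;
	u32 hash, id;

	/* Assumption: lazily seed the hash key on first use. */
	net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));

	/* Jenkins hash of the destination address selects a generator slot. */
	hash = jhash_1word((__force u32)iph->daddr, ip_idents_hashrnd);

	/* Reserve 'segs' IDs and stamp the header with the first one. */
	id = ip_idents_reserve(hash, segs);
	iph->id = htons(id);
}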
Diffstat (limited to 'include/net/ip.h')
-rw-r--r--  include/net/ip.h   40
1 file changed, 23 insertions(+), 17 deletions(-)
diff --git a/include/net/ip.h b/include/net/ip.h
index 2e4947895d75..0e795df05ec9 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -309,9 +309,19 @@ static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb)
 	}
 }
 
-void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more);
+#define IP_IDENTS_SZ 2048u
+extern atomic_t *ip_idents;
 
-static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk)
+static inline u32 ip_idents_reserve(u32 hash, int segs)
+{
+	atomic_t *id_ptr = ip_idents + hash % IP_IDENTS_SZ;
+
+	return atomic_add_return(segs, id_ptr) - segs;
+}
+
+void __ip_select_ident(struct iphdr *iph, int segs);
+
+static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs)
 {
 	struct iphdr *iph = ip_hdr(skb);
 
@@ -321,24 +331,20 @@ static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, s
 		 * does not change, they drop every other packet in
 		 * a TCP stream using header compression.
 		 */
-		iph->id = (sk && inet_sk(sk)->inet_daddr) ?
-			  htons(inet_sk(sk)->inet_id++) : 0;
-	} else
-		__ip_select_ident(iph, dst, 0);
-}
-
-static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk, int more)
-{
-	struct iphdr *iph = ip_hdr(skb);
-
-	if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
 		if (sk && inet_sk(sk)->inet_daddr) {
 			iph->id = htons(inet_sk(sk)->inet_id);
-			inet_sk(sk)->inet_id += 1 + more;
-		} else
+			inet_sk(sk)->inet_id += segs;
+		} else {
 			iph->id = 0;
-	} else
-		__ip_select_ident(iph, dst, more);
+		}
+	} else {
+		__ip_select_ident(iph, segs);
+	}
+}
+
+static inline void ip_select_ident(struct sk_buff *skb, struct sock *sk)
+{
+	ip_select_ident_segs(skb, sk, 1);
 }
 
 static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto)
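Because the renamed helper takes the segment count directly, a transmit path
emitting a GSO packet can reserve one ID per resulting segment in a single
atomic_add_return(): ip_idents_reserve() returns the counter value before the
addition, i.e. the first ID of a contiguous block of 'segs' IDs. A caller
would look roughly like this (hypothetical illustration of the pattern, not
part of this hunk):

/* Hypothetical caller, for illustration only: reserve one IP ID per GSO
 * segment, or a single ID for an ordinary packet.
 */
static void example_stamp_ids(struct sk_buff *skb, struct sock *sk)
{
	ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1);
}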