author	Eric Dumazet <edumazet@google.com>	2014-06-02 08:26:03 -0400
committer	David S. Miller <davem@davemloft.net>	2014-06-02 14:00:41 -0400
commit	73f156a6e8c1074ac6327e0abd1169e95eb66463 (patch)
tree	2c8b222f21784e738c397ba95dee70a8f256ea64
parent	e067ee336a9d3f038ffa9699c59f2abec3376bf7 (diff)
inetpeer: get rid of ip_id_count
Ideally, we would need to generate the IP ID using a per-destination-IP generator.

Linux kernels used the inet_peer cache for this purpose, but this had a huge cost on servers disabling MTU discovery:

1) each inet_peer struct consumes 192 bytes

2) the inetpeer cache uses a binary tree of inet_peer structs, with a nominal size of ~66000 elements under load

3) lookups in this tree hit a lot of cache lines, as tree depth is about 20

4) if the server deals with many TCP flows, we have a high probability of not finding the inet_peer, allocating a fresh one, and inserting it in the tree with the same initial ip_id_count (cf. secure_ip_id())

5) we garbage collect inet_peer aggressively

IP ID generation does not have to be 'perfect': the goal is to avoid duplicates over a short period of time, so that reassembly units have a chance to complete reassembly of fragments belonging to one message before receiving other fragments with a recycled ID.

We simply use an array of generators, and a Jenkins hash using the dst IP as a key.

ipv6_select_ident() is put back into net/ipv6/ip6_output.c where it belongs (it is only used from this file).

secure_ip_id() and secure_ipv6_id() are no longer needed.

Rename ip_select_ident_more() to ip_select_ident_segs() to avoid an unnecessary decrement/increment of the number of segments.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
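The scheme is small enough to sketch in plain C. Below is a minimal, self-contained user-space sketch of the idea: IP_IDENTS_SZ, ip_idents_reserve() and the hash-on-daddr structure mirror the patch, while the hash function, the RNG and main() are simplified stand-ins added here for illustration (the kernel uses jhash_1word(), prandom_bytes() and net_get_random_once()).

```c
/*
 * User-space sketch of the per-destination IP ID generators introduced
 * by this patch.  Not kernel code: the hash and seeding are simplified
 * stand-ins for jhash_1word()/prandom_bytes().
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define IP_IDENTS_SZ 2048u

static atomic_uint ip_idents[IP_IDENTS_SZ];	/* array of ID generators */
static uint32_t ip_idents_hashrnd;		/* per-boot random key */

/* Reserve 'segs' consecutive IDs from one bucket, return the first one. */
static uint32_t ip_idents_reserve(uint32_t hash, int segs)
{
	atomic_uint *id_ptr = &ip_idents[hash % IP_IDENTS_SZ];

	return atomic_fetch_add(id_ptr, (unsigned int)segs);
}

/* Toy stand-in for jhash_1word(daddr, ip_idents_hashrnd). */
static uint32_t hash_daddr(uint32_t daddr)
{
	uint32_t h = daddr ^ ip_idents_hashrnd;

	h ^= h >> 16;
	h *= 0x45d9f3bu;
	h ^= h >> 16;
	return h;
}

int main(void)
{
	uint32_t daddr = 0x0a000001;	/* 10.0.0.1, example destination */
	uint16_t id;

	srand(12345);
	ip_idents_hashrnd = (uint32_t)rand();
	for (unsigned int i = 0; i < IP_IDENTS_SZ; i++)
		atomic_init(&ip_idents[i], (unsigned int)rand());

	/* A GSO packet covering 3 segments reserves 3 IDs at once. */
	id = (uint16_t)ip_idents_reserve(hash_daddr(daddr), 3);
	printf("first IP ID for this packet: %u\n", (unsigned int)id);
	return 0;
}
```

Flows hitting the same bucket share a generator, so IDs are not per-destination unique, but with 2048 counters and a keyed hash the reuse period for any one destination stays long enough for reassembly, which is all the commit message asks of it. Reserving `segs` IDs at once is what lets ip_select_ident_segs() hand one call's worth of GSO segments consecutive IDs.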
-rw-r--r--  drivers/net/ppp/pptp.c           |  2
-rw-r--r--  include/net/inetpeer.h           | 23
-rw-r--r--  include/net/ip.h                 | 40
-rw-r--r--  include/net/ipv6.h               |  2
-rw-r--r--  include/net/secure_seq.h         |  2
-rw-r--r--  net/core/secure_seq.c            | 25
-rw-r--r--  net/ipv4/igmp.c                  |  4
-rw-r--r--  net/ipv4/inetpeer.c              | 18
-rw-r--r--  net/ipv4/ip_output.c             |  7
-rw-r--r--  net/ipv4/ip_tunnel_core.c        |  2
-rw-r--r--  net/ipv4/ipmr.c                  |  2
-rw-r--r--  net/ipv4/raw.c                   |  2
-rw-r--r--  net/ipv4/route.c                 | 45
-rw-r--r--  net/ipv4/xfrm4_mode_tunnel.c     |  2
-rw-r--r--  net/ipv6/ip6_output.c            | 12
-rw-r--r--  net/ipv6/output_core.c           | 30
-rw-r--r--  net/netfilter/ipvs/ip_vs_xmit.c  |  2
17 files changed, 65 insertions, 155 deletions
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index 01805319e1e0..1aff970be33e 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -281,7 +281,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	nf_reset(skb);
 
 	skb->ip_summed = CHECKSUM_NONE;
-	ip_select_ident(skb, &rt->dst, NULL);
+	ip_select_ident(skb, NULL);
 	ip_send_check(iph);
 
 	ip_local_out(skb);
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 6efe73c79c52..823ec7bb9c67 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -41,14 +41,13 @@ struct inet_peer {
 		struct rcu_head	gc_rcu;
 	};
 	/*
-	 * Once inet_peer is queued for deletion (refcnt == -1), following fields
-	 * are not available: rid, ip_id_count
+	 * Once inet_peer is queued for deletion (refcnt == -1), following field
+	 * is not available: rid
 	 * We can share memory with rcu_head to help keep inet_peer small.
 	 */
 	union {
 		struct {
 			atomic_t	rid;		/* Frag reception counter */
-			atomic_t	ip_id_count;	/* IP ID for the next packet */
 		};
 		struct rcu_head	rcu;
 		struct inet_peer	*gc_next;
@@ -165,7 +164,7 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);
 void inetpeer_invalidate_tree(struct inet_peer_base *);
 
 /*
- * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
+ * temporary check to make sure we dont access rid, tcp_ts,
  * tcp_ts_stamp if no refcount is taken on inet_peer
  */
 static inline void inet_peer_refcheck(const struct inet_peer *p)
@@ -173,20 +172,4 @@ static inline void inet_peer_refcheck(const struct inet_peer *p)
 	WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0);
 }
 
-
-/* can be called with or without local BH being disabled */
-static inline int inet_getid(struct inet_peer *p, int more)
-{
-	int old, new;
-	more++;
-	inet_peer_refcheck(p);
-	do {
-		old = atomic_read(&p->ip_id_count);
-		new = old + more;
-		if (!new)
-			new = 1;
-	} while (atomic_cmpxchg(&p->ip_id_count, old, new) != old);
-	return new;
-}
-
 #endif /* _NET_INETPEER_H */
diff --git a/include/net/ip.h b/include/net/ip.h
index 2e4947895d75..0e795df05ec9 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -309,9 +309,19 @@ static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb)
 	}
 }
 
-void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more);
+#define IP_IDENTS_SZ 2048u
+extern atomic_t *ip_idents;
 
-static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk)
+static inline u32 ip_idents_reserve(u32 hash, int segs)
+{
+	atomic_t *id_ptr = ip_idents + hash % IP_IDENTS_SZ;
+
+	return atomic_add_return(segs, id_ptr) - segs;
+}
+
+void __ip_select_ident(struct iphdr *iph, int segs);
+
+static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs)
 {
 	struct iphdr *iph = ip_hdr(skb);
 
@@ -321,24 +331,20 @@ static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, s
 	 * does not change, they drop every other packet in
 	 * a TCP stream using header compression.
 	 */
-		iph->id = (sk && inet_sk(sk)->inet_daddr) ?
-			 htons(inet_sk(sk)->inet_id++) : 0;
-	} else
-		__ip_select_ident(iph, dst, 0);
-}
-
-static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk, int more)
-{
-	struct iphdr *iph = ip_hdr(skb);
-
-	if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
 		if (sk && inet_sk(sk)->inet_daddr) {
 			iph->id = htons(inet_sk(sk)->inet_id);
-			inet_sk(sk)->inet_id += 1 + more;
-		} else
+			inet_sk(sk)->inet_id += segs;
+		} else {
 			iph->id = 0;
-	} else
-		__ip_select_ident(iph, dst, more);
+		}
+	} else {
+		__ip_select_ident(iph, segs);
+	}
+}
+
+static inline void ip_select_ident(struct sk_buff *skb, struct sock *sk)
+{
+	ip_select_ident_segs(skb, sk, 1);
 }
 
 static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index ba810d0546bc..574337fe72dd 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -668,8 +668,6 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add
 	return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr));
 }
 
-void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt);
-
 int ip6_dst_hoplimit(struct dst_entry *dst);
 
 static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index f257486f17be..3f36d45b714a 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -3,8 +3,6 @@
 
 #include <linux/types.h>
 
-__u32 secure_ip_id(__be32 daddr);
-__u32 secure_ipv6_id(const __be32 daddr[4]);
 u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
 u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 			       __be16 dport);
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 897da56f3aff..ba71212f0251 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -85,31 +85,6 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
 #endif
 
 #ifdef CONFIG_INET
-__u32 secure_ip_id(__be32 daddr)
-{
-	u32 hash[MD5_DIGEST_WORDS];
-
-	net_secret_init();
-	hash[0] = (__force __u32) daddr;
-	hash[1] = net_secret[13];
-	hash[2] = net_secret[14];
-	hash[3] = net_secret[15];
-
-	md5_transform(hash, net_secret);
-
-	return hash[0];
-}
-
-__u32 secure_ipv6_id(const __be32 daddr[4])
-{
-	__u32 hash[4];
-
-	net_secret_init();
-	memcpy(hash, daddr, 16);
-	md5_transform(hash, net_secret);
-
-	return hash[0];
-}
 
 __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 				 __be16 sport, __be16 dport)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 17d34e3c2ac3..6748d420f714 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -369,7 +369,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 	pip->saddr = fl4.saddr;
 	pip->protocol = IPPROTO_IGMP;
 	pip->tot_len = 0;	/* filled in later */
-	ip_select_ident(skb, &rt->dst, NULL);
+	ip_select_ident(skb, NULL);
 	((u8 *)&pip[1])[0] = IPOPT_RA;
 	((u8 *)&pip[1])[1] = 4;
 	((u8 *)&pip[1])[2] = 0;
@@ -714,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 	iph->daddr = dst;
 	iph->saddr = fl4.saddr;
 	iph->protocol = IPPROTO_IGMP;
-	ip_select_ident(skb, &rt->dst, NULL);
+	ip_select_ident(skb, NULL);
 	((u8 *)&iph[1])[0] = IPOPT_RA;
 	((u8 *)&iph[1])[1] = 4;
 	((u8 *)&iph[1])[2] = 0;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index c98cf141f4ed..4ced1b9a97f0 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -26,20 +26,7 @@
  * Theory of operations.
  * We keep one entry for each peer IP address. The nodes contains long-living
  * information about the peer which doesn't depend on routes.
- * At this moment this information consists only of ID field for the next
- * outgoing IP packet. This field is incremented with each packet as encoded
- * in inet_getid() function (include/net/inetpeer.h).
- * At the moment of writing this notes identifier of IP packets is generated
- * to be unpredictable using this code only for packets subjected
- * (actually or potentially) to defragmentation. I.e. DF packets less than
- * PMTU in size when local fragmentation is disabled use a constant ID and do
- * not use this code (see ip_select_ident() in include/net/ip.h).
  *
- * Route cache entries hold references to our nodes.
- * New cache entries get references via lookup by destination IP address in
- * the avl tree. The reference is grabbed only when it's needed i.e. only
- * when we try to output IP packet which needs an unpredictable ID (see
- * __ip_select_ident() in net/ipv4/route.c).
  * Nodes are removed only when reference counter goes to 0.
  * When it's happened the node may be removed when a sufficient amount of
  * time has been passed since its last use. The less-recently-used entry can
@@ -62,7 +49,6 @@
  *		refcnt: atomically against modifications on other CPU;
  *		   usually under some other lock to prevent node disappearing
  *		daddr: unchangeable
- *		ip_id_count: atomic value (no lock needed)
  */
 
 static struct kmem_cache *peer_cachep __read_mostly;
@@ -497,10 +483,6 @@ relookup:
 		p->daddr = *daddr;
 		atomic_set(&p->refcnt, 1);
 		atomic_set(&p->rid, 0);
-		atomic_set(&p->ip_id_count,
-				(daddr->family == AF_INET) ?
-					secure_ip_id(daddr->addr.a4) :
-					secure_ipv6_id(daddr->addr.a6));
 		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
 		p->rate_tokens = 0;
 		/* 60*HZ is arbitrary, but chosen enough high so that the first
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6e231ab58d65..8d3b6b0e9857 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 	iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
 	iph->saddr = saddr;
 	iph->protocol = sk->sk_protocol;
-	ip_select_ident(skb, &rt->dst, sk);
+	ip_select_ident(skb, sk);
 
 	if (opt && opt->opt.optlen) {
 		iph->ihl += opt->opt.optlen>>2;
@@ -430,8 +430,7 @@ packet_routed:
 		ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
 	}
 
-	ip_select_ident_more(skb, &rt->dst, sk,
-			     (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+	ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1);
 
 	/* TODO : should we use skb->sk here instead of sk ? */
 	skb->priority = sk->sk_priority;
@@ -1379,7 +1378,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
 	iph->ttl = ttl;
 	iph->protocol = sk->sk_protocol;
 	ip_copy_addrs(iph, fl4);
-	ip_select_ident(skb, &rt->dst, sk);
+	ip_select_ident(skb, sk);
 
 	if (opt) {
 		iph->ihl += opt->optlen>>2;
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index bcf206c79005..847e69cbff7e 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -74,7 +74,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
 	iph->daddr = dst;
 	iph->saddr = src;
 	iph->ttl = ttl;
-	__ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+	__ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1);
 
 	err = ip_local_out_sk(sk, skb);
 	if (unlikely(net_xmit_eval(err)))
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 2bc9cc47f246..65bcaa789043 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1663,7 +1663,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 	iph->protocol = IPPROTO_IPIP;
 	iph->ihl = 5;
 	iph->tot_len = htons(skb->len);
-	ip_select_ident(skb, skb_dst(skb), NULL);
+	ip_select_ident(skb, NULL);
 	ip_send_check(iph);
 
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index a9dbe58bdfe7..2c65160565e1 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -389,7 +389,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 		iph->check = 0;
 		iph->tot_len = htons(length);
 		if (!iph->id)
-			ip_select_ident(skb, &rt->dst, NULL);
+			ip_select_ident(skb, NULL);
 
 		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 	}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4154eb76b0ad..082239ffe34a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -89,6 +89,7 @@
 #include <linux/rcupdate.h>
 #include <linux/times.h>
 #include <linux/slab.h>
+#include <linux/jhash.h>
 #include <net/dst.h>
 #include <net/net_namespace.h>
 #include <net/protocol.h>
@@ -456,39 +457,19 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
 	return neigh_create(&arp_tbl, pkey, dev);
 }
 
-/*
- * Peer allocation may fail only in serious out-of-memory conditions. However
- * we still can generate some output.
- * Random ID selection looks a bit dangerous because we have no chances to
- * select ID being unique in a reasonable period of time.
- * But broken packet identifier may be better than no packet at all.
- */
-static void ip_select_fb_ident(struct iphdr *iph)
-{
-	static DEFINE_SPINLOCK(ip_fb_id_lock);
-	static u32 ip_fallback_id;
-	u32 salt;
+atomic_t *ip_idents __read_mostly;
+EXPORT_SYMBOL(ip_idents);
 
-	spin_lock_bh(&ip_fb_id_lock);
-	salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
-	iph->id = htons(salt & 0xFFFF);
-	ip_fallback_id = salt;
-	spin_unlock_bh(&ip_fb_id_lock);
-}
-
-void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
+void __ip_select_ident(struct iphdr *iph, int segs)
 {
-	struct net *net = dev_net(dst->dev);
-	struct inet_peer *peer;
+	static u32 ip_idents_hashrnd __read_mostly;
+	u32 hash, id;
 
-	peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
-	if (peer) {
-		iph->id = htons(inet_getid(peer, more));
-		inet_putpeer(peer);
-		return;
-	}
+	net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
 
-	ip_select_fb_ident(iph);
+	hash = jhash_1word((__force u32)iph->daddr, ip_idents_hashrnd);
+	id = ip_idents_reserve(hash, segs);
+	iph->id = htons(id);
 }
 EXPORT_SYMBOL(__ip_select_ident);
 
@@ -2711,6 +2692,12 @@ int __init ip_rt_init(void)
 {
 	int rc = 0;
 
+	ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
+	if (!ip_idents)
+		panic("IP: failed to allocate ip_idents\n");
+
+	prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
+
 #ifdef CONFIG_IP_ROUTE_CLASSID
 	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
 	if (!ip_rt_acct)
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 05f2b484954f..91771a7c802f 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -58,12 +58,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
 		0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
-	ip_select_ident(skb, dst->child, NULL);
 
 	top_iph->ttl = ip4_dst_hoplimit(dst->child);
 
 	top_iph->saddr = x->props.saddr.a4;
 	top_iph->daddr = x->id.daddr.a4;
+	ip_select_ident(skb, NULL);
 
 	return 0;
 }
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 85aaeca1f7f3..cb9df0eb4023 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -537,6 +537,18 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
537 skb_copy_secmark(to, from); 537 skb_copy_secmark(to, from);
538} 538}
539 539
540static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
541{
542 static u32 ip6_idents_hashrnd __read_mostly;
543 u32 hash, id;
544
545 net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
546
547 hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd);
548 id = ip_idents_reserve(hash, 1);
549 fhdr->identification = htonl(id);
550}
551
540int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) 552int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
541{ 553{
542 struct sk_buff *frag; 554 struct sk_buff *frag;
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 6313abd53c9d..6179ac186ab9 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -8,36 +8,6 @@
 #include <net/addrconf.h>
 #include <net/secure_seq.h>
 
-void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
-{
-	static atomic_t ipv6_fragmentation_id;
-	struct in6_addr addr;
-	int old, new;
-
-#if IS_ENABLED(CONFIG_IPV6)
-	struct inet_peer *peer;
-	struct net *net;
-
-	net = dev_net(rt->dst.dev);
-	peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
-	if (peer) {
-		fhdr->identification = htonl(inet_getid(peer, 0));
-		inet_putpeer(peer);
-		return;
-	}
-#endif
-	do {
-		old = atomic_read(&ipv6_fragmentation_id);
-		new = old + 1;
-		if (!new)
-			new = 1;
-	} while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
-
-	addr = rt->rt6i_dst.addr;
-	addr.s6_addr32[0] ^= (__force __be32)new;
-	fhdr->identification = htonl(secure_ipv6_id(addr.s6_addr32));
-}
-EXPORT_SYMBOL(ipv6_select_ident);
-
 
 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 {
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 487b55e04337..73ba1cc7a88d 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -883,7 +883,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	iph->daddr = cp->daddr.ip;
 	iph->saddr = saddr;
 	iph->ttl = old_iph->ttl;
-	ip_select_ident(skb, &rt->dst, NULL);
+	ip_select_ident(skb, NULL);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->ignore_df = 1;