diff options
author | Eric Dumazet <edumazet@google.com> | 2014-06-02 08:26:03 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-06-02 14:00:41 -0400 |
commit | 73f156a6e8c1074ac6327e0abd1169e95eb66463 (patch) | |
tree | 2c8b222f21784e738c397ba95dee70a8f256ea64 | |
parent | e067ee336a9d3f038ffa9699c59f2abec3376bf7 (diff) |
inetpeer: get rid of ip_id_count
Ideally, we would need to generate IP ID using a per destination IP
generator.
Linux kernels used the inet_peer cache for this purpose, but this had a huge
cost on servers disabling MTU discovery.
1) each inet_peer struct consumes 192 bytes
2) inetpeer cache uses a binary tree of inet_peer structs,
with a nominal size of ~66000 elements under load.
3) lookups in this tree are hitting a lot of cache lines, as tree depth
is about 20.
4) If server deals with many tcp flows, we have a high probability of
not finding the inet_peer, allocating a fresh one, inserting it in
the tree with same initial ip_id_count, (cf secure_ip_id())
5) We garbage collect inet_peer aggressively.
IP ID generation does not have to be 'perfect'.
Goal is trying to avoid duplicates in a short period of time,
so that reassembly units have a chance to complete reassembly of
fragments belonging to one message before receiving other fragments
with a recycled ID.
We simply use an array of generators, and a Jenkins hash using the dst IP
as a key.
ipv6_select_ident() is put back into net/ipv6/ip6_output.c where it
belongs (it is only used from this file)
secure_ip_id() and secure_ipv6_id() no longer are needed.
Rename ip_select_ident_more() to ip_select_ident_segs() to avoid
unnecessary decrement/increment of the number of segments.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ppp/pptp.c | 2 | ||||
-rw-r--r-- | include/net/inetpeer.h | 23 | ||||
-rw-r--r-- | include/net/ip.h | 40 | ||||
-rw-r--r-- | include/net/ipv6.h | 2 | ||||
-rw-r--r-- | include/net/secure_seq.h | 2 | ||||
-rw-r--r-- | net/core/secure_seq.c | 25 | ||||
-rw-r--r-- | net/ipv4/igmp.c | 4 | ||||
-rw-r--r-- | net/ipv4/inetpeer.c | 18 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 7 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel_core.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipmr.c | 2 | ||||
-rw-r--r-- | net/ipv4/raw.c | 2 | ||||
-rw-r--r-- | net/ipv4/route.c | 45 | ||||
-rw-r--r-- | net/ipv4/xfrm4_mode_tunnel.c | 2 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 12 | ||||
-rw-r--r-- | net/ipv6/output_core.c | 30 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_xmit.c | 2 |
17 files changed, 65 insertions, 155 deletions
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 01805319e1e0..1aff970be33e 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c | |||
@@ -281,7 +281,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) | |||
281 | nf_reset(skb); | 281 | nf_reset(skb); |
282 | 282 | ||
283 | skb->ip_summed = CHECKSUM_NONE; | 283 | skb->ip_summed = CHECKSUM_NONE; |
284 | ip_select_ident(skb, &rt->dst, NULL); | 284 | ip_select_ident(skb, NULL); |
285 | ip_send_check(iph); | 285 | ip_send_check(iph); |
286 | 286 | ||
287 | ip_local_out(skb); | 287 | ip_local_out(skb); |
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 6efe73c79c52..823ec7bb9c67 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h | |||
@@ -41,14 +41,13 @@ struct inet_peer { | |||
41 | struct rcu_head gc_rcu; | 41 | struct rcu_head gc_rcu; |
42 | }; | 42 | }; |
43 | /* | 43 | /* |
44 | * Once inet_peer is queued for deletion (refcnt == -1), following fields | 44 | * Once inet_peer is queued for deletion (refcnt == -1), following field |
45 | * are not available: rid, ip_id_count | 45 | * is not available: rid |
46 | * We can share memory with rcu_head to help keep inet_peer small. | 46 | * We can share memory with rcu_head to help keep inet_peer small. |
47 | */ | 47 | */ |
48 | union { | 48 | union { |
49 | struct { | 49 | struct { |
50 | atomic_t rid; /* Frag reception counter */ | 50 | atomic_t rid; /* Frag reception counter */ |
51 | atomic_t ip_id_count; /* IP ID for the next packet */ | ||
52 | }; | 51 | }; |
53 | struct rcu_head rcu; | 52 | struct rcu_head rcu; |
54 | struct inet_peer *gc_next; | 53 | struct inet_peer *gc_next; |
@@ -165,7 +164,7 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); | |||
165 | void inetpeer_invalidate_tree(struct inet_peer_base *); | 164 | void inetpeer_invalidate_tree(struct inet_peer_base *); |
166 | 165 | ||
167 | /* | 166 | /* |
168 | * temporary check to make sure we dont access rid, ip_id_count, tcp_ts, | 167 | * temporary check to make sure we dont access rid, tcp_ts, |
169 | * tcp_ts_stamp if no refcount is taken on inet_peer | 168 | * tcp_ts_stamp if no refcount is taken on inet_peer |
170 | */ | 169 | */ |
171 | static inline void inet_peer_refcheck(const struct inet_peer *p) | 170 | static inline void inet_peer_refcheck(const struct inet_peer *p) |
@@ -173,20 +172,4 @@ static inline void inet_peer_refcheck(const struct inet_peer *p) | |||
173 | WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0); | 172 | WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0); |
174 | } | 173 | } |
175 | 174 | ||
176 | |||
177 | /* can be called with or without local BH being disabled */ | ||
178 | static inline int inet_getid(struct inet_peer *p, int more) | ||
179 | { | ||
180 | int old, new; | ||
181 | more++; | ||
182 | inet_peer_refcheck(p); | ||
183 | do { | ||
184 | old = atomic_read(&p->ip_id_count); | ||
185 | new = old + more; | ||
186 | if (!new) | ||
187 | new = 1; | ||
188 | } while (atomic_cmpxchg(&p->ip_id_count, old, new) != old); | ||
189 | return new; | ||
190 | } | ||
191 | |||
192 | #endif /* _NET_INETPEER_H */ | 175 | #endif /* _NET_INETPEER_H */ |
diff --git a/include/net/ip.h b/include/net/ip.h index 2e4947895d75..0e795df05ec9 100644 --- a/include/net/ip.h +++ b/include/net/ip.h | |||
@@ -309,9 +309,19 @@ static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb) | |||
309 | } | 309 | } |
310 | } | 310 | } |
311 | 311 | ||
312 | void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more); | 312 | #define IP_IDENTS_SZ 2048u |
313 | extern atomic_t *ip_idents; | ||
313 | 314 | ||
314 | static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk) | 315 | static inline u32 ip_idents_reserve(u32 hash, int segs) |
316 | { | ||
317 | atomic_t *id_ptr = ip_idents + hash % IP_IDENTS_SZ; | ||
318 | |||
319 | return atomic_add_return(segs, id_ptr) - segs; | ||
320 | } | ||
321 | |||
322 | void __ip_select_ident(struct iphdr *iph, int segs); | ||
323 | |||
324 | static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs) | ||
315 | { | 325 | { |
316 | struct iphdr *iph = ip_hdr(skb); | 326 | struct iphdr *iph = ip_hdr(skb); |
317 | 327 | ||
@@ -321,24 +331,20 @@ static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, s | |||
321 | * does not change, they drop every other packet in | 331 | * does not change, they drop every other packet in |
322 | * a TCP stream using header compression. | 332 | * a TCP stream using header compression. |
323 | */ | 333 | */ |
324 | iph->id = (sk && inet_sk(sk)->inet_daddr) ? | ||
325 | htons(inet_sk(sk)->inet_id++) : 0; | ||
326 | } else | ||
327 | __ip_select_ident(iph, dst, 0); | ||
328 | } | ||
329 | |||
330 | static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk, int more) | ||
331 | { | ||
332 | struct iphdr *iph = ip_hdr(skb); | ||
333 | |||
334 | if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { | ||
335 | if (sk && inet_sk(sk)->inet_daddr) { | 334 | if (sk && inet_sk(sk)->inet_daddr) { |
336 | iph->id = htons(inet_sk(sk)->inet_id); | 335 | iph->id = htons(inet_sk(sk)->inet_id); |
337 | inet_sk(sk)->inet_id += 1 + more; | 336 | inet_sk(sk)->inet_id += segs; |
338 | } else | 337 | } else { |
339 | iph->id = 0; | 338 | iph->id = 0; |
340 | } else | 339 | } |
341 | __ip_select_ident(iph, dst, more); | 340 | } else { |
341 | __ip_select_ident(iph, segs); | ||
342 | } | ||
343 | } | ||
344 | |||
345 | static inline void ip_select_ident(struct sk_buff *skb, struct sock *sk) | ||
346 | { | ||
347 | ip_select_ident_segs(skb, sk, 1); | ||
342 | } | 348 | } |
343 | 349 | ||
344 | static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto) | 350 | static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto) |
diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ba810d0546bc..574337fe72dd 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h | |||
@@ -668,8 +668,6 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add | |||
668 | return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); | 668 | return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); |
669 | } | 669 | } |
670 | 670 | ||
671 | void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt); | ||
672 | |||
673 | int ip6_dst_hoplimit(struct dst_entry *dst); | 671 | int ip6_dst_hoplimit(struct dst_entry *dst); |
674 | 672 | ||
675 | static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, | 673 | static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, |
diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index f257486f17be..3f36d45b714a 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h | |||
@@ -3,8 +3,6 @@ | |||
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | 5 | ||
6 | __u32 secure_ip_id(__be32 daddr); | ||
7 | __u32 secure_ipv6_id(const __be32 daddr[4]); | ||
8 | u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); | 6 | u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); |
9 | u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, | 7 | u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, |
10 | __be16 dport); | 8 | __be16 dport); |
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 897da56f3aff..ba71212f0251 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c | |||
@@ -85,31 +85,6 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); | |||
85 | #endif | 85 | #endif |
86 | 86 | ||
87 | #ifdef CONFIG_INET | 87 | #ifdef CONFIG_INET |
88 | __u32 secure_ip_id(__be32 daddr) | ||
89 | { | ||
90 | u32 hash[MD5_DIGEST_WORDS]; | ||
91 | |||
92 | net_secret_init(); | ||
93 | hash[0] = (__force __u32) daddr; | ||
94 | hash[1] = net_secret[13]; | ||
95 | hash[2] = net_secret[14]; | ||
96 | hash[3] = net_secret[15]; | ||
97 | |||
98 | md5_transform(hash, net_secret); | ||
99 | |||
100 | return hash[0]; | ||
101 | } | ||
102 | |||
103 | __u32 secure_ipv6_id(const __be32 daddr[4]) | ||
104 | { | ||
105 | __u32 hash[4]; | ||
106 | |||
107 | net_secret_init(); | ||
108 | memcpy(hash, daddr, 16); | ||
109 | md5_transform(hash, net_secret); | ||
110 | |||
111 | return hash[0]; | ||
112 | } | ||
113 | 88 | ||
114 | __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, | 89 | __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, |
115 | __be16 sport, __be16 dport) | 90 | __be16 sport, __be16 dport) |
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 17d34e3c2ac3..6748d420f714 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
@@ -369,7 +369,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) | |||
369 | pip->saddr = fl4.saddr; | 369 | pip->saddr = fl4.saddr; |
370 | pip->protocol = IPPROTO_IGMP; | 370 | pip->protocol = IPPROTO_IGMP; |
371 | pip->tot_len = 0; /* filled in later */ | 371 | pip->tot_len = 0; /* filled in later */ |
372 | ip_select_ident(skb, &rt->dst, NULL); | 372 | ip_select_ident(skb, NULL); |
373 | ((u8 *)&pip[1])[0] = IPOPT_RA; | 373 | ((u8 *)&pip[1])[0] = IPOPT_RA; |
374 | ((u8 *)&pip[1])[1] = 4; | 374 | ((u8 *)&pip[1])[1] = 4; |
375 | ((u8 *)&pip[1])[2] = 0; | 375 | ((u8 *)&pip[1])[2] = 0; |
@@ -714,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, | |||
714 | iph->daddr = dst; | 714 | iph->daddr = dst; |
715 | iph->saddr = fl4.saddr; | 715 | iph->saddr = fl4.saddr; |
716 | iph->protocol = IPPROTO_IGMP; | 716 | iph->protocol = IPPROTO_IGMP; |
717 | ip_select_ident(skb, &rt->dst, NULL); | 717 | ip_select_ident(skb, NULL); |
718 | ((u8 *)&iph[1])[0] = IPOPT_RA; | 718 | ((u8 *)&iph[1])[0] = IPOPT_RA; |
719 | ((u8 *)&iph[1])[1] = 4; | 719 | ((u8 *)&iph[1])[1] = 4; |
720 | ((u8 *)&iph[1])[2] = 0; | 720 | ((u8 *)&iph[1])[2] = 0; |
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index c98cf141f4ed..4ced1b9a97f0 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c | |||
@@ -26,20 +26,7 @@ | |||
26 | * Theory of operations. | 26 | * Theory of operations. |
27 | * We keep one entry for each peer IP address. The nodes contains long-living | 27 | * We keep one entry for each peer IP address. The nodes contains long-living |
28 | * information about the peer which doesn't depend on routes. | 28 | * information about the peer which doesn't depend on routes. |
29 | * At this moment this information consists only of ID field for the next | ||
30 | * outgoing IP packet. This field is incremented with each packet as encoded | ||
31 | * in inet_getid() function (include/net/inetpeer.h). | ||
32 | * At the moment of writing this notes identifier of IP packets is generated | ||
33 | * to be unpredictable using this code only for packets subjected | ||
34 | * (actually or potentially) to defragmentation. I.e. DF packets less than | ||
35 | * PMTU in size when local fragmentation is disabled use a constant ID and do | ||
36 | * not use this code (see ip_select_ident() in include/net/ip.h). | ||
37 | * | 29 | * |
38 | * Route cache entries hold references to our nodes. | ||
39 | * New cache entries get references via lookup by destination IP address in | ||
40 | * the avl tree. The reference is grabbed only when it's needed i.e. only | ||
41 | * when we try to output IP packet which needs an unpredictable ID (see | ||
42 | * __ip_select_ident() in net/ipv4/route.c). | ||
43 | * Nodes are removed only when reference counter goes to 0. | 30 | * Nodes are removed only when reference counter goes to 0. |
44 | * When it's happened the node may be removed when a sufficient amount of | 31 | * When it's happened the node may be removed when a sufficient amount of |
45 | * time has been passed since its last use. The less-recently-used entry can | 32 | * time has been passed since its last use. The less-recently-used entry can |
@@ -62,7 +49,6 @@ | |||
62 | * refcnt: atomically against modifications on other CPU; | 49 | * refcnt: atomically against modifications on other CPU; |
63 | * usually under some other lock to prevent node disappearing | 50 | * usually under some other lock to prevent node disappearing |
64 | * daddr: unchangeable | 51 | * daddr: unchangeable |
65 | * ip_id_count: atomic value (no lock needed) | ||
66 | */ | 52 | */ |
67 | 53 | ||
68 | static struct kmem_cache *peer_cachep __read_mostly; | 54 | static struct kmem_cache *peer_cachep __read_mostly; |
@@ -497,10 +483,6 @@ relookup: | |||
497 | p->daddr = *daddr; | 483 | p->daddr = *daddr; |
498 | atomic_set(&p->refcnt, 1); | 484 | atomic_set(&p->refcnt, 1); |
499 | atomic_set(&p->rid, 0); | 485 | atomic_set(&p->rid, 0); |
500 | atomic_set(&p->ip_id_count, | ||
501 | (daddr->family == AF_INET) ? | ||
502 | secure_ip_id(daddr->addr.a4) : | ||
503 | secure_ipv6_id(daddr->addr.a6)); | ||
504 | p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; | 486 | p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; |
505 | p->rate_tokens = 0; | 487 | p->rate_tokens = 0; |
506 | /* 60*HZ is arbitrary, but chosen enough high so that the first | 488 | /* 60*HZ is arbitrary, but chosen enough high so that the first |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6e231ab58d65..8d3b6b0e9857 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | |||
148 | iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); | 148 | iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); |
149 | iph->saddr = saddr; | 149 | iph->saddr = saddr; |
150 | iph->protocol = sk->sk_protocol; | 150 | iph->protocol = sk->sk_protocol; |
151 | ip_select_ident(skb, &rt->dst, sk); | 151 | ip_select_ident(skb, sk); |
152 | 152 | ||
153 | if (opt && opt->opt.optlen) { | 153 | if (opt && opt->opt.optlen) { |
154 | iph->ihl += opt->opt.optlen>>2; | 154 | iph->ihl += opt->opt.optlen>>2; |
@@ -430,8 +430,7 @@ packet_routed: | |||
430 | ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); | 430 | ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); |
431 | } | 431 | } |
432 | 432 | ||
433 | ip_select_ident_more(skb, &rt->dst, sk, | 433 | ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1); |
434 | (skb_shinfo(skb)->gso_segs ?: 1) - 1); | ||
435 | 434 | ||
436 | /* TODO : should we use skb->sk here instead of sk ? */ | 435 | /* TODO : should we use skb->sk here instead of sk ? */ |
437 | skb->priority = sk->sk_priority; | 436 | skb->priority = sk->sk_priority; |
@@ -1379,7 +1378,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, | |||
1379 | iph->ttl = ttl; | 1378 | iph->ttl = ttl; |
1380 | iph->protocol = sk->sk_protocol; | 1379 | iph->protocol = sk->sk_protocol; |
1381 | ip_copy_addrs(iph, fl4); | 1380 | ip_copy_addrs(iph, fl4); |
1382 | ip_select_ident(skb, &rt->dst, sk); | 1381 | ip_select_ident(skb, sk); |
1383 | 1382 | ||
1384 | if (opt) { | 1383 | if (opt) { |
1385 | iph->ihl += opt->optlen>>2; | 1384 | iph->ihl += opt->optlen>>2; |
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index bcf206c79005..847e69cbff7e 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c | |||
@@ -74,7 +74,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, | |||
74 | iph->daddr = dst; | 74 | iph->daddr = dst; |
75 | iph->saddr = src; | 75 | iph->saddr = src; |
76 | iph->ttl = ttl; | 76 | iph->ttl = ttl; |
77 | __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); | 77 | __ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1); |
78 | 78 | ||
79 | err = ip_local_out_sk(sk, skb); | 79 | err = ip_local_out_sk(sk, skb); |
80 | if (unlikely(net_xmit_eval(err))) | 80 | if (unlikely(net_xmit_eval(err))) |
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 2bc9cc47f246..65bcaa789043 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
@@ -1663,7 +1663,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) | |||
1663 | iph->protocol = IPPROTO_IPIP; | 1663 | iph->protocol = IPPROTO_IPIP; |
1664 | iph->ihl = 5; | 1664 | iph->ihl = 5; |
1665 | iph->tot_len = htons(skb->len); | 1665 | iph->tot_len = htons(skb->len); |
1666 | ip_select_ident(skb, skb_dst(skb), NULL); | 1666 | ip_select_ident(skb, NULL); |
1667 | ip_send_check(iph); | 1667 | ip_send_check(iph); |
1668 | 1668 | ||
1669 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | 1669 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a9dbe58bdfe7..2c65160565e1 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -389,7 +389,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, | |||
389 | iph->check = 0; | 389 | iph->check = 0; |
390 | iph->tot_len = htons(length); | 390 | iph->tot_len = htons(length); |
391 | if (!iph->id) | 391 | if (!iph->id) |
392 | ip_select_ident(skb, &rt->dst, NULL); | 392 | ip_select_ident(skb, NULL); |
393 | 393 | ||
394 | iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); | 394 | iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); |
395 | } | 395 | } |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4154eb76b0ad..082239ffe34a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -89,6 +89,7 @@ | |||
89 | #include <linux/rcupdate.h> | 89 | #include <linux/rcupdate.h> |
90 | #include <linux/times.h> | 90 | #include <linux/times.h> |
91 | #include <linux/slab.h> | 91 | #include <linux/slab.h> |
92 | #include <linux/jhash.h> | ||
92 | #include <net/dst.h> | 93 | #include <net/dst.h> |
93 | #include <net/net_namespace.h> | 94 | #include <net/net_namespace.h> |
94 | #include <net/protocol.h> | 95 | #include <net/protocol.h> |
@@ -456,39 +457,19 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, | |||
456 | return neigh_create(&arp_tbl, pkey, dev); | 457 | return neigh_create(&arp_tbl, pkey, dev); |
457 | } | 458 | } |
458 | 459 | ||
459 | /* | 460 | atomic_t *ip_idents __read_mostly; |
460 | * Peer allocation may fail only in serious out-of-memory conditions. However | 461 | EXPORT_SYMBOL(ip_idents); |
461 | * we still can generate some output. | ||
462 | * Random ID selection looks a bit dangerous because we have no chances to | ||
463 | * select ID being unique in a reasonable period of time. | ||
464 | * But broken packet identifier may be better than no packet at all. | ||
465 | */ | ||
466 | static void ip_select_fb_ident(struct iphdr *iph) | ||
467 | { | ||
468 | static DEFINE_SPINLOCK(ip_fb_id_lock); | ||
469 | static u32 ip_fallback_id; | ||
470 | u32 salt; | ||
471 | 462 | ||
472 | spin_lock_bh(&ip_fb_id_lock); | 463 | void __ip_select_ident(struct iphdr *iph, int segs) |
473 | salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr); | ||
474 | iph->id = htons(salt & 0xFFFF); | ||
475 | ip_fallback_id = salt; | ||
476 | spin_unlock_bh(&ip_fb_id_lock); | ||
477 | } | ||
478 | |||
479 | void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) | ||
480 | { | 464 | { |
481 | struct net *net = dev_net(dst->dev); | 465 | static u32 ip_idents_hashrnd __read_mostly; |
482 | struct inet_peer *peer; | 466 | u32 hash, id; |
483 | 467 | ||
484 | peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1); | 468 | net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd)); |
485 | if (peer) { | ||
486 | iph->id = htons(inet_getid(peer, more)); | ||
487 | inet_putpeer(peer); | ||
488 | return; | ||
489 | } | ||
490 | 469 | ||
491 | ip_select_fb_ident(iph); | 470 | hash = jhash_1word((__force u32)iph->daddr, ip_idents_hashrnd); |
471 | id = ip_idents_reserve(hash, segs); | ||
472 | iph->id = htons(id); | ||
492 | } | 473 | } |
493 | EXPORT_SYMBOL(__ip_select_ident); | 474 | EXPORT_SYMBOL(__ip_select_ident); |
494 | 475 | ||
@@ -2711,6 +2692,12 @@ int __init ip_rt_init(void) | |||
2711 | { | 2692 | { |
2712 | int rc = 0; | 2693 | int rc = 0; |
2713 | 2694 | ||
2695 | ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL); | ||
2696 | if (!ip_idents) | ||
2697 | panic("IP: failed to allocate ip_idents\n"); | ||
2698 | |||
2699 | prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents)); | ||
2700 | |||
2714 | #ifdef CONFIG_IP_ROUTE_CLASSID | 2701 | #ifdef CONFIG_IP_ROUTE_CLASSID |
2715 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); | 2702 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); |
2716 | if (!ip_rt_acct) | 2703 | if (!ip_rt_acct) |
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 05f2b484954f..91771a7c802f 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c | |||
@@ -58,12 +58,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) | |||
58 | 58 | ||
59 | top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? | 59 | top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? |
60 | 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); | 60 | 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); |
61 | ip_select_ident(skb, dst->child, NULL); | ||
62 | 61 | ||
63 | top_iph->ttl = ip4_dst_hoplimit(dst->child); | 62 | top_iph->ttl = ip4_dst_hoplimit(dst->child); |
64 | 63 | ||
65 | top_iph->saddr = x->props.saddr.a4; | 64 | top_iph->saddr = x->props.saddr.a4; |
66 | top_iph->daddr = x->id.daddr.a4; | 65 | top_iph->daddr = x->id.daddr.a4; |
66 | ip_select_ident(skb, NULL); | ||
67 | 67 | ||
68 | return 0; | 68 | return 0; |
69 | } | 69 | } |
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 85aaeca1f7f3..cb9df0eb4023 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c | |||
@@ -537,6 +537,18 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) | |||
537 | skb_copy_secmark(to, from); | 537 | skb_copy_secmark(to, from); |
538 | } | 538 | } |
539 | 539 | ||
540 | static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) | ||
541 | { | ||
542 | static u32 ip6_idents_hashrnd __read_mostly; | ||
543 | u32 hash, id; | ||
544 | |||
545 | net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); | ||
546 | |||
547 | hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd); | ||
548 | id = ip_idents_reserve(hash, 1); | ||
549 | fhdr->identification = htonl(id); | ||
550 | } | ||
551 | |||
540 | int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | 552 | int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) |
541 | { | 553 | { |
542 | struct sk_buff *frag; | 554 | struct sk_buff *frag; |
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 6313abd53c9d..6179ac186ab9 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c | |||
@@ -8,36 +8,6 @@ | |||
8 | #include <net/addrconf.h> | 8 | #include <net/addrconf.h> |
9 | #include <net/secure_seq.h> | 9 | #include <net/secure_seq.h> |
10 | 10 | ||
11 | void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) | ||
12 | { | ||
13 | static atomic_t ipv6_fragmentation_id; | ||
14 | struct in6_addr addr; | ||
15 | int old, new; | ||
16 | |||
17 | #if IS_ENABLED(CONFIG_IPV6) | ||
18 | struct inet_peer *peer; | ||
19 | struct net *net; | ||
20 | |||
21 | net = dev_net(rt->dst.dev); | ||
22 | peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); | ||
23 | if (peer) { | ||
24 | fhdr->identification = htonl(inet_getid(peer, 0)); | ||
25 | inet_putpeer(peer); | ||
26 | return; | ||
27 | } | ||
28 | #endif | ||
29 | do { | ||
30 | old = atomic_read(&ipv6_fragmentation_id); | ||
31 | new = old + 1; | ||
32 | if (!new) | ||
33 | new = 1; | ||
34 | } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old); | ||
35 | |||
36 | addr = rt->rt6i_dst.addr; | ||
37 | addr.s6_addr32[0] ^= (__force __be32)new; | ||
38 | fhdr->identification = htonl(secure_ipv6_id(addr.s6_addr32)); | ||
39 | } | ||
40 | EXPORT_SYMBOL(ipv6_select_ident); | ||
41 | 11 | ||
42 | int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) | 12 | int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) |
43 | { | 13 | { |
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 487b55e04337..73ba1cc7a88d 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c | |||
@@ -883,7 +883,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, | |||
883 | iph->daddr = cp->daddr.ip; | 883 | iph->daddr = cp->daddr.ip; |
884 | iph->saddr = saddr; | 884 | iph->saddr = saddr; |
885 | iph->ttl = old_iph->ttl; | 885 | iph->ttl = old_iph->ttl; |
886 | ip_select_ident(skb, &rt->dst, NULL); | 886 | ip_select_ident(skb, NULL); |
887 | 887 | ||
888 | /* Another hack: avoid icmp_send in ip_fragment */ | 888 | /* Another hack: avoid icmp_send in ip_fragment */ |
889 | skb->ignore_df = 1; | 889 | skb->ignore_df = 1; |