 include/linux/rtnetlink.h          |    3
 include/linux/tcp.h                |    1
 include/net/dst.h                  |    6
 include/net/flow.h                 |    5
 include/net/inet_connection_sock.h |    1
 include/net/inet_sock.h            |    2
 include/net/inetpeer.h             |    8
 include/net/netns/ipv4.h           |    3
 include/net/route.h                |   61
 include/net/tcp.h                  |    9
 net/core/rtnetlink.c               |    4
 net/decnet/dn_route.c              |   13
 net/ipv4/Makefile                  |    2
 net/ipv4/fib_semantics.c           |    2
 net/ipv4/icmp.c                    |    3
 net/ipv4/inet_connection_sock.c    |    2
 net/ipv4/inetpeer.c                |    4
 net/ipv4/route.c                   |  349
 net/ipv4/tcp.c                     |    2
 net/ipv4/tcp_input.c               |  188
 net/ipv4/tcp_ipv4.c                |   46
 net/ipv4/tcp_metrics.c             |  697
 net/ipv4/tcp_minisocks.c           |   62
 net/ipv4/xfrm4_policy.c            |    8
 net/ipv6/icmp.c                    |    4
 net/ipv6/ip6_output.c              |   10
 net/ipv6/ndisc.c                   |    8
 net/ipv6/route.c                   |   16
 net/ipv6/tcp_ipv6.c                |   49
 29 files changed, 837 insertions(+), 731 deletions(-)
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index ea60b0854109..db71c4ad8624 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -619,8 +619,7 @@ extern void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid,
 extern void rtnl_set_sk_err(struct net *net, u32 group, int error);
 extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
 extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst,
-			      u32 id, u32 ts, u32 tsage, long expires,
-			      u32 error);
+			      u32 id, long expires, u32 error);
 
 extern void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change);
 
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 7d3bcedc062a..2de9cf46f9fc 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -506,7 +506,6 @@ struct tcp_timewait_sock {
 	u32			  tw_rcv_wnd;
 	u32			  tw_ts_recent;
 	long			  tw_ts_recent_stamp;
-	struct inet_peer	  *tw_peer;
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key	  *tw_md5_key;
 #endif
diff --git a/include/net/dst.h b/include/net/dst.h
index b2634e446613..51610468c63d 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -209,12 +209,6 @@ static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metric)
 	return msecs_to_jiffies(dst_metric(dst, metric));
 }
 
-static inline void set_dst_metric_rtt(struct dst_entry *dst, int metric,
-				      unsigned long rtt)
-{
-	dst_metric_set(dst, metric, jiffies_to_msecs(rtt));
-}
-
 static inline u32
 dst_allfrag(const struct dst_entry *dst)
 {
diff --git a/include/net/flow.h b/include/net/flow.h
index bd524f598561..ce9cb7656b47 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -20,9 +20,8 @@ struct flowi_common {
 	__u8	flowic_proto;
 	__u8	flowic_flags;
 #define FLOWI_FLAG_ANYSRC		0x01
-#define FLOWI_FLAG_PRECOW_METRICS	0x02
-#define FLOWI_FLAG_CAN_SLEEP		0x04
-#define FLOWI_FLAG_RT_NOCACHE		0x08
+#define FLOWI_FLAG_CAN_SLEEP		0x02
+#define FLOWI_FLAG_RT_NOCACHE		0x04
 	__u32	flowic_secid;
 };
 
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index af3c743a40e4..291e7cee14e7 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -43,7 +43,6 @@ struct inet_connection_sock_af_ops {
 	struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb,
 				      struct request_sock *req,
 				      struct dst_entry *dst);
-	struct inet_peer *(*get_peer)(struct sock *sk);
 	u16	    net_header_len;
 	u16	    net_frag_header_len;
 	u16	    sockaddr_len;
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index ae17e1352d7e..924d7b98ab60 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -245,8 +245,6 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
 
 	if (inet_sk(sk)->transparent || inet_sk(sk)->hdrincl)
 		flags |= FLOWI_FLAG_ANYSRC;
-	if (sk->sk_protocol == IPPROTO_TCP)
-		flags |= FLOWI_FLAG_PRECOW_METRICS;
 	return flags;
 }
 
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index c27c8f10ebdc..53f464d7cddc 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -36,25 +36,19 @@ struct inet_peer {
 	u32			metrics[RTAX_MAX];
 	u32			rate_tokens;	/* rate limiting for ICMP */
 	unsigned long		rate_last;
-	unsigned long		pmtu_expires;
-	u32			pmtu_orig;
-	u32			pmtu_learned;
-	struct inetpeer_addr_base redirect_learned;
 	union {
 		struct list_head	gc_list;
 		struct rcu_head		gc_rcu;
 	};
 	/*
 	 * Once inet_peer is queued for deletion (refcnt == -1), following fields
-	 * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp
+	 * are not available: rid, ip_id_count
 	 * We can share memory with rcu_head to help keep inet_peer small.
 	 */
 	union {
 		struct {
 			atomic_t			rid;		/* Frag reception counter */
 			atomic_t			ip_id_count;	/* IP ID for the next packet */
-			__u32				tcp_ts;
-			__u32				tcp_ts_stamp;
 		};
 		struct rcu_head		rcu;
 		struct inet_peer	*gc_next;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 599e48fa97cb..2e089a99d603 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -7,6 +7,7 @@
 
 #include <net/inet_frag.h>
 
+struct tcpm_hash_bucket;
 struct ctl_table_header;
 struct ipv4_devconf;
 struct fib_rules_ops;
@@ -39,6 +40,8 @@ struct netns_ipv4 {
 	struct sock		**icmp_sk;
 	struct sock		*tcp_sock;
 	struct inet_peer_base	*peers;
+	struct tcpm_hash_bucket	*tcp_metrics_hash;
+	unsigned int		tcp_metrics_hash_mask;
 	struct netns_frags	frags;
 #ifdef CONFIG_NETFILTER
 	struct xt_table		*iptable_filter;
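
The two new fields give each network namespace its own TCP metrics cache: an array of tcpm_hash_bucket plus a mask, the usual power-of-two hash-table pairing. The table itself lives in net/ipv4/tcp_metrics.c, whose hunks this view does not include, so the sketch below only illustrates the mask idiom the pair implies; tcpm_bucket() is a hypothetical name, not a function from the patch.

/* Sketch, not kernel code: with a power-of-two table size, the stored
 * mask is (size - 1) and bucket selection is a single AND.
 */
static unsigned int tcpm_bucket(const struct net *net, unsigned int hash)
{
	return hash & net->ipv4.tcp_metrics_hash_mask;
}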
diff --git a/include/net/route.h b/include/net/route.h
index 211e2665139b..52362368af09 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -40,7 +40,6 @@
 #define RT_CONN_FLAGS(sk)   (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE))
 
 struct fib_nh;
-struct inet_peer;
 struct fib_info;
 struct rtable {
 	struct dst_entry	dst;
@@ -65,45 +64,10 @@ struct rtable {
 	__be32			rt_gateway;
 
 	/* Miscellaneous cached information */
-	u32			rt_peer_genid;
-	unsigned long		_peer; /* long-living peer info */
+	u32			rt_pmtu;
 	struct fib_info		*fi; /* for client ref to shared metrics */
 };
 
-static inline struct inet_peer *rt_peer_ptr(struct rtable *rt)
-{
-	return inetpeer_ptr(rt->_peer);
-}
-
-static inline bool rt_has_peer(struct rtable *rt)
-{
-	return inetpeer_ptr_is_peer(rt->_peer);
-}
-
-static inline void __rt_set_peer(struct rtable *rt, struct inet_peer *peer)
-{
-	__inetpeer_ptr_set_peer(&rt->_peer, peer);
-}
-
-static inline bool rt_set_peer(struct rtable *rt, struct inet_peer *peer)
-{
-	return inetpeer_ptr_set_peer(&rt->_peer, peer);
-}
-
-static inline void rt_init_peer(struct rtable *rt, struct inet_peer_base *base)
-{
-	inetpeer_init_ptr(&rt->_peer, base);
-}
-
-static inline void rt_transfer_peer(struct rtable *rt, struct rtable *ort)
-{
-	rt->_peer = ort->_peer;
-	if (rt_has_peer(ort)) {
-		struct inet_peer *peer = rt_peer_ptr(ort);
-		atomic_inc(&peer->refcnt);
-	}
-}
-
 static inline bool rt_is_input_route(const struct rtable *rt)
 {
 	return rt->rt_route_iif != 0;
@@ -278,8 +242,6 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 src,
 
 	if (inet_sk(sk)->transparent)
 		flow_flags |= FLOWI_FLAG_ANYSRC;
-	if (protocol == IPPROTO_TCP)
-		flow_flags |= FLOWI_FLAG_PRECOW_METRICS;
 	if (can_sleep)
 		flow_flags |= FLOWI_FLAG_CAN_SLEEP;
 
@@ -328,27 +290,6 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable *rt,
 	return rt;
 }
 
-extern void rt_bind_peer(struct rtable *rt, __be32 daddr, int create);
-
-static inline struct inet_peer *__rt_get_peer(struct rtable *rt, __be32 daddr, int create)
-{
-	if (rt_has_peer(rt))
-		return rt_peer_ptr(rt);
-
-	rt_bind_peer(rt, daddr, create);
-	return (rt_has_peer(rt) ? rt_peer_ptr(rt) : NULL);
-}
-
-static inline struct inet_peer *rt_get_peer(struct rtable *rt, __be32 daddr)
-{
-	return __rt_get_peer(rt, daddr, 0);
-}
-
-static inline struct inet_peer *rt_get_peer_create(struct rtable *rt, __be32 daddr)
-{
-	return __rt_get_peer(rt, daddr, 1);
-}
-
 static inline int inet_iif(const struct sk_buff *skb)
 {
 	return skb_rtable(skb)->rt_iif;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 53fb7d814170..3618fefae049 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -388,6 +388,13 @@ extern void tcp_enter_frto(struct sock *sk);
 extern void tcp_enter_loss(struct sock *sk, int how);
 extern void tcp_clear_retrans(struct tcp_sock *tp);
 extern void tcp_update_metrics(struct sock *sk);
+extern void tcp_init_metrics(struct sock *sk);
+extern void tcp_metrics_init(void);
+extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check);
+extern bool tcp_remember_stamp(struct sock *sk);
+extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw);
+extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst);
+extern void tcp_disable_fack(struct tcp_sock *tp);
 extern void tcp_close(struct sock *sk, long timeout);
 extern void tcp_init_sock(struct sock *sk);
 extern unsigned int tcp_poll(struct file * file, struct socket *sock,
@@ -556,6 +563,8 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
 	return (tp->srtt >> 3) + tp->rttvar;
 }
 
+extern void tcp_set_rto(struct sock *sk);
+
 static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
 {
 	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
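
The block of new externs is the public face of the metrics cache that replaces the inetpeer fields removed above: per-destination metric saving and restoring (tcp_update_metrics/tcp_init_metrics), TIME-WAIT timestamp recall (tcp_remember_stamp and friends), and a PAWS helper, tcp_peer_is_proven. A hedged sketch of the call shape for that last one; the real call sites are in the tcp_ipv4.c/tcp_minisocks.c hunks of this series, not shown here, and syn_paws_ok() is a made-up wrapper name.

/* Illustrative only: how a SYN-processing path might consult the cache. */
static bool syn_paws_ok(struct request_sock *req, struct dst_entry *dst)
{
	/* paws_check=true asks only the timestamp-sanity question */
	return tcp_peer_is_proven(req, dst, true);
}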
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2b325c340b44..64127eee786d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -615,7 +615,7 @@ nla_put_failure:
 EXPORT_SYMBOL(rtnetlink_put_metrics);
 
 int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
-		       u32 ts, u32 tsage, long expires, u32 error)
+		       long expires, u32 error)
 {
 	struct rta_cacheinfo ci = {
 		.rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse),
@@ -623,8 +623,6 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
 		.rta_clntref = atomic_read(&(dst->__refcnt)),
 		.rta_error = error,
 		.rta_id = id,
-		.rta_ts = ts,
-		.rta_tsage = tsage,
 	};
 
 	if (expires)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 6e74b3f110bc..b5594cc73ee1 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1590,7 +1590,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 		goto errout;
 
 	expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
-	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires,
+	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires,
 			       rt->dst.error) < 0)
 		goto errout;
 
@@ -1812,12 +1812,11 @@ static int dn_rt_cache_seq_show(struct seq_file *seq, void *v)
 	char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN];
 
 	seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n",
 		   rt->dst.dev ? rt->dst.dev->name : "*",
 		   dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1),
 		   dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2),
 		   atomic_read(&rt->dst.__refcnt),
-		   rt->dst.__use,
-		   (int) dst_metric(&rt->dst, RTAX_RTT));
+		   rt->dst.__use, 0);
 	return 0;
 }
 
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index ff75d3bbcd6a..5a23e8b37106 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -7,7 +7,7 @@ obj-y     := route.o inetpeer.o protocol.o \
 	     ip_output.o ip_sockglue.o inet_hashtables.o \
 	     inet_timewait_sock.o inet_connection_sock.o \
 	     tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
-	     tcp_minisocks.o tcp_cong.o \
+	     tcp_minisocks.o tcp_cong.o tcp_metrics.o \
 	     datagram.o raw.o udp.o udplite.o \
 	     arp.o icmp.o devinet.o af_inet.o igmp.o \
 	     fib_frontend.o fib_semantics.o fib_trie.o \
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ae301c897a19..d71bfbdc0bf4 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -794,6 +794,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 				val = nla_get_u32(nla);
 				if (type == RTAX_ADVMSS && val > 65535 - 40)
 					val = 65535 - 40;
+				if (type == RTAX_MTU && val > 65535 - 15)
+					val = 65535 - 15;
 				fi->fib_metrics[type - 1] = val;
 			}
 		}
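
The added check bounds a netlink-supplied RTAX_MTU metric just below 64 KB, in the same style as the existing RTAX_ADVMSS cap two lines up. Restated in isolation (the 15-byte margin is taken from the patch as-is; interpreting it further would be guesswork, beyond noting that 65535 is the IPv4 total-length ceiling):

/* Stand-alone restatement of the clamp above; sketch, not kernel code. */
static u32 clamp_mtu_metric(u32 val)
{
	return val > 65535 - 15 ? 65535 - 15 : val;
}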
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4bce5a2830aa..4a049449305f 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -254,9 +254,10 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
 
 	/* Limit if icmp type is enabled in ratemask. */
 	if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) {
-		struct inet_peer *peer = rt_get_peer_create(rt, fl4->daddr);
+		struct inet_peer *peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1);
 		rc = inet_peer_xrlim_allow(peer,
 					   net->ipv4.sysctl_icmp_ratelimit);
+		inet_putpeer(peer);
 	}
 out:
 	return rc;
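
With the peer pointer gone from struct rtable, the ICMP rate limiter now looks the peer up by destination address for the duration of the check and releases it afterwards. The general get/check/put pattern, as a sketch (icmp_rate_ok() is an illustrative name; it assumes, as the inetpeer code of this era does, that inet_peer_xrlim_allow() treats a NULL peer as "allow", and it guards the put, whereas the hunk above relies on create=1 lookups failing only under severe memory pressure):

static bool icmp_rate_ok(struct net *net, __be32 daddr)
{
	struct inet_peer *peer;
	bool ok;

	peer = inet_getpeer_v4(net->ipv4.peers, daddr, 1);
	ok = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit);
	if (peer)
		inet_putpeer(peer);
	return ok;
}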
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 034ddbe42adf..76825be3b643 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -375,7 +375,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct ip_options_rcu *opt = inet_rsk(req)->opt;
 	struct net *net = sock_net(sk);
-	int flags = inet_sk_flowi_flags(sk) & ~FLOWI_FLAG_PRECOW_METRICS;
+	int flags = inet_sk_flowi_flags(sk);
 
 	if (nocache)
 		flags |= FLOWI_FLAG_RT_NOCACHE;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index da90a8cab614..e1e0a4e8fd34 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -508,13 +508,9 @@ relookup:
 			       (daddr->family == AF_INET) ?
 					secure_ip_id(daddr->addr.a4) :
 					secure_ipv6_id(daddr->addr.a6));
-		p->tcp_ts_stamp = 0;
 		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
 		p->rate_tokens = 0;
 		p->rate_last = 0;
-		p->pmtu_expires = 0;
-		p->pmtu_orig = 0;
-		memset(&p->redirect_learned, 0, sizeof(p->redirect_learned));
 		INIT_LIST_HEAD(&p->gc_list);
 
 		/* Link the node. */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 72e88c208025..95bfa1ba5b28 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -158,34 +158,8 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 
 static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
 {
-	struct rtable *rt = (struct rtable *) dst;
-	struct inet_peer *peer;
-	u32 *p = NULL;
-
-	peer = rt_get_peer_create(rt, rt->rt_dst);
-	if (peer) {
-		u32 *old_p = __DST_METRICS_PTR(old);
-		unsigned long prev, new;
-
-		p = peer->metrics;
-		if (inet_metrics_new(peer))
-			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
-
-		new = (unsigned long) p;
-		prev = cmpxchg(&dst->_metrics, old, new);
-
-		if (prev != old) {
-			p = __DST_METRICS_PTR(prev);
-			if (prev & DST_METRICS_READ_ONLY)
-				p = NULL;
-		} else {
-			if (rt->fi) {
-				fib_info_put(rt->fi);
-				rt->fi = NULL;
-			}
-		}
-	}
-	return p;
+	WARN_ON(1);
+	return NULL;
 }
 
 static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
@@ -423,18 +397,16 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
 		int len;
 
 		seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
 			   "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
 			   r->dst.dev ? r->dst.dev->name : "*",
 			   (__force u32)r->rt_dst,
 			   (__force u32)r->rt_gateway,
 			   r->rt_flags, atomic_read(&r->dst.__refcnt),
 			   r->dst.__use, 0, (__force u32)r->rt_src,
 			   dst_metric_advmss(&r->dst) + 40,
-			   dst_metric(&r->dst, RTAX_WINDOW),
-			   (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
-				 dst_metric(&r->dst, RTAX_RTTVAR)),
-			   r->rt_key_tos,
-			   -1, 0, 0, &len);
+			   dst_metric(&r->dst, RTAX_WINDOW), 0,
+			   r->rt_key_tos,
+			   -1, 0, 0, &len);
 
 		seq_printf(seq, "%*s\n", 127 - len, "");
 	}
@@ -671,7 +643,7 @@ static inline int rt_fast_clean(struct rtable *rth)
 static inline int rt_valuable(struct rtable *rth)
 {
 	return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
-		(rt_has_peer(rth) && rt_peer_ptr(rth)->pmtu_expires);
+		rth->dst.expires;
 }
 
 static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -917,7 +889,6 @@ static void rt_cache_invalidate(struct net *net)
 
 	get_random_bytes(&shuffle, sizeof(shuffle));
 	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
-	inetpeer_invalidate_family(AF_INET);
 }
 
 /*
@@ -1244,31 +1215,6 @@ skip_hashing:
 	return rt;
 }
 
-static atomic_t __rt_peer_genid = ATOMIC_INIT(0);
-
-static u32 rt_peer_genid(void)
-{
-	return atomic_read(&__rt_peer_genid);
-}
-
-void rt_bind_peer(struct rtable *rt, __be32 daddr, int create)
-{
-	struct inet_peer_base *base;
-	struct inet_peer *peer;
-
-	base = inetpeer_base_ptr(rt->_peer);
-	if (!base)
-		return;
-
-	peer = inet_getpeer_v4(base, daddr, create);
-	if (peer) {
-		if (!rt_set_peer(rt, peer))
-			inet_putpeer(peer);
-		else
-			rt->rt_peer_genid = rt_peer_genid();
-	}
-}
-
 /*
  * Peer allocation may fail only in serious out-of-memory conditions.  However
  * we still can generate some output.
@@ -1291,20 +1237,15 @@ static void ip_select_fb_ident(struct iphdr *iph)
 
 void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
 {
-	struct rtable *rt = (struct rtable *) dst;
-
-	if (rt && !(rt->dst.flags & DST_NOPEER)) {
-		struct inet_peer *peer = rt_get_peer_create(rt, rt->rt_dst);
+	struct net *net = dev_net(dst->dev);
+	struct inet_peer *peer;
 
-		/* If peer is attached to destination, it is never detached,
-		   so that we need not to grab a lock to dereference it.
-		 */
-		if (peer) {
-			iph->id = htons(inet_getid(peer, more));
-			return;
-		}
-	} else if (!rt)
-		pr_debug("rt_bind_peer(0) @%p\n", __builtin_return_address(0));
+	peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
+	if (peer) {
+		iph->id = htons(inet_getid(peer, more));
+		inet_putpeer(peer);
+		return;
+	}
 
 	ip_select_fb_ident(iph);
 }
@@ -1330,30 +1271,6 @@ static void rt_del(unsigned int hash, struct rtable *rt)
 	spin_unlock_bh(rt_hash_lock_addr(hash));
 }
 
-static void check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
-{
-	struct rtable *rt = (struct rtable *) dst;
-	__be32 orig_gw = rt->rt_gateway;
-	struct neighbour *n;
-
-	dst_confirm(&rt->dst);
-
-	rt->rt_gateway = peer->redirect_learned.a4;
-
-	n = ipv4_neigh_lookup(&rt->dst, NULL, &rt->rt_gateway);
-	if (!n) {
-		rt->rt_gateway = orig_gw;
-		return;
-	}
-	if (!(n->nud_state & NUD_VALID)) {
-		neigh_event_send(n, NULL);
-	} else {
-		rt->rt_flags |= RTCF_REDIRECTED;
-		call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
-	}
-	neigh_release(n);
-}
-
 /* called in rcu_read_lock() section */
 void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 		    __be32 saddr, struct net_device *dev)
@@ -1362,7 +1279,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 	struct in_device *in_dev = __in_dev_get_rcu(dev);
 	__be32 skeys[2] = { saddr, 0 };
 	int ikeys[2] = { dev->ifindex, 0 };
-	struct inet_peer *peer;
 	struct net *net;
 
 	if (!in_dev)
@@ -1395,6 +1311,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 			rthp = &rt_hash_table[hash].chain;
 
 			while ((rt = rcu_dereference(*rthp)) != NULL) {
+				struct neighbour *n;
+
 				rthp = &rt->dst.rt_next;
 
 				if (rt->rt_key_dst != daddr ||
@@ -1408,13 +1326,16 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 				    rt->rt_gateway != old_gw)
 					continue;
 
-				peer = rt_get_peer_create(rt, rt->rt_dst);
-				if (peer) {
-					if (peer->redirect_learned.a4 != new_gw) {
-						peer->redirect_learned.a4 = new_gw;
-						atomic_inc(&__rt_peer_genid);
+				n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
+				if (n) {
+					if (!(n->nud_state & NUD_VALID)) {
+						neigh_event_send(n, NULL);
+					} else {
+						rt->rt_gateway = new_gw;
+						rt->rt_flags |= RTCF_REDIRECTED;
+						call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
 					}
-					check_peer_redir(&rt->dst, peer);
+					neigh_release(n);
 				}
 			}
 		}
@@ -1432,23 +1353,6 @@ reject_redirect:
 	;
 }
 
-static bool peer_pmtu_expired(struct inet_peer *peer)
-{
-	unsigned long orig = ACCESS_ONCE(peer->pmtu_expires);
-
-	return orig &&
-	       time_after_eq(jiffies, orig) &&
-	       cmpxchg(&peer->pmtu_expires, orig, 0) == orig;
-}
-
-static bool peer_pmtu_cleaned(struct inet_peer *peer)
-{
-	unsigned long orig = ACCESS_ONCE(peer->pmtu_expires);
-
-	return orig &&
-	       cmpxchg(&peer->pmtu_expires, orig, 0) == orig;
-}
-
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 {
 	struct rtable *rt = (struct rtable *)dst;
@@ -1458,16 +1362,13 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 		if (dst->obsolete > 0) {
 			ip_rt_put(rt);
 			ret = NULL;
-		} else if (rt->rt_flags & RTCF_REDIRECTED) {
+		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
+			   rt->dst.expires) {
 			unsigned int hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
 						    rt->rt_oif,
 						    rt_genid(dev_net(dst->dev)));
 			rt_del(hash, rt);
 			ret = NULL;
-		} else if (rt_has_peer(rt)) {
-			struct inet_peer *peer = rt_peer_ptr(rt);
-			if (peer_pmtu_expired(peer))
-				dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig);
 		}
 	}
 	return ret;
@@ -1494,6 +1395,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	struct rtable *rt = skb_rtable(skb);
 	struct in_device *in_dev;
 	struct inet_peer *peer;
+	struct net *net;
 	int log_martians;
 
 	rcu_read_lock();
@@ -1505,7 +1407,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
 	rcu_read_unlock();
 
-	peer = rt_get_peer_create(rt, rt->rt_dst);
+	net = dev_net(rt->dst.dev);
+	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
 	if (!peer) {
 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
 		return;
@@ -1522,7 +1425,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	 */
 	if (peer->rate_tokens >= ip_rt_redirect_number) {
 		peer->rate_last = jiffies;
-		return;
+		goto out_put_peer;
 	}
 
 	/* Check for load limit; set rate_last to the latest sent
@@ -1543,6 +1446,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 			     &rt->rt_dst, &rt->rt_gateway);
 #endif
 	}
+out_put_peer:
+	inet_putpeer(peer);
 }
 
 static int ip_error(struct sk_buff *skb)
@@ -1585,7 +1490,7 @@ static int ip_error(struct sk_buff *skb)
 		break;
 	}
 
-	peer = rt_get_peer_create(rt, rt->rt_dst);
+	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
 
 	send = true;
 	if (peer) {
@@ -1598,6 +1503,7 @@ static int ip_error(struct sk_buff *skb)
 			peer->rate_tokens -= ip_rt_error_cost;
 		else
 			send = false;
+		inet_putpeer(peer);
 	}
 	if (send)
 		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
@@ -1606,50 +1512,17 @@ out:	kfree_skb(skb);
 	return 0;
 }
 
-static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer)
-{
-	unsigned long expires = ACCESS_ONCE(peer->pmtu_expires);
-
-	if (!expires)
-		return;
-	if (time_before(jiffies, expires)) {
-		u32 orig_dst_mtu = dst_mtu(dst);
-		if (peer->pmtu_learned < orig_dst_mtu) {
-			if (!peer->pmtu_orig)
-				peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU);
-			dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned);
-		}
-	} else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires)
-		dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig);
-}
-
 static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
 	struct rtable *rt = (struct rtable *) dst;
-	struct inet_peer *peer;
 
 	dst_confirm(dst);
 
-	peer = rt_get_peer_create(rt, rt->rt_dst);
-	if (peer) {
-		unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires);
-
-		if (mtu < ip_rt_min_pmtu)
-			mtu = ip_rt_min_pmtu;
-		if (!pmtu_expires || mtu < peer->pmtu_learned) {
-
-			pmtu_expires = jiffies + ip_rt_mtu_expires;
-			if (!pmtu_expires)
-				pmtu_expires = 1UL;
-
-			peer->pmtu_learned = mtu;
-			peer->pmtu_expires = pmtu_expires;
+	if (mtu < ip_rt_min_pmtu)
+		mtu = ip_rt_min_pmtu;
 
-			atomic_inc(&__rt_peer_genid);
-			rt->rt_peer_genid = rt_peer_genid();
-		}
-		check_peer_pmtu(dst, peer);
-	}
+	rt->rt_pmtu = mtu;
+	dst_set_expires(&rt->dst, ip_rt_mtu_expires);
 }
 
 void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
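
The rewritten ip_rt_update_pmtu() stores the learned value in the route's own rt_pmtu field and arms dst.expires, replacing the pmtu_learned/pmtu_expires pair that used to live on the shared inet_peer. A self-contained model of the writer side (plain C, not kernel code; the struct and constants stand in for rtable, ip_rt_min_pmtu, and ip_rt_mtu_expires):

struct model_rt {
	unsigned int rt_pmtu;	/* models rtable.rt_pmtu */
	unsigned long expires;	/* models dst.expires, in jiffies */
};

#define MODEL_MIN_PMTU 552	/* stands in for ip_rt_min_pmtu */
#define MODEL_MTU_TTL  600	/* stands in for ip_rt_mtu_expires */

/* Mirrors the new ip_rt_update_pmtu(): clamp, store, arm the timer. */
static void model_update_pmtu(struct model_rt *rt, unsigned long now,
			      unsigned int mtu)
{
	if (mtu < MODEL_MIN_PMTU)
		mtu = MODEL_MIN_PMTU;
	rt->rt_pmtu = mtu;
	rt->expires = now + MODEL_MTU_TTL;	/* dst_set_expires() analogue */
}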
@@ -1660,7 +1533,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
 	struct rtable *rt;
 
 	flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
-			   protocol, flow_flags | FLOWI_FLAG_PRECOW_METRICS,
+			   protocol, flow_flags,
 			   iph->daddr, iph->saddr, 0, 0);
 	rt = __ip_route_output_key(net, &fl4);
 	if (!IS_ERR(rt)) {
@@ -1681,30 +1554,12 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
 }
 EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
 
-static void ipv4_validate_peer(struct rtable *rt)
-{
-	if (rt->rt_peer_genid != rt_peer_genid()) {
-		struct inet_peer *peer = rt_get_peer(rt, rt->rt_dst);
-
-		if (peer) {
-			check_peer_pmtu(&rt->dst, peer);
-
-			if (peer->redirect_learned.a4 &&
-			    peer->redirect_learned.a4 != rt->rt_gateway)
-				check_peer_redir(&rt->dst, peer);
-		}
-
-		rt->rt_peer_genid = rt_peer_genid();
-	}
-}
-
 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
 {
 	struct rtable *rt = (struct rtable *) dst;
 
 	if (rt_is_expired(rt))
 		return NULL;
-	ipv4_validate_peer(rt);
 	return dst;
 }
 
@@ -1716,10 +1571,6 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
 		fib_info_put(rt->fi);
 		rt->fi = NULL;
 	}
-	if (rt_has_peer(rt)) {
-		struct inet_peer *peer = rt_peer_ptr(rt);
-		inet_putpeer(peer);
-	}
 }
 
 
@@ -1730,11 +1581,8 @@ static void ipv4_link_failure(struct sk_buff *skb)
 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
 
 	rt = skb_rtable(skb);
-	if (rt && rt_has_peer(rt)) {
-		struct inet_peer *peer = rt_peer_ptr(rt);
-		if (peer_pmtu_cleaned(peer))
-			dst_metric_set(&rt->dst, RTAX_MTU, peer->pmtu_orig);
-	}
+	if (rt)
+		dst_set_expires(&rt->dst, 0);
 }
 
 static int ip_rt_bug(struct sk_buff *skb)
@@ -1814,7 +1662,13 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
 static unsigned int ipv4_mtu(const struct dst_entry *dst)
 {
 	const struct rtable *rt = (const struct rtable *) dst;
-	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+	unsigned int mtu = rt->rt_pmtu;
+
+	if (mtu && time_after_eq(jiffies, rt->dst.expires))
+		mtu = 0;
+
+	if (!mtu)
+		mtu = dst_metric_raw(dst, RTAX_MTU);
 
 	if (mtu && rt_is_output_route(rt))
 		return mtu;
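
ipv4_mtu() is the reader for that pair: a learned PMTU is honoured only while dst.expires has not passed, after which the route falls back to the RTAX_MTU metric. Continuing the model from the sketch above (time_after_eq() is approximated by a plain comparison, which ignores jiffies wraparound):

/* Mirrors the new ipv4_mtu() priority: unexpired rt_pmtu first, then the
 * RTAX_MTU metric.
 */
static unsigned int model_mtu(const struct model_rt *rt, unsigned long now,
			      unsigned int metric_mtu)
{
	unsigned int mtu = rt->rt_pmtu;

	if (mtu && now >= rt->expires)
		mtu = 0;		/* learned value timed out */
	if (!mtu)
		mtu = metric_mtu;	/* dst_metric_raw(dst, RTAX_MTU) analogue */
	return mtu;
}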
@@ -1836,63 +1690,27 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 			    struct fib_info *fi)
 {
-	struct inet_peer_base *base;
-	struct inet_peer *peer;
-	int create = 0;
-
-	/* If a peer entry exists for this destination, we must hook
-	 * it up in order to get at cached metrics.
-	 */
-	if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
-		create = 1;
-
-	base = inetpeer_base_ptr(rt->_peer);
-	BUG_ON(!base);
-
-	peer = inet_getpeer_v4(base, rt->rt_dst, create);
-	if (peer) {
-		__rt_set_peer(rt, peer);
-		rt->rt_peer_genid = rt_peer_genid();
-		if (inet_metrics_new(peer))
-			memcpy(peer->metrics, fi->fib_metrics,
-			       sizeof(u32) * RTAX_MAX);
-		dst_init_metrics(&rt->dst, peer->metrics, false);
-
-		check_peer_pmtu(&rt->dst, peer);
-
-		if (peer->redirect_learned.a4 &&
-		    peer->redirect_learned.a4 != rt->rt_gateway) {
-			rt->rt_gateway = peer->redirect_learned.a4;
-			rt->rt_flags |= RTCF_REDIRECTED;
-		}
-	} else {
-		if (fi->fib_metrics != (u32 *) dst_default_metrics) {
-			rt->fi = fi;
-			atomic_inc(&fi->fib_clntref);
-		}
-		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+	if (fi->fib_metrics != (u32 *) dst_default_metrics) {
+		rt->fi = fi;
+		atomic_inc(&fi->fib_clntref);
 	}
+	dst_init_metrics(&rt->dst, fi->fib_metrics, true);
 }
 
 static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
 			   const struct fib_result *res,
 			   struct fib_info *fi, u16 type, u32 itag)
 {
-	struct dst_entry *dst = &rt->dst;
-
 	if (fi) {
 		if (FIB_RES_GW(*res) &&
 		    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 			rt->rt_gateway = FIB_RES_GW(*res);
 		rt_init_metrics(rt, fl4, fi);
 #ifdef CONFIG_IP_ROUTE_CLASSID
-		dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
+		rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
 #endif
 	}
 
-	if (dst_mtu(dst) > IP_MAX_MTU)
-		dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU);
-
 #ifdef CONFIG_IP_ROUTE_CLASSID
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	set_class_tag(rt, fib_rules_tclass(res));
@@ -1964,9 +1782,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->rt_iif	= dev->ifindex;
 	rth->rt_oif	= 0;
 	rth->rt_mark    = skb->mark;
+	rth->rt_pmtu	= 0;
 	rth->rt_gateway	= daddr;
-	rth->rt_peer_genid = 0;
-	rt_init_peer(rth, dev_net(dev)->ipv4.peers);
 	rth->fi = NULL;
 	if (our) {
 		rth->dst.input= ip_local_deliver;
@@ -2090,9 +1907,8 @@ static int __mkroute_input(struct sk_buff *skb,
 	rth->rt_iif 	= in_dev->dev->ifindex;
 	rth->rt_oif 	= 0;
 	rth->rt_mark    = skb->mark;
+	rth->rt_pmtu	= 0;
 	rth->rt_gateway	= daddr;
-	rth->rt_peer_genid = 0;
-	rt_init_peer(rth, &res->table->tb_peers);
 	rth->fi = NULL;
 
 	rth->dst.input = ip_forward;
@@ -2269,9 +2085,8 @@ local_input:
 	rth->rt_iif	= dev->ifindex;
 	rth->rt_oif	= 0;
 	rth->rt_mark    = skb->mark;
+	rth->rt_pmtu	= 0;
 	rth->rt_gateway	= daddr;
-	rth->rt_peer_genid = 0;
-	rt_init_peer(rth, net->ipv4.peers);
 	rth->fi = NULL;
 	if (res.type == RTN_UNREACHABLE) {
 		rth->dst.input= ip_error;
@@ -2346,7 +2161,6 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		    rth->rt_mark == skb->mark &&
 		    net_eq(dev_net(rth->dst.dev), net) &&
 		    !rt_is_expired(rth)) {
-			ipv4_validate_peer(rth);
 			if (noref) {
 				dst_use_noref(&rth->dst, jiffies);
 				skb_dst_set_noref(skb, &rth->dst);
@@ -2468,11 +2282,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 	rth->rt_iif	= orig_oif ? : dev_out->ifindex;
 	rth->rt_oif	= orig_oif;
 	rth->rt_mark    = fl4->flowi4_mark;
+	rth->rt_pmtu	= 0;
 	rth->rt_gateway = fl4->daddr;
-	rth->rt_peer_genid = 0;
-	rt_init_peer(rth, (res->table ?
-			   &res->table->tb_peers :
-			   dev_net(dev_out)->ipv4.peers));
 	rth->fi = NULL;
 
 	RT_CACHE_STAT_INC(out_slow_tot);
@@ -2726,7 +2537,6 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4)
 		    (IPTOS_RT_MASK | RTO_ONLINK)) &&
 		    net_eq(dev_net(rth->dst.dev), net) &&
 		    !rt_is_expired(rth)) {
-			ipv4_validate_peer(rth);
 			dst_use(&rth->dst, jiffies);
 			RT_CACHE_STAT_INC(out_hit);
 			rcu_read_unlock_bh();
@@ -2790,7 +2600,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 		new->__use = 1;
 		new->input = dst_discard;
 		new->output = dst_discard;
-		dst_copy_metrics(new, &ort->dst);
 
 		new->dev = ort->dst.dev;
 		if (new->dev)
@@ -2803,6 +2612,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 		rt->rt_iif = ort->rt_iif;
 		rt->rt_oif = ort->rt_oif;
 		rt->rt_mark = ort->rt_mark;
+		rt->rt_pmtu = ort->rt_pmtu;
 
 		rt->rt_genid = rt_genid(net);
 		rt->rt_flags = ort->rt_flags;
@@ -2810,7 +2620,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 		rt->rt_dst = ort->rt_dst;
 		rt->rt_src = ort->rt_src;
 		rt->rt_gateway = ort->rt_gateway;
-		rt_transfer_peer(rt, ort);
 		rt->fi = ort->fi;
 		if (rt->fi)
 			atomic_inc(&rt->fi->fib_clntref);
@@ -2848,7 +2657,7 @@ static int rt_fill_info(struct net *net,
 	struct rtmsg *r;
 	struct nlmsghdr *nlh;
 	unsigned long expires = 0;
-	u32 id = 0, ts = 0, tsage = 0, error;
+	u32 error;
 
 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
 	if (nlh == NULL)
@@ -2901,21 +2710,12 @@ static int rt_fill_info(struct net *net,
 		goto nla_put_failure;
 
 	error = rt->dst.error;
-	if (rt_has_peer(rt)) {
-		const struct inet_peer *peer = rt_peer_ptr(rt);
-		inet_peer_refcheck(peer);
-		id = atomic_read(&peer->ip_id_count) & 0xffff;
-		if (peer->tcp_ts_stamp) {
-			ts = peer->tcp_ts;
-			tsage = get_seconds() - peer->tcp_ts_stamp;
-		}
-		expires = ACCESS_ONCE(peer->pmtu_expires);
-		if (expires) {
-			if (time_before(jiffies, expires))
-				expires -= jiffies;
-			else
-				expires = 0;
-		}
+	expires = rt->dst.expires;
+	if (expires) {
+		if (time_before(jiffies, expires))
+			expires -= jiffies;
+		else
+			expires = 0;
 	}
 
 	if (rt_is_input_route(rt)) {
@@ -2944,8 +2744,7 @@ static int rt_fill_info(struct net *net,
 		goto nla_put_failure;
 	}
 
-	if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage,
-			       expires, error) < 0)
+	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
 		goto nla_put_failure;
 
 	return nlmsg_end(skb, nlh);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3ba605f60e4e..29aa0c800cd0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3563,6 +3563,8 @@ void __init tcp_init(void)
 	pr_info("Hash tables configured (established %u bind %u)\n",
 		tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
 
+	tcp_metrics_init();
+
 	tcp_register_congestion_control(&tcp_reno);
 
 	memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets));
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ca0d0e7c9778..055ac49b8b40 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -93,7 +93,6 @@ int sysctl_tcp_rfc1337 __read_mostly;
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 int sysctl_tcp_frto __read_mostly = 2;
 int sysctl_tcp_frto_response __read_mostly;
-int sysctl_tcp_nometrics_save __read_mostly;
 
 int sysctl_tcp_thin_dupack __read_mostly;
 
@@ -701,7 +700,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 /* Calculate rto without backoff.  This is the second half of Van Jacobson's
  * routine referred to above.
  */
-static inline void tcp_set_rto(struct sock *sk)
+void tcp_set_rto(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	/* Old crap is replaced with new one. 8)
@@ -728,109 +727,6 @@ static inline void tcp_set_rto(struct sock *sk)
 	tcp_bound_rto(sk);
 }
 
-/* Save metrics learned by this TCP session.
-   This function is called only, when TCP finishes successfully
-   i.e. when it enters TIME-WAIT or goes from LAST-ACK to CLOSE.
- */
-void tcp_update_metrics(struct sock *sk)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct dst_entry *dst = __sk_dst_get(sk);
-
-	if (sysctl_tcp_nometrics_save)
-		return;
-
-	if (dst && (dst->flags & DST_HOST)) {
-		const struct inet_connection_sock *icsk = inet_csk(sk);
-		int m;
-		unsigned long rtt;
-
-		dst_confirm(dst);
-
-		if (icsk->icsk_backoff || !tp->srtt) {
-			/* This session failed to estimate rtt. Why?
-			 * Probably, no packets returned in time.
-			 * Reset our results.
-			 */
-			if (!(dst_metric_locked(dst, RTAX_RTT)))
-				dst_metric_set(dst, RTAX_RTT, 0);
-			return;
-		}
-
-		rtt = dst_metric_rtt(dst, RTAX_RTT);
-		m = rtt - tp->srtt;
-
-		/* If newly calculated rtt larger than stored one,
-		 * store new one. Otherwise, use EWMA. Remember,
-		 * rtt overestimation is always better than underestimation.
-		 */
-		if (!(dst_metric_locked(dst, RTAX_RTT))) {
-			if (m <= 0)
-				set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt);
-			else
-				set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3));
-		}
-
-		if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
-			unsigned long var;
-			if (m < 0)
-				m = -m;
-
-			/* Scale deviation to rttvar fixed point */
-			m >>= 1;
-			if (m < tp->mdev)
-				m = tp->mdev;
-
-			var = dst_metric_rtt(dst, RTAX_RTTVAR);
-			if (m >= var)
-				var = m;
-			else
-				var -= (var - m) >> 2;
-
-			set_dst_metric_rtt(dst, RTAX_RTTVAR, var);
-		}
-
-		if (tcp_in_initial_slowstart(tp)) {
-			/* Slow start still did not finish. */
-			if (dst_metric(dst, RTAX_SSTHRESH) &&
-			    !dst_metric_locked(dst, RTAX_SSTHRESH) &&
-			    (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH))
-				dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1);
-			if (!dst_metric_locked(dst, RTAX_CWND) &&
-			    tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
-				dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd);
-		} else if (tp->snd_cwnd > tp->snd_ssthresh &&
-			   icsk->icsk_ca_state == TCP_CA_Open) {
-			/* Cong. avoidance phase, cwnd is reliable. */
-			if (!dst_metric_locked(dst, RTAX_SSTHRESH))
-				dst_metric_set(dst, RTAX_SSTHRESH,
-					       max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
-			if (!dst_metric_locked(dst, RTAX_CWND))
-				dst_metric_set(dst, RTAX_CWND,
-					       (dst_metric(dst, RTAX_CWND) +
-						tp->snd_cwnd) >> 1);
-		} else {
-			/* Else slow start did not finish, cwnd is non-sense,
-			   ssthresh may be also invalid.
-			 */
-			if (!dst_metric_locked(dst, RTAX_CWND))
-				dst_metric_set(dst, RTAX_CWND,
-					       (dst_metric(dst, RTAX_CWND) +
-						tp->snd_ssthresh) >> 1);
-			if (dst_metric(dst, RTAX_SSTHRESH) &&
-			    !dst_metric_locked(dst, RTAX_SSTHRESH) &&
-			    tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH))
-				dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh);
-		}
-
-		if (!dst_metric_locked(dst, RTAX_REORDERING)) {
-			if (dst_metric(dst, RTAX_REORDERING) < tp->reordering &&
-			    tp->reordering != sysctl_tcp_reordering)
-				dst_metric_set(dst, RTAX_REORDERING, tp->reordering);
-		}
-	}
-}
-
 __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
 {
 	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
@@ -867,7 +763,7 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) | |||
867 | * Packet counting of FACK is based on in-order assumptions, therefore TCP | 763 | * Packet counting of FACK is based on in-order assumptions, therefore TCP |
868 | * disables it when reordering is detected | 764 | * disables it when reordering is detected |
869 | */ | 765 | */ |
870 | static void tcp_disable_fack(struct tcp_sock *tp) | 766 | void tcp_disable_fack(struct tcp_sock *tp) |
871 | { | 767 | { |
872 | /* RFC3517 uses different metric in lost marker => reset on change */ | 768 | /* RFC3517 uses different metric in lost marker => reset on change */ |
873 | if (tcp_is_fack(tp)) | 769 | if (tcp_is_fack(tp)) |
@@ -881,86 +777,6 @@ static void tcp_dsack_seen(struct tcp_sock *tp) | |||
881 | tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; | 777 | tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; |
882 | } | 778 | } |
883 | 779 | ||
884 | /* Initialize metrics on socket. */ | ||
885 | |||
886 | static void tcp_init_metrics(struct sock *sk) | ||
887 | { | ||
888 | struct tcp_sock *tp = tcp_sk(sk); | ||
889 | struct dst_entry *dst = __sk_dst_get(sk); | ||
890 | |||
891 | if (dst == NULL) | ||
892 | goto reset; | ||
893 | |||
894 | dst_confirm(dst); | ||
895 | |||
896 | if (dst_metric_locked(dst, RTAX_CWND)) | ||
897 | tp->snd_cwnd_clamp = dst_metric(dst, RTAX_CWND); | ||
898 | if (dst_metric(dst, RTAX_SSTHRESH)) { | ||
899 | tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); | ||
900 | if (tp->snd_ssthresh > tp->snd_cwnd_clamp) | ||
901 | tp->snd_ssthresh = tp->snd_cwnd_clamp; | ||
902 | } else { | ||
903 | /* ssthresh may have been reduced unnecessarily during. | ||
904 | * 3WHS. Restore it back to its initial default. | ||
905 | */ | ||
906 | tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; | ||
907 | } | ||
908 | if (dst_metric(dst, RTAX_REORDERING) && | ||
909 | tp->reordering != dst_metric(dst, RTAX_REORDERING)) { | ||
910 | tcp_disable_fack(tp); | ||
911 | tcp_disable_early_retrans(tp); | ||
912 | tp->reordering = dst_metric(dst, RTAX_REORDERING); | ||
913 | } | ||
914 | |||
915 | if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0) | ||
916 | goto reset; | ||
917 | |||
918 | /* Initial rtt is determined from SYN,SYN-ACK. | ||
919 | * The segment is small and rtt may appear much | ||
920 | * less than real one. Use per-dst memory | ||
921 | * to make it more realistic. | ||
922 | * | ||
923 | * A bit of theory. RTT is time passed after "normal" sized packet | ||
924 | * is sent until it is ACKed. In normal circumstances sending small | ||
925 | * packets force peer to delay ACKs and calculation is correct too. | ||
926 | * The algorithm is adaptive and, provided we follow specs, it | ||
927 | * NEVER underestimate RTT. BUT! If peer tries to make some clever | ||
928 | * tricks sort of "quick acks" for time long enough to decrease RTT | ||
929 | * to low value, and then abruptly stops to do it and starts to delay | ||
930 | * ACKs, wait for troubles. | ||
931 | */ | ||
932 | if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) { | ||
933 | tp->srtt = dst_metric_rtt(dst, RTAX_RTT); | ||
934 | tp->rtt_seq = tp->snd_nxt; | ||
935 | } | ||
936 | if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) { | ||
937 | tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR); | ||
938 | tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); | ||
939 | } | ||
940 | tcp_set_rto(sk); | ||
941 | reset: | ||
942 | if (tp->srtt == 0) { | ||
943 | /* RFC6298: 5.7 We've failed to get a valid RTT sample from | ||
944 | * 3WHS. This is most likely due to retransmission, | ||
945 | * including spurious one. Reset the RTO back to 3secs | ||
946 | * from the more aggressive 1sec to avoid more spurious | ||
947 | * retransmission. | ||
948 | */ | ||
949 | tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; | ||
950 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; | ||
951 | } | ||
952 | /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been | ||
953 | * retransmitted. In light of RFC6298 more aggressive 1sec | ||
954 | * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK | ||
955 | * retransmission has occurred. | ||
956 | */ | ||
957 | if (tp->total_retrans > 1) | ||
958 | tp->snd_cwnd = 1; | ||
959 | else | ||
960 | tp->snd_cwnd = tcp_init_cwnd(tp, dst); | ||
961 | tp->snd_cwnd_stamp = tcp_time_stamp; | ||
962 | } | ||
963 | |||
964 | static void tcp_update_reordering(struct sock *sk, const int metric, | 780 | static void tcp_update_reordering(struct sock *sk, const int metric, |
965 | const int ts) | 781 | const int ts) |
966 | { | 782 | { |
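
The tcp_update_metrics() logic removed here survives unchanged in the new net/ipv4/tcp_metrics.c below: the cached per-destination RTT is an exponentially weighted moving average with gain 1/8 (rtt -= m >> 3) and the deviation one with gain 1/4 (var -= (var - m) >> 2), growing immediately but shrinking slowly. A minimal user-space sketch of that update, with the struct and function names hypothetical:

    #include <stdio.h>

    /* Standalone model of the per-destination RTT metric update; all
     * values are in jiffies and the names are illustrative, not kernel
     * identifiers.
     */
    struct rtt_metric {
        unsigned long rtt;      /* smoothed per-destination RTT */
        unsigned long var;      /* smoothed deviation */
    };

    static void metric_update(struct rtt_metric *mtr, unsigned long srtt,
                              unsigned long mdev)
    {
        long m = (long)mtr->rtt - (long)srtt;

        if (m <= 0)             /* larger sample: adopt it outright */
            mtr->rtt = srtt;
        else                    /* smaller sample: EWMA with gain 1/8 */
            mtr->rtt -= m >> 3;

        if (m < 0)
            m = -m;
        m >>= 1;                /* scale deviation to rttvar fixed point */
        if (m < (long)mdev)
            m = mdev;
        if ((unsigned long)m >= mtr->var)
            mtr->var = m;       /* deviation grows immediately */
        else                    /* ...but shrinks with gain 1/4 */
            mtr->var -= (mtr->var - m) >> 2;
    }

    int main(void)
    {
        struct rtt_metric mtr = { .rtt = 200, .var = 50 };

        metric_update(&mtr, 120, 30);   /* sample smaller than cached */
        printf("rtt=%lu var=%lu\n", mtr.rtt, mtr.var);  /* rtt=190 var=48 */
        return 0;
    }

Replacing the average outright with any larger sample is deliberate: overestimating the cached RTT only makes the first RTO of the next connection conservative, whereas underestimating it invites spurious retransmissions.
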
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 64568fa21d05..ddefd39ac0cf 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -209,22 +209,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
209 | } | 209 | } |
210 | 210 | ||
211 | if (tcp_death_row.sysctl_tw_recycle && | 211 | if (tcp_death_row.sysctl_tw_recycle && |
212 | !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) { | 212 | !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) |
213 | struct inet_peer *peer = rt_get_peer(rt, fl4->daddr); | 213 | tcp_fetch_timewait_stamp(sk, &rt->dst); |
214 | /* | ||
215 | * VJ's idea. We save last timestamp seen from | ||
216 | * the destination in peer table, when entering state | ||
217 | * TIME-WAIT * and initialize rx_opt.ts_recent from it, | ||
218 | * when trying new connection. | ||
219 | */ | ||
220 | if (peer) { | ||
221 | inet_peer_refcheck(peer); | ||
222 | if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { | ||
223 | tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; | ||
224 | tp->rx_opt.ts_recent = peer->tcp_ts; | ||
225 | } | ||
226 | } | ||
227 | } | ||
228 | 214 | ||
229 | inet->inet_dport = usin->sin_port; | 215 | inet->inet_dport = usin->sin_port; |
230 | inet->inet_daddr = daddr; | 216 | inet->inet_daddr = daddr; |
@@ -1375,7 +1361,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1375 | isn = cookie_v4_init_sequence(sk, skb, &req->mss); | 1361 | isn = cookie_v4_init_sequence(sk, skb, &req->mss); |
1376 | req->cookie_ts = tmp_opt.tstamp_ok; | 1362 | req->cookie_ts = tmp_opt.tstamp_ok; |
1377 | } else if (!isn) { | 1363 | } else if (!isn) { |
1378 | struct inet_peer *peer = NULL; | ||
1379 | struct flowi4 fl4; | 1364 | struct flowi4 fl4; |
1380 | 1365 | ||
1381 | /* VJ's idea. We save last timestamp seen | 1366 | /* VJ's idea. We save last timestamp seen |
@@ -1390,12 +1375,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1390 | if (tmp_opt.saw_tstamp && | 1375 | if (tmp_opt.saw_tstamp && |
1391 | tcp_death_row.sysctl_tw_recycle && | 1376 | tcp_death_row.sysctl_tw_recycle && |
1392 | (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL && | 1377 | (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL && |
1393 | fl4.daddr == saddr && | 1378 | fl4.daddr == saddr) { |
1394 | (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) { | 1379 | if (!tcp_peer_is_proven(req, dst, true)) { |
1395 | inet_peer_refcheck(peer); | ||
1396 | if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && | ||
1397 | (s32)(peer->tcp_ts - req->ts_recent) > | ||
1398 | TCP_PAWS_WINDOW) { | ||
1399 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); | 1380 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); |
1400 | goto drop_and_release; | 1381 | goto drop_and_release; |
1401 | } | 1382 | } |
@@ -1404,8 +1385,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1404 | else if (!sysctl_tcp_syncookies && | 1385 | else if (!sysctl_tcp_syncookies && |
1405 | (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < | 1386 | (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < |
1406 | (sysctl_max_syn_backlog >> 2)) && | 1387 | (sysctl_max_syn_backlog >> 2)) && |
1407 | (!peer || !peer->tcp_ts_stamp) && | 1388 | !tcp_peer_is_proven(req, dst, false)) { |
1408 | (!dst || !dst_metric(dst, RTAX_RTT))) { | ||
1409 | /* Without syncookies last quarter of | 1389 | /* Without syncookies last quarter of |
1410 | * backlog is filled with destinations, | 1390 | * backlog is filled with destinations, |
1411 | * proven to be alive. | 1391 | * proven to be alive. |
@@ -1867,21 +1847,6 @@ do_time_wait: | |||
1867 | goto discard_it; | 1847 | goto discard_it; |
1868 | } | 1848 | } |
1869 | 1849 | ||
1870 | struct inet_peer *tcp_v4_get_peer(struct sock *sk) | ||
1871 | { | ||
1872 | struct rtable *rt = (struct rtable *) __sk_dst_get(sk); | ||
1873 | struct inet_sock *inet = inet_sk(sk); | ||
1874 | |||
1875 | /* If we don't have a valid cached route, or we're doing IP | ||
1876 | * options which make the IPv4 header destination address | ||
1877 | * different from our peer's, do not bother with this. | ||
1878 | */ | ||
1879 | if (!rt || inet->cork.fl.u.ip4.daddr != inet->inet_daddr) | ||
1880 | return NULL; | ||
1881 | return rt_get_peer_create(rt, inet->inet_daddr); | ||
1882 | } | ||
1883 | EXPORT_SYMBOL(tcp_v4_get_peer); | ||
1884 | |||
1885 | static struct timewait_sock_ops tcp_timewait_sock_ops = { | 1850 | static struct timewait_sock_ops tcp_timewait_sock_ops = { |
1886 | .twsk_obj_size = sizeof(struct tcp_timewait_sock), | 1851 | .twsk_obj_size = sizeof(struct tcp_timewait_sock), |
1887 | .twsk_unique = tcp_twsk_unique, | 1852 | .twsk_unique = tcp_twsk_unique, |
@@ -1894,7 +1859,6 @@ const struct inet_connection_sock_af_ops ipv4_specific = { | |||
1894 | .rebuild_header = inet_sk_rebuild_header, | 1859 | .rebuild_header = inet_sk_rebuild_header, |
1895 | .conn_request = tcp_v4_conn_request, | 1860 | .conn_request = tcp_v4_conn_request, |
1896 | .syn_recv_sock = tcp_v4_syn_recv_sock, | 1861 | .syn_recv_sock = tcp_v4_syn_recv_sock, |
1897 | .get_peer = tcp_v4_get_peer, | ||
1898 | .net_header_len = sizeof(struct iphdr), | 1862 | .net_header_len = sizeof(struct iphdr), |
1899 | .setsockopt = ip_setsockopt, | 1863 | .setsockopt = ip_setsockopt, |
1900 | .getsockopt = ip_getsockopt, | 1864 | .getsockopt = ip_getsockopt, |
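
Both conn_request paths now delegate to tcp_peer_is_proven() (added below in tcp_metrics.c) in one of two modes: with the final argument true it is a PAWS test that fails when a fresh cached timestamp is ahead of the one carried in the SYN, and with false it is a liveness test that passes only when the destination has both a cached RTT and a timestamp. A simplified user-space model of the predicate, with the struct our own and the two constants assumed to match their kernel values:

    #include <stdbool.h>
    #include <stdint.h>

    #define TCP_PAWS_MSL    60      /* kernel values, assumed here */
    #define TCP_PAWS_WINDOW 1

    /* Stand-in for the tcp_metrics entry of one destination. */
    struct cached_peer {
        uint32_t ts;            /* last timestamp seen from the peer */
        uint32_t ts_stamp;      /* when it was recorded, in seconds */
        uint32_t rtt;           /* cached RTT metric, 0 if none */
    };

    static bool peer_is_proven(const struct cached_peer *cached,
                               uint32_t req_ts, uint32_t now, bool paws_check)
    {
        if (!cached)
            return paws_check;  /* unknown peer: PAWS passes, liveness fails */
        if (paws_check)
            /* reject when the cached stamp is fresh and ahead of the SYN's */
            return !(now - cached->ts_stamp < TCP_PAWS_MSL &&
                     (int32_t)(cached->ts - req_ts) > TCP_PAWS_WINDOW);
        return cached->rtt && cached->ts_stamp;
    }

The liveness mode backs the syn-backlog pressure valve seen above: when syncookies are off and the queue is three-quarters full, only destinations proven alive keep their place in the last quarter.
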
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c new file mode 100644 index 000000000000..1fd83d3118fe --- /dev/null +++ b/net/ipv4/tcp_metrics.c | |||
@@ -0,0 +1,697 @@ | |||
1 | #include <linux/rcupdate.h> | ||
2 | #include <linux/spinlock.h> | ||
3 | #include <linux/jiffies.h> | ||
4 | #include <linux/bootmem.h> | ||
5 | #include <linux/module.h> | ||
6 | #include <linux/cache.h> | ||
7 | #include <linux/slab.h> | ||
8 | #include <linux/init.h> | ||
9 | #include <linux/tcp.h> | ||
10 | |||
11 | #include <net/inet_connection_sock.h> | ||
12 | #include <net/net_namespace.h> | ||
13 | #include <net/request_sock.h> | ||
14 | #include <net/inetpeer.h> | ||
15 | #include <net/sock.h> | ||
16 | #include <net/ipv6.h> | ||
17 | #include <net/dst.h> | ||
18 | #include <net/tcp.h> | ||
19 | |||
20 | int sysctl_tcp_nometrics_save __read_mostly; | ||
21 | |||
22 | enum tcp_metric_index { | ||
23 | TCP_METRIC_RTT, | ||
24 | TCP_METRIC_RTTVAR, | ||
25 | TCP_METRIC_SSTHRESH, | ||
26 | TCP_METRIC_CWND, | ||
27 | TCP_METRIC_REORDERING, | ||
28 | |||
29 | /* Always last. */ | ||
30 | TCP_METRIC_MAX, | ||
31 | }; | ||
32 | |||
33 | struct tcp_metrics_block { | ||
34 | struct tcp_metrics_block __rcu *tcpm_next; | ||
35 | struct inetpeer_addr tcpm_addr; | ||
36 | unsigned long tcpm_stamp; | ||
37 | u32 tcpm_ts; | ||
38 | u32 tcpm_ts_stamp; | ||
39 | u32 tcpm_lock; | ||
40 | u32 tcpm_vals[TCP_METRIC_MAX]; | ||
41 | }; | ||
42 | |||
43 | static bool tcp_metric_locked(struct tcp_metrics_block *tm, | ||
44 | enum tcp_metric_index idx) | ||
45 | { | ||
46 | return tm->tcpm_lock & (1 << idx); | ||
47 | } | ||
48 | |||
49 | static u32 tcp_metric_get(struct tcp_metrics_block *tm, | ||
50 | enum tcp_metric_index idx) | ||
51 | { | ||
52 | return tm->tcpm_vals[idx]; | ||
53 | } | ||
54 | |||
55 | static u32 tcp_metric_get_jiffies(struct tcp_metrics_block *tm, | ||
56 | enum tcp_metric_index idx) | ||
57 | { | ||
58 | return msecs_to_jiffies(tm->tcpm_vals[idx]); | ||
59 | } | ||
60 | |||
61 | static void tcp_metric_set(struct tcp_metrics_block *tm, | ||
62 | enum tcp_metric_index idx, | ||
63 | u32 val) | ||
64 | { | ||
65 | tm->tcpm_vals[idx] = val; | ||
66 | } | ||
67 | |||
68 | static void tcp_metric_set_msecs(struct tcp_metrics_block *tm, | ||
69 | enum tcp_metric_index idx, | ||
70 | u32 val) | ||
71 | { | ||
72 | tm->tcpm_vals[idx] = jiffies_to_msecs(val); | ||
73 | } | ||
74 | |||
75 | static bool addr_same(const struct inetpeer_addr *a, | ||
76 | const struct inetpeer_addr *b) | ||
77 | { | ||
78 | const struct in6_addr *a6, *b6; | ||
79 | |||
80 | if (a->family != b->family) | ||
81 | return false; | ||
82 | if (a->family == AF_INET) | ||
83 | return a->addr.a4 == b->addr.a4; | ||
84 | |||
85 | a6 = (const struct in6_addr *) &a->addr.a6[0]; | ||
86 | b6 = (const struct in6_addr *) &b->addr.a6[0]; | ||
87 | |||
88 | return ipv6_addr_equal(a6, b6); | ||
89 | } | ||
90 | |||
91 | struct tcpm_hash_bucket { | ||
92 | struct tcp_metrics_block __rcu *chain; | ||
93 | }; | ||
94 | |||
95 | static DEFINE_SPINLOCK(tcp_metrics_lock); | ||
96 | |||
97 | static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst) | ||
98 | { | ||
99 | u32 val; | ||
100 | |||
101 | val = 0; | ||
102 | if (dst_metric_locked(dst, RTAX_RTT)) | ||
103 | val |= 1 << TCP_METRIC_RTT; | ||
104 | if (dst_metric_locked(dst, RTAX_RTTVAR)) | ||
105 | val |= 1 << TCP_METRIC_RTTVAR; | ||
106 | if (dst_metric_locked(dst, RTAX_SSTHRESH)) | ||
107 | val |= 1 << TCP_METRIC_SSTHRESH; | ||
108 | if (dst_metric_locked(dst, RTAX_CWND)) | ||
109 | val |= 1 << TCP_METRIC_CWND; | ||
110 | if (dst_metric_locked(dst, RTAX_REORDERING)) | ||
111 | val |= 1 << TCP_METRIC_REORDERING; | ||
112 | tm->tcpm_lock = val; | ||
113 | |||
114 | tm->tcpm_vals[TCP_METRIC_RTT] = dst_metric_raw(dst, RTAX_RTT); | ||
115 | tm->tcpm_vals[TCP_METRIC_RTTVAR] = dst_metric_raw(dst, RTAX_RTTVAR); | ||
116 | tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); | ||
117 | tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); | ||
118 | tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); | ||
119 | tm->tcpm_ts = 0; | ||
120 | tm->tcpm_ts_stamp = 0; | ||
121 | } | ||
122 | |||
123 | static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, | ||
124 | struct inetpeer_addr *addr, | ||
125 | unsigned int hash, | ||
126 | bool reclaim) | ||
127 | { | ||
128 | struct tcp_metrics_block *tm; | ||
129 | struct net *net; | ||
130 | |||
131 | spin_lock_bh(&tcp_metrics_lock); | ||
132 | net = dev_net(dst->dev); | ||
133 | if (unlikely(reclaim)) { | ||
134 | struct tcp_metrics_block *oldest; | ||
135 | |||
136 | oldest = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); | ||
137 | for (tm = rcu_dereference(oldest->tcpm_next); tm; | ||
138 | tm = rcu_dereference(tm->tcpm_next)) { | ||
139 | if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp)) | ||
140 | oldest = tm; | ||
141 | } | ||
142 | tm = oldest; | ||
143 | } else { | ||
144 | tm = kmalloc(sizeof(*tm), GFP_ATOMIC); | ||
145 | if (!tm) | ||
146 | goto out_unlock; | ||
147 | } | ||
148 | tm->tcpm_addr = *addr; | ||
149 | tm->tcpm_stamp = jiffies; | ||
150 | |||
151 | tcpm_suck_dst(tm, dst); | ||
152 | |||
153 | if (likely(!reclaim)) { | ||
154 | tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain; | ||
155 | rcu_assign_pointer(net->ipv4.tcp_metrics_hash[hash].chain, tm); | ||
156 | } | ||
157 | |||
158 | out_unlock: | ||
159 | spin_unlock_bh(&tcp_metrics_lock); | ||
160 | return tm; | ||
161 | } | ||
162 | |||
163 | #define TCP_METRICS_TIMEOUT (60 * 60 * HZ) | ||
164 | |||
165 | static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst) | ||
166 | { | ||
167 | if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT))) | ||
168 | tcpm_suck_dst(tm, dst); | ||
169 | } | ||
170 | |||
171 | #define TCP_METRICS_RECLAIM_DEPTH 5 | ||
172 | #define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL | ||
173 | |||
174 | static struct tcp_metrics_block *tcp_get_encode(struct tcp_metrics_block *tm, int depth) | ||
175 | { | ||
176 | if (tm) | ||
177 | return tm; | ||
178 | if (depth > TCP_METRICS_RECLAIM_DEPTH) | ||
179 | return TCP_METRICS_RECLAIM_PTR; | ||
180 | return NULL; | ||
181 | } | ||
182 | |||
183 | static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *addr, | ||
184 | struct net *net, unsigned int hash) | ||
185 | { | ||
186 | struct tcp_metrics_block *tm; | ||
187 | int depth = 0; | ||
188 | |||
189 | for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; | ||
190 | tm = rcu_dereference(tm->tcpm_next)) { | ||
191 | if (addr_same(&tm->tcpm_addr, addr)) | ||
192 | break; | ||
193 | depth++; | ||
194 | } | ||
195 | return tcp_get_encode(tm, depth); | ||
196 | } | ||
197 | |||
198 | static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, | ||
199 | struct dst_entry *dst) | ||
200 | { | ||
201 | struct tcp_metrics_block *tm; | ||
202 | struct inetpeer_addr addr; | ||
203 | unsigned int hash; | ||
204 | struct net *net; | ||
205 | |||
206 | addr.family = req->rsk_ops->family; | ||
207 | switch (addr.family) { | ||
208 | case AF_INET: | ||
209 | addr.addr.a4 = inet_rsk(req)->rmt_addr; | ||
210 | hash = (__force unsigned int) addr.addr.a4; | ||
211 | break; | ||
212 | case AF_INET6: | ||
213 | *(struct in6_addr *)addr.addr.a6 = inet6_rsk(req)->rmt_addr; | ||
214 | hash = ((__force unsigned int) addr.addr.a6[0] ^ | ||
215 | (__force unsigned int) addr.addr.a6[1] ^ | ||
216 | (__force unsigned int) addr.addr.a6[2] ^ | ||
217 | (__force unsigned int) addr.addr.a6[3]); | ||
218 | break; | ||
219 | default: | ||
220 | return NULL; | ||
221 | } | ||
222 | |||
223 | hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8); | ||
224 | |||
225 | net = dev_net(dst->dev); | ||
226 | hash &= net->ipv4.tcp_metrics_hash_mask; | ||
227 | |||
228 | for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; | ||
229 | tm = rcu_dereference(tm->tcpm_next)) { | ||
230 | if (addr_same(&tm->tcpm_addr, &addr)) | ||
231 | break; | ||
232 | } | ||
233 | tcpm_check_stamp(tm, dst); | ||
234 | return tm; | ||
235 | } | ||
236 | |||
237 | static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw) | ||
238 | { | ||
239 | struct inet6_timewait_sock *tw6; | ||
240 | struct tcp_metrics_block *tm; | ||
241 | struct inetpeer_addr addr; | ||
242 | unsigned int hash; | ||
243 | struct net *net; | ||
244 | |||
245 | addr.family = tw->tw_family; | ||
246 | switch (addr.family) { | ||
247 | case AF_INET: | ||
248 | addr.addr.a4 = tw->tw_daddr; | ||
249 | hash = (__force unsigned int) addr.addr.a4; | ||
250 | break; | ||
251 | case AF_INET6: | ||
252 | tw6 = inet6_twsk((struct sock *)tw); | ||
253 | *(struct in6_addr *)addr.addr.a6 = tw6->tw_v6_daddr; | ||
254 | hash = ((__force unsigned int) addr.addr.a6[0] ^ | ||
255 | (__force unsigned int) addr.addr.a6[1] ^ | ||
256 | (__force unsigned int) addr.addr.a6[2] ^ | ||
257 | (__force unsigned int) addr.addr.a6[3]); | ||
258 | break; | ||
259 | default: | ||
260 | return NULL; | ||
261 | } | ||
262 | |||
263 | hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8); | ||
264 | |||
265 | net = twsk_net(tw); | ||
266 | hash &= net->ipv4.tcp_metrics_hash_mask; | ||
267 | |||
268 | for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; | ||
269 | tm = rcu_dereference(tm->tcpm_next)) { | ||
270 | if (addr_same(&tm->tcpm_addr, &addr)) | ||
271 | break; | ||
272 | } | ||
273 | return tm; | ||
274 | } | ||
275 | |||
276 | static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, | ||
277 | struct dst_entry *dst, | ||
278 | bool create) | ||
279 | { | ||
280 | struct tcp_metrics_block *tm; | ||
281 | struct inetpeer_addr addr; | ||
282 | unsigned int hash; | ||
283 | struct net *net; | ||
284 | bool reclaim; | ||
285 | |||
286 | addr.family = sk->sk_family; | ||
287 | switch (addr.family) { | ||
288 | case AF_INET: | ||
289 | addr.addr.a4 = inet_sk(sk)->inet_daddr; | ||
290 | hash = (__force unsigned int) addr.addr.a4; | ||
291 | break; | ||
292 | case AF_INET6: | ||
293 | *(struct in6_addr *)addr.addr.a6 = inet6_sk(sk)->daddr; | ||
294 | hash = ((__force unsigned int) addr.addr.a6[0] ^ | ||
295 | (__force unsigned int) addr.addr.a6[1] ^ | ||
296 | (__force unsigned int) addr.addr.a6[2] ^ | ||
297 | (__force unsigned int) addr.addr.a6[3]); | ||
298 | break; | ||
299 | default: | ||
300 | return NULL; | ||
301 | } | ||
302 | |||
303 | hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8); | ||
304 | |||
305 | net = dev_net(dst->dev); | ||
306 | hash &= net->ipv4.tcp_metrics_hash_mask; | ||
307 | |||
308 | tm = __tcp_get_metrics(&addr, net, hash); | ||
309 | reclaim = false; | ||
310 | if (tm == TCP_METRICS_RECLAIM_PTR) { | ||
311 | reclaim = true; | ||
312 | tm = NULL; | ||
313 | } | ||
314 | if (!tm && create) | ||
315 | tm = tcpm_new(dst, &addr, hash, reclaim); | ||
316 | else | ||
317 | tcpm_check_stamp(tm, dst); | ||
318 | |||
319 | return tm; | ||
320 | } | ||
321 | |||
322 | /* Save metrics learned by this TCP session. This function is called | ||
323 | * only when TCP finishes successfully, i.e. when it enters TIME-WAIT | ||
324 | * or goes from LAST-ACK to CLOSE. | ||
325 | */ | ||
326 | void tcp_update_metrics(struct sock *sk) | ||
327 | { | ||
328 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
329 | struct dst_entry *dst = __sk_dst_get(sk); | ||
330 | struct tcp_sock *tp = tcp_sk(sk); | ||
331 | struct tcp_metrics_block *tm; | ||
332 | unsigned long rtt; | ||
333 | u32 val; | ||
334 | int m; | ||
335 | |||
336 | if (sysctl_tcp_nometrics_save || !dst) | ||
337 | return; | ||
338 | |||
339 | if (dst->flags & DST_HOST) | ||
340 | dst_confirm(dst); | ||
341 | |||
342 | rcu_read_lock(); | ||
343 | if (icsk->icsk_backoff || !tp->srtt) { | ||
344 | /* This session failed to estimate rtt. Why? | ||
345 | * Probably, no packets returned in time. Reset our | ||
346 | * results. | ||
347 | */ | ||
348 | tm = tcp_get_metrics(sk, dst, false); | ||
349 | if (tm && !tcp_metric_locked(tm, TCP_METRIC_RTT)) | ||
350 | tcp_metric_set(tm, TCP_METRIC_RTT, 0); | ||
351 | goto out_unlock; | ||
352 | } else | ||
353 | tm = tcp_get_metrics(sk, dst, true); | ||
354 | |||
355 | if (!tm) | ||
356 | goto out_unlock; | ||
357 | |||
358 | rtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT); | ||
359 | m = rtt - tp->srtt; | ||
360 | |||
361 | /* If the newly calculated rtt is larger than the stored one, store | ||
362 | * the new one. Otherwise, use EWMA. Remember, rtt overestimation is | ||
363 | * always better than underestimation. | ||
364 | */ | ||
365 | if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) { | ||
366 | if (m <= 0) | ||
367 | rtt = tp->srtt; | ||
368 | else | ||
369 | rtt -= (m >> 3); | ||
370 | tcp_metric_set_msecs(tm, TCP_METRIC_RTT, rtt); | ||
371 | } | ||
372 | |||
373 | if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) { | ||
374 | unsigned long var; | ||
375 | |||
376 | if (m < 0) | ||
377 | m = -m; | ||
378 | |||
379 | /* Scale deviation to rttvar fixed point */ | ||
380 | m >>= 1; | ||
381 | if (m < tp->mdev) | ||
382 | m = tp->mdev; | ||
383 | |||
384 | var = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR); | ||
385 | if (m >= var) | ||
386 | var = m; | ||
387 | else | ||
388 | var -= (var - m) >> 2; | ||
389 | |||
390 | tcp_metric_set_msecs(tm, TCP_METRIC_RTTVAR, var); | ||
391 | } | ||
392 | |||
393 | if (tcp_in_initial_slowstart(tp)) { | ||
394 | /* Slow start still did not finish. */ | ||
395 | if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { | ||
396 | val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); | ||
397 | if (val && (tp->snd_cwnd >> 1) > val) | ||
398 | tcp_metric_set(tm, TCP_METRIC_SSTHRESH, | ||
399 | tp->snd_cwnd >> 1); | ||
400 | } | ||
401 | if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) { | ||
402 | val = tcp_metric_get(tm, TCP_METRIC_CWND); | ||
403 | if (tp->snd_cwnd > val) | ||
404 | tcp_metric_set(tm, TCP_METRIC_CWND, | ||
405 | tp->snd_cwnd); | ||
406 | } | ||
407 | } else if (tp->snd_cwnd > tp->snd_ssthresh && | ||
408 | icsk->icsk_ca_state == TCP_CA_Open) { | ||
409 | /* Cong. avoidance phase, cwnd is reliable. */ | ||
410 | if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) | ||
411 | tcp_metric_set(tm, TCP_METRIC_SSTHRESH, | ||
412 | max(tp->snd_cwnd >> 1, tp->snd_ssthresh)); | ||
413 | if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) { | ||
414 | val = tcp_metric_get(tm, TCP_METRIC_CWND); | ||
415 | tcp_metric_set(tm, TCP_METRIC_CWND, (val + tp->snd_cwnd) >> 1); | ||
416 | } | ||
417 | } else { | ||
418 | /* Else slow start did not finish, cwnd is not meaningful, | ||
419 | * and ssthresh may be invalid as well. | ||
420 | */ | ||
421 | if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) { | ||
422 | val = tcp_metric_get(tm, TCP_METRIC_CWND); | ||
423 | tcp_metric_set(tm, TCP_METRIC_CWND, | ||
424 | (val + tp->snd_ssthresh) >> 1); | ||
425 | } | ||
426 | if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { | ||
427 | val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); | ||
428 | if (val && tp->snd_ssthresh > val) | ||
429 | tcp_metric_set(tm, TCP_METRIC_SSTHRESH, | ||
430 | tp->snd_ssthresh); | ||
431 | } | ||
432 | if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) { | ||
433 | val = tcp_metric_get(tm, TCP_METRIC_REORDERING); | ||
434 | if (val < tp->reordering && | ||
435 | tp->reordering != sysctl_tcp_reordering) | ||
436 | tcp_metric_set(tm, TCP_METRIC_REORDERING, | ||
437 | tp->reordering); | ||
438 | } | ||
439 | } | ||
440 | tm->tcpm_stamp = jiffies; | ||
441 | out_unlock: | ||
442 | rcu_read_unlock(); | ||
443 | } | ||
444 | |||
445 | /* Initialize metrics on socket. */ | ||
446 | |||
447 | void tcp_init_metrics(struct sock *sk) | ||
448 | { | ||
449 | struct dst_entry *dst = __sk_dst_get(sk); | ||
450 | struct tcp_sock *tp = tcp_sk(sk); | ||
451 | struct tcp_metrics_block *tm; | ||
452 | u32 val; | ||
453 | |||
454 | if (dst == NULL) | ||
455 | goto reset; | ||
456 | |||
457 | dst_confirm(dst); | ||
458 | |||
459 | rcu_read_lock(); | ||
460 | tm = tcp_get_metrics(sk, dst, true); | ||
461 | if (!tm) { | ||
462 | rcu_read_unlock(); | ||
463 | goto reset; | ||
464 | } | ||
465 | |||
466 | if (tcp_metric_locked(tm, TCP_METRIC_CWND)) | ||
467 | tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND); | ||
468 | |||
469 | val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); | ||
470 | if (val) { | ||
471 | tp->snd_ssthresh = val; | ||
472 | if (tp->snd_ssthresh > tp->snd_cwnd_clamp) | ||
473 | tp->snd_ssthresh = tp->snd_cwnd_clamp; | ||
474 | } else { | ||
475 | /* ssthresh may have been reduced unnecessarily during | ||
476 | * 3WHS. Restore it to its initial default. | ||
477 | */ | ||
478 | tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; | ||
479 | } | ||
480 | val = tcp_metric_get(tm, TCP_METRIC_REORDERING); | ||
481 | if (val && tp->reordering != val) { | ||
482 | tcp_disable_fack(tp); | ||
483 | tcp_disable_early_retrans(tp); | ||
484 | tp->reordering = val; | ||
485 | } | ||
486 | |||
487 | val = tcp_metric_get(tm, TCP_METRIC_RTT); | ||
488 | if (val == 0 || tp->srtt == 0) { | ||
489 | rcu_read_unlock(); | ||
490 | goto reset; | ||
491 | } | ||
492 | /* The initial rtt is determined from the SYN, SYN-ACK exchange. | ||
493 | * That segment is small and the rtt may appear much | ||
494 | * less than the real one. Use per-destination memory | ||
495 | * to make it more realistic. | ||
496 | * | ||
497 | * A bit of theory. RTT is the time elapsed after a "normal" sized | ||
498 | * packet is sent until it is ACKed. In normal circumstances sending | ||
499 | * small packets forces the peer to delay ACKs, so the calculation | ||
500 | * stays correct. The algorithm is adaptive and, provided we follow | ||
501 | * the specs, it NEVER underestimates RTT. BUT! If the peer plays | ||
502 | * clever tricks such as "quick acks" for long enough to drive the | ||
503 | * RTT down to a low value, and then abruptly stops and starts to | ||
504 | * delay ACKs, expect trouble. | ||
505 | */ | ||
506 | val = msecs_to_jiffies(val); | ||
507 | if (val > tp->srtt) { | ||
508 | tp->srtt = val; | ||
509 | tp->rtt_seq = tp->snd_nxt; | ||
510 | } | ||
511 | val = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR); | ||
512 | if (val > tp->mdev) { | ||
513 | tp->mdev = val; | ||
514 | tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); | ||
515 | } | ||
516 | rcu_read_unlock(); | ||
517 | |||
518 | tcp_set_rto(sk); | ||
519 | reset: | ||
520 | if (tp->srtt == 0) { | ||
521 | /* RFC6298: 5.7 We've failed to get a valid RTT sample from | ||
522 | * 3WHS. This is most likely due to retransmission, | ||
523 | * including spurious one. Reset the RTO back to 3secs | ||
524 | * from the more aggressive 1sec to avoid more spurious | ||
525 | * retransmission. | ||
526 | */ | ||
527 | tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; | ||
528 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; | ||
529 | } | ||
530 | /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been | ||
531 | * retransmitted. In light of RFC6298 more aggressive 1sec | ||
532 | * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK | ||
533 | * retransmission has occurred. | ||
534 | */ | ||
535 | if (tp->total_retrans > 1) | ||
536 | tp->snd_cwnd = 1; | ||
537 | else | ||
538 | tp->snd_cwnd = tcp_init_cwnd(tp, dst); | ||
539 | tp->snd_cwnd_stamp = tcp_time_stamp; | ||
540 | } | ||
541 | |||
542 | bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check) | ||
543 | { | ||
544 | struct tcp_metrics_block *tm; | ||
545 | bool ret; | ||
546 | |||
547 | if (!dst) | ||
548 | return false; | ||
549 | |||
550 | rcu_read_lock(); | ||
551 | tm = __tcp_get_metrics_req(req, dst); | ||
552 | if (paws_check) { | ||
553 | if (tm && | ||
554 | (u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL && | ||
555 | (s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW) | ||
556 | ret = false; | ||
557 | else | ||
558 | ret = true; | ||
559 | } else { | ||
560 | if (tm && tcp_metric_get(tm, TCP_METRIC_RTT) && tm->tcpm_ts_stamp) | ||
561 | ret = true; | ||
562 | else | ||
563 | ret = false; | ||
564 | } | ||
565 | rcu_read_unlock(); | ||
566 | |||
567 | return ret; | ||
568 | } | ||
569 | EXPORT_SYMBOL_GPL(tcp_peer_is_proven); | ||
570 | |||
571 | void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst) | ||
572 | { | ||
573 | struct tcp_metrics_block *tm; | ||
574 | |||
575 | rcu_read_lock(); | ||
576 | tm = tcp_get_metrics(sk, dst, true); | ||
577 | if (tm) { | ||
578 | struct tcp_sock *tp = tcp_sk(sk); | ||
579 | |||
580 | if ((u32)get_seconds() - tm->tcpm_ts_stamp <= TCP_PAWS_MSL) { | ||
581 | tp->rx_opt.ts_recent_stamp = tm->tcpm_ts_stamp; | ||
582 | tp->rx_opt.ts_recent = tm->tcpm_ts; | ||
583 | } | ||
584 | } | ||
585 | rcu_read_unlock(); | ||
586 | } | ||
587 | EXPORT_SYMBOL_GPL(tcp_fetch_timewait_stamp); | ||
588 | |||
589 | /* VJ's idea. Save the last timestamp seen from this destination and | ||
590 | * hold it for at least the normal timewait interval, to use for | ||
591 | * duplicate segment detection in subsequent connections, before they | ||
592 | * enter the synchronized state. | ||
593 | */ | ||
594 | bool tcp_remember_stamp(struct sock *sk) | ||
595 | { | ||
596 | struct dst_entry *dst = __sk_dst_get(sk); | ||
597 | bool ret = false; | ||
598 | |||
599 | if (dst) { | ||
600 | struct tcp_metrics_block *tm; | ||
601 | |||
602 | rcu_read_lock(); | ||
603 | tm = tcp_get_metrics(sk, dst, true); | ||
604 | if (tm) { | ||
605 | struct tcp_sock *tp = tcp_sk(sk); | ||
606 | |||
607 | if ((s32)(tm->tcpm_ts - tp->rx_opt.ts_recent) <= 0 || | ||
608 | ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL && | ||
609 | tm->tcpm_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { | ||
610 | tm->tcpm_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; | ||
611 | tm->tcpm_ts = tp->rx_opt.ts_recent; | ||
612 | } | ||
613 | ret = true; | ||
614 | } | ||
615 | rcu_read_unlock(); | ||
616 | } | ||
617 | return ret; | ||
618 | } | ||
619 | |||
620 | bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) | ||
621 | { | ||
622 | struct tcp_metrics_block *tm; | ||
623 | bool ret = false; | ||
624 | |||
625 | rcu_read_lock(); | ||
626 | tm = __tcp_get_metrics_tw(tw); | ||
627 | if (tm) { | ||
628 | const struct tcp_timewait_sock *tcptw; | ||
629 | struct sock *sk = (struct sock *) tw; | ||
630 | |||
631 | tcptw = tcp_twsk(sk); | ||
632 | if ((s32)(tm->tcpm_ts - tcptw->tw_ts_recent) <= 0 || | ||
633 | ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL && | ||
634 | tm->tcpm_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { | ||
635 | tm->tcpm_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; | ||
636 | tm->tcpm_ts = tcptw->tw_ts_recent; | ||
637 | } | ||
638 | ret = true; | ||
639 | } | ||
640 | rcu_read_unlock(); | ||
641 | |||
642 | return ret; | ||
643 | } | ||
644 | |||
645 | static unsigned long tcpmhash_entries; | ||
646 | static int __init set_tcpmhash_entries(char *str) | ||
647 | { | ||
648 | ssize_t ret; | ||
649 | |||
650 | if (!str) | ||
651 | return 0; | ||
652 | |||
653 | ret = kstrtoul(str, 0, &tcpmhash_entries); | ||
654 | if (ret) | ||
655 | return 0; | ||
656 | |||
657 | return 1; | ||
658 | } | ||
659 | __setup("tcpmhash_entries=", set_tcpmhash_entries); | ||
660 | |||
661 | static int __net_init tcp_net_metrics_init(struct net *net) | ||
662 | { | ||
663 | int slots, size; | ||
664 | |||
665 | slots = tcpmhash_entries; | ||
666 | if (!slots) { | ||
667 | if (totalram_pages >= 128 * 1024) | ||
668 | slots = 16 * 1024; | ||
669 | else | ||
670 | slots = 8 * 1024; | ||
671 | } | ||
672 | |||
673 | size = slots * sizeof(struct tcpm_hash_bucket); | ||
674 | |||
675 | net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL); | ||
676 | if (!net->ipv4.tcp_metrics_hash) | ||
677 | return -ENOMEM; | ||
678 | |||
679 | net->ipv4.tcp_metrics_hash_mask = (slots - 1); | ||
680 | |||
681 | return 0; | ||
682 | } | ||
683 | |||
684 | static void __net_exit tcp_net_metrics_exit(struct net *net) | ||
685 | { | ||
686 | kfree(net->ipv4.tcp_metrics_hash); | ||
687 | } | ||
688 | |||
689 | static __net_initdata struct pernet_operations tcp_net_metrics_ops = { | ||
690 | .init = tcp_net_metrics_init, | ||
691 | .exit = tcp_net_metrics_exit, | ||
692 | }; | ||
693 | |||
694 | void __init tcp_metrics_init(void) | ||
695 | { | ||
696 | register_pernet_subsys(&tcp_net_metrics_ops); | ||
697 | } | ||
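
All three lookup paths above compute the bucket the same way: fold the destination address into 32 bits, xor the three upper byte-shifts back in, and mask with tcp_metrics_hash_mask, which is slots - 1 and therefore assumes a power-of-two slot count (as the built-in 8K/16K defaults are). A user-space sketch of the IPv4 case, with the function name ours:

    #include <stdint.h>
    #include <stdio.h>

    /* Bucket selection as done by tcp_get_metrics() for AF_INET;
     * 'slots' must be a power of two. Illustrative only.
     */
    static unsigned int tcpm_bucket_v4(uint32_t daddr, unsigned int slots)
    {
        unsigned int hash = daddr;

        hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8);
        return hash & (slots - 1);
    }

    int main(void)
    {
        /* 192.0.2.1 as a 32-bit value; byte order only shifts the
         * distribution and does not matter for the illustration. */
        printf("bucket %u of %u\n",
               tcpm_bucket_v4(0xc0000201u, 8 * 1024), 8 * 1024);
        return 0;
    }

The slot count can also be forced on the kernel command line via tcpmhash_entries=, parsed by set_tcpmhash_entries() above; since nothing rounds the value, a non-power-of-two count would leave some buckets unreachable through the mask.
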
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 72b7c63b1a39..65608863fdee 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -49,52 +49,6 @@ struct inet_timewait_death_row tcp_death_row = { | |||
49 | }; | 49 | }; |
50 | EXPORT_SYMBOL_GPL(tcp_death_row); | 50 | EXPORT_SYMBOL_GPL(tcp_death_row); |
51 | 51 | ||
52 | /* VJ's idea. Save last timestamp seen from this destination | ||
53 | * and hold it at least for normal timewait interval to use for duplicate | ||
54 | * segment detection in subsequent connections, before they enter synchronized | ||
55 | * state. | ||
56 | */ | ||
57 | |||
58 | static bool tcp_remember_stamp(struct sock *sk) | ||
59 | { | ||
60 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
61 | struct tcp_sock *tp = tcp_sk(sk); | ||
62 | struct inet_peer *peer; | ||
63 | |||
64 | peer = icsk->icsk_af_ops->get_peer(sk); | ||
65 | if (peer) { | ||
66 | if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || | ||
67 | ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && | ||
68 | peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { | ||
69 | peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; | ||
70 | peer->tcp_ts = tp->rx_opt.ts_recent; | ||
71 | } | ||
72 | return true; | ||
73 | } | ||
74 | |||
75 | return false; | ||
76 | } | ||
77 | |||
78 | static bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) | ||
79 | { | ||
80 | const struct tcp_timewait_sock *tcptw; | ||
81 | struct sock *sk = (struct sock *) tw; | ||
82 | struct inet_peer *peer; | ||
83 | |||
84 | tcptw = tcp_twsk(sk); | ||
85 | peer = tcptw->tw_peer; | ||
86 | if (peer) { | ||
87 | if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || | ||
88 | ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && | ||
89 | peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { | ||
90 | peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; | ||
91 | peer->tcp_ts = tcptw->tw_ts_recent; | ||
92 | } | ||
93 | return true; | ||
94 | } | ||
95 | return false; | ||
96 | } | ||
97 | |||
98 | static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) | 52 | static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) |
99 | { | 53 | { |
100 | if (seq == s_win) | 54 | if (seq == s_win) |
@@ -313,12 +267,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
313 | const struct inet_connection_sock *icsk = inet_csk(sk); | 267 | const struct inet_connection_sock *icsk = inet_csk(sk); |
314 | const struct tcp_sock *tp = tcp_sk(sk); | 268 | const struct tcp_sock *tp = tcp_sk(sk); |
315 | bool recycle_ok = false; | 269 | bool recycle_ok = false; |
316 | bool recycle_on = false; | ||
317 | 270 | ||
318 | if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) { | 271 | if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) |
319 | recycle_ok = tcp_remember_stamp(sk); | 272 | recycle_ok = tcp_remember_stamp(sk); |
320 | recycle_on = true; | ||
321 | } | ||
322 | 273 | ||
323 | if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) | 274 | if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) |
324 | tw = inet_twsk_alloc(sk, state); | 275 | tw = inet_twsk_alloc(sk, state); |
@@ -327,7 +278,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
327 | struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); | 278 | struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); |
328 | const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); | 279 | const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); |
329 | struct inet_sock *inet = inet_sk(sk); | 280 | struct inet_sock *inet = inet_sk(sk); |
330 | struct inet_peer *peer = NULL; | ||
331 | 281 | ||
332 | tw->tw_transparent = inet->transparent; | 282 | tw->tw_transparent = inet->transparent; |
333 | tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; | 283 | tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; |
@@ -351,12 +301,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
351 | } | 301 | } |
352 | #endif | 302 | #endif |
353 | 303 | ||
354 | if (recycle_on) | ||
355 | peer = icsk->icsk_af_ops->get_peer(sk); | ||
356 | tcptw->tw_peer = peer; | ||
357 | if (peer) | ||
358 | atomic_inc(&peer->refcnt); | ||
359 | |||
360 | #ifdef CONFIG_TCP_MD5SIG | 304 | #ifdef CONFIG_TCP_MD5SIG |
361 | /* | 305 | /* |
362 | * The timewait bucket does not have the key DB from the | 306 | * The timewait bucket does not have the key DB from the |
@@ -408,11 +352,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
408 | 352 | ||
409 | void tcp_twsk_destructor(struct sock *sk) | 353 | void tcp_twsk_destructor(struct sock *sk) |
410 | { | 354 | { |
355 | #ifdef CONFIG_TCP_MD5SIG | ||
411 | struct tcp_timewait_sock *twsk = tcp_twsk(sk); | 356 | struct tcp_timewait_sock *twsk = tcp_twsk(sk); |
412 | 357 | ||
413 | if (twsk->tw_peer) | ||
414 | inet_putpeer(twsk->tw_peer); | ||
415 | #ifdef CONFIG_TCP_MD5SIG | ||
416 | if (twsk->tw_md5_key) { | 358 | if (twsk->tw_md5_key) { |
417 | tcp_free_md5sig_pool(); | 359 | tcp_free_md5sig_pool(); |
418 | kfree_rcu(twsk->tw_md5_key, rcu); | 360 | kfree_rcu(twsk->tw_md5_key, rcu); |
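
Both remember-stamp helpers this file loses (reborn in tcp_metrics.c) gate the overwrite on (s32)(cached_ts - new_ts) <= 0, the standard wraparound-safe ordering test for 32-bit timestamps: the subtraction happens modulo 2^32 and the sign of the result decides which value is newer. A tiny demonstration, helper name ours:

    #include <stdint.h>
    #include <stdio.h>

    /* True when 'b' is as new as or newer than 'a' in wraparound-safe
     * 32-bit arithmetic -- the condition under which the cached
     * timestamp is replaced.
     */
    static int ts_not_newer(uint32_t a, uint32_t b)
    {
        return (int32_t)(a - b) <= 0;
    }

    int main(void)
    {
        /* b wrapped past zero yet is logically newer than a */
        printf("%d\n", ts_not_newer(0xfffffff0u, 0x00000010u)); /* 1 */
        printf("%d\n", ts_not_newer(0x00000010u, 0xfffffff0u)); /* 0 */
        return 0;
    }
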
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 9815ea0bca7f..87d3fcc302d4 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -90,8 +90,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, | |||
90 | xdst->u.dst.dev = dev; | 90 | xdst->u.dst.dev = dev; |
91 | dev_hold(dev); | 91 | dev_hold(dev); |
92 | 92 | ||
93 | rt_transfer_peer(&xdst->u.rt, rt); | ||
94 | |||
95 | /* Sheit... I remember I did this right. Apparently, | 93 | /* Sheit... I remember I did this right. Apparently, |
96 | * it was magically lost, so this code needs audit */ | 94 | * it was magically lost, so this code needs audit */ |
97 | xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | | 95 | xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | |
@@ -100,6 +98,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, | |||
100 | xdst->u.rt.rt_src = rt->rt_src; | 98 | xdst->u.rt.rt_src = rt->rt_src; |
101 | xdst->u.rt.rt_dst = rt->rt_dst; | 99 | xdst->u.rt.rt_dst = rt->rt_dst; |
102 | xdst->u.rt.rt_gateway = rt->rt_gateway; | 100 | xdst->u.rt.rt_gateway = rt->rt_gateway; |
101 | xdst->u.rt.rt_pmtu = rt->rt_pmtu; | ||
103 | 102 | ||
104 | return 0; | 103 | return 0; |
105 | } | 104 | } |
@@ -209,11 +208,6 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) | |||
209 | 208 | ||
210 | dst_destroy_metrics_generic(dst); | 209 | dst_destroy_metrics_generic(dst); |
211 | 210 | ||
212 | if (rt_has_peer(&xdst->u.rt)) { | ||
213 | struct inet_peer *peer = rt_peer_ptr(&xdst->u.rt); | ||
214 | inet_putpeer(peer); | ||
215 | } | ||
216 | |||
217 | xfrm_dst_destroy(xdst); | 211 | xfrm_dst_destroy(xdst); |
218 | } | 212 | } |
219 | 213 | ||
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index c7da1422cbde..a113f7d7e938 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c | |||
@@ -194,8 +194,10 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type, | |||
194 | if (rt->rt6i_dst.plen < 128) | 194 | if (rt->rt6i_dst.plen < 128) |
195 | tmo >>= ((128 - rt->rt6i_dst.plen)>>5); | 195 | tmo >>= ((128 - rt->rt6i_dst.plen)>>5); |
196 | 196 | ||
197 | peer = rt6_get_peer_create(rt); | 197 | peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); |
198 | res = inet_peer_xrlim_allow(peer, tmo); | 198 | res = inet_peer_xrlim_allow(peer, tmo); |
199 | if (peer) | ||
200 | inet_putpeer(peer); | ||
199 | } | 201 | } |
200 | dst_release(dst); | 202 | dst_release(dst); |
201 | return res; | 203 | return res; |
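
With the peer pointer no longer cached in the route, this and the following ICMPv6/ndisc sites all adopt the same discipline: inet_getpeer_v6() looks up (and, with a final argument of 1, creates) the peer and takes a reference, inet_peer_xrlim_allow() charges its token bucket and treats a NULL peer as allowed, and inet_putpeer() drops the reference. The shape of the pattern as a sketch, assuming kernel context, with emit() a hypothetical stand-in for the rate-limited action:

    /* Sketch only: 'net' and 'rt' as in the callers above. */
    static void rate_limited_emit(struct net *net, struct rt6_info *rt)
    {
        struct inet_peer *peer;
        bool allow;

        peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
        allow = inet_peer_xrlim_allow(peer, 1 * HZ); /* NULL peer => allowed */
        if (peer)
            inet_putpeer(peer);  /* drop the lookup reference */
        if (allow)
            emit();              /* hypothetical action */
    }
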
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index c6af5963a202..5b2d63ed793e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c | |||
@@ -466,13 +466,15 @@ int ip6_forward(struct sk_buff *skb) | |||
466 | else | 466 | else |
467 | target = &hdr->daddr; | 467 | target = &hdr->daddr; |
468 | 468 | ||
469 | peer = rt6_get_peer_create(rt); | 469 | peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); |
470 | 470 | ||
471 | /* Limit redirects both by destination (here) | 471 | /* Limit redirects both by destination (here) |
472 | and by source (inside ndisc_send_redirect) | 472 | and by source (inside ndisc_send_redirect) |
473 | */ | 473 | */ |
474 | if (inet_peer_xrlim_allow(peer, 1*HZ)) | 474 | if (inet_peer_xrlim_allow(peer, 1*HZ)) |
475 | ndisc_send_redirect(skb, target); | 475 | ndisc_send_redirect(skb, target); |
476 | if (peer) | ||
477 | inet_putpeer(peer); | ||
476 | } else { | 478 | } else { |
477 | int addrtype = ipv6_addr_type(&hdr->saddr); | 479 | int addrtype = ipv6_addr_type(&hdr->saddr); |
478 | 480 | ||
@@ -592,10 +594,14 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) | |||
592 | int old, new; | 594 | int old, new; |
593 | 595 | ||
594 | if (rt && !(rt->dst.flags & DST_NOPEER)) { | 596 | if (rt && !(rt->dst.flags & DST_NOPEER)) { |
595 | struct inet_peer *peer = rt6_get_peer_create(rt); | 597 | struct inet_peer *peer; |
598 | struct net *net; | ||
596 | 599 | ||
600 | net = dev_net(rt->dst.dev); | ||
601 | peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); | ||
597 | if (peer) { | 602 | if (peer) { |
598 | fhdr->identification = htonl(inet_getid(peer, 0)); | 603 | fhdr->identification = htonl(inet_getid(peer, 0)); |
604 | inet_putpeer(peer); | ||
599 | return; | 605 | return; |
600 | } | 606 | } |
601 | } | 607 | } |
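
ipv6_select_ident() keeps using the peer as a per-destination fragment-ID source: inet_getid() hands out identifiers from the peer's counter so fragments toward one host do not collide, and the lookup reference is dropped right after. A deliberately simplified user-space model of such a per-peer counter, all names ours, not the kernel implementation:

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Per-destination fragment-ID counter in the spirit of inet_getid(). */
    struct peer_ident {
        atomic_uint id_count;
    };

    static uint32_t next_frag_id(struct peer_ident *p)
    {
        return atomic_fetch_add_explicit(&p->id_count, 1,
                                         memory_order_relaxed);
    }

    int main(void)
    {
        struct peer_ident p = { 0 };
        int i;

        for (i = 0; i < 3; i++)
            printf("%u ", next_frag_id(&p));  /* 0 1 2 */
        printf("\n");
        return 0;
    }
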
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 69a6330dea91..0fddd571400d 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c | |||
@@ -1486,6 +1486,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) | |||
1486 | int rd_len; | 1486 | int rd_len; |
1487 | int err; | 1487 | int err; |
1488 | u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; | 1488 | u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; |
1489 | bool ret; | ||
1489 | 1490 | ||
1490 | if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { | 1491 | if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { |
1491 | ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n", | 1492 | ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n", |
@@ -1519,8 +1520,11 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) | |||
1519 | "Redirect: destination is not a neighbour\n"); | 1520 | "Redirect: destination is not a neighbour\n"); |
1520 | goto release; | 1521 | goto release; |
1521 | } | 1522 | } |
1522 | peer = rt6_get_peer_create(rt); | 1523 | peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); |
1523 | if (!inet_peer_xrlim_allow(peer, 1*HZ)) | 1524 | ret = inet_peer_xrlim_allow(peer, 1*HZ); |
1525 | if (peer) | ||
1526 | inet_putpeer(peer); | ||
1527 | if (!ret) | ||
1524 | goto release; | 1528 | goto release; |
1525 | 1529 | ||
1526 | if (dev->addr_len) { | 1530 | if (dev->addr_len) { |
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6cc6c881f54f..563f12c1c99c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -1093,7 +1093,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, | |||
1093 | memset(&fl6, 0, sizeof(fl6)); | 1093 | memset(&fl6, 0, sizeof(fl6)); |
1094 | fl6.flowi6_oif = oif; | 1094 | fl6.flowi6_oif = oif; |
1095 | fl6.flowi6_mark = mark; | 1095 | fl6.flowi6_mark = mark; |
1096 | fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS; | 1096 | fl6.flowi6_flags = 0; |
1097 | fl6.daddr = iph->daddr; | 1097 | fl6.daddr = iph->daddr; |
1098 | fl6.saddr = iph->saddr; | 1098 | fl6.saddr = iph->saddr; |
1099 | fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; | 1099 | fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; |
@@ -2348,13 +2348,11 @@ static int rt6_fill_node(struct net *net, | |||
2348 | int iif, int type, u32 pid, u32 seq, | 2348 | int iif, int type, u32 pid, u32 seq, |
2349 | int prefix, int nowait, unsigned int flags) | 2349 | int prefix, int nowait, unsigned int flags) |
2350 | { | 2350 | { |
2351 | const struct inet_peer *peer; | ||
2352 | struct rtmsg *rtm; | 2351 | struct rtmsg *rtm; |
2353 | struct nlmsghdr *nlh; | 2352 | struct nlmsghdr *nlh; |
2354 | long expires; | 2353 | long expires; |
2355 | u32 table; | 2354 | u32 table; |
2356 | struct neighbour *n; | 2355 | struct neighbour *n; |
2357 | u32 ts, tsage; | ||
2358 | 2356 | ||
2359 | if (prefix) { /* user wants prefix routes only */ | 2357 | if (prefix) { /* user wants prefix routes only */ |
2360 | if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { | 2358 | if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { |
@@ -2473,17 +2471,7 @@ static int rt6_fill_node(struct net *net, | |||
2473 | else | 2471 | else |
2474 | expires = INT_MAX; | 2472 | expires = INT_MAX; |
2475 | 2473 | ||
2476 | peer = NULL; | 2474 | if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) |
2477 | if (rt6_has_peer(rt)) | ||
2478 | peer = rt6_peer_ptr(rt); | ||
2479 | ts = tsage = 0; | ||
2480 | if (peer && peer->tcp_ts_stamp) { | ||
2481 | ts = peer->tcp_ts; | ||
2482 | tsage = get_seconds() - peer->tcp_ts_stamp; | ||
2483 | } | ||
2484 | |||
2485 | if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage, | ||
2486 | expires, rt->dst.error) < 0) | ||
2487 | goto nla_put_failure; | 2475 | goto nla_put_failure; |
2488 | 2476 | ||
2489 | return nlmsg_end(skb, nlh); | 2477 | return nlmsg_end(skb, nlh); |
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6cc67ed6c2e6..61175cb2478f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -277,22 +277,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | |||
277 | rt = (struct rt6_info *) dst; | 277 | rt = (struct rt6_info *) dst; |
278 | if (tcp_death_row.sysctl_tw_recycle && | 278 | if (tcp_death_row.sysctl_tw_recycle && |
279 | !tp->rx_opt.ts_recent_stamp && | 279 | !tp->rx_opt.ts_recent_stamp && |
280 | ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) { | 280 | ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) |
281 | struct inet_peer *peer = rt6_get_peer(rt); | 281 | tcp_fetch_timewait_stamp(sk, dst); |
282 | /* | ||
283 | * VJ's idea. We save last timestamp seen from | ||
284 | * the destination in peer table, when entering state | ||
285 | * TIME-WAIT * and initialize rx_opt.ts_recent from it, | ||
286 | * when trying new connection. | ||
287 | */ | ||
288 | if (peer) { | ||
289 | inet_peer_refcheck(peer); | ||
290 | if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { | ||
291 | tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; | ||
292 | tp->rx_opt.ts_recent = peer->tcp_ts; | ||
293 | } | ||
294 | } | ||
295 | } | ||
296 | 282 | ||
297 | icsk->icsk_ext_hdr_len = 0; | 283 | icsk->icsk_ext_hdr_len = 0; |
298 | if (np->opt) | 284 | if (np->opt) |
@@ -1134,8 +1120,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1134 | treq->iif = inet6_iif(skb); | 1120 | treq->iif = inet6_iif(skb); |
1135 | 1121 | ||
1136 | if (!isn) { | 1122 | if (!isn) { |
1137 | struct inet_peer *peer = NULL; | ||
1138 | |||
1139 | if (ipv6_opt_accepted(sk, skb) || | 1123 | if (ipv6_opt_accepted(sk, skb) || |
1140 | np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || | 1124 | np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || |
1141 | np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { | 1125 | np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { |
@@ -1160,14 +1144,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1160 | */ | 1144 | */ |
1161 | if (tmp_opt.saw_tstamp && | 1145 | if (tmp_opt.saw_tstamp && |
1162 | tcp_death_row.sysctl_tw_recycle && | 1146 | tcp_death_row.sysctl_tw_recycle && |
1163 | (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL && | 1147 | (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) { |
1164 | (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL && | 1148 | if (!tcp_peer_is_proven(req, dst, true)) { |
1165 | ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6, | ||
1166 | &treq->rmt_addr)) { | ||
1167 | inet_peer_refcheck(peer); | ||
1168 | if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && | ||
1169 | (s32)(peer->tcp_ts - req->ts_recent) > | ||
1170 | TCP_PAWS_WINDOW) { | ||
1171 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); | 1149 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); |
1172 | goto drop_and_release; | 1150 | goto drop_and_release; |
1173 | } | 1151 | } |
@@ -1176,8 +1154,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1176 | else if (!sysctl_tcp_syncookies && | 1154 | else if (!sysctl_tcp_syncookies && |
1177 | (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < | 1155 | (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < |
1178 | (sysctl_max_syn_backlog >> 2)) && | 1156 | (sysctl_max_syn_backlog >> 2)) && |
1179 | (!peer || !peer->tcp_ts_stamp) && | 1157 | !tcp_peer_is_proven(req, dst, false)) { |
1180 | (!dst || !dst_metric(dst, RTAX_RTT))) { | ||
1181 | /* Without syncookies last quarter of | 1158 | /* Without syncookies last quarter of |
1182 | * backlog is filled with destinations, | 1159 | * backlog is filled with destinations, |
1183 | * proven to be alive. | 1160 | * proven to be alive. |
@@ -1712,20 +1689,6 @@ do_time_wait: | |||
1712 | goto discard_it; | 1689 | goto discard_it; |
1713 | } | 1690 | } |
1714 | 1691 | ||
1715 | static struct inet_peer *tcp_v6_get_peer(struct sock *sk) | ||
1716 | { | ||
1717 | struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk); | ||
1718 | struct ipv6_pinfo *np = inet6_sk(sk); | ||
1719 | |||
1720 | /* If we don't have a valid cached route, or we're doing IP | ||
1721 | * options which make the IPv6 header destination address | ||
1722 | * different from our peer's, do not bother with this. | ||
1723 | */ | ||
1724 | if (!rt || !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) | ||
1725 | return NULL; | ||
1726 | return rt6_get_peer_create(rt); | ||
1727 | } | ||
1728 | |||
1729 | static struct timewait_sock_ops tcp6_timewait_sock_ops = { | 1692 | static struct timewait_sock_ops tcp6_timewait_sock_ops = { |
1730 | .twsk_obj_size = sizeof(struct tcp6_timewait_sock), | 1693 | .twsk_obj_size = sizeof(struct tcp6_timewait_sock), |
1731 | .twsk_unique = tcp_twsk_unique, | 1694 | .twsk_unique = tcp_twsk_unique, |
@@ -1738,7 +1701,6 @@ static const struct inet_connection_sock_af_ops ipv6_specific = { | |||
1738 | .rebuild_header = inet6_sk_rebuild_header, | 1701 | .rebuild_header = inet6_sk_rebuild_header, |
1739 | .conn_request = tcp_v6_conn_request, | 1702 | .conn_request = tcp_v6_conn_request, |
1740 | .syn_recv_sock = tcp_v6_syn_recv_sock, | 1703 | .syn_recv_sock = tcp_v6_syn_recv_sock, |
1741 | .get_peer = tcp_v6_get_peer, | ||
1742 | .net_header_len = sizeof(struct ipv6hdr), | 1704 | .net_header_len = sizeof(struct ipv6hdr), |
1743 | .net_frag_header_len = sizeof(struct frag_hdr), | 1705 | .net_frag_header_len = sizeof(struct frag_hdr), |
1744 | .setsockopt = ipv6_setsockopt, | 1706 | .setsockopt = ipv6_setsockopt, |
@@ -1770,7 +1732,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { | |||
1770 | .rebuild_header = inet_sk_rebuild_header, | 1732 | .rebuild_header = inet_sk_rebuild_header, |
1771 | .conn_request = tcp_v6_conn_request, | 1733 | .conn_request = tcp_v6_conn_request, |
1772 | .syn_recv_sock = tcp_v6_syn_recv_sock, | 1734 | .syn_recv_sock = tcp_v6_syn_recv_sock, |
1773 | .get_peer = tcp_v4_get_peer, | ||
1774 | .net_header_len = sizeof(struct iphdr), | 1735 | .net_header_len = sizeof(struct iphdr), |
1775 | .setsockopt = ipv6_setsockopt, | 1736 | .setsockopt = ipv6_setsockopt, |
1776 | .getsockopt = ipv6_getsockopt, | 1737 | .getsockopt = ipv6_getsockopt, |