 include/net/dst.h                   |   15
 include/net/flow.h                  |    1
 include/net/inet_connection_sock.h  |    3
 include/net/ip_fib.h                |    3
 include/net/route.h                 |   40
 net/core/dst.c                      |    4
 net/dccp/ipv4.c                     |    2
 net/decnet/dn_route.c               |    4
 net/ipv4/arp.c                      |    5
 net/ipv4/fib_frontend.c             |    5
 net/ipv4/fib_semantics.c            |    4
 net/ipv4/inet_connection_sock.c     |    9
 net/ipv4/ip_fragment.c              |    4
 net/ipv4/ip_gre.c                   |    2
 net/ipv4/ip_input.c                 |    4
 net/ipv4/ip_output.c                |    2
 net/ipv4/ipip.c                     |    2
 net/ipv4/ipmr.c                     |    9
 net/ipv4/netfilter/ipt_MASQUERADE.c |    5
 net/ipv4/route.c                    | 1329
 net/ipv4/tcp_ipv4.c                 |    4
 net/ipv4/xfrm4_input.c              |    4
 net/ipv4/xfrm4_policy.c             |    9
 net/ipv6/route.c                    |    4
 net/sctp/transport.c                |    2
 net/xfrm/xfrm_policy.c              |   23
 26 files changed, 292 insertions(+), 1206 deletions(-)
diff --git a/include/net/dst.h b/include/net/dst.h
index 51610468c63d..baf597890064 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -65,7 +65,20 @@ struct dst_entry {
         unsigned short pending_confirm;
 
         short error;
+
+        /* A non-zero value of dst->obsolete forces by-hand validation
+         * of the route entry.  Positive values are set by the generic
+         * dst layer to indicate that the entry has been forcefully
+         * destroyed.
+         *
+         * Negative values are used by the implementation layer code to
+         * force invocation of the dst_ops->check() method.
+         */
         short obsolete;
+#define DST_OBSOLETE_NONE       0
+#define DST_OBSOLETE_DEAD       2
+#define DST_OBSOLETE_FORCE_CHK  -1
+#define DST_OBSOLETE_KILL       -2
         unsigned short header_len;     /* more space at head required */
         unsigned short trailer_len;    /* space to reserve at tail */
 #ifdef CONFIG_IP_ROUTE_CLASSID
@@ -359,7 +372,7 @@ extern struct dst_entry *dst_destroy(struct dst_entry *dst);
 
 static inline void dst_free(struct dst_entry *dst)
 {
-        if (dst->obsolete > 1)
+        if (dst->obsolete > 0)
                 return;
         if (!atomic_read(&dst->__refcnt)) {
                 dst = dst_destroy(dst);
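
The four DST_OBSOLETE_* codes replace the old magic values 0/1/2, which is why dst_free() above now tests "> 0". A minimal user-space sketch of how the codes are meant to be read; the constants are copied from the hunk, while the helper and main() are illustrative only, not kernel code:

#include <stdio.h>

/* Constants copied from the hunk above. */
#define DST_OBSOLETE_NONE       0
#define DST_OBSOLETE_DEAD       2
#define DST_OBSOLETE_FORCE_CHK  -1
#define DST_OBSOLETE_KILL       -2

/* Positive codes come from the generic dst layer (entry forcefully
 * destroyed, so dst_free() becomes a no-op); negative codes come from
 * the protocol and force a trip through dst_ops->check(). */
static const char *obsolete_meaning(short obsolete)
{
        if (obsolete == DST_OBSOLETE_NONE)
                return "valid, no by-hand validation needed";
        if (obsolete > 0)
                return "destroyed by the dst layer";
        if (obsolete == DST_OBSOLETE_FORCE_CHK)
                return "usable only if dst_ops->check() approves";
        return "killed by the protocol, never reuse";
}

int main(void)
{
        const short codes[] = { DST_OBSOLETE_NONE, DST_OBSOLETE_DEAD,
                                DST_OBSOLETE_FORCE_CHK, DST_OBSOLETE_KILL };
        for (int i = 0; i < 4; i++)
                printf("%2d: %s\n", codes[i], obsolete_meaning(codes[i]));
        return 0;
}
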
diff --git a/include/net/flow.h b/include/net/flow.h
index ce9cb7656b47..e1dd5082ec7e 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -21,7 +21,6 @@ struct flowi_common {
         __u8 flowic_flags;
 #define FLOWI_FLAG_ANYSRC       0x01
 #define FLOWI_FLAG_CAN_SLEEP    0x02
-#define FLOWI_FLAG_RT_NOCACHE   0x04
         __u32 flowic_secid;
 };
 
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 2cf44b4ed2e6..5ee66f517b4f 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -250,8 +250,7 @@ extern int inet_csk_get_port(struct sock *sk, unsigned short snum);
 
 extern struct dst_entry* inet_csk_route_req(struct sock *sk,
                                             struct flowi4 *fl4,
-                                            const struct request_sock *req,
-                                            bool nocache);
+                                            const struct request_sock *req);
 extern struct dst_entry* inet_csk_route_child_sock(struct sock *sk,
                                                    struct sock *newsk,
                                                    const struct request_sock *req);
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 2daf096dfc60..e69c3a47153d 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -46,6 +46,7 @@ struct fib_config {
 };
 
 struct fib_info;
+struct rtable;
 
 struct fib_nh_exception {
         struct fib_nh_exception __rcu *fnhe_next;
@@ -80,6 +81,8 @@ struct fib_nh {
         __be32 nh_gw;
         __be32 nh_saddr;
         int nh_saddr_genid;
+        struct rtable *nh_rth_output;
+        struct rtable *nh_rth_input;
         struct fnhe_hash_bucket *nh_exceptions;
 };
 
diff --git a/include/net/route.h b/include/net/route.h
index ace3cb442519..60d611dc5cee 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -44,38 +44,35 @@ struct fib_info;
 struct rtable {
         struct dst_entry dst;
 
-        /* Lookup key. */
-        __be32 rt_key_dst;
-        __be32 rt_key_src;
-
         int rt_genid;
         unsigned int rt_flags;
         __u16 rt_type;
-        __u8 rt_key_tos;
+        __u16 rt_is_input;
 
-        __be32 rt_dst;  /* Path destination */
-        __be32 rt_src;  /* Path source */
-        int rt_route_iif;
         int rt_iif;
-        int rt_oif;
-        __u32 rt_mark;
 
         /* Info on neighbour */
         __be32 rt_gateway;
 
         /* Miscellaneous cached information */
         u32 rt_pmtu;
-        struct fib_info *fi; /* for client ref to shared metrics */
 };
 
 static inline bool rt_is_input_route(const struct rtable *rt)
 {
-        return rt->rt_route_iif != 0;
+        return rt->rt_is_input != 0;
 }
 
 static inline bool rt_is_output_route(const struct rtable *rt)
 {
-        return rt->rt_route_iif == 0;
+        return rt->rt_is_input == 0;
+}
+
+static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr)
+{
+        if (rt->rt_gateway)
+                return rt->rt_gateway;
+        return daddr;
 }
 
 struct ip_rt_acct {
@@ -109,7 +106,6 @@ extern struct ip_rt_acct __percpu *ip_rt_acct;
 struct in_device;
 extern int ip_rt_init(void);
 extern void rt_cache_flush(struct net *net, int how);
-extern void rt_cache_flush_batch(struct net *net);
 extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp);
 extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
                                            struct sock *sk);
@@ -161,20 +157,8 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4
         return ip_route_output_key(net, fl4);
 }
 
-extern int ip_route_input_common(struct sk_buff *skb, __be32 dst, __be32 src,
-                                 u8 tos, struct net_device *devin, bool noref);
-
-static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
-                                 u8 tos, struct net_device *devin)
-{
-        return ip_route_input_common(skb, dst, src, tos, devin, false);
-}
-
-static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src,
-                                       u8 tos, struct net_device *devin)
-{
-        return ip_route_input_common(skb, dst, src, tos, devin, true);
-}
+extern int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
+                          u8 tos, struct net_device *devin);
 
 extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
                              int oif, u32 mark, u8 protocol, int flow_flags);
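
rt_gateway is now zero when the destination is on-link, so callers that used to read rt->rt_gateway unconditionally switch to the new rt_nexthop() helper, which falls back to the packet's destination address (the arp.c, ip_gre.c, ipip.c and ipt_MASQUERADE.c hunks below all make exactly that substitution). A compilable user-space model of the helper's fallback behavior, with a stand-in __be32 and a stripped-down struct rtable:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

typedef uint32_t be32;  /* stand-in for the kernel's __be32 */

struct rtable {
        be32 rt_gateway;  /* 0 now means "destination is on-link" */
};

/* Same logic as the rt_nexthop() helper added in the hunk above. */
static be32 rt_nexthop(const struct rtable *rt, be32 daddr)
{
        if (rt->rt_gateway)
                return rt->rt_gateway;
        return daddr;
}

int main(void)
{
        struct rtable via_gw = { .rt_gateway = 0xc0a80101 };  /* via 192.168.1.1 */
        struct rtable onlink = { .rt_gateway = 0 };
        be32 daddr = 0xc0a80105;                              /* to 192.168.1.5 */

        printf("via gateway -> %#010" PRIx32 "\n", rt_nexthop(&via_gw, daddr));
        printf("on-link     -> %#010" PRIx32 "\n", rt_nexthop(&onlink, daddr));
        return 0;
}
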
diff --git a/net/core/dst.c b/net/core/dst.c
index 07bacff84aa4..069d51d29414 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -94,7 +94,7 @@ loop:
                  * But we do not have state "obsoleted, but
                  * referenced by parent", so it is right.
                  */
-                if (dst->obsolete > 1)
+                if (dst->obsolete > 0)
                         continue;
 
                 ___dst_free(dst);
@@ -202,7 +202,7 @@ static void ___dst_free(struct dst_entry *dst)
          */
         if (dst->dev == NULL || !(dst->dev->flags&IFF_UP))
                 dst->input = dst->output = dst_discard;
-        dst->obsolete = 2;
+        dst->obsolete = DST_OBSOLETE_DEAD;
 }
 
 void __dst_free(struct dst_entry *dst)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ab4f44c9bb21..25428d0c50c9 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -508,7 +508,7 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
         struct dst_entry *dst;
         struct flowi4 fl4;
 
-        dst = inet_csk_route_req(sk, &fl4, req, false);
+        dst = inet_csk_route_req(sk, &fl4, req);
         if (dst == NULL)
                 goto out;
 
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 47de90d8fe94..23cc11dd4e40 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1176,7 +1176,7 @@ make_route:
         if (dev_out->flags & IFF_LOOPBACK)
                 flags |= RTCF_LOCAL;
 
-        rt = dst_alloc(&dn_dst_ops, dev_out, 1, 0, DST_HOST);
+        rt = dst_alloc(&dn_dst_ops, dev_out, 1, DST_OBSOLETE_NONE, DST_HOST);
         if (rt == NULL)
                 goto e_nobufs;
 
@@ -1444,7 +1444,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
         }
 
 make_route:
-        rt = dst_alloc(&dn_dst_ops, out_dev, 0, 0, DST_HOST);
+        rt = dst_alloc(&dn_dst_ops, out_dev, 0, DST_OBSOLETE_NONE, DST_HOST);
         if (rt == NULL)
                 goto e_nobufs;
 
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 2e560f0c757d..a0124eb7dbea 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -475,8 +475,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
                 return 1;
         }
 
-        paddr = skb_rtable(skb)->rt_gateway;
-
+        paddr = rt_nexthop(skb_rtable(skb), ip_hdr(skb)->daddr);
         if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr,
                                paddr, dev))
                 return 0;
@@ -828,7 +827,7 @@ static int arp_process(struct sk_buff *skb)
         }
 
         if (arp->ar_op == htons(ARPOP_REQUEST) &&
-            ip_route_input_noref(skb, tip, sip, 0, dev) == 0) {
+            ip_route_input(skb, tip, sip, 0, dev) == 0) {
 
                 rt = skb_rtable(skb);
                 addr_type = rt->rt_type;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index b83203658ee3..f277cf0e6321 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1072,11 +1072,6 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
                 rt_cache_flush(dev_net(dev), 0);
                 break;
         case NETDEV_UNREGISTER_BATCH:
-                /* The batch unregister is only called on the first
-                 * device in the list of devices being unregistered.
-                 * Therefore we should not pass dev_net(dev) in here.
-                 */
-                rt_cache_flush_batch(NULL);
                 break;
         }
         return NOTIFY_DONE;
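
rt_cache_flush_batch() can be dropped here because flushing no longer walks a hash table: as the net/ipv4/route.c hunks below show, rt_cache_flush() now only bumps the per-namespace generation counter, and stale entries are detected lazily by rt_is_expired() at lookup time. A compilable user-space model of that invalidation scheme (names are stand-ins for the kernel fields):

#include <stdio.h>

/* Models net->ipv4.rt_genid: a "flush" is a single counter bump. */
static int net_rt_genid;

struct cached_route {
        int rt_genid;   /* snapshot taken when the route was cached */
};

/* Same comparison as route.c's rt_is_expired() below. */
static int rt_is_expired(const struct cached_route *rt)
{
        return rt->rt_genid != net_rt_genid;
}

int main(void)
{
        struct cached_route rt = { .rt_genid = net_rt_genid };

        printf("before flush, expired: %d\n", rt_is_expired(&rt));
        net_rt_genid++;  /* rt_cache_flush() -> rt_cache_invalidate() */
        printf("after flush,  expired: %d\n", rt_is_expired(&rt));
        return 0;
}
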
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 2b57d768240d..e55171f184f9 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -171,6 +171,10 @@ static void free_fib_info_rcu(struct rcu_head *head)
                         dev_put(nexthop_nh->nh_dev);
                 if (nexthop_nh->nh_exceptions)
                         free_nh_exceptions(nexthop_nh);
+                if (nexthop_nh->nh_rth_output)
+                        dst_release(&nexthop_nh->nh_rth_output->dst);
+                if (nexthop_nh->nh_rth_input)
+                        dst_release(&nexthop_nh->nh_rth_input->dst);
         } endfor_nexthops(fi);
 
         release_net(fi->fib_net);
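
With the global cache gone, each fib_nh caches at most one input and one output route, and each cached rtable holds a dst reference that must be dropped when the fib_info is freed, as the hunk above does. A hedged sketch of that release idiom pulled out as a helper; the helper name is hypothetical, the fields are the ones added to struct fib_nh in the ip_fib.h hunk above, and this fragment will not build standalone:

/* Hypothetical helper: drop the per-nexthop cached routes. Skipping
 * these dst_release() calls on the fib_info free path would leak the
 * cached rtables. */
static void fib_nh_release_cached(struct fib_nh *nh)
{
        if (nh->nh_rth_output)
                dst_release(&nh->nh_rth_output->dst);
        if (nh->nh_rth_input)
                dst_release(&nh->nh_rth_input->dst);
}
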
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index c7a4de05ca04..db0cf17c00f7 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -368,8 +368,7 @@ EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
 
 struct dst_entry *inet_csk_route_req(struct sock *sk,
                                      struct flowi4 *fl4,
-                                     const struct request_sock *req,
-                                     bool nocache)
+                                     const struct request_sock *req)
 {
         struct rtable *rt;
         const struct inet_request_sock *ireq = inet_rsk(req);
@@ -377,8 +376,6 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
         struct net *net = sock_net(sk);
         int flags = inet_sk_flowi_flags(sk);
 
-        if (nocache)
-                flags |= FLOWI_FLAG_RT_NOCACHE;
         flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
                            RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
                            sk->sk_protocol,
@@ -389,7 +386,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
         rt = ip_route_output_flow(net, fl4, sk);
         if (IS_ERR(rt))
                 goto no_route;
-        if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway)
+        if (opt && opt->opt.is_strictroute && rt->rt_gateway)
                 goto route_err;
         return &rt->dst;
 
@@ -422,7 +419,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
         rt = ip_route_output_flow(net, fl4, sk);
         if (IS_ERR(rt))
                 goto no_route;
-        if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway)
+        if (opt && opt->opt.is_strictroute && rt->rt_gateway)
                 goto route_err;
         return &rt->dst;
 
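
The strict-source-route test changes meaning rather than just form: since rt_gateway is now zero for on-link destinations, "a gateway was interposed" reduces to a non-zero test instead of a comparison against fl4->daddr (the ip_output.c hunk below gets the same change). A compilable user-space model of the new predicate:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t be32;

struct rtable {
        be32 rt_gateway;  /* 0 => on-link, non-zero => via a gateway */
};

/* Strict source routing forbids any interposed gateway; with the new
 * rt_gateway convention that is exactly rt_gateway != 0. */
static bool strict_route_violated(const struct rtable *rt)
{
        return rt->rt_gateway != 0;
}

int main(void)
{
        struct rtable onlink = { .rt_gateway = 0 };
        struct rtable via_gw = { .rt_gateway = 0xc0a80101 };

        printf("on-link route violates strict SR: %d\n", strict_route_violated(&onlink));
        printf("gateway route violates strict SR: %d\n", strict_route_violated(&via_gw));
        return 0;
}
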
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 8d07c973409c..7ad88e5e7110 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -258,8 +258,8 @@ static void ip_expire(unsigned long arg)
                 /* skb dst is stale, drop it, and perform route lookup again */
                 skb_dst_drop(head);
                 iph = ip_hdr(head);
-                err = ip_route_input_noref(head, iph->daddr, iph->saddr,
-                                           iph->tos, head->dev);
+                err = ip_route_input(head, iph->daddr, iph->saddr,
+                                     iph->tos, head->dev);
                 if (err)
                         goto out_rcu_unlock;
 
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 42c44b1403c9..b062a98574f2 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -766,7 +766,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 
                 if (skb->protocol == htons(ETH_P_IP)) {
                         rt = skb_rtable(skb);
-                        dst = rt->rt_gateway;
+                        dst = rt_nexthop(rt, old_iph->daddr);
                 }
 #if IS_ENABLED(CONFIG_IPV6)
                 else if (skb->protocol == htons(ETH_P_IPV6)) {
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index b27d4440f523..4ebc6feee250 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -336,8 +336,8 @@ static int ip_rcv_finish(struct sk_buff *skb)
          * how the packet travels inside Linux networking.
          */
         if (!skb_dst(skb)) {
-                int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
-                                               iph->tos, skb->dev);
+                int err = ip_route_input(skb, iph->daddr, iph->saddr,
+                                         iph->tos, skb->dev);
                 if (unlikely(err)) {
                         if (err == -EXDEV)
                                 NET_INC_STATS_BH(dev_net(skb->dev),
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 665abbb7122a..ba39a52d18c1 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -371,7 +371,7 @@ int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl)
         skb_dst_set_noref(skb, &rt->dst);
 
 packet_routed:
-        if (inet_opt && inet_opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway)
+        if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gateway)
                 goto no_route;
 
         /* OK, we know where to send it, allocate and build IP header. */
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 2c2c35bace76..99af1f0cc658 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -487,7 +487,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
                         dev->stats.tx_fifo_errors++;
                         goto tx_error;
                 }
-                dst = rt->rt_gateway;
+                dst = rt_nexthop(rt, old_iph->daddr);
         }
 
         rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5716c6b808d6..8eec8f4a0536 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1795,9 +1795,12 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
                 .daddr = iph->daddr,
                 .saddr = iph->saddr,
                 .flowi4_tos = RT_TOS(iph->tos),
-                .flowi4_oif = rt->rt_oif,
-                .flowi4_iif = rt->rt_iif,
-                .flowi4_mark = rt->rt_mark,
+                .flowi4_oif = (rt_is_output_route(rt) ?
+                               skb->dev->ifindex : 0),
+                .flowi4_iif = (rt_is_output_route(rt) ?
+                               net->loopback_dev->ifindex :
+                               skb->dev->ifindex),
+                .flowi4_mark = skb->mark,
         };
         struct mr_table *mrt;
         int err;
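
Since struct rtable no longer carries rt_oif and rt_mark, ipmr must rebuild the flow key from the skb plus the route's direction bit. A hedged restatement of that reconstruction rule in kernel style (iph, skb, rt and net as in the hunk above; this fragment will not build standalone):

/* Direction comes from the route; everything else from the packet. */
struct flowi4 fl4 = {
        .daddr = iph->daddr,
        .saddr = iph->saddr,
        .flowi4_tos = RT_TOS(iph->tos),
        /* output route: oif is the sending device ... */
        .flowi4_oif = rt_is_output_route(rt) ? skb->dev->ifindex : 0,
        /* ... and iif is the loopback device, matching locally
         * generated traffic; input route: iif is the receiving device. */
        .flowi4_iif = rt_is_output_route(rt) ? net->loopback_dev->ifindex
                                             : skb->dev->ifindex,
        /* the mark lives on the skb now, not on the cached route */
        .flowi4_mark = skb->mark,
};
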
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 2f210c79dc87..cbb6a1a6f6f7 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -52,7 +52,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
         struct nf_nat_ipv4_range newrange;
         const struct nf_nat_ipv4_multi_range_compat *mr;
         const struct rtable *rt;
-        __be32 newsrc;
+        __be32 newsrc, nh;
 
         NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
 
@@ -70,7 +70,8 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 
         mr = par->targinfo;
         rt = skb_rtable(skb);
-        newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
+        nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
+        newsrc = inet_select_addr(par->out, nh, RT_SCOPE_UNIVERSE);
         if (!newsrc) {
                 pr_info("%s ate my IP address\n", par->out->name);
                 return NF_DROP;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d547f6fae20d..9add08869c75 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -133,10 +133,6 @@ static int ip_rt_gc_elasticity __read_mostly = 8;
 static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
 static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
 static int ip_rt_min_advmss __read_mostly = 256;
-static int rt_chain_length_max __read_mostly = 20;
-
-static struct delayed_work expires_work;
-static unsigned long expires_ljiffies;
 
 /*
  * Interface to generic destination cache.
@@ -145,14 +141,12 @@ static unsigned long expires_ljiffies;
 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
 static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
 static unsigned int ipv4_mtu(const struct dst_entry *dst);
-static void ipv4_dst_destroy(struct dst_entry *dst);
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void ipv4_link_failure(struct sk_buff *skb);
 static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
                               struct sk_buff *skb, u32 mtu);
 static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
                            struct sk_buff *skb);
-static int rt_garbage_collect(struct dst_ops *ops);
 
 static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
                             int how)
@@ -172,12 +166,10 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
 static struct dst_ops ipv4_dst_ops = {
         .family = AF_INET,
         .protocol = cpu_to_be16(ETH_P_IP),
-        .gc = rt_garbage_collect,
         .check = ipv4_dst_check,
         .default_advmss = ipv4_default_advmss,
         .mtu = ipv4_mtu,
         .cow_metrics = ipv4_cow_metrics,
-        .destroy = ipv4_dst_destroy,
         .ifdown = ipv4_dst_ifdown,
         .negative_advice = ipv4_negative_advice,
         .link_failure = ipv4_link_failure,
@@ -209,184 +201,30 @@ const __u8 ip_tos2prio[16] = {
 };
 EXPORT_SYMBOL(ip_tos2prio);
 
-/*
- * Route cache.
- */
-
-/* The locking scheme is rather straight forward:
- *
- * 1) Read-Copy Update protects the buckets of the central route hash.
- * 2) Only writers remove entries, and they hold the lock
- *    as they look at rtable reference counts.
- * 3) Only readers acquire references to rtable entries,
- *    they do so with atomic increments and with the
- *    lock held.
- */
-
-struct rt_hash_bucket {
-        struct rtable __rcu *chain;
-};
-
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
-        defined(CONFIG_PROVE_LOCKING)
-/*
- * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks
- * The size of this table is a power of two and depends on the number of CPUS.
- * (on lockdep we have a quite big spinlock_t, so keep the size down there)
- */
-#ifdef CONFIG_LOCKDEP
-# define RT_HASH_LOCK_SZ 256
-#else
-# if NR_CPUS >= 32
-#  define RT_HASH_LOCK_SZ 4096
-# elif NR_CPUS >= 16
-#  define RT_HASH_LOCK_SZ 2048
-# elif NR_CPUS >= 8
-#  define RT_HASH_LOCK_SZ 1024
-# elif NR_CPUS >= 4
-#  define RT_HASH_LOCK_SZ 512
-# else
-#  define RT_HASH_LOCK_SZ 256
-# endif
-#endif
-
-static spinlock_t *rt_hash_locks;
-# define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)]
-
-static __init void rt_hash_lock_init(void)
-{
-        int i;
-
-        rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ,
-                        GFP_KERNEL);
-        if (!rt_hash_locks)
-                panic("IP: failed to allocate rt_hash_locks\n");
-
-        for (i = 0; i < RT_HASH_LOCK_SZ; i++)
-                spin_lock_init(&rt_hash_locks[i]);
-}
-#else
-# define rt_hash_lock_addr(slot) NULL
-
-static inline void rt_hash_lock_init(void)
-{
-}
-#endif
-
-static struct rt_hash_bucket *rt_hash_table __read_mostly;
-static unsigned int rt_hash_mask __read_mostly;
-static unsigned int rt_hash_log __read_mostly;
-
 static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
 #define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
 
-static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx,
-                                   int genid)
-{
-        return jhash_3words((__force u32)daddr, (__force u32)saddr,
-                            idx, genid)
-                & rt_hash_mask;
-}
-
 static inline int rt_genid(struct net *net)
 {
         return atomic_read(&net->ipv4.rt_genid);
 }
 
 #ifdef CONFIG_PROC_FS
-struct rt_cache_iter_state {
-        struct seq_net_private p;
-        int bucket;
-        int genid;
-};
-
-static struct rtable *rt_cache_get_first(struct seq_file *seq)
-{
-        struct rt_cache_iter_state *st = seq->private;
-        struct rtable *r = NULL;
-
-        for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
-                if (!rcu_access_pointer(rt_hash_table[st->bucket].chain))
-                        continue;
-                rcu_read_lock_bh();
-                r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
-                while (r) {
-                        if (dev_net(r->dst.dev) == seq_file_net(seq) &&
-                            r->rt_genid == st->genid)
-                                return r;
-                        r = rcu_dereference_bh(r->dst.rt_next);
-                }
-                rcu_read_unlock_bh();
-        }
-        return r;
-}
-
-static struct rtable *__rt_cache_get_next(struct seq_file *seq,
-                                          struct rtable *r)
-{
-        struct rt_cache_iter_state *st = seq->private;
-
-        r = rcu_dereference_bh(r->dst.rt_next);
-        while (!r) {
-                rcu_read_unlock_bh();
-                do {
-                        if (--st->bucket < 0)
-                                return NULL;
-                } while (!rcu_access_pointer(rt_hash_table[st->bucket].chain));
-                rcu_read_lock_bh();
-                r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
-        }
-        return r;
-}
-
-static struct rtable *rt_cache_get_next(struct seq_file *seq,
-                                        struct rtable *r)
-{
-        struct rt_cache_iter_state *st = seq->private;
-        while ((r = __rt_cache_get_next(seq, r)) != NULL) {
-                if (dev_net(r->dst.dev) != seq_file_net(seq))
-                        continue;
-                if (r->rt_genid == st->genid)
-                        break;
-        }
-        return r;
-}
-
-static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos)
-{
-        struct rtable *r = rt_cache_get_first(seq);
-
-        if (r)
-                while (pos && (r = rt_cache_get_next(seq, r)))
-                        --pos;
-        return pos ? NULL : r;
-}
-
 static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
 {
-        struct rt_cache_iter_state *st = seq->private;
         if (*pos)
-                return rt_cache_get_idx(seq, *pos - 1);
-        st->genid = rt_genid(seq_file_net(seq));
+                return NULL;
         return SEQ_START_TOKEN;
 }
 
 static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-        struct rtable *r;
-
-        if (v == SEQ_START_TOKEN)
-                r = rt_cache_get_first(seq);
-        else
-                r = rt_cache_get_next(seq, v);
         ++*pos;
-        return r;
+        return NULL;
 }
 
 static void rt_cache_seq_stop(struct seq_file *seq, void *v)
 {
-        if (v && v != SEQ_START_TOKEN)
-                rcu_read_unlock_bh();
 }
 
 static int rt_cache_seq_show(struct seq_file *seq, void *v)
@@ -396,24 +234,6 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
                    "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
                    "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
                    "HHUptod\tSpecDst");
-        else {
-                struct rtable *r = v;
-                int len;
-
-                seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
-                           "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
-                           r->dst.dev ? r->dst.dev->name : "*",
-                           (__force u32)r->rt_dst,
-                           (__force u32)r->rt_gateway,
-                           r->rt_flags, atomic_read(&r->dst.__refcnt),
-                           r->dst.__use, 0, (__force u32)r->rt_src,
-                           dst_metric_advmss(&r->dst) + 40,
-                           dst_metric(&r->dst, RTAX_WINDOW), 0,
-                           r->rt_key_tos,
-                           -1, 0, 0, &len);
-
-                seq_printf(seq, "%*s\n", 127 - len, "");
-        }
         return 0;
 }
 
@@ -426,8 +246,7 @@ static const struct seq_operations rt_cache_seq_ops = {
 
 static int rt_cache_seq_open(struct inode *inode, struct file *file)
 {
-        return seq_open_net(inode, file, &rt_cache_seq_ops,
-                        sizeof(struct rt_cache_iter_state));
+        return seq_open(file, &rt_cache_seq_ops);
 }
 
 static const struct file_operations rt_cache_seq_fops = {
@@ -435,7 +254,7 @@ static const struct file_operations rt_cache_seq_fops = {
         .open = rt_cache_seq_open,
         .read = seq_read,
         .llseek = seq_lseek,
-        .release = seq_release_net,
+        .release = seq_release,
 };
 
 
@@ -625,263 +444,12 @@ static inline int ip_rt_proc_init(void)
 }
 #endif /* CONFIG_PROC_FS */
 
-static inline void rt_free(struct rtable *rt)
-{
-        call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
-}
-
-static inline void rt_drop(struct rtable *rt)
-{
-        ip_rt_put(rt);
-        call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
-}
-
-static inline int rt_fast_clean(struct rtable *rth)
-{
-        /* Kill broadcast/multicast entries very aggresively, if they
-           collide in hash table with more useful entries */
-        return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
-                rt_is_input_route(rth) && rth->dst.rt_next;
-}
-
-static inline int rt_valuable(struct rtable *rth)
-{
-        return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
-                rth->dst.expires;
-}
-
-static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
-{
-        unsigned long age;
-        int ret = 0;
-
-        if (atomic_read(&rth->dst.__refcnt))
-                goto out;
-
-        age = jiffies - rth->dst.lastuse;
-        if ((age <= tmo1 && !rt_fast_clean(rth)) ||
-            (age <= tmo2 && rt_valuable(rth)))
-                goto out;
-        ret = 1;
-out:    return ret;
-}
-
-/* Bits of score are:
- * 31: very valuable
- * 30: not quite useless
- * 29..0: usage counter
- */
-static inline u32 rt_score(struct rtable *rt)
-{
-        u32 score = jiffies - rt->dst.lastuse;
-
-        score = ~score & ~(3<<30);
-
-        if (rt_valuable(rt))
-                score |= (1<<31);
-
-        if (rt_is_output_route(rt) ||
-            !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL)))
-                score |= (1<<30);
-
-        return score;
-}
-
-static inline bool rt_caching(const struct net *net)
-{
-        return net->ipv4.current_rt_cache_rebuild_count <=
-                net->ipv4.sysctl_rt_cache_rebuild_count;
-}
-
-static inline bool compare_hash_inputs(const struct rtable *rt1,
-                                       const struct rtable *rt2)
-{
-        return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
-                ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
-                (rt1->rt_route_iif ^ rt2->rt_route_iif)) == 0);
-}
-
-static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
-{
-        return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
-                ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
-                (rt1->rt_mark ^ rt2->rt_mark) |
-                (rt1->rt_key_tos ^ rt2->rt_key_tos) |
-                (rt1->rt_route_iif ^ rt2->rt_route_iif) |
-                (rt1->rt_oif ^ rt2->rt_oif)) == 0;
-}
-
-static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
-{
-        return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev));
-}
-
 static inline int rt_is_expired(struct rtable *rth)
 {
         return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
 }
 
 /*
- * Perform a full scan of hash table and free all entries.
- * Can be called by a softirq or a process.
- * In the later case, we want to be reschedule if necessary
- */
-static void rt_do_flush(struct net *net, int process_context)
-{
-        unsigned int i;
-        struct rtable *rth, *next;
-
-        for (i = 0; i <= rt_hash_mask; i++) {
-                struct rtable __rcu **pprev;
-                struct rtable *list;
-
-                if (process_context && need_resched())
-                        cond_resched();
-                rth = rcu_access_pointer(rt_hash_table[i].chain);
-                if (!rth)
-                        continue;
-
-                spin_lock_bh(rt_hash_lock_addr(i));
-
-                list = NULL;
-                pprev = &rt_hash_table[i].chain;
-                rth = rcu_dereference_protected(*pprev,
-                        lockdep_is_held(rt_hash_lock_addr(i)));
-
-                while (rth) {
-                        next = rcu_dereference_protected(rth->dst.rt_next,
-                                lockdep_is_held(rt_hash_lock_addr(i)));
-
-                        if (!net ||
-                            net_eq(dev_net(rth->dst.dev), net)) {
-                                rcu_assign_pointer(*pprev, next);
-                                rcu_assign_pointer(rth->dst.rt_next, list);
-                                list = rth;
-                        } else {
-                                pprev = &rth->dst.rt_next;
-                        }
-                        rth = next;
-                }
-
-                spin_unlock_bh(rt_hash_lock_addr(i));
-
-                for (; list; list = next) {
-                        next = rcu_dereference_protected(list->dst.rt_next, 1);
-                        rt_free(list);
-                }
-        }
-}
-
-/*
- * While freeing expired entries, we compute average chain length
- * and standard deviation, using fixed-point arithmetic.
- * This to have an estimation of rt_chain_length_max
- * rt_chain_length_max = max(elasticity, AVG + 4*SD)
- * We use 3 bits for frational part, and 29 (or 61) for magnitude.
- */
-
-#define FRACT_BITS 3
-#define ONE (1UL << FRACT_BITS)
-
-/*
- * Given a hash chain and an item in this hash chain,
- * find if a previous entry has the same hash_inputs
- * (but differs on tos, mark or oif)
- * Returns 0 if an alias is found.
- * Returns ONE if rth has no alias before itself.
- */
-static int has_noalias(const struct rtable *head, const struct rtable *rth)
-{
-        const struct rtable *aux = head;
-
-        while (aux != rth) {
-                if (compare_hash_inputs(aux, rth))
-                        return 0;
-                aux = rcu_dereference_protected(aux->dst.rt_next, 1);
-        }
-        return ONE;
-}
-
-static void rt_check_expire(void)
-{
-        static unsigned int rover;
-        unsigned int i = rover, goal;
-        struct rtable *rth;
-        struct rtable __rcu **rthp;
-        unsigned long samples = 0;
-        unsigned long sum = 0, sum2 = 0;
-        unsigned long delta;
-        u64 mult;
-
-        delta = jiffies - expires_ljiffies;
-        expires_ljiffies = jiffies;
-        mult = ((u64)delta) << rt_hash_log;
-        if (ip_rt_gc_timeout > 1)
-                do_div(mult, ip_rt_gc_timeout);
-        goal = (unsigned int)mult;
-        if (goal > rt_hash_mask)
-                goal = rt_hash_mask + 1;
-        for (; goal > 0; goal--) {
-                unsigned long tmo = ip_rt_gc_timeout;
-                unsigned long length;
-
-                i = (i + 1) & rt_hash_mask;
-                rthp = &rt_hash_table[i].chain;
-
-                if (need_resched())
-                        cond_resched();
-
-                samples++;
-
-                if (rcu_dereference_raw(*rthp) == NULL)
-                        continue;
-                length = 0;
-                spin_lock_bh(rt_hash_lock_addr(i));
-                while ((rth = rcu_dereference_protected(*rthp,
-                        lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
-                        prefetch(rth->dst.rt_next);
-                        if (rt_is_expired(rth) ||
-                            rt_may_expire(rth, tmo, ip_rt_gc_timeout)) {
-                                *rthp = rth->dst.rt_next;
-                                rt_free(rth);
-                                continue;
-                        }
-
-                        /* We only count entries on a chain with equal
-                         * hash inputs once so that entries for
-                         * different QOS levels, and other non-hash
-                         * input attributes don't unfairly skew the
-                         * length computation
-                         */
-                        tmo >>= 1;
-                        rthp = &rth->dst.rt_next;
-                        length += has_noalias(rt_hash_table[i].chain, rth);
-                }
-                spin_unlock_bh(rt_hash_lock_addr(i));
-                sum += length;
-                sum2 += length*length;
-        }
-        if (samples) {
-                unsigned long avg = sum / samples;
-                unsigned long sd = int_sqrt(sum2 / samples - avg*avg);
-                rt_chain_length_max = max_t(unsigned long,
-                                        ip_rt_gc_elasticity,
-                                        (avg + 4*sd) >> FRACT_BITS);
-        }
-        rover = i;
-}
-
-/*
- * rt_worker_func() is run in process context.
- * we call rt_check_expire() to scan part of the hash table
- */
-static void rt_worker_func(struct work_struct *work)
-{
-        rt_check_expire();
-        schedule_delayed_work(&expires_work, ip_rt_gc_interval);
-}
-
-/*
  * Perturbation of rt_genid by a small quantity [1..256]
  * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
  * many times (2^24) without giving recent rt_genid.
@@ -902,167 +470,6 @@ static void rt_cache_invalidate(struct net *net)
 void rt_cache_flush(struct net *net, int delay)
 {
         rt_cache_invalidate(net);
-        if (delay >= 0)
-                rt_do_flush(net, !in_softirq());
-}
-
-/* Flush previous cache invalidated entries from the cache */
-void rt_cache_flush_batch(struct net *net)
-{
-        rt_do_flush(net, !in_softirq());
-}
-
-static void rt_emergency_hash_rebuild(struct net *net)
-{
-        net_warn_ratelimited("Route hash chain too long!\n");
-        rt_cache_invalidate(net);
-}
-
-/*
-   Short description of GC goals.
-
-   We want to build algorithm, which will keep routing cache
-   at some equilibrium point, when number of aged off entries
-   is kept approximately equal to newly generated ones.
-
-   Current expiration strength is variable "expire".
-   We try to adjust it dynamically, so that if networking
-   is idle expires is large enough to keep enough of warm entries,
-   and when load increases it reduces to limit cache size.
- */
-
-static int rt_garbage_collect(struct dst_ops *ops)
-{
-        static unsigned long expire = RT_GC_TIMEOUT;
-        static unsigned long last_gc;
-        static int rover;
-        static int equilibrium;
-        struct rtable *rth;
-        struct rtable __rcu **rthp;
-        unsigned long now = jiffies;
-        int goal;
-        int entries = dst_entries_get_fast(&ipv4_dst_ops);
-
-        /*
-         * Garbage collection is pretty expensive,
-         * do not make it too frequently.
-         */
-
-        RT_CACHE_STAT_INC(gc_total);
-
-        if (now - last_gc < ip_rt_gc_min_interval &&
-            entries < ip_rt_max_size) {
-                RT_CACHE_STAT_INC(gc_ignored);
-                goto out;
-        }
-
-        entries = dst_entries_get_slow(&ipv4_dst_ops);
-        /* Calculate number of entries, which we want to expire now. */
-        goal = entries - (ip_rt_gc_elasticity << rt_hash_log);
-        if (goal <= 0) {
-                if (equilibrium < ipv4_dst_ops.gc_thresh)
-                        equilibrium = ipv4_dst_ops.gc_thresh;
-                goal = entries - equilibrium;
-                if (goal > 0) {
-                        equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1);
-                        goal = entries - equilibrium;
-                }
-        } else {
-                /* We are in dangerous area. Try to reduce cache really
-                 * aggressively.
-                 */
-                goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1);
-                equilibrium = entries - goal;
-        }
-
-        if (now - last_gc >= ip_rt_gc_min_interval)
-                last_gc = now;
-
-        if (goal <= 0) {
-                equilibrium += goal;
-                goto work_done;
-        }
-
-        do {
-                int i, k;
-
-                for (i = rt_hash_mask, k = rover; i >= 0; i--) {
-                        unsigned long tmo = expire;
-
-                        k = (k + 1) & rt_hash_mask;
-                        rthp = &rt_hash_table[k].chain;
-                        spin_lock_bh(rt_hash_lock_addr(k));
-                        while ((rth = rcu_dereference_protected(*rthp,
-                                lockdep_is_held(rt_hash_lock_addr(k)))) != NULL) {
-                                if (!rt_is_expired(rth) &&
-                                    !rt_may_expire(rth, tmo, expire)) {
-                                        tmo >>= 1;
-                                        rthp = &rth->dst.rt_next;
-                                        continue;
-                                }
-                                *rthp = rth->dst.rt_next;
-                                rt_free(rth);
-                                goal--;
-                        }
-                        spin_unlock_bh(rt_hash_lock_addr(k));
-                        if (goal <= 0)
-                                break;
-                }
-                rover = k;
-
-                if (goal <= 0)
-                        goto work_done;
-
-                /* Goal is not achieved. We stop process if:
-
-                   - if expire reduced to zero. Otherwise, expire is halfed.
-                   - if table is not full.
-                   - if we are called from interrupt.
-                   - jiffies check is just fallback/debug loop breaker.
-                     We will not spin here for long time in any case.
-                 */
-
-                RT_CACHE_STAT_INC(gc_goal_miss);
-
-                if (expire == 0)
-                        break;
-
-                expire >>= 1;
-
-                if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
-                        goto out;
-        } while (!in_softirq() && time_before_eq(jiffies, now));
-
-        if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
-                goto out;
-        if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size)
-                goto out;
-        net_warn_ratelimited("dst cache overflow\n");
-        RT_CACHE_STAT_INC(gc_dst_overflow);
-        return 1;
-
-work_done:
-        expire += ip_rt_gc_min_interval;
-        if (expire > ip_rt_gc_timeout ||
-            dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh ||
-            dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh)
-                expire = ip_rt_gc_timeout;
-out:    return 0;
-}
-
-/*
- * Returns number of entries in a hash chain that have different hash_inputs
- */
-static int slow_chain_length(const struct rtable *head)
-{
-        int length = 0;
-        const struct rtable *rth = head;
-
-        while (rth) {
-                length += has_noalias(head, rth);
-                rth = rcu_dereference_protected(rth->dst.rt_next, 1);
-        }
-        return length >> FRACT_BITS;
 }
 
 static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
@@ -1086,139 +493,6 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
         return neigh_create(&arp_tbl, pkey, dev);
 }
 
-static struct rtable *rt_intern_hash(unsigned int hash, struct rtable *rt,
-                                     struct sk_buff *skb, int ifindex)
-{
-        struct rtable *rth, *cand;
-        struct rtable __rcu **rthp, **candp;
-        unsigned long now;
-        u32 min_score;
-        int chain_length;
-
-restart:
-        chain_length = 0;
-        min_score = ~(u32)0;
-        cand = NULL;
-        candp = NULL;
-        now = jiffies;
-
-        if (!rt_caching(dev_net(rt->dst.dev)) || (rt->dst.flags & DST_NOCACHE)) {
-                /*
-                 * If we're not caching, just tell the caller we
-                 * were successful and don't touch the route. The
-                 * caller hold the sole reference to the cache entry, and
-                 * it will be released when the caller is done with it.
-                 * If we drop it here, the callers have no way to resolve routes
-                 * when we're not caching. Instead, just point *rp at rt, so
-                 * the caller gets a single use out of the route
-                 * Note that we do rt_free on this new route entry, so that
-                 * once its refcount hits zero, we are still able to reap it
-                 * (Thanks Alexey)
-                 * Note: To avoid expensive rcu stuff for this uncached dst,
-                 * we set DST_NOCACHE so that dst_release() can free dst without
-                 * waiting a grace period.
-                 */
-
-                rt->dst.flags |= DST_NOCACHE;
-                goto skip_hashing;
-        }
-
-        rthp = &rt_hash_table[hash].chain;
-
-        spin_lock_bh(rt_hash_lock_addr(hash));
-        while ((rth = rcu_dereference_protected(*rthp,
-                lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
-                if (rt_is_expired(rth)) {
-                        *rthp = rth->dst.rt_next;
-                        rt_free(rth);
-                        continue;
-                }
-                if (compare_keys(rth, rt) && compare_netns(rth, rt)) {
-                        /* Put it first */
-                        *rthp = rth->dst.rt_next;
-                        /*
-                         * Since lookup is lockfree, the deletion
-                         * must be visible to another weakly ordered CPU before
-                         * the insertion at the start of the hash chain.
-                         */
-                        rcu_assign_pointer(rth->dst.rt_next,
-                                           rt_hash_table[hash].chain);
-                        /*
-                         * Since lookup is lockfree, the update writes
-                         * must be ordered for consistency on SMP.
-                         */
-                        rcu_assign_pointer(rt_hash_table[hash].chain, rth);
-
-                        dst_use(&rth->dst, now);
-                        spin_unlock_bh(rt_hash_lock_addr(hash));
-
-                        rt_drop(rt);
-                        if (skb)
-                                skb_dst_set(skb, &rth->dst);
-                        return rth;
-                }
-
-                if (!atomic_read(&rth->dst.__refcnt)) {
-                        u32 score = rt_score(rth);
-
-                        if (score <= min_score) {
-                                cand = rth;
-                                candp = rthp;
-                                min_score = score;
-                        }
-                }
-
-                chain_length++;
-
-                rthp = &rth->dst.rt_next;
-        }
-
-        if (cand) {
-                /* ip_rt_gc_elasticity used to be average length of chain
-                 * length, when exceeded gc becomes really aggressive.
-                 *
-                 * The second limit is less certain. At the moment it allows
-                 * only 2 entries per bucket. We will see.
-                 */
-                if (chain_length > ip_rt_gc_elasticity) {
-                        *candp = cand->dst.rt_next;
-                        rt_free(cand);
-                }
-        } else {
-                if (chain_length > rt_chain_length_max &&
-                    slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {
-                        struct net *net = dev_net(rt->dst.dev);
-                        int num = ++net->ipv4.current_rt_cache_rebuild_count;
-                        if (!rt_caching(net)) {
-                                pr_warn("%s: %d rebuilds is over limit, route caching disabled\n",
-                                        rt->dst.dev->name, num);
-                        }
-                        rt_emergency_hash_rebuild(net);
-                        spin_unlock_bh(rt_hash_lock_addr(hash));
-
-                        hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
-                                        ifindex, rt_genid(net));
-                        goto restart;
-                }
-        }
-
-        rt->dst.rt_next = rt_hash_table[hash].chain;
-
-        /*
-         * Since lookup is lockfree, we must make sure
-         * previous writes to rt are committed to memory
-         * before making rt visible to other CPUS.
-         */
-        rcu_assign_pointer(rt_hash_table[hash].chain, rt);
-
-        spin_unlock_bh(rt_hash_lock_addr(hash));
-
-skip_hashing:
-        if (skb)
-                skb_dst_set(skb, &rt->dst);
-        return rt;
-}
-
 /*
  * Peer allocation may fail only in serious out-of-memory conditions. However
  * we still can generate some output.
@@ -1255,26 +529,6 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
 }
 EXPORT_SYMBOL(__ip_select_ident);
 
-static void rt_del(unsigned int hash, struct rtable *rt)
-{
-        struct rtable __rcu **rthp;
-        struct rtable *aux;
-
-        rthp = &rt_hash_table[hash].chain;
-        spin_lock_bh(rt_hash_lock_addr(hash));
-        ip_rt_put(rt);
-        while ((aux = rcu_dereference_protected(*rthp,
-                        lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
-                if (aux == rt || rt_is_expired(aux)) {
-                        *rthp = aux->dst.rt_next;
-                        rt_free(aux);
-                        continue;
-                }
-                rthp = &aux->dst.rt_next;
-        }
-        spin_unlock_bh(rt_hash_lock_addr(hash));
-}
-
 static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
                              const struct iphdr *iph,
                              int oif, u8 tos,
@@ -1417,7 +671,8 @@ out_unlock:
         return;
 }
 
-static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4)
+static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
+                             bool kill_route)
 {
         __be32 new_gw = icmp_hdr(skb)->un.gateway;
         __be32 old_gw = ip_hdr(skb)->saddr;
@@ -1472,8 +727,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
                                 update_or_create_fnhe(nh, fl4->daddr, new_gw,
                                                       0, 0);
                         }
-                        rt->rt_gateway = new_gw;
-                        rt->rt_flags |= RTCF_REDIRECTED;
+                        if (kill_route)
+                                rt->dst.obsolete = DST_OBSOLETE_KILL;
                         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
                 }
                 neigh_release(n);
@@ -1504,7 +759,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
         rt = (struct rtable *) dst;
 
         ip_rt_build_flow_key(&fl4, sk, skb);
-        __ip_do_redirect(rt, skb, &fl4);
+        __ip_do_redirect(rt, skb, &fl4, true);
 }
 
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
@@ -1518,10 +773,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
                         ret = NULL;
                 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
                            rt->dst.expires) {
-                        unsigned int hash = rt_hash(rt->rt_key_dst, rt->rt_key_src,
-                                                rt->rt_oif,
-                                                rt_genid(dev_net(dst->dev)));
-                        rt_del(hash, rt);
+                        ip_rt_put(rt);
                         ret = NULL;
                 }
         }
@@ -1597,7 +849,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1597 | peer->rate_tokens == ip_rt_redirect_number) | 849 | peer->rate_tokens == ip_rt_redirect_number) |
1598 | net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", | 850 | net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", |
1599 | &ip_hdr(skb)->saddr, rt->rt_iif, | 851 | &ip_hdr(skb)->saddr, rt->rt_iif, |
1600 | &rt->rt_dst, &rt->rt_gateway); | 852 | &ip_hdr(skb)->daddr, &rt->rt_gateway); |
1601 | #endif | 853 | #endif |
1602 | } | 854 | } |
1603 | out_put_peer: | 855 | out_put_peer: |
@@ -1666,7 +918,7 @@ out: kfree_skb(skb); | |||
1666 | return 0; | 918 | return 0; |
1667 | } | 919 | } |
1668 | 920 | ||
1669 | static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) | 921 | static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) |
1670 | { | 922 | { |
1671 | struct fib_result res; | 923 | struct fib_result res; |
1672 | 924 | ||
@@ -1679,8 +931,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) | |||
1679 | update_or_create_fnhe(nh, fl4->daddr, 0, mtu, | 931 | update_or_create_fnhe(nh, fl4->daddr, 0, mtu, |
1680 | jiffies + ip_rt_mtu_expires); | 932 | jiffies + ip_rt_mtu_expires); |
1681 | } | 933 | } |
1682 | rt->rt_pmtu = mtu; | 934 | return mtu; |
1683 | dst_set_expires(&rt->dst, ip_rt_mtu_expires); | ||
1684 | } | 935 | } |
1685 | 936 | ||
1686 | static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, | 937 | static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, |
@@ -1690,7 +941,14 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, | |||
1690 | struct flowi4 fl4; | 941 | struct flowi4 fl4; |
1691 | 942 | ||
1692 | ip_rt_build_flow_key(&fl4, sk, skb); | 943 | ip_rt_build_flow_key(&fl4, sk, skb); |
1693 | __ip_rt_update_pmtu(rt, &fl4, mtu); | 944 | mtu = __ip_rt_update_pmtu(rt, &fl4, mtu); |
945 | |||
946 | if (!rt->rt_pmtu) { | ||
947 | dst->obsolete = DST_OBSOLETE_KILL; | ||
948 | } else { | ||
949 | rt->rt_pmtu = mtu; | ||
950 | dst_set_expires(&rt->dst, ip_rt_mtu_expires); | ||
951 | } | ||
1694 | } | 952 | } |
1695 | 953 | ||
1696 | void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, | 954 | void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, |
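
[Editor's note] After this change a learned PMTU lives in a fib_nh exception (where later lookups will find it) or, if the rtable cannot carry it, the dst is marked DST_OBSOLETE_KILL so its holders re-resolve. The ipv4_update_pmtu() helper whose signature opens above is what protocol error handlers invoke; a hedged sketch of a typical caller reacting to ICMP fragmentation-needed (handler name and protocol constant are illustrative, modeled on the tunnel drivers):

    static void example_err_handler(struct sk_buff *skb, u32 info)
    {
            struct net *net = dev_net(skb->dev);

            if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH &&
                icmp_hdr(skb)->code == ICMP_FRAG_NEEDED)
                    /* info carries the next-hop MTU reported by the
                     * ICMP message; oif/mark/flow_flags left zero.
                     */
                    ipv4_update_pmtu(skb, net, info, 0, 0,
                                     IPPROTO_GRE, 0);
    }
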
@@ -1736,7 +994,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net, | |||
1736 | RT_TOS(iph->tos), protocol, mark, flow_flags); | 994 | RT_TOS(iph->tos), protocol, mark, flow_flags); |
1737 | rt = __ip_route_output_key(net, &fl4); | 995 | rt = __ip_route_output_key(net, &fl4); |
1738 | if (!IS_ERR(rt)) { | 996 | if (!IS_ERR(rt)) { |
1739 | __ip_do_redirect(rt, skb, &fl4); | 997 | __ip_do_redirect(rt, skb, &fl4, false); |
1740 | ip_rt_put(rt); | 998 | ip_rt_put(rt); |
1741 | } | 999 | } |
1742 | } | 1000 | } |
@@ -1751,7 +1009,7 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) | |||
1751 | __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); | 1009 | __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); |
1752 | rt = __ip_route_output_key(sock_net(sk), &fl4); | 1010 | rt = __ip_route_output_key(sock_net(sk), &fl4); |
1753 | if (!IS_ERR(rt)) { | 1011 | if (!IS_ERR(rt)) { |
1754 | __ip_do_redirect(rt, skb, &fl4); | 1012 | __ip_do_redirect(rt, skb, &fl4, false); |
1755 | ip_rt_put(rt); | 1013 | ip_rt_put(rt); |
1756 | } | 1014 | } |
1757 | } | 1015 | } |
@@ -1761,22 +1019,19 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) | |||
1761 | { | 1019 | { |
1762 | struct rtable *rt = (struct rtable *) dst; | 1020 | struct rtable *rt = (struct rtable *) dst; |
1763 | 1021 | ||
1764 | if (rt_is_expired(rt)) | 1022 | /* All IPV4 dsts are created with ->obsolete set to the value |
1023 | * DST_OBSOLETE_FORCE_CHK which forces validation calls down | ||
1024 | * into this function always. | ||
1025 | * | ||
1026 | * When a PMTU/redirect information update invalidates a | ||
1027 | * route, this is indicated by setting obsolete to | ||
1028 | * DST_OBSOLETE_KILL. | ||
1029 | */ | ||
1030 | if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt)) | ||
1765 | return NULL; | 1031 | return NULL; |
1766 | return dst; | 1032 | return dst; |
1767 | } | 1033 | } |
1768 | 1034 | ||
1769 | static void ipv4_dst_destroy(struct dst_entry *dst) | ||
1770 | { | ||
1771 | struct rtable *rt = (struct rtable *) dst; | ||
1772 | |||
1773 | if (rt->fi) { | ||
1774 | fib_info_put(rt->fi); | ||
1775 | rt->fi = NULL; | ||
1776 | } | ||
1777 | } | ||
1778 | |||
1779 | |||
1780 | static void ipv4_link_failure(struct sk_buff *skb) | 1035 | static void ipv4_link_failure(struct sk_buff *skb) |
1781 | { | 1036 | { |
1782 | struct rtable *rt; | 1037 | struct rtable *rt; |
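
[Editor's note] Every IPv4 dst is now allocated with ->obsolete == DST_OBSOLETE_FORCE_CHK, so any code holding a dst across packets is funneled into ipv4_dst_check() above; once a PMTU or redirect event demotes the route to DST_OBSOLETE_KILL, the check returns NULL and the holder must re-route. A hedged sketch of the caller side (the wrapper name is illustrative; __sk_dst_get() and the ->check() hook are real):

    static struct dst_entry *revalidate_route(struct sock *sk, u32 cookie)
    {
            struct dst_entry *dst = __sk_dst_get(sk);

            /* Any non-zero ->obsolete forces the ->check() call;
             * DST_OBSOLETE_KILL makes ipv4_dst_check() return NULL,
             * i.e. "do a fresh lookup".
             */
            if (dst && dst->obsolete)
                    dst = dst->ops->check(dst, cookie);
            return dst;
    }
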
@@ -1832,8 +1087,9 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) | |||
1832 | if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) | 1087 | if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) |
1833 | src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res); | 1088 | src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res); |
1834 | else | 1089 | else |
1835 | src = inet_select_addr(rt->dst.dev, rt->rt_gateway, | 1090 | src = inet_select_addr(rt->dst.dev, |
1836 | RT_SCOPE_UNIVERSE); | 1091 | rt_nexthop(rt, iph->daddr), |
1092 | RT_SCOPE_UNIVERSE); | ||
1837 | rcu_read_unlock(); | 1093 | rcu_read_unlock(); |
1838 | } | 1094 | } |
1839 | memcpy(addr, &src, 4); | 1095 | memcpy(addr, &src, 4); |
@@ -1879,8 +1135,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) | |||
1879 | mtu = dst->dev->mtu; | 1135 | mtu = dst->dev->mtu; |
1880 | 1136 | ||
1881 | if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { | 1137 | if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { |
1882 | 1138 | if (rt->rt_gateway && mtu > 576) | |
1883 | if (rt->rt_gateway != rt->rt_dst && mtu > 576) | ||
1884 | mtu = 576; | 1139 | mtu = 576; |
1885 | } | 1140 | } |
1886 | 1141 | ||
@@ -1890,58 +1145,91 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) | |||
1890 | return mtu; | 1145 | return mtu; |
1891 | } | 1146 | } |
1892 | 1147 | ||
1893 | static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, | 1148 | static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) |
1894 | struct fib_info *fi) | ||
1895 | { | ||
1896 | if (fi->fib_metrics != (u32 *) dst_default_metrics) { | ||
1897 | rt->fi = fi; | ||
1898 | atomic_inc(&fi->fib_clntref); | ||
1899 | } | ||
1900 | dst_init_metrics(&rt->dst, fi->fib_metrics, true); | ||
1901 | } | ||
1902 | |||
1903 | static void rt_bind_exception(struct rtable *rt, struct fib_nh *nh, __be32 daddr) | ||
1904 | { | 1149 | { |
1905 | struct fnhe_hash_bucket *hash = nh->nh_exceptions; | 1150 | struct fnhe_hash_bucket *hash = nh->nh_exceptions; |
1906 | struct fib_nh_exception *fnhe; | 1151 | struct fib_nh_exception *fnhe; |
1907 | u32 hval; | 1152 | u32 hval; |
1908 | 1153 | ||
1154 | if (!hash) | ||
1155 | return NULL; | ||
1156 | |||
1909 | hval = fnhe_hashfun(daddr); | 1157 | hval = fnhe_hashfun(daddr); |
1910 | 1158 | ||
1911 | restart: | ||
1912 | for (fnhe = rcu_dereference(hash[hval].chain); fnhe; | 1159 | for (fnhe = rcu_dereference(hash[hval].chain); fnhe; |
1913 | fnhe = rcu_dereference(fnhe->fnhe_next)) { | 1160 | fnhe = rcu_dereference(fnhe->fnhe_next)) { |
1914 | __be32 fnhe_daddr, gw; | 1161 | if (fnhe->fnhe_daddr == daddr) |
1915 | unsigned long expires; | 1162 | return fnhe; |
1916 | unsigned int seq; | 1163 | } |
1917 | u32 pmtu; | 1164 | return NULL; |
1918 | 1165 | } | |
1919 | seq = read_seqbegin(&fnhe_seqlock); | ||
1920 | fnhe_daddr = fnhe->fnhe_daddr; | ||
1921 | gw = fnhe->fnhe_gw; | ||
1922 | pmtu = fnhe->fnhe_pmtu; | ||
1923 | expires = fnhe->fnhe_expires; | ||
1924 | if (read_seqretry(&fnhe_seqlock, seq)) | ||
1925 | goto restart; | ||
1926 | if (daddr != fnhe_daddr) | ||
1927 | continue; | ||
1928 | if (pmtu) { | ||
1929 | unsigned long diff = expires - jiffies; | ||
1930 | 1166 | ||
1931 | if (time_before(jiffies, expires)) { | 1167 | static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, |
1932 | rt->rt_pmtu = pmtu; | 1168 | __be32 daddr) |
1933 | dst_set_expires(&rt->dst, diff); | 1169 | { |
1934 | } | 1170 | __be32 fnhe_daddr, gw; |
1171 | unsigned long expires; | ||
1172 | unsigned int seq; | ||
1173 | u32 pmtu; | ||
1174 | |||
1175 | restart: | ||
1176 | seq = read_seqbegin(&fnhe_seqlock); | ||
1177 | fnhe_daddr = fnhe->fnhe_daddr; | ||
1178 | gw = fnhe->fnhe_gw; | ||
1179 | pmtu = fnhe->fnhe_pmtu; | ||
1180 | expires = fnhe->fnhe_expires; | ||
1181 | if (read_seqretry(&fnhe_seqlock, seq)) | ||
1182 | goto restart; | ||
1183 | |||
1184 | if (daddr != fnhe_daddr) | ||
1185 | return; | ||
1186 | |||
1187 | if (pmtu) { | ||
1188 | unsigned long diff = expires - jiffies; | ||
1189 | |||
1190 | if (time_before(jiffies, expires)) { | ||
1191 | rt->rt_pmtu = pmtu; | ||
1192 | dst_set_expires(&rt->dst, diff); | ||
1935 | } | 1193 | } |
1936 | if (gw) | ||
1937 | rt->rt_gateway = gw; | ||
1938 | fnhe->fnhe_stamp = jiffies; | ||
1939 | break; | ||
1940 | } | 1194 | } |
1195 | if (gw) { | ||
1196 | rt->rt_flags |= RTCF_REDIRECTED; | ||
1197 | rt->rt_gateway = gw; | ||
1198 | } | ||
1199 | fnhe->fnhe_stamp = jiffies; | ||
1941 | } | 1200 | } |
1942 | 1201 | ||
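
[Editor's note] The restart label in rt_bind_exception() above is a hand-rolled seqlock retry loop: a writer may update the exception's fields concurrently, so the reader re-reads until it obtains a torn-free snapshot. The same read side in its canonical do/while form, assuming the fnhe_seqlock and field names from this file:

    static void fnhe_snapshot(const struct fib_nh_exception *fnhe,
                              __be32 *daddr, __be32 *gw, u32 *pmtu,
                              unsigned long *expires)
    {
            unsigned int seq;

            do {
                    seq      = read_seqbegin(&fnhe_seqlock);
                    *daddr   = fnhe->fnhe_daddr;
                    *gw      = fnhe->fnhe_gw;
                    *pmtu    = fnhe->fnhe_pmtu;
                    *expires = fnhe->fnhe_expires;
            } while (read_seqretry(&fnhe_seqlock, seq));
    }
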
1943 | static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, | 1202 | static inline void rt_release_rcu(struct rcu_head *head) |
1203 | { | ||
1204 | struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); | ||
1205 | dst_release(dst); | ||
1206 | } | ||
1207 | |||
1208 | static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) | ||
1209 | { | ||
1210 | struct rtable *orig, *prev, **p = &nh->nh_rth_output; | ||
1211 | |||
1212 | if (rt_is_input_route(rt)) | ||
1213 | p = &nh->nh_rth_input; | ||
1214 | |||
1215 | orig = *p; | ||
1216 | |||
1217 | prev = cmpxchg(p, orig, rt); | ||
1218 | if (prev == orig) { | ||
1219 | dst_clone(&rt->dst); | ||
1220 | if (orig) | ||
1221 | call_rcu_bh(&orig->dst.rcu_head, rt_release_rcu); | ||
1222 | } | ||
1223 | } | ||
1224 | |||
1225 | static bool rt_cache_valid(struct rtable *rt) | ||
1226 | { | ||
1227 | return (rt && rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK); | ||
1228 | } | ||
1229 | |||
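
[Editor's note] rt_cache_route() above is a deliberately lossy single-slot cache: cmpxchg() elects one winner per update, a lost race simply leaves the new route uncached (correctness never depends on a hit), and the displaced entry is freed only after an RCU grace period because readers dereference the slot locklessly. rt_cache_valid() is the matching read-side test: a cached entry stays usable exactly as long as nothing has moved its ->obsolete off DST_OBSOLETE_FORCE_CHK. A condensed, hedged sketch of both halves in one helper (name and slot pointer are illustrative):

    static struct rtable *slot_get_or_publish(struct rtable **slot,
                                              struct rtable *rt)
    {
            struct rtable *orig = *slot;

            /* Read side: reuse while ->obsolete still says FORCE_CHK. */
            if (rt_cache_valid(orig)) {
                    dst_hold(&orig->dst);   /* caller's own reference */
                    return orig;
            }

            /* Write side: one winner; a loser's rt simply stays
             * uncached, which is harmless.
             */
            if (cmpxchg(slot, orig, rt) == orig) {
                    dst_clone(&rt->dst);    /* reference owned by the slot */
                    if (orig)               /* free after readers drain */
                            call_rcu_bh(&orig->dst.rcu_head, rt_release_rcu);
            }
            return rt;
    }
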
1230 | static void rt_set_nexthop(struct rtable *rt, __be32 daddr, | ||
1944 | const struct fib_result *res, | 1231 | const struct fib_result *res, |
1232 | struct fib_nh_exception *fnhe, | ||
1945 | struct fib_info *fi, u16 type, u32 itag) | 1233 | struct fib_info *fi, u16 type, u32 itag) |
1946 | { | 1234 | { |
1947 | if (fi) { | 1235 | if (fi) { |
@@ -1949,12 +1237,14 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, | |||
1949 | 1237 | ||
1950 | if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) | 1238 | if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) |
1951 | rt->rt_gateway = nh->nh_gw; | 1239 | rt->rt_gateway = nh->nh_gw; |
1952 | if (unlikely(nh->nh_exceptions)) | 1240 | if (unlikely(fnhe)) |
1953 | rt_bind_exception(rt, nh, fl4->daddr); | 1241 | rt_bind_exception(rt, fnhe, daddr); |
1954 | rt_init_metrics(rt, fl4, fi); | 1242 | dst_init_metrics(&rt->dst, fi->fib_metrics, true); |
1955 | #ifdef CONFIG_IP_ROUTE_CLASSID | 1243 | #ifdef CONFIG_IP_ROUTE_CLASSID |
1956 | rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; | 1244 | rt->dst.tclassid = nh->nh_tclassid; |
1957 | #endif | 1245 | #endif |
1246 | if (!(rt->dst.flags & DST_HOST)) | ||
1247 | rt_cache_route(nh, rt); | ||
1958 | } | 1248 | } |
1959 | 1249 | ||
1960 | #ifdef CONFIG_IP_ROUTE_CLASSID | 1250 | #ifdef CONFIG_IP_ROUTE_CLASSID |
@@ -1966,10 +1256,10 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, | |||
1966 | } | 1256 | } |
1967 | 1257 | ||
1968 | static struct rtable *rt_dst_alloc(struct net_device *dev, | 1258 | static struct rtable *rt_dst_alloc(struct net_device *dev, |
1969 | bool nopolicy, bool noxfrm) | 1259 | bool nopolicy, bool noxfrm, bool will_cache) |
1970 | { | 1260 | { |
1971 | return dst_alloc(&ipv4_dst_ops, dev, 1, -1, | 1261 | return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, |
1972 | DST_HOST | | 1262 | (will_cache ? 0 : DST_HOST) | DST_NOCACHE | |
1973 | (nopolicy ? DST_NOPOLICY : 0) | | 1263 | (nopolicy ? DST_NOPOLICY : 0) | |
1974 | (noxfrm ? DST_NOXFRM : 0)); | 1264 | (noxfrm ? DST_NOXFRM : 0)); |
1975 | } | 1265 | } |
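
[Editor's note] The allocation above fixes the lifetime rules at birth; a hedged gloss of what each argument and flag requests (the constants are the real ones from dst.h, the commentary is editorial):

    rth = dst_alloc(&ipv4_dst_ops, dev, 1,
                    DST_OBSOLETE_FORCE_CHK, /* holders must revalidate
                                             * through ipv4_dst_check() */
                    DST_NOCACHE |           /* plain refcounted lifetime:
                                             * freed on the last release */
                    (will_cache ? 0 : DST_HOST) |
                                            /* DST_HOST marks a fully
                                             * destination-specific dst;
                                             * a route bound for a shared
                                             * nexthop slot must not be */
                    (nopolicy ? DST_NOPOLICY : 0) |
                    (noxfrm ? DST_NOXFRM : 0));
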
@@ -1978,7 +1268,6 @@ static struct rtable *rt_dst_alloc(struct net_device *dev, | |||
1978 | static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 1268 | static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
1979 | u8 tos, struct net_device *dev, int our) | 1269 | u8 tos, struct net_device *dev, int our) |
1980 | { | 1270 | { |
1981 | unsigned int hash; | ||
1982 | struct rtable *rth; | 1271 | struct rtable *rth; |
1983 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 1272 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
1984 | u32 itag = 0; | 1273 | u32 itag = 0; |
@@ -2007,7 +1296,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2007 | goto e_err; | 1296 | goto e_err; |
2008 | } | 1297 | } |
2009 | rth = rt_dst_alloc(dev_net(dev)->loopback_dev, | 1298 | rth = rt_dst_alloc(dev_net(dev)->loopback_dev, |
2010 | IN_DEV_CONF_GET(in_dev, NOPOLICY), false); | 1299 | IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); |
2011 | if (!rth) | 1300 | if (!rth) |
2012 | goto e_nobufs; | 1301 | goto e_nobufs; |
2013 | 1302 | ||
@@ -2016,21 +1305,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2016 | #endif | 1305 | #endif |
2017 | rth->dst.output = ip_rt_bug; | 1306 | rth->dst.output = ip_rt_bug; |
2018 | 1307 | ||
2019 | rth->rt_key_dst = daddr; | ||
2020 | rth->rt_key_src = saddr; | ||
2021 | rth->rt_genid = rt_genid(dev_net(dev)); | 1308 | rth->rt_genid = rt_genid(dev_net(dev)); |
2022 | rth->rt_flags = RTCF_MULTICAST; | 1309 | rth->rt_flags = RTCF_MULTICAST; |
2023 | rth->rt_type = RTN_MULTICAST; | 1310 | rth->rt_type = RTN_MULTICAST; |
2024 | rth->rt_key_tos = tos; | 1311 | rth->rt_is_input= 1; |
2025 | rth->rt_dst = daddr; | ||
2026 | rth->rt_src = saddr; | ||
2027 | rth->rt_route_iif = dev->ifindex; | ||
2028 | rth->rt_iif = dev->ifindex; | 1312 | rth->rt_iif = dev->ifindex; |
2029 | rth->rt_oif = 0; | ||
2030 | rth->rt_mark = skb->mark; | ||
2031 | rth->rt_pmtu = 0; | 1313 | rth->rt_pmtu = 0; |
2032 | rth->rt_gateway = daddr; | 1314 | rth->rt_gateway = 0; |
2033 | rth->fi = NULL; | ||
2034 | if (our) { | 1315 | if (our) { |
2035 | rth->dst.input= ip_local_deliver; | 1316 | rth->dst.input= ip_local_deliver; |
2036 | rth->rt_flags |= RTCF_LOCAL; | 1317 | rth->rt_flags |= RTCF_LOCAL; |
@@ -2042,9 +1323,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2042 | #endif | 1323 | #endif |
2043 | RT_CACHE_STAT_INC(in_slow_mc); | 1324 | RT_CACHE_STAT_INC(in_slow_mc); |
2044 | 1325 | ||
2045 | hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); | 1326 | skb_dst_set(skb, &rth->dst); |
2046 | rth = rt_intern_hash(hash, rth, skb, dev->ifindex); | 1327 | return 0; |
2047 | return IS_ERR(rth) ? PTR_ERR(rth) : 0; | ||
2048 | 1328 | ||
2049 | e_nobufs: | 1329 | e_nobufs: |
2050 | return -ENOBUFS; | 1330 | return -ENOBUFS; |
@@ -2091,6 +1371,7 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2091 | int err; | 1371 | int err; |
2092 | struct in_device *out_dev; | 1372 | struct in_device *out_dev; |
2093 | unsigned int flags = 0; | 1373 | unsigned int flags = 0; |
1374 | bool do_cache; | ||
2094 | u32 itag; | 1375 | u32 itag; |
2095 | 1376 | ||
2096 | /* get a working reference to the output device */ | 1377 | /* get a working reference to the output device */ |
@@ -2133,35 +1414,39 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2133 | } | 1414 | } |
2134 | } | 1415 | } |
2135 | 1416 | ||
1417 | do_cache = false; | ||
1418 | if (res->fi) { | ||
1419 | if (!(flags & RTCF_DIRECTSRC) && !itag) { | ||
1420 | rth = FIB_RES_NH(*res).nh_rth_input; | ||
1421 | if (rt_cache_valid(rth)) { | ||
1422 | dst_hold(&rth->dst); | ||
1423 | goto out; | ||
1424 | } | ||
1425 | do_cache = true; | ||
1426 | } | ||
1427 | } | ||
1428 | |||
2136 | rth = rt_dst_alloc(out_dev->dev, | 1429 | rth = rt_dst_alloc(out_dev->dev, |
2137 | IN_DEV_CONF_GET(in_dev, NOPOLICY), | 1430 | IN_DEV_CONF_GET(in_dev, NOPOLICY), |
2138 | IN_DEV_CONF_GET(out_dev, NOXFRM)); | 1431 | IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache); |
2139 | if (!rth) { | 1432 | if (!rth) { |
2140 | err = -ENOBUFS; | 1433 | err = -ENOBUFS; |
2141 | goto cleanup; | 1434 | goto cleanup; |
2142 | } | 1435 | } |
2143 | 1436 | ||
2144 | rth->rt_key_dst = daddr; | ||
2145 | rth->rt_key_src = saddr; | ||
2146 | rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); | 1437 | rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); |
2147 | rth->rt_flags = flags; | 1438 | rth->rt_flags = flags; |
2148 | rth->rt_type = res->type; | 1439 | rth->rt_type = res->type; |
2149 | rth->rt_key_tos = tos; | 1440 | rth->rt_is_input = 1; |
2150 | rth->rt_dst = daddr; | ||
2151 | rth->rt_src = saddr; | ||
2152 | rth->rt_route_iif = in_dev->dev->ifindex; | ||
2153 | rth->rt_iif = in_dev->dev->ifindex; | 1441 | rth->rt_iif = in_dev->dev->ifindex; |
2154 | rth->rt_oif = 0; | ||
2155 | rth->rt_mark = skb->mark; | ||
2156 | rth->rt_pmtu = 0; | 1442 | rth->rt_pmtu = 0; |
2157 | rth->rt_gateway = daddr; | 1443 | rth->rt_gateway = 0; |
2158 | rth->fi = NULL; | ||
2159 | 1444 | ||
2160 | rth->dst.input = ip_forward; | 1445 | rth->dst.input = ip_forward; |
2161 | rth->dst.output = ip_output; | 1446 | rth->dst.output = ip_output; |
2162 | 1447 | ||
2163 | rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag); | 1448 | rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag); |
2164 | 1449 | out: | |
2165 | *result = rth; | 1450 | *result = rth; |
2166 | err = 0; | 1451 | err = 0; |
2167 | cleanup: | 1452 | cleanup: |
@@ -2176,7 +1461,6 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
2176 | { | 1461 | { |
2177 | struct rtable *rth = NULL; | 1462 | struct rtable *rth = NULL; |
2178 | int err; | 1463 | int err; |
2179 | unsigned int hash; | ||
2180 | 1464 | ||
2181 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 1465 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
2182 | if (res->fi && res->fi->fib_nhs > 1) | 1466 | if (res->fi && res->fi->fib_nhs > 1) |
@@ -2188,12 +1472,7 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
2188 | if (err) | 1472 | if (err) |
2189 | return err; | 1473 | return err; |
2190 | 1474 | ||
2191 | /* put it into the cache */ | 1475 | skb_dst_set(skb, &rth->dst); |
2192 | hash = rt_hash(daddr, saddr, fl4->flowi4_iif, | ||
2193 | rt_genid(dev_net(rth->dst.dev))); | ||
2194 | rth = rt_intern_hash(hash, rth, skb, fl4->flowi4_iif); | ||
2195 | if (IS_ERR(rth)) | ||
2196 | return PTR_ERR(rth); | ||
2197 | return 0; | 1476 | return 0; |
2198 | } | 1477 | } |
2199 | 1478 | ||
@@ -2217,9 +1496,9 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2217 | unsigned int flags = 0; | 1496 | unsigned int flags = 0; |
2218 | u32 itag = 0; | 1497 | u32 itag = 0; |
2219 | struct rtable *rth; | 1498 | struct rtable *rth; |
2220 | unsigned int hash; | ||
2221 | int err = -EINVAL; | 1499 | int err = -EINVAL; |
2222 | struct net *net = dev_net(dev); | 1500 | struct net *net = dev_net(dev); |
1501 | bool do_cache; | ||
2223 | 1502 | ||
2224 | /* IP on this device is disabled. */ | 1503 | /* IP on this device is disabled. */ |
2225 | 1504 | ||
@@ -2233,6 +1512,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2233 | if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) | 1512 | if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) |
2234 | goto martian_source; | 1513 | goto martian_source; |
2235 | 1514 | ||
1515 | res.fi = NULL; | ||
2236 | if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0)) | 1516 | if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0)) |
2237 | goto brd_input; | 1517 | goto brd_input; |
2238 | 1518 | ||
@@ -2308,8 +1588,20 @@ brd_input: | |||
2308 | RT_CACHE_STAT_INC(in_brd); | 1588 | RT_CACHE_STAT_INC(in_brd); |
2309 | 1589 | ||
2310 | local_input: | 1590 | local_input: |
1591 | do_cache = false; | ||
1592 | if (res.fi) { | ||
1593 | if (!(flags & RTCF_DIRECTSRC) && !itag) { | ||
1594 | rth = FIB_RES_NH(res).nh_rth_input; | ||
1595 | if (rt_cache_valid(rth)) { | ||
1596 | dst_hold(&rth->dst); | ||
1597 | goto set_and_out; | ||
1598 | } | ||
1599 | do_cache = true; | ||
1600 | } | ||
1601 | } | ||
1602 | |||
2311 | rth = rt_dst_alloc(net->loopback_dev, | 1603 | rth = rt_dst_alloc(net->loopback_dev, |
2312 | IN_DEV_CONF_GET(in_dev, NOPOLICY), false); | 1604 | IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); |
2313 | if (!rth) | 1605 | if (!rth) |
2314 | goto e_nobufs; | 1606 | goto e_nobufs; |
2315 | 1607 | ||
@@ -2319,31 +1611,23 @@ local_input: | |||
2319 | rth->dst.tclassid = itag; | 1611 | rth->dst.tclassid = itag; |
2320 | #endif | 1612 | #endif |
2321 | 1613 | ||
2322 | rth->rt_key_dst = daddr; | ||
2323 | rth->rt_key_src = saddr; | ||
2324 | rth->rt_genid = rt_genid(net); | 1614 | rth->rt_genid = rt_genid(net); |
2325 | rth->rt_flags = flags|RTCF_LOCAL; | 1615 | rth->rt_flags = flags|RTCF_LOCAL; |
2326 | rth->rt_type = res.type; | 1616 | rth->rt_type = res.type; |
2327 | rth->rt_key_tos = tos; | 1617 | rth->rt_is_input = 1; |
2328 | rth->rt_dst = daddr; | ||
2329 | rth->rt_src = saddr; | ||
2330 | rth->rt_route_iif = dev->ifindex; | ||
2331 | rth->rt_iif = dev->ifindex; | 1618 | rth->rt_iif = dev->ifindex; |
2332 | rth->rt_oif = 0; | ||
2333 | rth->rt_mark = skb->mark; | ||
2334 | rth->rt_pmtu = 0; | 1619 | rth->rt_pmtu = 0; |
2335 | rth->rt_gateway = daddr; | 1620 | rth->rt_gateway = 0; |
2336 | rth->fi = NULL; | ||
2337 | if (res.type == RTN_UNREACHABLE) { | 1621 | if (res.type == RTN_UNREACHABLE) { |
2338 | rth->dst.input= ip_error; | 1622 | rth->dst.input= ip_error; |
2339 | rth->dst.error= -err; | 1623 | rth->dst.error= -err; |
2340 | rth->rt_flags &= ~RTCF_LOCAL; | 1624 | rth->rt_flags &= ~RTCF_LOCAL; |
2341 | } | 1625 | } |
2342 | hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); | 1626 | if (do_cache) |
2343 | rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); | 1627 | rt_cache_route(&FIB_RES_NH(res), rth); |
1628 | set_and_out: | ||
1629 | skb_dst_set(skb, &rth->dst); | ||
2344 | err = 0; | 1630 | err = 0; |
2345 | if (IS_ERR(rth)) | ||
2346 | err = PTR_ERR(rth); | ||
2347 | goto out; | 1631 | goto out; |
2348 | 1632 | ||
2349 | no_route: | 1633 | no_route: |
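
[Editor's note] Both input paths above (forwarding in __mkroute_input() and local delivery here) gate caching on the same test. Extracted into a hedged helper (name illustrative), the rule is that a shared per-nexthop entry must not encode anything that depends on the packet's source:

    static bool input_route_cacheable(const struct fib_result *res,
                                      unsigned int flags, u32 itag)
    {
            /* RTCF_DIRECTSRC and a non-zero routing tag are derived
             * from the source address, so routes carrying them must
             * remain per-skb rather than shared via nh_rth_input.
             */
            return res->fi && !(flags & RTCF_DIRECTSRC) && !itag;
    }
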
@@ -2379,49 +1663,13 @@ martian_source_keep_err: | |||
2379 | goto out; | 1663 | goto out; |
2380 | } | 1664 | } |
2381 | 1665 | ||
2382 | int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 1666 | int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
2383 | u8 tos, struct net_device *dev, bool noref) | 1667 | u8 tos, struct net_device *dev) |
2384 | { | 1668 | { |
2385 | struct rtable *rth; | ||
2386 | unsigned int hash; | ||
2387 | int iif = dev->ifindex; | ||
2388 | struct net *net; | ||
2389 | int res; | 1669 | int res; |
2390 | 1670 | ||
2391 | net = dev_net(dev); | ||
2392 | |||
2393 | rcu_read_lock(); | 1671 | rcu_read_lock(); |
2394 | 1672 | ||
2395 | if (!rt_caching(net)) | ||
2396 | goto skip_cache; | ||
2397 | |||
2398 | tos &= IPTOS_RT_MASK; | ||
2399 | hash = rt_hash(daddr, saddr, iif, rt_genid(net)); | ||
2400 | |||
2401 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | ||
2402 | rth = rcu_dereference(rth->dst.rt_next)) { | ||
2403 | if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) | | ||
2404 | ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | | ||
2405 | (rth->rt_route_iif ^ iif) | | ||
2406 | (rth->rt_key_tos ^ tos)) == 0 && | ||
2407 | rth->rt_mark == skb->mark && | ||
2408 | net_eq(dev_net(rth->dst.dev), net) && | ||
2409 | !rt_is_expired(rth)) { | ||
2410 | if (noref) { | ||
2411 | dst_use_noref(&rth->dst, jiffies); | ||
2412 | skb_dst_set_noref(skb, &rth->dst); | ||
2413 | } else { | ||
2414 | dst_use(&rth->dst, jiffies); | ||
2415 | skb_dst_set(skb, &rth->dst); | ||
2416 | } | ||
2417 | RT_CACHE_STAT_INC(in_hit); | ||
2418 | rcu_read_unlock(); | ||
2419 | return 0; | ||
2420 | } | ||
2421 | RT_CACHE_STAT_INC(in_hlist_search); | ||
2422 | } | ||
2423 | |||
2424 | skip_cache: | ||
2425 | /* Multicast recognition logic is moved from route cache to here. | 1673 | /* Multicast recognition logic is moved from route cache to here. |
2426 | The problem was that too many Ethernet cards have broken/missing | 1674 | The problem was that too many Ethernet cards have broken/missing |
2427 | hardware multicast filters :-( As result the host on multicasting | 1675 | hardware multicast filters :-( As result the host on multicasting |
@@ -2459,17 +1707,16 @@ skip_cache: | |||
2459 | rcu_read_unlock(); | 1707 | rcu_read_unlock(); |
2460 | return res; | 1708 | return res; |
2461 | } | 1709 | } |
2462 | EXPORT_SYMBOL(ip_route_input_common); | 1710 | EXPORT_SYMBOL(ip_route_input); |
2463 | 1711 | ||
2464 | /* called with rcu_read_lock() */ | 1712 | /* called with rcu_read_lock() */ |
2465 | static struct rtable *__mkroute_output(const struct fib_result *res, | 1713 | static struct rtable *__mkroute_output(const struct fib_result *res, |
2466 | const struct flowi4 *fl4, | 1714 | const struct flowi4 *fl4, int orig_oif, |
2467 | __be32 orig_daddr, __be32 orig_saddr, | ||
2468 | int orig_oif, __u8 orig_rtos, | ||
2469 | struct net_device *dev_out, | 1715 | struct net_device *dev_out, |
2470 | unsigned int flags) | 1716 | unsigned int flags) |
2471 | { | 1717 | { |
2472 | struct fib_info *fi = res->fi; | 1718 | struct fib_info *fi = res->fi; |
1719 | struct fib_nh_exception *fnhe; | ||
2473 | struct in_device *in_dev; | 1720 | struct in_device *in_dev; |
2474 | u16 type = res->type; | 1721 | u16 type = res->type; |
2475 | struct rtable *rth; | 1722 | struct rtable *rth; |
@@ -2508,29 +1755,33 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
2508 | fi = NULL; | 1755 | fi = NULL; |
2509 | } | 1756 | } |
2510 | 1757 | ||
1758 | fnhe = NULL; | ||
1759 | if (fi) { | ||
1760 | fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); | ||
1761 | if (!fnhe) { | ||
1762 | rth = FIB_RES_NH(*res).nh_rth_output; | ||
1763 | if (rt_cache_valid(rth)) { | ||
1764 | dst_hold(&rth->dst); | ||
1765 | return rth; | ||
1766 | } | ||
1767 | } | ||
1768 | } | ||
2511 | rth = rt_dst_alloc(dev_out, | 1769 | rth = rt_dst_alloc(dev_out, |
2512 | IN_DEV_CONF_GET(in_dev, NOPOLICY), | 1770 | IN_DEV_CONF_GET(in_dev, NOPOLICY), |
2513 | IN_DEV_CONF_GET(in_dev, NOXFRM)); | 1771 | IN_DEV_CONF_GET(in_dev, NOXFRM), |
1772 | fi && !fnhe); | ||
2514 | if (!rth) | 1773 | if (!rth) |
2515 | return ERR_PTR(-ENOBUFS); | 1774 | return ERR_PTR(-ENOBUFS); |
2516 | 1775 | ||
2517 | rth->dst.output = ip_output; | 1776 | rth->dst.output = ip_output; |
2518 | 1777 | ||
2519 | rth->rt_key_dst = orig_daddr; | ||
2520 | rth->rt_key_src = orig_saddr; | ||
2521 | rth->rt_genid = rt_genid(dev_net(dev_out)); | 1778 | rth->rt_genid = rt_genid(dev_net(dev_out)); |
2522 | rth->rt_flags = flags; | 1779 | rth->rt_flags = flags; |
2523 | rth->rt_type = type; | 1780 | rth->rt_type = type; |
2524 | rth->rt_key_tos = orig_rtos; | 1781 | rth->rt_is_input = 0; |
2525 | rth->rt_dst = fl4->daddr; | ||
2526 | rth->rt_src = fl4->saddr; | ||
2527 | rth->rt_route_iif = 0; | ||
2528 | rth->rt_iif = orig_oif ? : dev_out->ifindex; | 1782 | rth->rt_iif = orig_oif ? : dev_out->ifindex; |
2529 | rth->rt_oif = orig_oif; | ||
2530 | rth->rt_mark = fl4->flowi4_mark; | ||
2531 | rth->rt_pmtu = 0; | 1783 | rth->rt_pmtu = 0; |
2532 | rth->rt_gateway = fl4->daddr; | 1784 | rth->rt_gateway = 0; |
2533 | rth->fi = NULL; | ||
2534 | 1785 | ||
2535 | RT_CACHE_STAT_INC(out_slow_tot); | 1786 | RT_CACHE_STAT_INC(out_slow_tot); |
2536 | 1787 | ||
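
[Editor's note] The output path consults the exception table before the nexthop slot because the two are mutually exclusive: an fnhe describes a single destination, while nh_rth_output is shared by every flow through the nexthop. Hence the `fi && !fnhe` will_cache argument above; when an exception exists, the route is built uncacheable and rt_bind_exception() folds the per-destination gateway/PMTU into it. The rule as a hedged one-liner (helper name illustrative):

    static bool output_route_cacheable(const struct fib_info *fi,
                                       const struct fib_nh_exception *fnhe)
    {
            /* cacheable iff FIB-backed and no per-destination override */
            return fi != NULL && fnhe == NULL;
    }
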
@@ -2553,36 +1804,28 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
2553 | #endif | 1804 | #endif |
2554 | } | 1805 | } |
2555 | 1806 | ||
2556 | rt_set_nexthop(rth, fl4, res, fi, type, 0); | 1807 | rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0); |
2557 | |||
2558 | if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE) | ||
2559 | rth->dst.flags |= DST_NOCACHE; | ||
2560 | 1808 | ||
2561 | return rth; | 1809 | return rth; |
2562 | } | 1810 | } |
2563 | 1811 | ||
2564 | /* | 1812 | /* |
2565 | * Major route resolver routine. | 1813 | * Major route resolver routine. |
2566 | * called with rcu_read_lock(); | ||
2567 | */ | 1814 | */ |
2568 | 1815 | ||
2569 | static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) | 1816 | struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) |
2570 | { | 1817 | { |
2571 | struct net_device *dev_out = NULL; | 1818 | struct net_device *dev_out = NULL; |
2572 | __u8 tos = RT_FL_TOS(fl4); | 1819 | __u8 tos = RT_FL_TOS(fl4); |
2573 | unsigned int flags = 0; | 1820 | unsigned int flags = 0; |
2574 | struct fib_result res; | 1821 | struct fib_result res; |
2575 | struct rtable *rth; | 1822 | struct rtable *rth; |
2576 | __be32 orig_daddr; | ||
2577 | __be32 orig_saddr; | ||
2578 | int orig_oif; | 1823 | int orig_oif; |
2579 | 1824 | ||
2580 | res.tclassid = 0; | 1825 | res.tclassid = 0; |
2581 | res.fi = NULL; | 1826 | res.fi = NULL; |
2582 | res.table = NULL; | 1827 | res.table = NULL; |
2583 | 1828 | ||
2584 | orig_daddr = fl4->daddr; | ||
2585 | orig_saddr = fl4->saddr; | ||
2586 | orig_oif = fl4->flowi4_oif; | 1829 | orig_oif = fl4->flowi4_oif; |
2587 | 1830 | ||
2588 | fl4->flowi4_iif = net->loopback_dev->ifindex; | 1831 | fl4->flowi4_iif = net->loopback_dev->ifindex; |
@@ -2744,59 +1987,12 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) | |||
2744 | 1987 | ||
2745 | 1988 | ||
2746 | make_route: | 1989 | make_route: |
2747 | rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif, | 1990 | rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags); |
2748 | tos, dev_out, flags); | ||
2749 | if (!IS_ERR(rth)) { | ||
2750 | unsigned int hash; | ||
2751 | |||
2752 | hash = rt_hash(orig_daddr, orig_saddr, orig_oif, | ||
2753 | rt_genid(dev_net(dev_out))); | ||
2754 | rth = rt_intern_hash(hash, rth, NULL, orig_oif); | ||
2755 | } | ||
2756 | 1991 | ||
2757 | out: | 1992 | out: |
2758 | rcu_read_unlock(); | 1993 | rcu_read_unlock(); |
2759 | return rth; | 1994 | return rth; |
2760 | } | 1995 | } |
2761 | |||
2762 | struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) | ||
2763 | { | ||
2764 | struct rtable *rth; | ||
2765 | unsigned int hash; | ||
2766 | |||
2767 | if (!rt_caching(net)) | ||
2768 | goto slow_output; | ||
2769 | |||
2770 | hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net)); | ||
2771 | |||
2772 | rcu_read_lock_bh(); | ||
2773 | for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; | ||
2774 | rth = rcu_dereference_bh(rth->dst.rt_next)) { | ||
2775 | if (rth->rt_key_dst == flp4->daddr && | ||
2776 | rth->rt_key_src == flp4->saddr && | ||
2777 | rt_is_output_route(rth) && | ||
2778 | rth->rt_oif == flp4->flowi4_oif && | ||
2779 | rth->rt_mark == flp4->flowi4_mark && | ||
2780 | !((rth->rt_key_tos ^ flp4->flowi4_tos) & | ||
2781 | (IPTOS_RT_MASK | RTO_ONLINK)) && | ||
2782 | net_eq(dev_net(rth->dst.dev), net) && | ||
2783 | !rt_is_expired(rth)) { | ||
2784 | dst_use(&rth->dst, jiffies); | ||
2785 | RT_CACHE_STAT_INC(out_hit); | ||
2786 | rcu_read_unlock_bh(); | ||
2787 | if (!flp4->saddr) | ||
2788 | flp4->saddr = rth->rt_src; | ||
2789 | if (!flp4->daddr) | ||
2790 | flp4->daddr = rth->rt_dst; | ||
2791 | return rth; | ||
2792 | } | ||
2793 | RT_CACHE_STAT_INC(out_hlist_search); | ||
2794 | } | ||
2795 | rcu_read_unlock_bh(); | ||
2796 | |||
2797 | slow_output: | ||
2798 | return ip_route_output_slow(net, flp4); | ||
2799 | } | ||
2800 | EXPORT_SYMBOL_GPL(__ip_route_output_key); | 1996 | EXPORT_SYMBOL_GPL(__ip_route_output_key); |
2801 | 1997 | ||
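
[Editor's note] With the pre-lookup hash walk gone, __ip_route_output_key() is the whole story: every call performs a FIB lookup and either reuses a per-nexthop dst or builds a fresh one. The caller contract is unchanged; a hedged usage sketch via the ip_route_output_key() wrapper (function name and address are illustrative):

    static int example_output_route(struct net *net)
    {
            struct flowi4 fl4 = {
                    .daddr = htonl(0xC0A80001),     /* 192.168.0.1 */
            };
            struct rtable *rt = ip_route_output_key(net, &fl4);

            if (IS_ERR(rt))
                    return PTR_ERR(rt);
            /* transmit via &rt->dst; fl4.saddr now holds the source
             * address the lookup selected
             */
            ip_rt_put(rt);
            return 0;
    }
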
2802 | static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) | 1998 | static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) |
@@ -2830,7 +2026,6 @@ static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst, | |||
2830 | static struct dst_ops ipv4_dst_blackhole_ops = { | 2026 | static struct dst_ops ipv4_dst_blackhole_ops = { |
2831 | .family = AF_INET, | 2027 | .family = AF_INET, |
2832 | .protocol = cpu_to_be16(ETH_P_IP), | 2028 | .protocol = cpu_to_be16(ETH_P_IP), |
2833 | .destroy = ipv4_dst_destroy, | ||
2834 | .check = ipv4_blackhole_dst_check, | 2029 | .check = ipv4_blackhole_dst_check, |
2835 | .mtu = ipv4_blackhole_mtu, | 2030 | .mtu = ipv4_blackhole_mtu, |
2836 | .default_advmss = ipv4_default_advmss, | 2031 | .default_advmss = ipv4_default_advmss, |
@@ -2842,9 +2037,10 @@ static struct dst_ops ipv4_dst_blackhole_ops = { | |||
2842 | 2037 | ||
2843 | struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) | 2038 | struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) |
2844 | { | 2039 | { |
2845 | struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, 0, 0); | ||
2846 | struct rtable *ort = (struct rtable *) dst_orig; | 2040 | struct rtable *ort = (struct rtable *) dst_orig; |
2041 | struct rtable *rt; | ||
2847 | 2042 | ||
2043 | rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0); | ||
2848 | if (rt) { | 2044 | if (rt) { |
2849 | struct dst_entry *new = &rt->dst; | 2045 | struct dst_entry *new = &rt->dst; |
2850 | 2046 | ||
@@ -2856,24 +2052,14 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or | |||
2856 | if (new->dev) | 2052 | if (new->dev) |
2857 | dev_hold(new->dev); | 2053 | dev_hold(new->dev); |
2858 | 2054 | ||
2859 | rt->rt_key_dst = ort->rt_key_dst; | 2055 | rt->rt_is_input = ort->rt_is_input; |
2860 | rt->rt_key_src = ort->rt_key_src; | ||
2861 | rt->rt_key_tos = ort->rt_key_tos; | ||
2862 | rt->rt_route_iif = ort->rt_route_iif; | ||
2863 | rt->rt_iif = ort->rt_iif; | 2056 | rt->rt_iif = ort->rt_iif; |
2864 | rt->rt_oif = ort->rt_oif; | ||
2865 | rt->rt_mark = ort->rt_mark; | ||
2866 | rt->rt_pmtu = ort->rt_pmtu; | 2057 | rt->rt_pmtu = ort->rt_pmtu; |
2867 | 2058 | ||
2868 | rt->rt_genid = rt_genid(net); | 2059 | rt->rt_genid = rt_genid(net); |
2869 | rt->rt_flags = ort->rt_flags; | 2060 | rt->rt_flags = ort->rt_flags; |
2870 | rt->rt_type = ort->rt_type; | 2061 | rt->rt_type = ort->rt_type; |
2871 | rt->rt_dst = ort->rt_dst; | ||
2872 | rt->rt_src = ort->rt_src; | ||
2873 | rt->rt_gateway = ort->rt_gateway; | 2062 | rt->rt_gateway = ort->rt_gateway; |
2874 | rt->fi = ort->fi; | ||
2875 | if (rt->fi) | ||
2876 | atomic_inc(&rt->fi->fib_clntref); | ||
2877 | 2063 | ||
2878 | dst_free(new); | 2064 | dst_free(new); |
2879 | } | 2065 | } |
@@ -2900,9 +2086,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, | |||
2900 | } | 2086 | } |
2901 | EXPORT_SYMBOL_GPL(ip_route_output_flow); | 2087 | EXPORT_SYMBOL_GPL(ip_route_output_flow); |
2902 | 2088 | ||
2903 | static int rt_fill_info(struct net *net, | 2089 | static int rt_fill_info(struct net *net, __be32 dst, __be32 src, |
2904 | struct sk_buff *skb, u32 pid, u32 seq, int event, | 2090 | struct flowi4 *fl4, struct sk_buff *skb, u32 pid, |
2905 | int nowait, unsigned int flags) | 2091 | u32 seq, int event, int nowait, unsigned int flags) |
2906 | { | 2092 | { |
2907 | struct rtable *rt = skb_rtable(skb); | 2093 | struct rtable *rt = skb_rtable(skb); |
2908 | struct rtmsg *r; | 2094 | struct rtmsg *r; |
@@ -2919,7 +2105,7 @@ static int rt_fill_info(struct net *net, | |||
2919 | r->rtm_family = AF_INET; | 2105 | r->rtm_family = AF_INET; |
2920 | r->rtm_dst_len = 32; | 2106 | r->rtm_dst_len = 32; |
2921 | r->rtm_src_len = 0; | 2107 | r->rtm_src_len = 0; |
2922 | r->rtm_tos = rt->rt_key_tos; | 2108 | r->rtm_tos = fl4->flowi4_tos; |
2923 | r->rtm_table = RT_TABLE_MAIN; | 2109 | r->rtm_table = RT_TABLE_MAIN; |
2924 | if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN)) | 2110 | if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN)) |
2925 | goto nla_put_failure; | 2111 | goto nla_put_failure; |
@@ -2930,11 +2116,11 @@ static int rt_fill_info(struct net *net, | |||
2930 | if (rt->rt_flags & RTCF_NOTIFY) | 2116 | if (rt->rt_flags & RTCF_NOTIFY) |
2931 | r->rtm_flags |= RTM_F_NOTIFY; | 2117 | r->rtm_flags |= RTM_F_NOTIFY; |
2932 | 2118 | ||
2933 | if (nla_put_be32(skb, RTA_DST, rt->rt_dst)) | 2119 | if (nla_put_be32(skb, RTA_DST, dst)) |
2934 | goto nla_put_failure; | 2120 | goto nla_put_failure; |
2935 | if (rt->rt_key_src) { | 2121 | if (src) { |
2936 | r->rtm_src_len = 32; | 2122 | r->rtm_src_len = 32; |
2937 | if (nla_put_be32(skb, RTA_SRC, rt->rt_key_src)) | 2123 | if (nla_put_be32(skb, RTA_SRC, src)) |
2938 | goto nla_put_failure; | 2124 | goto nla_put_failure; |
2939 | } | 2125 | } |
2940 | if (rt->dst.dev && | 2126 | if (rt->dst.dev && |
@@ -2946,11 +2132,11 @@ static int rt_fill_info(struct net *net, | |||
2946 | goto nla_put_failure; | 2132 | goto nla_put_failure; |
2947 | #endif | 2133 | #endif |
2948 | if (!rt_is_input_route(rt) && | 2134 | if (!rt_is_input_route(rt) && |
2949 | rt->rt_src != rt->rt_key_src) { | 2135 | fl4->saddr != src) { |
2950 | if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_src)) | 2136 | if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr)) |
2951 | goto nla_put_failure; | 2137 | goto nla_put_failure; |
2952 | } | 2138 | } |
2953 | if (rt->rt_dst != rt->rt_gateway && | 2139 | if (rt->rt_gateway && |
2954 | nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) | 2140 | nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) |
2955 | goto nla_put_failure; | 2141 | goto nla_put_failure; |
2956 | 2142 | ||
@@ -2960,8 +2146,8 @@ static int rt_fill_info(struct net *net, | |||
2960 | if (rtnetlink_put_metrics(skb, metrics) < 0) | 2146 | if (rtnetlink_put_metrics(skb, metrics) < 0) |
2961 | goto nla_put_failure; | 2147 | goto nla_put_failure; |
2962 | 2148 | ||
2963 | if (rt->rt_mark && | 2149 | if (fl4->flowi4_mark && |
2964 | nla_put_be32(skb, RTA_MARK, rt->rt_mark)) | 2150 | nla_put_be32(skb, RTA_MARK, fl4->flowi4_mark)) |
2965 | goto nla_put_failure; | 2151 | goto nla_put_failure; |
2966 | 2152 | ||
2967 | error = rt->dst.error; | 2153 | error = rt->dst.error; |
@@ -2974,29 +2160,8 @@ static int rt_fill_info(struct net *net, | |||
2974 | } | 2160 | } |
2975 | 2161 | ||
2976 | if (rt_is_input_route(rt)) { | 2162 | if (rt_is_input_route(rt)) { |
2977 | #ifdef CONFIG_IP_MROUTE | 2163 | if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) |
2978 | __be32 dst = rt->rt_dst; | 2164 | goto nla_put_failure; |
2979 | |||
2980 | if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && | ||
2981 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { | ||
2982 | int err = ipmr_get_route(net, skb, | ||
2983 | rt->rt_src, rt->rt_dst, | ||
2984 | r, nowait); | ||
2985 | if (err <= 0) { | ||
2986 | if (!nowait) { | ||
2987 | if (err == 0) | ||
2988 | return 0; | ||
2989 | goto nla_put_failure; | ||
2990 | } else { | ||
2991 | if (err == -EMSGSIZE) | ||
2992 | goto nla_put_failure; | ||
2993 | error = err; | ||
2994 | } | ||
2995 | } | ||
2996 | } else | ||
2997 | #endif | ||
2998 | if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) | ||
2999 | goto nla_put_failure; | ||
3000 | } | 2165 | } |
3001 | 2166 | ||
3002 | if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) | 2167 | if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) |
@@ -3015,6 +2180,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void | |||
3015 | struct rtmsg *rtm; | 2180 | struct rtmsg *rtm; |
3016 | struct nlattr *tb[RTA_MAX+1]; | 2181 | struct nlattr *tb[RTA_MAX+1]; |
3017 | struct rtable *rt = NULL; | 2182 | struct rtable *rt = NULL; |
2183 | struct flowi4 fl4; | ||
3018 | __be32 dst = 0; | 2184 | __be32 dst = 0; |
3019 | __be32 src = 0; | 2185 | __be32 src = 0; |
3020 | u32 iif; | 2186 | u32 iif; |
@@ -3049,6 +2215,13 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void | |||
3049 | iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; | 2215 | iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; |
3050 | mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; | 2216 | mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; |
3051 | 2217 | ||
2218 | memset(&fl4, 0, sizeof(fl4)); | ||
2219 | fl4.daddr = dst; | ||
2220 | fl4.saddr = src; | ||
2221 | fl4.flowi4_tos = rtm->rtm_tos; | ||
2222 | fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; | ||
2223 | fl4.flowi4_mark = mark; | ||
2224 | |||
3052 | if (iif) { | 2225 | if (iif) { |
3053 | struct net_device *dev; | 2226 | struct net_device *dev; |
3054 | 2227 | ||
@@ -3069,13 +2242,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void | |||
3069 | if (err == 0 && rt->dst.error) | 2242 | if (err == 0 && rt->dst.error) |
3070 | err = -rt->dst.error; | 2243 | err = -rt->dst.error; |
3071 | } else { | 2244 | } else { |
3072 | struct flowi4 fl4 = { | ||
3073 | .daddr = dst, | ||
3074 | .saddr = src, | ||
3075 | .flowi4_tos = rtm->rtm_tos, | ||
3076 | .flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, | ||
3077 | .flowi4_mark = mark, | ||
3078 | }; | ||
3079 | rt = ip_route_output_key(net, &fl4); | 2245 | rt = ip_route_output_key(net, &fl4); |
3080 | 2246 | ||
3081 | err = 0; | 2247 | err = 0; |
@@ -3090,7 +2256,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void | |||
3090 | if (rtm->rtm_flags & RTM_F_NOTIFY) | 2256 | if (rtm->rtm_flags & RTM_F_NOTIFY) |
3091 | rt->rt_flags |= RTCF_NOTIFY; | 2257 | rt->rt_flags |= RTCF_NOTIFY; |
3092 | 2258 | ||
3093 | err = rt_fill_info(net, skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, | 2259 | err = rt_fill_info(net, dst, src, &fl4, skb, |
2260 | NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, | ||
3094 | RTM_NEWROUTE, 0, 0); | 2261 | RTM_NEWROUTE, 0, 0); |
3095 | if (err <= 0) | 2262 | if (err <= 0) |
3096 | goto errout_free; | 2263 | goto errout_free; |
@@ -3106,43 +2273,6 @@ errout_free: | |||
3106 | 2273 | ||
3107 | int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) | 2274 | int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) |
3108 | { | 2275 | { |
3109 | struct rtable *rt; | ||
3110 | int h, s_h; | ||
3111 | int idx, s_idx; | ||
3112 | struct net *net; | ||
3113 | |||
3114 | net = sock_net(skb->sk); | ||
3115 | |||
3116 | s_h = cb->args[0]; | ||
3117 | if (s_h < 0) | ||
3118 | s_h = 0; | ||
3119 | s_idx = idx = cb->args[1]; | ||
3120 | for (h = s_h; h <= rt_hash_mask; h++, s_idx = 0) { | ||
3121 | if (!rt_hash_table[h].chain) | ||
3122 | continue; | ||
3123 | rcu_read_lock_bh(); | ||
3124 | for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; | ||
3125 | rt = rcu_dereference_bh(rt->dst.rt_next), idx++) { | ||
3126 | if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx) | ||
3127 | continue; | ||
3128 | if (rt_is_expired(rt)) | ||
3129 | continue; | ||
3130 | skb_dst_set_noref(skb, &rt->dst); | ||
3131 | if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, | ||
3132 | cb->nlh->nlmsg_seq, RTM_NEWROUTE, | ||
3133 | 1, NLM_F_MULTI) <= 0) { | ||
3134 | skb_dst_drop(skb); | ||
3135 | rcu_read_unlock_bh(); | ||
3136 | goto done; | ||
3137 | } | ||
3138 | skb_dst_drop(skb); | ||
3139 | } | ||
3140 | rcu_read_unlock_bh(); | ||
3141 | } | ||
3142 | |||
3143 | done: | ||
3144 | cb->args[0] = h; | ||
3145 | cb->args[1] = idx; | ||
3146 | return skb->len; | 2276 | return skb->len; |
3147 | } | 2277 | } |
3148 | 2278 | ||
@@ -3376,22 +2506,6 @@ static __net_initdata struct pernet_operations ipv4_inetpeer_ops = { | |||
3376 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; | 2506 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; |
3377 | #endif /* CONFIG_IP_ROUTE_CLASSID */ | 2507 | #endif /* CONFIG_IP_ROUTE_CLASSID */ |
3378 | 2508 | ||
3379 | static __initdata unsigned long rhash_entries; | ||
3380 | static int __init set_rhash_entries(char *str) | ||
3381 | { | ||
3382 | ssize_t ret; | ||
3383 | |||
3384 | if (!str) | ||
3385 | return 0; | ||
3386 | |||
3387 | ret = kstrtoul(str, 0, &rhash_entries); | ||
3388 | if (ret) | ||
3389 | return 0; | ||
3390 | |||
3391 | return 1; | ||
3392 | } | ||
3393 | __setup("rhash_entries=", set_rhash_entries); | ||
3394 | |||
3395 | int __init ip_rt_init(void) | 2509 | int __init ip_rt_init(void) |
3396 | { | 2510 | { |
3397 | int rc = 0; | 2511 | int rc = 0; |
@@ -3414,31 +2528,12 @@ int __init ip_rt_init(void) | |||
3414 | if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0) | 2528 | if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0) |
3415 | panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n"); | 2529 | panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n"); |
3416 | 2530 | ||
3417 | rt_hash_table = (struct rt_hash_bucket *) | 2531 | ipv4_dst_ops.gc_thresh = ~0; |
3418 | alloc_large_system_hash("IP route cache", | 2532 | ip_rt_max_size = INT_MAX; |
3419 | sizeof(struct rt_hash_bucket), | ||
3420 | rhash_entries, | ||
3421 | (totalram_pages >= 128 * 1024) ? | ||
3422 | 15 : 17, | ||
3423 | 0, | ||
3424 | &rt_hash_log, | ||
3425 | &rt_hash_mask, | ||
3426 | 0, | ||
3427 | rhash_entries ? 0 : 512 * 1024); | ||
3428 | memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket)); | ||
3429 | rt_hash_lock_init(); | ||
3430 | |||
3431 | ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1); | ||
3432 | ip_rt_max_size = (rt_hash_mask + 1) * 16; | ||
3433 | 2533 | ||
3434 | devinet_init(); | 2534 | devinet_init(); |
3435 | ip_fib_init(); | 2535 | ip_fib_init(); |
3436 | 2536 | ||
3437 | INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); | ||
3438 | expires_ljiffies = jiffies; | ||
3439 | schedule_delayed_work(&expires_work, | ||
3440 | net_random() % ip_rt_gc_interval + ip_rt_gc_interval); | ||
3441 | |||
3442 | if (ip_rt_proc_init()) | 2537 | if (ip_rt_proc_init()) |
3443 | pr_err("Unable to create route proc files\n"); | 2538 | pr_err("Unable to create route proc files\n"); |
3444 | #ifdef CONFIG_XFRM | 2539 | #ifdef CONFIG_XFRM |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1d8b75a58981..59110caeb074 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -824,7 +824,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, | |||
824 | struct sk_buff * skb; | 824 | struct sk_buff * skb; |
825 | 825 | ||
826 | /* First, grab a route. */ | 826 | /* First, grab a route. */ |
827 | if (!dst && (dst = inet_csk_route_req(sk, &fl4, req, nocache)) == NULL) | 827 | if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) |
828 | return -1; | 828 | return -1; |
829 | 829 | ||
830 | skb = tcp_make_synack(sk, dst, req, rvp); | 830 | skb = tcp_make_synack(sk, dst, req, rvp); |
@@ -1378,7 +1378,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1378 | */ | 1378 | */ |
1379 | if (tmp_opt.saw_tstamp && | 1379 | if (tmp_opt.saw_tstamp && |
1380 | tcp_death_row.sysctl_tw_recycle && | 1380 | tcp_death_row.sysctl_tw_recycle && |
1381 | (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL && | 1381 | (dst = inet_csk_route_req(sk, &fl4, req)) != NULL && |
1382 | fl4.daddr == saddr) { | 1382 | fl4.daddr == saddr) { |
1383 | if (!tcp_peer_is_proven(req, dst, true)) { | 1383 | if (!tcp_peer_is_proven(req, dst, true)) { |
1384 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); | 1384 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); |
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 06814b6216dc..58d23a572509 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c | |||
@@ -27,8 +27,8 @@ static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) | |||
27 | if (skb_dst(skb) == NULL) { | 27 | if (skb_dst(skb) == NULL) { |
28 | const struct iphdr *iph = ip_hdr(skb); | 28 | const struct iphdr *iph = ip_hdr(skb); |
29 | 29 | ||
30 | if (ip_route_input_noref(skb, iph->daddr, iph->saddr, | 30 | if (ip_route_input(skb, iph->daddr, iph->saddr, |
31 | iph->tos, skb->dev)) | 31 | iph->tos, skb->dev)) |
32 | goto drop; | 32 | goto drop; |
33 | } | 33 | } |
34 | return dst_input(skb); | 34 | return dst_input(skb); |
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index fcf7678bc009..c6281847f16a 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -79,24 +79,17 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, | |||
79 | struct rtable *rt = (struct rtable *)xdst->route; | 79 | struct rtable *rt = (struct rtable *)xdst->route; |
80 | const struct flowi4 *fl4 = &fl->u.ip4; | 80 | const struct flowi4 *fl4 = &fl->u.ip4; |
81 | 81 | ||
82 | xdst->u.rt.rt_key_dst = fl4->daddr; | ||
83 | xdst->u.rt.rt_key_src = fl4->saddr; | ||
84 | xdst->u.rt.rt_key_tos = fl4->flowi4_tos; | ||
85 | xdst->u.rt.rt_route_iif = fl4->flowi4_iif; | ||
86 | xdst->u.rt.rt_iif = fl4->flowi4_iif; | 82 | xdst->u.rt.rt_iif = fl4->flowi4_iif; |
87 | xdst->u.rt.rt_oif = fl4->flowi4_oif; | ||
88 | xdst->u.rt.rt_mark = fl4->flowi4_mark; | ||
89 | 83 | ||
90 | xdst->u.dst.dev = dev; | 84 | xdst->u.dst.dev = dev; |
91 | dev_hold(dev); | 85 | dev_hold(dev); |
92 | 86 | ||
93 | /* Sheit... I remember I did this right. Apparently, | 87 | /* Sheit... I remember I did this right. Apparently, |
94 | * it was magically lost, so this code needs audit */ | 88 | * it was magically lost, so this code needs audit */ |
89 | xdst->u.rt.rt_is_input = rt->rt_is_input; | ||
95 | xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | | 90 | xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | |
96 | RTCF_LOCAL); | 91 | RTCF_LOCAL); |
97 | xdst->u.rt.rt_type = rt->rt_type; | 92 | xdst->u.rt.rt_type = rt->rt_type; |
98 | xdst->u.rt.rt_src = rt->rt_src; | ||
99 | xdst->u.rt.rt_dst = rt->rt_dst; | ||
100 | xdst->u.rt.rt_gateway = rt->rt_gateway; | 93 | xdst->u.rt.rt_gateway = rt->rt_gateway; |
101 | xdst->u.rt.rt_pmtu = rt->rt_pmtu; | 94 | xdst->u.rt.rt_pmtu = rt->rt_pmtu; |
102 | 95 | ||
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 84f6564dd372..cf02cb97bbdd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -281,7 +281,7 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, | |||
281 | struct fib6_table *table) | 281 | struct fib6_table *table) |
282 | { | 282 | { |
283 | struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, | 283 | struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, |
284 | 0, 0, flags); | 284 | 0, DST_OBSOLETE_NONE, flags); |
285 | 285 | ||
286 | if (rt) { | 286 | if (rt) { |
287 | struct dst_entry *dst = &rt->dst; | 287 | struct dst_entry *dst = &rt->dst; |
@@ -985,7 +985,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori | |||
985 | struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig; | 985 | struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig; |
986 | struct dst_entry *new = NULL; | 986 | struct dst_entry *new = NULL; |
987 | 987 | ||
988 | rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0); | 988 | rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0); |
989 | if (rt) { | 989 | if (rt) { |
990 | new = &rt->dst; | 990 | new = &rt->dst; |
991 | 991 | ||
diff --git a/net/sctp/transport.c b/net/sctp/transport.c index d1c652ed2f3d..c97472b248a2 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c | |||
@@ -217,7 +217,7 @@ void sctp_transport_set_owner(struct sctp_transport *transport, | |||
217 | void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) | 217 | void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) |
218 | { | 218 | { |
219 | /* If we don't have a fresh route, look one up */ | 219 | /* If we don't have a fresh route, look one up */ |
220 | if (!transport->dst || transport->dst->obsolete > 1) { | 220 | if (!transport->dst || transport->dst->obsolete) { |
221 | dst_release(transport->dst); | 221 | dst_release(transport->dst); |
222 | transport->af_specific->get_dst(transport, &transport->saddr, | 222 | transport->af_specific->get_dst(transport, &transport->saddr, |
223 | &transport->fl, sk); | 223 | &transport->fl, sk); |
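
[Editor's note] The SCTP change above is the payoff of the named obsolete states: "obsolete > 1" only worked while the values were ad-hoc integers, and would wrongly trust a DST_OBSOLETE_FORCE_CHK (-1) route. Testing for any non-zero value matches the new contract; as a hedged helper (name illustrative):

    static bool dst_needs_refresh(const struct dst_entry *dst)
    {
            /* DST_OBSOLETE_NONE (0) is the only state that may be
             * used without revalidation; DEAD (positive) and
             * FORCE_CHK/KILL (negative) all demand caller action.
             */
            return dst->obsolete != DST_OBSOLETE_NONE;
    }
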
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 65bd1ca51517..c5a5165a5927 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c | |||
@@ -1350,7 +1350,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) | |||
1350 | default: | 1350 | default: |
1351 | BUG(); | 1351 | BUG(); |
1352 | } | 1352 | } |
1353 | xdst = dst_alloc(dst_ops, NULL, 0, 0, 0); | 1353 | xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0); |
1354 | 1354 | ||
1355 | if (likely(xdst)) { | 1355 | if (likely(xdst)) { |
1356 | struct dst_entry *dst = &xdst->u.dst; | 1356 | struct dst_entry *dst = &xdst->u.dst; |
@@ -1477,7 +1477,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, | |||
1477 | dst1->xfrm = xfrm[i]; | 1477 | dst1->xfrm = xfrm[i]; |
1478 | xdst->xfrm_genid = xfrm[i]->genid; | 1478 | xdst->xfrm_genid = xfrm[i]->genid; |
1479 | 1479 | ||
1480 | dst1->obsolete = -1; | 1480 | dst1->obsolete = DST_OBSOLETE_FORCE_CHK; |
1481 | dst1->flags |= DST_HOST; | 1481 | dst1->flags |= DST_HOST; |
1482 | dst1->lastuse = now; | 1482 | dst1->lastuse = now; |
1483 | 1483 | ||
@@ -2219,12 +2219,13 @@ EXPORT_SYMBOL(__xfrm_route_forward); | |||
2219 | static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) | 2219 | static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) |
2220 | { | 2220 | { |
2221 | /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete | 2221 | /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete |
2222 | * to "-1" to force all XFRM destinations to get validated by | 2222 | * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to |
2223 | * dst_ops->check on every use. We do this because when a | 2223 | * get validated by dst_ops->check on every use. We do this |
2224 | * normal route referenced by an XFRM dst is obsoleted we do | 2224 | * because when a normal route referenced by an XFRM dst is |
2225 | * not go looking around for all parent referencing XFRM dsts | 2225 | * obsoleted we do not go looking around for all parent |
2226 | * so that we can invalidate them. It is just too much work. | 2226 | * referencing XFRM dsts so that we can invalidate them. It |
2227 | * Instead we make the checks here on every use. For example: | 2227 | * is just too much work. Instead we make the checks here on |
2228 | * every use. For example: | ||
2228 | * | 2229 | * |
2229 | * XFRM dst A --> IPv4 dst X | 2230 | * XFRM dst A --> IPv4 dst X |
2230 | * | 2231 | * |
@@ -2234,9 +2235,9 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) | |||
2234 | * stale_bundle() check. | 2235 | * stale_bundle() check. |
2235 | * | 2236 | * |
2236 | * When a policy's bundle is pruned, we dst_free() the XFRM | 2237 | * When a policy's bundle is pruned, we dst_free() the XFRM |
2237 | * dst which causes its ->obsolete field to be set to a | 2238 | * dst which causes its ->obsolete field to be set to
2238 | * positive non-zero integer. If an XFRM dst has been pruned | 2239 | * DST_OBSOLETE_DEAD. If an XFRM dst has been pruned like |
2239 | * like this, we want to force a new route lookup. | 2240 | * this, we want to force a new route lookup. |
2240 | */ | 2241 | */ |
2241 | if (dst->obsolete < 0 && !stale_bundle(dst)) | 2242 | if (dst->obsolete < 0 && !stale_bundle(dst)) |
2242 | return dst; | 2243 | return dst; |