Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--	net/ipv4/route.c	120
1 file changed, 63 insertions(+), 57 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4c011ec69ed4..cbadb942c332 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -89,6 +89,7 @@
 #include <linux/rcupdate.h>
 #include <linux/times.h>
 #include <linux/slab.h>
+#include <linux/jhash.h>
 #include <net/dst.h>
 #include <net/net_namespace.h>
 #include <net/protocol.h>
@@ -139,11 +140,6 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
 			   struct sk_buff *skb);
 static void ipv4_dst_destroy(struct dst_entry *dst);
 
-static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
-			    int how)
-{
-}
-
 static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
 {
 	WARN_ON(1);
@@ -162,7 +158,6 @@ static struct dst_ops ipv4_dst_ops = {
 	.mtu =			ipv4_mtu,
 	.cow_metrics =		ipv4_cow_metrics,
 	.destroy =		ipv4_dst_destroy,
-	.ifdown =		ipv4_dst_ifdown,
 	.negative_advice =	ipv4_negative_advice,
 	.link_failure =		ipv4_link_failure,
 	.update_pmtu =		ip_rt_update_pmtu,
@@ -194,7 +189,7 @@ const __u8 ip_tos2prio[16] = {
 EXPORT_SYMBOL(ip_tos2prio);
 
 static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
-#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
+#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
 
 #ifdef CONFIG_PROC_FS
 static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
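raw_cpu_inc() differs from __this_cpu_inc() only in skipping the preemption-safety debug check; that is acceptable here because these cache statistics are approximate by design and a rarely lost increment is harmless. A loose user-space analogue, using one plain counter per thread in place of per-CPU data (an illustrative sketch, not kernel code):

#include <pthread.h>
#include <stdio.h>

/* One counter per thread: increments need no lock and no atomic op,
 * mirroring how per-CPU counters avoid cross-CPU contention. */
static _Thread_local unsigned long in_slow_tot;

#define RT_CACHE_STAT_INC(field) ((field)++)	/* toy stand-in */

static void *worker(void *arg)
{
	(void)arg;
	for (int i = 0; i < 1000; i++)
		RT_CACHE_STAT_INC(in_slow_tot);
	printf("this thread counted %lu\n", in_slow_tot);
	return NULL;
}

int main(void)
{
	pthread_t t[2];

	for (int i = 0; i < 2; i++)
		pthread_create(&t[i], NULL, worker, NULL);
	for (int i = 0; i < 2; i++)
		pthread_join(t[i], NULL);
	return 0;
}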
@@ -462,39 +457,45 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
 	return neigh_create(&arp_tbl, pkey, dev);
 }
 
-/*
- * Peer allocation may fail only in serious out-of-memory conditions. However
- * we still can generate some output.
- * Random ID selection looks a bit dangerous because we have no chances to
- * select ID being unique in a reasonable period of time.
- * But broken packet identifier may be better than no packet at all.
+#define IP_IDENTS_SZ 2048u
+struct ip_ident_bucket {
+	atomic_t	id;
+	u32		stamp32;
+};
+
+static struct ip_ident_bucket *ip_idents __read_mostly;
+
+/* In order to protect privacy, we add a perturbation to identifiers
+ * if one generator is seldom used. This makes hard for an attacker
+ * to infer how many packets were sent between two points in time.
  */
-static void ip_select_fb_ident(struct iphdr *iph)
+u32 ip_idents_reserve(u32 hash, int segs)
 {
-	static DEFINE_SPINLOCK(ip_fb_id_lock);
-	static u32 ip_fallback_id;
-	u32 salt;
+	struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ;
+	u32 old = ACCESS_ONCE(bucket->stamp32);
+	u32 now = (u32)jiffies;
+	u32 delta = 0;
+
+	if (old != now && cmpxchg(&bucket->stamp32, old, now) == old)
+		delta = prandom_u32_max(now - old);
 
-	spin_lock_bh(&ip_fb_id_lock);
-	salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
-	iph->id = htons(salt & 0xFFFF);
-	ip_fallback_id = salt;
-	spin_unlock_bh(&ip_fb_id_lock);
+	return atomic_add_return(segs + delta, &bucket->id) - segs;
 }
+EXPORT_SYMBOL(ip_idents_reserve);
 
-void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
+void __ip_select_ident(struct iphdr *iph, int segs)
 {
-	struct net *net = dev_net(dst->dev);
-	struct inet_peer *peer;
+	static u32 ip_idents_hashrnd __read_mostly;
+	u32 hash, id;
 
-	peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
-	if (peer) {
-		iph->id = htons(inet_getid(peer, more));
-		inet_putpeer(peer);
-		return;
-	}
+	net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
 
-	ip_select_fb_ident(iph);
+	hash = jhash_3words((__force u32)iph->daddr,
+			    (__force u32)iph->saddr,
+			    iph->protocol,
+			    ip_idents_hashrnd);
+	id = ip_idents_reserve(hash, segs);
+	iph->id = htons(id);
 }
 EXPORT_SYMBOL(__ip_select_ident);
 
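This generator is the heart of the diff. IP IDs now come from 2048 buckets, each an atomic counter plus a jiffies stamp; a bucket that sat idle gets a random forward jump bounded by its idle time, so an off-path attacker can no longer count packets sent between two probes from the ID delta. A minimal user-space model of ip_idents_reserve() (a sketch under stated assumptions: now_jiffies() and rand_below() stand in for jiffies and prandom_u32_max(), and C11 atomics replace the kernel's):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define IP_IDENTS_SZ 2048u

struct ip_ident_bucket {
	atomic_uint id;
	atomic_uint stamp32;
};

static struct ip_ident_bucket ip_idents[IP_IDENTS_SZ];

static unsigned int now_jiffies(void)
{
	/* ~100 HZ tick counter; stands in for the kernel's jiffies. */
	return (unsigned int)(clock() / (CLOCKS_PER_SEC / 100));
}

static unsigned int rand_below(unsigned int bound)
{
	/* Stands in for prandom_u32_max(). */
	return bound ? (unsigned int)rand() % bound : 0;
}

static unsigned int model_ip_idents_reserve(unsigned int hash, int segs)
{
	struct ip_ident_bucket *bucket = &ip_idents[hash % IP_IDENTS_SZ];
	unsigned int old = atomic_load(&bucket->stamp32);
	unsigned int now = now_jiffies();
	unsigned int delta = 0;

	/* One winner per tick updates the stamp and earns a random jump
	 * proportional to how long the bucket sat unused. */
	if (old != now &&
	    atomic_compare_exchange_strong(&bucket->stamp32, &old, now))
		delta = rand_below(now - old);

	/* fetch_add returns the pre-add value, so "+ delta" yields the
	 * same result as the kernel's atomic_add_return(...) - segs. */
	return atomic_fetch_add(&bucket->id, segs + delta) + delta;
}

int main(void)
{
	srand(1);	/* deterministic demo seed */
	printf("first id: %u\n", model_ip_idents_reserve(12345u, 1) & 0xffff);
	printf("next id:  %u\n", model_ip_idents_reserve(12345u, 1) & 0xffff);
	return 0;
}

The 0xffff mask mirrors the htons(id) store into the 16-bit IP ID field in __ip_select_ident().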
@@ -697,7 +698,6 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 
 out_unlock:
 	spin_unlock_bh(&fnhe_lock);
-	return;
 }
 
 static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
@@ -746,7 +746,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
 	}
 
 	n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
-	if (n) {
+	if (!IS_ERR(n)) {
 		if (!(n->nud_state & NUD_VALID)) {
 			neigh_event_send(n, NULL);
 		} else {
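The old "if (n)" test was a real bug: ipv4_neigh_lookup() can forward an error from neigh_create(), and such errors are encoded in the pointer value itself, which is non-NULL. A self-contained user-space model of the kernel's ERR_PTR/IS_ERR convention (illustrative; only the MAX_ERRNO value mirrors the kernel's):

#include <stdbool.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)
{
	return (void *)error;		/* e.g. ERR_PTR(-ENOBUFS) */
}

static inline bool IS_ERR(const void *ptr)
{
	/* The top 4095 addresses are reserved for encoded errnos. */
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
	void *n = ERR_PTR(-105);	/* -ENOBUFS */

	/* "if (n)" would pass and dereference garbage; IS_ERR catches it. */
	printf("n is %sNULL, IS_ERR(n) = %d\n",
	       n ? "non-" : "", (int)IS_ERR(n));
	return 0;
}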
@@ -1000,6 +1000,9 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
 	struct flowi4 fl4;
 	struct rtable *rt;
 
+	if (!mark)
+		mark = IP4_REPLY_MARK(net, skb->mark);
+
 	__build_flow_key(&fl4, NULL, iph, oif,
 			 RT_TOS(iph->tos), protocol, mark, flow_flags);
 	rt = __ip_route_output_key(net, &fl4);
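IP4_REPLY_MARK comes from the fwmark-reflect series: when the net.ipv4.fwmark_reflect sysctl is enabled it propagates the triggering packet's mark onto the reply lookup, so mark-based routing rules also steer PMTU updates. A self-contained sketch under that assumption (the *_model types are hypothetical, and the macro body is quoted from memory, so verify against include/net/ip.h):

#include <stdint.h>
#include <stdio.h>

struct netns_ipv4_model { int sysctl_fwmark_reflect; };
struct net_model { struct netns_ipv4_model ipv4; };

#define IP4_REPLY_MARK(net, mark) \
	((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)

int main(void)
{
	struct net_model net = { .ipv4 = { .sysctl_fwmark_reflect = 1 } };
	uint32_t skb_mark = 0x42;
	uint32_t mark = 0;

	/* Mirrors "if (!mark) mark = IP4_REPLY_MARK(net, skb->mark);" */
	if (!mark)
		mark = IP4_REPLY_MARK(&net, skb_mark);
	printf("reply mark: 0x%x\n", mark);	/* 0x42 when reflecting */
	return 0;
}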
@@ -1017,6 +1020,10 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
 	struct rtable *rt;
 
 	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+
+	if (!fl4.flowi4_mark)
+		fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
+
 	rt = __ip_route_output_key(sock_net(sk), &fl4);
 	if (!IS_ERR(rt)) {
 		__ip_rt_update_pmtu(rt, &fl4, mtu);
@@ -1029,7 +1036,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
 	const struct iphdr *iph = (const struct iphdr *) skb->data;
 	struct flowi4 fl4;
 	struct rtable *rt;
-	struct dst_entry *dst;
+	struct dst_entry *odst = NULL;
 	bool new = false;
 
 	bh_lock_sock(sk);
@@ -1037,16 +1044,17 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
 	if (!ip_sk_accept_pmtu(sk))
 		goto out;
 
-	rt = (struct rtable *) __sk_dst_get(sk);
+	odst = sk_dst_get(sk);
 
-	if (sock_owned_by_user(sk) || !rt) {
+	if (sock_owned_by_user(sk) || !odst) {
 		__ipv4_sk_update_pmtu(skb, sk, mtu);
 		goto out;
 	}
 
 	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
 
-	if (!__sk_dst_check(sk, 0)) {
+	rt = (struct rtable *)odst;
+	if (odst->obsolete && odst->ops->check(odst, 0) == NULL) {
 		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
 		if (IS_ERR(rt))
 			goto out;
@@ -1056,8 +1064,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
 
 	__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
 
-	dst = dst_check(&rt->dst, 0);
-	if (!dst) {
+	if (!dst_check(&rt->dst, 0)) {
 		if (new)
 			dst_release(&rt->dst);
 
@@ -1069,10 +1076,11 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
 	}
 
 	if (new)
-		__sk_dst_set(sk, &rt->dst);
+		sk_dst_set(sk, &rt->dst);
 
 out:
 	bh_unlock_sock(sk);
+	dst_release(odst);
 }
 EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
 
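All the churn in ipv4_sk_update_pmtu() has one theme: __sk_dst_get() only borrows the socket's dst under RCU, so a long-running caller could keep using memory freed behind its back, while sk_dst_get() takes a counted reference that the new dst_release(odst) at the end drops. A user-space sketch of the counted-reference pattern (struct dst_model and its helpers are hypothetical, not the kernel API):

#include <stdatomic.h>
#include <stdlib.h>

struct dst_model {
	atomic_int refcnt;
	/* ... routing state ... */
};

static struct dst_model *dst_get(struct dst_model *d)	/* like sk_dst_get() */
{
	if (d)
		atomic_fetch_add(&d->refcnt, 1);
	return d;
}

static void dst_put(struct dst_model *d)	/* like dst_release() */
{
	if (d && atomic_fetch_sub(&d->refcnt, 1) == 1)
		free(d);	/* last reference gone: safe to free */
}

int main(void)
{
	struct dst_model *d = calloc(1, sizeof(*d));

	if (!d)
		return 1;
	atomic_init(&d->refcnt, 1);	/* creator's reference */
	dst_get(d);	/* caller pins the entry, as sk_dst_get() does */
	dst_put(d);	/* ...and drops it when done: dst_release(odst) */
	dst_put(d);	/* owner's release actually frees */
	return 0;
}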
@@ -1136,7 +1144,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
 	dst_set_expires(&rt->dst, 0);
 }
 
-static int ip_rt_bug(struct sk_buff *skb)
+static int ip_rt_bug(struct sock *sk, struct sk_buff *skb)
 {
 	pr_debug("%s: %pI4 -> %pI4, %s\n",
 		 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
@@ -1526,7 +1534,7 @@ static int __mkroute_input(struct sk_buff *skb,
 	struct in_device *out_dev;
 	unsigned int flags = 0;
 	bool do_cache;
-	u32 itag;
+	u32 itag = 0;
 
 	/* get a working reference to the output device */
 	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
@@ -1707,8 +1715,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
 	if (res.type == RTN_LOCAL) {
 		err = fib_validate_source(skb, saddr, daddr, tos,
-					  LOOPBACK_IFINDEX,
-					  dev, in_dev, &itag);
+					  0, dev, in_dev, &itag);
 		if (err < 0)
 			goto martian_source_keep_err;
 		goto local_input;
@@ -1791,8 +1798,6 @@ local_input:
 no_route:
 	RT_CACHE_STAT_INC(in_no_route);
 	res.type = RTN_UNREACHABLE;
-	if (err == -ESRCH)
-		err = -ENETUNREACH;
 	goto local_input;
 
 	/*
@@ -2225,7 +2230,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 
 	new->__use = 1;
 	new->input = dst_discard;
-	new->output = dst_discard;
+	new->output = dst_discard_sk;
 
 	new->dev = ort->dst.dev;
 	if (new->dev)
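This hunk and the earlier ip_rt_bug() change track the same tree-wide API move: the dst output hook now receives the socket as well as the packet, hence dst_discard_sk. A toy model of migrating a callback table to the wider signature (all *_model names are hypothetical):

#include <stdio.h>

struct sk_buff_model { int len; };
struct sock_model { int id; };

/* New-style hook: takes the socket as well as the packet. */
typedef int (*output_fn)(struct sock_model *sk, struct sk_buff_model *skb);

static int discard_sk(struct sock_model *sk, struct sk_buff_model *skb)
{
	(void)sk;	/* a discard sink ignores the socket, but takes it */
	printf("discarding %d bytes\n", skb->len);
	return 0;
}

struct dst_model { output_fn output; };

int main(void)
{
	struct dst_model blackhole = { .output = discard_sk };
	struct sk_buff_model skb = { .len = 100 };

	return blackhole.output(NULL, &skb);	/* callers now pass the sock */
}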
@@ -2260,9 +2265,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
 		return rt;
 
 	if (flp4->flowi4_proto)
-		rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
-						   flowi4_to_flowi(flp4),
-						   sk, 0);
+		rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
+							flowi4_to_flowi(flp4),
+							sk, 0);
 
 	return rt;
 }
@@ -2364,7 +2369,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
 		}
 	} else
 #endif
-		if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
+		if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
 			goto nla_put_failure;
 	}
 
@@ -2475,11 +2480,6 @@ errout_free:
 	goto errout;
 }
 
-int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	return skb->len;
-}
-
 void ip_rt_multicast_event(struct in_device *in_dev)
 {
 	rt_cache_flush(dev_net(in_dev->dev));
@@ -2717,6 +2717,12 @@ int __init ip_rt_init(void)
 {
 	int rc = 0;
 
+	ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
+	if (!ip_idents)
+		panic("IP: failed to allocate ip_idents\n");
+
+	prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
+
 #ifdef CONFIG_IP_ROUTE_CLASSID
 	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
 	if (!ip_rt_acct)
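The setup half of the ID-generator change: the bucket array is allocated once at boot and filled with random bytes, so counters and stamps both start at unpredictable values and even the very first IDs leak nothing. A user-space analogue of that seeding (a sketch; /dev/urandom stands in for prandom_bytes()):

#include <stdint.h>
#include <stdio.h>

#define IP_IDENTS_SZ 2048u

struct ip_ident_bucket {
	uint32_t id;
	uint32_t stamp32;
};

static struct ip_ident_bucket ip_idents[IP_IDENTS_SZ];

int main(void)
{
	FILE *f = fopen("/dev/urandom", "rb");

	/* Seed counters and stamps with random bytes before first use. */
	if (!f || fread(ip_idents, sizeof(*ip_idents), IP_IDENTS_SZ, f)
			!= IP_IDENTS_SZ) {
		fprintf(stderr, "failed to seed ip_idents\n");
		return 1;	/* the kernel panics at this point instead */
	}
	fclose(f);
	printf("bucket 0: id=%u stamp=%u\n",
	       ip_idents[0].id, ip_idents[0].stamp32);
	return 0;
}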