Diffstat (limited to 'net/ipv4/route.c')
 -rw-r--r--  net/ipv4/route.c | 120 ++++++++++++++++++++++++-----------------
 1 file changed, 63 insertions(+), 57 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4c011ec69ed4..cbadb942c332 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -89,6 +89,7 @@
 #include <linux/rcupdate.h>
 #include <linux/times.h>
 #include <linux/slab.h>
+#include <linux/jhash.h>
 #include <net/dst.h>
 #include <net/net_namespace.h>
 #include <net/protocol.h>
@@ -139,11 +140,6 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
                           struct sk_buff *skb);
 static void ipv4_dst_destroy(struct dst_entry *dst);
 
-static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
-                           int how)
-{
-}
-
 static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
 {
        WARN_ON(1);
@@ -162,7 +158,6 @@ static struct dst_ops ipv4_dst_ops = {
        .mtu = ipv4_mtu,
        .cow_metrics = ipv4_cow_metrics,
        .destroy = ipv4_dst_destroy,
-       .ifdown = ipv4_dst_ifdown,
        .negative_advice = ipv4_negative_advice,
        .link_failure = ipv4_link_failure,
        .update_pmtu = ip_rt_update_pmtu,
@@ -194,7 +189,7 @@ const __u8 ip_tos2prio[16] = {
 EXPORT_SYMBOL(ip_tos2prio);
 
 static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
-#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
+#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
 
 #ifdef CONFIG_PROC_FS
 static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
@@ -462,39 +457,45 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
        return neigh_create(&arp_tbl, pkey, dev);
 }
 
-/*
- * Peer allocation may fail only in serious out-of-memory conditions. However
- * we still can generate some output.
- * Random ID selection looks a bit dangerous because we have no chances to
- * select ID being unique in a reasonable period of time.
- * But broken packet identifier may be better than no packet at all.
+#define IP_IDENTS_SZ 2048u
+struct ip_ident_bucket {
+       atomic_t        id;
+       u32             stamp32;
+};
+
+static struct ip_ident_bucket *ip_idents __read_mostly;
+
+/* In order to protect privacy, we add a perturbation to identifiers
+ * if one generator is seldom used. This makes hard for an attacker
+ * to infer how many packets were sent between two points in time.
  */
-static void ip_select_fb_ident(struct iphdr *iph)
+u32 ip_idents_reserve(u32 hash, int segs)
 {
-       static DEFINE_SPINLOCK(ip_fb_id_lock);
-       static u32 ip_fallback_id;
-       u32 salt;
+       struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ;
+       u32 old = ACCESS_ONCE(bucket->stamp32);
+       u32 now = (u32)jiffies;
+       u32 delta = 0;
+
+       if (old != now && cmpxchg(&bucket->stamp32, old, now) == old)
+               delta = prandom_u32_max(now - old);
 
-       spin_lock_bh(&ip_fb_id_lock);
-       salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
-       iph->id = htons(salt & 0xFFFF);
-       ip_fallback_id = salt;
-       spin_unlock_bh(&ip_fb_id_lock);
+       return atomic_add_return(segs + delta, &bucket->id) - segs;
 }
+EXPORT_SYMBOL(ip_idents_reserve);
 
-void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
+void __ip_select_ident(struct iphdr *iph, int segs)
 {
-       struct net *net = dev_net(dst->dev);
-       struct inet_peer *peer;
+       static u32 ip_idents_hashrnd __read_mostly;
+       u32 hash, id;
 
-       peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
-       if (peer) {
-               iph->id = htons(inet_getid(peer, more));
-               inet_putpeer(peer);
-               return;
-       }
+       net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
 
-       ip_select_fb_ident(iph);
+       hash = jhash_3words((__force u32)iph->daddr,
+                           (__force u32)iph->saddr,
+                           iph->protocol,
+                           ip_idents_hashrnd);
+       id = ip_idents_reserve(hash, segs);
+       iph->id = htons(id);
 }
 EXPORT_SYMBOL(__ip_select_ident);
 
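The replacement scheme above is compact: each of 2048 hash-selected buckets pairs an ID counter with a timestamp, and a bucket that has sat idle jumps its counter ahead by a random amount before handing out IDs, so an observer cannot infer how many packets were sent during the idle window. A rough userspace model of the same idea, with C11 atomics and rand() standing in for the kernel's cmpxchg()/atomic_add_return() and prandom_u32_max(), and `now` passed in where the kernel reads jiffies (names here are the model's own, not the kernel's):

/* Userspace sketch of the ip_idents_reserve() idea -- a model, not kernel code. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define IDENTS_SZ 2048u

struct ident_bucket {
        _Atomic uint32_t id;            /* next IP ID, before pending delta */
        _Atomic uint32_t stamp32;       /* last "time" this bucket was used */
};

static struct ident_bucket idents[IDENTS_SZ];

static uint32_t idents_reserve(uint32_t hash, int segs, uint32_t now)
{
        struct ident_bucket *bucket = &idents[hash % IDENTS_SZ];
        uint32_t old = atomic_load(&bucket->stamp32);
        uint32_t delta = 0;

        /* First caller in a new tick wins the CAS and, if the bucket sat
         * idle, skips ahead by a random amount bounded by the idle time. */
        if (old != now &&
            atomic_compare_exchange_strong(&bucket->stamp32, &old, now))
                delta = (uint32_t)rand() % (now - old);

        /* Reserve 'segs' consecutive IDs; return the first of the range. */
        return atomic_fetch_add(&bucket->id, (uint32_t)segs + delta) + delta;
}

int main(void)
{
        /* reserve 3 consecutive IDs, e.g. for a 3-segment GSO burst */
        printf("first: %u\n", idents_reserve(0x12345678u, 3, 1000));
        /* same bucket after a long idle gap: note the random jump */
        printf("later: %u\n", idents_reserve(0x12345678u, 1, 2000));
        return 0;
}

The kernel's `atomic_add_return(segs + delta, ...) - segs` and the fetch-add form here agree: both return the first ID of the reserved range with the random delta already applied.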
@@ -697,7 +698,6 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 
 out_unlock:
        spin_unlock_bh(&fnhe_lock);
-       return;
 }
 
 static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
@@ -746,7 +746,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
        }
 
        n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
-       if (n) {
+       if (!IS_ERR(n)) {
                if (!(n->nud_state & NUD_VALID)) {
                        neigh_event_send(n, NULL);
                } else {
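The one-line change above fixes a real bug: ipv4_neigh_lookup() reports failure with an ERR_PTR-encoded errno rather than NULL, so the old `if (n)` treated a failed lookup as a valid neighbour. A self-contained sketch of that convention, simplified from what include/linux/err.h does:

/* Userspace model of the kernel's ERR_PTR/IS_ERR convention (simplified
 * from include/linux/err.h): errnos are encoded as pointer values in the
 * top page of the address space, so they are non-NULL yet recognizable. */
#include <stdio.h>

#define MAX_ERRNO 4095

static void *ERR_PTR(long error) { return (void *)error; }
static long PTR_ERR(const void *ptr) { return (long)ptr; }
static int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
        void *n = ERR_PTR(-12);         /* a lookup failing with -ENOMEM */

        if (n)                          /* old test: passes despite the error */
                printf("old check: looks valid\n");
        if (IS_ERR(n))                  /* new test: catches it */
                printf("new check: error %ld\n", PTR_ERR(n));
        return 0;
}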
@@ -1000,6 +1000,9 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
        struct flowi4 fl4;
        struct rtable *rt;
 
+       if (!mark)
+               mark = IP4_REPLY_MARK(net, skb->mark);
+
        __build_flow_key(&fl4, NULL, iph, oif,
                         RT_TOS(iph->tos), protocol, mark, flow_flags);
        rt = __ip_route_output_key(net, &fl4);
@@ -1017,6 +1020,10 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
        struct rtable *rt;
 
        __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+
+       if (!fl4.flowi4_mark)
+               fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
+
        rt = __ip_route_output_key(sock_net(sk), &fl4);
        if (!IS_ERR(rt)) {
                __ip_rt_update_pmtu(rt, &fl4, mtu);
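Both PMTU hunks above fall back to IP4_REPLY_MARK() when no fwmark is set, so ICMP-triggered updates can be routed by the same mark-based rules as the flow that caused them. Going from memory of include/net/ip.h (worth verifying in this tree), the macro reflects the incoming skb's mark only when the fwmark_reflect sysctl is enabled:

/* Assumed definition, from memory of include/net/ip.h -- verify against
 * this tree: reflect the peer's fwmark only if net.ipv4.fwmark_reflect
 * is enabled, otherwise fall back to mark 0. */
#define IP4_REPLY_MARK(net, mark) \
        ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)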
@@ -1029,7 +1036,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
        const struct iphdr *iph = (const struct iphdr *) skb->data;
        struct flowi4 fl4;
        struct rtable *rt;
-       struct dst_entry *dst;
+       struct dst_entry *odst = NULL;
        bool new = false;
 
        bh_lock_sock(sk);
@@ -1037,16 +1044,17 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
        if (!ip_sk_accept_pmtu(sk))
                goto out;
 
-       rt = (struct rtable *) __sk_dst_get(sk);
+       odst = sk_dst_get(sk);
 
-       if (sock_owned_by_user(sk) || !rt) {
+       if (sock_owned_by_user(sk) || !odst) {
                __ipv4_sk_update_pmtu(skb, sk, mtu);
                goto out;
        }
 
        __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
 
-       if (!__sk_dst_check(sk, 0)) {
+       rt = (struct rtable *)odst;
+       if (odst->obsolete && odst->ops->check(odst, 0) == NULL) {
                rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
                if (IS_ERR(rt))
                        goto out;
@@ -1056,8 +1064,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
 
        __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
 
-       dst = dst_check(&rt->dst, 0);
-       if (!dst) {
+       if (!dst_check(&rt->dst, 0)) {
                if (new)
                        dst_release(&rt->dst);
 
@@ -1069,10 +1076,11 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
        }
 
        if (new)
-               __sk_dst_set(sk, &rt->dst);
+               sk_dst_set(sk, &rt->dst);
 
 out:
        bh_unlock_sock(sk);
+       dst_release(odst);
 }
 EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
 
@@ -1136,7 +1144,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
                dst_set_expires(&rt->dst, 0);
 }
 
-static int ip_rt_bug(struct sk_buff *skb)
+static int ip_rt_bug(struct sock *sk, struct sk_buff *skb)
 {
        pr_debug("%s: %pI4 -> %pI4, %s\n",
                 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
@@ -1526,7 +1534,7 @@ static int __mkroute_input(struct sk_buff *skb,
        struct in_device *out_dev;
        unsigned int flags = 0;
        bool do_cache;
-       u32 itag;
+       u32 itag = 0;
 
        /* get a working reference to the output device */
        out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
@@ -1707,8 +1715,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
        if (res.type == RTN_LOCAL) {
                err = fib_validate_source(skb, saddr, daddr, tos,
-                                         LOOPBACK_IFINDEX,
-                                         dev, in_dev, &itag);
+                                         0, dev, in_dev, &itag);
                if (err < 0)
                        goto martian_source_keep_err;
                goto local_input;
@@ -1791,8 +1798,6 @@ local_input:
 no_route:
        RT_CACHE_STAT_INC(in_no_route);
        res.type = RTN_UNREACHABLE;
-       if (err == -ESRCH)
-               err = -ENETUNREACH;
        goto local_input;
 
        /*
@@ -2225,7 +2230,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 
                new->__use = 1;
                new->input = dst_discard;
-               new->output = dst_discard;
+               new->output = dst_discard_sk;
 
                new->dev = ort->dst.dev;
                if (new->dev)
@@ -2260,9 +2265,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
                return rt;
 
        if (flp4->flowi4_proto)
-               rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
-                                                  flowi4_to_flowi(flp4),
-                                                  sk, 0);
+               rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
+                                                       flowi4_to_flowi(flp4),
+                                                       sk, 0);
 
        return rt;
 }
@@ -2364,7 +2369,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
                }
        } else
 #endif
-               if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
+               if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
                        goto nla_put_failure;
        }
 
@@ -2475,11 +2480,6 @@ errout_free:
        goto errout;
 }
 
-int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
-       return skb->len;
-}
-
 void ip_rt_multicast_event(struct in_device *in_dev)
 {
        rt_cache_flush(dev_net(in_dev->dev));
@@ -2717,6 +2717,12 @@ int __init ip_rt_init(void)
 {
        int rc = 0;
 
+       ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
+       if (!ip_idents)
+               panic("IP: failed to allocate ip_idents\n");
+
+       prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
+
 #ifdef CONFIG_IP_ROUTE_CLASSID
        ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
        if (!ip_rt_acct)