aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
authorSteve French <sfrench@us.ibm.com>2011-12-16 01:39:20 -0500
committerSteve French <sfrench@us.ibm.com>2011-12-16 01:39:20 -0500
commitaaf015890754d58dcb71a4aa44ed246bb082bcf6 (patch)
tree17b51ff707fd1b3efec3a3ab872f0d7a7416aca5 /net/ipv4/route.c
parent9c32c63bb70b2fafc3b18bee29959c3bf245ceba (diff)
parent8def5f51b012efb00e77ba2d04696cc0aadd0609 (diff)
Merge branch 'master' of git+ssh://git.samba.org/data/git/sfrench/cifs-2.6
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--net/ipv4/route.c196
1 files changed, 115 insertions, 81 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 155138d8ec8b..46af62363b8c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -112,7 +112,7 @@
112#include <net/secure_seq.h> 112#include <net/secure_seq.h>
113 113
114#define RT_FL_TOS(oldflp4) \ 114#define RT_FL_TOS(oldflp4) \
115 ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) 115 ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
116 116
117#define IP_MAX_MTU 0xFFF0 117#define IP_MAX_MTU 0xFFF0
118 118
@@ -131,6 +131,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
131static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; 131static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
132static int ip_rt_min_advmss __read_mostly = 256; 132static int ip_rt_min_advmss __read_mostly = 256;
133static int rt_chain_length_max __read_mostly = 20; 133static int rt_chain_length_max __read_mostly = 20;
134static int redirect_genid;
134 135
135/* 136/*
136 * Interface to generic destination cache. 137 * Interface to generic destination cache.
@@ -138,7 +139,7 @@ static int rt_chain_length_max __read_mostly = 20;
138 139
139static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); 140static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
140static unsigned int ipv4_default_advmss(const struct dst_entry *dst); 141static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
141static unsigned int ipv4_default_mtu(const struct dst_entry *dst); 142static unsigned int ipv4_mtu(const struct dst_entry *dst);
142static void ipv4_dst_destroy(struct dst_entry *dst); 143static void ipv4_dst_destroy(struct dst_entry *dst);
143static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); 144static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
144static void ipv4_link_failure(struct sk_buff *skb); 145static void ipv4_link_failure(struct sk_buff *skb);
@@ -193,7 +194,7 @@ static struct dst_ops ipv4_dst_ops = {
193 .gc = rt_garbage_collect, 194 .gc = rt_garbage_collect,
194 .check = ipv4_dst_check, 195 .check = ipv4_dst_check,
195 .default_advmss = ipv4_default_advmss, 196 .default_advmss = ipv4_default_advmss,
196 .default_mtu = ipv4_default_mtu, 197 .mtu = ipv4_mtu,
197 .cow_metrics = ipv4_cow_metrics, 198 .cow_metrics = ipv4_cow_metrics,
198 .destroy = ipv4_dst_destroy, 199 .destroy = ipv4_dst_destroy,
199 .ifdown = ipv4_dst_ifdown, 200 .ifdown = ipv4_dst_ifdown,
@@ -416,9 +417,13 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
416 else { 417 else {
417 struct rtable *r = v; 418 struct rtable *r = v;
418 struct neighbour *n; 419 struct neighbour *n;
419 int len; 420 int len, HHUptod;
420 421
422 rcu_read_lock();
421 n = dst_get_neighbour(&r->dst); 423 n = dst_get_neighbour(&r->dst);
424 HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0;
425 rcu_read_unlock();
426
422 seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" 427 seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
423 "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", 428 "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
424 r->dst.dev ? r->dst.dev->name : "*", 429 r->dst.dev ? r->dst.dev->name : "*",
@@ -432,7 +437,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
432 dst_metric(&r->dst, RTAX_RTTVAR)), 437 dst_metric(&r->dst, RTAX_RTTVAR)),
433 r->rt_key_tos, 438 r->rt_key_tos,
434 -1, 439 -1,
435 (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0, 440 HHUptod,
436 r->rt_spec_dst, &len); 441 r->rt_spec_dst, &len);
437 442
438 seq_printf(seq, "%*s\n", 127 - len, ""); 443 seq_printf(seq, "%*s\n", 127 - len, "");
@@ -837,6 +842,7 @@ static void rt_cache_invalidate(struct net *net)
837 842
838 get_random_bytes(&shuffle, sizeof(shuffle)); 843 get_random_bytes(&shuffle, sizeof(shuffle));
839 atomic_add(shuffle + 1U, &net->ipv4.rt_genid); 844 atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
845 redirect_genid++;
840} 846}
841 847
842/* 848/*
@@ -1304,16 +1310,40 @@ static void rt_del(unsigned hash, struct rtable *rt)
1304 spin_unlock_bh(rt_hash_lock_addr(hash)); 1310 spin_unlock_bh(rt_hash_lock_addr(hash));
1305} 1311}
1306 1312
1313static void check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
1314{
1315 struct rtable *rt = (struct rtable *) dst;
1316 __be32 orig_gw = rt->rt_gateway;
1317 struct neighbour *n, *old_n;
1318
1319 dst_confirm(&rt->dst);
1320
1321 rt->rt_gateway = peer->redirect_learned.a4;
1322
1323 n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
1324 if (IS_ERR(n)) {
1325 rt->rt_gateway = orig_gw;
1326 return;
1327 }
1328 old_n = xchg(&rt->dst._neighbour, n);
1329 if (old_n)
1330 neigh_release(old_n);
1331 if (!(n->nud_state & NUD_VALID)) {
1332 neigh_event_send(n, NULL);
1333 } else {
1334 rt->rt_flags |= RTCF_REDIRECTED;
1335 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
1336 }
1337}
1338
1307/* called in rcu_read_lock() section */ 1339/* called in rcu_read_lock() section */
1308void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, 1340void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1309 __be32 saddr, struct net_device *dev) 1341 __be32 saddr, struct net_device *dev)
1310{ 1342{
1311 int s, i; 1343 int s, i;
1312 struct in_device *in_dev = __in_dev_get_rcu(dev); 1344 struct in_device *in_dev = __in_dev_get_rcu(dev);
1313 struct rtable *rt;
1314 __be32 skeys[2] = { saddr, 0 }; 1345 __be32 skeys[2] = { saddr, 0 };
1315 int ikeys[2] = { dev->ifindex, 0 }; 1346 int ikeys[2] = { dev->ifindex, 0 };
1316 struct flowi4 fl4;
1317 struct inet_peer *peer; 1347 struct inet_peer *peer;
1318 struct net *net; 1348 struct net *net;
1319 1349
@@ -1336,33 +1366,44 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1336 goto reject_redirect; 1366 goto reject_redirect;
1337 } 1367 }
1338 1368
1339 memset(&fl4, 0, sizeof(fl4));
1340 fl4.daddr = daddr;
1341 for (s = 0; s < 2; s++) { 1369 for (s = 0; s < 2; s++) {
1342 for (i = 0; i < 2; i++) { 1370 for (i = 0; i < 2; i++) {
1343 fl4.flowi4_oif = ikeys[i]; 1371 unsigned int hash;
1344 fl4.saddr = skeys[s]; 1372 struct rtable __rcu **rthp;
1345 rt = __ip_route_output_key(net, &fl4); 1373 struct rtable *rt;
1346 if (IS_ERR(rt)) 1374
1347 continue; 1375 hash = rt_hash(daddr, skeys[s], ikeys[i], rt_genid(net));
1348 1376
1349 if (rt->dst.error || rt->dst.dev != dev || 1377 rthp = &rt_hash_table[hash].chain;
1350 rt->rt_gateway != old_gw) { 1378
1351 ip_rt_put(rt); 1379 while ((rt = rcu_dereference(*rthp)) != NULL) {
1352 continue; 1380 rthp = &rt->dst.rt_next;
1353 } 1381
1354 1382 if (rt->rt_key_dst != daddr ||
1355 if (!rt->peer) 1383 rt->rt_key_src != skeys[s] ||
1356 rt_bind_peer(rt, rt->rt_dst, 1); 1384 rt->rt_oif != ikeys[i] ||
1385 rt_is_input_route(rt) ||
1386 rt_is_expired(rt) ||
1387 !net_eq(dev_net(rt->dst.dev), net) ||
1388 rt->dst.error ||
1389 rt->dst.dev != dev ||
1390 rt->rt_gateway != old_gw)
1391 continue;
1357 1392
1358 peer = rt->peer; 1393 if (!rt->peer)
1359 if (peer) { 1394 rt_bind_peer(rt, rt->rt_dst, 1);
1360 peer->redirect_learned.a4 = new_gw; 1395
1361 atomic_inc(&__rt_peer_genid); 1396 peer = rt->peer;
1397 if (peer) {
1398 if (peer->redirect_learned.a4 != new_gw ||
1399 peer->redirect_genid != redirect_genid) {
1400 peer->redirect_learned.a4 = new_gw;
1401 peer->redirect_genid = redirect_genid;
1402 atomic_inc(&__rt_peer_genid);
1403 }
1404 check_peer_redir(&rt->dst, peer);
1405 }
1362 } 1406 }
1363
1364 ip_rt_put(rt);
1365 return;
1366 } 1407 }
1367 } 1408 }
1368 return; 1409 return;
@@ -1649,40 +1690,9 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1649 } 1690 }
1650} 1691}
1651 1692
1652static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
1653{
1654 struct rtable *rt = (struct rtable *) dst;
1655 __be32 orig_gw = rt->rt_gateway;
1656 struct neighbour *n, *old_n;
1657
1658 dst_confirm(&rt->dst);
1659
1660 rt->rt_gateway = peer->redirect_learned.a4;
1661
1662 n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
1663 if (IS_ERR(n))
1664 return PTR_ERR(n);
1665 old_n = xchg(&rt->dst._neighbour, n);
1666 if (old_n)
1667 neigh_release(old_n);
1668 if (!n || !(n->nud_state & NUD_VALID)) {
1669 if (n)
1670 neigh_event_send(n, NULL);
1671 rt->rt_gateway = orig_gw;
1672 return -EAGAIN;
1673 } else {
1674 rt->rt_flags |= RTCF_REDIRECTED;
1675 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
1676 }
1677 return 0;
1678}
1679 1693
1680static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) 1694static void ipv4_validate_peer(struct rtable *rt)
1681{ 1695{
1682 struct rtable *rt = (struct rtable *) dst;
1683
1684 if (rt_is_expired(rt))
1685 return NULL;
1686 if (rt->rt_peer_genid != rt_peer_genid()) { 1696 if (rt->rt_peer_genid != rt_peer_genid()) {
1687 struct inet_peer *peer; 1697 struct inet_peer *peer;
1688 1698
@@ -1691,17 +1701,26 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1691 1701
1692 peer = rt->peer; 1702 peer = rt->peer;
1693 if (peer) { 1703 if (peer) {
1694 check_peer_pmtu(dst, peer); 1704 check_peer_pmtu(&rt->dst, peer);
1695 1705
1706 if (peer->redirect_genid != redirect_genid)
1707 peer->redirect_learned.a4 = 0;
1696 if (peer->redirect_learned.a4 && 1708 if (peer->redirect_learned.a4 &&
1697 peer->redirect_learned.a4 != rt->rt_gateway) { 1709 peer->redirect_learned.a4 != rt->rt_gateway)
1698 if (check_peer_redir(dst, peer)) 1710 check_peer_redir(&rt->dst, peer);
1699 return NULL;
1700 }
1701 } 1711 }
1702 1712
1703 rt->rt_peer_genid = rt_peer_genid(); 1713 rt->rt_peer_genid = rt_peer_genid();
1704 } 1714 }
1715}
1716
1717static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1718{
1719 struct rtable *rt = (struct rtable *) dst;
1720
1721 if (rt_is_expired(rt))
1722 return NULL;
1723 ipv4_validate_peer(rt);
1705 return dst; 1724 return dst;
1706} 1725}
1707 1726
@@ -1806,12 +1825,17 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1806 return advmss; 1825 return advmss;
1807} 1826}
1808 1827
1809static unsigned int ipv4_default_mtu(const struct dst_entry *dst) 1828static unsigned int ipv4_mtu(const struct dst_entry *dst)
1810{ 1829{
1811 unsigned int mtu = dst->dev->mtu; 1830 const struct rtable *rt = (const struct rtable *) dst;
1831 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1832
1833 if (mtu && rt_is_output_route(rt))
1834 return mtu;
1835
1836 mtu = dst->dev->mtu;
1812 1837
1813 if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { 1838 if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
1814 const struct rtable *rt = (const struct rtable *) dst;
1815 1839
1816 if (rt->rt_gateway != rt->rt_dst && mtu > 576) 1840 if (rt->rt_gateway != rt->rt_dst && mtu > 576)
1817 mtu = 576; 1841 mtu = 576;
@@ -1844,6 +1868,8 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
1844 dst_init_metrics(&rt->dst, peer->metrics, false); 1868 dst_init_metrics(&rt->dst, peer->metrics, false);
1845 1869
1846 check_peer_pmtu(&rt->dst, peer); 1870 check_peer_pmtu(&rt->dst, peer);
1871 if (peer->redirect_genid != redirect_genid)
1872 peer->redirect_learned.a4 = 0;
1847 if (peer->redirect_learned.a4 && 1873 if (peer->redirect_learned.a4 &&
1848 peer->redirect_learned.a4 != rt->rt_gateway) { 1874 peer->redirect_learned.a4 != rt->rt_gateway) {
1849 rt->rt_gateway = peer->redirect_learned.a4; 1875 rt->rt_gateway = peer->redirect_learned.a4;
@@ -2349,6 +2375,7 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2349 rth->rt_mark == skb->mark && 2375 rth->rt_mark == skb->mark &&
2350 net_eq(dev_net(rth->dst.dev), net) && 2376 net_eq(dev_net(rth->dst.dev), net) &&
2351 !rt_is_expired(rth)) { 2377 !rt_is_expired(rth)) {
2378 ipv4_validate_peer(rth);
2352 if (noref) { 2379 if (noref) {
2353 dst_use_noref(&rth->dst, jiffies); 2380 dst_use_noref(&rth->dst, jiffies);
2354 skb_dst_set_noref(skb, &rth->dst); 2381 skb_dst_set_noref(skb, &rth->dst);
@@ -2407,11 +2434,11 @@ EXPORT_SYMBOL(ip_route_input_common);
2407static struct rtable *__mkroute_output(const struct fib_result *res, 2434static struct rtable *__mkroute_output(const struct fib_result *res,
2408 const struct flowi4 *fl4, 2435 const struct flowi4 *fl4,
2409 __be32 orig_daddr, __be32 orig_saddr, 2436 __be32 orig_daddr, __be32 orig_saddr,
2410 int orig_oif, struct net_device *dev_out, 2437 int orig_oif, __u8 orig_rtos,
2438 struct net_device *dev_out,
2411 unsigned int flags) 2439 unsigned int flags)
2412{ 2440{
2413 struct fib_info *fi = res->fi; 2441 struct fib_info *fi = res->fi;
2414 u32 tos = RT_FL_TOS(fl4);
2415 struct in_device *in_dev; 2442 struct in_device *in_dev;
2416 u16 type = res->type; 2443 u16 type = res->type;
2417 struct rtable *rth; 2444 struct rtable *rth;
@@ -2462,7 +2489,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2462 rth->rt_genid = rt_genid(dev_net(dev_out)); 2489 rth->rt_genid = rt_genid(dev_net(dev_out));
2463 rth->rt_flags = flags; 2490 rth->rt_flags = flags;
2464 rth->rt_type = type; 2491 rth->rt_type = type;
2465 rth->rt_key_tos = tos; 2492 rth->rt_key_tos = orig_rtos;
2466 rth->rt_dst = fl4->daddr; 2493 rth->rt_dst = fl4->daddr;
2467 rth->rt_src = fl4->saddr; 2494 rth->rt_src = fl4->saddr;
2468 rth->rt_route_iif = 0; 2495 rth->rt_route_iif = 0;
@@ -2512,7 +2539,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2512static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) 2539static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
2513{ 2540{
2514 struct net_device *dev_out = NULL; 2541 struct net_device *dev_out = NULL;
2515 u32 tos = RT_FL_TOS(fl4); 2542 __u8 tos = RT_FL_TOS(fl4);
2516 unsigned int flags = 0; 2543 unsigned int flags = 0;
2517 struct fib_result res; 2544 struct fib_result res;
2518 struct rtable *rth; 2545 struct rtable *rth;
@@ -2688,7 +2715,7 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
2688 2715
2689make_route: 2716make_route:
2690 rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif, 2717 rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif,
2691 dev_out, flags); 2718 tos, dev_out, flags);
2692 if (!IS_ERR(rth)) { 2719 if (!IS_ERR(rth)) {
2693 unsigned int hash; 2720 unsigned int hash;
2694 2721
@@ -2724,6 +2751,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4)
2724 (IPTOS_RT_MASK | RTO_ONLINK)) && 2751 (IPTOS_RT_MASK | RTO_ONLINK)) &&
2725 net_eq(dev_net(rth->dst.dev), net) && 2752 net_eq(dev_net(rth->dst.dev), net) &&
2726 !rt_is_expired(rth)) { 2753 !rt_is_expired(rth)) {
2754 ipv4_validate_peer(rth);
2727 dst_use(&rth->dst, jiffies); 2755 dst_use(&rth->dst, jiffies);
2728 RT_CACHE_STAT_INC(out_hit); 2756 RT_CACHE_STAT_INC(out_hit);
2729 rcu_read_unlock_bh(); 2757 rcu_read_unlock_bh();
@@ -2747,9 +2775,11 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo
2747 return NULL; 2775 return NULL;
2748} 2776}
2749 2777
2750static unsigned int ipv4_blackhole_default_mtu(const struct dst_entry *dst) 2778static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
2751{ 2779{
2752 return 0; 2780 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2781
2782 return mtu ? : dst->dev->mtu;
2753} 2783}
2754 2784
2755static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 2785static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -2767,7 +2797,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
2767 .protocol = cpu_to_be16(ETH_P_IP), 2797 .protocol = cpu_to_be16(ETH_P_IP),
2768 .destroy = ipv4_dst_destroy, 2798 .destroy = ipv4_dst_destroy,
2769 .check = ipv4_blackhole_dst_check, 2799 .check = ipv4_blackhole_dst_check,
2770 .default_mtu = ipv4_blackhole_default_mtu, 2800 .mtu = ipv4_blackhole_mtu,
2771 .default_advmss = ipv4_default_advmss, 2801 .default_advmss = ipv4_default_advmss,
2772 .update_pmtu = ipv4_rt_blackhole_update_pmtu, 2802 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
2773 .cow_metrics = ipv4_rt_blackhole_cow_metrics, 2803 .cow_metrics = ipv4_rt_blackhole_cow_metrics,
@@ -2845,7 +2875,7 @@ static int rt_fill_info(struct net *net,
2845 struct rtable *rt = skb_rtable(skb); 2875 struct rtable *rt = skb_rtable(skb);
2846 struct rtmsg *r; 2876 struct rtmsg *r;
2847 struct nlmsghdr *nlh; 2877 struct nlmsghdr *nlh;
2848 long expires = 0; 2878 unsigned long expires = 0;
2849 const struct inet_peer *peer = rt->peer; 2879 const struct inet_peer *peer = rt->peer;
2850 u32 id = 0, ts = 0, tsage = 0, error; 2880 u32 id = 0, ts = 0, tsage = 0, error;
2851 2881
@@ -2902,8 +2932,12 @@ static int rt_fill_info(struct net *net,
2902 tsage = get_seconds() - peer->tcp_ts_stamp; 2932 tsage = get_seconds() - peer->tcp_ts_stamp;
2903 } 2933 }
2904 expires = ACCESS_ONCE(peer->pmtu_expires); 2934 expires = ACCESS_ONCE(peer->pmtu_expires);
2905 if (expires) 2935 if (expires) {
2906 expires -= jiffies; 2936 if (time_before(jiffies, expires))
2937 expires -= jiffies;
2938 else
2939 expires = 0;
2940 }
2907 } 2941 }
2908 2942
2909 if (rt_is_input_route(rt)) { 2943 if (rt_is_input_route(rt)) {