Diffstat (limited to 'net/ipv4/route.c')
 -rw-r--r--  net/ipv4/route.c | 267
 1 file changed, 121 insertions(+), 146 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 987bf9adb318..351dc4e85242 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -139,20 +139,26 @@ static unsigned long expires_ljiffies;
  */
 
 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
+static unsigned int	 ipv4_default_advmss(const struct dst_entry *dst);
+static unsigned int	 ipv4_default_mtu(const struct dst_entry *dst);
 static void		 ipv4_dst_destroy(struct dst_entry *dst);
-static void		 ipv4_dst_ifdown(struct dst_entry *dst,
-					 struct net_device *dev, int how);
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void		 ipv4_link_failure(struct sk_buff *skb);
 static void		 ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
 static int rt_garbage_collect(struct dst_ops *ops);
 
+static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+			    int how)
+{
+}
 
 static struct dst_ops ipv4_dst_ops = {
 	.family =		AF_INET,
 	.protocol =		cpu_to_be16(ETH_P_IP),
 	.gc =			rt_garbage_collect,
 	.check =		ipv4_dst_check,
+	.default_advmss =	ipv4_default_advmss,
+	.default_mtu =		ipv4_default_mtu,
 	.destroy =		ipv4_dst_destroy,
 	.ifdown =		ipv4_dst_ifdown,
 	.negative_advice =	ipv4_negative_advice,
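
The two new dst_ops hooks let the routing cache stop pre-computing RTAX_MTU and RTAX_ADVMSS for every cache entry: a caller that finds a zero raw metric falls back to the protocol's hook instead. A minimal sketch of that caller-side fallback, assuming a dst_metric_raw() helper and the hook signature declared above (the real generic helper lives in include/net/dst.h and may differ):

    /* Hypothetical sketch of how a dst_mtu()-style helper can defer to
     * the new dst_ops hook instead of relying on a value cached at
     * route creation time.  Zero in the raw metric means "not set". */
    static inline u32 dst_mtu_sketch(const struct dst_entry *dst)
    {
    	u32 mtu = dst_metric_raw(dst, RTAX_MTU);
    
    	if (!mtu)	/* no explicit metric: ask the protocol */
    		mtu = dst->ops->default_mtu(dst);
    	return mtu;
    }
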
@@ -381,8 +387,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
 			(__force u32)r->rt_gateway,
 			r->rt_flags, atomic_read(&r->dst.__refcnt),
 			r->dst.__use, 0, (__force u32)r->rt_src,
-			(dst_metric(&r->dst, RTAX_ADVMSS) ?
-			 (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0),
+			dst_metric_advmss(&r->dst) + 40,
 			dst_metric(&r->dst, RTAX_WINDOW),
 			(int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
 			      dst_metric(&r->dst, RTAX_RTTVAR)),
@@ -621,7 +626,7 @@ static inline int rt_fast_clean(struct rtable *rth)
 	/* Kill broadcast/multicast entries very aggressively, if they
 	   collide in hash table with more useful entries */
 	return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
-		rth->fl.iif && rth->dst.rt_next;
+		rt_is_input_route(rth) && rth->dst.rt_next;
 }
 
 static inline int rt_valuable(struct rtable *rth)
@@ -666,7 +671,7 @@ static inline u32 rt_score(struct rtable *rt)
 	if (rt_valuable(rt))
 		score |= (1<<31);
 
-	if (!rt->fl.iif ||
+	if (rt_is_output_route(rt) ||
 	    !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL)))
 		score |= (1<<30);
 
@@ -682,17 +687,17 @@ static inline bool rt_caching(const struct net *net)
 static inline bool compare_hash_inputs(const struct flowi *fl1,
 				       const struct flowi *fl2)
 {
-	return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
-		 ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
+	return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
+		 ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
 		 (fl1->iif ^ fl2->iif)) == 0);
 }
 
 static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
 {
-	return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
-		((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
+	return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
+		((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
 		(fl1->mark ^ fl2->mark) |
-		(*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) |
+		(*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) |
 		(fl1->oif ^ fl2->oif) |
 		(fl1->iif ^ fl2->iif)) == 0;
 }
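
compare_keys() and compare_hash_inputs() avoid short-circuit branches by XOR-ing each field pair and OR-ing the results: the keys are equal only if the accumulated word is zero, so one comparison decides the whole match. A standalone illustration of the idiom (names here are invented for the example):

    #include <stdint.h>
    
    struct key { uint32_t dst, src, mark; };
    
    /* Branch-free equality: XOR yields 0 for matching fields, OR
     * accumulates any mismatch, and a single test against 0 decides. */
    static int keys_equal(const struct key *a, const struct key *b)
    {
    	return ((a->dst ^ b->dst) |
    		(a->src ^ b->src) |
    		(a->mark ^ b->mark)) == 0;
    }
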
@@ -712,13 +717,15 @@ static inline int rt_is_expired(struct rtable *rth)
  * Can be called by a softirq or a process.
  * In the latter case, we want to be rescheduled if necessary
  */
-static void rt_do_flush(int process_context)
+static void rt_do_flush(struct net *net, int process_context)
 {
 	unsigned int i;
 	struct rtable *rth, *next;
-	struct rtable * tail;
 
 	for (i = 0; i <= rt_hash_mask; i++) {
+		struct rtable __rcu **pprev;
+		struct rtable *list;
+
 		if (process_context && need_resched())
 			cond_resched();
 		rth = rcu_dereference_raw(rt_hash_table[i].chain);
@@ -726,50 +733,32 @@ static void rt_do_flush(struct net *net, int process_context)
 			continue;
 
 		spin_lock_bh(rt_hash_lock_addr(i));
-#ifdef CONFIG_NET_NS
-		{
-		struct rtable __rcu **prev;
-		struct rtable *p;
 
-		rth = rcu_dereference_protected(rt_hash_table[i].chain,
+		list = NULL;
+		pprev = &rt_hash_table[i].chain;
+		rth = rcu_dereference_protected(*pprev,
 			lockdep_is_held(rt_hash_lock_addr(i)));
 
-		/* defer releasing the head of the list after spin_unlock */
-		for (tail = rth; tail;
-		     tail = rcu_dereference_protected(tail->dst.rt_next,
-				lockdep_is_held(rt_hash_lock_addr(i))))
-			if (!rt_is_expired(tail))
-				break;
-		if (rth != tail)
-			rt_hash_table[i].chain = tail;
-
-		/* call rt_free on entries after the tail requiring flush */
-		prev = &rt_hash_table[i].chain;
-		for (p = rcu_dereference_protected(*prev,
-				lockdep_is_held(rt_hash_lock_addr(i)));
-		     p != NULL;
-		     p = next) {
-			next = rcu_dereference_protected(p->dst.rt_next,
+		while (rth) {
+			next = rcu_dereference_protected(rth->dst.rt_next,
 				lockdep_is_held(rt_hash_lock_addr(i)));
-			if (!rt_is_expired(p)) {
-				prev = &p->dst.rt_next;
+
+			if (!net ||
+			    net_eq(dev_net(rth->dst.dev), net)) {
+				rcu_assign_pointer(*pprev, next);
+				rcu_assign_pointer(rth->dst.rt_next, list);
+				list = rth;
 			} else {
-				*prev = next;
-				rt_free(p);
+				pprev = &rth->dst.rt_next;
 			}
+			rth = next;
 		}
-		}
-#else
-		rth = rcu_dereference_protected(rt_hash_table[i].chain,
-			lockdep_is_held(rt_hash_lock_addr(i)));
-		rcu_assign_pointer(rt_hash_table[i].chain, NULL);
-		tail = NULL;
-#endif
+
 		spin_unlock_bh(rt_hash_lock_addr(i));
 
-		for (; rth != tail; rth = next) {
-			next = rcu_dereference_protected(rth->dst.rt_next, 1);
-			rt_free(rth);
+		for (; list; list = next) {
+			next = rcu_dereference_protected(list->dst.rt_next, 1);
+			rt_free(list);
 		}
 	}
 }
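
The rewritten rt_do_flush() replaces the CONFIG_NET_NS-specific two-pass walk with a single pattern: under the bucket lock, entries belonging to the target namespace are unlinked onto a private list, and the actual frees happen only after the lock is dropped. A reduced sketch of that unlink-then-free shape outside the kernel (hypothetical node type, no RCU, locking shown as comments):

    #include <stdlib.h>
    
    struct node { struct node *next; int net_id; };
    
    /* Move every node matching net_id from *head onto a private list
     * while "locked", then free the private list after "unlocking".
     * This mirrors the new rt_do_flush(): minimal work under the lock,
     * deferred rt_free() calls outside it. */
    static void flush_matching(struct node **head, int net_id)
    {
    	struct node **pprev = head, *n, *list = NULL;
    
    	/* lock(bucket) would go here */
    	while ((n = *pprev) != NULL) {
    		if (n->net_id == net_id) {
    			*pprev = n->next;	/* unlink from chain */
    			n->next = list;		/* splice onto private list */
    			list = n;
    		} else {
    			pprev = &n->next;
    		}
    	}
    	/* unlock(bucket) would go here */
    
    	while (list) {
    		struct node *next = list->next;
    		free(list);			/* rt_free() in the kernel */
    		list = next;
    	}
    }
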
@@ -917,13 +906,13 @@ void rt_cache_flush(struct net *net, int delay)
 {
 	rt_cache_invalidate(net);
 	if (delay >= 0)
-		rt_do_flush(!in_softirq());
+		rt_do_flush(net, !in_softirq());
 }
 
 /* Flush previous cache invalidated entries from the cache */
-void rt_cache_flush_batch(void)
+void rt_cache_flush_batch(struct net *net)
 {
-	rt_do_flush(!in_softirq());
+	rt_do_flush(net, !in_softirq());
 }
 
 static void rt_emergency_hash_rebuild(struct net *net)
@@ -1124,7 +1113,7 @@ restart:
 		 */
 
 		rt->dst.flags |= DST_NOCACHE;
-		if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
+		if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
 			int err = arp_bind_neighbour(&rt->dst);
 			if (err) {
 				if (net_ratelimit())
@@ -1222,7 +1211,7 @@ restart:
 	/* Try to bind route to arp only if it is output
 	   route or unicast forwarding path.
 	 */
-	if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
+	if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
 		int err = arp_bind_neighbour(&rt->dst);
 		if (err) {
 			spin_unlock_bh(rt_hash_lock_addr(hash));
@@ -1287,7 +1276,7 @@ void rt_bind_peer(struct rtable *rt, int create)
 {
 	struct inet_peer *peer;
 
-	peer = inet_getpeer(rt->rt_dst, create);
+	peer = inet_getpeer_v4(rt->rt_dst, create);
 
 	if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
 		inet_putpeer(peer);
@@ -1404,7 +1393,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 				if (rth->fl.fl4_dst != daddr ||
 				    rth->fl.fl4_src != skeys[i] ||
 				    rth->fl.oif != ikeys[k] ||
-				    rth->fl.iif != 0 ||
+				    rt_is_input_route(rth) ||
 				    rt_is_expired(rth) ||
 				    !net_eq(dev_net(rth->dst.dev), net)) {
 					rthp = &rth->dst.rt_next;
@@ -1433,8 +1422,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 				rt->dst.child		= NULL;
 				if (rt->dst.dev)
 					dev_hold(rt->dst.dev);
-				if (rt->idev)
-					in_dev_hold(rt->idev);
 				rt->dst.obsolete	= -1;
 				rt->dst.lastuse	= jiffies;
 				rt->dst.path		= &rt->dst;
@@ -1666,7 +1653,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 				    rth->rt_dst != daddr ||
 				    rth->rt_src != iph->saddr ||
 				    rth->fl.oif != ikeys[k] ||
-				    rth->fl.iif != 0 ||
+				    rt_is_input_route(rth) ||
 				    dst_metric_locked(&rth->dst, RTAX_MTU) ||
 				    !net_eq(dev_net(rth->dst.dev), net) ||
 				    rt_is_expired(rth))
@@ -1686,11 +1673,14 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 				if (mtu < dst_mtu(&rth->dst)) {
 					dst_confirm(&rth->dst);
 					if (mtu < ip_rt_min_pmtu) {
+						u32 lock = dst_metric(&rth->dst,
+								      RTAX_LOCK);
 						mtu = ip_rt_min_pmtu;
-						rth->dst.metrics[RTAX_LOCK-1] |=
-							(1 << RTAX_MTU);
+						lock |= (1 << RTAX_MTU);
+						dst_metric_set(&rth->dst, RTAX_LOCK,
+							       lock);
 					}
-					rth->dst.metrics[RTAX_MTU-1] = mtu;
+					dst_metric_set(&rth->dst, RTAX_MTU, mtu);
 					dst_set_expires(&rth->dst,
 							ip_rt_mtu_expires);
 				}
@@ -1708,10 +1698,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 	if (dst_mtu(dst) > mtu && mtu >= 68 &&
 	    !(dst_metric_locked(dst, RTAX_MTU))) {
 		if (mtu < ip_rt_min_pmtu) {
+			u32 lock = dst_metric(dst, RTAX_LOCK);
 			mtu = ip_rt_min_pmtu;
-			dst->metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU);
+			dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU));
 		}
-		dst->metrics[RTAX_MTU-1] = mtu;
+		dst_metric_set(dst, RTAX_MTU, mtu);
 		dst_set_expires(dst, ip_rt_mtu_expires);
 		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
 	}
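
Both PMTU paths now go through dst_metric_set() instead of poking dst->metrics[RTAX_*-1] directly, which keeps every metric write behind one accessor and leaves room to change the storage strategy later. A self-contained sketch of what such an accessor pair might look like; the real definitions belong in include/net/dst.h and may differ (RTAX values here are toys):

    #include <stdint.h>
    
    enum { RTAX_LOCK = 1, RTAX_MTU = 2, RTAX_MAX = 32 };	/* toy values */
    
    struct toy_dst { uint32_t metrics[RTAX_MAX]; };
    
    /* Metrics live in a u32 array indexed by RTAX_* - 1, as the old
     * dst->metrics[RTAX_MTU-1] writes show.  Funneling reads and writes
     * through helpers means call sites never see the array layout. */
    static inline uint32_t dst_metric_raw_sketch(const struct toy_dst *dst,
    					     int metric)
    {
    	return dst->metrics[metric - 1];
    }
    
    static inline void dst_metric_set_sketch(struct toy_dst *dst,
    					 int metric, uint32_t val)
    {
    	dst->metrics[metric - 1] = val;
    }
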
@@ -1728,33 +1719,13 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
 {
 	struct rtable *rt = (struct rtable *) dst;
 	struct inet_peer *peer = rt->peer;
-	struct in_device *idev = rt->idev;
 
 	if (peer) {
 		rt->peer = NULL;
 		inet_putpeer(peer);
 	}
-
-	if (idev) {
-		rt->idev = NULL;
-		in_dev_put(idev);
-	}
 }
 
-static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
-			    int how)
-{
-	struct rtable *rt = (struct rtable *) dst;
-	struct in_device *idev = rt->idev;
-	if (dev != dev_net(dev)->loopback_dev && idev && idev->dev == dev) {
-		struct in_device *loopback_idev =
-			in_dev_get(dev_net(dev)->loopback_dev);
-		if (loopback_idev) {
-			rt->idev = loopback_idev;
-			in_dev_put(idev);
-		}
-	}
-}
 
 static void ipv4_link_failure(struct sk_buff *skb)
 {
@@ -1790,7 +1761,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 	__be32 src;
 	struct fib_result res;
 
-	if (rt->fl.iif == 0)
+	if (rt_is_output_route(rt))
 		src = rt->rt_src;
 	else {
 		rcu_read_lock();
@@ -1814,38 +1785,55 @@ static void set_class_tag(struct rtable *rt, u32 tag)
 }
 #endif
 
+static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
+{
+	unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
+
+	if (advmss == 0) {
+		advmss = max_t(unsigned int, dst->dev->mtu - 40,
+			       ip_rt_min_advmss);
+		if (advmss > 65535 - 40)
+			advmss = 65535 - 40;
+	}
+	return advmss;
+}
+
+static unsigned int ipv4_default_mtu(const struct dst_entry *dst)
+{
+	unsigned int mtu = dst->dev->mtu;
+
+	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
+		const struct rtable *rt = (const struct rtable *) dst;
+
+		if (rt->rt_gateway != rt->rt_dst && mtu > 576)
+			mtu = 576;
+	}
+
+	if (mtu > IP_MAX_MTU)
+		mtu = IP_MAX_MTU;
+
+	return mtu;
+}
+
 static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 {
+	struct dst_entry *dst = &rt->dst;
 	struct fib_info *fi = res->fi;
 
 	if (fi) {
 		if (FIB_RES_GW(*res) &&
 		    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 			rt->rt_gateway = FIB_RES_GW(*res);
-		memcpy(rt->dst.metrics, fi->fib_metrics,
-		       sizeof(rt->dst.metrics));
-		if (fi->fib_mtu == 0) {
-			rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu;
-			if (dst_metric_locked(&rt->dst, RTAX_MTU) &&
-			    rt->rt_gateway != rt->rt_dst &&
-			    rt->dst.dev->mtu > 576)
-				rt->dst.metrics[RTAX_MTU-1] = 576;
-		}
+		dst_import_metrics(dst, fi->fib_metrics);
 #ifdef CONFIG_NET_CLS_ROUTE
-		rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
+		dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
 #endif
-	} else
-		rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu;
-
-	if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
-		rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
-	if (dst_mtu(&rt->dst) > IP_MAX_MTU)
-		rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
-	if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0)
-		rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40,
-				       ip_rt_min_advmss);
-	if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40)
-		rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
+	}
+
+	if (dst_mtu(dst) > IP_MAX_MTU)
+		dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU);
+	if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
+		dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
 
 #ifdef CONFIG_NET_CLS_ROUTE
 #ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1910,7 +1898,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->fl.iif	= dev->ifindex;
 	rth->dst.dev	= init_net.loopback_dev;
 	dev_hold(rth->dst.dev);
-	rth->idev	= in_dev_get(rth->dst.dev);
 	rth->fl.oif	= 0;
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
@@ -2050,7 +2037,6 @@ static int __mkroute_input(struct sk_buff *skb,
 	rth->fl.iif	= in_dev->dev->ifindex;
 	rth->dst.dev	= (out_dev)->dev;
 	dev_hold(rth->dst.dev);
-	rth->idev	= in_dev_get(rth->dst.dev);
 	rth->fl.oif	= 0;
 	rth->rt_spec_dst= spec_dst;
 
@@ -2111,12 +2097,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 {
 	struct fib_result res;
 	struct in_device *in_dev = __in_dev_get_rcu(dev);
-	struct flowi fl = { .nl_u = { .ip4_u =
-			      { .daddr = daddr,
-				.saddr = saddr,
-				.tos = tos,
-				.scope = RT_SCOPE_UNIVERSE,
-			      } },
+	struct flowi fl = { .fl4_dst = daddr,
+			    .fl4_src = saddr,
+			    .fl4_tos = tos,
+			    .fl4_scope = RT_SCOPE_UNIVERSE,
 			    .mark = skb->mark,
 			    .iif = dev->ifindex };
 	unsigned	flags = 0;
@@ -2231,7 +2215,6 @@ local_input:
 	rth->fl.iif	= dev->ifindex;
 	rth->dst.dev	= net->loopback_dev;
 	dev_hold(rth->dst.dev);
-	rth->idev	= in_dev_get(rth->dst.dev);
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
 	rth->dst.input= ip_local_deliver;
@@ -2417,9 +2400,6 @@ static int __mkroute_output(struct rtable **result,
 	if (!rth)
 		return -ENOBUFS;
 
-	in_dev_hold(in_dev);
-	rth->idev = in_dev;
-
 	atomic_set(&rth->dst.__refcnt, 1);
 	rth->dst.flags= DST_HOST;
 	if (IN_DEV_CONF_GET(in_dev, NOXFRM))
@@ -2506,14 +2486,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 				 const struct flowi *oldflp)
 {
 	u32 tos	= RT_FL_TOS(oldflp);
-	struct flowi fl = { .nl_u = { .ip4_u =
-			      { .daddr = oldflp->fl4_dst,
-				.saddr = oldflp->fl4_src,
-				.tos = tos & IPTOS_RT_MASK,
-				.scope = ((tos & RTO_ONLINK) ?
-					  RT_SCOPE_LINK :
-					  RT_SCOPE_UNIVERSE),
-			      } },
+	struct flowi fl = { .fl4_dst = oldflp->fl4_dst,
+			    .fl4_src = oldflp->fl4_src,
+			    .fl4_tos = tos & IPTOS_RT_MASK,
+			    .fl4_scope = ((tos & RTO_ONLINK) ?
+					  RT_SCOPE_LINK : RT_SCOPE_UNIVERSE),
 			    .mark = oldflp->mark,
 			    .iif = net->loopback_dev->ifindex,
 			    .oif = oldflp->oif };
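
The flowi initializers throughout this patch drop the nested .nl_u = { .ip4_u = { ... } } designators in favor of flat .fl4_dst / .fl4_src / .fl4_tos / .fl4_scope names. Presumably these are shorthand aliases for the same union members, along the lines of the following sketch (not the verbatim header; the real definitions would live in include/net/flow.h):

    /* Sketch of the kind of aliases that make the flat initializers
     * work while keeping the underlying union layout unchanged. */
    #define fl4_dst	nl_u.ip4_u.daddr
    #define fl4_src	nl_u.ip4_u.saddr
    #define fl4_tos	nl_u.ip4_u.tos
    #define fl4_scope	nl_u.ip4_u.scope
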
@@ -2585,9 +2562,10 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 			goto out;
 
 		/* RACE: Check return value of inet_select_addr instead. */
-		if (rcu_dereference(dev_out->ip_ptr) == NULL)
-			goto out;	/* Wrong error code */
-
+		if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
+			err = -ENETUNREACH;
+			goto out;
+		}
 		if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
 		    ipv4_is_lbcast(oldflp->fl4_dst)) {
 			if (!fl.fl4_src)
@@ -2648,8 +2626,12 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 	}
 
 	if (res.type == RTN_LOCAL) {
-		if (!fl.fl4_src)
-			fl.fl4_src = fl.fl4_dst;
+		if (!fl.fl4_src) {
+			if (res.fi->fib_prefsrc)
+				fl.fl4_src = res.fi->fib_prefsrc;
+			else
+				fl.fl4_src = fl.fl4_dst;
+		}
 		dev_out = net->loopback_dev;
 		fl.oif = dev_out->ifindex;
 		res.fi = NULL;
@@ -2695,7 +2677,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 	     rth = rcu_dereference_bh(rth->dst.rt_next)) {
 		if (rth->fl.fl4_dst == flp->fl4_dst &&
 		    rth->fl.fl4_src == flp->fl4_src &&
-		    rth->fl.iif == 0 &&
+		    rt_is_output_route(rth) &&
 		    rth->fl.oif == flp->oif &&
 		    rth->fl.mark == flp->mark &&
 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
@@ -2751,7 +2733,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 		new->__use = 1;
 		new->input = dst_discard;
 		new->output = dst_discard;
-		memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+		dst_copy_metrics(new, &ort->dst);
 
 		new->dev = ort->dst.dev;
 		if (new->dev)
@@ -2759,9 +2741,6 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 
 		rt->fl = ort->fl;
 
-		rt->idev = ort->idev;
-		if (rt->idev)
-			in_dev_hold(rt->idev);
 		rt->rt_genid = rt_genid(net);
 		rt->rt_flags = ort->rt_flags;
 		rt->rt_type = ort->rt_type;
@@ -2853,7 +2832,7 @@ static int rt_fill_info(struct net *net,
 	if (rt->dst.tclassid)
 		NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
 #endif
-	if (rt->fl.iif)
+	if (rt_is_input_route(rt))
 		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
 	else if (rt->rt_src != rt->fl.fl4_src)
 		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src);
@@ -2861,7 +2840,7 @@ static int rt_fill_info(struct net *net,
 	if (rt->rt_dst != rt->rt_gateway)
 		NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway);
 
-	if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
+	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
 		goto nla_put_failure;
 
 	if (rt->fl.mark)
@@ -2878,7 +2857,7 @@ static int rt_fill_info(struct net *net,
 		}
 	}
 
-	if (rt->fl.iif) {
+	if (rt_is_input_route(rt)) {
 #ifdef CONFIG_IP_MROUTE
 		__be32 dst = rt->rt_dst;
 
@@ -2973,13 +2952,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 		err = -rt->dst.error;
 	} else {
 		struct flowi fl = {
-			.nl_u = {
-				.ip4_u = {
-					.daddr = dst,
-					.saddr = src,
-					.tos = rtm->rtm_tos,
-				},
-			},
+			.fl4_dst = dst,
+			.fl4_src = src,
+			.fl4_tos = rtm->rtm_tos,
 			.oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
 			.mark = mark,
 		};