aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--net/ipv4/route.c227
1 files changed, 174 insertions, 53 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2ea6dcc3e2cc..77bfba975959 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -129,6 +129,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
129static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; 129static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
130static int ip_rt_min_advmss __read_mostly = 256; 130static int ip_rt_min_advmss __read_mostly = 256;
131static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; 131static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ;
132static int rt_chain_length_max __read_mostly = 20;
132 133
133static void rt_worker_func(struct work_struct *work); 134static void rt_worker_func(struct work_struct *work);
134static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); 135static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
@@ -145,6 +146,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
145static void ipv4_link_failure(struct sk_buff *skb); 146static void ipv4_link_failure(struct sk_buff *skb);
146static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 147static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
147static int rt_garbage_collect(struct dst_ops *ops); 148static int rt_garbage_collect(struct dst_ops *ops);
149static void rt_emergency_hash_rebuild(struct net *net);
148 150
149 151
150static struct dst_ops ipv4_dst_ops = { 152static struct dst_ops ipv4_dst_ops = {
@@ -158,7 +160,6 @@ static struct dst_ops ipv4_dst_ops = {
158 .link_failure = ipv4_link_failure, 160 .link_failure = ipv4_link_failure,
159 .update_pmtu = ip_rt_update_pmtu, 161 .update_pmtu = ip_rt_update_pmtu,
160 .local_out = __ip_local_out, 162 .local_out = __ip_local_out,
161 .entry_size = sizeof(struct rtable),
162 .entries = ATOMIC_INIT(0), 163 .entries = ATOMIC_INIT(0),
163}; 164};
164 165
@@ -201,6 +202,7 @@ const __u8 ip_tos2prio[16] = {
201struct rt_hash_bucket { 202struct rt_hash_bucket {
202 struct rtable *chain; 203 struct rtable *chain;
203}; 204};
205
204#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ 206#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
205 defined(CONFIG_PROVE_LOCKING) 207 defined(CONFIG_PROVE_LOCKING)
206/* 208/*
@@ -674,6 +676,20 @@ static inline u32 rt_score(struct rtable *rt)
674 return score; 676 return score;
675} 677}
676 678
679static inline bool rt_caching(const struct net *net)
680{
681 return net->ipv4.current_rt_cache_rebuild_count <=
682 net->ipv4.sysctl_rt_cache_rebuild_count;
683}
684
685static inline bool compare_hash_inputs(const struct flowi *fl1,
686 const struct flowi *fl2)
687{
688 return (__force u32)(((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) |
689 (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr) |
690 (fl1->iif ^ fl2->iif)) == 0);
691}
692
677static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) 693static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
678{ 694{
679 return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | 695 return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) |
@@ -753,11 +769,24 @@ static void rt_do_flush(int process_context)
753 } 769 }
754} 770}
755 771
772/*
773 * While freeing expired entries, we compute average chain length
774 * and standard deviation, using fixed-point arithmetic.
775 * This is done to obtain an estimate of rt_chain_length_max:
776 * rt_chain_length_max = max(elasticity, AVG + 4*SD)
777 * We use 3 bits for fractional part, and 29 (or 61) for magnitude.
778 */
779
780#define FRACT_BITS 3
781#define ONE (1UL << FRACT_BITS)
782
756static void rt_check_expire(void) 783static void rt_check_expire(void)
757{ 784{
758 static unsigned int rover; 785 static unsigned int rover;
759 unsigned int i = rover, goal; 786 unsigned int i = rover, goal;
760 struct rtable *rth, **rthp; 787 struct rtable *rth, **rthp;
788 unsigned long length = 0, samples = 0;
789 unsigned long sum = 0, sum2 = 0;
761 u64 mult; 790 u64 mult;
762 791
763 mult = ((u64)ip_rt_gc_interval) << rt_hash_log; 792 mult = ((u64)ip_rt_gc_interval) << rt_hash_log;
@@ -766,6 +795,7 @@ static void rt_check_expire(void)
766 goal = (unsigned int)mult; 795 goal = (unsigned int)mult;
767 if (goal > rt_hash_mask) 796 if (goal > rt_hash_mask)
768 goal = rt_hash_mask + 1; 797 goal = rt_hash_mask + 1;
798 length = 0;
769 for (; goal > 0; goal--) { 799 for (; goal > 0; goal--) {
770 unsigned long tmo = ip_rt_gc_timeout; 800 unsigned long tmo = ip_rt_gc_timeout;
771 801
@@ -775,6 +805,8 @@ static void rt_check_expire(void)
775 if (need_resched()) 805 if (need_resched())
776 cond_resched(); 806 cond_resched();
777 807
808 samples++;
809
778 if (*rthp == NULL) 810 if (*rthp == NULL)
779 continue; 811 continue;
780 spin_lock_bh(rt_hash_lock_addr(i)); 812 spin_lock_bh(rt_hash_lock_addr(i));
@@ -789,11 +821,29 @@ static void rt_check_expire(void)
789 if (time_before_eq(jiffies, rth->u.dst.expires)) { 821 if (time_before_eq(jiffies, rth->u.dst.expires)) {
790 tmo >>= 1; 822 tmo >>= 1;
791 rthp = &rth->u.dst.rt_next; 823 rthp = &rth->u.dst.rt_next;
824 /*
825 * Only bump our length if the hash
826 * inputs on entries n and n+1 are not
827 * the same, we only count entries on
828 * a chain with equal hash inputs once
829 * so that entries for different QOS
830 * levels, and other non-hash input
831 * attributes don't unfairly skew
832 * the length computation
833 */
834 if ((*rthp == NULL) ||
835 !compare_hash_inputs(&(*rthp)->fl,
836 &rth->fl))
837 length += ONE;
792 continue; 838 continue;
793 } 839 }
794 } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { 840 } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) {
795 tmo >>= 1; 841 tmo >>= 1;
796 rthp = &rth->u.dst.rt_next; 842 rthp = &rth->u.dst.rt_next;
843 if ((*rthp == NULL) ||
844 !compare_hash_inputs(&(*rthp)->fl,
845 &rth->fl))
846 length += ONE;
797 continue; 847 continue;
798 } 848 }
799 849
@@ -802,6 +852,15 @@ static void rt_check_expire(void)
802 rt_free(rth); 852 rt_free(rth);
803 } 853 }
804 spin_unlock_bh(rt_hash_lock_addr(i)); 854 spin_unlock_bh(rt_hash_lock_addr(i));
855 sum += length;
856 sum2 += length*length;
857 }
858 if (samples) {
859 unsigned long avg = sum / samples;
860 unsigned long sd = int_sqrt(sum2 / samples - avg*avg);
861 rt_chain_length_max = max_t(unsigned long,
862 ip_rt_gc_elasticity,
863 (avg + 4*sd) >> FRACT_BITS);
805 } 864 }
806 rover = i; 865 rover = i;
807} 866}
@@ -851,6 +910,26 @@ static void rt_secret_rebuild(unsigned long __net)
851 mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); 910 mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
852} 911}
853 912
913static void rt_secret_rebuild_oneshot(struct net *net)
914{
915 del_timer_sync(&net->ipv4.rt_secret_timer);
916 rt_cache_invalidate(net);
917 if (ip_rt_secret_interval) {
918 net->ipv4.rt_secret_timer.expires += ip_rt_secret_interval;
919 add_timer(&net->ipv4.rt_secret_timer);
920 }
921}
922
923static void rt_emergency_hash_rebuild(struct net *net)
924{
925 if (net_ratelimit()) {
926 printk(KERN_WARNING "Route hash chain too long!\n");
927 printk(KERN_WARNING "Adjust your secret_interval!\n");
928 }
929
930 rt_secret_rebuild_oneshot(net);
931}
932
854/* 933/*
855 Short description of GC goals. 934 Short description of GC goals.
856 935
@@ -989,6 +1068,7 @@ out: return 0;
989static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) 1068static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
990{ 1069{
991 struct rtable *rth, **rthp; 1070 struct rtable *rth, **rthp;
1071 struct rtable *rthi;
992 unsigned long now; 1072 unsigned long now;
993 struct rtable *cand, **candp; 1073 struct rtable *cand, **candp;
994 u32 min_score; 1074 u32 min_score;
@@ -1002,7 +1082,13 @@ restart:
1002 candp = NULL; 1082 candp = NULL;
1003 now = jiffies; 1083 now = jiffies;
1004 1084
1085 if (!rt_caching(dev_net(rt->u.dst.dev))) {
1086 rt_drop(rt);
1087 return 0;
1088 }
1089
1005 rthp = &rt_hash_table[hash].chain; 1090 rthp = &rt_hash_table[hash].chain;
1091 rthi = NULL;
1006 1092
1007 spin_lock_bh(rt_hash_lock_addr(hash)); 1093 spin_lock_bh(rt_hash_lock_addr(hash));
1008 while ((rth = *rthp) != NULL) { 1094 while ((rth = *rthp) != NULL) {
@@ -1048,6 +1134,17 @@ restart:
1048 chain_length++; 1134 chain_length++;
1049 1135
1050 rthp = &rth->u.dst.rt_next; 1136 rthp = &rth->u.dst.rt_next;
1137
1138 /*
1139 * check to see if the next entry in the chain
1140 * contains the same hash input values as rt. If it does,
1141 * this is where we will insert into the list, instead of
1142 * at the head. This groups entries that differ by aspects not
1143 * relevant to the hash function together, which we use to adjust
1144 * our chain length
1145 */
1146 if (*rthp && compare_hash_inputs(&(*rthp)->fl, &rt->fl))
1147 rthi = rth;
1051 } 1148 }
1052 1149
1053 if (cand) { 1150 if (cand) {
@@ -1061,6 +1158,16 @@ restart:
1061 *candp = cand->u.dst.rt_next; 1158 *candp = cand->u.dst.rt_next;
1062 rt_free(cand); 1159 rt_free(cand);
1063 } 1160 }
1161 } else {
1162 if (chain_length > rt_chain_length_max) {
1163 struct net *net = dev_net(rt->u.dst.dev);
1164 int num = ++net->ipv4.current_rt_cache_rebuild_count;
1165 if (!rt_caching(dev_net(rt->u.dst.dev))) {
1166 printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n",
1167 rt->u.dst.dev->name, num);
1168 }
1169 rt_emergency_hash_rebuild(dev_net(rt->u.dst.dev));
1170 }
1064 } 1171 }
1065 1172
1066 /* Try to bind route to arp only if it is output 1173 /* Try to bind route to arp only if it is output
@@ -1098,14 +1205,17 @@ restart:
1098 } 1205 }
1099 } 1206 }
1100 1207
1101 rt->u.dst.rt_next = rt_hash_table[hash].chain; 1208 if (rthi)
1209 rt->u.dst.rt_next = rthi->u.dst.rt_next;
1210 else
1211 rt->u.dst.rt_next = rt_hash_table[hash].chain;
1212
1102#if RT_CACHE_DEBUG >= 2 1213#if RT_CACHE_DEBUG >= 2
1103 if (rt->u.dst.rt_next) { 1214 if (rt->u.dst.rt_next) {
1104 struct rtable *trt; 1215 struct rtable *trt;
1105 printk(KERN_DEBUG "rt_cache @%02x: " NIPQUAD_FMT, hash, 1216 printk(KERN_DEBUG "rt_cache @%02x: %pI4", hash, &rt->rt_dst);
1106 NIPQUAD(rt->rt_dst));
1107 for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) 1217 for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next)
1108 printk(" . " NIPQUAD_FMT, NIPQUAD(trt->rt_dst)); 1218 printk(" . %pI4", &trt->rt_dst);
1109 printk("\n"); 1219 printk("\n");
1110 } 1220 }
1111#endif 1221#endif
@@ -1114,7 +1224,11 @@ restart:
1114 * previous writes to rt are committed to memory 1224 * previous writes to rt are committed to memory
1115 * before making rt visible to other CPUS. 1225 * before making rt visible to other CPUS.
1116 */ 1226 */
1117 rcu_assign_pointer(rt_hash_table[hash].chain, rt); 1227 if (rthi)
1228 rcu_assign_pointer(rthi->u.dst.rt_next, rt);
1229 else
1230 rcu_assign_pointer(rt_hash_table[hash].chain, rt);
1231
1118 spin_unlock_bh(rt_hash_lock_addr(hash)); 1232 spin_unlock_bh(rt_hash_lock_addr(hash));
1119 *rp = rt; 1233 *rp = rt;
1120 return 0; 1234 return 0;
@@ -1217,6 +1331,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1217 || ipv4_is_zeronet(new_gw)) 1331 || ipv4_is_zeronet(new_gw))
1218 goto reject_redirect; 1332 goto reject_redirect;
1219 1333
1334 if (!rt_caching(net))
1335 goto reject_redirect;
1336
1220 if (!IN_DEV_SHARED_MEDIA(in_dev)) { 1337 if (!IN_DEV_SHARED_MEDIA(in_dev)) {
1221 if (!inet_addr_onlink(in_dev, new_gw, old_gw)) 1338 if (!inet_addr_onlink(in_dev, new_gw, old_gw))
1222 goto reject_redirect; 1339 goto reject_redirect;
@@ -1267,7 +1384,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1267 1384
1268 /* Copy all the information. */ 1385 /* Copy all the information. */
1269 *rt = *rth; 1386 *rt = *rth;
1270 INIT_RCU_HEAD(&rt->u.dst.rcu_head);
1271 rt->u.dst.__use = 1; 1387 rt->u.dst.__use = 1;
1272 atomic_set(&rt->u.dst.__refcnt, 1); 1388 atomic_set(&rt->u.dst.__refcnt, 1);
1273 rt->u.dst.child = NULL; 1389 rt->u.dst.child = NULL;
@@ -1280,7 +1396,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1280 rt->u.dst.path = &rt->u.dst; 1396 rt->u.dst.path = &rt->u.dst;
1281 rt->u.dst.neighbour = NULL; 1397 rt->u.dst.neighbour = NULL;
1282 rt->u.dst.hh = NULL; 1398 rt->u.dst.hh = NULL;
1399#ifdef CONFIG_XFRM
1283 rt->u.dst.xfrm = NULL; 1400 rt->u.dst.xfrm = NULL;
1401#endif
1284 rt->rt_genid = rt_genid(net); 1402 rt->rt_genid = rt_genid(net);
1285 rt->rt_flags |= RTCF_REDIRECTED; 1403 rt->rt_flags |= RTCF_REDIRECTED;
1286 1404
@@ -1324,11 +1442,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1324reject_redirect: 1442reject_redirect:
1325#ifdef CONFIG_IP_ROUTE_VERBOSE 1443#ifdef CONFIG_IP_ROUTE_VERBOSE
1326 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) 1444 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
1327 printk(KERN_INFO "Redirect from " NIPQUAD_FMT " on %s about " 1445 printk(KERN_INFO "Redirect from %pI4 on %s about %pI4 ignored.\n"
1328 NIPQUAD_FMT " ignored.\n" 1446 " Advised path = %pI4 -> %pI4\n",
1329 " Advised path = " NIPQUAD_FMT " -> " NIPQUAD_FMT "\n", 1447 &old_gw, dev->name, &new_gw,
1330 NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw), 1448 &saddr, &daddr);
1331 NIPQUAD(saddr), NIPQUAD(daddr));
1332#endif 1449#endif
1333 in_dev_put(in_dev); 1450 in_dev_put(in_dev);
1334} 1451}
@@ -1348,9 +1465,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1348 rt->fl.oif, 1465 rt->fl.oif,
1349 rt_genid(dev_net(dst->dev))); 1466 rt_genid(dev_net(dst->dev)));
1350#if RT_CACHE_DEBUG >= 1 1467#if RT_CACHE_DEBUG >= 1
1351 printk(KERN_DEBUG "ipv4_negative_advice: redirect to " 1468 printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n",
1352 NIPQUAD_FMT "/%02x dropped\n", 1469 &rt->rt_dst, rt->fl.fl4_tos);
1353 NIPQUAD(rt->rt_dst), rt->fl.fl4_tos);
1354#endif 1470#endif
1355 rt_del(hash, rt); 1471 rt_del(hash, rt);
1356 ret = NULL; 1472 ret = NULL;
@@ -1414,10 +1530,9 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1414 if (IN_DEV_LOG_MARTIANS(in_dev) && 1530 if (IN_DEV_LOG_MARTIANS(in_dev) &&
1415 rt->u.dst.rate_tokens == ip_rt_redirect_number && 1531 rt->u.dst.rate_tokens == ip_rt_redirect_number &&
1416 net_ratelimit()) 1532 net_ratelimit())
1417 printk(KERN_WARNING "host " NIPQUAD_FMT "/if%d ignores " 1533 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
1418 "redirects for " NIPQUAD_FMT " to " NIPQUAD_FMT ".\n", 1534 &rt->rt_src, rt->rt_iif,
1419 NIPQUAD(rt->rt_src), rt->rt_iif, 1535 &rt->rt_dst, &rt->rt_gateway);
1420 NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway));
1421#endif 1536#endif
1422 } 1537 }
1423out: 1538out:
@@ -1610,8 +1725,8 @@ static void ipv4_link_failure(struct sk_buff *skb)
1610 1725
1611static int ip_rt_bug(struct sk_buff *skb) 1726static int ip_rt_bug(struct sk_buff *skb)
1612{ 1727{
1613 printk(KERN_DEBUG "ip_rt_bug: " NIPQUAD_FMT " -> " NIPQUAD_FMT ", %s\n", 1728 printk(KERN_DEBUG "ip_rt_bug: %pI4 -> %pI4, %s\n",
1614 NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr), 1729 &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1615 skb->dev ? skb->dev->name : "?"); 1730 skb->dev ? skb->dev->name : "?");
1616 kfree_skb(skb); 1731 kfree_skb(skb);
1617 return 0; 1732 return 0;
@@ -1788,9 +1903,8 @@ static void ip_handle_martian_source(struct net_device *dev,
1788 * RFC1812 recommendation, if source is martian, 1903 * RFC1812 recommendation, if source is martian,
1789 * the only hint is MAC header. 1904 * the only hint is MAC header.
1790 */ 1905 */
1791 printk(KERN_WARNING "martian source " NIPQUAD_FMT " from " 1906 printk(KERN_WARNING "martian source %pI4 from %pI4, on dev %s\n",
1792 NIPQUAD_FMT", on dev %s\n", 1907 &daddr, &saddr, dev->name);
1793 NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
1794 if (dev->hard_header_len && skb_mac_header_was_set(skb)) { 1908 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
1795 int i; 1909 int i;
1796 const unsigned char *p = skb_mac_header(skb); 1910 const unsigned char *p = skb_mac_header(skb);
@@ -2099,9 +2213,8 @@ martian_destination:
2099 RT_CACHE_STAT_INC(in_martian_dst); 2213 RT_CACHE_STAT_INC(in_martian_dst);
2100#ifdef CONFIG_IP_ROUTE_VERBOSE 2214#ifdef CONFIG_IP_ROUTE_VERBOSE
2101 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) 2215 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
2102 printk(KERN_WARNING "martian destination " NIPQUAD_FMT " from " 2216 printk(KERN_WARNING "martian destination %pI4 from %pI4, dev %s\n",
2103 NIPQUAD_FMT ", dev %s\n", 2217 &daddr, &saddr, dev->name);
2104 NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
2105#endif 2218#endif
2106 2219
2107e_hostunreach: 2220e_hostunreach:
@@ -2130,6 +2243,10 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2130 struct net *net; 2243 struct net *net;
2131 2244
2132 net = dev_net(dev); 2245 net = dev_net(dev);
2246
2247 if (!rt_caching(net))
2248 goto skip_cache;
2249
2133 tos &= IPTOS_RT_MASK; 2250 tos &= IPTOS_RT_MASK;
2134 hash = rt_hash(daddr, saddr, iif, rt_genid(net)); 2251 hash = rt_hash(daddr, saddr, iif, rt_genid(net));
2135 2252
@@ -2154,6 +2271,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2154 } 2271 }
2155 rcu_read_unlock(); 2272 rcu_read_unlock();
2156 2273
2274skip_cache:
2157 /* Multicast recognition logic is moved from route cache to here. 2275 /* Multicast recognition logic is moved from route cache to here.
2158 The problem was that too many Ethernet cards have broken/missing 2276 The problem was that too many Ethernet cards have broken/missing
2159 hardware multicast filters :-( As result the host on multicasting 2277 hardware multicast filters :-( As result the host on multicasting
@@ -2539,6 +2657,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2539 unsigned hash; 2657 unsigned hash;
2540 struct rtable *rth; 2658 struct rtable *rth;
2541 2659
2660 if (!rt_caching(net))
2661 goto slow_output;
2662
2542 hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); 2663 hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net));
2543 2664
2544 rcu_read_lock_bh(); 2665 rcu_read_lock_bh();
@@ -2563,6 +2684,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2563 } 2684 }
2564 rcu_read_unlock_bh(); 2685 rcu_read_unlock_bh();
2565 2686
2687slow_output:
2566 return ip_route_output_slow(net, rp, flp); 2688 return ip_route_output_slow(net, rp, flp);
2567} 2689}
2568 2690
@@ -2578,7 +2700,6 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
2578 .destroy = ipv4_dst_destroy, 2700 .destroy = ipv4_dst_destroy,
2579 .check = ipv4_dst_check, 2701 .check = ipv4_dst_check,
2580 .update_pmtu = ipv4_rt_blackhole_update_pmtu, 2702 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
2581 .entry_size = sizeof(struct rtable),
2582 .entries = ATOMIC_INIT(0), 2703 .entries = ATOMIC_INIT(0),
2583}; 2704};
2584 2705
@@ -2640,7 +2761,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
2640 flp->fl4_src = (*rp)->rt_src; 2761 flp->fl4_src = (*rp)->rt_src;
2641 if (!flp->fl4_dst) 2762 if (!flp->fl4_dst)
2642 flp->fl4_dst = (*rp)->rt_dst; 2763 flp->fl4_dst = (*rp)->rt_dst;
2643 err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, 2764 err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk,
2644 flags ? XFRM_LOOKUP_WAIT : 0); 2765 flags ? XFRM_LOOKUP_WAIT : 0);
2645 if (err == -EREMOTE) 2766 if (err == -EREMOTE)
2646 err = ipv4_dst_blackhole(net, rp, flp); 2767 err = ipv4_dst_blackhole(net, rp, flp);
@@ -2995,7 +3116,7 @@ static ctl_table ipv4_route_table[] = {
2995 .data = &ipv4_dst_ops.gc_thresh, 3116 .data = &ipv4_dst_ops.gc_thresh,
2996 .maxlen = sizeof(int), 3117 .maxlen = sizeof(int),
2997 .mode = 0644, 3118 .mode = 0644,
2998 .proc_handler = &proc_dointvec, 3119 .proc_handler = proc_dointvec,
2999 }, 3120 },
3000 { 3121 {
3001 .ctl_name = NET_IPV4_ROUTE_MAX_SIZE, 3122 .ctl_name = NET_IPV4_ROUTE_MAX_SIZE,
@@ -3003,7 +3124,7 @@ static ctl_table ipv4_route_table[] = {
3003 .data = &ip_rt_max_size, 3124 .data = &ip_rt_max_size,
3004 .maxlen = sizeof(int), 3125 .maxlen = sizeof(int),
3005 .mode = 0644, 3126 .mode = 0644,
3006 .proc_handler = &proc_dointvec, 3127 .proc_handler = proc_dointvec,
3007 }, 3128 },
3008 { 3129 {
3009 /* Deprecated. Use gc_min_interval_ms */ 3130 /* Deprecated. Use gc_min_interval_ms */
@@ -3013,8 +3134,8 @@ static ctl_table ipv4_route_table[] = {
3013 .data = &ip_rt_gc_min_interval, 3134 .data = &ip_rt_gc_min_interval,
3014 .maxlen = sizeof(int), 3135 .maxlen = sizeof(int),
3015 .mode = 0644, 3136 .mode = 0644,
3016 .proc_handler = &proc_dointvec_jiffies, 3137 .proc_handler = proc_dointvec_jiffies,
3017 .strategy = &sysctl_jiffies, 3138 .strategy = sysctl_jiffies,
3018 }, 3139 },
3019 { 3140 {
3020 .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, 3141 .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS,
@@ -3022,8 +3143,8 @@ static ctl_table ipv4_route_table[] = {
3022 .data = &ip_rt_gc_min_interval, 3143 .data = &ip_rt_gc_min_interval,
3023 .maxlen = sizeof(int), 3144 .maxlen = sizeof(int),
3024 .mode = 0644, 3145 .mode = 0644,
3025 .proc_handler = &proc_dointvec_ms_jiffies, 3146 .proc_handler = proc_dointvec_ms_jiffies,
3026 .strategy = &sysctl_ms_jiffies, 3147 .strategy = sysctl_ms_jiffies,
3027 }, 3148 },
3028 { 3149 {
3029 .ctl_name = NET_IPV4_ROUTE_GC_TIMEOUT, 3150 .ctl_name = NET_IPV4_ROUTE_GC_TIMEOUT,
@@ -3031,8 +3152,8 @@ static ctl_table ipv4_route_table[] = {
3031 .data = &ip_rt_gc_timeout, 3152 .data = &ip_rt_gc_timeout,
3032 .maxlen = sizeof(int), 3153 .maxlen = sizeof(int),
3033 .mode = 0644, 3154 .mode = 0644,
3034 .proc_handler = &proc_dointvec_jiffies, 3155 .proc_handler = proc_dointvec_jiffies,
3035 .strategy = &sysctl_jiffies, 3156 .strategy = sysctl_jiffies,
3036 }, 3157 },
3037 { 3158 {
3038 .ctl_name = NET_IPV4_ROUTE_GC_INTERVAL, 3159 .ctl_name = NET_IPV4_ROUTE_GC_INTERVAL,
@@ -3040,8 +3161,8 @@ static ctl_table ipv4_route_table[] = {
3040 .data = &ip_rt_gc_interval, 3161 .data = &ip_rt_gc_interval,
3041 .maxlen = sizeof(int), 3162 .maxlen = sizeof(int),
3042 .mode = 0644, 3163 .mode = 0644,
3043 .proc_handler = &proc_dointvec_jiffies, 3164 .proc_handler = proc_dointvec_jiffies,
3044 .strategy = &sysctl_jiffies, 3165 .strategy = sysctl_jiffies,
3045 }, 3166 },
3046 { 3167 {
3047 .ctl_name = NET_IPV4_ROUTE_REDIRECT_LOAD, 3168 .ctl_name = NET_IPV4_ROUTE_REDIRECT_LOAD,
@@ -3049,7 +3170,7 @@ static ctl_table ipv4_route_table[] = {
3049 .data = &ip_rt_redirect_load, 3170 .data = &ip_rt_redirect_load,
3050 .maxlen = sizeof(int), 3171 .maxlen = sizeof(int),
3051 .mode = 0644, 3172 .mode = 0644,
3052 .proc_handler = &proc_dointvec, 3173 .proc_handler = proc_dointvec,
3053 }, 3174 },
3054 { 3175 {
3055 .ctl_name = NET_IPV4_ROUTE_REDIRECT_NUMBER, 3176 .ctl_name = NET_IPV4_ROUTE_REDIRECT_NUMBER,
@@ -3057,7 +3178,7 @@ static ctl_table ipv4_route_table[] = {
3057 .data = &ip_rt_redirect_number, 3178 .data = &ip_rt_redirect_number,
3058 .maxlen = sizeof(int), 3179 .maxlen = sizeof(int),
3059 .mode = 0644, 3180 .mode = 0644,
3060 .proc_handler = &proc_dointvec, 3181 .proc_handler = proc_dointvec,
3061 }, 3182 },
3062 { 3183 {
3063 .ctl_name = NET_IPV4_ROUTE_REDIRECT_SILENCE, 3184 .ctl_name = NET_IPV4_ROUTE_REDIRECT_SILENCE,
@@ -3065,7 +3186,7 @@ static ctl_table ipv4_route_table[] = {
3065 .data = &ip_rt_redirect_silence, 3186 .data = &ip_rt_redirect_silence,
3066 .maxlen = sizeof(int), 3187 .maxlen = sizeof(int),
3067 .mode = 0644, 3188 .mode = 0644,
3068 .proc_handler = &proc_dointvec, 3189 .proc_handler = proc_dointvec,
3069 }, 3190 },
3070 { 3191 {
3071 .ctl_name = NET_IPV4_ROUTE_ERROR_COST, 3192 .ctl_name = NET_IPV4_ROUTE_ERROR_COST,
@@ -3073,7 +3194,7 @@ static ctl_table ipv4_route_table[] = {
3073 .data = &ip_rt_error_cost, 3194 .data = &ip_rt_error_cost,
3074 .maxlen = sizeof(int), 3195 .maxlen = sizeof(int),
3075 .mode = 0644, 3196 .mode = 0644,
3076 .proc_handler = &proc_dointvec, 3197 .proc_handler = proc_dointvec,
3077 }, 3198 },
3078 { 3199 {
3079 .ctl_name = NET_IPV4_ROUTE_ERROR_BURST, 3200 .ctl_name = NET_IPV4_ROUTE_ERROR_BURST,
@@ -3081,7 +3202,7 @@ static ctl_table ipv4_route_table[] = {
3081 .data = &ip_rt_error_burst, 3202 .data = &ip_rt_error_burst,
3082 .maxlen = sizeof(int), 3203 .maxlen = sizeof(int),
3083 .mode = 0644, 3204 .mode = 0644,
3084 .proc_handler = &proc_dointvec, 3205 .proc_handler = proc_dointvec,
3085 }, 3206 },
3086 { 3207 {
3087 .ctl_name = NET_IPV4_ROUTE_GC_ELASTICITY, 3208 .ctl_name = NET_IPV4_ROUTE_GC_ELASTICITY,
@@ -3089,7 +3210,7 @@ static ctl_table ipv4_route_table[] = {
3089 .data = &ip_rt_gc_elasticity, 3210 .data = &ip_rt_gc_elasticity,
3090 .maxlen = sizeof(int), 3211 .maxlen = sizeof(int),
3091 .mode = 0644, 3212 .mode = 0644,
3092 .proc_handler = &proc_dointvec, 3213 .proc_handler = proc_dointvec,
3093 }, 3214 },
3094 { 3215 {
3095 .ctl_name = NET_IPV4_ROUTE_MTU_EXPIRES, 3216 .ctl_name = NET_IPV4_ROUTE_MTU_EXPIRES,
@@ -3097,8 +3218,8 @@ static ctl_table ipv4_route_table[] = {
3097 .data = &ip_rt_mtu_expires, 3218 .data = &ip_rt_mtu_expires,
3098 .maxlen = sizeof(int), 3219 .maxlen = sizeof(int),
3099 .mode = 0644, 3220 .mode = 0644,
3100 .proc_handler = &proc_dointvec_jiffies, 3221 .proc_handler = proc_dointvec_jiffies,
3101 .strategy = &sysctl_jiffies, 3222 .strategy = sysctl_jiffies,
3102 }, 3223 },
3103 { 3224 {
3104 .ctl_name = NET_IPV4_ROUTE_MIN_PMTU, 3225 .ctl_name = NET_IPV4_ROUTE_MIN_PMTU,
@@ -3106,7 +3227,7 @@ static ctl_table ipv4_route_table[] = {
3106 .data = &ip_rt_min_pmtu, 3227 .data = &ip_rt_min_pmtu,
3107 .maxlen = sizeof(int), 3228 .maxlen = sizeof(int),
3108 .mode = 0644, 3229 .mode = 0644,
3109 .proc_handler = &proc_dointvec, 3230 .proc_handler = proc_dointvec,
3110 }, 3231 },
3111 { 3232 {
3112 .ctl_name = NET_IPV4_ROUTE_MIN_ADVMSS, 3233 .ctl_name = NET_IPV4_ROUTE_MIN_ADVMSS,
@@ -3114,7 +3235,7 @@ static ctl_table ipv4_route_table[] = {
3114 .data = &ip_rt_min_advmss, 3235 .data = &ip_rt_min_advmss,
3115 .maxlen = sizeof(int), 3236 .maxlen = sizeof(int),
3116 .mode = 0644, 3237 .mode = 0644,
3117 .proc_handler = &proc_dointvec, 3238 .proc_handler = proc_dointvec,
3118 }, 3239 },
3119 { 3240 {
3120 .ctl_name = NET_IPV4_ROUTE_SECRET_INTERVAL, 3241 .ctl_name = NET_IPV4_ROUTE_SECRET_INTERVAL,
@@ -3122,8 +3243,8 @@ static ctl_table ipv4_route_table[] = {
3122 .data = &ip_rt_secret_interval, 3243 .data = &ip_rt_secret_interval,
3123 .maxlen = sizeof(int), 3244 .maxlen = sizeof(int),
3124 .mode = 0644, 3245 .mode = 0644,
3125 .proc_handler = &ipv4_sysctl_rt_secret_interval, 3246 .proc_handler = ipv4_sysctl_rt_secret_interval,
3126 .strategy = &ipv4_sysctl_rt_secret_interval_strategy, 3247 .strategy = ipv4_sysctl_rt_secret_interval_strategy,
3127 }, 3248 },
3128 { .ctl_name = 0 } 3249 { .ctl_name = 0 }
3129}; 3250};
@@ -3151,8 +3272,8 @@ static struct ctl_table ipv4_route_flush_table[] = {
3151 .procname = "flush", 3272 .procname = "flush",
3152 .maxlen = sizeof(int), 3273 .maxlen = sizeof(int),
3153 .mode = 0200, 3274 .mode = 0200,
3154 .proc_handler = &ipv4_sysctl_rtcache_flush, 3275 .proc_handler = ipv4_sysctl_rtcache_flush,
3155 .strategy = &ipv4_sysctl_rtcache_flush_strategy, 3276 .strategy = ipv4_sysctl_rtcache_flush_strategy,
3156 }, 3277 },
3157 { .ctl_name = 0 }, 3278 { .ctl_name = 0 },
3158}; 3279};