diff options
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- | net/ipv4/route.c | 227 |
1 files changed, 174 insertions, 53 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2ea6dcc3e2cc..77bfba975959 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -129,6 +129,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; | |||
129 | static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; | 129 | static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; |
130 | static int ip_rt_min_advmss __read_mostly = 256; | 130 | static int ip_rt_min_advmss __read_mostly = 256; |
131 | static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; | 131 | static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; |
132 | static int rt_chain_length_max __read_mostly = 20; | ||
132 | 133 | ||
133 | static void rt_worker_func(struct work_struct *work); | 134 | static void rt_worker_func(struct work_struct *work); |
134 | static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); | 135 | static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); |
@@ -145,6 +146,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); | |||
145 | static void ipv4_link_failure(struct sk_buff *skb); | 146 | static void ipv4_link_failure(struct sk_buff *skb); |
146 | static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); | 147 | static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); |
147 | static int rt_garbage_collect(struct dst_ops *ops); | 148 | static int rt_garbage_collect(struct dst_ops *ops); |
149 | static void rt_emergency_hash_rebuild(struct net *net); | ||
148 | 150 | ||
149 | 151 | ||
150 | static struct dst_ops ipv4_dst_ops = { | 152 | static struct dst_ops ipv4_dst_ops = { |
@@ -158,7 +160,6 @@ static struct dst_ops ipv4_dst_ops = { | |||
158 | .link_failure = ipv4_link_failure, | 160 | .link_failure = ipv4_link_failure, |
159 | .update_pmtu = ip_rt_update_pmtu, | 161 | .update_pmtu = ip_rt_update_pmtu, |
160 | .local_out = __ip_local_out, | 162 | .local_out = __ip_local_out, |
161 | .entry_size = sizeof(struct rtable), | ||
162 | .entries = ATOMIC_INIT(0), | 163 | .entries = ATOMIC_INIT(0), |
163 | }; | 164 | }; |
164 | 165 | ||
@@ -201,6 +202,7 @@ const __u8 ip_tos2prio[16] = { | |||
201 | struct rt_hash_bucket { | 202 | struct rt_hash_bucket { |
202 | struct rtable *chain; | 203 | struct rtable *chain; |
203 | }; | 204 | }; |
205 | |||
204 | #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ | 206 | #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ |
205 | defined(CONFIG_PROVE_LOCKING) | 207 | defined(CONFIG_PROVE_LOCKING) |
206 | /* | 208 | /* |
@@ -674,6 +676,20 @@ static inline u32 rt_score(struct rtable *rt) | |||
674 | return score; | 676 | return score; |
675 | } | 677 | } |
676 | 678 | ||
679 | static inline bool rt_caching(const struct net *net) | ||
680 | { | ||
681 | return net->ipv4.current_rt_cache_rebuild_count <= | ||
682 | net->ipv4.sysctl_rt_cache_rebuild_count; | ||
683 | } | ||
684 | |||
685 | static inline bool compare_hash_inputs(const struct flowi *fl1, | ||
686 | const struct flowi *fl2) | ||
687 | { | ||
688 | return (__force u32)(((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | | ||
689 | (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr) | | ||
690 | (fl1->iif ^ fl2->iif)) == 0); | ||
691 | } | ||
692 | |||
677 | static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) | 693 | static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) |
678 | { | 694 | { |
679 | return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | | 695 | return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | |
@@ -753,11 +769,24 @@ static void rt_do_flush(int process_context) | |||
753 | } | 769 | } |
754 | } | 770 | } |
755 | 771 | ||
772 | /* | ||
773 | * While freeing expired entries, we compute average chain length | ||
774 | * and standard deviation, using fixed-point arithmetic. | ||
775 | * This is done to obtain an estimate of rt_chain_length_max: | ||
776 | * rt_chain_length_max = max(elasticity, AVG + 4*SD) | ||
777 | * We use 3 bits for the fractional part, and 29 (or 61) for the magnitude. | ||
778 | */ | ||
779 | |||
780 | #define FRACT_BITS 3 | ||
781 | #define ONE (1UL << FRACT_BITS) | ||
782 | |||
756 | static void rt_check_expire(void) | 783 | static void rt_check_expire(void) |
757 | { | 784 | { |
758 | static unsigned int rover; | 785 | static unsigned int rover; |
759 | unsigned int i = rover, goal; | 786 | unsigned int i = rover, goal; |
760 | struct rtable *rth, **rthp; | 787 | struct rtable *rth, **rthp; |
788 | unsigned long length = 0, samples = 0; | ||
789 | unsigned long sum = 0, sum2 = 0; | ||
761 | u64 mult; | 790 | u64 mult; |
762 | 791 | ||
763 | mult = ((u64)ip_rt_gc_interval) << rt_hash_log; | 792 | mult = ((u64)ip_rt_gc_interval) << rt_hash_log; |
@@ -766,6 +795,7 @@ static void rt_check_expire(void) | |||
766 | goal = (unsigned int)mult; | 795 | goal = (unsigned int)mult; |
767 | if (goal > rt_hash_mask) | 796 | if (goal > rt_hash_mask) |
768 | goal = rt_hash_mask + 1; | 797 | goal = rt_hash_mask + 1; |
798 | length = 0; | ||
769 | for (; goal > 0; goal--) { | 799 | for (; goal > 0; goal--) { |
770 | unsigned long tmo = ip_rt_gc_timeout; | 800 | unsigned long tmo = ip_rt_gc_timeout; |
771 | 801 | ||
@@ -775,6 +805,8 @@ static void rt_check_expire(void) | |||
775 | if (need_resched()) | 805 | if (need_resched()) |
776 | cond_resched(); | 806 | cond_resched(); |
777 | 807 | ||
808 | samples++; | ||
809 | |||
778 | if (*rthp == NULL) | 810 | if (*rthp == NULL) |
779 | continue; | 811 | continue; |
780 | spin_lock_bh(rt_hash_lock_addr(i)); | 812 | spin_lock_bh(rt_hash_lock_addr(i)); |
@@ -789,11 +821,29 @@ static void rt_check_expire(void) | |||
789 | if (time_before_eq(jiffies, rth->u.dst.expires)) { | 821 | if (time_before_eq(jiffies, rth->u.dst.expires)) { |
790 | tmo >>= 1; | 822 | tmo >>= 1; |
791 | rthp = &rth->u.dst.rt_next; | 823 | rthp = &rth->u.dst.rt_next; |
824 | /* | ||
825 | * Only bump our length if the hash | ||
826 | * inputs on entries n and n+1 are not | ||
827 | * the same, we only count entries on | ||
828 | * a chain with equal hash inputs once | ||
829 | * so that entries for different QOS | ||
830 | * levels, and other non-hash input | ||
831 | * attributes don't unfairly skew | ||
832 | * the length computation | ||
833 | */ | ||
834 | if ((*rthp == NULL) || | ||
835 | !compare_hash_inputs(&(*rthp)->fl, | ||
836 | &rth->fl)) | ||
837 | length += ONE; | ||
792 | continue; | 838 | continue; |
793 | } | 839 | } |
794 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { | 840 | } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { |
795 | tmo >>= 1; | 841 | tmo >>= 1; |
796 | rthp = &rth->u.dst.rt_next; | 842 | rthp = &rth->u.dst.rt_next; |
843 | if ((*rthp == NULL) || | ||
844 | !compare_hash_inputs(&(*rthp)->fl, | ||
845 | &rth->fl)) | ||
846 | length += ONE; | ||
797 | continue; | 847 | continue; |
798 | } | 848 | } |
799 | 849 | ||
@@ -802,6 +852,15 @@ static void rt_check_expire(void) | |||
802 | rt_free(rth); | 852 | rt_free(rth); |
803 | } | 853 | } |
804 | spin_unlock_bh(rt_hash_lock_addr(i)); | 854 | spin_unlock_bh(rt_hash_lock_addr(i)); |
855 | sum += length; | ||
856 | sum2 += length*length; | ||
857 | } | ||
858 | if (samples) { | ||
859 | unsigned long avg = sum / samples; | ||
860 | unsigned long sd = int_sqrt(sum2 / samples - avg*avg); | ||
861 | rt_chain_length_max = max_t(unsigned long, | ||
862 | ip_rt_gc_elasticity, | ||
863 | (avg + 4*sd) >> FRACT_BITS); | ||
805 | } | 864 | } |
806 | rover = i; | 865 | rover = i; |
807 | } | 866 | } |
@@ -851,6 +910,26 @@ static void rt_secret_rebuild(unsigned long __net) | |||
851 | mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); | 910 | mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); |
852 | } | 911 | } |
853 | 912 | ||
913 | static void rt_secret_rebuild_oneshot(struct net *net) | ||
914 | { | ||
915 | del_timer_sync(&net->ipv4.rt_secret_timer); | ||
916 | rt_cache_invalidate(net); | ||
917 | if (ip_rt_secret_interval) { | ||
918 | net->ipv4.rt_secret_timer.expires += ip_rt_secret_interval; | ||
919 | add_timer(&net->ipv4.rt_secret_timer); | ||
920 | } | ||
921 | } | ||
922 | |||
923 | static void rt_emergency_hash_rebuild(struct net *net) | ||
924 | { | ||
925 | if (net_ratelimit()) { | ||
926 | printk(KERN_WARNING "Route hash chain too long!\n"); | ||
927 | printk(KERN_WARNING "Adjust your secret_interval!\n"); | ||
928 | } | ||
929 | |||
930 | rt_secret_rebuild_oneshot(net); | ||
931 | } | ||
932 | |||
854 | /* | 933 | /* |
855 | Short description of GC goals. | 934 | Short description of GC goals. |
856 | 935 | ||
@@ -989,6 +1068,7 @@ out: return 0; | |||
989 | static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) | 1068 | static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) |
990 | { | 1069 | { |
991 | struct rtable *rth, **rthp; | 1070 | struct rtable *rth, **rthp; |
1071 | struct rtable *rthi; | ||
992 | unsigned long now; | 1072 | unsigned long now; |
993 | struct rtable *cand, **candp; | 1073 | struct rtable *cand, **candp; |
994 | u32 min_score; | 1074 | u32 min_score; |
@@ -1002,7 +1082,13 @@ restart: | |||
1002 | candp = NULL; | 1082 | candp = NULL; |
1003 | now = jiffies; | 1083 | now = jiffies; |
1004 | 1084 | ||
1085 | if (!rt_caching(dev_net(rt->u.dst.dev))) { | ||
1086 | rt_drop(rt); | ||
1087 | return 0; | ||
1088 | } | ||
1089 | |||
1005 | rthp = &rt_hash_table[hash].chain; | 1090 | rthp = &rt_hash_table[hash].chain; |
1091 | rthi = NULL; | ||
1006 | 1092 | ||
1007 | spin_lock_bh(rt_hash_lock_addr(hash)); | 1093 | spin_lock_bh(rt_hash_lock_addr(hash)); |
1008 | while ((rth = *rthp) != NULL) { | 1094 | while ((rth = *rthp) != NULL) { |
@@ -1048,6 +1134,17 @@ restart: | |||
1048 | chain_length++; | 1134 | chain_length++; |
1049 | 1135 | ||
1050 | rthp = &rth->u.dst.rt_next; | 1136 | rthp = &rth->u.dst.rt_next; |
1137 | |||
1138 | /* | ||
1139 | * Check to see if the next entry in the chain | ||
1140 | * contains the same hash input values as rt. If it does, | ||
1141 | * this is where we will insert into the list, instead of | ||
1142 | * at the head. This groups entries that differ by aspects not | ||
1143 | * relevant to the hash function together, which we use to adjust | ||
1144 | * our chain length. | ||
1145 | */ | ||
1146 | if (*rthp && compare_hash_inputs(&(*rthp)->fl, &rt->fl)) | ||
1147 | rthi = rth; | ||
1051 | } | 1148 | } |
1052 | 1149 | ||
1053 | if (cand) { | 1150 | if (cand) { |
@@ -1061,6 +1158,16 @@ restart: | |||
1061 | *candp = cand->u.dst.rt_next; | 1158 | *candp = cand->u.dst.rt_next; |
1062 | rt_free(cand); | 1159 | rt_free(cand); |
1063 | } | 1160 | } |
1161 | } else { | ||
1162 | if (chain_length > rt_chain_length_max) { | ||
1163 | struct net *net = dev_net(rt->u.dst.dev); | ||
1164 | int num = ++net->ipv4.current_rt_cache_rebuild_count; | ||
1165 | if (!rt_caching(dev_net(rt->u.dst.dev))) { | ||
1166 | printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", | ||
1167 | rt->u.dst.dev->name, num); | ||
1168 | } | ||
1169 | rt_emergency_hash_rebuild(dev_net(rt->u.dst.dev)); | ||
1170 | } | ||
1064 | } | 1171 | } |
1065 | 1172 | ||
1066 | /* Try to bind route to arp only if it is output | 1173 | /* Try to bind route to arp only if it is output |
@@ -1098,14 +1205,17 @@ restart: | |||
1098 | } | 1205 | } |
1099 | } | 1206 | } |
1100 | 1207 | ||
1101 | rt->u.dst.rt_next = rt_hash_table[hash].chain; | 1208 | if (rthi) |
1209 | rt->u.dst.rt_next = rthi->u.dst.rt_next; | ||
1210 | else | ||
1211 | rt->u.dst.rt_next = rt_hash_table[hash].chain; | ||
1212 | |||
1102 | #if RT_CACHE_DEBUG >= 2 | 1213 | #if RT_CACHE_DEBUG >= 2 |
1103 | if (rt->u.dst.rt_next) { | 1214 | if (rt->u.dst.rt_next) { |
1104 | struct rtable *trt; | 1215 | struct rtable *trt; |
1105 | printk(KERN_DEBUG "rt_cache @%02x: " NIPQUAD_FMT, hash, | 1216 | printk(KERN_DEBUG "rt_cache @%02x: %pI4", hash, &rt->rt_dst); |
1106 | NIPQUAD(rt->rt_dst)); | ||
1107 | for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) | 1217 | for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) |
1108 | printk(" . " NIPQUAD_FMT, NIPQUAD(trt->rt_dst)); | 1218 | printk(" . %pI4", &trt->rt_dst); |
1109 | printk("\n"); | 1219 | printk("\n"); |
1110 | } | 1220 | } |
1111 | #endif | 1221 | #endif |
@@ -1114,7 +1224,11 @@ restart: | |||
1114 | * previous writes to rt are committed to memory | 1224 | * previous writes to rt are committed to memory |
1115 | * before making rt visible to other CPUS. | 1225 | * before making rt visible to other CPUS. |
1116 | */ | 1226 | */ |
1117 | rcu_assign_pointer(rt_hash_table[hash].chain, rt); | 1227 | if (rthi) |
1228 | rcu_assign_pointer(rthi->u.dst.rt_next, rt); | ||
1229 | else | ||
1230 | rcu_assign_pointer(rt_hash_table[hash].chain, rt); | ||
1231 | |||
1118 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1232 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
1119 | *rp = rt; | 1233 | *rp = rt; |
1120 | return 0; | 1234 | return 0; |
@@ -1217,6 +1331,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1217 | || ipv4_is_zeronet(new_gw)) | 1331 | || ipv4_is_zeronet(new_gw)) |
1218 | goto reject_redirect; | 1332 | goto reject_redirect; |
1219 | 1333 | ||
1334 | if (!rt_caching(net)) | ||
1335 | goto reject_redirect; | ||
1336 | |||
1220 | if (!IN_DEV_SHARED_MEDIA(in_dev)) { | 1337 | if (!IN_DEV_SHARED_MEDIA(in_dev)) { |
1221 | if (!inet_addr_onlink(in_dev, new_gw, old_gw)) | 1338 | if (!inet_addr_onlink(in_dev, new_gw, old_gw)) |
1222 | goto reject_redirect; | 1339 | goto reject_redirect; |
@@ -1267,7 +1384,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1267 | 1384 | ||
1268 | /* Copy all the information. */ | 1385 | /* Copy all the information. */ |
1269 | *rt = *rth; | 1386 | *rt = *rth; |
1270 | INIT_RCU_HEAD(&rt->u.dst.rcu_head); | ||
1271 | rt->u.dst.__use = 1; | 1387 | rt->u.dst.__use = 1; |
1272 | atomic_set(&rt->u.dst.__refcnt, 1); | 1388 | atomic_set(&rt->u.dst.__refcnt, 1); |
1273 | rt->u.dst.child = NULL; | 1389 | rt->u.dst.child = NULL; |
@@ -1280,7 +1396,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1280 | rt->u.dst.path = &rt->u.dst; | 1396 | rt->u.dst.path = &rt->u.dst; |
1281 | rt->u.dst.neighbour = NULL; | 1397 | rt->u.dst.neighbour = NULL; |
1282 | rt->u.dst.hh = NULL; | 1398 | rt->u.dst.hh = NULL; |
1399 | #ifdef CONFIG_XFRM | ||
1283 | rt->u.dst.xfrm = NULL; | 1400 | rt->u.dst.xfrm = NULL; |
1401 | #endif | ||
1284 | rt->rt_genid = rt_genid(net); | 1402 | rt->rt_genid = rt_genid(net); |
1285 | rt->rt_flags |= RTCF_REDIRECTED; | 1403 | rt->rt_flags |= RTCF_REDIRECTED; |
1286 | 1404 | ||
@@ -1324,11 +1442,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1324 | reject_redirect: | 1442 | reject_redirect: |
1325 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 1443 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
1326 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) | 1444 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) |
1327 | printk(KERN_INFO "Redirect from " NIPQUAD_FMT " on %s about " | 1445 | printk(KERN_INFO "Redirect from %pI4 on %s about %pI4 ignored.\n" |
1328 | NIPQUAD_FMT " ignored.\n" | 1446 | " Advised path = %pI4 -> %pI4\n", |
1329 | " Advised path = " NIPQUAD_FMT " -> " NIPQUAD_FMT "\n", | 1447 | &old_gw, dev->name, &new_gw, |
1330 | NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw), | 1448 | &saddr, &daddr); |
1331 | NIPQUAD(saddr), NIPQUAD(daddr)); | ||
1332 | #endif | 1449 | #endif |
1333 | in_dev_put(in_dev); | 1450 | in_dev_put(in_dev); |
1334 | } | 1451 | } |
@@ -1348,9 +1465,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | |||
1348 | rt->fl.oif, | 1465 | rt->fl.oif, |
1349 | rt_genid(dev_net(dst->dev))); | 1466 | rt_genid(dev_net(dst->dev))); |
1350 | #if RT_CACHE_DEBUG >= 1 | 1467 | #if RT_CACHE_DEBUG >= 1 |
1351 | printk(KERN_DEBUG "ipv4_negative_advice: redirect to " | 1468 | printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n", |
1352 | NIPQUAD_FMT "/%02x dropped\n", | 1469 | &rt->rt_dst, rt->fl.fl4_tos); |
1353 | NIPQUAD(rt->rt_dst), rt->fl.fl4_tos); | ||
1354 | #endif | 1470 | #endif |
1355 | rt_del(hash, rt); | 1471 | rt_del(hash, rt); |
1356 | ret = NULL; | 1472 | ret = NULL; |
@@ -1414,10 +1530,9 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1414 | if (IN_DEV_LOG_MARTIANS(in_dev) && | 1530 | if (IN_DEV_LOG_MARTIANS(in_dev) && |
1415 | rt->u.dst.rate_tokens == ip_rt_redirect_number && | 1531 | rt->u.dst.rate_tokens == ip_rt_redirect_number && |
1416 | net_ratelimit()) | 1532 | net_ratelimit()) |
1417 | printk(KERN_WARNING "host " NIPQUAD_FMT "/if%d ignores " | 1533 | printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", |
1418 | "redirects for " NIPQUAD_FMT " to " NIPQUAD_FMT ".\n", | 1534 | &rt->rt_src, rt->rt_iif, |
1419 | NIPQUAD(rt->rt_src), rt->rt_iif, | 1535 | &rt->rt_dst, &rt->rt_gateway); |
1420 | NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway)); | ||
1421 | #endif | 1536 | #endif |
1422 | } | 1537 | } |
1423 | out: | 1538 | out: |
@@ -1610,8 +1725,8 @@ static void ipv4_link_failure(struct sk_buff *skb) | |||
1610 | 1725 | ||
1611 | static int ip_rt_bug(struct sk_buff *skb) | 1726 | static int ip_rt_bug(struct sk_buff *skb) |
1612 | { | 1727 | { |
1613 | printk(KERN_DEBUG "ip_rt_bug: " NIPQUAD_FMT " -> " NIPQUAD_FMT ", %s\n", | 1728 | printk(KERN_DEBUG "ip_rt_bug: %pI4 -> %pI4, %s\n", |
1614 | NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr), | 1729 | &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, |
1615 | skb->dev ? skb->dev->name : "?"); | 1730 | skb->dev ? skb->dev->name : "?"); |
1616 | kfree_skb(skb); | 1731 | kfree_skb(skb); |
1617 | return 0; | 1732 | return 0; |
@@ -1788,9 +1903,8 @@ static void ip_handle_martian_source(struct net_device *dev, | |||
1788 | * RFC1812 recommendation, if source is martian, | 1903 | * RFC1812 recommendation, if source is martian, |
1789 | * the only hint is MAC header. | 1904 | * the only hint is MAC header. |
1790 | */ | 1905 | */ |
1791 | printk(KERN_WARNING "martian source " NIPQUAD_FMT " from " | 1906 | printk(KERN_WARNING "martian source %pI4 from %pI4, on dev %s\n", |
1792 | NIPQUAD_FMT", on dev %s\n", | 1907 | &daddr, &saddr, dev->name); |
1793 | NIPQUAD(daddr), NIPQUAD(saddr), dev->name); | ||
1794 | if (dev->hard_header_len && skb_mac_header_was_set(skb)) { | 1908 | if (dev->hard_header_len && skb_mac_header_was_set(skb)) { |
1795 | int i; | 1909 | int i; |
1796 | const unsigned char *p = skb_mac_header(skb); | 1910 | const unsigned char *p = skb_mac_header(skb); |
@@ -2099,9 +2213,8 @@ martian_destination: | |||
2099 | RT_CACHE_STAT_INC(in_martian_dst); | 2213 | RT_CACHE_STAT_INC(in_martian_dst); |
2100 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 2214 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
2101 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) | 2215 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) |
2102 | printk(KERN_WARNING "martian destination " NIPQUAD_FMT " from " | 2216 | printk(KERN_WARNING "martian destination %pI4 from %pI4, dev %s\n", |
2103 | NIPQUAD_FMT ", dev %s\n", | 2217 | &daddr, &saddr, dev->name); |
2104 | NIPQUAD(daddr), NIPQUAD(saddr), dev->name); | ||
2105 | #endif | 2218 | #endif |
2106 | 2219 | ||
2107 | e_hostunreach: | 2220 | e_hostunreach: |
@@ -2130,6 +2243,10 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2130 | struct net *net; | 2243 | struct net *net; |
2131 | 2244 | ||
2132 | net = dev_net(dev); | 2245 | net = dev_net(dev); |
2246 | |||
2247 | if (!rt_caching(net)) | ||
2248 | goto skip_cache; | ||
2249 | |||
2133 | tos &= IPTOS_RT_MASK; | 2250 | tos &= IPTOS_RT_MASK; |
2134 | hash = rt_hash(daddr, saddr, iif, rt_genid(net)); | 2251 | hash = rt_hash(daddr, saddr, iif, rt_genid(net)); |
2135 | 2252 | ||
@@ -2154,6 +2271,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2154 | } | 2271 | } |
2155 | rcu_read_unlock(); | 2272 | rcu_read_unlock(); |
2156 | 2273 | ||
2274 | skip_cache: | ||
2157 | /* Multicast recognition logic is moved from route cache to here. | 2275 | /* Multicast recognition logic is moved from route cache to here. |
2158 | The problem was that too many Ethernet cards have broken/missing | 2276 | The problem was that too many Ethernet cards have broken/missing |
2159 | hardware multicast filters :-( As result the host on multicasting | 2277 | hardware multicast filters :-( As result the host on multicasting |
@@ -2539,6 +2657,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, | |||
2539 | unsigned hash; | 2657 | unsigned hash; |
2540 | struct rtable *rth; | 2658 | struct rtable *rth; |
2541 | 2659 | ||
2660 | if (!rt_caching(net)) | ||
2661 | goto slow_output; | ||
2662 | |||
2542 | hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); | 2663 | hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); |
2543 | 2664 | ||
2544 | rcu_read_lock_bh(); | 2665 | rcu_read_lock_bh(); |
@@ -2563,6 +2684,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, | |||
2563 | } | 2684 | } |
2564 | rcu_read_unlock_bh(); | 2685 | rcu_read_unlock_bh(); |
2565 | 2686 | ||
2687 | slow_output: | ||
2566 | return ip_route_output_slow(net, rp, flp); | 2688 | return ip_route_output_slow(net, rp, flp); |
2567 | } | 2689 | } |
2568 | 2690 | ||
@@ -2578,7 +2700,6 @@ static struct dst_ops ipv4_dst_blackhole_ops = { | |||
2578 | .destroy = ipv4_dst_destroy, | 2700 | .destroy = ipv4_dst_destroy, |
2579 | .check = ipv4_dst_check, | 2701 | .check = ipv4_dst_check, |
2580 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, | 2702 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, |
2581 | .entry_size = sizeof(struct rtable), | ||
2582 | .entries = ATOMIC_INIT(0), | 2703 | .entries = ATOMIC_INIT(0), |
2583 | }; | 2704 | }; |
2584 | 2705 | ||
@@ -2640,7 +2761,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, | |||
2640 | flp->fl4_src = (*rp)->rt_src; | 2761 | flp->fl4_src = (*rp)->rt_src; |
2641 | if (!flp->fl4_dst) | 2762 | if (!flp->fl4_dst) |
2642 | flp->fl4_dst = (*rp)->rt_dst; | 2763 | flp->fl4_dst = (*rp)->rt_dst; |
2643 | err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, | 2764 | err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk, |
2644 | flags ? XFRM_LOOKUP_WAIT : 0); | 2765 | flags ? XFRM_LOOKUP_WAIT : 0); |
2645 | if (err == -EREMOTE) | 2766 | if (err == -EREMOTE) |
2646 | err = ipv4_dst_blackhole(net, rp, flp); | 2767 | err = ipv4_dst_blackhole(net, rp, flp); |
@@ -2995,7 +3116,7 @@ static ctl_table ipv4_route_table[] = { | |||
2995 | .data = &ipv4_dst_ops.gc_thresh, | 3116 | .data = &ipv4_dst_ops.gc_thresh, |
2996 | .maxlen = sizeof(int), | 3117 | .maxlen = sizeof(int), |
2997 | .mode = 0644, | 3118 | .mode = 0644, |
2998 | .proc_handler = &proc_dointvec, | 3119 | .proc_handler = proc_dointvec, |
2999 | }, | 3120 | }, |
3000 | { | 3121 | { |
3001 | .ctl_name = NET_IPV4_ROUTE_MAX_SIZE, | 3122 | .ctl_name = NET_IPV4_ROUTE_MAX_SIZE, |
@@ -3003,7 +3124,7 @@ static ctl_table ipv4_route_table[] = { | |||
3003 | .data = &ip_rt_max_size, | 3124 | .data = &ip_rt_max_size, |
3004 | .maxlen = sizeof(int), | 3125 | .maxlen = sizeof(int), |
3005 | .mode = 0644, | 3126 | .mode = 0644, |
3006 | .proc_handler = &proc_dointvec, | 3127 | .proc_handler = proc_dointvec, |
3007 | }, | 3128 | }, |
3008 | { | 3129 | { |
3009 | /* Deprecated. Use gc_min_interval_ms */ | 3130 | /* Deprecated. Use gc_min_interval_ms */ |
@@ -3013,8 +3134,8 @@ static ctl_table ipv4_route_table[] = { | |||
3013 | .data = &ip_rt_gc_min_interval, | 3134 | .data = &ip_rt_gc_min_interval, |
3014 | .maxlen = sizeof(int), | 3135 | .maxlen = sizeof(int), |
3015 | .mode = 0644, | 3136 | .mode = 0644, |
3016 | .proc_handler = &proc_dointvec_jiffies, | 3137 | .proc_handler = proc_dointvec_jiffies, |
3017 | .strategy = &sysctl_jiffies, | 3138 | .strategy = sysctl_jiffies, |
3018 | }, | 3139 | }, |
3019 | { | 3140 | { |
3020 | .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, | 3141 | .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, |
@@ -3022,8 +3143,8 @@ static ctl_table ipv4_route_table[] = { | |||
3022 | .data = &ip_rt_gc_min_interval, | 3143 | .data = &ip_rt_gc_min_interval, |
3023 | .maxlen = sizeof(int), | 3144 | .maxlen = sizeof(int), |
3024 | .mode = 0644, | 3145 | .mode = 0644, |
3025 | .proc_handler = &proc_dointvec_ms_jiffies, | 3146 | .proc_handler = proc_dointvec_ms_jiffies, |
3026 | .strategy = &sysctl_ms_jiffies, | 3147 | .strategy = sysctl_ms_jiffies, |
3027 | }, | 3148 | }, |
3028 | { | 3149 | { |
3029 | .ctl_name = NET_IPV4_ROUTE_GC_TIMEOUT, | 3150 | .ctl_name = NET_IPV4_ROUTE_GC_TIMEOUT, |
@@ -3031,8 +3152,8 @@ static ctl_table ipv4_route_table[] = { | |||
3031 | .data = &ip_rt_gc_timeout, | 3152 | .data = &ip_rt_gc_timeout, |
3032 | .maxlen = sizeof(int), | 3153 | .maxlen = sizeof(int), |
3033 | .mode = 0644, | 3154 | .mode = 0644, |
3034 | .proc_handler = &proc_dointvec_jiffies, | 3155 | .proc_handler = proc_dointvec_jiffies, |
3035 | .strategy = &sysctl_jiffies, | 3156 | .strategy = sysctl_jiffies, |
3036 | }, | 3157 | }, |
3037 | { | 3158 | { |
3038 | .ctl_name = NET_IPV4_ROUTE_GC_INTERVAL, | 3159 | .ctl_name = NET_IPV4_ROUTE_GC_INTERVAL, |
@@ -3040,8 +3161,8 @@ static ctl_table ipv4_route_table[] = { | |||
3040 | .data = &ip_rt_gc_interval, | 3161 | .data = &ip_rt_gc_interval, |
3041 | .maxlen = sizeof(int), | 3162 | .maxlen = sizeof(int), |
3042 | .mode = 0644, | 3163 | .mode = 0644, |
3043 | .proc_handler = &proc_dointvec_jiffies, | 3164 | .proc_handler = proc_dointvec_jiffies, |
3044 | .strategy = &sysctl_jiffies, | 3165 | .strategy = sysctl_jiffies, |
3045 | }, | 3166 | }, |
3046 | { | 3167 | { |
3047 | .ctl_name = NET_IPV4_ROUTE_REDIRECT_LOAD, | 3168 | .ctl_name = NET_IPV4_ROUTE_REDIRECT_LOAD, |
@@ -3049,7 +3170,7 @@ static ctl_table ipv4_route_table[] = { | |||
3049 | .data = &ip_rt_redirect_load, | 3170 | .data = &ip_rt_redirect_load, |
3050 | .maxlen = sizeof(int), | 3171 | .maxlen = sizeof(int), |
3051 | .mode = 0644, | 3172 | .mode = 0644, |
3052 | .proc_handler = &proc_dointvec, | 3173 | .proc_handler = proc_dointvec, |
3053 | }, | 3174 | }, |
3054 | { | 3175 | { |
3055 | .ctl_name = NET_IPV4_ROUTE_REDIRECT_NUMBER, | 3176 | .ctl_name = NET_IPV4_ROUTE_REDIRECT_NUMBER, |
@@ -3057,7 +3178,7 @@ static ctl_table ipv4_route_table[] = { | |||
3057 | .data = &ip_rt_redirect_number, | 3178 | .data = &ip_rt_redirect_number, |
3058 | .maxlen = sizeof(int), | 3179 | .maxlen = sizeof(int), |
3059 | .mode = 0644, | 3180 | .mode = 0644, |
3060 | .proc_handler = &proc_dointvec, | 3181 | .proc_handler = proc_dointvec, |
3061 | }, | 3182 | }, |
3062 | { | 3183 | { |
3063 | .ctl_name = NET_IPV4_ROUTE_REDIRECT_SILENCE, | 3184 | .ctl_name = NET_IPV4_ROUTE_REDIRECT_SILENCE, |
@@ -3065,7 +3186,7 @@ static ctl_table ipv4_route_table[] = { | |||
3065 | .data = &ip_rt_redirect_silence, | 3186 | .data = &ip_rt_redirect_silence, |
3066 | .maxlen = sizeof(int), | 3187 | .maxlen = sizeof(int), |
3067 | .mode = 0644, | 3188 | .mode = 0644, |
3068 | .proc_handler = &proc_dointvec, | 3189 | .proc_handler = proc_dointvec, |
3069 | }, | 3190 | }, |
3070 | { | 3191 | { |
3071 | .ctl_name = NET_IPV4_ROUTE_ERROR_COST, | 3192 | .ctl_name = NET_IPV4_ROUTE_ERROR_COST, |
@@ -3073,7 +3194,7 @@ static ctl_table ipv4_route_table[] = { | |||
3073 | .data = &ip_rt_error_cost, | 3194 | .data = &ip_rt_error_cost, |
3074 | .maxlen = sizeof(int), | 3195 | .maxlen = sizeof(int), |
3075 | .mode = 0644, | 3196 | .mode = 0644, |
3076 | .proc_handler = &proc_dointvec, | 3197 | .proc_handler = proc_dointvec, |
3077 | }, | 3198 | }, |
3078 | { | 3199 | { |
3079 | .ctl_name = NET_IPV4_ROUTE_ERROR_BURST, | 3200 | .ctl_name = NET_IPV4_ROUTE_ERROR_BURST, |
@@ -3081,7 +3202,7 @@ static ctl_table ipv4_route_table[] = { | |||
3081 | .data = &ip_rt_error_burst, | 3202 | .data = &ip_rt_error_burst, |
3082 | .maxlen = sizeof(int), | 3203 | .maxlen = sizeof(int), |
3083 | .mode = 0644, | 3204 | .mode = 0644, |
3084 | .proc_handler = &proc_dointvec, | 3205 | .proc_handler = proc_dointvec, |
3085 | }, | 3206 | }, |
3086 | { | 3207 | { |
3087 | .ctl_name = NET_IPV4_ROUTE_GC_ELASTICITY, | 3208 | .ctl_name = NET_IPV4_ROUTE_GC_ELASTICITY, |
@@ -3089,7 +3210,7 @@ static ctl_table ipv4_route_table[] = { | |||
3089 | .data = &ip_rt_gc_elasticity, | 3210 | .data = &ip_rt_gc_elasticity, |
3090 | .maxlen = sizeof(int), | 3211 | .maxlen = sizeof(int), |
3091 | .mode = 0644, | 3212 | .mode = 0644, |
3092 | .proc_handler = &proc_dointvec, | 3213 | .proc_handler = proc_dointvec, |
3093 | }, | 3214 | }, |
3094 | { | 3215 | { |
3095 | .ctl_name = NET_IPV4_ROUTE_MTU_EXPIRES, | 3216 | .ctl_name = NET_IPV4_ROUTE_MTU_EXPIRES, |
@@ -3097,8 +3218,8 @@ static ctl_table ipv4_route_table[] = { | |||
3097 | .data = &ip_rt_mtu_expires, | 3218 | .data = &ip_rt_mtu_expires, |
3098 | .maxlen = sizeof(int), | 3219 | .maxlen = sizeof(int), |
3099 | .mode = 0644, | 3220 | .mode = 0644, |
3100 | .proc_handler = &proc_dointvec_jiffies, | 3221 | .proc_handler = proc_dointvec_jiffies, |
3101 | .strategy = &sysctl_jiffies, | 3222 | .strategy = sysctl_jiffies, |
3102 | }, | 3223 | }, |
3103 | { | 3224 | { |
3104 | .ctl_name = NET_IPV4_ROUTE_MIN_PMTU, | 3225 | .ctl_name = NET_IPV4_ROUTE_MIN_PMTU, |
@@ -3106,7 +3227,7 @@ static ctl_table ipv4_route_table[] = { | |||
3106 | .data = &ip_rt_min_pmtu, | 3227 | .data = &ip_rt_min_pmtu, |
3107 | .maxlen = sizeof(int), | 3228 | .maxlen = sizeof(int), |
3108 | .mode = 0644, | 3229 | .mode = 0644, |
3109 | .proc_handler = &proc_dointvec, | 3230 | .proc_handler = proc_dointvec, |
3110 | }, | 3231 | }, |
3111 | { | 3232 | { |
3112 | .ctl_name = NET_IPV4_ROUTE_MIN_ADVMSS, | 3233 | .ctl_name = NET_IPV4_ROUTE_MIN_ADVMSS, |
@@ -3114,7 +3235,7 @@ static ctl_table ipv4_route_table[] = { | |||
3114 | .data = &ip_rt_min_advmss, | 3235 | .data = &ip_rt_min_advmss, |
3115 | .maxlen = sizeof(int), | 3236 | .maxlen = sizeof(int), |
3116 | .mode = 0644, | 3237 | .mode = 0644, |
3117 | .proc_handler = &proc_dointvec, | 3238 | .proc_handler = proc_dointvec, |
3118 | }, | 3239 | }, |
3119 | { | 3240 | { |
3120 | .ctl_name = NET_IPV4_ROUTE_SECRET_INTERVAL, | 3241 | .ctl_name = NET_IPV4_ROUTE_SECRET_INTERVAL, |
@@ -3122,8 +3243,8 @@ static ctl_table ipv4_route_table[] = { | |||
3122 | .data = &ip_rt_secret_interval, | 3243 | .data = &ip_rt_secret_interval, |
3123 | .maxlen = sizeof(int), | 3244 | .maxlen = sizeof(int), |
3124 | .mode = 0644, | 3245 | .mode = 0644, |
3125 | .proc_handler = &ipv4_sysctl_rt_secret_interval, | 3246 | .proc_handler = ipv4_sysctl_rt_secret_interval, |
3126 | .strategy = &ipv4_sysctl_rt_secret_interval_strategy, | 3247 | .strategy = ipv4_sysctl_rt_secret_interval_strategy, |
3127 | }, | 3248 | }, |
3128 | { .ctl_name = 0 } | 3249 | { .ctl_name = 0 } |
3129 | }; | 3250 | }; |
@@ -3151,8 +3272,8 @@ static struct ctl_table ipv4_route_flush_table[] = { | |||
3151 | .procname = "flush", | 3272 | .procname = "flush", |
3152 | .maxlen = sizeof(int), | 3273 | .maxlen = sizeof(int), |
3153 | .mode = 0200, | 3274 | .mode = 0200, |
3154 | .proc_handler = &ipv4_sysctl_rtcache_flush, | 3275 | .proc_handler = ipv4_sysctl_rtcache_flush, |
3155 | .strategy = &ipv4_sysctl_rtcache_flush_strategy, | 3276 | .strategy = ipv4_sysctl_rtcache_flush_strategy, |
3156 | }, | 3277 | }, |
3157 | { .ctl_name = 0 }, | 3278 | { .ctl_name = 0 }, |
3158 | }; | 3279 | }; |