aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTimo Teräs <timo.teras@iki.fi>2013-06-27 03:27:05 -0400
committerDavid S. Miller <davem@davemloft.net>2013-06-29 00:27:47 -0400
commit2ffae99d1fac272952b5a395759823717760ce37 (patch)
tree0c1cd0ea84fa58072e0eef58a0d8507567c6d785
parentb173ee488dcc545e77ed482158a2f0d06d7a5860 (diff)
ipv4: use next hop exceptions also for input routes
Commit d2d68ba9 (ipv4: Cache input routes in fib_info nexthops) assumed that "locally destined, and routed packets, never trigger PMTU events or redirects that will be processed by us". However, it seems that tunnel devices do trigger PMTU events in certain cases. At least ip_gre, ip6_gre, sit, and ipip do use the inner flow's skb_dst(skb)->ops->update_pmtu to propagate mtu information from the outer flows. These can cause the inner flow mtu to be decreased. If next hop exceptions are not consulted for pmtu, IP fragmentation will not be done properly for these routes. It also seems that we really need to have the PMTU information always for netfilter TCPMSS clamp-to-pmtu feature to work properly. So for the time being, cache separate copies of input routes for each next hop exception. Signed-off-by: Timo Teräs <timo.teras@iki.fi> Reviewed-by: Julian Anastasov <ja@ssi.bg> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/ip_fib.h3
-rw-r--r--net/ipv4/fib_semantics.c3
-rw-r--r--net/ipv4/route.c65
3 files changed, 54 insertions, 17 deletions
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 44424e9dab2a..aac85534d7b5 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -56,7 +56,8 @@ struct fib_nh_exception {
56 u32 fnhe_pmtu; 56 u32 fnhe_pmtu;
57 __be32 fnhe_gw; 57 __be32 fnhe_gw;
58 unsigned long fnhe_expires; 58 unsigned long fnhe_expires;
59 struct rtable __rcu *fnhe_rth; 59 struct rtable __rcu *fnhe_rth_input;
60 struct rtable __rcu *fnhe_rth_output;
60 unsigned long fnhe_stamp; 61 unsigned long fnhe_stamp;
61}; 62};
62 63
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 8f6cb7a87cd6..d5dbca5ecf62 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -169,7 +169,8 @@ static void free_nh_exceptions(struct fib_nh *nh)
169 169
170 next = rcu_dereference_protected(fnhe->fnhe_next, 1); 170 next = rcu_dereference_protected(fnhe->fnhe_next, 1);
171 171
172 rt_fibinfo_free(&fnhe->fnhe_rth); 172 rt_fibinfo_free(&fnhe->fnhe_rth_input);
173 rt_fibinfo_free(&fnhe->fnhe_rth_output);
173 174
174 kfree(fnhe); 175 kfree(fnhe);
175 176
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f3fa42eac461..a9a54a236832 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -565,10 +565,25 @@ static inline void rt_free(struct rtable *rt)
565 565
566static DEFINE_SPINLOCK(fnhe_lock); 566static DEFINE_SPINLOCK(fnhe_lock);
567 567
568static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
569{
570 struct rtable *rt;
571
572 rt = rcu_dereference(fnhe->fnhe_rth_input);
573 if (rt) {
574 RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
575 rt_free(rt);
576 }
577 rt = rcu_dereference(fnhe->fnhe_rth_output);
578 if (rt) {
579 RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
580 rt_free(rt);
581 }
582}
583
568static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) 584static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
569{ 585{
570 struct fib_nh_exception *fnhe, *oldest; 586 struct fib_nh_exception *fnhe, *oldest;
571 struct rtable *orig;
572 587
573 oldest = rcu_dereference(hash->chain); 588 oldest = rcu_dereference(hash->chain);
574 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; 589 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
@@ -576,11 +591,7 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
576 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) 591 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
577 oldest = fnhe; 592 oldest = fnhe;
578 } 593 }
579 orig = rcu_dereference(oldest->fnhe_rth); 594 fnhe_flush_routes(oldest);
580 if (orig) {
581 RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
582 rt_free(orig);
583 }
584 return oldest; 595 return oldest;
585} 596}
586 597
@@ -644,7 +655,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
644 fnhe->fnhe_expires = max(1UL, expires); 655 fnhe->fnhe_expires = max(1UL, expires);
645 } 656 }
646 /* Update all cached dsts too */ 657 /* Update all cached dsts too */
647 rt = rcu_dereference(fnhe->fnhe_rth); 658 rt = rcu_dereference(fnhe->fnhe_rth_input);
659 if (rt)
660 fill_route_from_fnhe(rt, fnhe);
661 rt = rcu_dereference(fnhe->fnhe_rth_output);
648 if (rt) 662 if (rt)
649 fill_route_from_fnhe(rt, fnhe); 663 fill_route_from_fnhe(rt, fnhe);
650 } else { 664 } else {
@@ -668,6 +682,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
668 * stale, so anyone caching it rechecks if this exception 682 * stale, so anyone caching it rechecks if this exception
669 * applies to them. 683 * applies to them.
670 */ 684 */
685 rt = rcu_dereference(nh->nh_rth_input);
686 if (rt)
687 rt->dst.obsolete = DST_OBSOLETE_KILL;
688
671 for_each_possible_cpu(i) { 689 for_each_possible_cpu(i) {
672 struct rtable __rcu **prt; 690 struct rtable __rcu **prt;
673 prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i); 691 prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
@@ -1242,25 +1260,36 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
1242 spin_lock_bh(&fnhe_lock); 1260 spin_lock_bh(&fnhe_lock);
1243 1261
1244 if (daddr == fnhe->fnhe_daddr) { 1262 if (daddr == fnhe->fnhe_daddr) {
1263 struct rtable __rcu **porig;
1264 struct rtable *orig;
1245 int genid = fnhe_genid(dev_net(rt->dst.dev)); 1265 int genid = fnhe_genid(dev_net(rt->dst.dev));
1246 struct rtable *orig = rcu_dereference(fnhe->fnhe_rth); 1266
1267 if (rt_is_input_route(rt))
1268 porig = &fnhe->fnhe_rth_input;
1269 else
1270 porig = &fnhe->fnhe_rth_output;
1271 orig = rcu_dereference(*porig);
1247 1272
1248 if (fnhe->fnhe_genid != genid) { 1273 if (fnhe->fnhe_genid != genid) {
1249 fnhe->fnhe_genid = genid; 1274 fnhe->fnhe_genid = genid;
1250 fnhe->fnhe_gw = 0; 1275 fnhe->fnhe_gw = 0;
1251 fnhe->fnhe_pmtu = 0; 1276 fnhe->fnhe_pmtu = 0;
1252 fnhe->fnhe_expires = 0; 1277 fnhe->fnhe_expires = 0;
1278 fnhe_flush_routes(fnhe);
1279 orig = NULL;
1253 } 1280 }
1254 fill_route_from_fnhe(rt, fnhe); 1281 fill_route_from_fnhe(rt, fnhe);
1255 if (!rt->rt_gateway) 1282 if (!rt->rt_gateway)
1256 rt->rt_gateway = daddr; 1283 rt->rt_gateway = daddr;
1257 1284
1258 rcu_assign_pointer(fnhe->fnhe_rth, rt); 1285 if (!(rt->dst.flags & DST_NOCACHE)) {
1259 if (orig) 1286 rcu_assign_pointer(*porig, rt);
1260 rt_free(orig); 1287 if (orig)
1288 rt_free(orig);
1289 ret = true;
1290 }
1261 1291
1262 fnhe->fnhe_stamp = jiffies; 1292 fnhe->fnhe_stamp = jiffies;
1263 ret = true;
1264 } 1293 }
1265 spin_unlock_bh(&fnhe_lock); 1294 spin_unlock_bh(&fnhe_lock);
1266 1295
@@ -1492,6 +1521,7 @@ static int __mkroute_input(struct sk_buff *skb,
1492 struct in_device *in_dev, 1521 struct in_device *in_dev,
1493 __be32 daddr, __be32 saddr, u32 tos) 1522 __be32 daddr, __be32 saddr, u32 tos)
1494{ 1523{
1524 struct fib_nh_exception *fnhe;
1495 struct rtable *rth; 1525 struct rtable *rth;
1496 int err; 1526 int err;
1497 struct in_device *out_dev; 1527 struct in_device *out_dev;
@@ -1538,8 +1568,13 @@ static int __mkroute_input(struct sk_buff *skb,
1538 } 1568 }
1539 } 1569 }
1540 1570
1571 fnhe = find_exception(&FIB_RES_NH(*res), daddr);
1541 if (do_cache) { 1572 if (do_cache) {
1542 rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); 1573 if (fnhe != NULL)
1574 rth = rcu_dereference(fnhe->fnhe_rth_input);
1575 else
1576 rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
1577
1543 if (rt_cache_valid(rth)) { 1578 if (rt_cache_valid(rth)) {
1544 skb_dst_set_noref(skb, &rth->dst); 1579 skb_dst_set_noref(skb, &rth->dst);
1545 goto out; 1580 goto out;
@@ -1567,7 +1602,7 @@ static int __mkroute_input(struct sk_buff *skb,
1567 rth->dst.input = ip_forward; 1602 rth->dst.input = ip_forward;
1568 rth->dst.output = ip_output; 1603 rth->dst.output = ip_output;
1569 1604
1570 rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag); 1605 rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
1571 skb_dst_set(skb, &rth->dst); 1606 skb_dst_set(skb, &rth->dst);
1572out: 1607out:
1573 err = 0; 1608 err = 0;
@@ -1882,7 +1917,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
1882 1917
1883 fnhe = find_exception(nh, fl4->daddr); 1918 fnhe = find_exception(nh, fl4->daddr);
1884 if (fnhe) 1919 if (fnhe)
1885 prth = &fnhe->fnhe_rth; 1920 prth = &fnhe->fnhe_rth_output;
1886 else { 1921 else {
1887 if (unlikely(fl4->flowi4_flags & 1922 if (unlikely(fl4->flowi4_flags &
1888 FLOWI_FLAG_KNOWN_NH && 1923 FLOWI_FLAG_KNOWN_NH &&