about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2012-07-31 18:02:02 -0400
committer David S. Miller <davem@davemloft.net> 2012-07-31 18:02:02 -0400
commit c5038a8327b980a5b279fa193163c468011de009 (patch)
tree 6d24fd005340e73360f9c1aa20dd3a1d21fd1625
parent d26b3a7c4b3b26319f18bb645de93eba8f4bdcd5 (diff)
ipv4: Cache routes in nexthop exception entries.
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/ip_fib.h 1
-rw-r--r-- net/ipv4/fib_semantics.c 39
-rw-r--r-- net/ipv4/route.c 103
3 files changed, 79 insertions, 64 deletions
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index e331746029b4..926142ed8d7a 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -55,6 +55,7 @@ struct fib_nh_exception {
55 u32 fnhe_pmtu; 55 u32 fnhe_pmtu;
56 __be32 fnhe_gw; 56 __be32 fnhe_gw;
57 unsigned long fnhe_expires; 57 unsigned long fnhe_expires;
58 struct rtable __rcu *fnhe_rth;
58 unsigned long fnhe_stamp; 59 unsigned long fnhe_stamp;
59}; 60};
60 61
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index fe2ca02a1979..da80dc14cc76 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -140,6 +140,21 @@ const struct fib_prop fib_props[RTN_MAX + 1] = {
140 }, 140 },
141}; 141};
142 142
143static void rt_fibinfo_free(struct rtable __rcu **rtp)
144{
145 struct rtable *rt = rcu_dereference_protected(*rtp, 1);
146
147 if (!rt)
148 return;
149
150 /* Not even needed : RCU_INIT_POINTER(*rtp, NULL);
151 * because we waited an RCU grace period before calling
152 * free_fib_info_rcu()
153 */
154
155 dst_free(&rt->dst);
156}
157
143static void free_nh_exceptions(struct fib_nh *nh) 158static void free_nh_exceptions(struct fib_nh *nh)
144{ 159{
145 struct fnhe_hash_bucket *hash = nh->nh_exceptions; 160 struct fnhe_hash_bucket *hash = nh->nh_exceptions;
@@ -153,6 +168,9 @@ static void free_nh_exceptions(struct fib_nh *nh)
153 struct fib_nh_exception *next; 168 struct fib_nh_exception *next;
154 169
155 next = rcu_dereference_protected(fnhe->fnhe_next, 1); 170 next = rcu_dereference_protected(fnhe->fnhe_next, 1);
171
172 rt_fibinfo_free(&fnhe->fnhe_rth);
173
156 kfree(fnhe); 174 kfree(fnhe);
157 175
158 fnhe = next; 176 fnhe = next;
@@ -161,22 +179,7 @@ static void free_nh_exceptions(struct fib_nh *nh)
161 kfree(hash); 179 kfree(hash);
162} 180}
163 181
164static void rt_nexthop_free(struct rtable __rcu **rtp) 182static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
165{
166 struct rtable *rt = rcu_dereference_protected(*rtp, 1);
167
168 if (!rt)
169 return;
170
171 /* Not even needed : RCU_INIT_POINTER(*rtp, NULL);
172 * because we waited an RCU grace period before calling
173 * free_fib_info_rcu()
174 */
175
176 dst_free(&rt->dst);
177}
178
179static void rt_nexthop_free_cpus(struct rtable __rcu * __percpu *rtp)
180{ 183{
181 int cpu; 184 int cpu;
182 185
@@ -203,8 +206,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
203 dev_put(nexthop_nh->nh_dev); 206 dev_put(nexthop_nh->nh_dev);
204 if (nexthop_nh->nh_exceptions) 207 if (nexthop_nh->nh_exceptions)
205 free_nh_exceptions(nexthop_nh); 208 free_nh_exceptions(nexthop_nh);
206 rt_nexthop_free_cpus(nexthop_nh->nh_pcpu_rth_output); 209 rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
207 rt_nexthop_free(&nexthop_nh->nh_rth_input); 210 rt_fibinfo_free(&nexthop_nh->nh_rth_input);
208 } endfor_nexthops(fi); 211 } endfor_nexthops(fi);
209 212
210 release_net(fi->fib_net); 213 release_net(fi->fib_net);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4f6276ce0af3..b102eeb16e34 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -587,11 +587,17 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
587 build_sk_flow_key(fl4, sk); 587 build_sk_flow_key(fl4, sk);
588} 588}
589 589
590static DEFINE_SEQLOCK(fnhe_seqlock); 590static inline void rt_free(struct rtable *rt)
591{
592 call_rcu(&rt->dst.rcu_head, dst_rcu_free);
593}
594
595static DEFINE_SPINLOCK(fnhe_lock);
591 596
592static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) 597static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
593{ 598{
594 struct fib_nh_exception *fnhe, *oldest; 599 struct fib_nh_exception *fnhe, *oldest;
600 struct rtable *orig;
595 601
596 oldest = rcu_dereference(hash->chain); 602 oldest = rcu_dereference(hash->chain);
597 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; 603 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
@@ -599,6 +605,11 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
599 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) 605 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
600 oldest = fnhe; 606 oldest = fnhe;
601 } 607 }
608 orig = rcu_dereference(oldest->fnhe_rth);
609 if (orig) {
610 RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
611 rt_free(orig);
612 }
602 return oldest; 613 return oldest;
603} 614}
604 615
@@ -620,7 +631,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
620 int depth; 631 int depth;
621 u32 hval = fnhe_hashfun(daddr); 632 u32 hval = fnhe_hashfun(daddr);
622 633
623 write_seqlock_bh(&fnhe_seqlock); 634 spin_lock_bh(&fnhe_lock);
624 635
625 hash = nh->nh_exceptions; 636 hash = nh->nh_exceptions;
626 if (!hash) { 637 if (!hash) {
@@ -667,7 +678,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
667 fnhe->fnhe_stamp = jiffies; 678 fnhe->fnhe_stamp = jiffies;
668 679
669out_unlock: 680out_unlock:
670 write_sequnlock_bh(&fnhe_seqlock); 681 spin_unlock_bh(&fnhe_lock);
671 return; 682 return;
672} 683}
673 684
@@ -1167,41 +1178,40 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
1167static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, 1178static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
1168 __be32 daddr) 1179 __be32 daddr)
1169{ 1180{
1170 __be32 fnhe_daddr, gw; 1181 spin_lock_bh(&fnhe_lock);
1171 unsigned long expires;
1172 unsigned int seq;
1173 u32 pmtu;
1174
1175restart:
1176 seq = read_seqbegin(&fnhe_seqlock);
1177 fnhe_daddr = fnhe->fnhe_daddr;
1178 gw = fnhe->fnhe_gw;
1179 pmtu = fnhe->fnhe_pmtu;
1180 expires = fnhe->fnhe_expires;
1181 if (read_seqretry(&fnhe_seqlock, seq))
1182 goto restart;
1183
1184 if (daddr != fnhe_daddr)
1185 return;
1186 1182
1187 if (pmtu) { 1183 if (daddr == fnhe->fnhe_daddr) {
1188 unsigned long diff = expires - jiffies; 1184 struct rtable *orig;
1189 1185
1190 if (time_before(jiffies, expires)) { 1186 if (fnhe->fnhe_pmtu) {
1191 rt->rt_pmtu = pmtu; 1187 unsigned long expires = fnhe->fnhe_expires;
1192 dst_set_expires(&rt->dst, diff); 1188 unsigned long diff = expires - jiffies;
1189
1190 if (time_before(jiffies, expires)) {
1191 rt->rt_pmtu = fnhe->fnhe_pmtu;
1192 dst_set_expires(&rt->dst, diff);
1193 }
1194 }
1195 if (fnhe->fnhe_gw) {
1196 rt->rt_flags |= RTCF_REDIRECTED;
1197 rt->rt_gateway = fnhe->fnhe_gw;
1193 } 1198 }
1194 }
1195 if (gw) {
1196 rt->rt_flags |= RTCF_REDIRECTED;
1197 rt->rt_gateway = gw;
1198 }
1199 fnhe->fnhe_stamp = jiffies;
1200}
1201 1199
1202static inline void rt_free(struct rtable *rt) 1200 orig = rcu_dereference(fnhe->fnhe_rth);
1203{ 1201 rcu_assign_pointer(fnhe->fnhe_rth, rt);
1204 call_rcu(&rt->dst.rcu_head, dst_rcu_free); 1202 if (orig)
1203 rt_free(orig);
1204
1205 fnhe->fnhe_stamp = jiffies;
1206 } else {
1207 /* Routes we intend to cache in nexthop exception have
1208 * the DST_NOCACHE bit clear. However, if we are
1209 * unsuccessful at storing this route into the cache
1210 * we really need to set it.
1211 */
1212 rt->dst.flags |= DST_NOCACHE;
1213 }
1214 spin_unlock_bh(&fnhe_lock);
1205} 1215}
1206 1216
1207static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) 1217static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
@@ -1249,13 +1259,13 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
1249 1259
1250 if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) 1260 if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
1251 rt->rt_gateway = nh->nh_gw; 1261 rt->rt_gateway = nh->nh_gw;
1252 if (unlikely(fnhe))
1253 rt_bind_exception(rt, fnhe, daddr);
1254 dst_init_metrics(&rt->dst, fi->fib_metrics, true); 1262 dst_init_metrics(&rt->dst, fi->fib_metrics, true);
1255#ifdef CONFIG_IP_ROUTE_CLASSID 1263#ifdef CONFIG_IP_ROUTE_CLASSID
1256 rt->dst.tclassid = nh->nh_tclassid; 1264 rt->dst.tclassid = nh->nh_tclassid;
1257#endif 1265#endif
1258 if (!(rt->dst.flags & DST_NOCACHE)) 1266 if (unlikely(fnhe))
1267 rt_bind_exception(rt, fnhe, daddr);
1268 else if (!(rt->dst.flags & DST_NOCACHE))
1259 rt_cache_route(nh, rt); 1269 rt_cache_route(nh, rt);
1260 } 1270 }
1261 1271
@@ -1753,22 +1763,23 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
1753 1763
1754 fnhe = NULL; 1764 fnhe = NULL;
1755 if (fi) { 1765 if (fi) {
1756 fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); 1766 struct rtable __rcu **prth;
1757 if (!fnhe && FIB_RES_NH(*res).nh_pcpu_rth_output) {
1758 struct rtable __rcu **prth;
1759 1767
1768 fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
1769 if (fnhe)
1770 prth = &fnhe->fnhe_rth;
1771 else
1760 prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output); 1772 prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output);
1761 rth = rcu_dereference(*prth); 1773 rth = rcu_dereference(*prth);
1762 if (rt_cache_valid(rth)) { 1774 if (rt_cache_valid(rth)) {
1763 dst_hold(&rth->dst); 1775 dst_hold(&rth->dst);
1764 return rth; 1776 return rth;
1765 }
1766 } 1777 }
1767 } 1778 }
1768 rth = rt_dst_alloc(dev_out, 1779 rth = rt_dst_alloc(dev_out,
1769 IN_DEV_CONF_GET(in_dev, NOPOLICY), 1780 IN_DEV_CONF_GET(in_dev, NOPOLICY),
1770 IN_DEV_CONF_GET(in_dev, NOXFRM), 1781 IN_DEV_CONF_GET(in_dev, NOXFRM),
1771 fi && !fnhe); 1782 fi);
1772 if (!rth) 1783 if (!rth)
1773 return ERR_PTR(-ENOBUFS); 1784 return ERR_PTR(-ENOBUFS);
1774 1785