about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2012-07-31 01:45:30 -0400
committer: David S. Miller <davem@davemloft.net> 2012-07-31 17:41:39 -0400
commit: d26b3a7c4b3b26319f18bb645de93eba8f4bdcd5 (patch)
tree: ca86c03450fafdc89dac98ce403b1906fcaa025d
parent: 54764bb647b2e847c512acf8d443df965da35000 (diff)
ipv4: percpu nh_rth_output cache
The input path mostly runs under RCU and doesn't touch the dst refcount.

The output path, however, hits the dst refcount badly on forwarding or UDP workloads, and we have a lot of false sharing, for example in ipv4_mtu() when reading rt->rt_pmtu.

Using a percpu cache for nh_rth_output gives a nice performance increase at a small cost.

24 udpflood test on my 24 cpu machine (dummy0 output device)
(each process sends 1.000.000 udp frames, 24 processes are started)

before : 5.24 s
after : 2.06 s
For reference, time on linux-3.5 : 6.60 s

Signed-off-by: Eric Dumazet <edumazet@google.com>
Tested-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/ip_fib.h 3
-rw-r--r-- net/ipv4/fib_semantics.c 20
-rw-r--r-- net/ipv4/route.c 18
3 files changed, 34 insertions, 7 deletions
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index e521a03515b1..e331746029b4 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -21,6 +21,7 @@
21#include <linux/rcupdate.h> 21#include <linux/rcupdate.h>
22#include <net/fib_rules.h> 22#include <net/fib_rules.h>
23#include <net/inetpeer.h> 23#include <net/inetpeer.h>
24#include <linux/percpu.h>
24 25
25struct fib_config { 26struct fib_config {
26 u8 fc_dst_len; 27 u8 fc_dst_len;
@@ -81,7 +82,7 @@ struct fib_nh {
81 __be32 nh_gw; 82 __be32 nh_gw;
82 __be32 nh_saddr; 83 __be32 nh_saddr;
83 int nh_saddr_genid; 84 int nh_saddr_genid;
84 struct rtable __rcu *nh_rth_output; 85 struct rtable __rcu * __percpu *nh_pcpu_rth_output;
85 struct rtable __rcu *nh_rth_input; 86 struct rtable __rcu *nh_rth_input;
86 struct fnhe_hash_bucket *nh_exceptions; 87 struct fnhe_hash_bucket *nh_exceptions;
87}; 88};
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 625cf185c489..fe2ca02a1979 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -176,6 +176,23 @@ static void rt_nexthop_free(struct rtable __rcu **rtp)
176 dst_free(&rt->dst); 176 dst_free(&rt->dst);
177} 177}
178 178
179static void rt_nexthop_free_cpus(struct rtable __rcu * __percpu *rtp)
180{
181 int cpu;
182
183 if (!rtp)
184 return;
185
186 for_each_possible_cpu(cpu) {
187 struct rtable *rt;
188
189 rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1);
190 if (rt)
191 dst_free(&rt->dst);
192 }
193 free_percpu(rtp);
194}
195
179/* Release a nexthop info record */ 196/* Release a nexthop info record */
180static void free_fib_info_rcu(struct rcu_head *head) 197static void free_fib_info_rcu(struct rcu_head *head)
181{ 198{
@@ -186,7 +203,7 @@ static void free_fib_info_rcu(struct rcu_head *head)
186 dev_put(nexthop_nh->nh_dev); 203 dev_put(nexthop_nh->nh_dev);
187 if (nexthop_nh->nh_exceptions) 204 if (nexthop_nh->nh_exceptions)
188 free_nh_exceptions(nexthop_nh); 205 free_nh_exceptions(nexthop_nh);
189 rt_nexthop_free(&nexthop_nh->nh_rth_output); 206 rt_nexthop_free_cpus(nexthop_nh->nh_pcpu_rth_output);
190 rt_nexthop_free(&nexthop_nh->nh_rth_input); 207 rt_nexthop_free(&nexthop_nh->nh_rth_input);
191 } endfor_nexthops(fi); 208 } endfor_nexthops(fi);
192 209
@@ -817,6 +834,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
817 fi->fib_nhs = nhs; 834 fi->fib_nhs = nhs;
818 change_nexthops(fi) { 835 change_nexthops(fi) {
819 nexthop_nh->nh_parent = fi; 836 nexthop_nh->nh_parent = fi;
837 nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
820 } endfor_nexthops(fi) 838 } endfor_nexthops(fi)
821 839
822 if (cfg->fc_mx) { 840 if (cfg->fc_mx) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2bd107477469..4f6276ce0af3 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1206,11 +1206,15 @@ static inline void rt_free(struct rtable *rt)
1206 1206
1207static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) 1207static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
1208{ 1208{
1209 struct rtable *orig, *prev, **p = (struct rtable **)&nh->nh_rth_output; 1209 struct rtable *orig, *prev, **p;
1210 1210
1211 if (rt_is_input_route(rt)) 1211 if (rt_is_input_route(rt)) {
1212 p = (struct rtable **)&nh->nh_rth_input; 1212 p = (struct rtable **)&nh->nh_rth_input;
1213 1213 } else {
1214 if (!nh->nh_pcpu_rth_output)
1215 goto nocache;
1216 p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
1217 }
1214 orig = *p; 1218 orig = *p;
1215 1219
1216 prev = cmpxchg(p, orig, rt); 1220 prev = cmpxchg(p, orig, rt);
@@ -1223,6 +1227,7 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
1223 * unsuccessful at storing this route into the cache 1227 * unsuccessful at storing this route into the cache
1224 * we really need to set it. 1228 * we really need to set it.
1225 */ 1229 */
1230nocache:
1226 rt->dst.flags |= DST_NOCACHE; 1231 rt->dst.flags |= DST_NOCACHE;
1227 } 1232 }
1228} 1233}
@@ -1749,8 +1754,11 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
1749 fnhe = NULL; 1754 fnhe = NULL;
1750 if (fi) { 1755 if (fi) {
1751 fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); 1756 fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
1752 if (!fnhe) { 1757 if (!fnhe && FIB_RES_NH(*res).nh_pcpu_rth_output) {
1753 rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_output); 1758 struct rtable __rcu **prth;
1759
1760 prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output);
1761 rth = rcu_dereference(*prth);
1754 if (rt_cache_valid(rth)) { 1762 if (rt_cache_valid(rth)) {
1755 dst_hold(&rth->dst); 1763 dst_hold(&rth->dst);
1756 return rth; 1764 return rth;