aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
author Eric Dumazet <eric.dumazet@gmail.com> 2010-10-08 02:37:34 -0400
committer David S. Miller <davem@davemloft.net> 2010-10-11 16:06:53 -0400
commit fc66f95c68b6d4535a0ea2ea15d5cf626e310956 (patch)
tree ac3a7f08ad741a67ff683bf93e5669ddcae95ed7 /net/ipv6
parent 0ed8ddf4045fcfcac36bad753dc4046118c603ec (diff)
net dst: use a percpu_counter to track entries
struct dst_ops tracks the number of allocated dst in an atomic_t field, subject to high cache line contention in stress workloads.

Switch to a percpu_counter, to reduce the number of times we need to dirty a central location. Place it on a separate cache line to avoid dirtying read-only fields.

Stress test: (Sending 160.000.000 UDP frames, IP route cache disabled, dual E5540 @2.53GHz, 32bit kernel, FIB_TRIE, SLUB/NUMA)

Before:
real 0m51.179s
user 0m15.329s
sys 10m15.942s

After:
real 0m45.570s
user 0m15.525s
sys 9m56.669s

With a small reordering of struct neighbour fields, subject of a following patch (to separate refcnt from other read-mostly fields):
real 0m41.841s
user 0m15.261s
sys 8m45.949s

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r-- net/ipv6/route.c 28
-rw-r--r-- net/ipv6/xfrm6_policy.c 10
2 files changed, 26 insertions, 12 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 17e217933885..25661f968f3f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -109,7 +109,6 @@ static struct dst_ops ip6_dst_ops_template = {
109 .link_failure = ip6_link_failure, 109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu, 110 .update_pmtu = ip6_rt_update_pmtu,
111 .local_out = __ip6_local_out, 111 .local_out = __ip6_local_out,
112 .entries = ATOMIC_INIT(0),
113}; 112};
114 113
115static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -122,7 +121,6 @@ static struct dst_ops ip6_dst_blackhole_ops = {
122 .destroy = ip6_dst_destroy, 121 .destroy = ip6_dst_destroy,
123 .check = ip6_dst_check, 122 .check = ip6_dst_check,
124 .update_pmtu = ip6_rt_blackhole_update_pmtu, 123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
125 .entries = ATOMIC_INIT(0),
126}; 124};
127 125
128static struct rt6_info ip6_null_entry_template = { 126static struct rt6_info ip6_null_entry_template = {
@@ -1058,19 +1056,22 @@ static int ip6_dst_gc(struct dst_ops *ops)
1058 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; 1056 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1059 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; 1057 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1060 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; 1058 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1059 int entries;
1061 1060
1061 entries = dst_entries_get_fast(ops);
1062 if (time_after(rt_last_gc + rt_min_interval, now) && 1062 if (time_after(rt_last_gc + rt_min_interval, now) &&
1063 atomic_read(&ops->entries) <= rt_max_size) 1063 entries <= rt_max_size)
1064 goto out; 1064 goto out;
1065 1065
1066 net->ipv6.ip6_rt_gc_expire++; 1066 net->ipv6.ip6_rt_gc_expire++;
1067 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); 1067 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1068 net->ipv6.ip6_rt_last_gc = now; 1068 net->ipv6.ip6_rt_last_gc = now;
1069 if (atomic_read(&ops->entries) < ops->gc_thresh) 1069 entries = dst_entries_get_slow(ops);
1070 if (entries < ops->gc_thresh)
1070 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; 1071 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1071out: 1072out:
1072 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 1073 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1073 return atomic_read(&ops->entries) > rt_max_size; 1074 return entries > rt_max_size;
1074} 1075}
1075 1076
1076/* Clean host part of a prefix. Not necessary in radix tree, 1077/* Clean host part of a prefix. Not necessary in radix tree,
@@ -2524,7 +2525,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2524 net->ipv6.rt6_stats->fib_rt_alloc, 2525 net->ipv6.rt6_stats->fib_rt_alloc,
2525 net->ipv6.rt6_stats->fib_rt_entries, 2526 net->ipv6.rt6_stats->fib_rt_entries,
2526 net->ipv6.rt6_stats->fib_rt_cache, 2527 net->ipv6.rt6_stats->fib_rt_cache,
2527 atomic_read(&net->ipv6.ip6_dst_ops.entries), 2528 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2528 net->ipv6.rt6_stats->fib_discarded_routes); 2529 net->ipv6.rt6_stats->fib_discarded_routes);
2529 2530
2530 return 0; 2531 return 0;
@@ -2666,11 +2667,14 @@ static int __net_init ip6_route_net_init(struct net *net)
2666 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, 2667 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2667 sizeof(net->ipv6.ip6_dst_ops)); 2668 sizeof(net->ipv6.ip6_dst_ops));
2668 2669
2670 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2671 goto out_ip6_dst_ops;
2672
2669 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 2673 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2670 sizeof(*net->ipv6.ip6_null_entry), 2674 sizeof(*net->ipv6.ip6_null_entry),
2671 GFP_KERNEL); 2675 GFP_KERNEL);
2672 if (!net->ipv6.ip6_null_entry) 2676 if (!net->ipv6.ip6_null_entry)
2673 goto out_ip6_dst_ops; 2677 goto out_ip6_dst_entries;
2674 net->ipv6.ip6_null_entry->dst.path = 2678 net->ipv6.ip6_null_entry->dst.path =
2675 (struct dst_entry *)net->ipv6.ip6_null_entry; 2679 (struct dst_entry *)net->ipv6.ip6_null_entry;
2676 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2680 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
@@ -2720,6 +2724,8 @@ out_ip6_prohibit_entry:
2720out_ip6_null_entry: 2724out_ip6_null_entry:
2721 kfree(net->ipv6.ip6_null_entry); 2725 kfree(net->ipv6.ip6_null_entry);
2722#endif 2726#endif
2727out_ip6_dst_entries:
2728 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2723out_ip6_dst_ops: 2729out_ip6_dst_ops:
2724 goto out; 2730 goto out;
2725} 2731}
@@ -2758,10 +2764,14 @@ int __init ip6_route_init(void)
2758 if (!ip6_dst_ops_template.kmem_cachep) 2764 if (!ip6_dst_ops_template.kmem_cachep)
2759 goto out; 2765 goto out;
2760 2766
2761 ret = register_pernet_subsys(&ip6_route_net_ops); 2767 ret = dst_entries_init(&ip6_dst_blackhole_ops);
2762 if (ret) 2768 if (ret)
2763 goto out_kmem_cache; 2769 goto out_kmem_cache;
2764 2770
2771 ret = register_pernet_subsys(&ip6_route_net_ops);
2772 if (ret)
2773 goto out_dst_entries;
2774
2765 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 2775 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2766 2776
2767 /* Registering of the loopback is done before this portion of code, 2777 /* Registering of the loopback is done before this portion of code,
@@ -2808,6 +2818,8 @@ out_fib6_init:
2808 fib6_gc_cleanup(); 2818 fib6_gc_cleanup();
2809out_register_subsys: 2819out_register_subsys:
2810 unregister_pernet_subsys(&ip6_route_net_ops); 2820 unregister_pernet_subsys(&ip6_route_net_ops);
2821out_dst_entries:
2822 dst_entries_destroy(&ip6_dst_blackhole_ops);
2811out_kmem_cache: 2823out_kmem_cache:
2812 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2824 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2813 goto out; 2825 goto out;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 39676eac3a37..7e74023ea6e4 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -199,7 +199,7 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops)
199 struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); 199 struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops);
200 200
201 xfrm6_policy_afinfo.garbage_collect(net); 201 xfrm6_policy_afinfo.garbage_collect(net);
202 return atomic_read(&ops->entries) > ops->gc_thresh * 2; 202 return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
203} 203}
204 204
205static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) 205static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -255,7 +255,6 @@ static struct dst_ops xfrm6_dst_ops = {
255 .ifdown = xfrm6_dst_ifdown, 255 .ifdown = xfrm6_dst_ifdown,
256 .local_out = __ip6_local_out, 256 .local_out = __ip6_local_out,
257 .gc_thresh = 1024, 257 .gc_thresh = 1024,
258 .entries = ATOMIC_INIT(0),
259}; 258};
260 259
261static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { 260static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
@@ -312,11 +311,13 @@ int __init xfrm6_init(void)
312 */ 311 */
313 gc_thresh = FIB6_TABLE_HASHSZ * 8; 312 gc_thresh = FIB6_TABLE_HASHSZ * 8;
314 xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh; 313 xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh;
314 dst_entries_init(&xfrm6_dst_ops);
315 315
316 ret = xfrm6_policy_init(); 316 ret = xfrm6_policy_init();
317 if (ret) 317 if (ret) {
318 dst_entries_destroy(&xfrm6_dst_ops);
318 goto out; 319 goto out;
319 320 }
320 ret = xfrm6_state_init(); 321 ret = xfrm6_state_init();
321 if (ret) 322 if (ret)
322 goto out_policy; 323 goto out_policy;
@@ -341,4 +342,5 @@ void xfrm6_fini(void)
341 //xfrm6_input_fini(); 342 //xfrm6_input_fini();
342 xfrm6_policy_fini(); 343 xfrm6_policy_fini();
343 xfrm6_state_fini(); 344 xfrm6_state_fini();
345 dst_entries_destroy(&xfrm6_dst_ops);
344} 346}