diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-10-08 02:37:34 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-10-11 16:06:53 -0400 |
commit | fc66f95c68b6d4535a0ea2ea15d5cf626e310956 (patch) | |
tree | ac3a7f08ad741a67ff683bf93e5669ddcae95ed7 /net/ipv6 | |
parent | 0ed8ddf4045fcfcac36bad753dc4046118c603ec (diff) |
net dst: use a percpu_counter to track entries
struct dst_ops tracks the number of allocated dst entries in an atomic_t field,
which is subject to high cache line contention under stress workloads.
Switch to a percpu_counter, to reduce the number of times we need to dirty a
central location. Place it on a separate cache line to avoid dirtying
read-only fields.
Stress test :
(Sending 160.000.000 UDP frames,
IP route cache disabled, dual E5540 @2.53GHz,
32bit kernel, FIB_TRIE, SLUB/NUMA)
Before:
real 0m51.179s
user 0m15.329s
sys 10m15.942s
After:
real 0m45.570s
user 0m15.525s
sys 9m56.669s
With a small reordering of struct neighbour fields, the subject of a
following patch (to separate refcnt from other read-mostly fields):
real 0m41.841s
user 0m15.261s
sys 8m45.949s
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/route.c | 28 | ||||
-rw-r--r-- | net/ipv6/xfrm6_policy.c | 10 |
2 files changed, 26 insertions, 12 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 17e217933885..25661f968f3f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -109,7 +109,6 @@ static struct dst_ops ip6_dst_ops_template = { | |||
109 | .link_failure = ip6_link_failure, | 109 | .link_failure = ip6_link_failure, |
110 | .update_pmtu = ip6_rt_update_pmtu, | 110 | .update_pmtu = ip6_rt_update_pmtu, |
111 | .local_out = __ip6_local_out, | 111 | .local_out = __ip6_local_out, |
112 | .entries = ATOMIC_INIT(0), | ||
113 | }; | 112 | }; |
114 | 113 | ||
115 | static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) | 114 | static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) |
@@ -122,7 +121,6 @@ static struct dst_ops ip6_dst_blackhole_ops = { | |||
122 | .destroy = ip6_dst_destroy, | 121 | .destroy = ip6_dst_destroy, |
123 | .check = ip6_dst_check, | 122 | .check = ip6_dst_check, |
124 | .update_pmtu = ip6_rt_blackhole_update_pmtu, | 123 | .update_pmtu = ip6_rt_blackhole_update_pmtu, |
125 | .entries = ATOMIC_INIT(0), | ||
126 | }; | 124 | }; |
127 | 125 | ||
128 | static struct rt6_info ip6_null_entry_template = { | 126 | static struct rt6_info ip6_null_entry_template = { |
@@ -1058,19 +1056,22 @@ static int ip6_dst_gc(struct dst_ops *ops) | |||
1058 | int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; | 1056 | int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; |
1059 | int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; | 1057 | int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; |
1060 | unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; | 1058 | unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; |
1059 | int entries; | ||
1061 | 1060 | ||
1061 | entries = dst_entries_get_fast(ops); | ||
1062 | if (time_after(rt_last_gc + rt_min_interval, now) && | 1062 | if (time_after(rt_last_gc + rt_min_interval, now) && |
1063 | atomic_read(&ops->entries) <= rt_max_size) | 1063 | entries <= rt_max_size) |
1064 | goto out; | 1064 | goto out; |
1065 | 1065 | ||
1066 | net->ipv6.ip6_rt_gc_expire++; | 1066 | net->ipv6.ip6_rt_gc_expire++; |
1067 | fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); | 1067 | fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); |
1068 | net->ipv6.ip6_rt_last_gc = now; | 1068 | net->ipv6.ip6_rt_last_gc = now; |
1069 | if (atomic_read(&ops->entries) < ops->gc_thresh) | 1069 | entries = dst_entries_get_slow(ops); |
1070 | if (entries < ops->gc_thresh) | ||
1070 | net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; | 1071 | net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; |
1071 | out: | 1072 | out: |
1072 | net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; | 1073 | net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; |
1073 | return atomic_read(&ops->entries) > rt_max_size; | 1074 | return entries > rt_max_size; |
1074 | } | 1075 | } |
1075 | 1076 | ||
1076 | /* Clean host part of a prefix. Not necessary in radix tree, | 1077 | /* Clean host part of a prefix. Not necessary in radix tree, |
@@ -2524,7 +2525,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v) | |||
2524 | net->ipv6.rt6_stats->fib_rt_alloc, | 2525 | net->ipv6.rt6_stats->fib_rt_alloc, |
2525 | net->ipv6.rt6_stats->fib_rt_entries, | 2526 | net->ipv6.rt6_stats->fib_rt_entries, |
2526 | net->ipv6.rt6_stats->fib_rt_cache, | 2527 | net->ipv6.rt6_stats->fib_rt_cache, |
2527 | atomic_read(&net->ipv6.ip6_dst_ops.entries), | 2528 | dst_entries_get_slow(&net->ipv6.ip6_dst_ops), |
2528 | net->ipv6.rt6_stats->fib_discarded_routes); | 2529 | net->ipv6.rt6_stats->fib_discarded_routes); |
2529 | 2530 | ||
2530 | return 0; | 2531 | return 0; |
@@ -2666,11 +2667,14 @@ static int __net_init ip6_route_net_init(struct net *net) | |||
2666 | memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, | 2667 | memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, |
2667 | sizeof(net->ipv6.ip6_dst_ops)); | 2668 | sizeof(net->ipv6.ip6_dst_ops)); |
2668 | 2669 | ||
2670 | if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) | ||
2671 | goto out_ip6_dst_ops; | ||
2672 | |||
2669 | net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, | 2673 | net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, |
2670 | sizeof(*net->ipv6.ip6_null_entry), | 2674 | sizeof(*net->ipv6.ip6_null_entry), |
2671 | GFP_KERNEL); | 2675 | GFP_KERNEL); |
2672 | if (!net->ipv6.ip6_null_entry) | 2676 | if (!net->ipv6.ip6_null_entry) |
2673 | goto out_ip6_dst_ops; | 2677 | goto out_ip6_dst_entries; |
2674 | net->ipv6.ip6_null_entry->dst.path = | 2678 | net->ipv6.ip6_null_entry->dst.path = |
2675 | (struct dst_entry *)net->ipv6.ip6_null_entry; | 2679 | (struct dst_entry *)net->ipv6.ip6_null_entry; |
2676 | net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; | 2680 | net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; |
@@ -2720,6 +2724,8 @@ out_ip6_prohibit_entry: | |||
2720 | out_ip6_null_entry: | 2724 | out_ip6_null_entry: |
2721 | kfree(net->ipv6.ip6_null_entry); | 2725 | kfree(net->ipv6.ip6_null_entry); |
2722 | #endif | 2726 | #endif |
2727 | out_ip6_dst_entries: | ||
2728 | dst_entries_destroy(&net->ipv6.ip6_dst_ops); | ||
2723 | out_ip6_dst_ops: | 2729 | out_ip6_dst_ops: |
2724 | goto out; | 2730 | goto out; |
2725 | } | 2731 | } |
@@ -2758,10 +2764,14 @@ int __init ip6_route_init(void) | |||
2758 | if (!ip6_dst_ops_template.kmem_cachep) | 2764 | if (!ip6_dst_ops_template.kmem_cachep) |
2759 | goto out; | 2765 | goto out; |
2760 | 2766 | ||
2761 | ret = register_pernet_subsys(&ip6_route_net_ops); | 2767 | ret = dst_entries_init(&ip6_dst_blackhole_ops); |
2762 | if (ret) | 2768 | if (ret) |
2763 | goto out_kmem_cache; | 2769 | goto out_kmem_cache; |
2764 | 2770 | ||
2771 | ret = register_pernet_subsys(&ip6_route_net_ops); | ||
2772 | if (ret) | ||
2773 | goto out_dst_entries; | ||
2774 | |||
2765 | ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; | 2775 | ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; |
2766 | 2776 | ||
2767 | /* Registering of the loopback is done before this portion of code, | 2777 | /* Registering of the loopback is done before this portion of code, |
@@ -2808,6 +2818,8 @@ out_fib6_init: | |||
2808 | fib6_gc_cleanup(); | 2818 | fib6_gc_cleanup(); |
2809 | out_register_subsys: | 2819 | out_register_subsys: |
2810 | unregister_pernet_subsys(&ip6_route_net_ops); | 2820 | unregister_pernet_subsys(&ip6_route_net_ops); |
2821 | out_dst_entries: | ||
2822 | dst_entries_destroy(&ip6_dst_blackhole_ops); | ||
2811 | out_kmem_cache: | 2823 | out_kmem_cache: |
2812 | kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); | 2824 | kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); |
2813 | goto out; | 2825 | goto out; |
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 39676eac3a37..7e74023ea6e4 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c | |||
@@ -199,7 +199,7 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops) | |||
199 | struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); | 199 | struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); |
200 | 200 | ||
201 | xfrm6_policy_afinfo.garbage_collect(net); | 201 | xfrm6_policy_afinfo.garbage_collect(net); |
202 | return atomic_read(&ops->entries) > ops->gc_thresh * 2; | 202 | return dst_entries_get_fast(ops) > ops->gc_thresh * 2; |
203 | } | 203 | } |
204 | 204 | ||
205 | static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) | 205 | static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) |
@@ -255,7 +255,6 @@ static struct dst_ops xfrm6_dst_ops = { | |||
255 | .ifdown = xfrm6_dst_ifdown, | 255 | .ifdown = xfrm6_dst_ifdown, |
256 | .local_out = __ip6_local_out, | 256 | .local_out = __ip6_local_out, |
257 | .gc_thresh = 1024, | 257 | .gc_thresh = 1024, |
258 | .entries = ATOMIC_INIT(0), | ||
259 | }; | 258 | }; |
260 | 259 | ||
261 | static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { | 260 | static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { |
@@ -312,11 +311,13 @@ int __init xfrm6_init(void) | |||
312 | */ | 311 | */ |
313 | gc_thresh = FIB6_TABLE_HASHSZ * 8; | 312 | gc_thresh = FIB6_TABLE_HASHSZ * 8; |
314 | xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh; | 313 | xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh; |
314 | dst_entries_init(&xfrm6_dst_ops); | ||
315 | 315 | ||
316 | ret = xfrm6_policy_init(); | 316 | ret = xfrm6_policy_init(); |
317 | if (ret) | 317 | if (ret) { |
318 | dst_entries_destroy(&xfrm6_dst_ops); | ||
318 | goto out; | 319 | goto out; |
319 | 320 | } | |
320 | ret = xfrm6_state_init(); | 321 | ret = xfrm6_state_init(); |
321 | if (ret) | 322 | if (ret) |
322 | goto out_policy; | 323 | goto out_policy; |
@@ -341,4 +342,5 @@ void xfrm6_fini(void) | |||
341 | //xfrm6_input_fini(); | 342 | //xfrm6_input_fini(); |
342 | xfrm6_policy_fini(); | 343 | xfrm6_policy_fini(); |
343 | xfrm6_state_fini(); | 344 | xfrm6_state_fini(); |
345 | dst_entries_destroy(&xfrm6_dst_ops); | ||
344 | } | 346 | } |