diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-10-08 02:37:34 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-10-11 16:06:53 -0400 |
commit | fc66f95c68b6d4535a0ea2ea15d5cf626e310956 (patch) | |
tree | ac3a7f08ad741a67ff683bf93e5669ddcae95ed7 /net/ipv6/route.c | |
parent | 0ed8ddf4045fcfcac36bad753dc4046118c603ec (diff) |
net dst: use a percpu_counter to track entries
struct dst_ops tracks the number of allocated dst entries in an atomic_t field,
which is subject to high cache line contention under stress workloads.
Switch to a percpu_counter to reduce the number of times we need to dirty a
central location. Place it on a separate cache line to avoid dirtying
read-only fields.
Stress test :
(Sending 160.000.000 UDP frames,
IP route cache disabled, dual E5540 @2.53GHz,
32bit kernel, FIB_TRIE, SLUB/NUMA)
Before:
real 0m51.179s
user 0m15.329s
sys 10m15.942s
After:
real 0m45.570s
user 0m15.525s
sys 9m56.669s
With a small reordering of struct neighbour fields (the subject of a
following patch, to separate refcnt from other read-mostly fields):
real 0m41.841s
user 0m15.261s
sys 8m45.949s
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r-- | net/ipv6/route.c | 28 |
1 files changed, 20 insertions, 8 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 17e217933885..25661f968f3f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -109,7 +109,6 @@ static struct dst_ops ip6_dst_ops_template = { | |||
109 | .link_failure = ip6_link_failure, | 109 | .link_failure = ip6_link_failure, |
110 | .update_pmtu = ip6_rt_update_pmtu, | 110 | .update_pmtu = ip6_rt_update_pmtu, |
111 | .local_out = __ip6_local_out, | 111 | .local_out = __ip6_local_out, |
112 | .entries = ATOMIC_INIT(0), | ||
113 | }; | 112 | }; |
114 | 113 | ||
115 | static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) | 114 | static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) |
@@ -122,7 +121,6 @@ static struct dst_ops ip6_dst_blackhole_ops = { | |||
122 | .destroy = ip6_dst_destroy, | 121 | .destroy = ip6_dst_destroy, |
123 | .check = ip6_dst_check, | 122 | .check = ip6_dst_check, |
124 | .update_pmtu = ip6_rt_blackhole_update_pmtu, | 123 | .update_pmtu = ip6_rt_blackhole_update_pmtu, |
125 | .entries = ATOMIC_INIT(0), | ||
126 | }; | 124 | }; |
127 | 125 | ||
128 | static struct rt6_info ip6_null_entry_template = { | 126 | static struct rt6_info ip6_null_entry_template = { |
@@ -1058,19 +1056,22 @@ static int ip6_dst_gc(struct dst_ops *ops) | |||
1058 | int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; | 1056 | int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; |
1059 | int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; | 1057 | int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; |
1060 | unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; | 1058 | unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; |
1059 | int entries; | ||
1061 | 1060 | ||
1061 | entries = dst_entries_get_fast(ops); | ||
1062 | if (time_after(rt_last_gc + rt_min_interval, now) && | 1062 | if (time_after(rt_last_gc + rt_min_interval, now) && |
1063 | atomic_read(&ops->entries) <= rt_max_size) | 1063 | entries <= rt_max_size) |
1064 | goto out; | 1064 | goto out; |
1065 | 1065 | ||
1066 | net->ipv6.ip6_rt_gc_expire++; | 1066 | net->ipv6.ip6_rt_gc_expire++; |
1067 | fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); | 1067 | fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); |
1068 | net->ipv6.ip6_rt_last_gc = now; | 1068 | net->ipv6.ip6_rt_last_gc = now; |
1069 | if (atomic_read(&ops->entries) < ops->gc_thresh) | 1069 | entries = dst_entries_get_slow(ops); |
1070 | if (entries < ops->gc_thresh) | ||
1070 | net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; | 1071 | net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; |
1071 | out: | 1072 | out: |
1072 | net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; | 1073 | net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; |
1073 | return atomic_read(&ops->entries) > rt_max_size; | 1074 | return entries > rt_max_size; |
1074 | } | 1075 | } |
1075 | 1076 | ||
1076 | /* Clean host part of a prefix. Not necessary in radix tree, | 1077 | /* Clean host part of a prefix. Not necessary in radix tree, |
@@ -2524,7 +2525,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v) | |||
2524 | net->ipv6.rt6_stats->fib_rt_alloc, | 2525 | net->ipv6.rt6_stats->fib_rt_alloc, |
2525 | net->ipv6.rt6_stats->fib_rt_entries, | 2526 | net->ipv6.rt6_stats->fib_rt_entries, |
2526 | net->ipv6.rt6_stats->fib_rt_cache, | 2527 | net->ipv6.rt6_stats->fib_rt_cache, |
2527 | atomic_read(&net->ipv6.ip6_dst_ops.entries), | 2528 | dst_entries_get_slow(&net->ipv6.ip6_dst_ops), |
2528 | net->ipv6.rt6_stats->fib_discarded_routes); | 2529 | net->ipv6.rt6_stats->fib_discarded_routes); |
2529 | 2530 | ||
2530 | return 0; | 2531 | return 0; |
@@ -2666,11 +2667,14 @@ static int __net_init ip6_route_net_init(struct net *net) | |||
2666 | memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, | 2667 | memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, |
2667 | sizeof(net->ipv6.ip6_dst_ops)); | 2668 | sizeof(net->ipv6.ip6_dst_ops)); |
2668 | 2669 | ||
2670 | if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) | ||
2671 | goto out_ip6_dst_ops; | ||
2672 | |||
2669 | net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, | 2673 | net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, |
2670 | sizeof(*net->ipv6.ip6_null_entry), | 2674 | sizeof(*net->ipv6.ip6_null_entry), |
2671 | GFP_KERNEL); | 2675 | GFP_KERNEL); |
2672 | if (!net->ipv6.ip6_null_entry) | 2676 | if (!net->ipv6.ip6_null_entry) |
2673 | goto out_ip6_dst_ops; | 2677 | goto out_ip6_dst_entries; |
2674 | net->ipv6.ip6_null_entry->dst.path = | 2678 | net->ipv6.ip6_null_entry->dst.path = |
2675 | (struct dst_entry *)net->ipv6.ip6_null_entry; | 2679 | (struct dst_entry *)net->ipv6.ip6_null_entry; |
2676 | net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; | 2680 | net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; |
@@ -2720,6 +2724,8 @@ out_ip6_prohibit_entry: | |||
2720 | out_ip6_null_entry: | 2724 | out_ip6_null_entry: |
2721 | kfree(net->ipv6.ip6_null_entry); | 2725 | kfree(net->ipv6.ip6_null_entry); |
2722 | #endif | 2726 | #endif |
2727 | out_ip6_dst_entries: | ||
2728 | dst_entries_destroy(&net->ipv6.ip6_dst_ops); | ||
2723 | out_ip6_dst_ops: | 2729 | out_ip6_dst_ops: |
2724 | goto out; | 2730 | goto out; |
2725 | } | 2731 | } |
@@ -2758,10 +2764,14 @@ int __init ip6_route_init(void) | |||
2758 | if (!ip6_dst_ops_template.kmem_cachep) | 2764 | if (!ip6_dst_ops_template.kmem_cachep) |
2759 | goto out; | 2765 | goto out; |
2760 | 2766 | ||
2761 | ret = register_pernet_subsys(&ip6_route_net_ops); | 2767 | ret = dst_entries_init(&ip6_dst_blackhole_ops); |
2762 | if (ret) | 2768 | if (ret) |
2763 | goto out_kmem_cache; | 2769 | goto out_kmem_cache; |
2764 | 2770 | ||
2771 | ret = register_pernet_subsys(&ip6_route_net_ops); | ||
2772 | if (ret) | ||
2773 | goto out_dst_entries; | ||
2774 | |||
2765 | ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; | 2775 | ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; |
2766 | 2776 | ||
2767 | /* Registering of the loopback is done before this portion of code, | 2777 | /* Registering of the loopback is done before this portion of code, |
@@ -2808,6 +2818,8 @@ out_fib6_init: | |||
2808 | fib6_gc_cleanup(); | 2818 | fib6_gc_cleanup(); |
2809 | out_register_subsys: | 2819 | out_register_subsys: |
2810 | unregister_pernet_subsys(&ip6_route_net_ops); | 2820 | unregister_pernet_subsys(&ip6_route_net_ops); |
2821 | out_dst_entries: | ||
2822 | dst_entries_destroy(&ip6_dst_blackhole_ops); | ||
2811 | out_kmem_cache: | 2823 | out_kmem_cache: |
2812 | kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); | 2824 | kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); |
2813 | goto out; | 2825 | goto out; |