aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2010-10-08 02:37:34 -0400
committerDavid S. Miller <davem@davemloft.net>2010-10-11 16:06:53 -0400
commitfc66f95c68b6d4535a0ea2ea15d5cf626e310956 (patch)
treeac3a7f08ad741a67ff683bf93e5669ddcae95ed7
parent0ed8ddf4045fcfcac36bad753dc4046118c603ec (diff)
net dst: use a percpu_counter to track entries
struct dst_ops tracks the number of allocated dst entries in an atomic_t field, which is subject to high cache line contention under stress workloads. Switch to a percpu_counter to reduce the number of times we need to dirty a central location. Place it on a separate cache line to avoid dirtying read-only fields. Stress test: (Sending 160.000.000 UDP frames, IP route cache disabled, dual E5540 @2.53GHz, 32bit kernel, FIB_TRIE, SLUB/NUMA) Before: real 0m51.179s user 0m15.329s sys 10m15.942s After: real 0m45.570s user 0m15.525s sys 9m56.669s With a small reordering of struct neighbour fields, the subject of a following patch (to separate refcnt from other read-mostly fields): real 0m41.841s user 0m15.261s sys 8m45.949s Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/dst_ops.h37
-rw-r--r--net/bridge/br_netfilter.c11
-rw-r--r--net/core/dst.c6
-rw-r--r--net/decnet/dn_route.c3
-rw-r--r--net/ipv4/route.c36
-rw-r--r--net/ipv4/xfrm4_policy.c4
-rw-r--r--net/ipv6/route.c28
-rw-r--r--net/ipv6/xfrm6_policy.c10
8 files changed, 100 insertions, 35 deletions
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index d1ff9b7e99b8..1fa5306e3e23 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -1,6 +1,7 @@
1#ifndef _NET_DST_OPS_H 1#ifndef _NET_DST_OPS_H
2#define _NET_DST_OPS_H 2#define _NET_DST_OPS_H
3#include <linux/types.h> 3#include <linux/types.h>
4#include <linux/percpu_counter.h>
4 5
5struct dst_entry; 6struct dst_entry;
6struct kmem_cachep; 7struct kmem_cachep;
@@ -22,7 +23,41 @@ struct dst_ops {
22 void (*update_pmtu)(struct dst_entry *dst, u32 mtu); 23 void (*update_pmtu)(struct dst_entry *dst, u32 mtu);
23 int (*local_out)(struct sk_buff *skb); 24 int (*local_out)(struct sk_buff *skb);
24 25
25 atomic_t entries;
26 struct kmem_cache *kmem_cachep; 26 struct kmem_cache *kmem_cachep;
27
28 struct percpu_counter pcpuc_entries ____cacheline_aligned_in_smp;
27}; 29};
30
31static inline int dst_entries_get_fast(struct dst_ops *dst)
32{
33 return percpu_counter_read_positive(&dst->pcpuc_entries);
34}
35
36static inline int dst_entries_get_slow(struct dst_ops *dst)
37{
38 int res;
39
40 local_bh_disable();
41 res = percpu_counter_sum_positive(&dst->pcpuc_entries);
42 local_bh_enable();
43 return res;
44}
45
46static inline void dst_entries_add(struct dst_ops *dst, int val)
47{
48 local_bh_disable();
49 percpu_counter_add(&dst->pcpuc_entries, val);
50 local_bh_enable();
51}
52
53static inline int dst_entries_init(struct dst_ops *dst)
54{
55 return percpu_counter_init(&dst->pcpuc_entries, 0);
56}
57
58static inline void dst_entries_destroy(struct dst_ops *dst)
59{
60 percpu_counter_destroy(&dst->pcpuc_entries);
61}
62
28#endif 63#endif
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 77f7b5fda45a..7f9ce9600ef3 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -106,7 +106,6 @@ static struct dst_ops fake_dst_ops = {
106 .family = AF_INET, 106 .family = AF_INET,
107 .protocol = cpu_to_be16(ETH_P_IP), 107 .protocol = cpu_to_be16(ETH_P_IP),
108 .update_pmtu = fake_update_pmtu, 108 .update_pmtu = fake_update_pmtu,
109 .entries = ATOMIC_INIT(0),
110}; 109};
111 110
112/* 111/*
@@ -1003,15 +1002,22 @@ int __init br_netfilter_init(void)
1003{ 1002{
1004 int ret; 1003 int ret;
1005 1004
1006 ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1005 ret = dst_entries_init(&fake_dst_ops);
1007 if (ret < 0) 1006 if (ret < 0)
1008 return ret; 1007 return ret;
1008
1009 ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
1010 if (ret < 0) {
1011 dst_entries_destroy(&fake_dst_ops);
1012 return ret;
1013 }
1009#ifdef CONFIG_SYSCTL 1014#ifdef CONFIG_SYSCTL
1010 brnf_sysctl_header = register_sysctl_paths(brnf_path, brnf_table); 1015 brnf_sysctl_header = register_sysctl_paths(brnf_path, brnf_table);
1011 if (brnf_sysctl_header == NULL) { 1016 if (brnf_sysctl_header == NULL) {
1012 printk(KERN_WARNING 1017 printk(KERN_WARNING
1013 "br_netfilter: can't register to sysctl.\n"); 1018 "br_netfilter: can't register to sysctl.\n");
1014 nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1019 nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
1020 dst_entries_destroy(&fake_dst_ops);
1015 return -ENOMEM; 1021 return -ENOMEM;
1016 } 1022 }
1017#endif 1023#endif
@@ -1025,4 +1031,5 @@ void br_netfilter_fini(void)
1025#ifdef CONFIG_SYSCTL 1031#ifdef CONFIG_SYSCTL
1026 unregister_sysctl_table(brnf_sysctl_header); 1032 unregister_sysctl_table(brnf_sysctl_header);
1027#endif 1033#endif
1034 dst_entries_destroy(&fake_dst_ops);
1028} 1035}
diff --git a/net/core/dst.c b/net/core/dst.c
index 978a1ee1f7d0..32e542d7f472 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -168,7 +168,7 @@ void *dst_alloc(struct dst_ops *ops)
168{ 168{
169 struct dst_entry *dst; 169 struct dst_entry *dst;
170 170
171 if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) { 171 if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
172 if (ops->gc(ops)) 172 if (ops->gc(ops))
173 return NULL; 173 return NULL;
174 } 174 }
@@ -183,7 +183,7 @@ void *dst_alloc(struct dst_ops *ops)
183#if RT_CACHE_DEBUG >= 2 183#if RT_CACHE_DEBUG >= 2
184 atomic_inc(&dst_total); 184 atomic_inc(&dst_total);
185#endif 185#endif
186 atomic_inc(&ops->entries); 186 dst_entries_add(ops, 1);
187 return dst; 187 return dst;
188} 188}
189EXPORT_SYMBOL(dst_alloc); 189EXPORT_SYMBOL(dst_alloc);
@@ -236,7 +236,7 @@ again:
236 neigh_release(neigh); 236 neigh_release(neigh);
237 } 237 }
238 238
239 atomic_dec(&dst->ops->entries); 239 dst_entries_add(dst->ops, -1);
240 240
241 if (dst->ops->destroy) 241 if (dst->ops->destroy)
242 dst->ops->destroy(dst); 242 dst->ops->destroy(dst);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 6585ea6d1182..df0f3e54ff8a 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -132,7 +132,6 @@ static struct dst_ops dn_dst_ops = {
132 .negative_advice = dn_dst_negative_advice, 132 .negative_advice = dn_dst_negative_advice,
133 .link_failure = dn_dst_link_failure, 133 .link_failure = dn_dst_link_failure,
134 .update_pmtu = dn_dst_update_pmtu, 134 .update_pmtu = dn_dst_update_pmtu,
135 .entries = ATOMIC_INIT(0),
136}; 135};
137 136
138static __inline__ unsigned dn_hash(__le16 src, __le16 dst) 137static __inline__ unsigned dn_hash(__le16 src, __le16 dst)
@@ -1758,6 +1757,7 @@ void __init dn_route_init(void)
1758 dn_dst_ops.kmem_cachep = 1757 dn_dst_ops.kmem_cachep =
1759 kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0, 1758 kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0,
1760 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 1759 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1760 dst_entries_init(&dn_dst_ops);
1761 setup_timer(&dn_route_timer, dn_dst_check_expire, 0); 1761 setup_timer(&dn_route_timer, dn_dst_check_expire, 0);
1762 dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; 1762 dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
1763 add_timer(&dn_route_timer); 1763 add_timer(&dn_route_timer);
@@ -1816,5 +1816,6 @@ void __exit dn_route_cleanup(void)
1816 dn_run_flush(0); 1816 dn_run_flush(0);
1817 1817
1818 proc_net_remove(&init_net, "decnet_cache"); 1818 proc_net_remove(&init_net, "decnet_cache");
1819 dst_entries_destroy(&dn_dst_ops);
1819} 1820}
1820 1821
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 3888f6ba0a5c..0755aa4af86c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -159,7 +159,6 @@ static struct dst_ops ipv4_dst_ops = {
159 .link_failure = ipv4_link_failure, 159 .link_failure = ipv4_link_failure,
160 .update_pmtu = ip_rt_update_pmtu, 160 .update_pmtu = ip_rt_update_pmtu,
161 .local_out = __ip_local_out, 161 .local_out = __ip_local_out,
162 .entries = ATOMIC_INIT(0),
163}; 162};
164 163
165#define ECN_OR_COST(class) TC_PRIO_##class 164#define ECN_OR_COST(class) TC_PRIO_##class
@@ -466,7 +465,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
466 465
467 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x " 466 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
468 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n", 467 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
469 atomic_read(&ipv4_dst_ops.entries), 468 dst_entries_get_slow(&ipv4_dst_ops),
470 st->in_hit, 469 st->in_hit,
471 st->in_slow_tot, 470 st->in_slow_tot,
472 st->in_slow_mc, 471 st->in_slow_mc,
@@ -945,6 +944,7 @@ static int rt_garbage_collect(struct dst_ops *ops)
945 struct rtable *rth, **rthp; 944 struct rtable *rth, **rthp;
946 unsigned long now = jiffies; 945 unsigned long now = jiffies;
947 int goal; 946 int goal;
947 int entries = dst_entries_get_fast(&ipv4_dst_ops);
948 948
949 /* 949 /*
950 * Garbage collection is pretty expensive, 950 * Garbage collection is pretty expensive,
@@ -954,28 +954,28 @@ static int rt_garbage_collect(struct dst_ops *ops)
954 RT_CACHE_STAT_INC(gc_total); 954 RT_CACHE_STAT_INC(gc_total);
955 955
956 if (now - last_gc < ip_rt_gc_min_interval && 956 if (now - last_gc < ip_rt_gc_min_interval &&
957 atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) { 957 entries < ip_rt_max_size) {
958 RT_CACHE_STAT_INC(gc_ignored); 958 RT_CACHE_STAT_INC(gc_ignored);
959 goto out; 959 goto out;
960 } 960 }
961 961
962 entries = dst_entries_get_slow(&ipv4_dst_ops);
962 /* Calculate number of entries, which we want to expire now. */ 963 /* Calculate number of entries, which we want to expire now. */
963 goal = atomic_read(&ipv4_dst_ops.entries) - 964 goal = entries - (ip_rt_gc_elasticity << rt_hash_log);
964 (ip_rt_gc_elasticity << rt_hash_log);
965 if (goal <= 0) { 965 if (goal <= 0) {
966 if (equilibrium < ipv4_dst_ops.gc_thresh) 966 if (equilibrium < ipv4_dst_ops.gc_thresh)
967 equilibrium = ipv4_dst_ops.gc_thresh; 967 equilibrium = ipv4_dst_ops.gc_thresh;
968 goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium; 968 goal = entries - equilibrium;
969 if (goal > 0) { 969 if (goal > 0) {
970 equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1); 970 equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1);
971 goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium; 971 goal = entries - equilibrium;
972 } 972 }
973 } else { 973 } else {
974 /* We are in dangerous area. Try to reduce cache really 974 /* We are in dangerous area. Try to reduce cache really
975 * aggressively. 975 * aggressively.
976 */ 976 */
977 goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1); 977 goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1);
978 equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal; 978 equilibrium = entries - goal;
979 } 979 }
980 980
981 if (now - last_gc >= ip_rt_gc_min_interval) 981 if (now - last_gc >= ip_rt_gc_min_interval)
@@ -1032,14 +1032,16 @@ static int rt_garbage_collect(struct dst_ops *ops)
1032 expire >>= 1; 1032 expire >>= 1;
1033#if RT_CACHE_DEBUG >= 2 1033#if RT_CACHE_DEBUG >= 2
1034 printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire, 1034 printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire,
1035 atomic_read(&ipv4_dst_ops.entries), goal, i); 1035 dst_entries_get_fast(&ipv4_dst_ops), goal, i);
1036#endif 1036#endif
1037 1037
1038 if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) 1038 if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
1039 goto out; 1039 goto out;
1040 } while (!in_softirq() && time_before_eq(jiffies, now)); 1040 } while (!in_softirq() && time_before_eq(jiffies, now));
1041 1041
1042 if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) 1042 if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
1043 goto out;
1044 if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size)
1043 goto out; 1045 goto out;
1044 if (net_ratelimit()) 1046 if (net_ratelimit())
1045 printk(KERN_WARNING "dst cache overflow\n"); 1047 printk(KERN_WARNING "dst cache overflow\n");
@@ -1049,11 +1051,12 @@ static int rt_garbage_collect(struct dst_ops *ops)
1049work_done: 1051work_done:
1050 expire += ip_rt_gc_min_interval; 1052 expire += ip_rt_gc_min_interval;
1051 if (expire > ip_rt_gc_timeout || 1053 if (expire > ip_rt_gc_timeout ||
1052 atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh) 1054 dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh ||
1055 dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh)
1053 expire = ip_rt_gc_timeout; 1056 expire = ip_rt_gc_timeout;
1054#if RT_CACHE_DEBUG >= 2 1057#if RT_CACHE_DEBUG >= 2
1055 printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire, 1058 printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire,
1056 atomic_read(&ipv4_dst_ops.entries), goal, rover); 1059 dst_entries_get_fast(&ipv4_dst_ops), goal, rover);
1057#endif 1060#endif
1058out: return 0; 1061out: return 0;
1059} 1062}
@@ -2717,7 +2720,6 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
2717 .destroy = ipv4_dst_destroy, 2720 .destroy = ipv4_dst_destroy,
2718 .check = ipv4_blackhole_dst_check, 2721 .check = ipv4_blackhole_dst_check,
2719 .update_pmtu = ipv4_rt_blackhole_update_pmtu, 2722 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
2720 .entries = ATOMIC_INIT(0),
2721}; 2723};
2722 2724
2723 2725
@@ -3287,6 +3289,12 @@ int __init ip_rt_init(void)
3287 3289
3288 ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep; 3290 ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
3289 3291
3292 if (dst_entries_init(&ipv4_dst_ops) < 0)
3293 panic("IP: failed to allocate ipv4_dst_ops counter\n");
3294
3295 if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
3296 panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
3297
3290 rt_hash_table = (struct rt_hash_bucket *) 3298 rt_hash_table = (struct rt_hash_bucket *)
3291 alloc_large_system_hash("IP route cache", 3299 alloc_large_system_hash("IP route cache",
3292 sizeof(struct rt_hash_bucket), 3300 sizeof(struct rt_hash_bucket),
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index a580349f0b8a..4464f3bff6a7 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -174,7 +174,7 @@ static inline int xfrm4_garbage_collect(struct dst_ops *ops)
174 struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); 174 struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops);
175 175
176 xfrm4_policy_afinfo.garbage_collect(net); 176 xfrm4_policy_afinfo.garbage_collect(net);
177 return (atomic_read(&ops->entries) > ops->gc_thresh * 2); 177 return (dst_entries_get_slow(ops) > ops->gc_thresh * 2);
178} 178}
179 179
180static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) 180static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -232,7 +232,6 @@ static struct dst_ops xfrm4_dst_ops = {
232 .ifdown = xfrm4_dst_ifdown, 232 .ifdown = xfrm4_dst_ifdown,
233 .local_out = __ip_local_out, 233 .local_out = __ip_local_out,
234 .gc_thresh = 1024, 234 .gc_thresh = 1024,
235 .entries = ATOMIC_INIT(0),
236}; 235};
237 236
238static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { 237static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
@@ -288,6 +287,7 @@ void __init xfrm4_init(int rt_max_size)
288 * and start cleaning when were 1/2 full 287 * and start cleaning when were 1/2 full
289 */ 288 */
290 xfrm4_dst_ops.gc_thresh = rt_max_size/2; 289 xfrm4_dst_ops.gc_thresh = rt_max_size/2;
290 dst_entries_init(&xfrm4_dst_ops);
291 291
292 xfrm4_state_init(); 292 xfrm4_state_init();
293 xfrm4_policy_init(); 293 xfrm4_policy_init();
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 17e217933885..25661f968f3f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -109,7 +109,6 @@ static struct dst_ops ip6_dst_ops_template = {
109 .link_failure = ip6_link_failure, 109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu, 110 .update_pmtu = ip6_rt_update_pmtu,
111 .local_out = __ip6_local_out, 111 .local_out = __ip6_local_out,
112 .entries = ATOMIC_INIT(0),
113}; 112};
114 113
115static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -122,7 +121,6 @@ static struct dst_ops ip6_dst_blackhole_ops = {
122 .destroy = ip6_dst_destroy, 121 .destroy = ip6_dst_destroy,
123 .check = ip6_dst_check, 122 .check = ip6_dst_check,
124 .update_pmtu = ip6_rt_blackhole_update_pmtu, 123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
125 .entries = ATOMIC_INIT(0),
126}; 124};
127 125
128static struct rt6_info ip6_null_entry_template = { 126static struct rt6_info ip6_null_entry_template = {
@@ -1058,19 +1056,22 @@ static int ip6_dst_gc(struct dst_ops *ops)
1058 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; 1056 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1059 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; 1057 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1060 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; 1058 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1059 int entries;
1061 1060
1061 entries = dst_entries_get_fast(ops);
1062 if (time_after(rt_last_gc + rt_min_interval, now) && 1062 if (time_after(rt_last_gc + rt_min_interval, now) &&
1063 atomic_read(&ops->entries) <= rt_max_size) 1063 entries <= rt_max_size)
1064 goto out; 1064 goto out;
1065 1065
1066 net->ipv6.ip6_rt_gc_expire++; 1066 net->ipv6.ip6_rt_gc_expire++;
1067 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); 1067 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1068 net->ipv6.ip6_rt_last_gc = now; 1068 net->ipv6.ip6_rt_last_gc = now;
1069 if (atomic_read(&ops->entries) < ops->gc_thresh) 1069 entries = dst_entries_get_slow(ops);
1070 if (entries < ops->gc_thresh)
1070 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; 1071 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1071out: 1072out:
1072 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 1073 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1073 return atomic_read(&ops->entries) > rt_max_size; 1074 return entries > rt_max_size;
1074} 1075}
1075 1076
1076/* Clean host part of a prefix. Not necessary in radix tree, 1077/* Clean host part of a prefix. Not necessary in radix tree,
@@ -2524,7 +2525,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2524 net->ipv6.rt6_stats->fib_rt_alloc, 2525 net->ipv6.rt6_stats->fib_rt_alloc,
2525 net->ipv6.rt6_stats->fib_rt_entries, 2526 net->ipv6.rt6_stats->fib_rt_entries,
2526 net->ipv6.rt6_stats->fib_rt_cache, 2527 net->ipv6.rt6_stats->fib_rt_cache,
2527 atomic_read(&net->ipv6.ip6_dst_ops.entries), 2528 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2528 net->ipv6.rt6_stats->fib_discarded_routes); 2529 net->ipv6.rt6_stats->fib_discarded_routes);
2529 2530
2530 return 0; 2531 return 0;
@@ -2666,11 +2667,14 @@ static int __net_init ip6_route_net_init(struct net *net)
2666 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, 2667 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2667 sizeof(net->ipv6.ip6_dst_ops)); 2668 sizeof(net->ipv6.ip6_dst_ops));
2668 2669
2670 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2671 goto out_ip6_dst_ops;
2672
2669 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 2673 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2670 sizeof(*net->ipv6.ip6_null_entry), 2674 sizeof(*net->ipv6.ip6_null_entry),
2671 GFP_KERNEL); 2675 GFP_KERNEL);
2672 if (!net->ipv6.ip6_null_entry) 2676 if (!net->ipv6.ip6_null_entry)
2673 goto out_ip6_dst_ops; 2677 goto out_ip6_dst_entries;
2674 net->ipv6.ip6_null_entry->dst.path = 2678 net->ipv6.ip6_null_entry->dst.path =
2675 (struct dst_entry *)net->ipv6.ip6_null_entry; 2679 (struct dst_entry *)net->ipv6.ip6_null_entry;
2676 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2680 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
@@ -2720,6 +2724,8 @@ out_ip6_prohibit_entry:
2720out_ip6_null_entry: 2724out_ip6_null_entry:
2721 kfree(net->ipv6.ip6_null_entry); 2725 kfree(net->ipv6.ip6_null_entry);
2722#endif 2726#endif
2727out_ip6_dst_entries:
2728 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2723out_ip6_dst_ops: 2729out_ip6_dst_ops:
2724 goto out; 2730 goto out;
2725} 2731}
@@ -2758,10 +2764,14 @@ int __init ip6_route_init(void)
2758 if (!ip6_dst_ops_template.kmem_cachep) 2764 if (!ip6_dst_ops_template.kmem_cachep)
2759 goto out; 2765 goto out;
2760 2766
2761 ret = register_pernet_subsys(&ip6_route_net_ops); 2767 ret = dst_entries_init(&ip6_dst_blackhole_ops);
2762 if (ret) 2768 if (ret)
2763 goto out_kmem_cache; 2769 goto out_kmem_cache;
2764 2770
2771 ret = register_pernet_subsys(&ip6_route_net_ops);
2772 if (ret)
2773 goto out_dst_entries;
2774
2765 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 2775 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2766 2776
2767 /* Registering of the loopback is done before this portion of code, 2777 /* Registering of the loopback is done before this portion of code,
@@ -2808,6 +2818,8 @@ out_fib6_init:
2808 fib6_gc_cleanup(); 2818 fib6_gc_cleanup();
2809out_register_subsys: 2819out_register_subsys:
2810 unregister_pernet_subsys(&ip6_route_net_ops); 2820 unregister_pernet_subsys(&ip6_route_net_ops);
2821out_dst_entries:
2822 dst_entries_destroy(&ip6_dst_blackhole_ops);
2811out_kmem_cache: 2823out_kmem_cache:
2812 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2824 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2813 goto out; 2825 goto out;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 39676eac3a37..7e74023ea6e4 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -199,7 +199,7 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops)
199 struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); 199 struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops);
200 200
201 xfrm6_policy_afinfo.garbage_collect(net); 201 xfrm6_policy_afinfo.garbage_collect(net);
202 return atomic_read(&ops->entries) > ops->gc_thresh * 2; 202 return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
203} 203}
204 204
205static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) 205static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -255,7 +255,6 @@ static struct dst_ops xfrm6_dst_ops = {
255 .ifdown = xfrm6_dst_ifdown, 255 .ifdown = xfrm6_dst_ifdown,
256 .local_out = __ip6_local_out, 256 .local_out = __ip6_local_out,
257 .gc_thresh = 1024, 257 .gc_thresh = 1024,
258 .entries = ATOMIC_INIT(0),
259}; 258};
260 259
261static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { 260static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
@@ -312,11 +311,13 @@ int __init xfrm6_init(void)
312 */ 311 */
313 gc_thresh = FIB6_TABLE_HASHSZ * 8; 312 gc_thresh = FIB6_TABLE_HASHSZ * 8;
314 xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh; 313 xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh;
314 dst_entries_init(&xfrm6_dst_ops);
315 315
316 ret = xfrm6_policy_init(); 316 ret = xfrm6_policy_init();
317 if (ret) 317 if (ret) {
318 dst_entries_destroy(&xfrm6_dst_ops);
318 goto out; 319 goto out;
319 320 }
320 ret = xfrm6_state_init(); 321 ret = xfrm6_state_init();
321 if (ret) 322 if (ret)
322 goto out_policy; 323 goto out_policy;
@@ -341,4 +342,5 @@ void xfrm6_fini(void)
341 //xfrm6_input_fini(); 342 //xfrm6_input_fini();
342 xfrm6_policy_fini(); 343 xfrm6_policy_fini();
343 xfrm6_state_fini(); 344 xfrm6_state_fini();
345 dst_entries_destroy(&xfrm6_dst_ops);
344} 346}