diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-10-08 02:37:34 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-10-11 16:06:53 -0400 |
commit | fc66f95c68b6d4535a0ea2ea15d5cf626e310956 (patch) | |
tree | ac3a7f08ad741a67ff683bf93e5669ddcae95ed7 | |
parent | 0ed8ddf4045fcfcac36bad753dc4046118c603ec (diff) |
net dst: use a percpu_counter to track entries
struct dst_ops tracks the number of allocated dst entries in an atomic_t
field, which is subject to high cache line contention under stress workloads.
Switch to a percpu_counter to reduce the number of times we need to dirty a
central location. Place it on a separate cache line to avoid dirtying
read-only fields.
Stress test :
(Sending 160.000.000 UDP frames,
IP route cache disabled, dual E5540 @2.53GHz,
32bit kernel, FIB_TRIE, SLUB/NUMA)
Before:
real 0m51.179s
user 0m15.329s
sys 10m15.942s
After:
real 0m45.570s
user 0m15.525s
sys 9m56.669s
With a small reordering of struct neighbour fields (the subject of a
following patch, which separates refcnt from other read-mostly fields):
real 0m41.841s
user 0m15.261s
sys 8m45.949s
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/dst_ops.h | 37 | ||||
-rw-r--r-- | net/bridge/br_netfilter.c | 11 | ||||
-rw-r--r-- | net/core/dst.c | 6 | ||||
-rw-r--r-- | net/decnet/dn_route.c | 3 | ||||
-rw-r--r-- | net/ipv4/route.c | 36 | ||||
-rw-r--r-- | net/ipv4/xfrm4_policy.c | 4 | ||||
-rw-r--r-- | net/ipv6/route.c | 28 | ||||
-rw-r--r-- | net/ipv6/xfrm6_policy.c | 10 |
8 files changed, 100 insertions, 35 deletions
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index d1ff9b7e99b8..1fa5306e3e23 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h | |||
@@ -1,6 +1,7 @@ | |||
1 | #ifndef _NET_DST_OPS_H | 1 | #ifndef _NET_DST_OPS_H |
2 | #define _NET_DST_OPS_H | 2 | #define _NET_DST_OPS_H |
3 | #include <linux/types.h> | 3 | #include <linux/types.h> |
4 | #include <linux/percpu_counter.h> | ||
4 | 5 | ||
5 | struct dst_entry; | 6 | struct dst_entry; |
6 | struct kmem_cachep; | 7 | struct kmem_cachep; |
@@ -22,7 +23,41 @@ struct dst_ops { | |||
22 | void (*update_pmtu)(struct dst_entry *dst, u32 mtu); | 23 | void (*update_pmtu)(struct dst_entry *dst, u32 mtu); |
23 | int (*local_out)(struct sk_buff *skb); | 24 | int (*local_out)(struct sk_buff *skb); |
24 | 25 | ||
25 | atomic_t entries; | ||
26 | struct kmem_cache *kmem_cachep; | 26 | struct kmem_cache *kmem_cachep; |
27 | |||
28 | struct percpu_counter pcpuc_entries ____cacheline_aligned_in_smp; | ||
27 | }; | 29 | }; |
30 | |||
31 | static inline int dst_entries_get_fast(struct dst_ops *dst) | ||
32 | { | ||
33 | return percpu_counter_read_positive(&dst->pcpuc_entries); | ||
34 | } | ||
35 | |||
36 | static inline int dst_entries_get_slow(struct dst_ops *dst) | ||
37 | { | ||
38 | int res; | ||
39 | |||
40 | local_bh_disable(); | ||
41 | res = percpu_counter_sum_positive(&dst->pcpuc_entries); | ||
42 | local_bh_enable(); | ||
43 | return res; | ||
44 | } | ||
45 | |||
46 | static inline void dst_entries_add(struct dst_ops *dst, int val) | ||
47 | { | ||
48 | local_bh_disable(); | ||
49 | percpu_counter_add(&dst->pcpuc_entries, val); | ||
50 | local_bh_enable(); | ||
51 | } | ||
52 | |||
53 | static inline int dst_entries_init(struct dst_ops *dst) | ||
54 | { | ||
55 | return percpu_counter_init(&dst->pcpuc_entries, 0); | ||
56 | } | ||
57 | |||
58 | static inline void dst_entries_destroy(struct dst_ops *dst) | ||
59 | { | ||
60 | percpu_counter_destroy(&dst->pcpuc_entries); | ||
61 | } | ||
62 | |||
28 | #endif | 63 | #endif |
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 77f7b5fda45a..7f9ce9600ef3 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c | |||
@@ -106,7 +106,6 @@ static struct dst_ops fake_dst_ops = { | |||
106 | .family = AF_INET, | 106 | .family = AF_INET, |
107 | .protocol = cpu_to_be16(ETH_P_IP), | 107 | .protocol = cpu_to_be16(ETH_P_IP), |
108 | .update_pmtu = fake_update_pmtu, | 108 | .update_pmtu = fake_update_pmtu, |
109 | .entries = ATOMIC_INIT(0), | ||
110 | }; | 109 | }; |
111 | 110 | ||
112 | /* | 111 | /* |
@@ -1003,15 +1002,22 @@ int __init br_netfilter_init(void) | |||
1003 | { | 1002 | { |
1004 | int ret; | 1003 | int ret; |
1005 | 1004 | ||
1006 | ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); | 1005 | ret = dst_entries_init(&fake_dst_ops); |
1007 | if (ret < 0) | 1006 | if (ret < 0) |
1008 | return ret; | 1007 | return ret; |
1008 | |||
1009 | ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); | ||
1010 | if (ret < 0) { | ||
1011 | dst_entries_destroy(&fake_dst_ops); | ||
1012 | return ret; | ||
1013 | } | ||
1009 | #ifdef CONFIG_SYSCTL | 1014 | #ifdef CONFIG_SYSCTL |
1010 | brnf_sysctl_header = register_sysctl_paths(brnf_path, brnf_table); | 1015 | brnf_sysctl_header = register_sysctl_paths(brnf_path, brnf_table); |
1011 | if (brnf_sysctl_header == NULL) { | 1016 | if (brnf_sysctl_header == NULL) { |
1012 | printk(KERN_WARNING | 1017 | printk(KERN_WARNING |
1013 | "br_netfilter: can't register to sysctl.\n"); | 1018 | "br_netfilter: can't register to sysctl.\n"); |
1014 | nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); | 1019 | nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); |
1020 | dst_entries_destroy(&fake_dst_ops); | ||
1015 | return -ENOMEM; | 1021 | return -ENOMEM; |
1016 | } | 1022 | } |
1017 | #endif | 1023 | #endif |
@@ -1025,4 +1031,5 @@ void br_netfilter_fini(void) | |||
1025 | #ifdef CONFIG_SYSCTL | 1031 | #ifdef CONFIG_SYSCTL |
1026 | unregister_sysctl_table(brnf_sysctl_header); | 1032 | unregister_sysctl_table(brnf_sysctl_header); |
1027 | #endif | 1033 | #endif |
1034 | dst_entries_destroy(&fake_dst_ops); | ||
1028 | } | 1035 | } |
diff --git a/net/core/dst.c b/net/core/dst.c index 978a1ee1f7d0..32e542d7f472 100644 --- a/net/core/dst.c +++ b/net/core/dst.c | |||
@@ -168,7 +168,7 @@ void *dst_alloc(struct dst_ops *ops) | |||
168 | { | 168 | { |
169 | struct dst_entry *dst; | 169 | struct dst_entry *dst; |
170 | 170 | ||
171 | if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) { | 171 | if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) { |
172 | if (ops->gc(ops)) | 172 | if (ops->gc(ops)) |
173 | return NULL; | 173 | return NULL; |
174 | } | 174 | } |
@@ -183,7 +183,7 @@ void *dst_alloc(struct dst_ops *ops) | |||
183 | #if RT_CACHE_DEBUG >= 2 | 183 | #if RT_CACHE_DEBUG >= 2 |
184 | atomic_inc(&dst_total); | 184 | atomic_inc(&dst_total); |
185 | #endif | 185 | #endif |
186 | atomic_inc(&ops->entries); | 186 | dst_entries_add(ops, 1); |
187 | return dst; | 187 | return dst; |
188 | } | 188 | } |
189 | EXPORT_SYMBOL(dst_alloc); | 189 | EXPORT_SYMBOL(dst_alloc); |
@@ -236,7 +236,7 @@ again: | |||
236 | neigh_release(neigh); | 236 | neigh_release(neigh); |
237 | } | 237 | } |
238 | 238 | ||
239 | atomic_dec(&dst->ops->entries); | 239 | dst_entries_add(dst->ops, -1); |
240 | 240 | ||
241 | if (dst->ops->destroy) | 241 | if (dst->ops->destroy) |
242 | dst->ops->destroy(dst); | 242 | dst->ops->destroy(dst); |
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 6585ea6d1182..df0f3e54ff8a 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c | |||
@@ -132,7 +132,6 @@ static struct dst_ops dn_dst_ops = { | |||
132 | .negative_advice = dn_dst_negative_advice, | 132 | .negative_advice = dn_dst_negative_advice, |
133 | .link_failure = dn_dst_link_failure, | 133 | .link_failure = dn_dst_link_failure, |
134 | .update_pmtu = dn_dst_update_pmtu, | 134 | .update_pmtu = dn_dst_update_pmtu, |
135 | .entries = ATOMIC_INIT(0), | ||
136 | }; | 135 | }; |
137 | 136 | ||
138 | static __inline__ unsigned dn_hash(__le16 src, __le16 dst) | 137 | static __inline__ unsigned dn_hash(__le16 src, __le16 dst) |
@@ -1758,6 +1757,7 @@ void __init dn_route_init(void) | |||
1758 | dn_dst_ops.kmem_cachep = | 1757 | dn_dst_ops.kmem_cachep = |
1759 | kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0, | 1758 | kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0, |
1760 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); | 1759 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); |
1760 | dst_entries_init(&dn_dst_ops); | ||
1761 | setup_timer(&dn_route_timer, dn_dst_check_expire, 0); | 1761 | setup_timer(&dn_route_timer, dn_dst_check_expire, 0); |
1762 | dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; | 1762 | dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; |
1763 | add_timer(&dn_route_timer); | 1763 | add_timer(&dn_route_timer); |
@@ -1816,5 +1816,6 @@ void __exit dn_route_cleanup(void) | |||
1816 | dn_run_flush(0); | 1816 | dn_run_flush(0); |
1817 | 1817 | ||
1818 | proc_net_remove(&init_net, "decnet_cache"); | 1818 | proc_net_remove(&init_net, "decnet_cache"); |
1819 | dst_entries_destroy(&dn_dst_ops); | ||
1819 | } | 1820 | } |
1820 | 1821 | ||
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3888f6ba0a5c..0755aa4af86c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -159,7 +159,6 @@ static struct dst_ops ipv4_dst_ops = { | |||
159 | .link_failure = ipv4_link_failure, | 159 | .link_failure = ipv4_link_failure, |
160 | .update_pmtu = ip_rt_update_pmtu, | 160 | .update_pmtu = ip_rt_update_pmtu, |
161 | .local_out = __ip_local_out, | 161 | .local_out = __ip_local_out, |
162 | .entries = ATOMIC_INIT(0), | ||
163 | }; | 162 | }; |
164 | 163 | ||
165 | #define ECN_OR_COST(class) TC_PRIO_##class | 164 | #define ECN_OR_COST(class) TC_PRIO_##class |
@@ -466,7 +465,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v) | |||
466 | 465 | ||
467 | seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x " | 466 | seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x " |
468 | " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n", | 467 | " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n", |
469 | atomic_read(&ipv4_dst_ops.entries), | 468 | dst_entries_get_slow(&ipv4_dst_ops), |
470 | st->in_hit, | 469 | st->in_hit, |
471 | st->in_slow_tot, | 470 | st->in_slow_tot, |
472 | st->in_slow_mc, | 471 | st->in_slow_mc, |
@@ -945,6 +944,7 @@ static int rt_garbage_collect(struct dst_ops *ops) | |||
945 | struct rtable *rth, **rthp; | 944 | struct rtable *rth, **rthp; |
946 | unsigned long now = jiffies; | 945 | unsigned long now = jiffies; |
947 | int goal; | 946 | int goal; |
947 | int entries = dst_entries_get_fast(&ipv4_dst_ops); | ||
948 | 948 | ||
949 | /* | 949 | /* |
950 | * Garbage collection is pretty expensive, | 950 | * Garbage collection is pretty expensive, |
@@ -954,28 +954,28 @@ static int rt_garbage_collect(struct dst_ops *ops) | |||
954 | RT_CACHE_STAT_INC(gc_total); | 954 | RT_CACHE_STAT_INC(gc_total); |
955 | 955 | ||
956 | if (now - last_gc < ip_rt_gc_min_interval && | 956 | if (now - last_gc < ip_rt_gc_min_interval && |
957 | atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) { | 957 | entries < ip_rt_max_size) { |
958 | RT_CACHE_STAT_INC(gc_ignored); | 958 | RT_CACHE_STAT_INC(gc_ignored); |
959 | goto out; | 959 | goto out; |
960 | } | 960 | } |
961 | 961 | ||
962 | entries = dst_entries_get_slow(&ipv4_dst_ops); | ||
962 | /* Calculate number of entries, which we want to expire now. */ | 963 | /* Calculate number of entries, which we want to expire now. */ |
963 | goal = atomic_read(&ipv4_dst_ops.entries) - | 964 | goal = entries - (ip_rt_gc_elasticity << rt_hash_log); |
964 | (ip_rt_gc_elasticity << rt_hash_log); | ||
965 | if (goal <= 0) { | 965 | if (goal <= 0) { |
966 | if (equilibrium < ipv4_dst_ops.gc_thresh) | 966 | if (equilibrium < ipv4_dst_ops.gc_thresh) |
967 | equilibrium = ipv4_dst_ops.gc_thresh; | 967 | equilibrium = ipv4_dst_ops.gc_thresh; |
968 | goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium; | 968 | goal = entries - equilibrium; |
969 | if (goal > 0) { | 969 | if (goal > 0) { |
970 | equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1); | 970 | equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1); |
971 | goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium; | 971 | goal = entries - equilibrium; |
972 | } | 972 | } |
973 | } else { | 973 | } else { |
974 | /* We are in dangerous area. Try to reduce cache really | 974 | /* We are in dangerous area. Try to reduce cache really |
975 | * aggressively. | 975 | * aggressively. |
976 | */ | 976 | */ |
977 | goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1); | 977 | goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1); |
978 | equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal; | 978 | equilibrium = entries - goal; |
979 | } | 979 | } |
980 | 980 | ||
981 | if (now - last_gc >= ip_rt_gc_min_interval) | 981 | if (now - last_gc >= ip_rt_gc_min_interval) |
@@ -1032,14 +1032,16 @@ static int rt_garbage_collect(struct dst_ops *ops) | |||
1032 | expire >>= 1; | 1032 | expire >>= 1; |
1033 | #if RT_CACHE_DEBUG >= 2 | 1033 | #if RT_CACHE_DEBUG >= 2 |
1034 | printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire, | 1034 | printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire, |
1035 | atomic_read(&ipv4_dst_ops.entries), goal, i); | 1035 | dst_entries_get_fast(&ipv4_dst_ops), goal, i); |
1036 | #endif | 1036 | #endif |
1037 | 1037 | ||
1038 | if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) | 1038 | if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size) |
1039 | goto out; | 1039 | goto out; |
1040 | } while (!in_softirq() && time_before_eq(jiffies, now)); | 1040 | } while (!in_softirq() && time_before_eq(jiffies, now)); |
1041 | 1041 | ||
1042 | if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) | 1042 | if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size) |
1043 | goto out; | ||
1044 | if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size) | ||
1043 | goto out; | 1045 | goto out; |
1044 | if (net_ratelimit()) | 1046 | if (net_ratelimit()) |
1045 | printk(KERN_WARNING "dst cache overflow\n"); | 1047 | printk(KERN_WARNING "dst cache overflow\n"); |
@@ -1049,11 +1051,12 @@ static int rt_garbage_collect(struct dst_ops *ops) | |||
1049 | work_done: | 1051 | work_done: |
1050 | expire += ip_rt_gc_min_interval; | 1052 | expire += ip_rt_gc_min_interval; |
1051 | if (expire > ip_rt_gc_timeout || | 1053 | if (expire > ip_rt_gc_timeout || |
1052 | atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh) | 1054 | dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh || |
1055 | dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh) | ||
1053 | expire = ip_rt_gc_timeout; | 1056 | expire = ip_rt_gc_timeout; |
1054 | #if RT_CACHE_DEBUG >= 2 | 1057 | #if RT_CACHE_DEBUG >= 2 |
1055 | printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire, | 1058 | printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire, |
1056 | atomic_read(&ipv4_dst_ops.entries), goal, rover); | 1059 | dst_entries_get_fast(&ipv4_dst_ops), goal, rover); |
1057 | #endif | 1060 | #endif |
1058 | out: return 0; | 1061 | out: return 0; |
1059 | } | 1062 | } |
@@ -2717,7 +2720,6 @@ static struct dst_ops ipv4_dst_blackhole_ops = { | |||
2717 | .destroy = ipv4_dst_destroy, | 2720 | .destroy = ipv4_dst_destroy, |
2718 | .check = ipv4_blackhole_dst_check, | 2721 | .check = ipv4_blackhole_dst_check, |
2719 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, | 2722 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, |
2720 | .entries = ATOMIC_INIT(0), | ||
2721 | }; | 2723 | }; |
2722 | 2724 | ||
2723 | 2725 | ||
@@ -3287,6 +3289,12 @@ int __init ip_rt_init(void) | |||
3287 | 3289 | ||
3288 | ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep; | 3290 | ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep; |
3289 | 3291 | ||
3292 | if (dst_entries_init(&ipv4_dst_ops) < 0) | ||
3293 | panic("IP: failed to allocate ipv4_dst_ops counter\n"); | ||
3294 | |||
3295 | if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0) | ||
3296 | panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n"); | ||
3297 | |||
3290 | rt_hash_table = (struct rt_hash_bucket *) | 3298 | rt_hash_table = (struct rt_hash_bucket *) |
3291 | alloc_large_system_hash("IP route cache", | 3299 | alloc_large_system_hash("IP route cache", |
3292 | sizeof(struct rt_hash_bucket), | 3300 | sizeof(struct rt_hash_bucket), |
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index a580349f0b8a..4464f3bff6a7 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -174,7 +174,7 @@ static inline int xfrm4_garbage_collect(struct dst_ops *ops) | |||
174 | struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); | 174 | struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); |
175 | 175 | ||
176 | xfrm4_policy_afinfo.garbage_collect(net); | 176 | xfrm4_policy_afinfo.garbage_collect(net); |
177 | return (atomic_read(&ops->entries) > ops->gc_thresh * 2); | 177 | return (dst_entries_get_slow(ops) > ops->gc_thresh * 2); |
178 | } | 178 | } |
179 | 179 | ||
180 | static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) | 180 | static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) |
@@ -232,7 +232,6 @@ static struct dst_ops xfrm4_dst_ops = { | |||
232 | .ifdown = xfrm4_dst_ifdown, | 232 | .ifdown = xfrm4_dst_ifdown, |
233 | .local_out = __ip_local_out, | 233 | .local_out = __ip_local_out, |
234 | .gc_thresh = 1024, | 234 | .gc_thresh = 1024, |
235 | .entries = ATOMIC_INIT(0), | ||
236 | }; | 235 | }; |
237 | 236 | ||
238 | static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { | 237 | static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { |
@@ -288,6 +287,7 @@ void __init xfrm4_init(int rt_max_size) | |||
288 | * and start cleaning when were 1/2 full | 287 | * and start cleaning when were 1/2 full |
289 | */ | 288 | */ |
290 | xfrm4_dst_ops.gc_thresh = rt_max_size/2; | 289 | xfrm4_dst_ops.gc_thresh = rt_max_size/2; |
290 | dst_entries_init(&xfrm4_dst_ops); | ||
291 | 291 | ||
292 | xfrm4_state_init(); | 292 | xfrm4_state_init(); |
293 | xfrm4_policy_init(); | 293 | xfrm4_policy_init(); |
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 17e217933885..25661f968f3f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -109,7 +109,6 @@ static struct dst_ops ip6_dst_ops_template = { | |||
109 | .link_failure = ip6_link_failure, | 109 | .link_failure = ip6_link_failure, |
110 | .update_pmtu = ip6_rt_update_pmtu, | 110 | .update_pmtu = ip6_rt_update_pmtu, |
111 | .local_out = __ip6_local_out, | 111 | .local_out = __ip6_local_out, |
112 | .entries = ATOMIC_INIT(0), | ||
113 | }; | 112 | }; |
114 | 113 | ||
115 | static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) | 114 | static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) |
@@ -122,7 +121,6 @@ static struct dst_ops ip6_dst_blackhole_ops = { | |||
122 | .destroy = ip6_dst_destroy, | 121 | .destroy = ip6_dst_destroy, |
123 | .check = ip6_dst_check, | 122 | .check = ip6_dst_check, |
124 | .update_pmtu = ip6_rt_blackhole_update_pmtu, | 123 | .update_pmtu = ip6_rt_blackhole_update_pmtu, |
125 | .entries = ATOMIC_INIT(0), | ||
126 | }; | 124 | }; |
127 | 125 | ||
128 | static struct rt6_info ip6_null_entry_template = { | 126 | static struct rt6_info ip6_null_entry_template = { |
@@ -1058,19 +1056,22 @@ static int ip6_dst_gc(struct dst_ops *ops) | |||
1058 | int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; | 1056 | int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; |
1059 | int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; | 1057 | int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; |
1060 | unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; | 1058 | unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; |
1059 | int entries; | ||
1061 | 1060 | ||
1061 | entries = dst_entries_get_fast(ops); | ||
1062 | if (time_after(rt_last_gc + rt_min_interval, now) && | 1062 | if (time_after(rt_last_gc + rt_min_interval, now) && |
1063 | atomic_read(&ops->entries) <= rt_max_size) | 1063 | entries <= rt_max_size) |
1064 | goto out; | 1064 | goto out; |
1065 | 1065 | ||
1066 | net->ipv6.ip6_rt_gc_expire++; | 1066 | net->ipv6.ip6_rt_gc_expire++; |
1067 | fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); | 1067 | fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); |
1068 | net->ipv6.ip6_rt_last_gc = now; | 1068 | net->ipv6.ip6_rt_last_gc = now; |
1069 | if (atomic_read(&ops->entries) < ops->gc_thresh) | 1069 | entries = dst_entries_get_slow(ops); |
1070 | if (entries < ops->gc_thresh) | ||
1070 | net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; | 1071 | net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; |
1071 | out: | 1072 | out: |
1072 | net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; | 1073 | net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; |
1073 | return atomic_read(&ops->entries) > rt_max_size; | 1074 | return entries > rt_max_size; |
1074 | } | 1075 | } |
1075 | 1076 | ||
1076 | /* Clean host part of a prefix. Not necessary in radix tree, | 1077 | /* Clean host part of a prefix. Not necessary in radix tree, |
@@ -2524,7 +2525,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v) | |||
2524 | net->ipv6.rt6_stats->fib_rt_alloc, | 2525 | net->ipv6.rt6_stats->fib_rt_alloc, |
2525 | net->ipv6.rt6_stats->fib_rt_entries, | 2526 | net->ipv6.rt6_stats->fib_rt_entries, |
2526 | net->ipv6.rt6_stats->fib_rt_cache, | 2527 | net->ipv6.rt6_stats->fib_rt_cache, |
2527 | atomic_read(&net->ipv6.ip6_dst_ops.entries), | 2528 | dst_entries_get_slow(&net->ipv6.ip6_dst_ops), |
2528 | net->ipv6.rt6_stats->fib_discarded_routes); | 2529 | net->ipv6.rt6_stats->fib_discarded_routes); |
2529 | 2530 | ||
2530 | return 0; | 2531 | return 0; |
@@ -2666,11 +2667,14 @@ static int __net_init ip6_route_net_init(struct net *net) | |||
2666 | memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, | 2667 | memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, |
2667 | sizeof(net->ipv6.ip6_dst_ops)); | 2668 | sizeof(net->ipv6.ip6_dst_ops)); |
2668 | 2669 | ||
2670 | if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) | ||
2671 | goto out_ip6_dst_ops; | ||
2672 | |||
2669 | net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, | 2673 | net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, |
2670 | sizeof(*net->ipv6.ip6_null_entry), | 2674 | sizeof(*net->ipv6.ip6_null_entry), |
2671 | GFP_KERNEL); | 2675 | GFP_KERNEL); |
2672 | if (!net->ipv6.ip6_null_entry) | 2676 | if (!net->ipv6.ip6_null_entry) |
2673 | goto out_ip6_dst_ops; | 2677 | goto out_ip6_dst_entries; |
2674 | net->ipv6.ip6_null_entry->dst.path = | 2678 | net->ipv6.ip6_null_entry->dst.path = |
2675 | (struct dst_entry *)net->ipv6.ip6_null_entry; | 2679 | (struct dst_entry *)net->ipv6.ip6_null_entry; |
2676 | net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; | 2680 | net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; |
@@ -2720,6 +2724,8 @@ out_ip6_prohibit_entry: | |||
2720 | out_ip6_null_entry: | 2724 | out_ip6_null_entry: |
2721 | kfree(net->ipv6.ip6_null_entry); | 2725 | kfree(net->ipv6.ip6_null_entry); |
2722 | #endif | 2726 | #endif |
2727 | out_ip6_dst_entries: | ||
2728 | dst_entries_destroy(&net->ipv6.ip6_dst_ops); | ||
2723 | out_ip6_dst_ops: | 2729 | out_ip6_dst_ops: |
2724 | goto out; | 2730 | goto out; |
2725 | } | 2731 | } |
@@ -2758,10 +2764,14 @@ int __init ip6_route_init(void) | |||
2758 | if (!ip6_dst_ops_template.kmem_cachep) | 2764 | if (!ip6_dst_ops_template.kmem_cachep) |
2759 | goto out; | 2765 | goto out; |
2760 | 2766 | ||
2761 | ret = register_pernet_subsys(&ip6_route_net_ops); | 2767 | ret = dst_entries_init(&ip6_dst_blackhole_ops); |
2762 | if (ret) | 2768 | if (ret) |
2763 | goto out_kmem_cache; | 2769 | goto out_kmem_cache; |
2764 | 2770 | ||
2771 | ret = register_pernet_subsys(&ip6_route_net_ops); | ||
2772 | if (ret) | ||
2773 | goto out_dst_entries; | ||
2774 | |||
2765 | ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; | 2775 | ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; |
2766 | 2776 | ||
2767 | /* Registering of the loopback is done before this portion of code, | 2777 | /* Registering of the loopback is done before this portion of code, |
@@ -2808,6 +2818,8 @@ out_fib6_init: | |||
2808 | fib6_gc_cleanup(); | 2818 | fib6_gc_cleanup(); |
2809 | out_register_subsys: | 2819 | out_register_subsys: |
2810 | unregister_pernet_subsys(&ip6_route_net_ops); | 2820 | unregister_pernet_subsys(&ip6_route_net_ops); |
2821 | out_dst_entries: | ||
2822 | dst_entries_destroy(&ip6_dst_blackhole_ops); | ||
2811 | out_kmem_cache: | 2823 | out_kmem_cache: |
2812 | kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); | 2824 | kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); |
2813 | goto out; | 2825 | goto out; |
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 39676eac3a37..7e74023ea6e4 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c | |||
@@ -199,7 +199,7 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops) | |||
199 | struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); | 199 | struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); |
200 | 200 | ||
201 | xfrm6_policy_afinfo.garbage_collect(net); | 201 | xfrm6_policy_afinfo.garbage_collect(net); |
202 | return atomic_read(&ops->entries) > ops->gc_thresh * 2; | 202 | return dst_entries_get_fast(ops) > ops->gc_thresh * 2; |
203 | } | 203 | } |
204 | 204 | ||
205 | static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) | 205 | static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) |
@@ -255,7 +255,6 @@ static struct dst_ops xfrm6_dst_ops = { | |||
255 | .ifdown = xfrm6_dst_ifdown, | 255 | .ifdown = xfrm6_dst_ifdown, |
256 | .local_out = __ip6_local_out, | 256 | .local_out = __ip6_local_out, |
257 | .gc_thresh = 1024, | 257 | .gc_thresh = 1024, |
258 | .entries = ATOMIC_INIT(0), | ||
259 | }; | 258 | }; |
260 | 259 | ||
261 | static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { | 260 | static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { |
@@ -312,11 +311,13 @@ int __init xfrm6_init(void) | |||
312 | */ | 311 | */ |
313 | gc_thresh = FIB6_TABLE_HASHSZ * 8; | 312 | gc_thresh = FIB6_TABLE_HASHSZ * 8; |
314 | xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh; | 313 | xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh; |
314 | dst_entries_init(&xfrm6_dst_ops); | ||
315 | 315 | ||
316 | ret = xfrm6_policy_init(); | 316 | ret = xfrm6_policy_init(); |
317 | if (ret) | 317 | if (ret) { |
318 | dst_entries_destroy(&xfrm6_dst_ops); | ||
318 | goto out; | 319 | goto out; |
319 | 320 | } | |
320 | ret = xfrm6_state_init(); | 321 | ret = xfrm6_state_init(); |
321 | if (ret) | 322 | if (ret) |
322 | goto out_policy; | 323 | goto out_policy; |
@@ -341,4 +342,5 @@ void xfrm6_fini(void) | |||
341 | //xfrm6_input_fini(); | 342 | //xfrm6_input_fini(); |
342 | xfrm6_policy_fini(); | 343 | xfrm6_policy_fini(); |
343 | xfrm6_state_fini(); | 344 | xfrm6_state_fini(); |
345 | dst_entries_destroy(&xfrm6_dst_ops); | ||
344 | } | 346 | } |