author	Eric Dumazet <eric.dumazet@gmail.com>	2010-10-08 02:37:34 -0400
committer	David S. Miller <davem@davemloft.net>	2010-10-11 16:06:53 -0400
commit	fc66f95c68b6d4535a0ea2ea15d5cf626e310956 (patch)
tree	ac3a7f08ad741a67ff683bf93e5669ddcae95ed7 /net/ipv4
parent	0ed8ddf4045fcfcac36bad753dc4046118c603ec (diff)
net dst: use a percpu_counter to track entries
struct dst_ops tracks the number of allocated dst entries in an atomic_t field, which is subject to heavy cache line contention under stress workloads.

Switch to a percpu_counter to reduce the number of times we need to dirty a central location, and place it on a separate cache line to avoid dirtying read-only fields.

Stress test: sending 160,000,000 UDP frames, IP route cache disabled, dual E5540 @ 2.53GHz, 32bit kernel, FIB_TRIE, SLUB/NUMA.

Before:
real	0m51.179s
user	0m15.329s
sys	10m15.942s

After:
real	0m45.570s
user	0m15.525s
sys	9m56.669s

With a small reordering of struct neighbour fields, subject of a following patch (to separate refcnt from other read-mostly fields):
real	0m41.841s
user	0m15.261s
sys	8m45.949s

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--	net/ipv4/route.c	36
-rw-r--r--	net/ipv4/xfrm4_policy.c	4
2 files changed, 24 insertions(+), 16 deletions(-)
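The diff below relies on three counter helpers that this patch introduces on the dst_ops side (outside net/ipv4, so not shown here): dst_entries_init(), dst_entries_get_fast() and dst_entries_get_slow(). A minimal sketch of how they might wrap a percpu_counter, per the commit message; the field name pcpuc_entries, the cache-line placement, and the omitted members are assumptions for illustration, not a copy of the patch:

#include <linux/cache.h>
#include <linux/percpu_counter.h>

/*
 * Illustrative sketch only, not the actual dst_ops change: assume
 * struct dst_ops replaces its atomic_t 'entries' field with a
 * percpu_counter (called pcpuc_entries here) on its own cache line.
 * All other dst_ops members are omitted.
 */
struct dst_ops {
	/* ... read-mostly fields (family, gc, check, destroy, ...) ... */
	struct percpu_counter pcpuc_entries ____cacheline_aligned_in_smp;
};

/* Cheap, possibly stale: reads only the central count, no per-CPU summation. */
static inline int dst_entries_get_fast(struct dst_ops *dst)
{
	return percpu_counter_read_positive(&dst->pcpuc_entries);
}

/* Exact but slower: folds every CPU's local delta into the result. */
static inline int dst_entries_get_slow(struct dst_ops *dst)
{
	return percpu_counter_sum_positive(&dst->pcpuc_entries);
}

/* Can fail, since percpu_counter_init() allocates per-CPU storage
 * (2.6.36-era signature; later kernels take an extra gfp_t argument). */
static inline int dst_entries_init(struct dst_ops *dst)
{
	return percpu_counter_init(&dst->pcpuc_entries, 0);
}

This is why ip_rt_init() below has to check dst_entries_init() for failure, and why the code picks between the fast and slow readers depending on whether an exact count is needed.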
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 3888f6ba0a5c..0755aa4af86c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -159,7 +159,6 @@ static struct dst_ops ipv4_dst_ops = {
 	.link_failure =		ipv4_link_failure,
 	.update_pmtu =		ip_rt_update_pmtu,
 	.local_out =		__ip_local_out,
-	.entries =		ATOMIC_INIT(0),
 };
 
 #define ECN_OR_COST(class)	TC_PRIO_##class
@@ -466,7 +465,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
 
 	seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
 		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
-		   atomic_read(&ipv4_dst_ops.entries),
+		   dst_entries_get_slow(&ipv4_dst_ops),
 		   st->in_hit,
 		   st->in_slow_tot,
 		   st->in_slow_mc,
@@ -945,6 +944,7 @@ static int rt_garbage_collect(struct dst_ops *ops)
 	struct rtable *rth, **rthp;
 	unsigned long now = jiffies;
 	int goal;
+	int entries = dst_entries_get_fast(&ipv4_dst_ops);
 
 	/*
 	 * Garbage collection is pretty expensive,
@@ -954,28 +954,28 @@ static int rt_garbage_collect(struct dst_ops *ops)
 	RT_CACHE_STAT_INC(gc_total);
 
 	if (now - last_gc < ip_rt_gc_min_interval &&
-	    atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) {
+	    entries < ip_rt_max_size) {
 		RT_CACHE_STAT_INC(gc_ignored);
 		goto out;
 	}
 
+	entries = dst_entries_get_slow(&ipv4_dst_ops);
 	/* Calculate number of entries, which we want to expire now. */
-	goal = atomic_read(&ipv4_dst_ops.entries) -
-		(ip_rt_gc_elasticity << rt_hash_log);
+	goal = entries - (ip_rt_gc_elasticity << rt_hash_log);
 	if (goal <= 0) {
 		if (equilibrium < ipv4_dst_ops.gc_thresh)
 			equilibrium = ipv4_dst_ops.gc_thresh;
-		goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
+		goal = entries - equilibrium;
 		if (goal > 0) {
 			equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1);
-			goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
+			goal = entries - equilibrium;
 		}
 	} else {
 		/* We are in dangerous area. Try to reduce cache really
 		 * aggressively.
 		 */
 		goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1);
-		equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal;
+		equilibrium = entries - goal;
 	}
 
 	if (now - last_gc >= ip_rt_gc_min_interval)
@@ -1032,14 +1032,16 @@ static int rt_garbage_collect(struct dst_ops *ops)
 		expire >>= 1;
 #if RT_CACHE_DEBUG >= 2
 		printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire,
-				atomic_read(&ipv4_dst_ops.entries), goal, i);
+				dst_entries_get_fast(&ipv4_dst_ops), goal, i);
 #endif
 
-		if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
+		if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
 			goto out;
 	} while (!in_softirq() && time_before_eq(jiffies, now));
 
-	if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
+	if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
+		goto out;
+	if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size)
 		goto out;
 	if (net_ratelimit())
 		printk(KERN_WARNING "dst cache overflow\n");
@@ -1049,11 +1051,12 @@ static int rt_garbage_collect(struct dst_ops *ops)
 work_done:
 	expire += ip_rt_gc_min_interval;
 	if (expire > ip_rt_gc_timeout ||
-	    atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh)
+	    dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh ||
+	    dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh)
 		expire = ip_rt_gc_timeout;
 #if RT_CACHE_DEBUG >= 2
 	printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire,
-			atomic_read(&ipv4_dst_ops.entries), goal, rover);
+			dst_entries_get_fast(&ipv4_dst_ops), goal, rover);
 #endif
 out:	return 0;
 }
@@ -2717,7 +2720,6 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
 	.destroy =		ipv4_dst_destroy,
 	.check =		ipv4_blackhole_dst_check,
 	.update_pmtu =		ipv4_rt_blackhole_update_pmtu,
-	.entries =		ATOMIC_INIT(0),
 };
 
 
@@ -3287,6 +3289,12 @@ int __init ip_rt_init(void)
 
 	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
 
+	if (dst_entries_init(&ipv4_dst_ops) < 0)
+		panic("IP: failed to allocate ipv4_dst_ops counter\n");
+
+	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
+		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
+
 	rt_hash_table = (struct rt_hash_bucket *)
 		alloc_large_system_hash("IP route cache",
 					sizeof(struct rt_hash_bucket),
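One detail worth noting in the rt_garbage_collect() changes above: a percpu_counter's central value can drift from the true total until the per-CPU deltas are folded in, so the code checks the cheap dst_entries_get_fast() value first and only pays for dst_entries_get_slow() when the fast read would otherwise send it down the expensive "dst cache overflow" path. A hypothetical, stand-alone sketch of that fast-then-slow check (the helper name is made up, not kernel API):

#include <linux/percpu_counter.h>
#include <linux/types.h>

/*
 * Hypothetical helper showing the fast-then-slow pattern: trust the
 * approximate read when it is clearly under the limit, and only do the
 * exact per-CPU summation to confirm an apparent overflow.
 */
static bool dst_over_limit(struct percpu_counter *cnt, s64 limit)
{
	if (percpu_counter_read_positive(cnt) < limit)
		return false;	/* approximate count is under the limit */

	return percpu_counter_sum_positive(cnt) >= limit;	/* confirm with the exact sum */
}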
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index a580349f0b8a..4464f3bff6a7 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -174,7 +174,7 @@ static inline int xfrm4_garbage_collect(struct dst_ops *ops)
 	struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops);
 
 	xfrm4_policy_afinfo.garbage_collect(net);
-	return (atomic_read(&ops->entries) > ops->gc_thresh * 2);
+	return (dst_entries_get_slow(ops) > ops->gc_thresh * 2);
 }
 
 static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -232,7 +232,6 @@ static struct dst_ops xfrm4_dst_ops = {
 	.ifdown =		xfrm4_dst_ifdown,
 	.local_out =		__ip_local_out,
 	.gc_thresh =		1024,
-	.entries =		ATOMIC_INIT(0),
 };
 
 static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
@@ -288,6 +287,7 @@ void __init xfrm4_init(int rt_max_size)
 	 * and start cleaning when were 1/2 full
 	 */
 	xfrm4_dst_ops.gc_thresh = rt_max_size/2;
+	dst_entries_init(&xfrm4_dst_ops);
 
 	xfrm4_state_init();
 	xfrm4_policy_init();