aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author Eric Dumazet <dada1@cosmosbay.com> 2006-01-17 05:54:36 -0500
committer David S. Miller <davem@davemloft.net> 2006-01-17 05:54:36 -0500
commit 2f970d83576cf4938fd75551c465050f6a16c33c (patch)
tree 7bb43aabfbd8afdab49549e4d56c0a15015d5995 /net
parent 8243126c5e29030bf1a3fb75187a513966dcba62 (diff)
[IPV4]: rt_cache_stat can be statically defined
Using __get_cpu_var(obj) is slightly faster than per_cpu_ptr(obj, raw_smp_processor_id()).

1) Smaller code and memory use

For static and small objects, DEFINE_PER_CPU(type, object) is preferred over an alloc_percpu(): it gives better and smaller code to access them, and needs no extra memory (for storing the pointer, and the percpu array of pointers).

x86_64 code before patch:

    mov    1237577(%rip),%rax    # ffffffff803e5990 <rt_cache_stat>
    not    %rax                  # part of per_cpu machinery
    mov    %gs:0x3c,%edx         # get cpu number
    movslq %edx,%rdx             # extend 32 bits cpu number to 64 bits
    mov    (%rax,%rdx,8),%rax    # get the pointer for this cpu
    incl   0x38(%rax)

x86_64 code after patch:

    mov    $per_cpu__rt_cache_stat,%rdx
    mov    %gs:0x48,%rax         # get percpu data offset
    incl   0x38(%rax,%rdx,1)

2) False sharing avoidance for SMP

For a small NR_CPUS, the array of per-cpu pointers allocated in alloc_percpu() can be <= 32 bytes. This lets the slab code hand out only a part of a cache line. If the other part of this 64-byte (or 128-byte) cache line is used by a mostly written object, we can have false sharing and expensive per_cpu_ptr() operations.

The size of rt_cache_stat is 64 bytes, so this patch does not risk too big an increase of bss (in UP mode) or of static per_cpu data for SMP (PERCPU_ENOUGH_ROOM is currently 32768 bytes).

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/ipv4/route.c 14
1 files changed, 4 insertions, 10 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f701a136a6ae..f2e82afc15b3 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -240,9 +240,8 @@ static unsigned rt_hash_mask;
240static int rt_hash_log; 240static int rt_hash_log;
241static unsigned int rt_hash_rnd; 241static unsigned int rt_hash_rnd;
242 242
243static struct rt_cache_stat *rt_cache_stat; 243static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
244#define RT_CACHE_STAT_INC(field) \ 244#define RT_CACHE_STAT_INC(field) (__get_cpu_var(rt_cache_stat).field++)
245 (per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++)
246 245
247static int rt_intern_hash(unsigned hash, struct rtable *rth, 246static int rt_intern_hash(unsigned hash, struct rtable *rth,
248 struct rtable **res); 247 struct rtable **res);
@@ -401,7 +400,7 @@ static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
401 if (!cpu_possible(cpu)) 400 if (!cpu_possible(cpu))
402 continue; 401 continue;
403 *pos = cpu+1; 402 *pos = cpu+1;
404 return per_cpu_ptr(rt_cache_stat, cpu); 403 return &per_cpu(rt_cache_stat, cpu);
405 } 404 }
406 return NULL; 405 return NULL;
407} 406}
@@ -414,7 +413,7 @@ static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
414 if (!cpu_possible(cpu)) 413 if (!cpu_possible(cpu))
415 continue; 414 continue;
416 *pos = cpu+1; 415 *pos = cpu+1;
417 return per_cpu_ptr(rt_cache_stat, cpu); 416 return &per_cpu(rt_cache_stat, cpu);
418 } 417 }
419 return NULL; 418 return NULL;
420 419
@@ -3160,10 +3159,6 @@ int __init ip_rt_init(void)
3160 ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1); 3159 ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
3161 ip_rt_max_size = (rt_hash_mask + 1) * 16; 3160 ip_rt_max_size = (rt_hash_mask + 1) * 16;
3162 3161
3163 rt_cache_stat = alloc_percpu(struct rt_cache_stat);
3164 if (!rt_cache_stat)
3165 return -ENOMEM;
3166
3167 devinet_init(); 3162 devinet_init();
3168 ip_fib_init(); 3163 ip_fib_init();
3169 3164
@@ -3191,7 +3186,6 @@ int __init ip_rt_init(void)
3191 if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) || 3186 if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
3192 !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, 3187 !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO,
3193 proc_net_stat))) { 3188 proc_net_stat))) {
3194 free_percpu(rt_cache_stat);
3195 return -ENOMEM; 3189 return -ENOMEM;
3196 } 3190 }
3197 rtstat_pde->proc_fops = &rt_cpu_seq_fops; 3191 rtstat_pde->proc_fops = &rt_cpu_seq_fops;