author: Eric Dumazet &lt;dada1@cosmosbay.com&gt;  2006-01-17 05:54:36 -0500
committer: David S. Miller &lt;davem@davemloft.net&gt;  2006-01-17 05:54:36 -0500
commit: 2f970d83576cf4938fd75551c465050f6a16c33c (patch)
tree: 7bb43aabfbd8afdab49549e4d56c0a15015d5995 /net
parent: 8243126c5e29030bf1a3fb75187a513966dcba62 (diff)
[IPV4]: rt_cache_stat can be statically defined
Using __get_cpu_var(obj) is slightly faster than per_cpu_ptr(obj,
raw_smp_processor_id()).
1) Smaller code and memory use

For static and small objects, DEFINE_PER_CPU(type, object) is preferred over alloc_percpu(): the generated code that accesses the object is better and smaller, and there is no extra memory use (no stored pointer and no per-CPU array of pointers). A short C sketch of both idioms follows the assembly comparison below.
x86_64 code before the patch:

```asm
mov    1237577(%rip),%rax   # ffffffff803e5990 <rt_cache_stat>
not    %rax                 # part of the per_cpu machinery
mov    %gs:0x3c,%edx        # get cpu number
movslq %edx,%rdx            # sign-extend the 32-bit cpu number to 64 bits
mov    (%rax,%rdx,8),%rax   # get the pointer for this cpu
incl   0x38(%rax)           # increment the field at offset 0x38
```

x86_64 code after the patch:

```asm
mov    $per_cpu__rt_cache_stat,%rdx
mov    %gs:0x48,%rax        # get the percpu data offset
incl   0x38(%rax,%rdx,1)    # increment the field directly
```
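For comparison in C, here is a minimal sketch of the two idioms whose generated code is shown above. The names net_stats, dyn_stats, static_stats and count_hit are illustrative only, not from this patch; the per-CPU macros are the 2.6-era kernel API the patch uses.

```c
#include <linux/percpu.h>
#include <linux/smp.h>

struct net_stats {
	unsigned long hits;
};

/* Dynamic flavor: an ordinary pointer, made per-CPU at runtime with
 * alloc_percpu().  Costs a stored pointer plus a kmalloc'ed array of
 * NR_CPUS pointers. */
static struct net_stats *dyn_stats;	/* dyn_stats = alloc_percpu(struct net_stats); */

/* Static flavor: the object itself is placed in the per-CPU data
 * section at compile time; no pointer indirection at all. */
static DEFINE_PER_CPU(struct net_stats, static_stats);

static inline void count_hit(void)
{
	/* Before the patch: load the pointer, index the per-CPU pointer
	 * array by CPU number, then dereference (the 5-instruction
	 * sequence above). */
	per_cpu_ptr(dyn_stats, raw_smp_processor_id())->hits++;

	/* After the patch: a single increment through the per-CPU
	 * segment offset (the 3-instruction sequence above).  As in the
	 * patch, being migrated mid-update is tolerated: these are only
	 * statistics. */
	__get_cpu_var(static_stats).hits++;
}
```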
2) False sharing avoidance for SMP

For a small NR_CPUS, the array of per-CPU pointers allocated by alloc_percpu() can be &lt;= 32 bytes, so the slab code may hand out only part of a cache line. If the other part of that 64-byte (or 128-byte) cache line holds a mostly-written object, we can get false sharing and expensive per_cpu_ptr() operations.

The size of rt_cache_stat is 64 bytes, so this patch poses no danger of too large an increase of the bss (in UP mode) or of the static per_cpu data for SMP (PERCPU_ENOUGH_ROOM is currently 32768 bytes).
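To illustrate the hazard, here is a sketch of the layout problem, assuming NR_CPUS == 4, 8-byte pointers and 64-byte cache lines; the struct below is modelled on the 2.6-era struct percpu_data that backs alloc_percpu().

```c
/* Assumed for illustration: NR_CPUS == 4, 8-byte pointers, 64-byte
 * cache lines.  alloc_percpu() first kmallocs a small array of
 * per-CPU pointers: */
struct percpu_data {
	void *ptrs[4];		/* 4 * 8 = 32 bytes: half a cache line */
};

/*
 * The slab allocator may place another object in the other half of
 * the same line:
 *
 *   |<---------------- one 64-byte cache line ---------------->|
 *   | ptrs[0..3]  (read-mostly)  | unrelated, often-written    |
 *   |                            | object                      |
 *
 * Writes to the neighbour keep invalidating the line on every CPU,
 * so each per_cpu_ptr() pointer load can become a cache miss (false
 * sharing).  DEFINE_PER_CPU has no shared pointer array at all: each
 * CPU reaches its own copy through its own per-CPU data offset.
 */
```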
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--  net/ipv4/route.c | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)
```diff
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f701a136a6ae..f2e82afc15b3 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -240,9 +240,8 @@ static unsigned rt_hash_mask;
 static int rt_hash_log;
 static unsigned int rt_hash_rnd;
 
-static struct rt_cache_stat *rt_cache_stat;
-#define RT_CACHE_STAT_INC(field) \
-	(per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++)
+static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
+#define RT_CACHE_STAT_INC(field) (__get_cpu_var(rt_cache_stat).field++)
 
 static int rt_intern_hash(unsigned hash, struct rtable *rth,
 				struct rtable **res);
@@ -401,7 +400,7 @@ static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
 		if (!cpu_possible(cpu))
 			continue;
 		*pos = cpu+1;
-		return per_cpu_ptr(rt_cache_stat, cpu);
+		return &per_cpu(rt_cache_stat, cpu);
 	}
 	return NULL;
 }
@@ -414,7 +413,7 @@ static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		if (!cpu_possible(cpu))
 			continue;
 		*pos = cpu+1;
-		return per_cpu_ptr(rt_cache_stat, cpu);
+		return &per_cpu(rt_cache_stat, cpu);
 	}
 	return NULL;
 
@@ -3160,10 +3159,6 @@ int __init ip_rt_init(void)
 	ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
 	ip_rt_max_size = (rt_hash_mask + 1) * 16;
 
-	rt_cache_stat = alloc_percpu(struct rt_cache_stat);
-	if (!rt_cache_stat)
-		return -ENOMEM;
-
 	devinet_init();
 	ip_fib_init();
 
@@ -3191,7 +3186,6 @@ int __init ip_rt_init(void)
 	if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
 	    !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO,
 					    proc_net_stat))) {
-		free_percpu(rt_cache_stat);
 		return -ENOMEM;
 	}
 	rtstat_pde->proc_fops = &rt_cpu_seq_fops;
```
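Call sites are unchanged by this patch. For context, a typical use of the macro elsewhere in net/ipv4/route.c looks like the line below (in_hit is one of the counters in struct rt_cache_stat, as reported via /proc/net/stat/rt_cache):

```c
/* After the patch this expands to __get_cpu_var(rt_cache_stat).in_hit++ */
RT_CACHE_STAT_INC(in_hit);
```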
