diff options
author | Frederik Deweerdt <frederik.deweerdt@xprog.eu> | 2009-01-12 16:35:42 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-01-14 06:04:53 -0500 |
commit | 09b3ec7315a18d885127544204f1e389d41058d0 (patch) | |
tree | a87dbf01992ac7bc5c5f832635e5ecbe95e04e39 /arch/x86/kernel/tlb_64.c | |
parent | c59765042f53a79a7a65585042ff463b69cb248c (diff) |
x86, tlb flush_data: replace per_cpu with an array
Impact: micro-optimization, memory reduction
On x86_64 flush tlb data is stored in per_cpu variables. This is
unnecessary because only the first NUM_INVALIDATE_TLB_VECTORS entries
are accessed.
This patch aims at making the code less confusing (there's nothing
really "per_cpu") by using a plain array. It also would save some memory
on most distros out there (Ubuntu x86_64 has NR_CPUS=64 by default).
[ Ravikiran G Thirumalai also pointed out that the correct alignment
is ____cacheline_internodealigned_in_smp, so that there's no
bouncing on vsmp. ]
Signed-off-by: Frederik Deweerdt <frederik.deweerdt@xprog.eu>
Acked-by: Ravikiran Thirumalai <kiran@scalex86.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/tlb_64.c')
-rw-r--r-- | arch/x86/kernel/tlb_64.c | 16 |
1 files changed, 8 insertions, 8 deletions
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index f8be6f1d2e48..8cfea5d14517 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -33,7 +33,7 @@ | |||
33 | * To avoid global state use 8 different call vectors. | 33 | * To avoid global state use 8 different call vectors. |
34 | * Each CPU uses a specific vector to trigger flushes on other | 34 | * Each CPU uses a specific vector to trigger flushes on other |
35 | * CPUs. Depending on the received vector the target CPUs look into | 35 | * CPUs. Depending on the received vector the target CPUs look into |
36 | * the right per cpu variable for the flush data. | 36 | * the right array slot for the flush data. |
37 | * | 37 | * |
38 | * With more than 8 CPUs they are hashed to the 8 available | 38 | * With more than 8 CPUs they are hashed to the 8 available |
39 | * vectors. The limited global vector space forces us to this right now. | 39 | * vectors. The limited global vector space forces us to this right now. |
@@ -48,13 +48,13 @@ union smp_flush_state { | |||
48 | unsigned long flush_va; | 48 | unsigned long flush_va; |
49 | spinlock_t tlbstate_lock; | 49 | spinlock_t tlbstate_lock; |
50 | }; | 50 | }; |
51 | char pad[SMP_CACHE_BYTES]; | 51 | char pad[CONFIG_X86_INTERNODE_CACHE_BYTES]; |
52 | } ____cacheline_aligned; | 52 | } ____cacheline_internodealigned_in_smp; |
53 | 53 | ||
54 | /* State is put into the per CPU data section, but padded | 54 | /* State is put into the per CPU data section, but padded |
55 | to a full cache line because other CPUs can access it and we don't | 55 | to a full cache line because other CPUs can access it and we don't |
56 | want false sharing in the per cpu data segment. */ | 56 | want false sharing in the per cpu data segment. */ |
57 | static DEFINE_PER_CPU(union smp_flush_state, flush_state); | 57 | static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; |
58 | 58 | ||
59 | /* | 59 | /* |
60 | * We cannot call mmdrop() because we are in interrupt context, | 60 | * We cannot call mmdrop() because we are in interrupt context, |
@@ -129,7 +129,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) | |||
129 | * Use that to determine where the sender put the data. | 129 | * Use that to determine where the sender put the data. |
130 | */ | 130 | */ |
131 | sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; | 131 | sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; |
132 | f = &per_cpu(flush_state, sender); | 132 | f = &flush_state[sender]; |
133 | 133 | ||
134 | if (!cpu_isset(cpu, f->flush_cpumask)) | 134 | if (!cpu_isset(cpu, f->flush_cpumask)) |
135 | goto out; | 135 | goto out; |
@@ -169,7 +169,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, | |||
169 | 169 | ||
170 | /* Caller has disabled preemption */ | 170 | /* Caller has disabled preemption */ |
171 | sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; | 171 | sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; |
172 | f = &per_cpu(flush_state, sender); | 172 | f = &flush_state[sender]; |
173 | 173 | ||
174 | /* | 174 | /* |
175 | * Could avoid this lock when | 175 | * Could avoid this lock when |
@@ -205,8 +205,8 @@ static int __cpuinit init_smp_flush(void) | |||
205 | { | 205 | { |
206 | int i; | 206 | int i; |
207 | 207 | ||
208 | for_each_possible_cpu(i) | 208 | for (i = 0; i < ARRAY_SIZE(flush_state); i++) |
209 | spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); | 209 | spin_lock_init(&flush_state[i].tlbstate_lock); |
210 | 210 | ||
211 | return 0; | 211 | return 0; |
212 | } | 212 | } |