| author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-21 16:47:29 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-21 16:47:29 -0400 |
| commit | c3b86a29429dac1033e3f602f51fa8d00006a8eb (patch) | |
| tree | bcedd0a553ca2396eeb58318ef6ee6b426e83652 /arch/x86/mm/tlb.c | |
| parent | 8d8d2e9ccd331a1345c88b292ebee9d256fd8749 (diff) | |
| parent | 2aeb66d3036dbafc297ac553a257a40283dadb3e (diff) | |
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
x86-32, percpu: Correct the ordering of the percpu readmostly section
x86, mm: Enable ARCH_DMA_ADDR_T_64BIT with X86_64 || HIGHMEM64G
x86: Spread tlb flush vector between nodes
percpu: Introduce a read-mostly percpu API
x86, mm: Fix incorrect data type in vmalloc_sync_all()
x86, mm: Hold mm->page_table_lock while doing vmalloc_sync
x86, mm: Fix bogus whitespace in sync_global_pgds()
x86-32: Fix sparse warning for the __PHYSICAL_MASK calculation
x86, mm: Add RESERVE_BRK_ARRAY() helper
mm, x86: Saving vmcore with non-lazy freeing of vmas
x86, kdump: Change copy_oldmem_page() to use cached addressing
x86, mm: fix uninitialized addr in kernel_physical_mapping_init()
x86, kmemcheck: Remove double test
x86, mm: Make spurious_fault check explicitly check the PRESENT bit
x86-64, mem: Update all PGDs for direct mapping and vmemmap mapping changes
x86, mm: Separate x86_64 vmalloc_sync_all() into separate functions
x86, mm: Avoid unnecessary TLB flush
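
Of the commits above, "x86: Spread tlb flush vector between nodes" is the one that touches arch/x86/mm/tlb.c in the diff below: instead of every CPU hashing into the invalidate-TLB IPI vectors with `smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS`, each NUMA node gets its own slice of the vectors and the node's CPUs are round-robined within that slice, reducing cross-node contention on the flush_state locks. The following is a minimal userspace sketch of that partitioning, assuming a made-up flat topology (3 nodes, 4 CPUs each) and an assumed vector count for illustration; the real code walks online nodes with for_each_online_node()/cpumask_of_node() and stores the result in a read-mostly per-cpu variable.

```c
#include <stdio.h>

/* Assumed values for illustration only; the real constants come from the kernel. */
#define NUM_INVALIDATE_TLB_VECTORS 8
#define NR_NODES 3
#define CPUS_PER_NODE 4

static int tlb_vector_offset[NR_NODES * CPUS_PER_NODE];

/* Mirrors the shape of calculate_tlb_offset() from the diff below. */
static void calculate_tlb_offset(void)
{
	int node, i, nr_node_vecs;

	/* More nodes than vectors: every node falls back to a single vector. */
	if (NR_NODES > NUM_INVALIDATE_TLB_VECTORS)
		nr_node_vecs = 1;
	else
		nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS / NR_NODES;

	for (node = 0; node < NR_NODES; node++) {
		/* Each node owns a contiguous slice of nr_node_vecs vectors. */
		int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) * nr_node_vecs;
		int cpu_offset = 0;

		for (i = 0; i < CPUS_PER_NODE; i++) {
			int cpu = node * CPUS_PER_NODE + i;

			/* Round-robin this node's CPUs over its slice. */
			tlb_vector_offset[cpu] = node_offset + cpu_offset;
			cpu_offset = (cpu_offset + 1) % nr_node_vecs;
		}
	}
}

int main(void)
{
	int cpu;

	calculate_tlb_offset();
	for (cpu = 0; cpu < NR_NODES * CPUS_PER_NODE; cpu++)
		printf("cpu %2d -> flush vector slot %d\n", cpu, tlb_vector_offset[cpu]);
	return 0;
}
```

With 3 nodes and 8 vectors, nr_node_vecs is 2, so node 0 uses slots 0-1, node 1 uses 2-3, node 2 uses 4-5, and slots 6-7 stay unused; that is the "waste some vectors" case the patch comment warns about when NUM_INVALIDATE_TLB_VECTORS is not a multiple of nr_online_nodes.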
Diffstat (limited to 'arch/x86/mm/tlb.c')
-rw-r--r-- | arch/x86/mm/tlb.c | 48 |
1 file changed, 47 insertions(+), 1 deletion(-)
```diff
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index c03f14ab6667..49358481c733 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -5,6 +5,7 @@
 #include <linux/smp.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/cpu.h>
 
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -52,6 +53,8 @@ union smp_flush_state {
    want false sharing in the per cpu data segment. */
 static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
 
+static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset);
+
 /*
  * We cannot call mmdrop() because we are in interrupt context,
  * instead update mm->cpu_vm_mask.
@@ -173,7 +176,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
 	union smp_flush_state *f;
 
 	/* Caller has disabled preemption */
-	sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
+	sender = this_cpu_read(tlb_vector_offset);
 	f = &flush_state[sender];
 
 	/*
@@ -218,6 +221,47 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 		flush_tlb_others_ipi(cpumask, mm, va);
 }
 
+static void __cpuinit calculate_tlb_offset(void)
+{
+	int cpu, node, nr_node_vecs;
+	/*
+	 * we are changing tlb_vector_offset for each CPU in runtime, but this
+	 * will not cause inconsistency, as the write is atomic under X86. we
+	 * might see more lock contentions in a short time, but after all CPU's
+	 * tlb_vector_offset are changed, everything should go normal
+	 *
+	 * Note: if NUM_INVALIDATE_TLB_VECTORS % nr_online_nodes !=0, we might
+	 * waste some vectors.
+	 **/
+	if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS)
+		nr_node_vecs = 1;
+	else
+		nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes;
+
+	for_each_online_node(node) {
+		int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) *
+			nr_node_vecs;
+		int cpu_offset = 0;
+		for_each_cpu(cpu, cpumask_of_node(node)) {
+			per_cpu(tlb_vector_offset, cpu) = node_offset +
+				cpu_offset;
+			cpu_offset++;
+			cpu_offset = cpu_offset % nr_node_vecs;
+		}
+	}
+}
+
+static int tlb_cpuhp_notify(struct notifier_block *n,
+		unsigned long action, void *hcpu)
+{
+	switch (action & 0xf) {
+	case CPU_ONLINE:
+	case CPU_DEAD:
+		calculate_tlb_offset();
+	}
+	return NOTIFY_OK;
+}
+
 static int __cpuinit init_smp_flush(void)
 {
 	int i;
@@ -225,6 +269,8 @@ static int __cpuinit init_smp_flush(void)
 	for (i = 0; i < ARRAY_SIZE(flush_state); i++)
 		raw_spin_lock_init(&flush_state[i].tlbstate_lock);
 
+	calculate_tlb_offset();
+	hotcpu_notifier(tlb_cpuhp_notify, 0);
 	return 0;
 }
 core_initcall(init_smp_flush);
```
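
The hotplug half of the change keeps those per-cpu offsets valid as CPUs come and go: init_smp_flush() computes them once at boot and registers tlb_cpuhp_notify(), which re-runs calculate_tlb_offset() on CPU_ONLINE and CPU_DEAD (the `action & 0xf` mask strips the CPU_TASKS_FROZEN bit, so the _FROZEN suspend/resume variants take the same path). Below is a minimal module-style sketch of that same notifier pattern, written against the pre-cpuhp-state-machine API of this era (hotcpu_notifier() was later removed in favor of cpuhp_setup_state()); recalc_state() is a hypothetical stand-in for calculate_tlb_offset().

```c
#include <linux/module.h>
#include <linux/cpu.h>        /* hotcpu_notifier(), CPU_ONLINE, CPU_DEAD */
#include <linux/notifier.h>   /* struct notifier_block, NOTIFY_OK */

/* Hypothetical placeholder for per-CPU state that must track the online set. */
static void recalc_state(void)
{
	/* e.g. redistribute per-CPU resources, as calculate_tlb_offset() does */
}

static int example_cpu_notify(struct notifier_block *nb,
			      unsigned long action, void *hcpu)
{
	/* Treat the _FROZEN variants like the plain events, as the patch does. */
	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_ONLINE:
	case CPU_DEAD:
		recalc_state();
		break;
	}
	return NOTIFY_OK;
}

static int __init example_init(void)
{
	recalc_state();                         /* settle the initial state */
	hotcpu_notifier(example_cpu_notify, 0); /* priority 0, as in the patch */
	return 0;
}
module_init(example_init);

MODULE_LICENSE("GPL");
```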