author		Venki Pallipadi <venkatesh.pallipadi@intel.com>	2008-01-30 07:32:01 -0500
committer	Ingo Molnar <mingo@elte.hu>	2008-01-30 07:32:01 -0500
commit		bde6f5f59c2b2b48a7a849c129d5b48838fe77ee (patch)
tree		4fa3befdfa227db56770a0dc85b8fc18be232f70
parent		7d409d6057c7244f8757ce15245f6df27271be0c (diff)
x86: voluntary leave_mm before entering ACPI C3
Avoid TLB flush IPIs during C3 states by voluntarily calling leave_mm() before entering C3.

The performance impact of the TLB flush on C3 should not be significant with respect to C3 wakeup latency. Also, CPUs tend to flush the TLB in hardware while in C3 anyway.

On an 8-logical-CPU system running make -j2, the number of tlbflush IPIs goes down from 40 per second to ~0. The total number of interrupts during the run of this workload was ~1200 per second, which makes this a ~3% savings in wakeups. There was no measurable performance or power impact, however.

[ akpm@linux-foundation.org: symbol export fixes. ]
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
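For readers unfamiliar with the lazy-TLB mechanism this patch exploits, the sketch below is a minimal toy model, not kernel code: any CPU still present in an mm's cpu_vm_mask must receive a flush IPI when that mm's page tables change, while a CPU that has left the mm (as acpi_unlazy_tlb()/leave_mm() arranges before C3) is skipped by the IPI sender. The names toy_mm, toy_leave_mm(), and toy_flush_ipis() are invented for this illustration.

#include <stdio.h>

#define NR_CPUS 8

/* Toy mm: bit n of cpu_vm_mask set => CPU n may hold stale TLB entries. */
struct toy_mm {
	unsigned int cpu_vm_mask;
};

/*
 * Rough analogue of leave_mm(): drop this CPU from the mm's flush set,
 * so later TLB shootdowns no longer need to IPI it.
 */
static void toy_leave_mm(struct toy_mm *mm, int cpu)
{
	mm->cpu_vm_mask &= ~(1u << cpu);
}

/* Rough analogue of the IPI sender: count CPUs a flush would interrupt. */
static int toy_flush_ipis(const struct toy_mm *mm, int sender)
{
	int cpu, ipis = 0;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu != sender && (mm->cpu_vm_mask & (1u << cpu)))
			ipis++;
	return ipis;
}

int main(void)
{
	struct toy_mm mm = { .cpu_vm_mask = 0xff };	/* all 8 CPUs attached */
	int cpu;

	printf("IPIs per flush before leave_mm: %d\n", toy_flush_ipis(&mm, 0));

	/*
	 * CPUs 1..7 go idle into C3 and voluntarily leave the mm first,
	 * as acpi_unlazy_tlb() arranges in this patch.
	 */
	for (cpu = 1; cpu < NR_CPUS; cpu++)
		toy_leave_mm(&mm, cpu);

	printf("IPIs per flush after leave_mm:  %d\n", toy_flush_ipis(&mm, 0));
	return 0;
}

Running this prints 7 IPIs before the CPUs leave the mm and 0 after, which is the same effect the patch measures at system scale (tlbflush IPIs dropping from 40/s to ~0).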
-rw-r--r--  arch/x86/kernel/smp_32.c          3
-rw-r--r--  arch/x86/kernel/smp_64.c          3
-rw-r--r--  drivers/acpi/processor_idle.c     2
-rw-r--r--  include/asm-ia64/acpi.h           2
-rw-r--r--  include/asm-x86/acpi.h            3
-rw-r--r--  include/asm-x86/mmu.h             8
-rw-r--r--  include/asm-x86/mmu_context_32.h  2
7 files changed, 19 insertions, 4 deletions
diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c
index 070816ac79e1..dc0cde9d16fb 100644
--- a/arch/x86/kernel/smp_32.c
+++ b/arch/x86/kernel/smp_32.c
@@ -256,13 +256,14 @@ static DEFINE_SPINLOCK(tlbstate_lock);
  * We need to reload %cr3 since the page tables may be going
  * away from under us..
  */
-void leave_mm(unsigned long cpu)
+void leave_mm(int cpu)
 {
 	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
 		BUG();
 	cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
 	load_cr3(swapper_pg_dir);
 }
+EXPORT_SYMBOL_GPL(leave_mm);
 
 /*
  *
diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c
index 02a6533e8909..2fd74b06db67 100644
--- a/arch/x86/kernel/smp_64.c
+++ b/arch/x86/kernel/smp_64.c
@@ -69,13 +69,14 @@ static DEFINE_PER_CPU(union smp_flush_state, flush_state);
  * We cannot call mmdrop() because we are in interrupt context,
  * instead update mm->cpu_vm_mask.
  */
-static inline void leave_mm(int cpu)
+void leave_mm(int cpu)
 {
 	if (read_pda(mmu_state) == TLBSTATE_OK)
 		BUG();
 	cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
 	load_cr3(swapper_pg_dir);
 }
+EXPORT_SYMBOL_GPL(leave_mm);
 
 /*
  *
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 2235f4e02d26..0721a8183c89 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -534,6 +534,7 @@ static void acpi_processor_idle(void)
 		break;
 
 	case ACPI_STATE_C3:
+		acpi_unlazy_tlb(smp_processor_id());
 		/*
 		 * Must be done before busmaster disable as we might
 		 * need to access HPET !
@@ -1423,6 +1424,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
 		return 0;
 	}
 
+	acpi_unlazy_tlb(smp_processor_id());
 	/*
 	 * Must be done before busmaster disable as we might need to
 	 * access HPET !
diff --git a/include/asm-ia64/acpi.h b/include/asm-ia64/acpi.h
index 81bcd5e51789..cd1cc39b5599 100644
--- a/include/asm-ia64/acpi.h
+++ b/include/asm-ia64/acpi.h
@@ -127,6 +127,8 @@ extern int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS];
 extern int __initdata nid_to_pxm_map[MAX_NUMNODES];
 #endif
 
+#define acpi_unlazy_tlb(x)
+
 #endif /*__KERNEL__*/
 
 #endif /*_ASM_ACPI_H*/
diff --git a/include/asm-x86/acpi.h b/include/asm-x86/acpi.h
index 2feb0c494be7..98a9ca266531 100644
--- a/include/asm-x86/acpi.h
+++ b/include/asm-x86/acpi.h
@@ -27,6 +27,7 @@
 
 #include <asm/numa.h>
 #include <asm/processor.h>
+#include <asm/mmu.h>
 
 #define COMPILER_DEPENDENT_INT64   long long
 #define COMPILER_DEPENDENT_UINT64  unsigned long long
@@ -167,4 +168,6 @@ static inline void acpi_fake_nodes(const struct bootnode *fake_nodes,
 }
 #endif
 
+#define acpi_unlazy_tlb(x)	leave_mm(x)
+
 #endif /*__X86_ASM_ACPI_H*/
diff --git a/include/asm-x86/mmu.h b/include/asm-x86/mmu.h
index 3f922c8e1c88..efa962c38897 100644
--- a/include/asm-x86/mmu.h
+++ b/include/asm-x86/mmu.h
@@ -20,4 +20,12 @@ typedef struct {
 	void *vdso;
 } mm_context_t;
 
+#ifdef CONFIG_SMP
+void leave_mm(int cpu);
+#else
+static inline void leave_mm(int cpu)
+{
+}
+#endif
+
 #endif /* _ASM_X86_MMU_H */
diff --git a/include/asm-x86/mmu_context_32.h b/include/asm-x86/mmu_context_32.h
index 7eb0b0b1fb3c..8198d1cca1f3 100644
--- a/include/asm-x86/mmu_context_32.h
+++ b/include/asm-x86/mmu_context_32.h
@@ -32,8 +32,6 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 #endif
 }
 
-void leave_mm(unsigned long cpu);
-
 static inline void switch_mm(struct mm_struct *prev,
 			     struct mm_struct *next,
 			     struct task_struct *tsk)