aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-02-26 14:16:58 -0500
committerIngo Molnar <mingo@elte.hu>2009-02-26 15:20:25 -0500
commit83ce400928680a6c8123d492684b27857f5a2d95 (patch)
tree384dfa725400a13b335204baa819a8741c47e0c4 /arch/x86
parentb342501cd31e5546d0c9ca8ceff5ded1832f9e5b (diff)
x86: set X86_FEATURE_TSC_RELIABLE
If the TSC is constant and non-stop, also set it reliable. (We will turn this off in DMI quirks for multi-chassis systems) The performance number on a 16-way Nehalem system running 32 tasks that context-switch between each other is significant: sched_clock_stable=0 sched_clock_stable=1 .................... .................... 22.456925 million/sec 24.306972 million/sec [+8.2%] lmbench's "lat_ctx -s 0 2" goes from 0.63 microseconds to 0.59 microseconds - a 6.7% increase in context-switching performance. Perfstat of 1 million pipe context switches between two tasks: Performance counter stats for './pipe-test-1m': [before] [after] ............ ............ 37621.421089 36436.848378 task clock ticks (msecs) 0 0 CPU migrations (events) 2000274 2000189 context switches (events) 194 193 pagefaults (events) 8433799643 8171016416 CPU cycles (events) -3.21% 8370133368 8180999694 instructions (events) -2.31% 4158565 3895941 cache references (events) -6.74% 44312 46264 cache misses (events) 2349.287976 2279.362465 wall-time (msecs) -3.06% The speedup comes straight from the reduction in the instruction count. sched_clock_cpu() got simpler and the whole workload thus executes faster. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/kernel/cpu/intel.c8
1 files changed, 7 insertions, 1 deletions
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 24ff26a38ade..5fff00c70de0 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -4,6 +4,7 @@
4#include <linux/string.h> 4#include <linux/string.h>
5#include <linux/bitops.h> 5#include <linux/bitops.h>
6#include <linux/smp.h> 6#include <linux/smp.h>
7#include <linux/sched.h>
7#include <linux/thread_info.h> 8#include <linux/thread_info.h>
8#include <linux/module.h> 9#include <linux/module.h>
9 10
@@ -56,11 +57,16 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
56 57
57 /* 58 /*
58 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate 59 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
59 * with P/T states and does not stop in deep C-states 60 * with P/T states and does not stop in deep C-states.
61 *
62 * It is also reliable across cores and sockets. (but not across
63 * cabinets - we turn it off in that case explicitly.)
60 */ 64 */
61 if (c->x86_power & (1 << 8)) { 65 if (c->x86_power & (1 << 8)) {
62 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 66 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
63 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); 67 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
68 set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
69 sched_clock_stable = 1;
64 } 70 }
65 71
66} 72}