aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Andi Kleen <ak@suse.de> 2006-01-11 16:45:24 -0500
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-01-11 22:04:58 -0500
commit c818a18146997d1356a4840b0c01f1168c16c8a4 (patch)
tree 8d7e049742e87666bf3089fe46e04828b6417fd7
parent 7f6c5b046978d68e69bdc73433ead41612a2a1c9 (diff)
[PATCH] x86_64: On Intel CPUs don't do an additional CPU sync before RDTSC
RDTSC serialization using cpuid is not needed for Intel platforms.
This increases gettimeofday performance.

Cc: vojtech@suse.cz
Cc: rohit.seth@intel.com
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- arch/x86_64/kernel/setup.c 1
-rw-r--r-- arch/x86_64/kernel/time.c 26
-rw-r--r-- arch/x86_64/kernel/vsyscall.c 3
-rw-r--r-- include/asm-x86_64/cpufeature.h 1
-rw-r--r-- include/asm-x86_64/timex.h 16
5 files changed, 26 insertions, 21 deletions
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 8090a0a46882..46bf556efc30 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -1042,6 +1042,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
1042 if ((c->x86 == 0xf && c->x86_model >= 0x03) || 1042 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
1043 (c->x86 == 0x6 && c->x86_model >= 0x0e)) 1043 (c->x86 == 0x6 && c->x86_model >= 0x0e))
1044 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); 1044 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
1045 set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
1045 c->x86_max_cores = intel_num_cpu_cores(c); 1046 c->x86_max_cores = intel_num_cpu_cores(c);
1046 1047
1047 srat_detect_node(); 1048 srat_detect_node();
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index f8d4b69388d2..25bc58aac2a5 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -71,14 +71,6 @@ unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
71struct timespec __xtime __section_xtime; 71struct timespec __xtime __section_xtime;
72struct timezone __sys_tz __section_sys_tz; 72struct timezone __sys_tz __section_sys_tz;
73 73
74static inline void rdtscll_sync(unsigned long *tsc)
75{
76#ifdef CONFIG_SMP
77 sync_core();
78#endif
79 rdtscll(*tsc);
80}
81
82/* 74/*
83 * do_gettimeoffset() returns microseconds since last timer interrupt was 75 * do_gettimeoffset() returns microseconds since last timer interrupt was
84 * triggered by hardware. A memory read of HPET is slower than a register read 76 * triggered by hardware. A memory read of HPET is slower than a register read
@@ -93,7 +85,7 @@ static inline unsigned int do_gettimeoffset_tsc(void)
93{ 85{
94 unsigned long t; 86 unsigned long t;
95 unsigned long x; 87 unsigned long x;
96 rdtscll_sync(&t); 88 t = get_cycles_sync();
97 if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */ 89 if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */
98 x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32; 90 x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32;
99 return x; 91 return x;
@@ -309,8 +301,7 @@ unsigned long long monotonic_clock(void)
309 last_offset = vxtime.last_tsc; 301 last_offset = vxtime.last_tsc;
310 base = monotonic_base; 302 base = monotonic_base;
311 } while (read_seqretry(&xtime_lock, seq)); 303 } while (read_seqretry(&xtime_lock, seq));
312 sync_core(); 304 this_offset = get_cycles_sync();
313 rdtscll(this_offset);
314 offset = (this_offset - last_offset)*1000/cpu_khz; 305 offset = (this_offset - last_offset)*1000/cpu_khz;
315 return base + offset; 306 return base + offset;
316 } 307 }
@@ -391,7 +382,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
391 delay = LATCH - 1 - delay; 382 delay = LATCH - 1 - delay;
392 } 383 }
393 384
394 rdtscll_sync(&tsc); 385 tsc = get_cycles_sync();
395 386
396 if (vxtime.mode == VXTIME_HPET) { 387 if (vxtime.mode == VXTIME_HPET) {
397 if (offset - vxtime.last > hpet_tick) { 388 if (offset - vxtime.last > hpet_tick) {
@@ -700,8 +691,7 @@ static unsigned int __init hpet_calibrate_tsc(void)
700 do { 691 do {
701 local_irq_disable(); 692 local_irq_disable();
702 hpet_now = hpet_readl(HPET_COUNTER); 693 hpet_now = hpet_readl(HPET_COUNTER);
703 sync_core(); 694 tsc_now = get_cycles_sync();
704 rdtscl(tsc_now);
705 local_irq_restore(flags); 695 local_irq_restore(flags);
706 } while ((tsc_now - tsc_start) < TICK_COUNT && 696 } while ((tsc_now - tsc_start) < TICK_COUNT &&
707 (hpet_now - hpet_start) < TICK_COUNT); 697 (hpet_now - hpet_start) < TICK_COUNT);
@@ -731,11 +721,9 @@ static unsigned int __init pit_calibrate_tsc(void)
731 outb(0xb0, 0x43); 721 outb(0xb0, 0x43);
732 outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42); 722 outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
733 outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42); 723 outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42);
734 rdtscll(start); 724 start = get_cycles_sync();
735 sync_core();
736 while ((inb(0x61) & 0x20) == 0); 725 while ((inb(0x61) & 0x20) == 0);
737 sync_core(); 726 end = get_cycles_sync();
738 rdtscll(end);
739 727
740 spin_unlock_irqrestore(&i8253_lock, flags); 728 spin_unlock_irqrestore(&i8253_lock, flags);
741 729
@@ -939,7 +927,7 @@ void __init time_init(void)
939 vxtime.mode = VXTIME_TSC; 927 vxtime.mode = VXTIME_TSC;
940 vxtime.quot = (1000000L << 32) / vxtime_hz; 928 vxtime.quot = (1000000L << 32) / vxtime_hz;
941 vxtime.tsc_quot = (1000L << 32) / cpu_khz; 929 vxtime.tsc_quot = (1000L << 32) / cpu_khz;
942 rdtscll_sync(&vxtime.last_tsc); 930 vxtime.last_tsc = get_cycles_sync();
943 setup_irq(0, &irq0); 931 setup_irq(0, &irq0);
944 932
945 set_cyc2ns_scale(cpu_khz); 933 set_cyc2ns_scale(cpu_khz);
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index 70a0bd16085f..9e51047fc410 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -66,8 +66,7 @@ static force_inline void do_vgettimeofday(struct timeval * tv)
66 (__jiffies - __wall_jiffies) * (1000000 / HZ); 66 (__jiffies - __wall_jiffies) * (1000000 / HZ);
67 67
68 if (__vxtime.mode != VXTIME_HPET) { 68 if (__vxtime.mode != VXTIME_HPET) {
69 sync_core(); 69 t = get_cycles_sync();
70 rdtscll(t);
71 if (t < __vxtime.last_tsc) 70 if (t < __vxtime.last_tsc)
72 t = __vxtime.last_tsc; 71 t = __vxtime.last_tsc;
73 usec += ((t - __vxtime.last_tsc) * 72 usec += ((t - __vxtime.last_tsc) *
diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h
index aea308c65709..72b39f514798 100644
--- a/include/asm-x86_64/cpufeature.h
+++ b/include/asm-x86_64/cpufeature.h
@@ -63,6 +63,7 @@
63#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ 63#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */
64#define X86_FEATURE_K8_C (3*32+ 4) /* C stepping K8 */ 64#define X86_FEATURE_K8_C (3*32+ 4) /* C stepping K8 */
65#define X86_FEATURE_CONSTANT_TSC (3*32+5) /* TSC runs at constant rate */ 65#define X86_FEATURE_CONSTANT_TSC (3*32+5) /* TSC runs at constant rate */
66#define X86_FEATURE_SYNC_RDTSC (3*32+6) /* RDTSC syncs CPU core */
66 67
67/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ 68/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
68#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ 69#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */
diff --git a/include/asm-x86_64/timex.h b/include/asm-x86_64/timex.h
index f971f45d6d78..f18443fcdf04 100644
--- a/include/asm-x86_64/timex.h
+++ b/include/asm-x86_64/timex.h
@@ -10,6 +10,9 @@
10#include <asm/msr.h> 10#include <asm/msr.h>
11#include <asm/vsyscall.h> 11#include <asm/vsyscall.h>
12#include <asm/hpet.h> 12#include <asm/hpet.h>
13#include <asm/system.h>
14#include <asm/processor.h>
15#include <linux/compiler.h>
13 16
14#define CLOCK_TICK_RATE PIT_TICK_RATE /* Underlying HZ */ 17#define CLOCK_TICK_RATE PIT_TICK_RATE /* Underlying HZ */
15 18
@@ -23,6 +26,19 @@ static inline cycles_t get_cycles (void)
23 return ret; 26 return ret;
24} 27}
25 28
29/* Like get_cycles, but make sure the CPU is synchronized. */
30static __always_inline cycles_t get_cycles_sync(void)
31{
32 unsigned long long ret;
33 unsigned eax;
34 /* Don't do an additional sync on CPUs where we know
35 RDTSC is already synchronous. */
36 alternative_io(ASM_NOP2, "cpuid", X86_FEATURE_SYNC_RDTSC,
37 "=a" (eax), "0" (1) : "ebx","ecx","edx","memory");
38 rdtscll(ret);
39 return ret;
40}
41
26extern unsigned int cpu_khz; 42extern unsigned int cpu_khz;
27 43
28extern int read_current_timer(unsigned long *timer_value); 44extern int read_current_timer(unsigned long *timer_value);