diff options
author | Andi Kleen <ak@suse.de> | 2006-01-11 16:45:24 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-01-11 22:04:58 -0500 |
commit | c818a18146997d1356a4840b0c01f1168c16c8a4 (patch) | |
tree | 8d7e049742e87666bf3089fe46e04828b6417fd7 | |
parent | 7f6c5b046978d68e69bdc73433ead41612a2a1c9 (diff) |
[PATCH] x86_64: On Intel CPUs don't do an additional CPU sync before RDTSC
RDTSC serialization using cpuid is not needed for Intel platforms.
This increases gettimeofday performance.
Cc: vojtech@suse.cz
Cc: rohit.seth@intel.com
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | arch/x86_64/kernel/setup.c | 1 | ||||
-rw-r--r-- | arch/x86_64/kernel/time.c | 26 | ||||
-rw-r--r-- | arch/x86_64/kernel/vsyscall.c | 3 | ||||
-rw-r--r-- | include/asm-x86_64/cpufeature.h | 1 | ||||
-rw-r--r-- | include/asm-x86_64/timex.h | 16 |
5 files changed, 26 insertions, 21 deletions
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 8090a0a46882..46bf556efc30 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c | |||
@@ -1042,6 +1042,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
1042 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || | 1042 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || |
1043 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) | 1043 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) |
1044 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); | 1044 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); |
1045 | set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | ||
1045 | c->x86_max_cores = intel_num_cpu_cores(c); | 1046 | c->x86_max_cores = intel_num_cpu_cores(c); |
1046 | 1047 | ||
1047 | srat_detect_node(); | 1048 | srat_detect_node(); |
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index f8d4b69388d2..25bc58aac2a5 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c | |||
@@ -71,14 +71,6 @@ unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES; | |||
71 | struct timespec __xtime __section_xtime; | 71 | struct timespec __xtime __section_xtime; |
72 | struct timezone __sys_tz __section_sys_tz; | 72 | struct timezone __sys_tz __section_sys_tz; |
73 | 73 | ||
74 | static inline void rdtscll_sync(unsigned long *tsc) | ||
75 | { | ||
76 | #ifdef CONFIG_SMP | ||
77 | sync_core(); | ||
78 | #endif | ||
79 | rdtscll(*tsc); | ||
80 | } | ||
81 | |||
82 | /* | 74 | /* |
83 | * do_gettimeoffset() returns microseconds since last timer interrupt was | 75 | * do_gettimeoffset() returns microseconds since last timer interrupt was |
84 | * triggered by hardware. A memory read of HPET is slower than a register read | 76 | * triggered by hardware. A memory read of HPET is slower than a register read |
@@ -93,7 +85,7 @@ static inline unsigned int do_gettimeoffset_tsc(void) | |||
93 | { | 85 | { |
94 | unsigned long t; | 86 | unsigned long t; |
95 | unsigned long x; | 87 | unsigned long x; |
96 | rdtscll_sync(&t); | 88 | t = get_cycles_sync(); |
97 | if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */ | 89 | if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */ |
98 | x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32; | 90 | x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32; |
99 | return x; | 91 | return x; |
@@ -309,8 +301,7 @@ unsigned long long monotonic_clock(void) | |||
309 | last_offset = vxtime.last_tsc; | 301 | last_offset = vxtime.last_tsc; |
310 | base = monotonic_base; | 302 | base = monotonic_base; |
311 | } while (read_seqretry(&xtime_lock, seq)); | 303 | } while (read_seqretry(&xtime_lock, seq)); |
312 | sync_core(); | 304 | this_offset = get_cycles_sync(); |
313 | rdtscll(this_offset); | ||
314 | offset = (this_offset - last_offset)*1000/cpu_khz; | 305 | offset = (this_offset - last_offset)*1000/cpu_khz; |
315 | return base + offset; | 306 | return base + offset; |
316 | } | 307 | } |
@@ -391,7 +382,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) | |||
391 | delay = LATCH - 1 - delay; | 382 | delay = LATCH - 1 - delay; |
392 | } | 383 | } |
393 | 384 | ||
394 | rdtscll_sync(&tsc); | 385 | tsc = get_cycles_sync(); |
395 | 386 | ||
396 | if (vxtime.mode == VXTIME_HPET) { | 387 | if (vxtime.mode == VXTIME_HPET) { |
397 | if (offset - vxtime.last > hpet_tick) { | 388 | if (offset - vxtime.last > hpet_tick) { |
@@ -700,8 +691,7 @@ static unsigned int __init hpet_calibrate_tsc(void) | |||
700 | do { | 691 | do { |
701 | local_irq_disable(); | 692 | local_irq_disable(); |
702 | hpet_now = hpet_readl(HPET_COUNTER); | 693 | hpet_now = hpet_readl(HPET_COUNTER); |
703 | sync_core(); | 694 | tsc_now = get_cycles_sync(); |
704 | rdtscl(tsc_now); | ||
705 | local_irq_restore(flags); | 695 | local_irq_restore(flags); |
706 | } while ((tsc_now - tsc_start) < TICK_COUNT && | 696 | } while ((tsc_now - tsc_start) < TICK_COUNT && |
707 | (hpet_now - hpet_start) < TICK_COUNT); | 697 | (hpet_now - hpet_start) < TICK_COUNT); |
@@ -731,11 +721,9 @@ static unsigned int __init pit_calibrate_tsc(void) | |||
731 | outb(0xb0, 0x43); | 721 | outb(0xb0, 0x43); |
732 | outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42); | 722 | outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42); |
733 | outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42); | 723 | outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42); |
734 | rdtscll(start); | 724 | start = get_cycles_sync(); |
735 | sync_core(); | ||
736 | while ((inb(0x61) & 0x20) == 0); | 725 | while ((inb(0x61) & 0x20) == 0); |
737 | sync_core(); | 726 | end = get_cycles_sync(); |
738 | rdtscll(end); | ||
739 | 727 | ||
740 | spin_unlock_irqrestore(&i8253_lock, flags); | 728 | spin_unlock_irqrestore(&i8253_lock, flags); |
741 | 729 | ||
@@ -939,7 +927,7 @@ void __init time_init(void) | |||
939 | vxtime.mode = VXTIME_TSC; | 927 | vxtime.mode = VXTIME_TSC; |
940 | vxtime.quot = (1000000L << 32) / vxtime_hz; | 928 | vxtime.quot = (1000000L << 32) / vxtime_hz; |
941 | vxtime.tsc_quot = (1000L << 32) / cpu_khz; | 929 | vxtime.tsc_quot = (1000L << 32) / cpu_khz; |
942 | rdtscll_sync(&vxtime.last_tsc); | 930 | vxtime.last_tsc = get_cycles_sync(); |
943 | setup_irq(0, &irq0); | 931 | setup_irq(0, &irq0); |
944 | 932 | ||
945 | set_cyc2ns_scale(cpu_khz); | 933 | set_cyc2ns_scale(cpu_khz); |
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index 70a0bd16085f..9e51047fc410 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c | |||
@@ -66,8 +66,7 @@ static force_inline void do_vgettimeofday(struct timeval * tv) | |||
66 | (__jiffies - __wall_jiffies) * (1000000 / HZ); | 66 | (__jiffies - __wall_jiffies) * (1000000 / HZ); |
67 | 67 | ||
68 | if (__vxtime.mode != VXTIME_HPET) { | 68 | if (__vxtime.mode != VXTIME_HPET) { |
69 | sync_core(); | 69 | t = get_cycles_sync(); |
70 | rdtscll(t); | ||
71 | if (t < __vxtime.last_tsc) | 70 | if (t < __vxtime.last_tsc) |
72 | t = __vxtime.last_tsc; | 71 | t = __vxtime.last_tsc; |
73 | usec += ((t - __vxtime.last_tsc) * | 72 | usec += ((t - __vxtime.last_tsc) * |
diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h index aea308c65709..72b39f514798 100644 --- a/include/asm-x86_64/cpufeature.h +++ b/include/asm-x86_64/cpufeature.h | |||
@@ -63,6 +63,7 @@ | |||
63 | #define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ | 63 | #define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ |
64 | #define X86_FEATURE_K8_C (3*32+ 4) /* C stepping K8 */ | 64 | #define X86_FEATURE_K8_C (3*32+ 4) /* C stepping K8 */ |
65 | #define X86_FEATURE_CONSTANT_TSC (3*32+5) /* TSC runs at constant rate */ | 65 | #define X86_FEATURE_CONSTANT_TSC (3*32+5) /* TSC runs at constant rate */ |
66 | #define X86_FEATURE_SYNC_RDTSC (3*32+6) /* RDTSC syncs CPU core */ | ||
66 | 67 | ||
67 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ | 68 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ |
68 | #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ | 69 | #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ |
diff --git a/include/asm-x86_64/timex.h b/include/asm-x86_64/timex.h index f971f45d6d78..f18443fcdf04 100644 --- a/include/asm-x86_64/timex.h +++ b/include/asm-x86_64/timex.h | |||
@@ -10,6 +10,9 @@ | |||
10 | #include <asm/msr.h> | 10 | #include <asm/msr.h> |
11 | #include <asm/vsyscall.h> | 11 | #include <asm/vsyscall.h> |
12 | #include <asm/hpet.h> | 12 | #include <asm/hpet.h> |
13 | #include <asm/system.h> | ||
14 | #include <asm/processor.h> | ||
15 | #include <linux/compiler.h> | ||
13 | 16 | ||
14 | #define CLOCK_TICK_RATE PIT_TICK_RATE /* Underlying HZ */ | 17 | #define CLOCK_TICK_RATE PIT_TICK_RATE /* Underlying HZ */ |
15 | 18 | ||
@@ -23,6 +26,19 @@ static inline cycles_t get_cycles (void) | |||
23 | return ret; | 26 | return ret; |
24 | } | 27 | } |
25 | 28 | ||
29 | /* Like get_cycles, but make sure the CPU is synchronized. */ | ||
30 | static __always_inline cycles_t get_cycles_sync(void) | ||
31 | { | ||
32 | unsigned long long ret; | ||
33 | unsigned eax; | ||
34 | /* Don't do an additional sync on CPUs where we know | ||
35 | RDTSC is already synchronous. */ | ||
36 | alternative_io(ASM_NOP2, "cpuid", X86_FEATURE_SYNC_RDTSC, | ||
37 | "=a" (eax), "0" (1) : "ebx","ecx","edx","memory"); | ||
38 | rdtscll(ret); | ||
39 | return ret; | ||
40 | } | ||
41 | |||
26 | extern unsigned int cpu_khz; | 42 | extern unsigned int cpu_khz; |
27 | 43 | ||
28 | extern int read_current_timer(unsigned long *timer_value); | 44 | extern int read_current_timer(unsigned long *timer_value); |