aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Zachary Amsden <zach@vmware.com> 2007-03-05 03:30:35 -0500
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-03-05 10:57:52 -0500
commit: 6cb9a8350aee789100a365794272ed20cc8f2401 (patch)
tree: c2e319b30ebcb6c9d247382303f7fcff1ab0d641
parent: 7507ba34e827ca3c6bbcd34d20a8df8ba365fca6 (diff)
[PATCH] vmi: sched clock paravirt op fix
The custom_sched_clock hook is broken. The result from sched_clock needs to be in nanoseconds, not in CPU cycles. The TSC is insufficient for this purpose, because TSC is poorly defined in a virtual environment, and mostly represents real world time instead of scheduled process time (which can be interrupted without notice when a virtual machine is descheduled).

To make the scheduler consistent, we must expose a different nature of time, that is scheduled time. So deprecate this custom_sched_clock hack and turn it into a paravirt-op, as it should have been all along. This allows the tsc.c code which converts cycles to nanoseconds to be shared by all paravirt-ops backends.

It is unfortunate to add a new paravirt-op, but this is a very distinct abstraction which is clearly different for all virtual machine implementations, and it gets rid of an ugly indirect function which I ashamedly admit I hacked in to try to get this to work earlier, and then even got in the wrong units.

Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- arch/i386/kernel/paravirt.c | 2
-rw-r--r-- arch/i386/kernel/tsc.c | 6
-rw-r--r-- arch/i386/kernel/vmi.c | 2
-rw-r--r-- arch/i386/kernel/vmitime.c | 2
-rw-r--r-- include/asm-i386/paravirt.h | 3
-rw-r--r-- include/asm-i386/time.h | 1
-rw-r--r-- include/asm-i386/timer.h | 8
-rw-r--r-- include/asm-i386/vmi_time.h | 2
8 files changed, 17 insertions, 9 deletions
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index c156ecfa3872..31bbe70d1e02 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -32,6 +32,7 @@
32#include <asm/fixmap.h> 32#include <asm/fixmap.h>
33#include <asm/apic.h> 33#include <asm/apic.h>
34#include <asm/tlbflush.h> 34#include <asm/tlbflush.h>
35#include <asm/timer.h>
35 36
36/* nop stub */ 37/* nop stub */
37static void native_nop(void) 38static void native_nop(void)
@@ -520,6 +521,7 @@ struct paravirt_ops paravirt_ops = {
520 .write_msr = native_write_msr, 521 .write_msr = native_write_msr,
521 .read_tsc = native_read_tsc, 522 .read_tsc = native_read_tsc,
522 .read_pmc = native_read_pmc, 523 .read_pmc = native_read_pmc,
524 .get_scheduled_cycles = native_read_tsc,
523 .load_tr_desc = native_load_tr_desc, 525 .load_tr_desc = native_load_tr_desc,
524 .set_ldt = native_set_ldt, 526 .set_ldt = native_set_ldt,
525 .load_gdt = native_load_gdt, 527 .load_gdt = native_load_gdt,
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 3082a418635c..c9c9d54c91f6 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -14,6 +14,7 @@
14#include <asm/delay.h> 14#include <asm/delay.h>
15#include <asm/tsc.h> 15#include <asm/tsc.h>
16#include <asm/io.h> 16#include <asm/io.h>
17#include <asm/timer.h>
17 18
18#include "mach_timer.h" 19#include "mach_timer.h"
19 20
@@ -102,9 +103,6 @@ unsigned long long sched_clock(void)
102{ 103{
103 unsigned long long this_offset; 104 unsigned long long this_offset;
104 105
105 if (unlikely(custom_sched_clock))
106 return (*custom_sched_clock)();
107
108 /* 106 /*
109 * Fall back to jiffies if there's no TSC available: 107 * Fall back to jiffies if there's no TSC available:
110 */ 108 */
@@ -113,7 +111,7 @@ unsigned long long sched_clock(void)
113 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); 111 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
114 112
115 /* read the Time Stamp Counter: */ 113 /* read the Time Stamp Counter: */
116 rdtscll(this_offset); 114 get_scheduled_cycles(this_offset);
117 115
118 /* return the value in ns */ 116 /* return the value in ns */
119 return cycles_2_ns(this_offset); 117 return cycles_2_ns(this_offset);
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c
index 8417f741fac8..556b9a6b7365 100644
--- a/arch/i386/kernel/vmi.c
+++ b/arch/i386/kernel/vmi.c
@@ -873,7 +873,7 @@ static inline int __init activate_vmi(void)
873 paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm; 873 paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm;
874 paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm; 874 paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm;
875#endif 875#endif
876 custom_sched_clock = vmi_sched_clock; 876 paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles;
877 } 877 }
878 if (!disable_noidle) 878 if (!disable_noidle)
879 para_fill(safe_halt, Halt); 879 para_fill(safe_halt, Halt);
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c
index 694aa85d22c2..f2aa8fab8c02 100644
--- a/arch/i386/kernel/vmitime.c
+++ b/arch/i386/kernel/vmitime.c
@@ -172,7 +172,7 @@ int vmi_set_wallclock(unsigned long now)
172 return -1; 172 return -1;
173} 173}
174 174
175unsigned long long vmi_sched_clock(void) 175unsigned long long vmi_get_sched_cycles(void)
176{ 176{
177 return read_available_cycles(); 177 return read_available_cycles();
178} 178}
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h
index 6317e0a4d735..a13230254f4f 100644
--- a/include/asm-i386/paravirt.h
+++ b/include/asm-i386/paravirt.h
@@ -94,6 +94,7 @@ struct paravirt_ops
94 94
95 u64 (*read_tsc)(void); 95 u64 (*read_tsc)(void);
96 u64 (*read_pmc)(void); 96 u64 (*read_pmc)(void);
97 u64 (*get_scheduled_cycles)(void);
97 98
98 void (*load_tr_desc)(void); 99 void (*load_tr_desc)(void);
99 void (*load_gdt)(const struct Xgt_desc_struct *); 100 void (*load_gdt)(const struct Xgt_desc_struct *);
@@ -273,6 +274,8 @@ static inline void halt(void)
273 274
274#define rdtscll(val) (val = paravirt_ops.read_tsc()) 275#define rdtscll(val) (val = paravirt_ops.read_tsc())
275 276
277#define get_scheduled_cycles(val) (val = paravirt_ops.get_scheduled_cycles())
278
276#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) 279#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
277 280
278#define rdpmc(counter,low,high) do { \ 281#define rdpmc(counter,low,high) do { \
diff --git a/include/asm-i386/time.h b/include/asm-i386/time.h
index 571b4294dc2e..ea8065af825a 100644
--- a/include/asm-i386/time.h
+++ b/include/asm-i386/time.h
@@ -30,7 +30,6 @@ static inline int native_set_wallclock(unsigned long nowtime)
30 30
31#ifdef CONFIG_PARAVIRT 31#ifdef CONFIG_PARAVIRT
32#include <asm/paravirt.h> 32#include <asm/paravirt.h>
33extern unsigned long long native_sched_clock(void);
34#else /* !CONFIG_PARAVIRT */ 33#else /* !CONFIG_PARAVIRT */
35 34
36#define get_wallclock() native_get_wallclock() 35#define get_wallclock() native_get_wallclock()
diff --git a/include/asm-i386/timer.h b/include/asm-i386/timer.h
index 4752c3a6a708..d1f7b4f575b4 100644
--- a/include/asm-i386/timer.h
+++ b/include/asm-i386/timer.h
@@ -4,13 +4,19 @@
4#include <linux/pm.h> 4#include <linux/pm.h>
5 5
6#define TICK_SIZE (tick_nsec / 1000) 6#define TICK_SIZE (tick_nsec / 1000)
7
7void setup_pit_timer(void); 8void setup_pit_timer(void);
9unsigned long long native_sched_clock(void);
10
8/* Modifiers for buggy PIT handling */ 11/* Modifiers for buggy PIT handling */
9extern int pit_latch_buggy; 12extern int pit_latch_buggy;
10extern int timer_ack; 13extern int timer_ack;
11extern int no_timer_check; 14extern int no_timer_check;
12extern unsigned long long (*custom_sched_clock)(void);
13extern int no_sync_cmos_clock; 15extern int no_sync_cmos_clock;
14extern int recalibrate_cpu_khz(void); 16extern int recalibrate_cpu_khz(void);
15 17
18#ifndef CONFIG_PARAVIRT
19#define get_scheduled_cycles(val) rdtscll(val)
20#endif
21
16#endif 22#endif
diff --git a/include/asm-i386/vmi_time.h b/include/asm-i386/vmi_time.h
index c12931211007..f59c35d37352 100644
--- a/include/asm-i386/vmi_time.h
+++ b/include/asm-i386/vmi_time.h
@@ -49,7 +49,7 @@ extern struct vmi_timer_ops {
49extern void __init vmi_time_init(void); 49extern void __init vmi_time_init(void);
50extern unsigned long vmi_get_wallclock(void); 50extern unsigned long vmi_get_wallclock(void);
51extern int vmi_set_wallclock(unsigned long now); 51extern int vmi_set_wallclock(unsigned long now);
52extern unsigned long long vmi_sched_clock(void); 52extern unsigned long long vmi_get_sched_cycles(void);
53 53
54#ifdef CONFIG_X86_LOCAL_APIC 54#ifdef CONFIG_X86_LOCAL_APIC
55extern void __init vmi_timer_setup_boot_alarm(void); 55extern void __init vmi_timer_setup_boot_alarm(void);