diff options
author | Zachary Amsden <zach@vmware.com> | 2007-03-05 03:30:35 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-03-05 10:57:52 -0500 |
commit | 6cb9a8350aee789100a365794272ed20cc8f2401 (patch) | |
tree | c2e319b30ebcb6c9d247382303f7fcff1ab0d641 | |
parent | 7507ba34e827ca3c6bbcd34d20a8df8ba365fca6 (diff) |
[PATCH] vmi: sched clock paravirt op fix
The custom_sched_clock hook is broken. The result from sched_clock needs to
be in nanoseconds, not in CPU cycles. The TSC is insufficient for this
purpose, because TSC is poorly defined in a virtual environment, and mostly
represents real world time instead of scheduled process time (which can be
interrupted without notice when a virtual machine is descheduled).
To make the scheduler consistent, we must expose a different notion of time,
namely scheduled time. So deprecate this custom_sched_clock hack and turn it
into a paravirt-op, as it should have been all along. This allows the tsc.c
code which converts cycles to nanoseconds to be shared by all paravirt-ops
backends.
It is unfortunate to add a new paravirt-op, but this is a very distinct
abstraction which is clearly different for all virtual machine
implementations, and it gets rid of an ugly indirect function which, I am
ashamed to admit, I hacked in earlier to try to get this to work, and then
even got the units wrong.
Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | arch/i386/kernel/paravirt.c | 2 | ||||
-rw-r--r-- | arch/i386/kernel/tsc.c | 6 | ||||
-rw-r--r-- | arch/i386/kernel/vmi.c | 2 | ||||
-rw-r--r-- | arch/i386/kernel/vmitime.c | 2 | ||||
-rw-r--r-- | include/asm-i386/paravirt.h | 3 | ||||
-rw-r--r-- | include/asm-i386/time.h | 1 | ||||
-rw-r--r-- | include/asm-i386/timer.h | 8 | ||||
-rw-r--r-- | include/asm-i386/vmi_time.h | 2 |
8 files changed, 17 insertions, 9 deletions
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index c156ecfa3872..31bbe70d1e02 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <asm/fixmap.h> | 32 | #include <asm/fixmap.h> |
33 | #include <asm/apic.h> | 33 | #include <asm/apic.h> |
34 | #include <asm/tlbflush.h> | 34 | #include <asm/tlbflush.h> |
35 | #include <asm/timer.h> | ||
35 | 36 | ||
36 | /* nop stub */ | 37 | /* nop stub */ |
37 | static void native_nop(void) | 38 | static void native_nop(void) |
@@ -520,6 +521,7 @@ struct paravirt_ops paravirt_ops = { | |||
520 | .write_msr = native_write_msr, | 521 | .write_msr = native_write_msr, |
521 | .read_tsc = native_read_tsc, | 522 | .read_tsc = native_read_tsc, |
522 | .read_pmc = native_read_pmc, | 523 | .read_pmc = native_read_pmc, |
524 | .get_scheduled_cycles = native_read_tsc, | ||
523 | .load_tr_desc = native_load_tr_desc, | 525 | .load_tr_desc = native_load_tr_desc, |
524 | .set_ldt = native_set_ldt, | 526 | .set_ldt = native_set_ldt, |
525 | .load_gdt = native_load_gdt, | 527 | .load_gdt = native_load_gdt, |
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index 3082a418635c..c9c9d54c91f6 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <asm/delay.h> | 14 | #include <asm/delay.h> |
15 | #include <asm/tsc.h> | 15 | #include <asm/tsc.h> |
16 | #include <asm/io.h> | 16 | #include <asm/io.h> |
17 | #include <asm/timer.h> | ||
17 | 18 | ||
18 | #include "mach_timer.h" | 19 | #include "mach_timer.h" |
19 | 20 | ||
@@ -102,9 +103,6 @@ unsigned long long sched_clock(void) | |||
102 | { | 103 | { |
103 | unsigned long long this_offset; | 104 | unsigned long long this_offset; |
104 | 105 | ||
105 | if (unlikely(custom_sched_clock)) | ||
106 | return (*custom_sched_clock)(); | ||
107 | |||
108 | /* | 106 | /* |
109 | * Fall back to jiffies if there's no TSC available: | 107 | * Fall back to jiffies if there's no TSC available: |
110 | */ | 108 | */ |
@@ -113,7 +111,7 @@ unsigned long long sched_clock(void) | |||
113 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); | 111 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); |
114 | 112 | ||
115 | /* read the Time Stamp Counter: */ | 113 | /* read the Time Stamp Counter: */ |
116 | rdtscll(this_offset); | 114 | get_scheduled_cycles(this_offset); |
117 | 115 | ||
118 | /* return the value in ns */ | 116 | /* return the value in ns */ |
119 | return cycles_2_ns(this_offset); | 117 | return cycles_2_ns(this_offset); |
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c index 8417f741fac8..556b9a6b7365 100644 --- a/arch/i386/kernel/vmi.c +++ b/arch/i386/kernel/vmi.c | |||
@@ -873,7 +873,7 @@ static inline int __init activate_vmi(void) | |||
873 | paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm; | 873 | paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm; |
874 | paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm; | 874 | paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm; |
875 | #endif | 875 | #endif |
876 | custom_sched_clock = vmi_sched_clock; | 876 | paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles; |
877 | } | 877 | } |
878 | if (!disable_noidle) | 878 | if (!disable_noidle) |
879 | para_fill(safe_halt, Halt); | 879 | para_fill(safe_halt, Halt); |
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c index 694aa85d22c2..f2aa8fab8c02 100644 --- a/arch/i386/kernel/vmitime.c +++ b/arch/i386/kernel/vmitime.c | |||
@@ -172,7 +172,7 @@ int vmi_set_wallclock(unsigned long now) | |||
172 | return -1; | 172 | return -1; |
173 | } | 173 | } |
174 | 174 | ||
175 | unsigned long long vmi_sched_clock(void) | 175 | unsigned long long vmi_get_sched_cycles(void) |
176 | { | 176 | { |
177 | return read_available_cycles(); | 177 | return read_available_cycles(); |
178 | } | 178 | } |
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 6317e0a4d735..a13230254f4f 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h | |||
@@ -94,6 +94,7 @@ struct paravirt_ops | |||
94 | 94 | ||
95 | u64 (*read_tsc)(void); | 95 | u64 (*read_tsc)(void); |
96 | u64 (*read_pmc)(void); | 96 | u64 (*read_pmc)(void); |
97 | u64 (*get_scheduled_cycles)(void); | ||
97 | 98 | ||
98 | void (*load_tr_desc)(void); | 99 | void (*load_tr_desc)(void); |
99 | void (*load_gdt)(const struct Xgt_desc_struct *); | 100 | void (*load_gdt)(const struct Xgt_desc_struct *); |
@@ -273,6 +274,8 @@ static inline void halt(void) | |||
273 | 274 | ||
274 | #define rdtscll(val) (val = paravirt_ops.read_tsc()) | 275 | #define rdtscll(val) (val = paravirt_ops.read_tsc()) |
275 | 276 | ||
277 | #define get_scheduled_cycles(val) (val = paravirt_ops.get_scheduled_cycles()) | ||
278 | |||
276 | #define write_tsc(val1,val2) wrmsr(0x10, val1, val2) | 279 | #define write_tsc(val1,val2) wrmsr(0x10, val1, val2) |
277 | 280 | ||
278 | #define rdpmc(counter,low,high) do { \ | 281 | #define rdpmc(counter,low,high) do { \ |
diff --git a/include/asm-i386/time.h b/include/asm-i386/time.h index 571b4294dc2e..ea8065af825a 100644 --- a/include/asm-i386/time.h +++ b/include/asm-i386/time.h | |||
@@ -30,7 +30,6 @@ static inline int native_set_wallclock(unsigned long nowtime) | |||
30 | 30 | ||
31 | #ifdef CONFIG_PARAVIRT | 31 | #ifdef CONFIG_PARAVIRT |
32 | #include <asm/paravirt.h> | 32 | #include <asm/paravirt.h> |
33 | extern unsigned long long native_sched_clock(void); | ||
34 | #else /* !CONFIG_PARAVIRT */ | 33 | #else /* !CONFIG_PARAVIRT */ |
35 | 34 | ||
36 | #define get_wallclock() native_get_wallclock() | 35 | #define get_wallclock() native_get_wallclock() |
diff --git a/include/asm-i386/timer.h b/include/asm-i386/timer.h index 4752c3a6a708..d1f7b4f575b4 100644 --- a/include/asm-i386/timer.h +++ b/include/asm-i386/timer.h | |||
@@ -4,13 +4,19 @@ | |||
4 | #include <linux/pm.h> | 4 | #include <linux/pm.h> |
5 | 5 | ||
6 | #define TICK_SIZE (tick_nsec / 1000) | 6 | #define TICK_SIZE (tick_nsec / 1000) |
7 | |||
7 | void setup_pit_timer(void); | 8 | void setup_pit_timer(void); |
9 | unsigned long long native_sched_clock(void); | ||
10 | |||
8 | /* Modifiers for buggy PIT handling */ | 11 | /* Modifiers for buggy PIT handling */ |
9 | extern int pit_latch_buggy; | 12 | extern int pit_latch_buggy; |
10 | extern int timer_ack; | 13 | extern int timer_ack; |
11 | extern int no_timer_check; | 14 | extern int no_timer_check; |
12 | extern unsigned long long (*custom_sched_clock)(void); | ||
13 | extern int no_sync_cmos_clock; | 15 | extern int no_sync_cmos_clock; |
14 | extern int recalibrate_cpu_khz(void); | 16 | extern int recalibrate_cpu_khz(void); |
15 | 17 | ||
18 | #ifndef CONFIG_PARAVIRT | ||
19 | #define get_scheduled_cycles(val) rdtscll(val) | ||
20 | #endif | ||
21 | |||
16 | #endif | 22 | #endif |
diff --git a/include/asm-i386/vmi_time.h b/include/asm-i386/vmi_time.h index c12931211007..f59c35d37352 100644 --- a/include/asm-i386/vmi_time.h +++ b/include/asm-i386/vmi_time.h | |||
@@ -49,7 +49,7 @@ extern struct vmi_timer_ops { | |||
49 | extern void __init vmi_time_init(void); | 49 | extern void __init vmi_time_init(void); |
50 | extern unsigned long vmi_get_wallclock(void); | 50 | extern unsigned long vmi_get_wallclock(void); |
51 | extern int vmi_set_wallclock(unsigned long now); | 51 | extern int vmi_set_wallclock(unsigned long now); |
52 | extern unsigned long long vmi_sched_clock(void); | 52 | extern unsigned long long vmi_get_sched_cycles(void); |
53 | 53 | ||
54 | #ifdef CONFIG_X86_LOCAL_APIC | 54 | #ifdef CONFIG_X86_LOCAL_APIC |
55 | extern void __init vmi_timer_setup_boot_alarm(void); | 55 | extern void __init vmi_timer_setup_boot_alarm(void); |