aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGlauber de Oliveira Costa <gcosta@redhat.com>2008-02-15 14:52:48 -0500
committerAvi Kivity <avi@qumranet.com>2008-04-27 04:53:22 -0400
commit790c73f6289a204f858ffdcbe4a2b38e91657ec6 (patch)
tree3d6b146538b11e45a713bb4fa1b44af4509ca27f
parent18068523d3a0b41fcee5b53cdb437a0ab4d65e4b (diff)
x86: KVM guest: paravirtualized clocksource
This is the guest part of the kvm clock implementation. It does not do tsc-only timing, as tsc can have deltas between cpus, and it did not seem worthwhile to me to keep adjusting them. We do use it, however, for fine-grained adjustment. Other than that, time comes from the host. [randy dunlap: add missing include] [randy dunlap: disallow on Voyager or Visual WS] Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com> Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
-rw-r--r--arch/x86/Kconfig11
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/kvmclock.c160
-rw-r--r--arch/x86/kernel/setup_32.c5
-rw-r--r--arch/x86/kernel/setup_64.c5
5 files changed, 182 insertions, 0 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2fadf794483d..40cedc255eda 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -373,6 +373,17 @@ config VMI
373 at the moment), by linking the kernel to a GPL-ed ROM module 373 at the moment), by linking the kernel to a GPL-ed ROM module
374 provided by the hypervisor. 374 provided by the hypervisor.
375 375
376config KVM_CLOCK
377 bool "KVM paravirtualized clock"
378 select PARAVIRT
379 depends on !(X86_VISWS || X86_VOYAGER)
380 help
381 Turning on this option will allow you to run a paravirtualized clock
382 when running over the KVM hypervisor. Instead of relying on a PIT
383 (or probably other) emulation by the underlying device model, the host
384 provides the guest with timing infrastructure such as time of day, and
385 system time.
386
376source "arch/x86/lguest/Kconfig" 387source "arch/x86/lguest/Kconfig"
377 388
378config PARAVIRT 389config PARAVIRT
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 90e092d0af0c..483047a33024 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -80,6 +80,7 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
80obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o 80obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
81 81
82obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o 82obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
83obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
83obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o 84obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
84 85
85ifdef CONFIG_INPUT_PCSPKR 86ifdef CONFIG_INPUT_PCSPKR
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
new file mode 100644
index 000000000000..b999f5e5b3bf
--- /dev/null
+++ b/arch/x86/kernel/kvmclock.c
@@ -0,0 +1,160 @@
1/* KVM paravirtual clock driver. A clocksource implementation
2 Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17*/
18
19#include <linux/clocksource.h>
20#include <linux/kvm_para.h>
21#include <asm/arch_hooks.h>
22#include <asm/msr.h>
23#include <asm/apic.h>
24#include <linux/percpu.h>
25
26#define KVM_SCALE 22
27
28static int kvmclock = 1;
29
30static int parse_no_kvmclock(char *arg)
31{
32 kvmclock = 0;
33 return 0;
34}
35early_param("no-kvmclock", parse_no_kvmclock);
36
37/* The hypervisor will put information about time periodically here */
38static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
39#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
40
41static inline u64 kvm_get_delta(u64 last_tsc)
42{
43 int cpu = smp_processor_id();
44 u64 delta = native_read_tsc() - last_tsc;
45 return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
46}
47
48static struct kvm_wall_clock wall_clock;
49static cycle_t kvm_clock_read(void);
50/*
51 * The wallclock is the time of day when we booted. Since then, some time may
52 * have elapsed since the hypervisor wrote the data. So we try to account for
53 * that with system time
54 */
55unsigned long kvm_get_wallclock(void)
56{
57 u32 wc_sec, wc_nsec;
58 u64 delta;
59 struct timespec ts;
60 int version, nsec;
61 int low, high;
62
63 low = (int)__pa(&wall_clock);
64 high = ((u64)__pa(&wall_clock) >> 32);
65
66 delta = kvm_clock_read();
67
68 native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
69 do {
70 version = wall_clock.wc_version;
71 rmb();
72 wc_sec = wall_clock.wc_sec;
73 wc_nsec = wall_clock.wc_nsec;
74 rmb();
75 } while ((wall_clock.wc_version != version) || (version & 1));
76
77 delta = kvm_clock_read() - delta;
78 delta += wc_nsec;
79 nsec = do_div(delta, NSEC_PER_SEC);
80 set_normalized_timespec(&ts, wc_sec + delta, nsec);
81 /*
82 * Of all mechanisms of time adjustment I've tested, this one
83 * was the champion!
84 */
85 return ts.tv_sec + 1;
86}
87
/*
 * Wallclock "set" hook installed by kvmclock_init().
 *
 * @now: requested time (ignored).
 *
 * Does nothing and reports success (0).
 */
int kvm_set_wallclock(unsigned long now)
{
	return 0;
}
92
93/*
94 * This is our read_clock function. The host puts an tsc timestamp each time
95 * it updates a new time. Without the tsc adjustment, we can have a situation
96 * in which a vcpu starts to run earlier (smaller system_time), but probes
97 * time later (compared to another vcpu), leading to backwards time
98 */
99static cycle_t kvm_clock_read(void)
100{
101 u64 last_tsc, now;
102 int cpu;
103
104 preempt_disable();
105 cpu = smp_processor_id();
106
107 last_tsc = get_clock(cpu, tsc_timestamp);
108 now = get_clock(cpu, system_time);
109
110 now += kvm_get_delta(last_tsc);
111 preempt_enable();
112
113 return now;
114}
115static struct clocksource kvm_clock = {
116 .name = "kvm-clock",
117 .read = kvm_clock_read,
118 .rating = 400,
119 .mask = CLOCKSOURCE_MASK(64),
120 .mult = 1 << KVM_SCALE,
121 .shift = KVM_SCALE,
122 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
123};
124
125static int kvm_register_clock(void)
126{
127 int cpu = smp_processor_id();
128 int low, high;
129 low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
130 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
131
132 return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
133}
134
/*
 * setup_secondary_clock hook installed by kvmclock_init(): register the
 * calling cpu's clock area, then continue with the native APIC clock setup.
 */
static void kvm_setup_secondary_clock(void)
{
	int err;

	/*
	 * The first cpu already has this clocksource initialized, so
	 * registration is not expected to fail here; warn if it does.
	 */
	err = kvm_register_clock();
	WARN_ON(err);

	/* ok, done with our trickery, call native */
	setup_secondary_APIC_clock();
}
145
146void __init kvmclock_init(void)
147{
148 if (!kvm_para_available())
149 return;
150
151 if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
152 if (kvm_register_clock())
153 return;
154 pv_time_ops.get_wallclock = kvm_get_wallclock;
155 pv_time_ops.set_wallclock = kvm_set_wallclock;
156 pv_time_ops.sched_clock = kvm_clock_read;
157 pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
158 clocksource_register(&kvm_clock);
159 }
160}
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 44cc9b933932..5a849ddd09ee 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -47,6 +47,7 @@
47#include <linux/pfn.h> 47#include <linux/pfn.h>
48#include <linux/pci.h> 48#include <linux/pci.h>
49#include <linux/init_ohci1394_dma.h> 49#include <linux/init_ohci1394_dma.h>
50#include <linux/kvm_para.h>
50 51
51#include <video/edid.h> 52#include <video/edid.h>
52 53
@@ -820,6 +821,10 @@ void __init setup_arch(char **cmdline_p)
820 821
821 max_low_pfn = setup_memory(); 822 max_low_pfn = setup_memory();
822 823
824#ifdef CONFIG_KVM_CLOCK
825 kvmclock_init();
826#endif
827
823#ifdef CONFIG_VMI 828#ifdef CONFIG_VMI
824 /* 829 /*
825 * Must be after max_low_pfn is determined, and before kernel 830 * Must be after max_low_pfn is determined, and before kernel
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 60e64c8eee92..8a9213c023ef 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -42,6 +42,7 @@
42#include <linux/ctype.h> 42#include <linux/ctype.h>
43#include <linux/uaccess.h> 43#include <linux/uaccess.h>
44#include <linux/init_ohci1394_dma.h> 44#include <linux/init_ohci1394_dma.h>
45#include <linux/kvm_para.h>
45 46
46#include <asm/mtrr.h> 47#include <asm/mtrr.h>
47#include <asm/uaccess.h> 48#include <asm/uaccess.h>
@@ -384,6 +385,10 @@ void __init setup_arch(char **cmdline_p)
384 385
385 io_delay_init(); 386 io_delay_init();
386 387
388#ifdef CONFIG_KVM_CLOCK
389 kvmclock_init();
390#endif
391
387#ifdef CONFIG_SMP 392#ifdef CONFIG_SMP
388 /* setup to use the early static init tables during kernel startup */ 393 /* setup to use the early static init tables during kernel startup */
389 x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init; 394 x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;