aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/DocBook/kgdb.tmpl20
-rw-r--r--arch/x86/Kconfig5
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/kvmclock.c89
-rw-r--r--arch/x86/kernel/pvclock.c141
-rw-r--r--arch/x86/kvm/i8254.c9
-rw-r--r--arch/x86/kvm/lapic.c1
-rw-r--r--arch/x86/kvm/mmu.c19
-rw-r--r--arch/x86/kvm/vmx.c19
-rw-r--r--arch/x86/kvm/x86.c91
-rw-r--r--arch/x86/xen/Kconfig3
-rw-r--r--arch/x86/xen/enlighten.c51
-rw-r--r--arch/x86/xen/mmu.c19
-rw-r--r--arch/x86/xen/mmu.h24
-rw-r--r--arch/x86/xen/time.c132
-rw-r--r--arch/x86/xen/xen-head.S4
-rw-r--r--drivers/char/drm/i915_drv.c1
-rw-r--r--drivers/watchdog/Makefile1
-rw-r--r--fs/gfs2/bmap.c23
-rw-r--r--fs/gfs2/rgrp.c2
-rw-r--r--include/asm-alpha/percpu.h2
-rw-r--r--include/asm-x86/kvm_host.h4
-rw-r--r--include/asm-x86/kvm_para.h18
-rw-r--r--include/asm-x86/pvclock-abi.h42
-rw-r--r--include/asm-x86/pvclock.h13
-rw-r--r--include/asm-x86/xen/page.h4
-rw-r--r--include/linux/kvm_host.h1
-rw-r--r--include/xen/interface/xen.h7
-rw-r--r--kernel/kgdb.c3
-rw-r--r--virt/kvm/ioapic.c31
30 files changed, 406 insertions, 374 deletions
diff --git a/Documentation/DocBook/kgdb.tmpl b/Documentation/DocBook/kgdb.tmpl
index 028a8444d95e..e8acd1f03456 100644
--- a/Documentation/DocBook/kgdb.tmpl
+++ b/Documentation/DocBook/kgdb.tmpl
@@ -84,10 +84,9 @@
84 runs an instance of gdb against the vmlinux file which contains 84 runs an instance of gdb against the vmlinux file which contains
85 the symbols (not boot image such as bzImage, zImage, uImage...). 85 the symbols (not boot image such as bzImage, zImage, uImage...).
86 In gdb the developer specifies the connection parameters and 86 In gdb the developer specifies the connection parameters and
87 connects to kgdb. Depending on which kgdb I/O modules exist in 87 connects to kgdb. The type of connection a developer makes with
88 the kernel for a given architecture, it may be possible to debug 88 gdb depends on the availability of kgdb I/O modules compiled as
89 the test machine's kernel with the development machine using a 89 built-ins or kernel modules in the test machine's kernel.
90 rs232 or ethernet connection.
91 </para> 90 </para>
92 </chapter> 91 </chapter>
93 <chapter id="CompilingAKernel"> 92 <chapter id="CompilingAKernel">
@@ -223,7 +222,7 @@
223 </para> 222 </para>
224 <para> 223 <para>
225 IMPORTANT NOTE: Using this option with kgdb over the console 224 IMPORTANT NOTE: Using this option with kgdb over the console
226 (kgdboc) or kgdb over ethernet (kgdboe) is not supported. 225 (kgdboc) is not supported.
227 </para> 226 </para>
228 </sect1> 227 </sect1>
229 </chapter> 228 </chapter>
@@ -249,18 +248,11 @@
249 (gdb) target remote /dev/ttyS0 248 (gdb) target remote /dev/ttyS0
250 </programlisting> 249 </programlisting>
251 <para> 250 <para>
252 Example (kgdb to a terminal server): 251 Example (kgdb to a terminal server on tcp port 2012):
253 </para> 252 </para>
254 <programlisting> 253 <programlisting>
255 % gdb ./vmlinux 254 % gdb ./vmlinux
256 (gdb) target remote udp:192.168.2.2:6443 255 (gdb) target remote 192.168.2.2:2012
257 </programlisting>
258 <para>
259 Example (kgdb over ethernet):
260 </para>
261 <programlisting>
262 % gdb ./vmlinux
263 (gdb) target remote udp:192.168.2.2:6443
264 </programlisting> 256 </programlisting>
265 <para> 257 <para>
266 Once connected, you can debug a kernel the way you would debug an 258 Once connected, you can debug a kernel the way you would debug an
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 52e18e6d2ba0..e0edaaa6920a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -383,6 +383,7 @@ config VMI
383config KVM_CLOCK 383config KVM_CLOCK
384 bool "KVM paravirtualized clock" 384 bool "KVM paravirtualized clock"
385 select PARAVIRT 385 select PARAVIRT
386 select PARAVIRT_CLOCK
386 depends on !(X86_VISWS || X86_VOYAGER) 387 depends on !(X86_VISWS || X86_VOYAGER)
387 help 388 help
388 Turning on this option will allow you to run a paravirtualized clock 389 Turning on this option will allow you to run a paravirtualized clock
@@ -410,6 +411,10 @@ config PARAVIRT
410 over full virtualization. However, when run without a hypervisor 411 over full virtualization. However, when run without a hypervisor
411 the kernel is theoretically slower and slightly larger. 412 the kernel is theoretically slower and slightly larger.
412 413
414config PARAVIRT_CLOCK
415 bool
416 default n
417
413endif 418endif
414 419
415config MEMTEST_BOOTPARAM 420config MEMTEST_BOOTPARAM
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5e618c3b4720..77807d4769c9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -82,6 +82,7 @@ obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
82obj-$(CONFIG_KVM_GUEST) += kvm.o 82obj-$(CONFIG_KVM_GUEST) += kvm.o
83obj-$(CONFIG_KVM_CLOCK) += kvmclock.o 83obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
84obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o 84obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
85obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
85 86
86obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o 87obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
87 88
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 08a30986d472..87edf1ceb1df 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -18,6 +18,7 @@
18 18
19#include <linux/clocksource.h> 19#include <linux/clocksource.h>
20#include <linux/kvm_para.h> 20#include <linux/kvm_para.h>
21#include <asm/pvclock.h>
21#include <asm/arch_hooks.h> 22#include <asm/arch_hooks.h>
22#include <asm/msr.h> 23#include <asm/msr.h>
23#include <asm/apic.h> 24#include <asm/apic.h>
@@ -36,18 +37,9 @@ static int parse_no_kvmclock(char *arg)
36early_param("no-kvmclock", parse_no_kvmclock); 37early_param("no-kvmclock", parse_no_kvmclock);
37 38
38/* The hypervisor will put information about time periodically here */ 39/* The hypervisor will put information about time periodically here */
39static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock); 40static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
40#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field 41static struct pvclock_wall_clock wall_clock;
41 42
42static inline u64 kvm_get_delta(u64 last_tsc)
43{
44 int cpu = smp_processor_id();
45 u64 delta = native_read_tsc() - last_tsc;
46 return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
47}
48
49static struct kvm_wall_clock wall_clock;
50static cycle_t kvm_clock_read(void);
51/* 43/*
52 * The wallclock is the time of day when we booted. Since then, some time may 44 * The wallclock is the time of day when we booted. Since then, some time may
53 * have elapsed since the hypervisor wrote the data. So we try to account for 45 * have elapsed since the hypervisor wrote the data. So we try to account for
@@ -55,64 +47,37 @@ static cycle_t kvm_clock_read(void);
55 */ 47 */
56static unsigned long kvm_get_wallclock(void) 48static unsigned long kvm_get_wallclock(void)
57{ 49{
58 u32 wc_sec, wc_nsec; 50 struct pvclock_vcpu_time_info *vcpu_time;
59 u64 delta;
60 struct timespec ts; 51 struct timespec ts;
61 int version, nsec;
62 int low, high; 52 int low, high;
63 53
64 low = (int)__pa(&wall_clock); 54 low = (int)__pa(&wall_clock);
65 high = ((u64)__pa(&wall_clock) >> 32); 55 high = ((u64)__pa(&wall_clock) >> 32);
56 native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
66 57
67 delta = kvm_clock_read(); 58 vcpu_time = &get_cpu_var(hv_clock);
59 pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
60 put_cpu_var(hv_clock);
68 61
69 native_write_msr(MSR_KVM_WALL_CLOCK, low, high); 62 return ts.tv_sec;
70 do {
71 version = wall_clock.wc_version;
72 rmb();
73 wc_sec = wall_clock.wc_sec;
74 wc_nsec = wall_clock.wc_nsec;
75 rmb();
76 } while ((wall_clock.wc_version != version) || (version & 1));
77
78 delta = kvm_clock_read() - delta;
79 delta += wc_nsec;
80 nsec = do_div(delta, NSEC_PER_SEC);
81 set_normalized_timespec(&ts, wc_sec + delta, nsec);
82 /*
83 * Of all mechanisms of time adjustment I've tested, this one
84 * was the champion!
85 */
86 return ts.tv_sec + 1;
87} 63}
88 64
89static int kvm_set_wallclock(unsigned long now) 65static int kvm_set_wallclock(unsigned long now)
90{ 66{
91 return 0; 67 return -1;
92} 68}
93 69
94/*
95 * This is our read_clock function. The host puts a tsc timestamp each time
96 * it updates a new time. Without the tsc adjustment, we can have a situation
97 * in which a vcpu starts to run earlier (smaller system_time), but probes
98 * time later (compared to another vcpu), leading to backwards time
99 */
100static cycle_t kvm_clock_read(void) 70static cycle_t kvm_clock_read(void)
101{ 71{
102 u64 last_tsc, now; 72 struct pvclock_vcpu_time_info *src;
103 int cpu; 73 cycle_t ret;
104 74
105 preempt_disable(); 75 src = &get_cpu_var(hv_clock);
106 cpu = smp_processor_id(); 76 ret = pvclock_clocksource_read(src);
107 77 put_cpu_var(hv_clock);
108 last_tsc = get_clock(cpu, tsc_timestamp); 78 return ret;
109 now = get_clock(cpu, system_time);
110
111 now += kvm_get_delta(last_tsc);
112 preempt_enable();
113
114 return now;
115} 79}
80
116static struct clocksource kvm_clock = { 81static struct clocksource kvm_clock = {
117 .name = "kvm-clock", 82 .name = "kvm-clock",
118 .read = kvm_clock_read, 83 .read = kvm_clock_read,
@@ -123,13 +88,14 @@ static struct clocksource kvm_clock = {
123 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 88 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
124}; 89};
125 90
126static int kvm_register_clock(void) 91static int kvm_register_clock(char *txt)
127{ 92{
128 int cpu = smp_processor_id(); 93 int cpu = smp_processor_id();
129 int low, high; 94 int low, high;
130 low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; 95 low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
131 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); 96 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
132 97 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
98 cpu, high, low, txt);
133 return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); 99 return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
134} 100}
135 101
@@ -140,12 +106,20 @@ static void kvm_setup_secondary_clock(void)
140 * Now that the first cpu already had this clocksource initialized, 106 * Now that the first cpu already had this clocksource initialized,
141 * we shouldn't fail. 107 * we shouldn't fail.
142 */ 108 */
143 WARN_ON(kvm_register_clock()); 109 WARN_ON(kvm_register_clock("secondary cpu clock"));
144 /* ok, done with our trickery, call native */ 110 /* ok, done with our trickery, call native */
145 setup_secondary_APIC_clock(); 111 setup_secondary_APIC_clock();
146} 112}
147#endif 113#endif
148 114
115#ifdef CONFIG_SMP
116void __init kvm_smp_prepare_boot_cpu(void)
117{
118 WARN_ON(kvm_register_clock("primary cpu clock"));
119 native_smp_prepare_boot_cpu();
120}
121#endif
122
149/* 123/*
150 * After the clock is registered, the host will keep writing to the 124 * After the clock is registered, the host will keep writing to the
151 * registered memory location. If the guest happens to shutdown, this memory 125 * registered memory location. If the guest happens to shutdown, this memory
@@ -174,7 +148,7 @@ void __init kvmclock_init(void)
174 return; 148 return;
175 149
176 if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { 150 if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
177 if (kvm_register_clock()) 151 if (kvm_register_clock("boot clock"))
178 return; 152 return;
179 pv_time_ops.get_wallclock = kvm_get_wallclock; 153 pv_time_ops.get_wallclock = kvm_get_wallclock;
180 pv_time_ops.set_wallclock = kvm_set_wallclock; 154 pv_time_ops.set_wallclock = kvm_set_wallclock;
@@ -182,6 +156,9 @@ void __init kvmclock_init(void)
182#ifdef CONFIG_X86_LOCAL_APIC 156#ifdef CONFIG_X86_LOCAL_APIC
183 pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; 157 pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
184#endif 158#endif
159#ifdef CONFIG_SMP
160 smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
161#endif
185 machine_ops.shutdown = kvm_shutdown; 162 machine_ops.shutdown = kvm_shutdown;
186#ifdef CONFIG_KEXEC 163#ifdef CONFIG_KEXEC
187 machine_ops.crash_shutdown = kvm_crash_shutdown; 164 machine_ops.crash_shutdown = kvm_crash_shutdown;
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
new file mode 100644
index 000000000000..05fbe9a0325a
--- /dev/null
+++ b/arch/x86/kernel/pvclock.c
@@ -0,0 +1,141 @@
1/* paravirtual clock -- common code used by kvm/xen
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2 of the License, or
6 (at your option) any later version.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16*/
17
18#include <linux/kernel.h>
19#include <linux/percpu.h>
20#include <asm/pvclock.h>
21
22/*
23 * These are periodically updated
24 * xen: magic shared_info page
25 * kvm: gpa registered via msr
26 * and then copied here.
27 */
28struct pvclock_shadow_time {
29 u64 tsc_timestamp; /* TSC at last update of time vals. */
30 u64 system_timestamp; /* Time, in nanosecs, since boot. */
31 u32 tsc_to_nsec_mul;
32 int tsc_shift;
33 u32 version;
34};
35
36/*
37 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
38 * yielding a 64-bit result.
39 */
40static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
41{
42 u64 product;
43#ifdef __i386__
44 u32 tmp1, tmp2;
45#endif
46
47 if (shift < 0)
48 delta >>= -shift;
49 else
50 delta <<= shift;
51
52#ifdef __i386__
53 __asm__ (
54 "mul %5 ; "
55 "mov %4,%%eax ; "
56 "mov %%edx,%4 ; "
57 "mul %5 ; "
58 "xor %5,%5 ; "
59 "add %4,%%eax ; "
60 "adc %5,%%edx ; "
61 : "=A" (product), "=r" (tmp1), "=r" (tmp2)
62 : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
63#elif __x86_64__
64 __asm__ (
65 "mul %%rdx ; shrd $32,%%rdx,%%rax"
66 : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
67#else
68#error implement me!
69#endif
70
71 return product;
72}
73
74static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
75{
76 u64 delta = native_read_tsc() - shadow->tsc_timestamp;
77 return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
78}
79
80/*
81 * Reads a consistent set of time-base values from hypervisor,
82 * into a shadow data area.
83 */
84static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
85 struct pvclock_vcpu_time_info *src)
86{
87 do {
88 dst->version = src->version;
89 rmb(); /* fetch version before data */
90 dst->tsc_timestamp = src->tsc_timestamp;
91 dst->system_timestamp = src->system_time;
92 dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
93 dst->tsc_shift = src->tsc_shift;
94 rmb(); /* test version after fetching data */
95 } while ((src->version & 1) || (dst->version != src->version));
96
97 return dst->version;
98}
99
100cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
101{
102 struct pvclock_shadow_time shadow;
103 unsigned version;
104 cycle_t ret, offset;
105
106 do {
107 version = pvclock_get_time_values(&shadow, src);
108 barrier();
109 offset = pvclock_get_nsec_offset(&shadow);
110 ret = shadow.system_timestamp + offset;
111 barrier();
112 } while (version != src->version);
113
114 return ret;
115}
116
117void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
118 struct pvclock_vcpu_time_info *vcpu_time,
119 struct timespec *ts)
120{
121 u32 version;
122 u64 delta;
123 struct timespec now;
124
125 /* get wallclock at system boot */
126 do {
127 version = wall_clock->version;
128 rmb(); /* fetch version before time */
129 now.tv_sec = wall_clock->sec;
130 now.tv_nsec = wall_clock->nsec;
131 rmb(); /* fetch time before checking version */
132 } while ((wall_clock->version & 1) || (version != wall_clock->version));
133
134 delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */
135 delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
136
137 now.tv_nsec = do_div(delta, NSEC_PER_SEC);
138 now.tv_sec = delta;
139
140 set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
141}
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index f2f5d260874e..3829aa7b663f 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -200,9 +200,12 @@ int __pit_timer_fn(struct kvm_kpit_state *ps)
200 200
201 atomic_inc(&pt->pending); 201 atomic_inc(&pt->pending);
202 smp_mb__after_atomic_inc(); 202 smp_mb__after_atomic_inc();
203 if (vcpu0 && waitqueue_active(&vcpu0->wq)) { 203 if (vcpu0) {
204 vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; 204 set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
205 wake_up_interruptible(&vcpu0->wq); 205 if (waitqueue_active(&vcpu0->wq)) {
206 vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
207 wake_up_interruptible(&vcpu0->wq);
208 }
206 } 209 }
207 210
208 pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); 211 pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c297c50eba63..ebc03f5ae162 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -940,6 +940,7 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
940 wait_queue_head_t *q = &apic->vcpu->wq; 940 wait_queue_head_t *q = &apic->vcpu->wq;
941 941
942 atomic_inc(&apic->timer.pending); 942 atomic_inc(&apic->timer.pending);
943 set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests);
943 if (waitqueue_active(q)) { 944 if (waitqueue_active(q)) {
944 apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 945 apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
945 wake_up_interruptible(q); 946 wake_up_interruptible(q);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ee3f53098f0c..7e7c3969f7a2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -640,6 +640,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
640 rmap_remove(kvm, spte); 640 rmap_remove(kvm, spte);
641 --kvm->stat.lpages; 641 --kvm->stat.lpages;
642 set_shadow_pte(spte, shadow_trap_nonpresent_pte); 642 set_shadow_pte(spte, shadow_trap_nonpresent_pte);
643 spte = NULL;
643 write_protected = 1; 644 write_protected = 1;
644 } 645 }
645 spte = rmap_next(kvm, rmapp, spte); 646 spte = rmap_next(kvm, rmapp, spte);
@@ -1082,10 +1083,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1082 struct kvm_mmu_page *shadow; 1083 struct kvm_mmu_page *shadow;
1083 1084
1084 spte |= PT_WRITABLE_MASK; 1085 spte |= PT_WRITABLE_MASK;
1085 if (user_fault) {
1086 mmu_unshadow(vcpu->kvm, gfn);
1087 goto unshadowed;
1088 }
1089 1086
1090 shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); 1087 shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
1091 if (shadow || 1088 if (shadow ||
@@ -1102,8 +1099,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1102 } 1099 }
1103 } 1100 }
1104 1101
1105unshadowed:
1106
1107 if (pte_access & ACC_WRITE_MASK) 1102 if (pte_access & ACC_WRITE_MASK)
1108 mark_page_dirty(vcpu->kvm, gfn); 1103 mark_page_dirty(vcpu->kvm, gfn);
1109 1104
@@ -1580,11 +1575,13 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
1580 u64 *spte, 1575 u64 *spte,
1581 const void *new) 1576 const void *new)
1582{ 1577{
1583 if ((sp->role.level != PT_PAGE_TABLE_LEVEL) 1578 if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
1584 && !vcpu->arch.update_pte.largepage) { 1579 if (!vcpu->arch.update_pte.largepage ||
1585 ++vcpu->kvm->stat.mmu_pde_zapped; 1580 sp->role.glevels == PT32_ROOT_LEVEL) {
1586 return; 1581 ++vcpu->kvm->stat.mmu_pde_zapped;
1587 } 1582 return;
1583 }
1584 }
1588 1585
1589 ++vcpu->kvm->stat.mmu_pte_updated; 1586 ++vcpu->kvm->stat.mmu_pte_updated;
1590 if (sp->role.glevels == PT32_ROOT_LEVEL) 1587 if (sp->role.glevels == PT32_ROOT_LEVEL)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 02efbe75f317..540e95179074 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -566,7 +566,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
566 load_transition_efer(vmx); 566 load_transition_efer(vmx);
567} 567}
568 568
569static void vmx_load_host_state(struct vcpu_vmx *vmx) 569static void __vmx_load_host_state(struct vcpu_vmx *vmx)
570{ 570{
571 unsigned long flags; 571 unsigned long flags;
572 572
@@ -596,6 +596,13 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx)
596 reload_host_efer(vmx); 596 reload_host_efer(vmx);
597} 597}
598 598
599static void vmx_load_host_state(struct vcpu_vmx *vmx)
600{
601 preempt_disable();
602 __vmx_load_host_state(vmx);
603 preempt_enable();
604}
605
599/* 606/*
600 * Switches to specified vcpu, until a matching vcpu_put(), but assumes 607 * Switches to specified vcpu, until a matching vcpu_put(), but assumes
601 * vcpu mutex is already taken. 608 * vcpu mutex is already taken.
@@ -654,7 +661,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
654 661
655static void vmx_vcpu_put(struct kvm_vcpu *vcpu) 662static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
656{ 663{
657 vmx_load_host_state(to_vmx(vcpu)); 664 __vmx_load_host_state(to_vmx(vcpu));
658} 665}
659 666
660static void vmx_fpu_activate(struct kvm_vcpu *vcpu) 667static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
@@ -884,11 +891,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
884 switch (msr_index) { 891 switch (msr_index) {
885#ifdef CONFIG_X86_64 892#ifdef CONFIG_X86_64
886 case MSR_EFER: 893 case MSR_EFER:
894 vmx_load_host_state(vmx);
887 ret = kvm_set_msr_common(vcpu, msr_index, data); 895 ret = kvm_set_msr_common(vcpu, msr_index, data);
888 if (vmx->host_state.loaded) {
889 reload_host_efer(vmx);
890 load_transition_efer(vmx);
891 }
892 break; 896 break;
893 case MSR_FS_BASE: 897 case MSR_FS_BASE:
894 vmcs_writel(GUEST_FS_BASE, data); 898 vmcs_writel(GUEST_FS_BASE, data);
@@ -910,11 +914,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
910 guest_write_tsc(data); 914 guest_write_tsc(data);
911 break; 915 break;
912 default: 916 default:
917 vmx_load_host_state(vmx);
913 msr = find_msr_entry(vmx, msr_index); 918 msr = find_msr_entry(vmx, msr_index);
914 if (msr) { 919 if (msr) {
915 msr->data = data; 920 msr->data = data;
916 if (vmx->host_state.loaded)
917 load_msrs(vmx->guest_msrs, vmx->save_nmsrs);
918 break; 921 break;
919 } 922 }
920 ret = kvm_set_msr_common(vcpu, msr_index, data); 923 ret = kvm_set_msr_common(vcpu, msr_index, data);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00acf1301a15..63a77caa59f1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -492,8 +492,8 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
492static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) 492static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
493{ 493{
494 static int version; 494 static int version;
495 struct kvm_wall_clock wc; 495 struct pvclock_wall_clock wc;
496 struct timespec wc_ts; 496 struct timespec now, sys, boot;
497 497
498 if (!wall_clock) 498 if (!wall_clock)
499 return; 499 return;
@@ -502,10 +502,19 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
502 502
503 kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); 503 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
504 504
505 wc_ts = current_kernel_time(); 505 /*
506 wc.wc_sec = wc_ts.tv_sec; 506 * The guest calculates current wall clock time by adding
507 wc.wc_nsec = wc_ts.tv_nsec; 507 * system time (updated by kvm_write_guest_time below) to the
508 wc.wc_version = version; 508 * wall clock specified here. guest system time equals host
509 * system time for us, thus we must fill in host boot time here.
510 */
511 now = current_kernel_time();
512 ktime_get_ts(&sys);
513 boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
514
515 wc.sec = boot.tv_sec;
516 wc.nsec = boot.tv_nsec;
517 wc.version = version;
509 518
510 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc)); 519 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
511 520
@@ -513,6 +522,45 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
513 kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); 522 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
514} 523}
515 524
525static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
526{
527 uint32_t quotient, remainder;
528
529 /* Don't try to replace with do_div(), this one calculates
530 * "(dividend << 32) / divisor" */
531 __asm__ ( "divl %4"
532 : "=a" (quotient), "=d" (remainder)
533 : "0" (0), "1" (dividend), "r" (divisor) );
534 return quotient;
535}
536
537static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *hv_clock)
538{
539 uint64_t nsecs = 1000000000LL;
540 int32_t shift = 0;
541 uint64_t tps64;
542 uint32_t tps32;
543
544 tps64 = tsc_khz * 1000LL;
545 while (tps64 > nsecs*2) {
546 tps64 >>= 1;
547 shift--;
548 }
549
550 tps32 = (uint32_t)tps64;
551 while (tps32 <= (uint32_t)nsecs) {
552 tps32 <<= 1;
553 shift++;
554 }
555
556 hv_clock->tsc_shift = shift;
557 hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32);
558
559 pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n",
560 __FUNCTION__, tsc_khz, hv_clock->tsc_shift,
561 hv_clock->tsc_to_system_mul);
562}
563
516static void kvm_write_guest_time(struct kvm_vcpu *v) 564static void kvm_write_guest_time(struct kvm_vcpu *v)
517{ 565{
518 struct timespec ts; 566 struct timespec ts;
@@ -523,6 +571,11 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
523 if ((!vcpu->time_page)) 571 if ((!vcpu->time_page))
524 return; 572 return;
525 573
574 if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) {
575 kvm_set_time_scale(tsc_khz, &vcpu->hv_clock);
576 vcpu->hv_clock_tsc_khz = tsc_khz;
577 }
578
526 /* Keep irq disabled to prevent changes to the clock */ 579 /* Keep irq disabled to prevent changes to the clock */
527 local_irq_save(flags); 580 local_irq_save(flags);
528 kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER, 581 kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
@@ -537,14 +590,14 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
537 /* 590 /*
538 * The interface expects us to write an even number signaling that the 591 * The interface expects us to write an even number signaling that the
539 * update is finished. Since the guest won't see the intermediate 592 * update is finished. Since the guest won't see the intermediate
540 * state, we just write "2" at the end 593 * state, we just increase by 2 at the end.
541 */ 594 */
542 vcpu->hv_clock.version = 2; 595 vcpu->hv_clock.version += 2;
543 596
544 shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0); 597 shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
545 598
546 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, 599 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
547 sizeof(vcpu->hv_clock)); 600 sizeof(vcpu->hv_clock));
548 601
549 kunmap_atomic(shared_kaddr, KM_USER0); 602 kunmap_atomic(shared_kaddr, KM_USER0);
550 603
@@ -599,10 +652,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
599 /* ...but clean it before doing the actual write */ 652 /* ...but clean it before doing the actual write */
600 vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); 653 vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
601 654
602 vcpu->arch.hv_clock.tsc_to_system_mul =
603 clocksource_khz2mult(tsc_khz, 22);
604 vcpu->arch.hv_clock.tsc_shift = 22;
605
606 down_read(&current->mm->mmap_sem); 655 down_read(&current->mm->mmap_sem);
607 vcpu->arch.time_page = 656 vcpu->arch.time_page =
608 gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); 657 gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
@@ -2759,6 +2808,8 @@ again:
2759 if (vcpu->requests) { 2808 if (vcpu->requests) {
2760 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) 2809 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
2761 __kvm_migrate_timers(vcpu); 2810 __kvm_migrate_timers(vcpu);
2811 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
2812 kvm_x86_ops->tlb_flush(vcpu);
2762 if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, 2813 if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
2763 &vcpu->requests)) { 2814 &vcpu->requests)) {
2764 kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS; 2815 kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS;
@@ -2772,6 +2823,7 @@ again:
2772 } 2823 }
2773 } 2824 }
2774 2825
2826 clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
2775 kvm_inject_pending_timer_irqs(vcpu); 2827 kvm_inject_pending_timer_irqs(vcpu);
2776 2828
2777 preempt_disable(); 2829 preempt_disable();
@@ -2781,21 +2833,13 @@ again:
2781 2833
2782 local_irq_disable(); 2834 local_irq_disable();
2783 2835
2784 if (need_resched()) { 2836 if (vcpu->requests || need_resched()) {
2785 local_irq_enable(); 2837 local_irq_enable();
2786 preempt_enable(); 2838 preempt_enable();
2787 r = 1; 2839 r = 1;
2788 goto out; 2840 goto out;
2789 } 2841 }
2790 2842
2791 if (vcpu->requests)
2792 if (test_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) {
2793 local_irq_enable();
2794 preempt_enable();
2795 r = 1;
2796 goto out;
2797 }
2798
2799 if (signal_pending(current)) { 2843 if (signal_pending(current)) {
2800 local_irq_enable(); 2844 local_irq_enable();
2801 preempt_enable(); 2845 preempt_enable();
@@ -2825,9 +2869,6 @@ again:
2825 2869
2826 kvm_guest_enter(); 2870 kvm_guest_enter();
2827 2871
2828 if (vcpu->requests)
2829 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
2830 kvm_x86_ops->tlb_flush(vcpu);
2831 2872
2832 KVMTRACE_0D(VMENTRY, vcpu, entryexit); 2873 KVMTRACE_0D(VMENTRY, vcpu, entryexit);
2833 kvm_x86_ops->run(vcpu, kvm_run); 2874 kvm_x86_ops->run(vcpu, kvm_run);
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 2e641be2737e..6c388e593bc8 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -5,8 +5,9 @@
5config XEN 5config XEN
6 bool "Xen guest support" 6 bool "Xen guest support"
7 select PARAVIRT 7 select PARAVIRT
8 select PARAVIRT_CLOCK
8 depends on X86_32 9 depends on X86_32
9 depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER) 10 depends on X86_CMPXCHG && X86_TSC && X86_PAE && !(X86_VISWS || X86_VOYAGER)
10 help 11 help
11 This is the Linux Xen port. Enabling this will allow the 12 This is the Linux Xen port. Enabling this will allow the
12 kernel to boot in a paravirtualized environment under the 13 kernel to boot in a paravirtualized environment under the
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c048de34d6a1..f09c1c69c37a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -785,38 +785,35 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
785static __init void xen_pagetable_setup_start(pgd_t *base) 785static __init void xen_pagetable_setup_start(pgd_t *base)
786{ 786{
787 pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; 787 pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
788 int i;
788 789
789 /* special set_pte for pagetable initialization */ 790 /* special set_pte for pagetable initialization */
790 pv_mmu_ops.set_pte = xen_set_pte_init; 791 pv_mmu_ops.set_pte = xen_set_pte_init;
791 792
792 init_mm.pgd = base; 793 init_mm.pgd = base;
793 /* 794 /*
794 * copy top-level of Xen-supplied pagetable into place. For 795 * copy top-level of Xen-supplied pagetable into place. This
795 * !PAE we can use this as-is, but for PAE it is a stand-in 796 * is a stand-in while we copy the pmd pages.
796 * while we copy the pmd pages.
797 */ 797 */
798 memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t)); 798 memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t));
799 799
800 if (PTRS_PER_PMD > 1) { 800 /*
801 int i; 801 * For PAE, need to allocate new pmds, rather than
802 /* 802 * share Xen's, since Xen doesn't like pmd's being
803 * For PAE, need to allocate new pmds, rather than 803 * shared between address spaces.
804 * share Xen's, since Xen doesn't like pmd's being 804 */
805 * shared between address spaces. 805 for (i = 0; i < PTRS_PER_PGD; i++) {
806 */ 806 if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
807 for (i = 0; i < PTRS_PER_PGD; i++) { 807 pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
808 if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
809 pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
810 808
811 memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), 809 memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
812 PAGE_SIZE); 810 PAGE_SIZE);
813 811
814 make_lowmem_page_readonly(pmd); 812 make_lowmem_page_readonly(pmd);
815 813
816 set_pgd(&base[i], __pgd(1 + __pa(pmd))); 814 set_pgd(&base[i], __pgd(1 + __pa(pmd)));
817 } else 815 } else
818 pgd_clear(&base[i]); 816 pgd_clear(&base[i]);
819 }
820 } 817 }
821 818
822 /* make sure zero_page is mapped RO so we can use it in pagetables */ 819 /* make sure zero_page is mapped RO so we can use it in pagetables */
@@ -873,17 +870,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
873 870
874 /* Actually pin the pagetable down, but we can't set PG_pinned 871 /* Actually pin the pagetable down, but we can't set PG_pinned
875 yet because the page structures don't exist yet. */ 872 yet because the page structures don't exist yet. */
876 { 873 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
877 unsigned level;
878
879#ifdef CONFIG_X86_PAE
880 level = MMUEXT_PIN_L3_TABLE;
881#else
882 level = MMUEXT_PIN_L2_TABLE;
883#endif
884
885 pin_pagetable_pfn(level, PFN_DOWN(__pa(base)));
886 }
887} 874}
888 875
889/* This is called once we have the cpu_possible_map */ 876/* This is called once we have the cpu_possible_map */
@@ -1093,7 +1080,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1093 .make_pte = xen_make_pte, 1080 .make_pte = xen_make_pte,
1094 .make_pgd = xen_make_pgd, 1081 .make_pgd = xen_make_pgd,
1095 1082
1096#ifdef CONFIG_X86_PAE
1097 .set_pte_atomic = xen_set_pte_atomic, 1083 .set_pte_atomic = xen_set_pte_atomic,
1098 .set_pte_present = xen_set_pte_at, 1084 .set_pte_present = xen_set_pte_at,
1099 .set_pud = xen_set_pud, 1085 .set_pud = xen_set_pud,
@@ -1102,7 +1088,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1102 1088
1103 .make_pmd = xen_make_pmd, 1089 .make_pmd = xen_make_pmd,
1104 .pmd_val = xen_pmd_val, 1090 .pmd_val = xen_pmd_val,
1105#endif /* PAE */
1106 1091
1107 .activate_mm = xen_activate_mm, 1092 .activate_mm = xen_activate_mm,
1108 .dup_mmap = xen_dup_mmap, 1093 .dup_mmap = xen_dup_mmap,
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 265601d5a6ae..df40bf74ea75 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -228,7 +228,7 @@ pmdval_t xen_pmd_val(pmd_t pmd)
228{ 228{
229 return pte_mfn_to_pfn(pmd.pmd); 229 return pte_mfn_to_pfn(pmd.pmd);
230} 230}
231#ifdef CONFIG_X86_PAE 231
232void xen_set_pud(pud_t *ptr, pud_t val) 232void xen_set_pud(pud_t *ptr, pud_t val)
233{ 233{
234 struct multicall_space mcs; 234 struct multicall_space mcs;
@@ -276,12 +276,6 @@ pmd_t xen_make_pmd(pmdval_t pmd)
276 pmd = pte_pfn_to_mfn(pmd); 276 pmd = pte_pfn_to_mfn(pmd);
277 return native_make_pmd(pmd); 277 return native_make_pmd(pmd);
278} 278}
279#else /* !PAE */
280void xen_set_pte(pte_t *ptep, pte_t pte)
281{
282 *ptep = pte;
283}
284#endif /* CONFIG_X86_PAE */
285 279
286/* 280/*
287 (Yet another) pagetable walker. This one is intended for pinning a 281 (Yet another) pagetable walker. This one is intended for pinning a
@@ -434,8 +428,6 @@ static int pin_page(struct page *page, enum pt_level level)
434 read-only, and can be pinned. */ 428 read-only, and can be pinned. */
435void xen_pgd_pin(pgd_t *pgd) 429void xen_pgd_pin(pgd_t *pgd)
436{ 430{
437 unsigned level;
438
439 xen_mc_batch(); 431 xen_mc_batch();
440 432
441 if (pgd_walk(pgd, pin_page, TASK_SIZE)) { 433 if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
@@ -445,14 +437,7 @@ void xen_pgd_pin(pgd_t *pgd)
445 xen_mc_batch(); 437 xen_mc_batch();
446 } 438 }
447 439
448#ifdef CONFIG_X86_PAE 440 xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
449 level = MMUEXT_PIN_L3_TABLE;
450#else
451 level = MMUEXT_PIN_L2_TABLE;
452#endif
453
454 xen_do_pin(level, PFN_DOWN(__pa(pgd)));
455
456 xen_mc_issue(0); 441 xen_mc_issue(0);
457} 442}
458 443
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index b5e189b1519d..5fe961caffd4 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -37,14 +37,13 @@ void xen_exit_mmap(struct mm_struct *mm);
37void xen_pgd_pin(pgd_t *pgd); 37void xen_pgd_pin(pgd_t *pgd);
38//void xen_pgd_unpin(pgd_t *pgd); 38//void xen_pgd_unpin(pgd_t *pgd);
39 39
40#ifdef CONFIG_X86_PAE 40pteval_t xen_pte_val(pte_t);
41unsigned long long xen_pte_val(pte_t); 41pmdval_t xen_pmd_val(pmd_t);
42unsigned long long xen_pmd_val(pmd_t); 42pgdval_t xen_pgd_val(pgd_t);
43unsigned long long xen_pgd_val(pgd_t);
44 43
45pte_t xen_make_pte(unsigned long long); 44pte_t xen_make_pte(pteval_t);
46pmd_t xen_make_pmd(unsigned long long); 45pmd_t xen_make_pmd(pmdval_t);
47pgd_t xen_make_pgd(unsigned long long); 46pgd_t xen_make_pgd(pgdval_t);
48 47
49void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, 48void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
50 pte_t *ptep, pte_t pteval); 49 pte_t *ptep, pte_t pteval);
@@ -53,15 +52,4 @@ void xen_set_pud(pud_t *ptr, pud_t val);
53void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); 52void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
54void xen_pmd_clear(pmd_t *pmdp); 53void xen_pmd_clear(pmd_t *pmdp);
55 54
56
57#else
58unsigned long xen_pte_val(pte_t);
59unsigned long xen_pmd_val(pmd_t);
60unsigned long xen_pgd_val(pgd_t);
61
62pte_t xen_make_pte(unsigned long);
63pmd_t xen_make_pmd(unsigned long);
64pgd_t xen_make_pgd(unsigned long);
65#endif
66
67#endif /* _XEN_MMU_H */ 55#endif /* _XEN_MMU_H */
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 52b2e3856980..41e217503c96 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -14,6 +14,7 @@
14#include <linux/kernel_stat.h> 14#include <linux/kernel_stat.h>
15#include <linux/math64.h> 15#include <linux/math64.h>
16 16
17#include <asm/pvclock.h>
17#include <asm/xen/hypervisor.h> 18#include <asm/xen/hypervisor.h>
18#include <asm/xen/hypercall.h> 19#include <asm/xen/hypercall.h>
19 20
@@ -31,17 +32,6 @@
31 32
32static cycle_t xen_clocksource_read(void); 33static cycle_t xen_clocksource_read(void);
33 34
34/* These are perodically updated in shared_info, and then copied here. */
35struct shadow_time_info {
36 u64 tsc_timestamp; /* TSC at last update of time vals. */
37 u64 system_timestamp; /* Time, in nanosecs, since boot. */
38 u32 tsc_to_nsec_mul;
39 int tsc_shift;
40 u32 version;
41};
42
43static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
44
45/* runstate info updated by Xen */ 35/* runstate info updated by Xen */
46static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); 36static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
47 37
@@ -211,7 +201,7 @@ unsigned long long xen_sched_clock(void)
211unsigned long xen_cpu_khz(void) 201unsigned long xen_cpu_khz(void)
212{ 202{
213 u64 xen_khz = 1000000ULL << 32; 203 u64 xen_khz = 1000000ULL << 32;
214 const struct vcpu_time_info *info = 204 const struct pvclock_vcpu_time_info *info =
215 &HYPERVISOR_shared_info->vcpu_info[0].time; 205 &HYPERVISOR_shared_info->vcpu_info[0].time;
216 206
217 do_div(xen_khz, info->tsc_to_system_mul); 207 do_div(xen_khz, info->tsc_to_system_mul);
@@ -223,121 +213,26 @@ unsigned long xen_cpu_khz(void)
223 return xen_khz; 213 return xen_khz;
224} 214}
225 215
226/*
227 * Reads a consistent set of time-base values from Xen, into a shadow data
228 * area.
229 */
230static unsigned get_time_values_from_xen(void)
231{
232 struct vcpu_time_info *src;
233 struct shadow_time_info *dst;
234
235 /* src is shared memory with the hypervisor, so we need to
236 make sure we get a consistent snapshot, even in the face of
237 being preempted. */
238 src = &__get_cpu_var(xen_vcpu)->time;
239 dst = &__get_cpu_var(shadow_time);
240
241 do {
242 dst->version = src->version;
243 rmb(); /* fetch version before data */
244 dst->tsc_timestamp = src->tsc_timestamp;
245 dst->system_timestamp = src->system_time;
246 dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
247 dst->tsc_shift = src->tsc_shift;
248 rmb(); /* test version after fetching data */
249 } while ((src->version & 1) | (dst->version ^ src->version));
250
251 return dst->version;
252}
253
254/*
255 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
256 * yielding a 64-bit result.
257 */
258static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
259{
260 u64 product;
261#ifdef __i386__
262 u32 tmp1, tmp2;
263#endif
264
265 if (shift < 0)
266 delta >>= -shift;
267 else
268 delta <<= shift;
269
270#ifdef __i386__
271 __asm__ (
272 "mul %5 ; "
273 "mov %4,%%eax ; "
274 "mov %%edx,%4 ; "
275 "mul %5 ; "
276 "xor %5,%5 ; "
277 "add %4,%%eax ; "
278 "adc %5,%%edx ; "
279 : "=A" (product), "=r" (tmp1), "=r" (tmp2)
280 : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
281#elif __x86_64__
282 __asm__ (
283 "mul %%rdx ; shrd $32,%%rdx,%%rax"
284 : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
285#else
286#error implement me!
287#endif
288
289 return product;
290}
291
292static u64 get_nsec_offset(struct shadow_time_info *shadow)
293{
294 u64 now, delta;
295 now = native_read_tsc();
296 delta = now - shadow->tsc_timestamp;
297 return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
298}
299
300static cycle_t xen_clocksource_read(void) 216static cycle_t xen_clocksource_read(void)
301{ 217{
302 struct shadow_time_info *shadow = &get_cpu_var(shadow_time); 218 struct pvclock_vcpu_time_info *src;
303 cycle_t ret; 219 cycle_t ret;
304 unsigned version;
305
306 do {
307 version = get_time_values_from_xen();
308 barrier();
309 ret = shadow->system_timestamp + get_nsec_offset(shadow);
310 barrier();
311 } while (version != __get_cpu_var(xen_vcpu)->time.version);
312
313 put_cpu_var(shadow_time);
314 220
221 src = &get_cpu_var(xen_vcpu)->time;
222 ret = pvclock_clocksource_read(src);
223 put_cpu_var(xen_vcpu);
315 return ret; 224 return ret;
316} 225}
317 226
318static void xen_read_wallclock(struct timespec *ts) 227static void xen_read_wallclock(struct timespec *ts)
319{ 228{
320 const struct shared_info *s = HYPERVISOR_shared_info; 229 struct shared_info *s = HYPERVISOR_shared_info;
321 u32 version; 230 struct pvclock_wall_clock *wall_clock = &(s->wc);
322 u64 delta; 231 struct pvclock_vcpu_time_info *vcpu_time;
323 struct timespec now;
324
325 /* get wallclock at system boot */
326 do {
327 version = s->wc_version;
328 rmb(); /* fetch version before time */
329 now.tv_sec = s->wc_sec;
330 now.tv_nsec = s->wc_nsec;
331 rmb(); /* fetch time before checking version */
332 } while ((s->wc_version & 1) | (version ^ s->wc_version));
333 232
334 delta = xen_clocksource_read(); /* time since system boot */ 233 vcpu_time = &get_cpu_var(xen_vcpu)->time;
335 delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; 234 pvclock_read_wallclock(wall_clock, vcpu_time, ts);
336 235 put_cpu_var(xen_vcpu);
337 now.tv_nsec = do_div(delta, NSEC_PER_SEC);
338 now.tv_sec = delta;
339
340 set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
341} 236}
342 237
343unsigned long xen_get_wallclock(void) 238unsigned long xen_get_wallclock(void)
@@ -345,7 +240,6 @@ unsigned long xen_get_wallclock(void)
345 struct timespec ts; 240 struct timespec ts;
346 241
347 xen_read_wallclock(&ts); 242 xen_read_wallclock(&ts);
348
349 return ts.tv_sec; 243 return ts.tv_sec;
350} 244}
351 245
@@ -569,8 +463,6 @@ __init void xen_time_init(void)
569{ 463{
570 int cpu = smp_processor_id(); 464 int cpu = smp_processor_id();
571 465
572 get_time_values_from_xen();
573
574 clocksource_register(&xen_clocksource); 466 clocksource_register(&xen_clocksource);
575 467
576 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { 468 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 3175e973fd0d..6ec3b4f7719b 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -30,11 +30,7 @@ ENTRY(hypercall_page)
30 ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen) 30 ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen)
31 ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page) 31 ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page)
32 ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") 32 ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb")
33#ifdef CONFIG_X86_PAE
34 ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") 33 ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
35#else
36 ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "no")
37#endif
38 ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") 34 ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
39 35
40#endif /*CONFIG_XEN */ 36#endif /*CONFIG_XEN */
diff --git a/drivers/char/drm/i915_drv.c b/drivers/char/drm/i915_drv.c
index e8f3d682e3b1..93aed1c38bd2 100644
--- a/drivers/char/drm/i915_drv.c
+++ b/drivers/char/drm/i915_drv.c
@@ -389,6 +389,7 @@ static int i915_resume(struct drm_device *dev)
389 pci_restore_state(dev->pdev); 389 pci_restore_state(dev->pdev);
390 if (pci_enable_device(dev->pdev)) 390 if (pci_enable_device(dev->pdev))
391 return -1; 391 return -1;
392 pci_set_master(dev->pdev);
392 393
393 pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB); 394 pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB);
394 395
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index 8662a6b7a30b..25b352b664d9 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -68,7 +68,6 @@ obj-$(CONFIG_WAFER_WDT) += wafer5823wdt.o
68obj-$(CONFIG_I6300ESB_WDT) += i6300esb.o 68obj-$(CONFIG_I6300ESB_WDT) += i6300esb.o
69obj-$(CONFIG_ITCO_WDT) += iTCO_wdt.o iTCO_vendor_support.o 69obj-$(CONFIG_ITCO_WDT) += iTCO_wdt.o iTCO_vendor_support.o
70obj-$(CONFIG_IT8712F_WDT) += it8712f_wdt.o 70obj-$(CONFIG_IT8712F_WDT) += it8712f_wdt.o
71CFLAGS_hpwdt.o += -O
72obj-$(CONFIG_HP_WATCHDOG) += hpwdt.o 71obj-$(CONFIG_HP_WATCHDOG) += hpwdt.o
73obj-$(CONFIG_SC1200_WDT) += sc1200wdt.o 72obj-$(CONFIG_SC1200_WDT) += sc1200wdt.o
74obj-$(CONFIG_SCx200_WDT) += scx200_wdt.o 73obj-$(CONFIG_SCx200_WDT) += scx200_wdt.o
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index c19184f2e70e..bec76b1c2bb0 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -246,15 +246,11 @@ static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
246 246
247} 247}
248 248
249static inline unsigned int zero_metapath_length(const struct metapath *mp, 249static inline unsigned int metapath_branch_start(const struct metapath *mp)
250 unsigned height)
251{ 250{
252 unsigned int i; 251 if (mp->mp_list[0] == 0)
253 for (i = 0; i < height - 1; i++) { 252 return 2;
254 if (mp->mp_list[i] != 0) 253 return 1;
255 return i;
256 }
257 return height;
258} 254}
259 255
260/** 256/**
@@ -436,7 +432,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
436 struct gfs2_sbd *sdp = GFS2_SB(inode); 432 struct gfs2_sbd *sdp = GFS2_SB(inode);
437 struct buffer_head *dibh = mp->mp_bh[0]; 433 struct buffer_head *dibh = mp->mp_bh[0];
438 u64 bn, dblock = 0; 434 u64 bn, dblock = 0;
439 unsigned n, i, blks, alloced = 0, iblks = 0, zmpl = 0; 435 unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
440 unsigned dblks = 0; 436 unsigned dblks = 0;
441 unsigned ptrs_per_blk; 437 unsigned ptrs_per_blk;
442 const unsigned end_of_metadata = height - 1; 438 const unsigned end_of_metadata = height - 1;
@@ -471,9 +467,8 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
471 /* Building up tree height */ 467 /* Building up tree height */
472 state = ALLOC_GROW_HEIGHT; 468 state = ALLOC_GROW_HEIGHT;
473 iblks = height - ip->i_height; 469 iblks = height - ip->i_height;
474 zmpl = zero_metapath_length(mp, height); 470 branch_start = metapath_branch_start(mp);
475 iblks -= zmpl; 471 iblks += (height - branch_start);
476 iblks += height;
477 } 472 }
478 } 473 }
479 474
@@ -509,13 +504,13 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
509 sizeof(struct gfs2_meta_header)); 504 sizeof(struct gfs2_meta_header));
510 *ptr = zero_bn; 505 *ptr = zero_bn;
511 state = ALLOC_GROW_DEPTH; 506 state = ALLOC_GROW_DEPTH;
512 for(i = zmpl; i < height; i++) { 507 for(i = branch_start; i < height; i++) {
513 if (mp->mp_bh[i] == NULL) 508 if (mp->mp_bh[i] == NULL)
514 break; 509 break;
515 brelse(mp->mp_bh[i]); 510 brelse(mp->mp_bh[i]);
516 mp->mp_bh[i] = NULL; 511 mp->mp_bh[i] = NULL;
517 } 512 }
518 i = zmpl; 513 i = branch_start;
519 } 514 }
520 if (n == 0) 515 if (n == 0)
521 break; 516 break;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 6387523a3153..3401628d742b 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -195,7 +195,7 @@ ulong_aligned:
195 depending on architecture. I've experimented with several ways 195 depending on architecture. I've experimented with several ways
196 of writing this section such as using an else before the goto 196 of writing this section such as using an else before the goto
197 but this one seems to be the fastest. */ 197 but this one seems to be the fastest. */
198 while ((unsigned char *)plong < end - 1) { 198 while ((unsigned char *)plong < end - sizeof(unsigned long)) {
199 prefetch(plong + 1); 199 prefetch(plong + 1);
200 if (((*plong) & LBITMASK) != lskipval) 200 if (((*plong) & LBITMASK) != lskipval)
201 break; 201 break;
diff --git a/include/asm-alpha/percpu.h b/include/asm-alpha/percpu.h
index 82e8a94b4b2f..3495e8e00d70 100644
--- a/include/asm-alpha/percpu.h
+++ b/include/asm-alpha/percpu.h
@@ -69,6 +69,8 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
69#define __get_cpu_var(var) per_cpu_var(var) 69#define __get_cpu_var(var) per_cpu_var(var)
70#define __raw_get_cpu_var(var) per_cpu_var(var) 70#define __raw_get_cpu_var(var) per_cpu_var(var)
71 71
72#define PER_CPU_ATTRIBUTES
73
72#endif /* SMP */ 74#endif /* SMP */
73 75
74#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu_var(name) 76#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu_var(name)
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 1d8cd01fa514..844f2a89afbc 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -18,6 +18,7 @@
18#include <linux/kvm_para.h> 18#include <linux/kvm_para.h>
19#include <linux/kvm_types.h> 19#include <linux/kvm_types.h>
20 20
21#include <asm/pvclock-abi.h>
21#include <asm/desc.h> 22#include <asm/desc.h>
22 23
23#define KVM_MAX_VCPUS 16 24#define KVM_MAX_VCPUS 16
@@ -282,7 +283,8 @@ struct kvm_vcpu_arch {
282 struct x86_emulate_ctxt emulate_ctxt; 283 struct x86_emulate_ctxt emulate_ctxt;
283 284
284 gpa_t time; 285 gpa_t time;
285 struct kvm_vcpu_time_info hv_clock; 286 struct pvclock_vcpu_time_info hv_clock;
287 unsigned int hv_clock_tsc_khz;
286 unsigned int time_offset; 288 unsigned int time_offset;
287 struct page *time_page; 289 struct page *time_page;
288}; 290};
diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index 509845942070..bfd9900742bf 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -48,24 +48,6 @@ struct kvm_mmu_op_release_pt {
48#ifdef __KERNEL__ 48#ifdef __KERNEL__
49#include <asm/processor.h> 49#include <asm/processor.h>
50 50
51/* xen binary-compatible interface. See xen headers for details */
52struct kvm_vcpu_time_info {
53 uint32_t version;
54 uint32_t pad0;
55 uint64_t tsc_timestamp;
56 uint64_t system_time;
57 uint32_t tsc_to_system_mul;
58 int8_t tsc_shift;
59 int8_t pad[3];
60} __attribute__((__packed__)); /* 32 bytes */
61
62struct kvm_wall_clock {
63 uint32_t wc_version;
64 uint32_t wc_sec;
65 uint32_t wc_nsec;
66} __attribute__((__packed__));
67
68
69extern void kvmclock_init(void); 51extern void kvmclock_init(void);
70 52
71 53
diff --git a/include/asm-x86/pvclock-abi.h b/include/asm-x86/pvclock-abi.h
new file mode 100644
index 000000000000..6857f840b243
--- /dev/null
+++ b/include/asm-x86/pvclock-abi.h
@@ -0,0 +1,42 @@
1#ifndef _ASM_X86_PVCLOCK_ABI_H_
2#define _ASM_X86_PVCLOCK_ABI_H_
3#ifndef __ASSEMBLY__
4
5/*
6 * These structs MUST NOT be changed.
7 * They are the ABI between hypervisor and guest OS.
8 * Both Xen and KVM are using this.
9 *
10 * pvclock_vcpu_time_info holds the system time and the tsc timestamp
11 * of the last update. So the guest can use the tsc delta to get a
12 * more precise system time. There is one per virtual cpu.
13 *
14 * pvclock_wall_clock references the point in time when the system
15 * time was zero (usually boot time), thus the guest calculates the
16 * current wall clock by adding the system time.
17 *
18 * Protocol for the "version" fields is: hypervisor raises it (making
19 * it uneven) before it starts updating the fields and raises it again
20 * (making it even) when it is done. Thus the guest can make sure the
21 * time values it got are consistent by checking the version before
22 * and after reading them.
23 */
24
25struct pvclock_vcpu_time_info {
26 u32 version;
27 u32 pad0;
28 u64 tsc_timestamp;
29 u64 system_time;
30 u32 tsc_to_system_mul;
31 s8 tsc_shift;
32 u8 pad[3];
33} __attribute__((__packed__)); /* 32 bytes */
34
35struct pvclock_wall_clock {
36 u32 version;
37 u32 sec;
38 u32 nsec;
39} __attribute__((__packed__));
40
41#endif /* __ASSEMBLY__ */
42#endif /* _ASM_X86_PVCLOCK_ABI_H_ */
diff --git a/include/asm-x86/pvclock.h b/include/asm-x86/pvclock.h
new file mode 100644
index 000000000000..85b1bba8e0a3
--- /dev/null
+++ b/include/asm-x86/pvclock.h
@@ -0,0 +1,13 @@
1#ifndef _ASM_X86_PVCLOCK_H_
2#define _ASM_X86_PVCLOCK_H_
3
4#include <linux/clocksource.h>
5#include <asm/pvclock-abi.h>
6
7/* some helper functions for xen and kvm pv clock sources */
8cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
9void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
10 struct pvclock_vcpu_time_info *vcpu,
11 struct timespec *ts);
12
13#endif /* _ASM_X86_PVCLOCK_H_ */
diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
index baf3a4dce28c..e11f24038b1d 100644
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h
@@ -150,13 +150,9 @@ static inline pte_t __pte_ma(pteval_t x)
150 return (pte_t) { .pte = x }; 150 return (pte_t) { .pte = x };
151} 151}
152 152
153#ifdef CONFIG_X86_PAE
154#define pmd_val_ma(v) ((v).pmd) 153#define pmd_val_ma(v) ((v).pmd)
155#define pud_val_ma(v) ((v).pgd.pgd) 154#define pud_val_ma(v) ((v).pgd.pgd)
156#define __pmd_ma(x) ((pmd_t) { (x) } ) 155#define __pmd_ma(x) ((pmd_t) { (x) } )
157#else /* !X86_PAE */
158#define pmd_val_ma(v) ((v).pud.pgd.pgd)
159#endif /* CONFIG_X86_PAE */
160 156
161#define pgd_val_ma(x) ((x).pgd) 157#define pgd_val_ma(x) ((x).pgd)
162 158
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 092b1b25291d..de9d1df4bba2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -33,6 +33,7 @@
33#define KVM_REQ_REPORT_TPR_ACCESS 2 33#define KVM_REQ_REPORT_TPR_ACCESS 2
34#define KVM_REQ_MMU_RELOAD 3 34#define KVM_REQ_MMU_RELOAD 3
35#define KVM_REQ_TRIPLE_FAULT 4 35#define KVM_REQ_TRIPLE_FAULT 4
36#define KVM_REQ_PENDING_TIMER 5
36 37
37struct kvm_vcpu; 38struct kvm_vcpu;
38extern struct kmem_cache *kvm_vcpu_cache; 39extern struct kmem_cache *kvm_vcpu_cache;
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 9b018da48cf3..819a0331cda9 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -10,6 +10,7 @@
10#define __XEN_PUBLIC_XEN_H__ 10#define __XEN_PUBLIC_XEN_H__
11 11
12#include <asm/xen/interface.h> 12#include <asm/xen/interface.h>
13#include <asm/pvclock-abi.h>
13 14
14/* 15/*
15 * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS). 16 * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS).
@@ -336,7 +337,7 @@ struct vcpu_info {
336 uint8_t evtchn_upcall_mask; 337 uint8_t evtchn_upcall_mask;
337 unsigned long evtchn_pending_sel; 338 unsigned long evtchn_pending_sel;
338 struct arch_vcpu_info arch; 339 struct arch_vcpu_info arch;
339 struct vcpu_time_info time; 340 struct pvclock_vcpu_time_info time;
340}; /* 64 bytes (x86) */ 341}; /* 64 bytes (x86) */
341 342
342/* 343/*
@@ -384,9 +385,7 @@ struct shared_info {
384 * Wallclock time: updated only by control software. Guests should base 385 * Wallclock time: updated only by control software. Guests should base
385 * their gettimeofday() syscall on this wallclock-base value. 386 * their gettimeofday() syscall on this wallclock-base value.
386 */ 387 */
387 uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */ 388 struct pvclock_wall_clock wc;
388 uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */
389 uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */
390 389
391 struct arch_shared_info arch; 390 struct arch_shared_info arch;
392 391
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 79e3c90113c2..3ec23c3ec97f 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -1499,7 +1499,8 @@ int kgdb_nmicallback(int cpu, void *regs)
1499 return 1; 1499 return 1;
1500} 1500}
1501 1501
1502void kgdb_console_write(struct console *co, const char *s, unsigned count) 1502static void kgdb_console_write(struct console *co, const char *s,
1503 unsigned count)
1503{ 1504{
1504 unsigned long flags; 1505 unsigned long flags;
1505 1506
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 98778cb69c6e..1dcf9f3d1107 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -269,28 +269,9 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
269 } 269 }
270} 270}
271 271
272static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector) 272static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi)
273{ 273{
274 int i;
275
276 for (i = 0; i < IOAPIC_NUM_PINS; i++)
277 if (ioapic->redirtbl[i].fields.vector == vector)
278 return i;
279 return -1;
280}
281
282void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
283{
284 struct kvm_ioapic *ioapic = kvm->arch.vioapic;
285 union ioapic_redir_entry *ent; 274 union ioapic_redir_entry *ent;
286 int gsi;
287
288 gsi = get_eoi_gsi(ioapic, vector);
289 if (gsi == -1) {
290 printk(KERN_WARNING "Can't find redir item for %d EOI\n",
291 vector);
292 return;
293 }
294 275
295 ent = &ioapic->redirtbl[gsi]; 276 ent = &ioapic->redirtbl[gsi];
296 ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); 277 ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
@@ -300,6 +281,16 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
300 ioapic_deliver(ioapic, gsi); 281 ioapic_deliver(ioapic, gsi);
301} 282}
302 283
284void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
285{
286 struct kvm_ioapic *ioapic = kvm->arch.vioapic;
287 int i;
288
289 for (i = 0; i < IOAPIC_NUM_PINS; i++)
290 if (ioapic->redirtbl[i].fields.vector == vector)
291 __kvm_ioapic_update_eoi(ioapic, i);
292}
293
303static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr) 294static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr)
304{ 295{
305 struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; 296 struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;