aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/entry_32.S10
-rw-r--r--arch/x86/kernel/entry_64.S3
-rw-r--r--arch/x86/kernel/i387.c1
-rw-r--r--arch/x86/kernel/kvm.c317
-rw-r--r--arch/x86/kernel/kvmclock.c13
5 files changed, 332 insertions, 12 deletions
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 591e60104278..c8b4efad7ebb 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1406,6 +1406,16 @@ ENTRY(general_protection)
1406 CFI_ENDPROC 1406 CFI_ENDPROC
1407END(general_protection) 1407END(general_protection)
1408 1408
1409#ifdef CONFIG_KVM_GUEST
1410ENTRY(async_page_fault)
1411 RING0_EC_FRAME
1412 pushl $do_async_page_fault
1413 CFI_ADJUST_CFA_OFFSET 4
1414 jmp error_code
1415 CFI_ENDPROC
1416END(apf_page_fault)
1417#endif
1418
1409/* 1419/*
1410 * End of kprobes section 1420 * End of kprobes section
1411 */ 1421 */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index d3b895f375d3..aed1ffbeb0c9 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1329,6 +1329,9 @@ errorentry xen_stack_segment do_stack_segment
1329#endif 1329#endif
1330errorentry general_protection do_general_protection 1330errorentry general_protection do_general_protection
1331errorentry page_fault do_page_fault 1331errorentry page_fault do_page_fault
1332#ifdef CONFIG_KVM_GUEST
1333errorentry async_page_fault do_async_page_fault
1334#endif
1332#ifdef CONFIG_X86_MCE 1335#ifdef CONFIG_X86_MCE
1333paranoidzeroentry machine_check *machine_check_vector(%rip) 1336paranoidzeroentry machine_check *machine_check_vector(%rip)
1334#endif 1337#endif
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 58bb239a2fd7..e60c38cc0eed 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -169,6 +169,7 @@ int init_fpu(struct task_struct *tsk)
169 set_stopped_child_used_math(tsk); 169 set_stopped_child_used_math(tsk);
170 return 0; 170 return 0;
171} 171}
172EXPORT_SYMBOL_GPL(init_fpu);
172 173
173/* 174/*
174 * The xstateregs_active() routine is the same as the fpregs_active() routine, 175 * The xstateregs_active() routine is the same as the fpregs_active() routine,
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 63b0ec8d3d4a..8dc44662394b 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -27,16 +27,37 @@
27#include <linux/mm.h> 27#include <linux/mm.h>
28#include <linux/highmem.h> 28#include <linux/highmem.h>
29#include <linux/hardirq.h> 29#include <linux/hardirq.h>
30#include <linux/notifier.h>
31#include <linux/reboot.h>
32#include <linux/hash.h>
33#include <linux/sched.h>
34#include <linux/slab.h>
35#include <linux/kprobes.h>
30#include <asm/timer.h> 36#include <asm/timer.h>
37#include <asm/cpu.h>
38#include <asm/traps.h>
39#include <asm/desc.h>
40#include <asm/tlbflush.h>
31 41
32#define MMU_QUEUE_SIZE 1024 42#define MMU_QUEUE_SIZE 1024
33 43
44static int kvmapf = 1;
45
46static int parse_no_kvmapf(char *arg)
47{
48 kvmapf = 0;
49 return 0;
50}
51
52early_param("no-kvmapf", parse_no_kvmapf);
53
34struct kvm_para_state { 54struct kvm_para_state {
35 u8 mmu_queue[MMU_QUEUE_SIZE]; 55 u8 mmu_queue[MMU_QUEUE_SIZE];
36 int mmu_queue_len; 56 int mmu_queue_len;
37}; 57};
38 58
39static DEFINE_PER_CPU(struct kvm_para_state, para_state); 59static DEFINE_PER_CPU(struct kvm_para_state, para_state);
60static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
40 61
41static struct kvm_para_state *kvm_para_state(void) 62static struct kvm_para_state *kvm_para_state(void)
42{ 63{
@@ -50,6 +71,195 @@ static void kvm_io_delay(void)
50{ 71{
51} 72}
52 73
74#define KVM_TASK_SLEEP_HASHBITS 8
75#define KVM_TASK_SLEEP_HASHSIZE (1<<KVM_TASK_SLEEP_HASHBITS)
76
77struct kvm_task_sleep_node {
78 struct hlist_node link;
79 wait_queue_head_t wq;
80 u32 token;
81 int cpu;
82 bool halted;
83 struct mm_struct *mm;
84};
85
86static struct kvm_task_sleep_head {
87 spinlock_t lock;
88 struct hlist_head list;
89} async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];
90
91static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
92 u32 token)
93{
94 struct hlist_node *p;
95
96 hlist_for_each(p, &b->list) {
97 struct kvm_task_sleep_node *n =
98 hlist_entry(p, typeof(*n), link);
99 if (n->token == token)
100 return n;
101 }
102
103 return NULL;
104}
105
106void kvm_async_pf_task_wait(u32 token)
107{
108 u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
109 struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
110 struct kvm_task_sleep_node n, *e;
111 DEFINE_WAIT(wait);
112 int cpu, idle;
113
114 cpu = get_cpu();
115 idle = idle_cpu(cpu);
116 put_cpu();
117
118 spin_lock(&b->lock);
119 e = _find_apf_task(b, token);
120 if (e) {
121 /* dummy entry exist -> wake up was delivered ahead of PF */
122 hlist_del(&e->link);
123 kfree(e);
124 spin_unlock(&b->lock);
125 return;
126 }
127
128 n.token = token;
129 n.cpu = smp_processor_id();
130 n.mm = current->active_mm;
131 n.halted = idle || preempt_count() > 1;
132 atomic_inc(&n.mm->mm_count);
133 init_waitqueue_head(&n.wq);
134 hlist_add_head(&n.link, &b->list);
135 spin_unlock(&b->lock);
136
137 for (;;) {
138 if (!n.halted)
139 prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
140 if (hlist_unhashed(&n.link))
141 break;
142
143 if (!n.halted) {
144 local_irq_enable();
145 schedule();
146 local_irq_disable();
147 } else {
148 /*
149 * We cannot reschedule. So halt.
150 */
151 native_safe_halt();
152 local_irq_disable();
153 }
154 }
155 if (!n.halted)
156 finish_wait(&n.wq, &wait);
157
158 return;
159}
160EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);
161
162static void apf_task_wake_one(struct kvm_task_sleep_node *n)
163{
164 hlist_del_init(&n->link);
165 if (!n->mm)
166 return;
167 mmdrop(n->mm);
168 if (n->halted)
169 smp_send_reschedule(n->cpu);
170 else if (waitqueue_active(&n->wq))
171 wake_up(&n->wq);
172}
173
174static void apf_task_wake_all(void)
175{
176 int i;
177
178 for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
179 struct hlist_node *p, *next;
180 struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
181 spin_lock(&b->lock);
182 hlist_for_each_safe(p, next, &b->list) {
183 struct kvm_task_sleep_node *n =
184 hlist_entry(p, typeof(*n), link);
185 if (n->cpu == smp_processor_id())
186 apf_task_wake_one(n);
187 }
188 spin_unlock(&b->lock);
189 }
190}
191
192void kvm_async_pf_task_wake(u32 token)
193{
194 u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
195 struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
196 struct kvm_task_sleep_node *n;
197
198 if (token == ~0) {
199 apf_task_wake_all();
200 return;
201 }
202
203again:
204 spin_lock(&b->lock);
205 n = _find_apf_task(b, token);
206 if (!n) {
207 /*
208 * async PF was not yet handled.
209 * Add dummy entry for the token.
210 */
211 n = kmalloc(sizeof(*n), GFP_ATOMIC);
212 if (!n) {
213 /*
214 * Allocation failed! Busy wait while other cpu
215 * handles async PF.
216 */
217 spin_unlock(&b->lock);
218 cpu_relax();
219 goto again;
220 }
221 n->token = token;
222 n->cpu = smp_processor_id();
223 n->mm = NULL;
224 init_waitqueue_head(&n->wq);
225 hlist_add_head(&n->link, &b->list);
226 } else
227 apf_task_wake_one(n);
228 spin_unlock(&b->lock);
229 return;
230}
231EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
232
233u32 kvm_read_and_reset_pf_reason(void)
234{
235 u32 reason = 0;
236
237 if (__get_cpu_var(apf_reason).enabled) {
238 reason = __get_cpu_var(apf_reason).reason;
239 __get_cpu_var(apf_reason).reason = 0;
240 }
241
242 return reason;
243}
244EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
245
246dotraplinkage void __kprobes
247do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
248{
249 switch (kvm_read_and_reset_pf_reason()) {
250 default:
251 do_page_fault(regs, error_code);
252 break;
253 case KVM_PV_REASON_PAGE_NOT_PRESENT:
254 /* page is swapped out by the host. */
255 kvm_async_pf_task_wait((u32)read_cr2());
256 break;
257 case KVM_PV_REASON_PAGE_READY:
258 kvm_async_pf_task_wake((u32)read_cr2());
259 break;
260 }
261}
262
53static void kvm_mmu_op(void *buffer, unsigned len) 263static void kvm_mmu_op(void *buffer, unsigned len)
54{ 264{
55 int r; 265 int r;
@@ -231,10 +441,117 @@ static void __init paravirt_ops_setup(void)
231#endif 441#endif
232} 442}
233 443
444void __cpuinit kvm_guest_cpu_init(void)
445{
446 if (!kvm_para_available())
447 return;
448
449 if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
450 u64 pa = __pa(&__get_cpu_var(apf_reason));
451
452#ifdef CONFIG_PREEMPT
453 pa |= KVM_ASYNC_PF_SEND_ALWAYS;
454#endif
455 wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED);
456 __get_cpu_var(apf_reason).enabled = 1;
457 printk(KERN_INFO"KVM setup async PF for cpu %d\n",
458 smp_processor_id());
459 }
460}
461
462static void kvm_pv_disable_apf(void *unused)
463{
464 if (!__get_cpu_var(apf_reason).enabled)
465 return;
466
467 wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
468 __get_cpu_var(apf_reason).enabled = 0;
469
470 printk(KERN_INFO"Unregister pv shared memory for cpu %d\n",
471 smp_processor_id());
472}
473
474static int kvm_pv_reboot_notify(struct notifier_block *nb,
475 unsigned long code, void *unused)
476{
477 if (code == SYS_RESTART)
478 on_each_cpu(kvm_pv_disable_apf, NULL, 1);
479 return NOTIFY_DONE;
480}
481
482static struct notifier_block kvm_pv_reboot_nb = {
483 .notifier_call = kvm_pv_reboot_notify,
484};
485
486#ifdef CONFIG_SMP
487static void __init kvm_smp_prepare_boot_cpu(void)
488{
489#ifdef CONFIG_KVM_CLOCK
490 WARN_ON(kvm_register_clock("primary cpu clock"));
491#endif
492 kvm_guest_cpu_init();
493 native_smp_prepare_boot_cpu();
494}
495
496static void kvm_guest_cpu_online(void *dummy)
497{
498 kvm_guest_cpu_init();
499}
500
501static void kvm_guest_cpu_offline(void *dummy)
502{
503 kvm_pv_disable_apf(NULL);
504 apf_task_wake_all();
505}
506
507static int __cpuinit kvm_cpu_notify(struct notifier_block *self,
508 unsigned long action, void *hcpu)
509{
510 int cpu = (unsigned long)hcpu;
511 switch (action) {
512 case CPU_ONLINE:
513 case CPU_DOWN_FAILED:
514 case CPU_ONLINE_FROZEN:
515 smp_call_function_single(cpu, kvm_guest_cpu_online, NULL, 0);
516 break;
517 case CPU_DOWN_PREPARE:
518 case CPU_DOWN_PREPARE_FROZEN:
519 smp_call_function_single(cpu, kvm_guest_cpu_offline, NULL, 1);
520 break;
521 default:
522 break;
523 }
524 return NOTIFY_OK;
525}
526
527static struct notifier_block __cpuinitdata kvm_cpu_notifier = {
528 .notifier_call = kvm_cpu_notify,
529};
530#endif
531
532static void __init kvm_apf_trap_init(void)
533{
534 set_intr_gate(14, &async_page_fault);
535}
536
234void __init kvm_guest_init(void) 537void __init kvm_guest_init(void)
235{ 538{
539 int i;
540
236 if (!kvm_para_available()) 541 if (!kvm_para_available())
237 return; 542 return;
238 543
239 paravirt_ops_setup(); 544 paravirt_ops_setup();
545 register_reboot_notifier(&kvm_pv_reboot_nb);
546 for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
547 spin_lock_init(&async_pf_sleepers[i].lock);
548 if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
549 x86_init.irqs.trap_init = kvm_apf_trap_init;
550
551#ifdef CONFIG_SMP
552 smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
553 register_cpu_notifier(&kvm_cpu_notifier);
554#else
555 kvm_guest_cpu_init();
556#endif
240} 557}
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index ca43ce31a19c..f98d3eafe07a 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -125,7 +125,7 @@ static struct clocksource kvm_clock = {
125 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 125 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
126}; 126};
127 127
128static int kvm_register_clock(char *txt) 128int kvm_register_clock(char *txt)
129{ 129{
130 int cpu = smp_processor_id(); 130 int cpu = smp_processor_id();
131 int low, high, ret; 131 int low, high, ret;
@@ -152,14 +152,6 @@ static void __cpuinit kvm_setup_secondary_clock(void)
152} 152}
153#endif 153#endif
154 154
155#ifdef CONFIG_SMP
156static void __init kvm_smp_prepare_boot_cpu(void)
157{
158 WARN_ON(kvm_register_clock("primary cpu clock"));
159 native_smp_prepare_boot_cpu();
160}
161#endif
162
163/* 155/*
164 * After the clock is registered, the host will keep writing to the 156 * After the clock is registered, the host will keep writing to the
165 * registered memory location. If the guest happens to shutdown, this memory 157 * registered memory location. If the guest happens to shutdown, this memory
@@ -206,9 +198,6 @@ void __init kvmclock_init(void)
206 x86_cpuinit.setup_percpu_clockev = 198 x86_cpuinit.setup_percpu_clockev =
207 kvm_setup_secondary_clock; 199 kvm_setup_secondary_clock;
208#endif 200#endif
209#ifdef CONFIG_SMP
210 smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
211#endif
212 machine_ops.shutdown = kvm_shutdown; 201 machine_ops.shutdown = kvm_shutdown;
213#ifdef CONFIG_KEXEC 202#ifdef CONFIG_KEXEC
214 machine_ops.crash_shutdown = kvm_crash_shutdown; 203 machine_ops.crash_shutdown = kvm_crash_shutdown;