aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorGleb Natapov <gleb@redhat.com>2010-10-14 05:22:52 -0400
committerAvi Kivity <avi@redhat.com>2011-01-12 04:23:16 -0500
commit631bc4878220932fe67fc46fc7cf7cccdb1ec597 (patch)
treeac588182d02308a004d45a9c3ae6834d096e263d /arch/x86/kernel
parentfd10cde9294f73eeccbc16f3fec1ae6cde7b800c (diff)
KVM: Handle async PF in a guest.
When the async PF capability is detected, hook up a special page fault handler that handles async page fault events and passes all other page faults through to the regular page fault handler. Also add async PF handling to nested SVM emulation. An async PF always generates an exit to L1, where the vcpu thread will be scheduled out until the page is available. Acked-by: Rik van Riel <riel@redhat.com> Signed-off-by: Gleb Natapov <gleb@redhat.com> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/entry_32.S10
-rw-r--r--arch/x86/kernel/entry_64.S3
-rw-r--r--arch/x86/kernel/kvm.c181
3 files changed, 194 insertions, 0 deletions
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 591e60104278..c8b4efad7ebb 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1406,6 +1406,16 @@ ENTRY(general_protection)
1406 CFI_ENDPROC 1406 CFI_ENDPROC
1407END(general_protection) 1407END(general_protection)
1408 1408
#ifdef CONFIG_KVM_GUEST
/*
 * Page-fault entry point installed by KVM guests.  It pushes the address
 * of the C handler do_async_page_fault and continues on the common
 * error_code path.
 *
 * The END() annotation must name the same symbol that ENTRY() opened;
 * the original closed a non-existent "apf_page_fault" symbol instead.
 */
ENTRY(async_page_fault)
	RING0_EC_FRAME
	pushl $do_async_page_fault
	CFI_ADJUST_CFA_OFFSET 4
	jmp error_code
	CFI_ENDPROC
END(async_page_fault)
#endif
1418
1409/* 1419/*
1410 * End of kprobes section 1420 * End of kprobes section
1411 */ 1421 */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e3ba417e8697..bb3f6e9bfa68 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1319,6 +1319,9 @@ errorentry xen_stack_segment do_stack_segment
1319#endif 1319#endif
1320errorentry general_protection do_general_protection 1320errorentry general_protection do_general_protection
1321errorentry page_fault do_page_fault 1321errorentry page_fault do_page_fault
/*
 * KVM guest only: pair the async_page_fault entry symbol with the C
 * handler do_async_page_fault, in the same way the page_fault line above
 * pairs page_fault with do_page_fault.  (The errorentry macro itself is
 * defined outside this hunk.)
 */
1322#ifdef CONFIG_KVM_GUEST
1323errorentry async_page_fault do_async_page_fault
1324#endif
1322#ifdef CONFIG_X86_MCE 1325#ifdef CONFIG_X86_MCE
1323paranoidzeroentry machine_check *machine_check_vector(%rip) 1326paranoidzeroentry machine_check *machine_check_vector(%rip)
1324#endif 1327#endif
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 032d03b6b54a..d5640634fef6 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -29,8 +29,14 @@
29#include <linux/hardirq.h> 29#include <linux/hardirq.h>
30#include <linux/notifier.h> 30#include <linux/notifier.h>
31#include <linux/reboot.h> 31#include <linux/reboot.h>
32#include <linux/hash.h>
33#include <linux/sched.h>
34#include <linux/slab.h>
35#include <linux/kprobes.h>
32#include <asm/timer.h> 36#include <asm/timer.h>
33#include <asm/cpu.h> 37#include <asm/cpu.h>
38#include <asm/traps.h>
39#include <asm/desc.h>
34 40
35#define MMU_QUEUE_SIZE 1024 41#define MMU_QUEUE_SIZE 1024
36 42
@@ -64,6 +70,168 @@ static void kvm_io_delay(void)
64{ 70{
65} 71}
66 72
/*
 * Async page fault sleepers are kept in a hash table keyed by the fault
 * token: hash_32(token, KVM_TASK_SLEEP_HASHBITS) selects one of
 * KVM_TASK_SLEEP_HASHSIZE (1 << 8 = 256) buckets.
 */
73#define KVM_TASK_SLEEP_HASHBITS 8
74#define KVM_TASK_SLEEP_HASHSIZE (1<<KVM_TASK_SLEEP_HASHBITS)
75
/*
 * One entry per outstanding async page fault token.
 *
 * link:  linkage into the bucket's hlist; the waiter treats the node
 *        being unhashed as the "woken" condition.
 * wq:    wait queue the faulting task sleeps on.
 * token: async PF token used to match a "page ready" event to this entry.
 * cpu:   smp_processor_id() of the CPU that created the entry; used by
 *        apf_task_wake_all() to pick the entries of the current CPU.
 */
76struct kvm_task_sleep_node {
77 struct hlist_node link;
78 wait_queue_head_t wq;
79 u32 token;
80 int cpu;
81};
82
/*
 * Hash bucket: 'lock' is taken around every lookup and modification of
 * 'list' in this file.
 */
83static struct kvm_task_sleep_head {
84 spinlock_t lock;
85 struct hlist_head list;
86} async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];
87
88static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
89 u32 token)
90{
91 struct hlist_node *p;
92
93 hlist_for_each(p, &b->list) {
94 struct kvm_task_sleep_node *n =
95 hlist_entry(p, typeof(*n), link);
96 if (n->token == token)
97 return n;
98 }
99
100 return NULL;
101}
102
/*
 * kvm_async_pf_task_wait - sleep until the async page fault @token is ready.
 * @token: async PF token identifying the outstanding fault.
 *
 * If a dummy entry for @token is already hashed, the wake-up was delivered
 * before this fault was handled: the dummy is unhashed and freed and the
 * function returns without sleeping.
 *
 * Otherwise an on-stack node is hashed under the bucket lock and the task
 * sleeps in TASK_UNINTERRUPTIBLE until that node has been unhashed, which
 * apf_task_wake_one() does via hlist_del_init().  Interrupts are enabled
 * only around schedule() and disabled again afterwards.
 *
 * NOTE(review): this appears to assume entry with interrupts disabled and
 * from a context that is allowed to sleep - confirm against the callers.
 */
103void kvm_async_pf_task_wait(u32 token)
104{
105 u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
106 struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
107 struct kvm_task_sleep_node n, *e;
108 DEFINE_WAIT(wait);
109
110 spin_lock(&b->lock);
111 e = _find_apf_task(b, token);
112 if (e) {
113 /* a dummy entry exists -> the wake-up was delivered ahead of the PF */
114 hlist_del(&e->link);
115 kfree(e);
116 spin_unlock(&b->lock);
117 return;
118 }
119
/* No wake-up seen yet: publish our on-stack node so the waker can find it. */
120 n.token = token;
121 n.cpu = smp_processor_id();
122 init_waitqueue_head(&n.wq);
123 hlist_add_head(&n.link, &b->list);
124 spin_unlock(&b->lock);
125
126 for (;;) {
/* Queue on the wait queue before testing the condition, so a wake-up in between is not lost. */
127 prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
128 if (hlist_unhashed(&n.link))
129 break;
130 local_irq_enable();
131 schedule();
132 local_irq_disable();
133 }
134 finish_wait(&n.wq, &wait);
135
136 return;
137}
138EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);
139
140static void apf_task_wake_one(struct kvm_task_sleep_node *n)
141{
142 hlist_del_init(&n->link);
143 if (waitqueue_active(&n->wq))
144 wake_up(&n->wq);
145}
146
147static void apf_task_wake_all(void)
148{
149 int i;
150
151 for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
152 struct hlist_node *p, *next;
153 struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
154 spin_lock(&b->lock);
155 hlist_for_each_safe(p, next, &b->list) {
156 struct kvm_task_sleep_node *n =
157 hlist_entry(p, typeof(*n), link);
158 if (n->cpu == smp_processor_id())
159 apf_task_wake_one(n);
160 }
161 spin_unlock(&b->lock);
162 }
163}
164
/*
 * kvm_async_pf_task_wake - handle a "page ready" event for @token.
 * @token: async PF token; the value ~0 means "wake every sleeper that was
 *         registered on the current CPU" (see apf_task_wake_all()).
 *
 * If a sleeper is hashed for @token it is unhashed and woken.  If none is
 * found, the corresponding fault has not been handled yet, so a dummy
 * entry is hashed instead; kvm_async_pf_task_wait() removes and frees it
 * when it later runs for the same token.
 *
 * The dummy is allocated with GFP_ATOMIC.  On allocation failure the lock
 * is dropped and the lookup is retried in a busy loop until either the
 * sleeper shows up or the allocation succeeds.
 */
165void kvm_async_pf_task_wake(u32 token)
166{
167 u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
168 struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
169 struct kvm_task_sleep_node *n;
170
171 if (token == ~0) {
172 apf_task_wake_all();
173 return;
174 }
175
176again:
177 spin_lock(&b->lock);
178 n = _find_apf_task(b, token);
179 if (!n) {
180 /*
181 * async PF was not yet handled.
182 * Add dummy entry for the token.
183 */
184 n = kmalloc(sizeof(*n), GFP_ATOMIC);
185 if (!n) {
186 /*
187 * Allocation failed! Busy wait while other cpu
188 * handles async PF.
189 */
190 spin_unlock(&b->lock);
191 cpu_relax();
192 goto again;
193 }
194 n->token = token;
195 n->cpu = smp_processor_id();
196 init_waitqueue_head(&n->wq);
197 hlist_add_head(&n->link, &b->list);
198 } else
199 apf_task_wake_one(n);
200 spin_unlock(&b->lock);
201 return;
202}
203EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
204
205u32 kvm_read_and_reset_pf_reason(void)
206{
207 u32 reason = 0;
208
209 if (__get_cpu_var(apf_reason).enabled) {
210 reason = __get_cpu_var(apf_reason).reason;
211 __get_cpu_var(apf_reason).reason = 0;
212 }
213
214 return reason;
215}
216EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
217
218dotraplinkage void __kprobes
219do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
220{
221 switch (kvm_read_and_reset_pf_reason()) {
222 default:
223 do_page_fault(regs, error_code);
224 break;
225 case KVM_PV_REASON_PAGE_NOT_PRESENT:
226 /* page is swapped out by the host. */
227 kvm_async_pf_task_wait((u32)read_cr2());
228 break;
229 case KVM_PV_REASON_PAGE_READY:
230 kvm_async_pf_task_wake((u32)read_cr2());
231 break;
232 }
233}
234
67static void kvm_mmu_op(void *buffer, unsigned len) 235static void kvm_mmu_op(void *buffer, unsigned len)
68{ 236{
69 int r; 237 int r;
@@ -300,6 +468,7 @@ static void kvm_guest_cpu_online(void *dummy)
/*
 * CPU going offline: disable async PF via kvm_pv_disable_apf() and then
 * (line added by this change) release every async PF sleeper that was
 * registered on this CPU with apf_task_wake_all().
 */
300static void kvm_guest_cpu_offline(void *dummy) 468static void kvm_guest_cpu_offline(void *dummy)
301{ 469{
302 kvm_pv_disable_apf(NULL); 470 kvm_pv_disable_apf(NULL);
471 apf_task_wake_all();
303} 472}
304 473
305static int __cpuinit kvm_cpu_notify(struct notifier_block *self, 474static int __cpuinit kvm_cpu_notify(struct notifier_block *self,
@@ -327,13 +496,25 @@ static struct notifier_block __cpuinitdata kvm_cpu_notifier = {
327}; 496};
328#endif 497#endif
329 498
/*
 * Install async_page_fault as the interrupt gate for vector 14 (the x86
 * page-fault vector).  kvm_guest_init() hooks this in as
 * x86_init.irqs.trap_init when KVM_FEATURE_ASYNC_PF is available.
 */
499static void __init kvm_apf_trap_init(void)
500{
501 set_intr_gate(14, &async_page_fault);
502}
503
330void __init kvm_guest_init(void) 504void __init kvm_guest_init(void)
331{ 505{
506 int i;
507
332 if (!kvm_para_available()) 508 if (!kvm_para_available())
333 return; 509 return;
334 510
335 paravirt_ops_setup(); 511 paravirt_ops_setup();
336 register_reboot_notifier(&kvm_pv_reboot_nb); 512 register_reboot_notifier(&kvm_pv_reboot_nb);
513 for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
514 spin_lock_init(&async_pf_sleepers[i].lock);
515 if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
516 x86_init.irqs.trap_init = kvm_apf_trap_init;
517
337#ifdef CONFIG_SMP 518#ifdef CONFIG_SMP
338 smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; 519 smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
339 register_cpu_notifier(&kvm_cpu_notifier); 520 register_cpu_notifier(&kvm_cpu_notifier);