-rw-r--r--  arch/x86/include/asm/kvm_para.h |  12
-rw-r--r--  arch/x86/include/asm/traps.h    |   1
-rw-r--r--  arch/x86/kernel/entry_32.S      |  10
-rw-r--r--  arch/x86/kernel/entry_64.S      |   3
-rw-r--r--  arch/x86/kernel/kvm.c           | 181
-rw-r--r--  arch/x86/kvm/svm.c              |  45
6 files changed, 243 insertions(+), 9 deletions(-)
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 2315398230d..fbfd3679bc1 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -65,6 +65,9 @@ struct kvm_mmu_op_release_pt {
 	__u64 pt_phys;
 };
 
+#define KVM_PV_REASON_PAGE_NOT_PRESENT 1
+#define KVM_PV_REASON_PAGE_READY 2
+
 struct kvm_vcpu_pv_apf_data {
 	__u32 reason;
 	__u8 pad[60];
@@ -171,8 +174,17 @@ static inline unsigned int kvm_arch_para_features(void)
 
 #ifdef CONFIG_KVM_GUEST
 void __init kvm_guest_init(void);
+void kvm_async_pf_task_wait(u32 token);
+void kvm_async_pf_task_wake(u32 token);
+u32 kvm_read_and_reset_pf_reason(void);
 #else
 #define kvm_guest_init() do { } while (0)
+#define kvm_async_pf_task_wait(T) do {} while(0)
+#define kvm_async_pf_task_wake(T) do {} while(0)
+static inline u32 kvm_read_and_reset_pf_reason(void)
+{
+	return 0;
+}
 #endif
 
 #endif /* __KERNEL__ */
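
Note: the `reason` word declared above lives in guest memory that is shared with the host; the host writes a reason code there before injecting the notification, and the guest must clear it when consuming. Below is a minimal stand-alone sketch of that read-and-reset contract (assumption: this is an illustrative user-space model, not kernel code; the real per-cpu instance and its registration with the host live in arch/x86/kernel/kvm.c, shown later in this patch):

#include <stdint.h>

#define KVM_PV_REASON_PAGE_NOT_PRESENT 1
#define KVM_PV_REASON_PAGE_READY 2

struct pv_apf_data {
	uint32_t reason;	/* written by the host before injecting #PF */
	uint8_t pad[60];	/* keeps 'reason' in its own 64-byte line */
};

static struct pv_apf_data apf_data;	/* stand-in for the per-cpu copy */

/* Mirrors kvm_read_and_reset_pf_reason(): fetch the pending reason and
 * clear it so a subsequent ordinary #PF is not misclassified. */
static uint32_t read_and_reset_pf_reason(void)
{
	uint32_t reason = apf_data.reason;

	apf_data.reason = 0;
	return reason;
}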
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index f66cda56781..0310da67307 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -30,6 +30,7 @@ asmlinkage void segment_not_present(void);
 asmlinkage void stack_segment(void);
 asmlinkage void general_protection(void);
 asmlinkage void page_fault(void);
+asmlinkage void async_page_fault(void);
 asmlinkage void spurious_interrupt_bug(void);
 asmlinkage void coprocessor_error(void);
 asmlinkage void alignment_check(void);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 591e6010427..c8b4efad7eb 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1406,6 +1406,16 @@ ENTRY(general_protection)
 	CFI_ENDPROC
 END(general_protection)
 
+#ifdef CONFIG_KVM_GUEST
+ENTRY(async_page_fault)
+	RING0_EC_FRAME
+	pushl $do_async_page_fault
+	CFI_ADJUST_CFA_OFFSET 4
+	jmp error_code
+	CFI_ENDPROC
+END(async_page_fault)
+#endif
+
 /*
  * End of kprobes section
  */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e3ba417e869..bb3f6e9bfa6 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1319,6 +1319,9 @@ errorentry xen_stack_segment do_stack_segment
 #endif
 errorentry general_protection do_general_protection
 errorentry page_fault do_page_fault
+#ifdef CONFIG_KVM_GUEST
+errorentry async_page_fault do_async_page_fault
+#endif
 #ifdef CONFIG_X86_MCE
 paranoidzeroentry machine_check *machine_check_vector(%rip)
 #endif
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 032d03b6b54..d5640634fef 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -29,8 +29,14 @@
 #include <linux/hardirq.h>
 #include <linux/notifier.h>
 #include <linux/reboot.h>
+#include <linux/hash.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/kprobes.h>
 #include <asm/timer.h>
 #include <asm/cpu.h>
+#include <asm/traps.h>
+#include <asm/desc.h>
 
 #define MMU_QUEUE_SIZE 1024
 
@@ -64,6 +70,168 @@ static void kvm_io_delay(void)
 {
 }
 
+#define KVM_TASK_SLEEP_HASHBITS 8
+#define KVM_TASK_SLEEP_HASHSIZE (1<<KVM_TASK_SLEEP_HASHBITS)
+
+struct kvm_task_sleep_node {
+	struct hlist_node link;
+	wait_queue_head_t wq;
+	u32 token;
+	int cpu;
+};
+
+static struct kvm_task_sleep_head {
+	spinlock_t lock;
+	struct hlist_head list;
+} async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];
+
+static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
+						  u32 token)
+{
+	struct hlist_node *p;
+
+	hlist_for_each(p, &b->list) {
+		struct kvm_task_sleep_node *n =
+			hlist_entry(p, typeof(*n), link);
+		if (n->token == token)
+			return n;
+	}
+
+	return NULL;
+}
+
+void kvm_async_pf_task_wait(u32 token)
+{
+	u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
+	struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
+	struct kvm_task_sleep_node n, *e;
+	DEFINE_WAIT(wait);
+
+	spin_lock(&b->lock);
+	e = _find_apf_task(b, token);
+	if (e) {
+		/* dummy entry exists -> wake-up was delivered ahead of PF */
+		hlist_del(&e->link);
+		kfree(e);
+		spin_unlock(&b->lock);
+		return;
+	}
+
+	n.token = token;
+	n.cpu = smp_processor_id();
+	init_waitqueue_head(&n.wq);
+	hlist_add_head(&n.link, &b->list);
+	spin_unlock(&b->lock);
+
+	for (;;) {
+		prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
+		if (hlist_unhashed(&n.link))
+			break;
+		local_irq_enable();
+		schedule();
+		local_irq_disable();
+	}
+	finish_wait(&n.wq, &wait);
+
+	return;
+}
+EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);
+
+static void apf_task_wake_one(struct kvm_task_sleep_node *n)
+{
+	hlist_del_init(&n->link);
+	if (waitqueue_active(&n->wq))
+		wake_up(&n->wq);
+}
+
+static void apf_task_wake_all(void)
+{
+	int i;
+
+	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
+		struct hlist_node *p, *next;
+		struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
+		spin_lock(&b->lock);
+		hlist_for_each_safe(p, next, &b->list) {
+			struct kvm_task_sleep_node *n =
+				hlist_entry(p, typeof(*n), link);
+			if (n->cpu == smp_processor_id())
+				apf_task_wake_one(n);
+		}
+		spin_unlock(&b->lock);
+	}
+}
+
+void kvm_async_pf_task_wake(u32 token)
+{
+	u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
+	struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
+	struct kvm_task_sleep_node *n;
+
+	if (token == ~0) {
+		apf_task_wake_all();
+		return;
+	}
+
+again:
+	spin_lock(&b->lock);
+	n = _find_apf_task(b, token);
+	if (!n) {
+		/*
+		 * async PF was not yet handled.
+		 * Add a dummy entry for the token.
+		 */
+		n = kmalloc(sizeof(*n), GFP_ATOMIC);
+		if (!n) {
+			/*
+			 * Allocation failed! Busy wait while another cpu
+			 * handles the async PF.
+			 */
+			spin_unlock(&b->lock);
+			cpu_relax();
+			goto again;
+		}
+		n->token = token;
+		n->cpu = smp_processor_id();
+		init_waitqueue_head(&n->wq);
+		hlist_add_head(&n->link, &b->list);
+	} else
+		apf_task_wake_one(n);
+	spin_unlock(&b->lock);
+	return;
+}
+EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
+
+u32 kvm_read_and_reset_pf_reason(void)
+{
+	u32 reason = 0;
+
+	if (__get_cpu_var(apf_reason).enabled) {
+		reason = __get_cpu_var(apf_reason).reason;
+		__get_cpu_var(apf_reason).reason = 0;
+	}
+
+	return reason;
+}
+EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
+
+dotraplinkage void __kprobes
+do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
+{
+	switch (kvm_read_and_reset_pf_reason()) {
+	default:
+		do_page_fault(regs, error_code);
+		break;
+	case KVM_PV_REASON_PAGE_NOT_PRESENT:
+		/* page is swapped out by the host. */
+		kvm_async_pf_task_wait((u32)read_cr2());
+		break;
+	case KVM_PV_REASON_PAGE_READY:
+		kvm_async_pf_task_wake((u32)read_cr2());
+		break;
+	}
+}
+
 static void kvm_mmu_op(void *buffer, unsigned len)
 {
 	int r;
@@ -300,6 +468,7 @@ static void kvm_guest_cpu_online(void *dummy)
 static void kvm_guest_cpu_offline(void *dummy)
 {
 	kvm_pv_disable_apf(NULL);
+	apf_task_wake_all();
 }
 
 static int __cpuinit kvm_cpu_notify(struct notifier_block *self,
305static int __cpuinit kvm_cpu_notify(struct notifier_block *self, 474static int __cpuinit kvm_cpu_notify(struct notifier_block *self,
@@ -327,13 +496,25 @@ static struct notifier_block __cpuinitdata kvm_cpu_notifier = {
 };
 #endif
 
+static void __init kvm_apf_trap_init(void)
+{
+	set_intr_gate(14, &async_page_fault);
+}
+
 void __init kvm_guest_init(void)
 {
+	int i;
+
 	if (!kvm_para_available())
 		return;
 
 	paravirt_ops_setup();
 	register_reboot_notifier(&kvm_pv_reboot_nb);
+	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
+		spin_lock_init(&async_pf_sleepers[i].lock);
+	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
+		x86_init.irqs.trap_init = kvm_apf_trap_init;
+
 #ifdef CONFIG_SMP
 	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
 	register_cpu_notifier(&kvm_cpu_notifier);
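
The heart of the kvm.c change is a token rendezvous: the not-present notification makes the faulting task sleep on a token, and the page-ready notification (which may race ahead and arrive first) wakes it, leaving a "dummy" node behind when the sleeper has not arrived yet. Below is a minimal user-space model of that protocol (assumption: names apf_wait/apf_wake are illustrative; a pthread mutex and condition variable stand in for the kernel spinlock and waitqueue, and a single list replaces the hash of buckets). It compiles with `cc -pthread`:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* One node per sleeping (or pre-woken) token, like kvm_task_sleep_node. */
struct sleep_node {
	struct sleep_node *next;
	pthread_cond_t cv;
	uint32_t token;
	int done;	/* wake-up already delivered */
};

static struct sleep_node *sleepers;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static struct sleep_node *find_node(uint32_t token)
{
	struct sleep_node *n;

	for (n = sleepers; n; n = n->next)
		if (n->token == token)
			return n;
	return NULL;
}

static void unlink_node(struct sleep_node *del)
{
	struct sleep_node **pp;

	for (pp = &sleepers; *pp; pp = &(*pp)->next)
		if (*pp == del) {
			*pp = del->next;
			break;
		}
}

/* Guest side of the rendezvous: sleep until the token is woken,
 * unless the wake-up already arrived (a dummy node is found). */
void apf_wait(uint32_t token)
{
	struct sleep_node n = { .token = token };

	pthread_cond_init(&n.cv, NULL);
	pthread_mutex_lock(&lock);
	struct sleep_node *e = find_node(token);
	if (e) {	/* dummy entry exists: wake-up beat us here */
		unlink_node(e);
		free(e);
		pthread_mutex_unlock(&lock);
		return;
	}
	n.next = sleepers;
	sleepers = &n;
	while (!n.done)	/* also guards against spurious wake-ups */
		pthread_cond_wait(&n.cv, &lock);
	pthread_mutex_unlock(&lock);
}

/* Wake the sleeper for the token, or record the wake-up in a
 * dummy node when the sleeper has not gone to sleep yet. */
void apf_wake(uint32_t token)
{
	pthread_mutex_lock(&lock);
	struct sleep_node *n = find_node(token);
	if (!n) {
		n = calloc(1, sizeof(*n));	/* the kernel busy-waits if this fails */
		if (!n)
			abort();
		n->token = token;
		n->done = 1;
		n->next = sleepers;
		sleepers = n;
	} else {
		unlink_node(n);
		n->done = 1;
		pthread_cond_signal(&n->cv);
	}
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	/* The race the dummy node exists for: the wake-up can be
	 * delivered before the faulting task ever goes to sleep. */
	apf_wake(42);
	apf_wait(42);
	puts("wake-before-wait resolved via dummy entry");
	return 0;
}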
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b81a9b7c2ca..93e8120b802 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -31,6 +31,7 @@
 
 #include <asm/tlbflush.h>
 #include <asm/desc.h>
+#include <asm/kvm_para.h>
 
 #include <asm/virtext.h>
 #include "trace.h"
@@ -133,6 +134,7 @@ struct vcpu_svm {
 
 	unsigned int3_injected;
 	unsigned long int3_rip;
+	u32 apf_reason;
 };
 
 #define MSR_INVALID 0xffffffffU
@@ -1383,16 +1385,33 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
 
 static int pf_interception(struct vcpu_svm *svm)
 {
-	u64 fault_address;
+	u64 fault_address = svm->vmcb->control.exit_info_2;
 	u32 error_code;
+	int r = 1;
 
-	fault_address = svm->vmcb->control.exit_info_2;
-	error_code = svm->vmcb->control.exit_info_1;
+	switch (svm->apf_reason) {
+	default:
+		error_code = svm->vmcb->control.exit_info_1;
 
-	trace_kvm_page_fault(fault_address, error_code);
-	if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
-		kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
-	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
+		trace_kvm_page_fault(fault_address, error_code);
+		if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
+			kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
+		r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
+		break;
+	case KVM_PV_REASON_PAGE_NOT_PRESENT:
+		svm->apf_reason = 0;
+		local_irq_disable();
+		kvm_async_pf_task_wait(fault_address);
+		local_irq_enable();
+		break;
+	case KVM_PV_REASON_PAGE_READY:
+		svm->apf_reason = 0;
+		local_irq_disable();
+		kvm_async_pf_task_wake(fault_address);
+		local_irq_enable();
+		break;
+	}
+	return r;
 }
 
 static int db_interception(struct vcpu_svm *svm)
@@ -1836,8 +1855,8 @@ static int nested_svm_exit_special(struct vcpu_svm *svm)
 		return NESTED_EXIT_HOST;
 		break;
 	case SVM_EXIT_EXCP_BASE + PF_VECTOR:
-		/* When we're shadowing, trap PFs */
-		if (!npt_enabled)
+		/* When we're shadowing, trap PFs, but not async PF */
+		if (!npt_enabled && svm->apf_reason == 0)
 			return NESTED_EXIT_HOST;
 		break;
 	case SVM_EXIT_EXCP_BASE + NM_VECTOR:
@@ -1893,6 +1912,10 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
 		u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
 		if (svm->nested.intercept_exceptions & excp_bits)
 			vmexit = NESTED_EXIT_DONE;
+		/* an async page fault always causes a vmexit */
+		else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
+			 svm->apf_reason != 0)
+			vmexit = NESTED_EXIT_DONE;
 		break;
 	}
 	case SVM_EXIT_ERR: {
@@ -3414,6 +3437,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	svm->next_rip = 0;
 
+	/* if the exit is due to a #PF, check for an async PF */
+	if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
+		svm->apf_reason = kvm_read_and_reset_pf_reason();
+
 	if (npt_enabled) {
 		vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
 		vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
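
For orientation, a hedged sketch of the other half of the protocol the svm.c hunks react to (assumption: this simplification is mine; the real host-side delivery lives in KVM's common async-PF code, not in this patch). The host stores a reason in the guest's registered apf data, then injects a #PF whose CR2 carries a rendezvous token rather than a fault address; svm_vcpu_run above captures that reason at exit time so pf_interception can tell a real page fault from a notification:

#include <stdint.h>

#define KVM_PV_REASON_PAGE_NOT_PRESENT 1
#define KVM_PV_REASON_PAGE_READY 2

/* Stand-in for the guest's registered kvm_vcpu_pv_apf_data. */
struct guest_apf_channel {
	uint32_t reason;
};

/* Hypothetical: stands in for the VMM's exception-injection path. */
void inject_page_fault(uint32_t cr2_value);

/* Host writes the reason, then injects #PF with the token in CR2. */
void host_notify(struct guest_apf_channel *ch, uint32_t token, int ready)
{
	ch->reason = ready ? KVM_PV_REASON_PAGE_READY
			   : KVM_PV_REASON_PAGE_NOT_PRESENT;
	inject_page_fault(token);	/* guest sees the token in CR2 */
}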