Diffstat (limited to 'arch/x86/kernel')

 arch/x86/kernel/entry_32.S |  10 ++++++++++
 arch/x86/kernel/entry_64.S |   3 +++
 arch/x86/kernel/i387.c     |   1 +
 arch/x86/kernel/kvm.c      | 317 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/kvmclock.c |  13 +------------
 5 files changed, 332 insertions(+), 12 deletions(-)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 591e60104278..c8b4efad7ebb 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1406,6 +1406,16 @@ ENTRY(general_protection)
         CFI_ENDPROC
 END(general_protection)
 
+#ifdef CONFIG_KVM_GUEST
+ENTRY(async_page_fault)
+        RING0_EC_FRAME
+        pushl $do_async_page_fault
+        CFI_ADJUST_CFA_OFFSET 4
+        jmp error_code
+        CFI_ENDPROC
+END(async_page_fault)
+#endif
+
 /*
  * End of kprobes section
  */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index d3b895f375d3..aed1ffbeb0c9 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1329,6 +1329,9 @@ errorentry xen_stack_segment do_stack_segment
 #endif
 errorentry general_protection do_general_protection
 errorentry page_fault do_page_fault
+#ifdef CONFIG_KVM_GUEST
+errorentry async_page_fault do_async_page_fault
+#endif
 #ifdef CONFIG_X86_MCE
 paranoidzeroentry machine_check *machine_check_vector(%rip)
 #endif
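
Both stubs funnel the page-fault vector into do_async_page_fault(), which tells a real #PF apart from the two paravirtual notifications by reading a per-CPU word the host fills in before injecting the fault; the token itself travels in CR2. For orientation, the shared layout looks roughly like the sketch below, a self-contained userspace restatement of struct kvm_vcpu_pv_apf_data from the asm/kvm_para.h of this period; treat the constants and exact padding as assumptions, not as part of this diff.

#include <stdint.h>

#define KVM_PV_REASON_PAGE_NOT_PRESENT 1   /* host paged the page out; task should wait */
#define KVM_PV_REASON_PAGE_READY       2   /* page is back; wake the waiting task */

struct kvm_vcpu_pv_apf_data {
        uint32_t reason;     /* 0 means an ordinary #PF */
        uint8_t  pad[60];    /* pads 'reason' out to a full 64-byte cache line */
        uint32_t enabled;    /* set by the guest once the MSR handshake is done */
};

The 64-byte alignment is what lets kvm_guest_cpu_init() further down pass the area's physical address and the enable flags through a single MSR write.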
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 58bb239a2fd7..e60c38cc0eed 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -169,6 +169,7 @@ int init_fpu(struct task_struct *tsk)
         set_stopped_child_used_math(tsk);
         return 0;
 }
+EXPORT_SYMBOL_GPL(init_fpu);
 
 /*
  * The xstateregs_active() routine is the same as the fpregs_active() routine,
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 63b0ec8d3d4a..8dc44662394b 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -27,16 +27,37 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/hardirq.h>
+#include <linux/notifier.h>
+#include <linux/reboot.h>
+#include <linux/hash.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/kprobes.h>
 #include <asm/timer.h>
+#include <asm/cpu.h>
+#include <asm/traps.h>
+#include <asm/desc.h>
+#include <asm/tlbflush.h>
 
 #define MMU_QUEUE_SIZE 1024
 
+static int kvmapf = 1;
+
+static int parse_no_kvmapf(char *arg)
+{
+        kvmapf = 0;
+        return 0;
+}
+
+early_param("no-kvmapf", parse_no_kvmapf);
+
 struct kvm_para_state {
         u8 mmu_queue[MMU_QUEUE_SIZE];
         int mmu_queue_len;
 };
 
 static DEFINE_PER_CPU(struct kvm_para_state, para_state);
+static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
 
 static struct kvm_para_state *kvm_para_state(void)
 {
@@ -50,6 +71,195 @@ static void kvm_io_delay(void)
 {
 }
 
+#define KVM_TASK_SLEEP_HASHBITS 8
+#define KVM_TASK_SLEEP_HASHSIZE (1<<KVM_TASK_SLEEP_HASHBITS)
+
+struct kvm_task_sleep_node {
+        struct hlist_node link;
+        wait_queue_head_t wq;
+        u32 token;
+        int cpu;
+        bool halted;
+        struct mm_struct *mm;
+};
+
+static struct kvm_task_sleep_head {
+        spinlock_t lock;
+        struct hlist_head list;
+} async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];
+
+static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
+                                                  u32 token)
+{
+        struct hlist_node *p;
+
+        hlist_for_each(p, &b->list) {
+                struct kvm_task_sleep_node *n =
+                        hlist_entry(p, typeof(*n), link);
+                if (n->token == token)
+                        return n;
+        }
+
+        return NULL;
+}
+
+void kvm_async_pf_task_wait(u32 token)
+{
+        u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
+        struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
+        struct kvm_task_sleep_node n, *e;
+        DEFINE_WAIT(wait);
+        int cpu, idle;
+
+        cpu = get_cpu();
+        idle = idle_cpu(cpu);
+        put_cpu();
+
+        spin_lock(&b->lock);
+        e = _find_apf_task(b, token);
+        if (e) {
+                /* dummy entry exists -> wake up was delivered ahead of PF */
+                hlist_del(&e->link);
+                kfree(e);
+                spin_unlock(&b->lock);
+                return;
+        }
+
+        n.token = token;
+        n.cpu = smp_processor_id();
+        n.mm = current->active_mm;
+        n.halted = idle || preempt_count() > 1;
+        atomic_inc(&n.mm->mm_count);
+        init_waitqueue_head(&n.wq);
+        hlist_add_head(&n.link, &b->list);
+        spin_unlock(&b->lock);
+
+        for (;;) {
+                if (!n.halted)
+                        prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
+                if (hlist_unhashed(&n.link))
+                        break;
+
+                if (!n.halted) {
+                        local_irq_enable();
+                        schedule();
+                        local_irq_disable();
+                } else {
+                        /*
+                         * We cannot reschedule. So halt.
+                         */
+                        native_safe_halt();
+                        local_irq_disable();
+                }
+        }
+        if (!n.halted)
+                finish_wait(&n.wq, &wait);
+
+        return;
+}
+EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);
+
+static void apf_task_wake_one(struct kvm_task_sleep_node *n)
+{
+        hlist_del_init(&n->link);
+        if (!n->mm)
+                return;
+        mmdrop(n->mm);
+        if (n->halted)
+                smp_send_reschedule(n->cpu);
+        else if (waitqueue_active(&n->wq))
+                wake_up(&n->wq);
+}
+
+static void apf_task_wake_all(void)
+{
+        int i;
+
+        for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
+                struct hlist_node *p, *next;
+                struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
+                spin_lock(&b->lock);
+                hlist_for_each_safe(p, next, &b->list) {
+                        struct kvm_task_sleep_node *n =
+                                hlist_entry(p, typeof(*n), link);
+                        if (n->cpu == smp_processor_id())
+                                apf_task_wake_one(n);
+                }
+                spin_unlock(&b->lock);
+        }
+}
+
+void kvm_async_pf_task_wake(u32 token)
+{
+        u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
+        struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
+        struct kvm_task_sleep_node *n;
+
+        if (token == ~0) {
+                apf_task_wake_all();
+                return;
+        }
+
+again:
+        spin_lock(&b->lock);
+        n = _find_apf_task(b, token);
+        if (!n) {
+                /*
+                 * async PF was not yet handled.
+                 * Add dummy entry for the token.
+                 */
+                n = kmalloc(sizeof(*n), GFP_ATOMIC);
+                if (!n) {
+                        /*
+                         * Allocation failed! Busy wait while other cpu
+                         * handles async PF.
+                         */
+                        spin_unlock(&b->lock);
+                        cpu_relax();
+                        goto again;
+                }
+                n->token = token;
+                n->cpu = smp_processor_id();
+                n->mm = NULL;
+                init_waitqueue_head(&n->wq);
+                hlist_add_head(&n->link, &b->list);
+        } else
+                apf_task_wake_one(n);
+        spin_unlock(&b->lock);
+        return;
+}
+EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
+
+u32 kvm_read_and_reset_pf_reason(void)
+{
+        u32 reason = 0;
+
+        if (__get_cpu_var(apf_reason).enabled) {
+                reason = __get_cpu_var(apf_reason).reason;
+                __get_cpu_var(apf_reason).reason = 0;
+        }
+
+        return reason;
+}
+EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
+
+dotraplinkage void __kprobes
+do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
+{
+        switch (kvm_read_and_reset_pf_reason()) {
+        default:
+                do_page_fault(regs, error_code);
+                break;
+        case KVM_PV_REASON_PAGE_NOT_PRESENT:
+                /* page is swapped out by the host. */
+                kvm_async_pf_task_wait((u32)read_cr2());
+                break;
+        case KVM_PV_REASON_PAGE_READY:
+                kvm_async_pf_task_wake((u32)read_cr2());
+                break;
+        }
+}
+
 static void kvm_mmu_op(void *buffer, unsigned len)
 {
         int r;
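
The hunk above is the heart of the guest side: kvm_async_pf_task_wait() hashes the host-supplied token into one of 256 buckets and sleeps (or halts when it cannot schedule), while kvm_async_pf_task_wake() either wakes the queued task or, if the "page ready" notification raced ahead of the fault, leaves a dummy node behind for the waiter to find and discard. A minimal userspace analogue of that handshake, with a pthread mutex and condition variable standing in for the spinlock and waitqueue (names and structure here are hypothetical, for illustration only; build with cc -std=c99 -pthread):

#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>

#define HASHSIZE 256                            /* 1 << KVM_TASK_SLEEP_HASHBITS */

struct node {
        struct node *next;
        uint32_t token;
        int dummy;                              /* wake arrived before the waiter */
        pthread_cond_t cv;
};

static struct bucket {
        pthread_mutex_t lock;
        struct node *head;
} sleepers[HASHSIZE];

static void sleepers_init(void)                 /* call once, before any wait/wake */
{
        for (int i = 0; i < HASHSIZE; i++)
                pthread_mutex_init(&sleepers[i].lock, NULL);
}

/* return the link pointing at the node carrying this token, or NULL */
static struct node **find(struct bucket *b, uint32_t token)
{
        struct node **pp;

        for (pp = &b->head; *pp; pp = &(*pp)->next)
                if ((*pp)->token == token)
                        return pp;
        return NULL;
}

static struct node *unlink_node(struct node **pp)
{
        struct node *n = *pp;

        *pp = n->next;
        return n;
}

void token_wait(uint32_t token)                 /* ~ kvm_async_pf_task_wait() */
{
        struct bucket *b = &sleepers[token % HASHSIZE]; /* stands in for hash_32() */
        struct node self = { .token = token };

        pthread_mutex_lock(&b->lock);
        struct node **pp = find(b, token);
        if (pp && (*pp)->dummy) {               /* wake was delivered ahead of us */
                free(unlink_node(pp));
                pthread_mutex_unlock(&b->lock);
                return;
        }
        pthread_cond_init(&self.cv, NULL);
        self.next = b->head;
        b->head = &self;
        while (find(b, token))                  /* token_wake() unlinks us */
                pthread_cond_wait(&self.cv, &b->lock);
        pthread_mutex_unlock(&b->lock);
}

void token_wake(uint32_t token)                 /* ~ kvm_async_pf_task_wake() */
{
        struct bucket *b = &sleepers[token % HASHSIZE];
        struct node **pp;

        pthread_mutex_lock(&b->lock);
        pp = find(b, token);
        if (!pp) {                              /* waiter not queued yet: leave a dummy */
                struct node *n = calloc(1, sizeof(*n)); /* kernel busy-waits on failure */
                if (!n)
                        abort();
                n->token = token;
                n->dummy = 1;
                n->next = b->head;
                b->head = n;
        } else {
                pthread_cond_signal(&(*pp)->cv);
                unlink_node(pp);
        }
        pthread_mutex_unlock(&b->lock);
}

Call sleepers_init() once up front; like the kernel code, the sketch assumes tokens are unique among outstanding faults, with one wait and one wake per token.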
@@ -231,10 +441,117 @@ static void __init paravirt_ops_setup(void)
 #endif
 }
 
+void __cpuinit kvm_guest_cpu_init(void)
+{
+        if (!kvm_para_available())
+                return;
+
+        if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
+                u64 pa = __pa(&__get_cpu_var(apf_reason));
+
+#ifdef CONFIG_PREEMPT
+                pa |= KVM_ASYNC_PF_SEND_ALWAYS;
+#endif
+                wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED);
+                __get_cpu_var(apf_reason).enabled = 1;
+                printk(KERN_INFO"KVM setup async PF for cpu %d\n",
+                       smp_processor_id());
+        }
+}
+
+static void kvm_pv_disable_apf(void *unused)
+{
+        if (!__get_cpu_var(apf_reason).enabled)
+                return;
+
+        wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
+        __get_cpu_var(apf_reason).enabled = 0;
+
+        printk(KERN_INFO"Unregister pv shared memory for cpu %d\n",
+               smp_processor_id());
+}
+
+static int kvm_pv_reboot_notify(struct notifier_block *nb,
+                                unsigned long code, void *unused)
+{
+        if (code == SYS_RESTART)
+                on_each_cpu(kvm_pv_disable_apf, NULL, 1);
+        return NOTIFY_DONE;
+}
+
+static struct notifier_block kvm_pv_reboot_nb = {
+        .notifier_call = kvm_pv_reboot_notify,
+};
+
+#ifdef CONFIG_SMP
+static void __init kvm_smp_prepare_boot_cpu(void)
+{
+#ifdef CONFIG_KVM_CLOCK
+        WARN_ON(kvm_register_clock("primary cpu clock"));
+#endif
+        kvm_guest_cpu_init();
+        native_smp_prepare_boot_cpu();
+}
+
+static void kvm_guest_cpu_online(void *dummy)
+{
+        kvm_guest_cpu_init();
+}
+
+static void kvm_guest_cpu_offline(void *dummy)
+{
+        kvm_pv_disable_apf(NULL);
+        apf_task_wake_all();
+}
+
+static int __cpuinit kvm_cpu_notify(struct notifier_block *self,
+                                    unsigned long action, void *hcpu)
+{
+        int cpu = (unsigned long)hcpu;
+        switch (action) {
+        case CPU_ONLINE:
+        case CPU_DOWN_FAILED:
+        case CPU_ONLINE_FROZEN:
+                smp_call_function_single(cpu, kvm_guest_cpu_online, NULL, 0);
+                break;
+        case CPU_DOWN_PREPARE:
+        case CPU_DOWN_PREPARE_FROZEN:
+                smp_call_function_single(cpu, kvm_guest_cpu_offline, NULL, 1);
+                break;
+        default:
+                break;
+        }
+        return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata kvm_cpu_notifier = {
+        .notifier_call = kvm_cpu_notify,
+};
+#endif
+
+static void __init kvm_apf_trap_init(void)
+{
+        set_intr_gate(14, &async_page_fault);
+}
+
 void __init kvm_guest_init(void)
 {
+        int i;
+
         if (!kvm_para_available())
                 return;
 
         paravirt_ops_setup();
+        register_reboot_notifier(&kvm_pv_reboot_nb);
+        for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
+                spin_lock_init(&async_pf_sleepers[i].lock);
+        if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
+                x86_init.irqs.trap_init = kvm_apf_trap_init;
+
+#ifdef CONFIG_SMP
+        smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
+        register_cpu_notifier(&kvm_cpu_notifier);
+#else
+        kvm_guest_cpu_init();
+#endif
 }
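
kvm_guest_cpu_init() above arms the mechanism for one CPU with a single wrmsrl(): because apf_reason is __aligned(64), the low six bits of its physical address are guaranteed zero and are reused as flag bits. A sketch of how that value decomposes (the MSR number and flag constants mirror the asm/kvm_para.h of this period and are restated here as assumptions, not taken from the diff itself):

#include <assert.h>
#include <stdint.h>

#define MSR_KVM_ASYNC_PF_EN      0x4b564d02     /* KVM custom MSR written above */
#define KVM_ASYNC_PF_ENABLED     (1 << 0)
#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)

/* Compose the value kvm_guest_cpu_init() hands to wrmsrl(). */
static uint64_t apf_msr_value(uint64_t pa_of_apf_reason, int preemptible)
{
        /* the apf_reason area is 64-byte aligned, so bits 5:0 are free */
        assert((pa_of_apf_reason & 0x3f) == 0);

        uint64_t val = pa_of_apf_reason | KVM_ASYNC_PF_ENABLED;

        if (preemptible)        /* the CONFIG_PREEMPT case in the diff */
                val |= KVM_ASYNC_PF_SEND_ALWAYS;
        return val;
}

KVM_ASYNC_PF_SEND_ALWAYS asks the host to deliver notifications even while the vcpu runs in kernel mode; the diff sets it only under CONFIG_PREEMPT, where the kernel can reschedule at such fault sites.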
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index ca43ce31a19c..f98d3eafe07a 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -125,7 +125,7 @@ static struct clocksource kvm_clock = {
         .flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static int kvm_register_clock(char *txt)
+int kvm_register_clock(char *txt)
 {
         int cpu = smp_processor_id();
         int low, high, ret;
@@ -152,14 +152,6 @@ static void __cpuinit kvm_setup_secondary_clock(void)
 }
 #endif
 
-#ifdef CONFIG_SMP
-static void __init kvm_smp_prepare_boot_cpu(void)
-{
-        WARN_ON(kvm_register_clock("primary cpu clock"));
-        native_smp_prepare_boot_cpu();
-}
-#endif
-
 /*
  * After the clock is registered, the host will keep writing to the
  * registered memory location. If the guest happens to shutdown, this memory
@@ -206,9 +198,6 @@ void __init kvmclock_init(void)
         x86_cpuinit.setup_percpu_clockev =
                 kvm_setup_secondary_clock;
 #endif
-#ifdef CONFIG_SMP
-        smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
-#endif
         machine_ops.shutdown = kvm_shutdown;
 #ifdef CONFIG_KEXEC
         machine_ops.crash_shutdown = kvm_crash_shutdown;