diff options
author | Boqun Feng <boqun.feng@gmail.com> | 2017-10-03 09:36:51 -0400 |
---|---|---|
committer | Radim Krčmář <rkrcmar@redhat.com> | 2017-10-04 12:28:53 -0400 |
commit | a2b7861bb33b2538420bb5d8554153484d3f961f (patch) | |
tree | 3d2b1e460e56ecb7155d73e6482ad4908d95248e | |
parent | 2fb1e946450a4fef74bb72f360555f7760d816f0 (diff) |
kvm/x86: Avoid async PF preempting the kernel incorrectly
Currently, in a PREEMPT_COUNT=n kernel, kvm_async_pf_task_wait() could call
schedule() to reschedule in some cases. This could result in
accidentally ending the current RCU read-side critical section early,
causing random memory corruption in the guest, or otherwise preempting
the currently running task between preempt_disable() and
preempt_enable().
The difficulty in handling this well is that we don't know whether an
async PF was delivered in a preemptible section or in an RCU read-side critical
section for PREEMPT_COUNT=n, since preempt_disable()/preempt_enable() and
rcu_read_lock()/rcu_read_unlock() are both no-ops in that case.
To cure this, we treat any async PF interrupting a kernel context as one
that cannot be preempted, preventing kvm_async_pf_task_wait() from choosing
the schedule() path in that case.
To do so, a second parameter for kvm_async_pf_task_wait() is introduced,
so that we know whether it's called from a context interrupting the
kernel, and the parameter is set properly in all the callsites.
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
-rw-r--r-- | arch/x86/include/asm/kvm_para.h | 4 | ||||
-rw-r--r-- | arch/x86/kernel/kvm.c | 14 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 2 |
3 files changed, 13 insertions, 7 deletions
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index bc62e7cbf1b1..59ad3d132353 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
@@ -88,7 +88,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, | |||
88 | bool kvm_para_available(void); | 88 | bool kvm_para_available(void); |
89 | unsigned int kvm_arch_para_features(void); | 89 | unsigned int kvm_arch_para_features(void); |
90 | void __init kvm_guest_init(void); | 90 | void __init kvm_guest_init(void); |
91 | void kvm_async_pf_task_wait(u32 token); | 91 | void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); |
92 | void kvm_async_pf_task_wake(u32 token); | 92 | void kvm_async_pf_task_wake(u32 token); |
93 | u32 kvm_read_and_reset_pf_reason(void); | 93 | u32 kvm_read_and_reset_pf_reason(void); |
94 | extern void kvm_disable_steal_time(void); | 94 | extern void kvm_disable_steal_time(void); |
@@ -103,7 +103,7 @@ static inline void kvm_spinlock_init(void) | |||
103 | 103 | ||
104 | #else /* CONFIG_KVM_GUEST */ | 104 | #else /* CONFIG_KVM_GUEST */ |
105 | #define kvm_guest_init() do {} while (0) | 105 | #define kvm_guest_init() do {} while (0) |
106 | #define kvm_async_pf_task_wait(T) do {} while(0) | 106 | #define kvm_async_pf_task_wait(T, I) do {} while(0) |
107 | #define kvm_async_pf_task_wake(T) do {} while(0) | 107 | #define kvm_async_pf_task_wake(T) do {} while(0) |
108 | 108 | ||
109 | static inline bool kvm_para_available(void) | 109 | static inline bool kvm_para_available(void) |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index e675704fa6f7..8bb9594d0761 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -117,7 +117,11 @@ static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b, | |||
117 | return NULL; | 117 | return NULL; |
118 | } | 118 | } |
119 | 119 | ||
120 | void kvm_async_pf_task_wait(u32 token) | 120 | /* |
121 | * @interrupt_kernel: Is this called from a routine which interrupts the kernel | ||
122 | * (other than user space)? | ||
123 | */ | ||
124 | void kvm_async_pf_task_wait(u32 token, int interrupt_kernel) | ||
121 | { | 125 | { |
122 | u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); | 126 | u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); |
123 | struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; | 127 | struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; |
@@ -140,8 +144,10 @@ void kvm_async_pf_task_wait(u32 token) | |||
140 | 144 | ||
141 | n.token = token; | 145 | n.token = token; |
142 | n.cpu = smp_processor_id(); | 146 | n.cpu = smp_processor_id(); |
143 | n.halted = is_idle_task(current) || preempt_count() > 1 || | 147 | n.halted = is_idle_task(current) || |
144 | rcu_preempt_depth(); | 148 | (IS_ENABLED(CONFIG_PREEMPT_COUNT) |
149 | ? preempt_count() > 1 || rcu_preempt_depth() | ||
150 | : interrupt_kernel); | ||
145 | init_swait_queue_head(&n.wq); | 151 | init_swait_queue_head(&n.wq); |
146 | hlist_add_head(&n.link, &b->list); | 152 | hlist_add_head(&n.link, &b->list); |
147 | raw_spin_unlock(&b->lock); | 153 | raw_spin_unlock(&b->lock); |
@@ -269,7 +275,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
269 | case KVM_PV_REASON_PAGE_NOT_PRESENT: | 275 | case KVM_PV_REASON_PAGE_NOT_PRESENT: |
270 | /* page is swapped out by the host. */ | 276 | /* page is swapped out by the host. */ |
271 | prev_state = exception_enter(); | 277 | prev_state = exception_enter(); |
272 | kvm_async_pf_task_wait((u32)read_cr2()); | 278 | kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs)); |
273 | exception_exit(prev_state); | 279 | exception_exit(prev_state); |
274 | break; | 280 | break; |
275 | case KVM_PV_REASON_PAGE_READY: | 281 | case KVM_PV_REASON_PAGE_READY: |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index eca30c1eb1d9..106d4a029a8a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -3837,7 +3837,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, | |||
3837 | case KVM_PV_REASON_PAGE_NOT_PRESENT: | 3837 | case KVM_PV_REASON_PAGE_NOT_PRESENT: |
3838 | vcpu->arch.apf.host_apf_reason = 0; | 3838 | vcpu->arch.apf.host_apf_reason = 0; |
3839 | local_irq_disable(); | 3839 | local_irq_disable(); |
3840 | kvm_async_pf_task_wait(fault_address); | 3840 | kvm_async_pf_task_wait(fault_address, 0); |
3841 | local_irq_enable(); | 3841 | local_irq_enable(); |
3842 | break; | 3842 | break; |
3843 | case KVM_PV_REASON_PAGE_READY: | 3843 | case KVM_PV_REASON_PAGE_READY: |