aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Boqun Feng <boqun.feng@gmail.com> 2017-10-03 09:36:51 -0400
committer Radim Krčmář <rkrcmar@redhat.com> 2017-10-04 12:28:53 -0400
commit a2b7861bb33b2538420bb5d8554153484d3f961f (patch)
tree 3d2b1e460e56ecb7155d73e6482ad4908d95248e
parent 2fb1e946450a4fef74bb72f360555f7760d816f0 (diff)
kvm/x86: Avoid async PF preempting the kernel incorrectly
Currently, in a PREEMPT_COUNT=n kernel, kvm_async_pf_task_wait() could call schedule() to reschedule in some cases. This could result in accidentally ending the current RCU read-side critical section early, causing random memory corruption in the guest, or otherwise preempting the currently running task between preempt_disable() and preempt_enable(). This is difficult to handle well because, for PREEMPT_COUNT=n, we don't know whether an async PF was delivered in a preemptible section or in an RCU read-side critical section, since preempt_disable()/preempt_enable() and rcu_read_lock()/rcu_read_unlock() are all no-ops in that case. To cure this, we treat any async PF interrupting a kernel context as one that cannot be preempted, preventing kvm_async_pf_task_wait() from choosing the schedule() path in that case. To do so, a second parameter for kvm_async_pf_task_wait() is introduced, so that we know whether it is called from a context interrupting the kernel, and the parameter is set properly at all the call sites. Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Wanpeng Li <wanpeng.li@hotmail.com> Cc: stable@vger.kernel.org Signed-off-by: Boqun Feng <boqun.feng@gmail.com> Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
-rw-r--r-- arch/x86/include/asm/kvm_para.h 4
-rw-r--r-- arch/x86/kernel/kvm.c 14
-rw-r--r-- arch/x86/kvm/mmu.c 2
3 files changed, 13 insertions, 7 deletions
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index bc62e7cbf1b1..59ad3d132353 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -88,7 +88,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
88bool kvm_para_available(void); 88bool kvm_para_available(void);
89unsigned int kvm_arch_para_features(void); 89unsigned int kvm_arch_para_features(void);
90void __init kvm_guest_init(void); 90void __init kvm_guest_init(void);
91void kvm_async_pf_task_wait(u32 token); 91void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
92void kvm_async_pf_task_wake(u32 token); 92void kvm_async_pf_task_wake(u32 token);
93u32 kvm_read_and_reset_pf_reason(void); 93u32 kvm_read_and_reset_pf_reason(void);
94extern void kvm_disable_steal_time(void); 94extern void kvm_disable_steal_time(void);
@@ -103,7 +103,7 @@ static inline void kvm_spinlock_init(void)
103 103
104#else /* CONFIG_KVM_GUEST */ 104#else /* CONFIG_KVM_GUEST */
105#define kvm_guest_init() do {} while (0) 105#define kvm_guest_init() do {} while (0)
106#define kvm_async_pf_task_wait(T) do {} while(0) 106#define kvm_async_pf_task_wait(T, I) do {} while(0)
107#define kvm_async_pf_task_wake(T) do {} while(0) 107#define kvm_async_pf_task_wake(T) do {} while(0)
108 108
109static inline bool kvm_para_available(void) 109static inline bool kvm_para_available(void)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index e675704fa6f7..8bb9594d0761 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -117,7 +117,11 @@ static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
117 return NULL; 117 return NULL;
118} 118}
119 119
120void kvm_async_pf_task_wait(u32 token) 120/*
121 * @interrupt_kernel: Is this called from a routine which interrupts the kernel
122 * (other than user space)?
123 */
124void kvm_async_pf_task_wait(u32 token, int interrupt_kernel)
121{ 125{
122 u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); 126 u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
123 struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; 127 struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
@@ -140,8 +144,10 @@ void kvm_async_pf_task_wait(u32 token)
140 144
141 n.token = token; 145 n.token = token;
142 n.cpu = smp_processor_id(); 146 n.cpu = smp_processor_id();
143 n.halted = is_idle_task(current) || preempt_count() > 1 || 147 n.halted = is_idle_task(current) ||
144 rcu_preempt_depth(); 148 (IS_ENABLED(CONFIG_PREEMPT_COUNT)
149 ? preempt_count() > 1 || rcu_preempt_depth()
150 : interrupt_kernel);
145 init_swait_queue_head(&n.wq); 151 init_swait_queue_head(&n.wq);
146 hlist_add_head(&n.link, &b->list); 152 hlist_add_head(&n.link, &b->list);
147 raw_spin_unlock(&b->lock); 153 raw_spin_unlock(&b->lock);
@@ -269,7 +275,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
269 case KVM_PV_REASON_PAGE_NOT_PRESENT: 275 case KVM_PV_REASON_PAGE_NOT_PRESENT:
270 /* page is swapped out by the host. */ 276 /* page is swapped out by the host. */
271 prev_state = exception_enter(); 277 prev_state = exception_enter();
272 kvm_async_pf_task_wait((u32)read_cr2()); 278 kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
273 exception_exit(prev_state); 279 exception_exit(prev_state);
274 break; 280 break;
275 case KVM_PV_REASON_PAGE_READY: 281 case KVM_PV_REASON_PAGE_READY:
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index eca30c1eb1d9..106d4a029a8a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3837,7 +3837,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
3837 case KVM_PV_REASON_PAGE_NOT_PRESENT: 3837 case KVM_PV_REASON_PAGE_NOT_PRESENT:
3838 vcpu->arch.apf.host_apf_reason = 0; 3838 vcpu->arch.apf.host_apf_reason = 0;
3839 local_irq_disable(); 3839 local_irq_disable();
3840 kvm_async_pf_task_wait(fault_address); 3840 kvm_async_pf_task_wait(fault_address, 0);
3841 local_irq_enable(); 3841 local_irq_enable();
3842 break; 3842 break;
3843 case KVM_PV_REASON_PAGE_READY: 3843 case KVM_PV_REASON_PAGE_READY: