author	Paul Mackerras <paulus@samba.org>	2014-12-04 00:43:28 -0500
committer	Alexander Graf <agraf@suse.de>	2014-12-17 07:20:09 -0500
commit	2711e248a352d7ecc8767b1dfa1f0c2356cb7f4b (patch)
tree	137e4238d3129bd5ad152111d5f4e6bc46210bc9
parent	a0499cf74698e94d1ba503da69794a35bced0523 (diff)
KVM: PPC: Book3S HV: Simplify locking around stolen time calculations
Currently the calculations of stolen time for PPC Book3S HV guests use fields
in both the vcpu struct and the kvmppc_vcore struct.  The fields in the
kvmppc_vcore struct are protected by the vcpu->arch.tbacct_lock of the vcpu
that has taken responsibility for running the virtual core.  This works
correctly but confuses lockdep, because it sees that the code takes the
tbacct_lock for a vcpu in kvmppc_remove_runnable() and then takes another
vcpu's tbacct_lock in vcore_stolen_time(), and it thinks there is a
possibility of deadlock, causing it to print reports like this:

=============================================
[ INFO: possible recursive locking detected ]
3.18.0-rc7-kvm-00016-g8db4bc6 #89 Not tainted
---------------------------------------------
qemu-system-ppc/6188 is trying to acquire lock:
 (&(&vcpu->arch.tbacct_lock)->rlock){......}, at: [<d00000000ecb1fe8>] .vcore_stolen_time+0x48/0xd0 [kvm_hv]

but task is already holding lock:
 (&(&vcpu->arch.tbacct_lock)->rlock){......}, at: [<d00000000ecb25a0>] .kvmppc_remove_runnable.part.3+0x30/0xd0 [kvm_hv]

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&(&vcpu->arch.tbacct_lock)->rlock);
  lock(&(&vcpu->arch.tbacct_lock)->rlock);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

3 locks held by qemu-system-ppc/6188:
 #0:  (&vcpu->mutex){+.+.+.}, at: [<d00000000eb93f98>] .vcpu_load+0x28/0xe0 [kvm]
 #1:  (&(&vcore->lock)->rlock){+.+...}, at: [<d00000000ecb41b0>] .kvmppc_vcpu_run_hv+0x530/0x1530 [kvm_hv]
 #2:  (&(&vcpu->arch.tbacct_lock)->rlock){......}, at: [<d00000000ecb25a0>] .kvmppc_remove_runnable.part.3+0x30/0xd0 [kvm_hv]

stack backtrace:
CPU: 40 PID: 6188 Comm: qemu-system-ppc Not tainted 3.18.0-rc7-kvm-00016-g8db4bc6 #89
Call Trace:
[c000000b2754f3f0] [c000000000b31b6c] .dump_stack+0x88/0xb4 (unreliable)
[c000000b2754f470] [c0000000000faeb8] .__lock_acquire+0x1878/0x2190
[c000000b2754f600] [c0000000000fbf0c] .lock_acquire+0xcc/0x1a0
[c000000b2754f6d0] [c000000000b2954c] ._raw_spin_lock_irq+0x4c/0x70
[c000000b2754f760] [d00000000ecb1fe8] .vcore_stolen_time+0x48/0xd0 [kvm_hv]
[c000000b2754f7f0] [d00000000ecb25b4] .kvmppc_remove_runnable.part.3+0x44/0xd0 [kvm_hv]
[c000000b2754f880] [d00000000ecb43ec] .kvmppc_vcpu_run_hv+0x76c/0x1530 [kvm_hv]
[c000000b2754f9f0] [d00000000eb9f46c] .kvmppc_vcpu_run+0x2c/0x40 [kvm]
[c000000b2754fa60] [d00000000eb9c9a4] .kvm_arch_vcpu_ioctl_run+0x54/0x160 [kvm]
[c000000b2754faf0] [d00000000eb94538] .kvm_vcpu_ioctl+0x498/0x760 [kvm]
[c000000b2754fcb0] [c000000000267eb4] .do_vfs_ioctl+0x444/0x770
[c000000b2754fd90] [c0000000002682a4] .SyS_ioctl+0xc4/0xe0
[c000000b2754fe30] [c0000000000092e4] syscall_exit+0x0/0x98

In order to make the locking easier to analyse, we change the code to use a
spinlock in the kvmppc_vcore struct to protect the stolen_tb and preempt_tb
fields.  This lock needs to be an irq-safe lock since it is used in the
kvmppc_core_vcpu_load_hv() and kvmppc_core_vcpu_put_hv() functions, which are
called with the scheduler rq lock held, which is an irq-safe lock.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
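The essence of the new scheme can be condensed into the sketch below. This is an illustrative reduction, not the literal file contents: the struct is pared down to the stolen-time fields, the helper names carry a _sketch suffix, and the vc->vcore_state check that the real vcore_stolen_time() performs is omitted. The authoritative code is in the diff that follows. The point of the sketch is the lock discipline: both the reader and the updater take only the per-vcore stoltb_lock, and they take it with spin_lock_irqsave(), so the same code remains safe when called with the scheduler rq lock (an irq-safe lock) already held.

#include <linux/spinlock.h>
#include <linux/types.h>

#define TB_NIL	(~(u64)0)	/* value meaning "no preemption timestamp" */

/* Pared-down view of the fields involved in stolen-time accounting. */
struct vcore_stolen_sketch {
	spinlock_t stoltb_lock;		/* protects stolen_tb and preempt_tb */
	u64 stolen_tb;			/* accumulated stolen time, in timebase ticks */
	u64 preempt_tb;			/* timebase at preemption, or TB_NIL */
};

/*
 * Reader: any task may call this; it only takes the vcore's own
 * stoltb_lock, so lockdep never sees one vcpu's tbacct_lock nested
 * inside another vcpu's tbacct_lock.
 */
static u64 vcore_stolen_time_sketch(struct vcore_stolen_sketch *vc, u64 now)
{
	unsigned long flags;
	u64 p;

	spin_lock_irqsave(&vc->stoltb_lock, flags);
	p = vc->stolen_tb;
	if (vc->preempt_tb != TB_NIL)
		p += now - vc->preempt_tb;
	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
	return p;
}

/*
 * Updater: runs on vcpu load, potentially under the irq-safe scheduler
 * rq lock, hence the irqsave/irqrestore variants.
 */
static void vcore_fold_stolen_sketch(struct vcore_stolen_sketch *vc, u64 now)
{
	unsigned long flags;

	spin_lock_irqsave(&vc->stoltb_lock, flags);
	if (vc->preempt_tb != TB_NIL) {
		vc->stolen_tb += now - vc->preempt_tb;
		vc->preempt_tb = TB_NIL;
	}
	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
}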
-rw-r--r--	arch/powerpc/include/asm/kvm_host.h	|  1
-rw-r--r--	arch/powerpc/kvm/book3s_hv.c		| 60
2 files changed, 32 insertions(+), 29 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 047855619cc4..7cf94a5e8411 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -297,6 +297,7 @@ struct kvmppc_vcore {
 	struct list_head runnable_threads;
 	spinlock_t lock;
 	wait_queue_head_t wq;
+	spinlock_t stoltb_lock;	/* protects stolen_tb and preempt_tb */
 	u64 stolen_tb;
 	u64 preempt_tb;
 	struct kvm_vcpu *runner;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 1a7a281be3bd..74afa2d06fbc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -135,11 +135,10 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
  * stolen.
  *
  * Updates to busy_stolen are protected by arch.tbacct_lock;
- * updates to vc->stolen_tb are protected by the arch.tbacct_lock
- * of the vcpu that has taken responsibility for running the vcore
- * (i.e. vc->runner).  The stolen times are measured in units of
- * timebase ticks.  (Note that the != TB_NIL checks below are
- * purely defensive; they should never fail.)
+ * updates to vc->stolen_tb are protected by the vcore->stoltb_lock
+ * lock.  The stolen times are measured in units of timebase ticks.
+ * (Note that the != TB_NIL checks below are purely defensive;
+ * they should never fail.)
  */
 
 static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
@@ -147,12 +146,21 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	unsigned long flags;
 
-	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
-	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE &&
-	    vc->preempt_tb != TB_NIL) {
-		vc->stolen_tb += mftb() - vc->preempt_tb;
-		vc->preempt_tb = TB_NIL;
+	/*
+	 * We can test vc->runner without taking the vcore lock,
+	 * because only this task ever sets vc->runner to this
+	 * vcpu, and once it is set to this vcpu, only this task
+	 * ever sets it to NULL.
+	 */
+	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
+		spin_lock_irqsave(&vc->stoltb_lock, flags);
+		if (vc->preempt_tb != TB_NIL) {
+			vc->stolen_tb += mftb() - vc->preempt_tb;
+			vc->preempt_tb = TB_NIL;
+		}
+		spin_unlock_irqrestore(&vc->stoltb_lock, flags);
 	}
+	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
 	    vcpu->arch.busy_preempt != TB_NIL) {
 		vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
@@ -166,9 +174,12 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	unsigned long flags;
 
-	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
-	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
+	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
+		spin_lock_irqsave(&vc->stoltb_lock, flags);
 		vc->preempt_tb = mftb();
+		spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+	}
+	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
 		vcpu->arch.busy_preempt = mftb();
 	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
@@ -505,25 +516,14 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
 static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
 {
 	u64 p;
+	unsigned long flags;
 
-	/*
-	 * If we are the task running the vcore, then since we hold
-	 * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb
-	 * can't be updated, so we don't need the tbacct_lock.
-	 * If the vcore is inactive, it can't become active (since we
-	 * hold the vcore lock), so the vcpu load/put functions won't
-	 * update stolen_tb/preempt_tb, and we don't need tbacct_lock.
-	 */
+	spin_lock_irqsave(&vc->stoltb_lock, flags);
+	p = vc->stolen_tb;
 	if (vc->vcore_state != VCORE_INACTIVE &&
-	    vc->runner->arch.run_task != current) {
-		spin_lock_irq(&vc->runner->arch.tbacct_lock);
-		p = vc->stolen_tb;
-		if (vc->preempt_tb != TB_NIL)
-			p += now - vc->preempt_tb;
-		spin_unlock_irq(&vc->runner->arch.tbacct_lock);
-	} else {
-		p = vc->stolen_tb;
-	}
+	    vc->preempt_tb != TB_NIL)
+		p += now - vc->preempt_tb;
+	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
 	return p;
 }
 
@@ -1359,6 +1359,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
 
 	INIT_LIST_HEAD(&vcore->runnable_threads);
 	spin_lock_init(&vcore->lock);
+	spin_lock_init(&vcore->stoltb_lock);
 	init_waitqueue_head(&vcore->wq);
 	vcore->preempt_tb = TB_NIL;
 	vcore->lpcr = kvm->arch.lpcr;
@@ -1696,6 +1697,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 	vc->n_woken = 0;
 	vc->nap_count = 0;
 	vc->entry_exit_count = 0;
+	vc->preempt_tb = TB_NIL;
 	vc->vcore_state = VCORE_STARTING;
 	vc->in_guest = 0;
 	vc->napping_threads = 0;