| author | Paul Mackerras <paulus@samba.org> | 2014-12-04 00:43:28 -0500 |
|---|---|---|
| committer | Alexander Graf <agraf@suse.de> | 2014-12-17 07:20:09 -0500 |
| commit | 2711e248a352d7ecc8767b1dfa1f0c2356cb7f4b | |
| tree | 137e4238d3129bd5ad152111d5f4e6bc46210bc9 | |
| parent | a0499cf74698e94d1ba503da69794a35bced0523 | |
KVM: PPC: Book3S HV: Simplify locking around stolen time calculations
Currently the calculation of stolen time for PPC Book3S HV guests
uses fields in both the vcpu struct and the kvmppc_vcore struct. The
fields in the kvmppc_vcore struct are protected by the
vcpu->arch.tbacct_lock of the vcpu that has taken responsibility for
running the virtual core. This works correctly but confuses lockdep,
because it sees the code take the tbacct_lock of one vcpu in
kvmppc_remove_runnable() and then take another vcpu's tbacct_lock in
vcore_stolen_time(). Since every tbacct_lock belongs to the same lock
class, lockdep cannot tell the two vcpus apart and thinks there is a
possibility of deadlock, causing it to print reports like this (a
sketch of the offending pattern follows the report):
=============================================
[ INFO: possible recursive locking detected ]
3.18.0-rc7-kvm-00016-g8db4bc6 #89 Not tainted
---------------------------------------------
qemu-system-ppc/6188 is trying to acquire lock:
(&(&vcpu->arch.tbacct_lock)->rlock){......}, at: [<d00000000ecb1fe8>] .vcore_stolen_time+0x48/0xd0 [kvm_hv]
but task is already holding lock:
(&(&vcpu->arch.tbacct_lock)->rlock){......}, at: [<d00000000ecb25a0>] .kvmppc_remove_runnable.part.3+0x30/0xd0 [kvm_hv]
other info that might help us debug this:
Possible unsafe locking scenario:

       CPU0
       ----
  lock(&(&vcpu->arch.tbacct_lock)->rlock);
  lock(&(&vcpu->arch.tbacct_lock)->rlock);

 *** DEADLOCK ***

May be due to missing lock nesting notation
3 locks held by qemu-system-ppc/6188:
#0: (&vcpu->mutex){+.+.+.}, at: [<d00000000eb93f98>] .vcpu_load+0x28/0xe0 [kvm]
#1: (&(&vcore->lock)->rlock){+.+...}, at: [<d00000000ecb41b0>] .kvmppc_vcpu_run_hv+0x530/0x1530 [kvm_hv]
#2: (&(&vcpu->arch.tbacct_lock)->rlock){......}, at: [<d00000000ecb25a0>] .kvmppc_remove_runnable.part.3+0x30/0xd0 [kvm_hv]
stack backtrace:
CPU: 40 PID: 6188 Comm: qemu-system-ppc Not tainted 3.18.0-rc7-kvm-00016-g8db4bc6 #89
Call Trace:
[c000000b2754f3f0] [c000000000b31b6c] .dump_stack+0x88/0xb4 (unreliable)
[c000000b2754f470] [c0000000000faeb8] .__lock_acquire+0x1878/0x2190
[c000000b2754f600] [c0000000000fbf0c] .lock_acquire+0xcc/0x1a0
[c000000b2754f6d0] [c000000000b2954c] ._raw_spin_lock_irq+0x4c/0x70
[c000000b2754f760] [d00000000ecb1fe8] .vcore_stolen_time+0x48/0xd0 [kvm_hv]
[c000000b2754f7f0] [d00000000ecb25b4] .kvmppc_remove_runnable.part.3+0x44/0xd0 [kvm_hv]
[c000000b2754f880] [d00000000ecb43ec] .kvmppc_vcpu_run_hv+0x76c/0x1530 [kvm_hv]
[c000000b2754f9f0] [d00000000eb9f46c] .kvmppc_vcpu_run+0x2c/0x40 [kvm]
[c000000b2754fa60] [d00000000eb9c9a4] .kvm_arch_vcpu_ioctl_run+0x54/0x160 [kvm]
[c000000b2754faf0] [d00000000eb94538] .kvm_vcpu_ioctl+0x498/0x760 [kvm]
[c000000b2754fcb0] [c000000000267eb4] .do_vfs_ioctl+0x444/0x770
[c000000b2754fd90] [c0000000002682a4] .SyS_ioctl+0xc4/0xe0
[c000000b2754fe30] [c0000000000092e4] syscall_exit+0x0/0x98
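The "May be due to missing lock nesting notation" hint points at the
root cause rather than a real deadlock: every vcpu's tbacct_lock is
initialized at the same spin_lock_init() call site, so all of them
share a single lock class, and lockdep cannot prove that the two vcpus
whose locks are nested are always distinct. The following is a
hypothetical minimal sketch (the struct and function are illustrative,
not from the kernel sources) of the pattern that triggers this kind of
report:

```c
#include <linux/spinlock.h>
#include <linux/types.h>

/* Illustrative stand-in for the per-vcpu accounting state. */
struct acct_demo {
	spinlock_t tbacct_lock;		/* all instances share one lock class */
	u64 stolen;
};

/*
 * Taking two locks of the same class nested, without a
 * spin_lock_nested() annotation, makes lockdep report
 * "possible recursive locking detected" even when
 * @runner and @other can never be the same object.
 */
static void nested_same_class(struct acct_demo *runner,
			      struct acct_demo *other)
{
	spin_lock_irq(&runner->tbacct_lock);
	spin_lock_irq(&other->tbacct_lock);	/* lockdep warns here */
	other->stolen += runner->stolen;	/* placeholder accounting */
	spin_unlock_irq(&other->tbacct_lock);
	spin_unlock_irq(&runner->tbacct_lock);
}
```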
In order to make the locking easier to analyse, we change the code to
use a spinlock in the kvmppc_vcore struct to protect the stolen_tb and
preempt_tb fields. This lock needs to be irq-safe, since it is used in
the kvmppc_core_vcpu_load_hv() and kvmppc_core_vcpu_put_hv()
functions, which are called with the scheduler rq lock held, itself an
irq-safe lock.
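As a rough sketch of the resulting pattern (the reduced struct and
helper name are hypothetical; stoltb_lock, stolen_tb, preempt_tb, and
TB_NIL come from the patch, with TB_NIL redefined here for
self-containment):

```c
#include <linux/spinlock.h>
#include <linux/types.h>

#define TB_NIL	(~(u64)0)	/* "no timestamp", as in book3s_hv.c */

/* Reduced stand-in for struct kvmppc_vcore. */
struct vcore_demo {
	spinlock_t stoltb_lock;		/* protects stolen_tb and preempt_tb */
	u64 stolen_tb;
	u64 preempt_tb;
};

/*
 * Fold an outstanding preemption interval into stolen_tb, as the
 * vcpu load hook does in the patch.  The lock must be taken
 * irq-safely (spin_lock_irqsave) everywhere: the load/put hooks
 * run with the scheduler's rq lock held, which is an irq-safe
 * lock, so any lock nested inside it must be irq-safe too.
 */
static void vcore_fold_preempt(struct vcore_demo *vc, u64 now)
{
	unsigned long flags;

	spin_lock_irqsave(&vc->stoltb_lock, flags);
	if (vc->preempt_tb != TB_NIL) {
		vc->stolen_tb += now - vc->preempt_tb;
		vc->preempt_tb = TB_NIL;
	}
	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
}
```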
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h |  1
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c        | 60
2 files changed, 32 insertions, 29 deletions
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 047855619cc4..7cf94a5e8411 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -297,6 +297,7 @@ struct kvmppc_vcore {
 	struct list_head runnable_threads;
 	spinlock_t lock;
 	wait_queue_head_t wq;
+	spinlock_t stoltb_lock;	/* protects stolen_tb and preempt_tb */
 	u64 stolen_tb;
 	u64 preempt_tb;
 	struct kvm_vcpu *runner;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 1a7a281be3bd..74afa2d06fbc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -135,11 +135,10 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
  * stolen.
  *
  * Updates to busy_stolen are protected by arch.tbacct_lock;
- * updates to vc->stolen_tb are protected by the arch.tbacct_lock
- * of the vcpu that has taken responsibility for running the vcore
- * (i.e. vc->runner). The stolen times are measured in units of
- * timebase ticks. (Note that the != TB_NIL checks below are
- * purely defensive; they should never fail.)
+ * updates to vc->stolen_tb are protected by the vcore->stoltb_lock
+ * lock. The stolen times are measured in units of timebase ticks.
+ * (Note that the != TB_NIL checks below are purely defensive;
+ * they should never fail.)
  */
 
 static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
@@ -147,12 +146,21 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	unsigned long flags;
 
-	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
-	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE &&
-	    vc->preempt_tb != TB_NIL) {
-		vc->stolen_tb += mftb() - vc->preempt_tb;
-		vc->preempt_tb = TB_NIL;
+	/*
+	 * We can test vc->runner without taking the vcore lock,
+	 * because only this task ever sets vc->runner to this
+	 * vcpu, and once it is set to this vcpu, only this task
+	 * ever sets it to NULL.
+	 */
+	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
+		spin_lock_irqsave(&vc->stoltb_lock, flags);
+		if (vc->preempt_tb != TB_NIL) {
+			vc->stolen_tb += mftb() - vc->preempt_tb;
+			vc->preempt_tb = TB_NIL;
+		}
+		spin_unlock_irqrestore(&vc->stoltb_lock, flags);
 	}
+	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
 	    vcpu->arch.busy_preempt != TB_NIL) {
 		vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
@@ -166,9 +174,12 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	unsigned long flags;
 
-	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
-	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
+	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
+		spin_lock_irqsave(&vc->stoltb_lock, flags);
 		vc->preempt_tb = mftb();
+		spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+	}
+	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
 		vcpu->arch.busy_preempt = mftb();
 	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
@@ -505,25 +516,14 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
 static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
 {
 	u64 p;
+	unsigned long flags;
 
-	/*
-	 * If we are the task running the vcore, then since we hold
-	 * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb
-	 * can't be updated, so we don't need the tbacct_lock.
-	 * If the vcore is inactive, it can't become active (since we
-	 * hold the vcore lock), so the vcpu load/put functions won't
-	 * update stolen_tb/preempt_tb, and we don't need tbacct_lock.
-	 */
+	spin_lock_irqsave(&vc->stoltb_lock, flags);
+	p = vc->stolen_tb;
 	if (vc->vcore_state != VCORE_INACTIVE &&
-	    vc->runner->arch.run_task != current) {
-		spin_lock_irq(&vc->runner->arch.tbacct_lock);
-		p = vc->stolen_tb;
-		if (vc->preempt_tb != TB_NIL)
-			p += now - vc->preempt_tb;
-		spin_unlock_irq(&vc->runner->arch.tbacct_lock);
-	} else {
-		p = vc->stolen_tb;
-	}
+	    vc->preempt_tb != TB_NIL)
+		p += now - vc->preempt_tb;
+	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
 	return p;
 }
 
@@ -1359,6 +1359,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
 
 	INIT_LIST_HEAD(&vcore->runnable_threads);
 	spin_lock_init(&vcore->lock);
+	spin_lock_init(&vcore->stoltb_lock);
 	init_waitqueue_head(&vcore->wq);
 	vcore->preempt_tb = TB_NIL;
 	vcore->lpcr = kvm->arch.lpcr;
@@ -1696,6 +1697,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 	vc->n_woken = 0;
 	vc->nap_count = 0;
 	vc->entry_exit_count = 0;
+	vc->preempt_tb = TB_NIL;
 	vc->vcore_state = VCORE_STARTING;
 	vc->in_guest = 0;
 	vc->napping_threads = 0;
