aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSerhii Popovych <spopovyc@redhat.com>2017-12-04 09:36:42 -0500
committerPaul Mackerras <paulus@ozlabs.org>2017-12-05 21:36:22 -0500
commit4ed11aeefda439c76ddae3ceebcfa4fad111f149 (patch)
tree438bfbae624079bda7757d708763656c8f962849
parent3073774e638ef18d222465fe92bfc8fccb90d288 (diff)
KVM: PPC: Book3S HV: Fix use after free in case of multiple resize requests
When serving multiple resize requests following could happen: CPU0 CPU1 ---- ---- kvm_vm_ioctl_resize_hpt_prepare(1); -> schedule_work() /* system_rq might be busy: delay */ kvm_vm_ioctl_resize_hpt_prepare(2); mutex_lock(); if (resize) { ... release_hpt_resize(); } ... resize_hpt_prepare_work() -> schedule_work() { mutex_unlock() /* resize->kvm could be wrong */ struct kvm *kvm = resize->kvm; mutex_lock(&kvm->lock); <<<< UAF ... } i.e. a second resize request with different order could be started by kvm_vm_ioctl_resize_hpt_prepare(), causing the previous request to be free()d when there's still an active worker thread which will try to access it. This leads to a use after free in point marked with UAF on the diagram above. To prevent this from happening, instead of unconditionally releasing a pre-existing resize structure from the prepare ioctl(), we check if the existing structure has an in-progress worker. We do that by checking if the resize->error == -EBUSY, which is safe because the resize->error field is protected by the kvm->lock. If there is an active worker, instead of releasing, we mark the structure as stale by unlinking it from kvm_struct. In the worker thread we check for a stale structure (with kvm->lock held), and in that case abort, releasing the stale structure ourself. We make the check both before and the actual allocation. Strictly, only the check afterwards is needed, the check before is an optimization: if the structure happens to become stale before the worker thread is dispatched, rather than during the allocation, it means we can avoid allocating then immediately freeing a potentially substantial amount of memory. This fixes following or similar host kernel crash message: [ 635.277361] Unable to handle kernel paging request for data at address 0x00000000 [ 635.277438] Faulting instruction address: 0xc00000000052f568 [ 635.277446] Oops: Kernel access of bad area, sig: 11 [#1] [ 635.277451] SMP NR_CPUS=2048 NUMA PowerNV [ 635.277470] Modules linked in: xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter nfsv3 nfs_acl nfs lockd grace fscache kvm_hv kvm rpcrdma sunrpc ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ext4 ib_srp scsi_transport_srp ib_ipoib mbcache jbd2 rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ocrdma(T) ib_core ses enclosure scsi_transport_sas sg shpchp leds_powernv ibmpowernv i2c_opal i2c_core powernv_rng ipmi_powernv ipmi_devintf ipmi_msghandler ip_tables xfs libcrc32c sr_mod sd_mod cdrom lpfc nvme_fc(T) nvme_fabrics nvme_core ipr nvmet_fc(T) tg3 nvmet libata be2net crc_t10dif crct10dif_generic scsi_transport_fc ptp scsi_tgt pps_core crct10dif_common dm_mirror dm_region_hash dm_log dm_mod [ 635.278687] CPU: 40 PID: 749 Comm: kworker/40:1 Tainted: G ------------ T 3.10.0.bz1510771+ #1 [ 635.278782] Workqueue: events resize_hpt_prepare_work [kvm_hv] [ 635.278851] task: c0000007e6840000 ti: c0000007e9180000 task.ti: c0000007e9180000 [ 635.278919] NIP: c00000000052f568 LR: c0000000009ea310 CTR: c0000000009ea4f0 [ 635.278988] REGS: c0000007e91837f0 TRAP: 0300 Tainted: G ------------ T (3.10.0.bz1510771+) [ 635.279077] MSR: 9000000100009033 <SF,HV,EE,ME,IR,DR,RI,LE> CR: 24002022 XER: 00000000 [ 635.279248] CFAR: c000000000009368 DAR: 0000000000000000 DSISR: 40000000 SOFTE: 1 GPR00: c0000000009ea310 c0000007e9183a70 c000000001250b00 c0000007e9183b10 GPR04: 0000000000000000 0000000000000000 c0000007e9183650 0000000000000000 GPR08: c0000007ffff7b80 00000000ffffffff 0000000080000028 d00000000d2529a0 GPR12: 0000000000002200 c000000007b56800 c000000000120028 c0000007f135bb40 GPR16: 0000000000000000 c000000005c1e018 c000000005c1e018 0000000000000000 GPR20: 0000000000000001 c0000000011bf778 0000000000000001 fffffffffffffef7 GPR24: 0000000000000000 c000000f1e262e50 0000000000000002 c0000007e9180000 GPR28: c000000f1e262e4c c000000f1e262e50 0000000000000000 c0000007e9183b10 [ 635.280149] NIP [c00000000052f568] __list_add+0x38/0x110 [ 635.280197] LR [c0000000009ea310] __mutex_lock_slowpath+0xe0/0x2c0 [ 635.280253] Call Trace: [ 635.280277] [c0000007e9183af0] [c0000000009ea310] __mutex_lock_slowpath+0xe0/0x2c0 [ 635.280356] [c0000007e9183b70] [c0000000009ea554] mutex_lock+0x64/0x70 [ 635.280426] [c0000007e9183ba0] [d00000000d24da04] resize_hpt_prepare_work+0xe4/0x1c0 [kvm_hv] [ 635.280507] [c0000007e9183c40] [c000000000113c0c] process_one_work+0x1dc/0x680 [ 635.280587] [c0000007e9183ce0] [c000000000114250] worker_thread+0x1a0/0x520 [ 635.280655] [c0000007e9183d80] [c00000000012010c] kthread+0xec/0x100 [ 635.280724] [c0000007e9183e30] [c00000000000a4b8] ret_from_kernel_thread+0x5c/0xa4 [ 635.280814] Instruction dump: [ 635.280880] 7c0802a6 fba1ffe8 fbc1fff0 7cbd2b78 fbe1fff8 7c9e2378 7c7f1b78 f8010010 [ 635.281099] f821ff81 e8a50008 7fa52040 40de00b8 <e8be0000> 7fbd2840 40de008c 7fbff040 [ 635.281324] ---[ end trace b628b73449719b9d ]--- Cc: stable@vger.kernel.org # v4.10+ Fixes: b5baa6877315 ("KVM: PPC: Book3S HV: KVM-HV HPT resizing implementation") Signed-off-by: Serhii Popovych <spopovyc@redhat.com> [dwg: Replaced BUG_ON()s with WARN_ONs() and reworded commit message for clarity] Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c50
1 files changed, 35 insertions, 15 deletions
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index f5f2c6bf5856..8355398f0bb6 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1419,16 +1419,20 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
1419 1419
1420static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize) 1420static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
1421{ 1421{
1422 BUG_ON(kvm->arch.resize_hpt != resize); 1422 if (WARN_ON(!mutex_is_locked(&kvm->lock)))
1423 return;
1423 1424
1424 if (!resize) 1425 if (!resize)
1425 return; 1426 return;
1426 1427
1427 if (resize->hpt.virt) 1428 if (resize->error != -EBUSY) {
1428 kvmppc_free_hpt(&resize->hpt); 1429 if (resize->hpt.virt)
1430 kvmppc_free_hpt(&resize->hpt);
1431 kfree(resize);
1432 }
1429 1433
1430 kvm->arch.resize_hpt = NULL; 1434 if (kvm->arch.resize_hpt == resize)
1431 kfree(resize); 1435 kvm->arch.resize_hpt = NULL;
1432} 1436}
1433 1437
1434static void resize_hpt_prepare_work(struct work_struct *work) 1438static void resize_hpt_prepare_work(struct work_struct *work)
@@ -1437,26 +1441,42 @@ static void resize_hpt_prepare_work(struct work_struct *work)
1437 struct kvm_resize_hpt, 1441 struct kvm_resize_hpt,
1438 work); 1442 work);
1439 struct kvm *kvm = resize->kvm; 1443 struct kvm *kvm = resize->kvm;
1440 int err; 1444 int err = 0;
1441 1445
1442 if (WARN_ON(resize->error != -EBUSY)) 1446 if (WARN_ON(resize->error != -EBUSY))
1443 return; 1447 return;
1444 1448
1445 resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", 1449 mutex_lock(&kvm->lock);
1446 resize->order);
1447 1450
1448 err = resize_hpt_allocate(resize); 1451 /* Request is still current? */
1452 if (kvm->arch.resize_hpt == resize) {
1453 /* We may request large allocations here:
1454 * do not sleep with kvm->lock held for a while.
1455 */
1456 mutex_unlock(&kvm->lock);
1449 1457
1450 /* We have strict assumption about -EBUSY 1458 resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
1451 * when preparing for HPT resize. 1459 resize->order);
1452 */
1453 if (WARN_ON(err == -EBUSY))
1454 err = -EINPROGRESS;
1455 1460
1456 mutex_lock(&kvm->lock); 1461 err = resize_hpt_allocate(resize);
1462
1463 /* We have strict assumption about -EBUSY
1464 * when preparing for HPT resize.
1465 */
1466 if (WARN_ON(err == -EBUSY))
1467 err = -EINPROGRESS;
1468
1469 mutex_lock(&kvm->lock);
1470 /* It is possible that kvm->arch.resize_hpt != resize
1471 * after we grab kvm->lock again.
1472 */
1473 }
1457 1474
1458 resize->error = err; 1475 resize->error = err;
1459 1476
1477 if (kvm->arch.resize_hpt != resize)
1478 resize_hpt_release(kvm, resize);
1479
1460 mutex_unlock(&kvm->lock); 1480 mutex_unlock(&kvm->lock);
1461} 1481}
1462 1482