diff options
author | Avi Kivity <avi@qumranet.com> | 2007-03-20 06:46:50 -0400 |
---|---|---|
committer | Avi Kivity <avi@qumranet.com> | 2007-05-03 03:52:25 -0400 |
commit | 039576c03c35e2f990ad9bb9c39e1bad3cd60d34 (patch) | |
tree | fa6c81a40a36d2c0da1cf20c5deb45cb9bd7ba95 /drivers/kvm/kvm_main.c | |
parent | f0fe510864a4520a85dfa35ae14f5f376c56efc7 (diff) |
KVM: Avoid guest virtual addresses in string pio userspace interface
The current string pio interface communicates using guest virtual addresses,
relying on userspace to translate addresses and to check permissions. This
interface cannot fully support guest smp, as the check needs to take into
account two pages at once in case an unaligned string transfer straddles a
page boundary.
Change the interface not to communicate guest addresses at all; instead use
a buffer page (mmaped by userspace) and do transfers there. The kernel
manages the virtual to physical translation and can perform the checks
atomically by taking the appropriate locks.
Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'drivers/kvm/kvm_main.c')
-rw-r--r-- | drivers/kvm/kvm_main.c | 183 |
1 files changed, 168 insertions, 15 deletions
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index ba7f43a4459e..205998c141fb 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c | |||
@@ -346,6 +346,17 @@ static void kvm_free_physmem(struct kvm *kvm) | |||
346 | kvm_free_physmem_slot(&kvm->memslots[i], NULL); | 346 | kvm_free_physmem_slot(&kvm->memslots[i], NULL); |
347 | } | 347 | } |
348 | 348 | ||
349 | static void free_pio_guest_pages(struct kvm_vcpu *vcpu) | ||
350 | { | ||
351 | int i; | ||
352 | |||
353 | for (i = 0; i < 2; ++i) | ||
354 | if (vcpu->pio.guest_pages[i]) { | ||
355 | __free_page(vcpu->pio.guest_pages[i]); | ||
356 | vcpu->pio.guest_pages[i] = NULL; | ||
357 | } | ||
358 | } | ||
359 | |||
349 | static void kvm_free_vcpu(struct kvm_vcpu *vcpu) | 360 | static void kvm_free_vcpu(struct kvm_vcpu *vcpu) |
350 | { | 361 | { |
351 | if (!vcpu->vmcs) | 362 | if (!vcpu->vmcs) |
@@ -357,6 +368,9 @@ static void kvm_free_vcpu(struct kvm_vcpu *vcpu) | |||
357 | kvm_arch_ops->vcpu_free(vcpu); | 368 | kvm_arch_ops->vcpu_free(vcpu); |
358 | free_page((unsigned long)vcpu->run); | 369 | free_page((unsigned long)vcpu->run); |
359 | vcpu->run = NULL; | 370 | vcpu->run = NULL; |
371 | free_page((unsigned long)vcpu->pio_data); | ||
372 | vcpu->pio_data = NULL; | ||
373 | free_pio_guest_pages(vcpu); | ||
360 | } | 374 | } |
361 | 375 | ||
362 | static void kvm_free_vcpus(struct kvm *kvm) | 376 | static void kvm_free_vcpus(struct kvm *kvm) |
@@ -1550,44 +1564,168 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | |||
1550 | } | 1564 | } |
1551 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | 1565 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); |
1552 | 1566 | ||
1553 | static void complete_pio(struct kvm_vcpu *vcpu) | 1567 | static int pio_copy_data(struct kvm_vcpu *vcpu) |
1554 | { | 1568 | { |
1555 | struct kvm_io *io = &vcpu->run->io; | 1569 | void *p = vcpu->pio_data; |
1570 | void *q; | ||
1571 | unsigned bytes; | ||
1572 | int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1; | ||
1573 | |||
1574 | kvm_arch_ops->vcpu_put(vcpu); | ||
1575 | q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE, | ||
1576 | PAGE_KERNEL); | ||
1577 | if (!q) { | ||
1578 | kvm_arch_ops->vcpu_load(vcpu); | ||
1579 | free_pio_guest_pages(vcpu); | ||
1580 | return -ENOMEM; | ||
1581 | } | ||
1582 | q += vcpu->pio.guest_page_offset; | ||
1583 | bytes = vcpu->pio.size * vcpu->pio.cur_count; | ||
1584 | if (vcpu->pio.in) | ||
1585 | memcpy(q, p, bytes); | ||
1586 | else | ||
1587 | memcpy(p, q, bytes); | ||
1588 | q -= vcpu->pio.guest_page_offset; | ||
1589 | vunmap(q); | ||
1590 | kvm_arch_ops->vcpu_load(vcpu); | ||
1591 | free_pio_guest_pages(vcpu); | ||
1592 | return 0; | ||
1593 | } | ||
1594 | |||
1595 | static int complete_pio(struct kvm_vcpu *vcpu) | ||
1596 | { | ||
1597 | struct kvm_pio_request *io = &vcpu->pio; | ||
1556 | long delta; | 1598 | long delta; |
1599 | int r; | ||
1557 | 1600 | ||
1558 | kvm_arch_ops->cache_regs(vcpu); | 1601 | kvm_arch_ops->cache_regs(vcpu); |
1559 | 1602 | ||
1560 | if (!io->string) { | 1603 | if (!io->string) { |
1561 | if (io->direction == KVM_EXIT_IO_IN) | 1604 | if (io->in) |
1562 | memcpy(&vcpu->regs[VCPU_REGS_RAX], &io->value, | 1605 | memcpy(&vcpu->regs[VCPU_REGS_RAX], vcpu->pio_data, |
1563 | io->size); | 1606 | io->size); |
1564 | } else { | 1607 | } else { |
1608 | if (io->in) { | ||
1609 | r = pio_copy_data(vcpu); | ||
1610 | if (r) { | ||
1611 | kvm_arch_ops->cache_regs(vcpu); | ||
1612 | return r; | ||
1613 | } | ||
1614 | } | ||
1615 | |||
1565 | delta = 1; | 1616 | delta = 1; |
1566 | if (io->rep) { | 1617 | if (io->rep) { |
1567 | delta *= io->count; | 1618 | delta *= io->cur_count; |
1568 | /* | 1619 | /* |
1569 | * The size of the register should really depend on | 1620 | * The size of the register should really depend on |
1570 | * current address size. | 1621 | * current address size. |
1571 | */ | 1622 | */ |
1572 | vcpu->regs[VCPU_REGS_RCX] -= delta; | 1623 | vcpu->regs[VCPU_REGS_RCX] -= delta; |
1573 | } | 1624 | } |
1574 | if (io->string_down) | 1625 | if (io->down) |
1575 | delta = -delta; | 1626 | delta = -delta; |
1576 | delta *= io->size; | 1627 | delta *= io->size; |
1577 | if (io->direction == KVM_EXIT_IO_IN) | 1628 | if (io->in) |
1578 | vcpu->regs[VCPU_REGS_RDI] += delta; | 1629 | vcpu->regs[VCPU_REGS_RDI] += delta; |
1579 | else | 1630 | else |
1580 | vcpu->regs[VCPU_REGS_RSI] += delta; | 1631 | vcpu->regs[VCPU_REGS_RSI] += delta; |
1581 | } | 1632 | } |
1582 | 1633 | ||
1583 | vcpu->pio_pending = 0; | ||
1584 | vcpu->run->io_completed = 0; | 1634 | vcpu->run->io_completed = 0; |
1585 | 1635 | ||
1586 | kvm_arch_ops->decache_regs(vcpu); | 1636 | kvm_arch_ops->decache_regs(vcpu); |
1587 | 1637 | ||
1588 | kvm_arch_ops->skip_emulated_instruction(vcpu); | 1638 | io->count -= io->cur_count; |
1639 | io->cur_count = 0; | ||
1640 | |||
1641 | if (!io->count) | ||
1642 | kvm_arch_ops->skip_emulated_instruction(vcpu); | ||
1643 | return 0; | ||
1589 | } | 1644 | } |
1590 | 1645 | ||
1646 | int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | ||
1647 | int size, unsigned long count, int string, int down, | ||
1648 | gva_t address, int rep, unsigned port) | ||
1649 | { | ||
1650 | unsigned now, in_page; | ||
1651 | int i; | ||
1652 | int nr_pages = 1; | ||
1653 | struct page *page; | ||
1654 | |||
1655 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
1656 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | ||
1657 | vcpu->run->io.size = size; | ||
1658 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
1659 | vcpu->run->io.count = count; | ||
1660 | vcpu->run->io.port = port; | ||
1661 | vcpu->pio.count = count; | ||
1662 | vcpu->pio.cur_count = count; | ||
1663 | vcpu->pio.size = size; | ||
1664 | vcpu->pio.in = in; | ||
1665 | vcpu->pio.string = string; | ||
1666 | vcpu->pio.down = down; | ||
1667 | vcpu->pio.guest_page_offset = offset_in_page(address); | ||
1668 | vcpu->pio.rep = rep; | ||
1669 | |||
1670 | if (!string) { | ||
1671 | kvm_arch_ops->cache_regs(vcpu); | ||
1672 | memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); | ||
1673 | kvm_arch_ops->decache_regs(vcpu); | ||
1674 | return 0; | ||
1675 | } | ||
1676 | |||
1677 | if (!count) { | ||
1678 | kvm_arch_ops->skip_emulated_instruction(vcpu); | ||
1679 | return 1; | ||
1680 | } | ||
1681 | |||
1682 | now = min(count, PAGE_SIZE / size); | ||
1683 | |||
1684 | if (!down) | ||
1685 | in_page = PAGE_SIZE - offset_in_page(address); | ||
1686 | else | ||
1687 | in_page = offset_in_page(address) + size; | ||
1688 | now = min(count, (unsigned long)in_page / size); | ||
1689 | if (!now) { | ||
1690 | /* | ||
1691 | * String I/O straddles page boundary. Pin two guest pages | ||
1692 | * so that we satisfy atomicity constraints. Do just one | ||
1693 | * transaction to avoid complexity. | ||
1694 | */ | ||
1695 | nr_pages = 2; | ||
1696 | now = 1; | ||
1697 | } | ||
1698 | if (down) { | ||
1699 | /* | ||
1700 | * String I/O in reverse. Yuck. Kill the guest, fix later. | ||
1701 | */ | ||
1702 | printk(KERN_ERR "kvm: guest string pio down\n"); | ||
1703 | inject_gp(vcpu); | ||
1704 | return 1; | ||
1705 | } | ||
1706 | vcpu->run->io.count = now; | ||
1707 | vcpu->pio.cur_count = now; | ||
1708 | |||
1709 | for (i = 0; i < nr_pages; ++i) { | ||
1710 | spin_lock(&vcpu->kvm->lock); | ||
1711 | page = gva_to_page(vcpu, address + i * PAGE_SIZE); | ||
1712 | if (page) | ||
1713 | get_page(page); | ||
1714 | vcpu->pio.guest_pages[i] = page; | ||
1715 | spin_unlock(&vcpu->kvm->lock); | ||
1716 | if (!page) { | ||
1717 | inject_gp(vcpu); | ||
1718 | free_pio_guest_pages(vcpu); | ||
1719 | return 1; | ||
1720 | } | ||
1721 | } | ||
1722 | |||
1723 | if (!vcpu->pio.in) | ||
1724 | return pio_copy_data(vcpu); | ||
1725 | return 0; | ||
1726 | } | ||
1727 | EXPORT_SYMBOL_GPL(kvm_setup_pio); | ||
1728 | |||
1591 | static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1729 | static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
1592 | { | 1730 | { |
1593 | int r; | 1731 | int r; |
@@ -1602,9 +1740,11 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1602 | vcpu->cr8 = kvm_run->cr8; | 1740 | vcpu->cr8 = kvm_run->cr8; |
1603 | 1741 | ||
1604 | if (kvm_run->io_completed) { | 1742 | if (kvm_run->io_completed) { |
1605 | if (vcpu->pio_pending) | 1743 | if (vcpu->pio.cur_count) { |
1606 | complete_pio(vcpu); | 1744 | r = complete_pio(vcpu); |
1607 | else { | 1745 | if (r) |
1746 | goto out; | ||
1747 | } else { | ||
1608 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); | 1748 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); |
1609 | vcpu->mmio_read_completed = 1; | 1749 | vcpu->mmio_read_completed = 1; |
1610 | } | 1750 | } |
@@ -1620,6 +1760,7 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1620 | 1760 | ||
1621 | r = kvm_arch_ops->run(vcpu, kvm_run); | 1761 | r = kvm_arch_ops->run(vcpu, kvm_run); |
1622 | 1762 | ||
1763 | out: | ||
1623 | if (vcpu->sigset_active) | 1764 | if (vcpu->sigset_active) |
1624 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 1765 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
1625 | 1766 | ||
@@ -1995,9 +2136,12 @@ static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma, | |||
1995 | 2136 | ||
1996 | *type = VM_FAULT_MINOR; | 2137 | *type = VM_FAULT_MINOR; |
1997 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | 2138 | pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; |
1998 | if (pgoff != 0) | 2139 | if (pgoff == 0) |
2140 | page = virt_to_page(vcpu->run); | ||
2141 | else if (pgoff == KVM_PIO_PAGE_OFFSET) | ||
2142 | page = virt_to_page(vcpu->pio_data); | ||
2143 | else | ||
1999 | return NOPAGE_SIGBUS; | 2144 | return NOPAGE_SIGBUS; |
2000 | page = virt_to_page(vcpu->run); | ||
2001 | get_page(page); | 2145 | get_page(page); |
2002 | return page; | 2146 | return page; |
2003 | } | 2147 | } |
@@ -2094,6 +2238,12 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | |||
2094 | goto out_unlock; | 2238 | goto out_unlock; |
2095 | vcpu->run = page_address(page); | 2239 | vcpu->run = page_address(page); |
2096 | 2240 | ||
2241 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
2242 | r = -ENOMEM; | ||
2243 | if (!page) | ||
2244 | goto out_free_run; | ||
2245 | vcpu->pio_data = page_address(page); | ||
2246 | |||
2097 | vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, | 2247 | vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, |
2098 | FX_IMAGE_ALIGN); | 2248 | FX_IMAGE_ALIGN); |
2099 | vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; | 2249 | vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; |
@@ -2123,6 +2273,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | |||
2123 | 2273 | ||
2124 | out_free_vcpus: | 2274 | out_free_vcpus: |
2125 | kvm_free_vcpu(vcpu); | 2275 | kvm_free_vcpu(vcpu); |
2276 | out_free_run: | ||
2277 | free_page((unsigned long)vcpu->run); | ||
2278 | vcpu->run = NULL; | ||
2126 | out_unlock: | 2279 | out_unlock: |
2127 | mutex_unlock(&vcpu->mutex); | 2280 | mutex_unlock(&vcpu->mutex); |
2128 | out: | 2281 | out: |
@@ -2491,7 +2644,7 @@ static long kvm_dev_ioctl(struct file *filp, | |||
2491 | r = -EINVAL; | 2644 | r = -EINVAL; |
2492 | if (arg) | 2645 | if (arg) |
2493 | goto out; | 2646 | goto out; |
2494 | r = PAGE_SIZE; | 2647 | r = 2 * PAGE_SIZE; |
2495 | break; | 2648 | break; |
2496 | default: | 2649 | default: |
2497 | ; | 2650 | ; |