82 files changed, 5808 insertions, 1667 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index e1d94bf4056e..6386f8c0482e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt | |||
@@ -95,7 +95,7 @@ described as 'basic' will be available. | |||
95 | Capability: basic | 95 | Capability: basic |
96 | Architectures: all | 96 | Architectures: all |
97 | Type: system ioctl | 97 | Type: system ioctl |
98 | Parameters: none | 98 | Parameters: machine type identifier (KVM_VM_*) |
99 | Returns: a VM fd that can be used to control the new virtual machine. | 99 | Returns: a VM fd that can be used to control the new virtual machine. |
100 | 100 | ||
101 | The new VM has no virtual cpus and no memory. An mmap() of a VM fd | 101 | The new VM has no virtual cpus and no memory. An mmap() of a VM fd |
@@ -103,6 +103,11 @@ will access the virtual machine's physical address space; offset zero | |||
103 | corresponds to guest physical address zero. Use of mmap() on a VM fd | 103 | corresponds to guest physical address zero. Use of mmap() on a VM fd |
104 | is discouraged if userspace memory allocation (KVM_CAP_USER_MEMORY) is | 104 | is discouraged if userspace memory allocation (KVM_CAP_USER_MEMORY) is |
105 | available. | 105 | available. |
106 | You most certainly want to use 0 as machine type. | ||
107 | |||
108 | In order to create user controlled virtual machines on S390, check | ||
109 | KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as | ||
110 | privileged user (CAP_SYS_ADMIN). | ||
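As a minimal sketch (assuming the standard /dev/kvm device node; error handling elided), creating a VM with the default machine type looks like this:

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Sketch: create a VM with machine type 0 (the default). */
	int create_vm(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);	/* system fd */
		if (kvm < 0)
			return -1;
		/* The argument is the machine type identifier (KVM_VM_*). */
		return ioctl(kvm, KVM_CREATE_VM, 0UL);
	}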
106 | 111 | ||
107 | 4.3 KVM_GET_MSR_INDEX_LIST | 112 | 4.3 KVM_GET_MSR_INDEX_LIST |
108 | 113 | ||
@@ -213,6 +218,11 @@ allocation of vcpu ids. For example, if userspace wants | |||
213 | single-threaded guest vcpus, it should make all vcpu ids be a multiple | 218 | single-threaded guest vcpus, it should make all vcpu ids be a multiple |
214 | of the number of vcpus per vcore. | 219 | of the number of vcpus per vcore. |
215 | 220 | ||
221 | For virtual cpus belonging to S390 user controlled virtual | ||
222 | machines, the resulting vcpu fd can be memory mapped at page offset | ||
223 | KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual | ||
224 | cpu's hardware control block. | ||
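As an illustration, such a mapping might be obtained as sketched below (assuming vcpufd comes from KVM_CREATE_VCPU on a user controlled VM):

	#include <sys/mman.h>
	#include <unistd.h>
	#include <linux/kvm.h>

	/* Sketch: map the vcpu's hardware control block (SIE block). */
	void *map_sie_block(int vcpufd)
	{
		long psize = sysconf(_SC_PAGESIZE);

		return mmap(NULL, psize, PROT_READ | PROT_WRITE, MAP_SHARED,
			    vcpufd, KVM_S390_SIE_PAGE_OFFSET * psize);
	}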
225 | |||
216 | 4.8 KVM_GET_DIRTY_LOG (vm ioctl) | 226 | 4.8 KVM_GET_DIRTY_LOG (vm ioctl) |
217 | 227 | ||
218 | Capability: basic | 228 | Capability: basic |
@@ -1159,6 +1169,14 @@ following flags are specified: | |||
1159 | 1169 | ||
1160 | /* Depends on KVM_CAP_IOMMU */ | 1170 | /* Depends on KVM_CAP_IOMMU */ |
1161 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) | 1171 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) |
1172 | /* The following two depend on KVM_CAP_PCI_2_3 */ | ||
1173 | #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) | ||
1174 | #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) | ||
1175 | |||
1176 | If KVM_DEV_ASSIGN_PCI_2_3 is set, the kernel will manage legacy INTx interrupts | ||
1177 | via the PCI-2.3-compliant device-level mask, thus enabling IRQ sharing with other | ||
1178 | assigned devices or host devices. KVM_DEV_ASSIGN_MASK_INTX specifies the | ||
1179 | guest's view of the INTx mask; see KVM_ASSIGN_SET_INTX_MASK for details. | ||
1162 | 1180 | ||
1163 | The KVM_DEV_ASSIGN_ENABLE_IOMMU flag is a mandatory option to ensure | 1181 | The KVM_DEV_ASSIGN_ENABLE_IOMMU flag is a mandatory option to ensure |
1164 | isolation of the device. Usages not specifying this flag are deprecated. | 1182 | isolation of the device. Usages not specifying this flag are deprecated. |
@@ -1399,6 +1417,71 @@ The following flags are defined: | |||
1399 | If datamatch flag is set, the event will be signaled only if the written value | 1417 | If datamatch flag is set, the event will be signaled only if the written value |
1400 | to the registered address is equal to datamatch in struct kvm_ioeventfd. | 1418 | to the registered address is equal to datamatch in struct kvm_ioeventfd. |
1401 | 1419 | ||
1420 | 4.59 KVM_DIRTY_TLB | ||
1421 | |||
1422 | Capability: KVM_CAP_SW_TLB | ||
1423 | Architectures: ppc | ||
1424 | Type: vcpu ioctl | ||
1425 | Parameters: struct kvm_dirty_tlb (in) | ||
1426 | Returns: 0 on success, -1 on error | ||
1427 | |||
1428 | struct kvm_dirty_tlb { | ||
1429 | __u64 bitmap; | ||
1430 | __u32 num_dirty; | ||
1431 | }; | ||
1432 | |||
1433 | This must be called whenever userspace has changed an entry in the shared | ||
1434 | TLB, prior to calling KVM_RUN on the associated vcpu. | ||
1435 | |||
1436 | The "bitmap" field is the userspace address of an array. This array | ||
1437 | consists of a number of bits, equal to the total number of TLB entries as | ||
1438 | determined by the last successful call to KVM_CONFIG_TLB, rounded up to the | ||
1439 | nearest multiple of 64. | ||
1440 | |||
1441 | Each bit corresponds to one TLB entry, ordered the same as in the shared TLB | ||
1442 | array. | ||
1443 | |||
1444 | The array is little-endian: bit 0 is the least significant bit of the | ||
1445 | first byte, bit 8 is the least significant bit of the second byte, etc. | ||
1446 | This avoids any complications with differing word sizes. | ||
1447 | |||
1448 | The "num_dirty" field is a performance hint for KVM to determine whether it | ||
1449 | should skip processing the bitmap and just invalidate everything. It must | ||
1450 | be set to the number of set bits in the bitmap. | ||
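A hedged sketch of the call (the bitmap is assumed to be sized to the geometry established by KVM_CONFIG_TLB):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Sketch: tell KVM which shared-TLB entries userspace changed. */
	int flush_dirty_tlb(int vcpufd, void *bitmap, __u32 num_dirty)
	{
		struct kvm_dirty_tlb dirty = {
			.bitmap    = (__u64)(unsigned long)bitmap,
			.num_dirty = num_dirty,	/* count of set bits */
		};

		return ioctl(vcpufd, KVM_DIRTY_TLB, &dirty);
	}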
1451 | |||
1452 | 4.60 KVM_ASSIGN_SET_INTX_MASK | ||
1453 | |||
1454 | Capability: KVM_CAP_PCI_2_3 | ||
1455 | Architectures: x86 | ||
1456 | Type: vm ioctl | ||
1457 | Parameters: struct kvm_assigned_pci_dev (in) | ||
1458 | Returns: 0 on success, -1 on error | ||
1459 | |||
1460 | Allows userspace to mask PCI INTx interrupts from the assigned device. The | ||
1461 | kernel will not deliver INTx interrupts to the guest between setting and | ||
1462 | clearing of KVM_ASSIGN_SET_INTX_MASK via this interface. This enables use | ||
1463 | and emulation of the PCI 2.3 INTx disable bit in the command register. | ||
1464 | |||
1465 | This may be used for both PCI 2.3 devices supporting INTx disable natively and | ||
1466 | older devices lacking this support. Userspace is responsible for emulating the | ||
1467 | read value of the INTx disable bit in the guest visible PCI command register. | ||
1468 | When modifying the INTx disable state, userspace should call this ioctl | ||
1469 | before updating the physical device command register, informing the kernel | ||
1470 | of the new intended INTx mask state. | ||
1471 | |||
1472 | Note that the kernel uses the device INTx disable bit to internally manage the | ||
1473 | device interrupt state for PCI 2.3 devices. Reads of this register may | ||
1474 | therefore not match the expected value. Writes should always use the guest | ||
1475 | intended INTx disable value rather than attempting to read-copy-update the | ||
1476 | current physical device state. Races between user and kernel updates to the | ||
1477 | INTx disable bit are handled lazily in the kernel. It's possible the device | ||
1478 | may generate unintended interrupts, but they will not be injected into the | ||
1479 | guest. | ||
1480 | |||
1481 | See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified | ||
1482 | by assigned_dev_id. In the flags field, only KVM_DEV_ASSIGN_MASK_INTX is | ||
1483 | evaluated. | ||
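A sketch of masking and unmasking INTx (dev_id is assumed to be the assigned_dev_id used when the device was assigned):

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Sketch: set the guest's view of the INTx mask for a device. */
	int set_intx_mask(int vmfd, __u32 dev_id, int masked)
	{
		struct kvm_assigned_pci_dev dev;

		memset(&dev, 0, sizeof(dev));
		dev.assigned_dev_id = dev_id;
		/* Only KVM_DEV_ASSIGN_MASK_INTX is evaluated in flags. */
		dev.flags = masked ? KVM_DEV_ASSIGN_MASK_INTX : 0;

		return ioctl(vmfd, KVM_ASSIGN_SET_INTX_MASK, &dev);
	}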
1484 | |||
1402 | 4.62 KVM_CREATE_SPAPR_TCE | 1485 | 4.62 KVM_CREATE_SPAPR_TCE |
1403 | 1486 | ||
1404 | Capability: KVM_CAP_SPAPR_TCE | 1487 | Capability: KVM_CAP_SPAPR_TCE |
@@ -1491,6 +1574,101 @@ following algorithm: | |||
1491 | Some guests configure the LINT1 NMI input to cause a panic, aiding in | 1574 | Some guests configure the LINT1 NMI input to cause a panic, aiding in |
1492 | debugging. | 1575 | debugging. |
1493 | 1576 | ||
1577 | 4.65 KVM_S390_UCAS_MAP | ||
1578 | |||
1579 | Capability: KVM_CAP_S390_UCONTROL | ||
1580 | Architectures: s390 | ||
1581 | Type: vcpu ioctl | ||
1582 | Parameters: struct kvm_s390_ucas_mapping (in) | ||
1583 | Returns: 0 in case of success | ||
1584 | |||
1585 | The parameter is defined like this: | ||
1586 | struct kvm_s390_ucas_mapping { | ||
1587 | __u64 user_addr; | ||
1588 | __u64 vcpu_addr; | ||
1589 | __u64 length; | ||
1590 | }; | ||
1591 | |||
1592 | This ioctl maps the memory at "user_addr" with the length "length" to | ||
1593 | the vcpu's address space starting at "vcpu_addr". All parameters need to | ||
1594 | be aligned to 1 megabyte. | ||
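For instance, a sketch of the call (all three values 1 MiB aligned):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Sketch: map user memory into a ucontrol vcpu's address space. */
	int ucas_map(int vcpufd, __u64 user_addr, __u64 vcpu_addr, __u64 len)
	{
		struct kvm_s390_ucas_mapping map = {
			.user_addr = user_addr,
			.vcpu_addr = vcpu_addr,
			.length    = len,
		};

		return ioctl(vcpufd, KVM_S390_UCAS_MAP, &map);
	}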
1595 | |||
1596 | 4.66 KVM_S390_UCAS_UNMAP | ||
1597 | |||
1598 | Capability: KVM_CAP_S390_UCONTROL | ||
1599 | Architectures: s390 | ||
1600 | Type: vcpu ioctl | ||
1601 | Parameters: struct kvm_s390_ucas_mapping (in) | ||
1602 | Returns: 0 in case of success | ||
1603 | |||
1604 | The parameter is defined like this: | ||
1605 | struct kvm_s390_ucas_mapping { | ||
1606 | __u64 user_addr; | ||
1607 | __u64 vcpu_addr; | ||
1608 | __u64 length; | ||
1609 | }; | ||
1610 | |||
1611 | This ioctl unmaps the memory in the vcpu's address space starting at | ||
1612 | "vcpu_addr" with the length "length". The field "user_addr" is ignored. | ||
1613 | All parameters need to be aligned to 1 megabyte. | ||
1614 | |||
1615 | 4.67 KVM_S390_VCPU_FAULT | ||
1616 | |||
1617 | Capability: KVM_CAP_S390_UCONTROL | ||
1618 | Architectures: s390 | ||
1619 | Type: vcpu ioctl | ||
1620 | Parameters: vcpu absolute address (in) | ||
1621 | Returns: 0 in case of success | ||
1622 | |||
1623 | This call creates a page table entry on the virtual cpu's address space | ||
1624 | (for user controlled virtual machines) or the virtual machine's address | ||
1625 | space (for regular virtual machines). This only works for minor faults, | ||
1626 | so it is recommended to touch the subject memory page via the user page | ||
1627 | table beforehand. This is useful to handle validity intercepts for user | ||
1628 | controlled virtual machines to fault in the virtual cpu's lowcore pages | ||
1629 | prior to calling the KVM_RUN ioctl. | ||
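A sketch of pre-faulting the lowcore (assuming it sits at vcpu absolute address zero):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Sketch: create a page table entry for the lowcore before KVM_RUN. */
	int fault_in_lowcore(int vcpufd)
	{
		unsigned long vcpu_abs_addr = 0;	/* lowcore starts at 0 */

		return ioctl(vcpufd, KVM_S390_VCPU_FAULT, vcpu_abs_addr);
	}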
1630 | |||
1631 | 4.68 KVM_SET_ONE_REG | ||
1632 | |||
1633 | Capability: KVM_CAP_ONE_REG | ||
1634 | Architectures: all | ||
1635 | Type: vcpu ioctl | ||
1636 | Parameters: struct kvm_one_reg (in) | ||
1637 | Returns: 0 on success, negative value on failure | ||
1638 | |||
1639 | struct kvm_one_reg { | ||
1640 | __u64 id; | ||
1641 | __u64 addr; | ||
1642 | }; | ||
1643 | |||
1644 | Using this ioctl, a single vcpu register can be set to a specific value | ||
1645 | defined by user space with the passed in struct kvm_one_reg, where id | ||
1646 | refers to the register identifier as described below and addr is a pointer | ||
1647 | to a variable with the respective size. There can be architecture agnostic | ||
1648 | and architecture specific registers. Each has its own range of operation | ||
1649 | and its own constants and width. To keep track of the implemented | ||
1650 | registers, find a list below: | ||
1651 | |||
1652 | Arch | Register | Width (bits) | ||
1653 | | | | ||
1654 | PPC | KVM_REG_PPC_HIOR | 64 | ||
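As a sketch, setting the PPC HIOR register could look like this (the value is illustrative):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Sketch: set a single register via KVM_SET_ONE_REG. */
	int set_hior(int vcpufd, __u64 hior)
	{
		struct kvm_one_reg reg = {
			.id   = KVM_REG_PPC_HIOR,
			.addr = (__u64)(uintptr_t)&hior,  /* points at the value */
		};

		return ioctl(vcpufd, KVM_SET_ONE_REG, &reg);
	}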
1655 | |||
1656 | 4.69 KVM_GET_ONE_REG | ||
1657 | |||
1658 | Capability: KVM_CAP_ONE_REG | ||
1659 | Architectures: all | ||
1660 | Type: vcpu ioctl | ||
1661 | Parameters: struct kvm_one_reg (in and out) | ||
1662 | Returns: 0 on success, negative value on failure | ||
1663 | |||
1664 | This ioctl allows userspace to read the value of a single register implemented | ||
1665 | in a vcpu. The register to read is indicated by the "id" field of the | ||
1666 | kvm_one_reg struct passed in. On success, the register value can be found | ||
1667 | at the memory location pointed to by "addr". | ||
1668 | |||
1669 | The list of registers accessible using this interface is identical to the | ||
1670 | list in 4.68 (KVM_SET_ONE_REG). | ||
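The read direction mirrors the write; a sketch:

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Sketch: read a single register back via KVM_GET_ONE_REG. */
	int get_hior(int vcpufd, __u64 *hior)
	{
		struct kvm_one_reg reg = {
			.id   = KVM_REG_PPC_HIOR,
			.addr = (__u64)(uintptr_t)hior,	/* result written here */
		};

		return ioctl(vcpufd, KVM_GET_ONE_REG, &reg);
	}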
1671 | |||
1494 | 5. The kvm_run structure | 1672 | 5. The kvm_run structure |
1495 | 1673 | ||
1496 | Application code obtains a pointer to the kvm_run structure by | 1674 | Application code obtains a pointer to the kvm_run structure by |
@@ -1651,6 +1829,20 @@ s390 specific. | |||
1651 | 1829 | ||
1652 | s390 specific. | 1830 | s390 specific. |
1653 | 1831 | ||
1832 | /* KVM_EXIT_S390_UCONTROL */ | ||
1833 | struct { | ||
1834 | __u64 trans_exc_code; | ||
1835 | __u32 pgm_code; | ||
1836 | } s390_ucontrol; | ||
1837 | |||
1838 | s390 specific. A page fault has occurred for a user controlled virtual | ||
1839 | machine (KVM_VM_S390_UCONTROL) on its host page table that cannot be | ||
1840 | resolved by the kernel. | ||
1841 | The program code and the translation exception code that were placed | ||
1842 | in the cpu's lowcore are presented here as defined by the z/Architecture | ||
1843 | Principles of Operation book in the chapter on Dynamic Address Translation | ||
1844 | (DAT). | ||
1845 | |||
1654 | /* KVM_EXIT_DCR */ | 1846 | /* KVM_EXIT_DCR */ |
1655 | struct { | 1847 | struct { |
1656 | __u32 dcrn; | 1848 | __u32 dcrn; |
@@ -1693,6 +1885,29 @@ developer registration required to access it). | |||
1693 | /* Fix the size of the union. */ | 1885 | /* Fix the size of the union. */ |
1694 | char padding[256]; | 1886 | char padding[256]; |
1695 | }; | 1887 | }; |
1888 | |||
1889 | /* | ||
1890 | * shared registers between kvm and userspace. | ||
1891 | * kvm_valid_regs specifies the register classes set by the host | ||
1892 | * kvm_dirty_regs specifies the register classes dirtied by userspace | ||
1893 | * struct kvm_sync_regs is architecture specific, as well as the | ||
1894 | * bits for kvm_valid_regs and kvm_dirty_regs | ||
1895 | */ | ||
1896 | __u64 kvm_valid_regs; | ||
1897 | __u64 kvm_dirty_regs; | ||
1898 | union { | ||
1899 | struct kvm_sync_regs regs; | ||
1900 | char padding[1024]; | ||
1901 | } s; | ||
1902 | |||
1903 | If KVM_CAP_SYNC_REGS is defined, these fields allow userspace to access | ||
1904 | certain guest registers without having to call SET/GET_*REGS. Thus we can | ||
1905 | avoid some system call overhead if userspace has to handle the exit. | ||
1906 | Userspace can query the validity of the structure by checking | ||
1907 | kvm_valid_regs for specific bits. These bits are architecture specific | ||
1908 | and usually define the validity of a group of registers (e.g. one bit | ||
1909 | for general purpose registers); see the sketch below. | ||
1910 | |||
1696 | }; | 1911 | }; |
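For example, an exit handler might consult the valid bits before touching the synced registers; a sketch, assuming an architecture-defined class bit such as s390's KVM_SYNC_GPRS:

	#include <linux/kvm.h>

	/* Sketch: use synced registers only when the host marked them valid. */
	static void handle_exit_regs(struct kvm_run *run)
	{
		if (run->kvm_valid_regs & KVM_SYNC_GPRS) {
			/* ... read or modify run->s.regs here ... */
			/* Tell KVM which register classes userspace dirtied. */
			run->kvm_dirty_regs |= KVM_SYNC_GPRS;
		}
	}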
1697 | 1912 | ||
1698 | 6. Capabilities that can be enabled | 1913 | 6. Capabilities that can be enabled |
@@ -1741,3 +1956,45 @@ HTAB address part of SDR1 contains an HVA instead of a GPA, as PAPR keeps the | |||
1741 | HTAB invisible to the guest. | 1956 | HTAB invisible to the guest. |
1742 | 1957 | ||
1743 | When this capability is enabled, KVM_EXIT_PAPR_HCALL can occur. | 1958 | When this capability is enabled, KVM_EXIT_PAPR_HCALL can occur. |
1959 | |||
1960 | 6.3 KVM_CAP_SW_TLB | ||
1961 | |||
1962 | Architectures: ppc | ||
1963 | Parameters: args[0] is the address of a struct kvm_config_tlb | ||
1964 | Returns: 0 on success; -1 on error | ||
1965 | |||
1966 | struct kvm_config_tlb { | ||
1967 | __u64 params; | ||
1968 | __u64 array; | ||
1969 | __u32 mmu_type; | ||
1970 | __u32 array_len; | ||
1971 | }; | ||
1972 | |||
1973 | Configures the virtual CPU's TLB array, establishing a shared memory area | ||
1974 | between userspace and KVM. The "params" and "array" fields are userspace | ||
1975 | addresses of mmu-type-specific data structures. The "array_len" field is a | ||
1976 | safety mechanism, and should be set to the size in bytes of the memory that | ||
1977 | userspace has reserved for the array. It must be at least the size dictated | ||
1978 | by "mmu_type" and "params". | ||
1979 | |||
1980 | While KVM_RUN is active, the shared region is under control of KVM. Its | ||
1981 | contents are undefined, and any modification by userspace results in | ||
1982 | boundedly undefined behavior. | ||
1983 | |||
1984 | On return from KVM_RUN, the shared region will reflect the current state of | ||
1985 | the guest's TLB. If userspace makes any changes, it must call KVM_DIRTY_TLB | ||
1986 | to tell KVM which entries have been changed, prior to calling KVM_RUN again | ||
1987 | on this vcpu. | ||
1988 | |||
1989 | For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV: | ||
1990 | - The "params" field is of type "struct kvm_book3e_206_tlb_params". | ||
1991 | - The "array" field points to an array of type "struct | ||
1992 | kvm_book3e_206_tlb_entry". | ||
1993 | - The array consists of all entries in the first TLB, followed by all | ||
1994 | entries in the second TLB. | ||
1995 | - Within a TLB, entries are ordered first by increasing set number. Within a | ||
1996 | set, entries are ordered by way (increasing ESEL). | ||
1997 | - The hash for determining set number in TLB0 is: (MAS2 >> 12) & (num_sets - 1) | ||
1998 | where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value. | ||
1999 | - The tsize field of mas1 shall be set to 4K on TLB0, even though the | ||
2000 | hardware ignores this value for TLB0. | ||
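Putting this together, enabling the capability might look like the sketch below (geometry and mmu type are illustrative for an FSL Book E target; the caller is assumed to have allocated params and the entry array):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Sketch: enable KVM_CAP_SW_TLB with a shared TLB array. */
	int enable_sw_tlb(int vcpufd, struct kvm_book3e_206_tlb_params *params,
			  struct kvm_book3e_206_tlb_entry *array, __u32 array_len)
	{
		struct kvm_config_tlb cfg = {
			.params    = (__u64)(unsigned long)params,
			.array     = (__u64)(unsigned long)array,
			.mmu_type  = KVM_MMU_FSL_BOOKE_NOHV,
			.array_len = array_len,	/* bytes reserved for the array */
		};
		struct kvm_enable_cap cap = {
			.cap     = KVM_CAP_SW_TLB,
			.args[0] = (__u64)(unsigned long)&cfg,
		};

		return ioctl(vcpufd, KVM_ENABLE_CAP, &cap);
	}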
diff --git a/Documentation/virtual/kvm/ppc-pv.txt b/Documentation/virtual/kvm/ppc-pv.txt index 2b7ce190cde4..6e7c37050930 100644 --- a/Documentation/virtual/kvm/ppc-pv.txt +++ b/Documentation/virtual/kvm/ppc-pv.txt | |||
@@ -81,28 +81,8 @@ additional registers to the magic page. If you add fields to the magic page, | |||
81 | also define a new hypercall feature to indicate that the host can give you more | 81 | also define a new hypercall feature to indicate that the host can give you more |
82 | registers. Only if the host supports the additional features, make use of them. | 82 | registers. Only if the host supports the additional features, make use of them. |
83 | 83 | ||
84 | The magic page has the following layout as described in | 84 | The magic page layout is described by struct kvm_vcpu_arch_shared |
85 | arch/powerpc/include/asm/kvm_para.h: | 85 | in arch/powerpc/include/asm/kvm_para.h. |
86 | |||
87 | struct kvm_vcpu_arch_shared { | ||
88 | __u64 scratch1; | ||
89 | __u64 scratch2; | ||
90 | __u64 scratch3; | ||
91 | __u64 critical; /* Guest may not get interrupts if == r1 */ | ||
92 | __u64 sprg0; | ||
93 | __u64 sprg1; | ||
94 | __u64 sprg2; | ||
95 | __u64 sprg3; | ||
96 | __u64 srr0; | ||
97 | __u64 srr1; | ||
98 | __u64 dar; | ||
99 | __u64 msr; | ||
100 | __u32 dsisr; | ||
101 | __u32 int_pending; /* Tells the guest if we have an interrupt */ | ||
102 | }; | ||
103 | |||
104 | Additions to the page must only occur at the end. Struct fields are always 32 | ||
105 | or 64 bit aligned, depending on them being 32 or 64 bit wide respectively. | ||
106 | 86 | ||
107 | Magic page features | 87 | Magic page features |
108 | =================== | 88 | =================== |
diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h index bc90c75adf67..b9f82c84f093 100644 --- a/arch/ia64/include/asm/kvm.h +++ b/arch/ia64/include/asm/kvm.h | |||
@@ -261,4 +261,8 @@ struct kvm_debug_exit_arch { | |||
261 | struct kvm_guest_debug_arch { | 261 | struct kvm_guest_debug_arch { |
262 | }; | 262 | }; |
263 | 263 | ||
264 | /* definition of registers in kvm_run */ | ||
265 | struct kvm_sync_regs { | ||
266 | }; | ||
267 | |||
264 | #endif | 268 | #endif |
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 2689ee54a1c9..e35b3a84a40b 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h | |||
@@ -459,6 +459,9 @@ struct kvm_sal_data { | |||
459 | unsigned long boot_gp; | 459 | unsigned long boot_gp; |
460 | }; | 460 | }; |
461 | 461 | ||
462 | struct kvm_arch_memory_slot { | ||
463 | }; | ||
464 | |||
462 | struct kvm_arch { | 465 | struct kvm_arch { |
463 | spinlock_t dirty_log_lock; | 466 | spinlock_t dirty_log_lock; |
464 | 467 | ||
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 405052002493..f5104b7c52cd 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c | |||
@@ -809,10 +809,13 @@ static void kvm_build_io_pmt(struct kvm *kvm) | |||
809 | #define GUEST_PHYSICAL_RR4 0x2739 | 809 | #define GUEST_PHYSICAL_RR4 0x2739 |
810 | #define VMM_INIT_RR 0x1660 | 810 | #define VMM_INIT_RR 0x1660 |
811 | 811 | ||
812 | int kvm_arch_init_vm(struct kvm *kvm) | 812 | int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) |
813 | { | 813 | { |
814 | BUG_ON(!kvm); | 814 | BUG_ON(!kvm); |
815 | 815 | ||
816 | if (type) | ||
817 | return -EINVAL; | ||
818 | |||
816 | kvm->arch.is_sn2 = ia64_platform_is("sn2"); | 819 | kvm->arch.is_sn2 = ia64_platform_is("sn2"); |
817 | 820 | ||
818 | kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0; | 821 | kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0; |
@@ -1169,6 +1172,11 @@ out: | |||
1169 | 1172 | ||
1170 | #define PALE_RESET_ENTRY 0x80000000ffffffb0UL | 1173 | #define PALE_RESET_ENTRY 0x80000000ffffffb0UL |
1171 | 1174 | ||
1175 | bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) | ||
1176 | { | ||
1177 | return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); | ||
1178 | } | ||
1179 | |||
1172 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | 1180 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) |
1173 | { | 1181 | { |
1174 | struct kvm_vcpu *v; | 1182 | struct kvm_vcpu *v; |
@@ -1563,6 +1571,21 @@ out: | |||
1563 | return r; | 1571 | return r; |
1564 | } | 1572 | } |
1565 | 1573 | ||
1574 | int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) | ||
1575 | { | ||
1576 | return VM_FAULT_SIGBUS; | ||
1577 | } | ||
1578 | |||
1579 | void kvm_arch_free_memslot(struct kvm_memory_slot *free, | ||
1580 | struct kvm_memory_slot *dont) | ||
1581 | { | ||
1582 | } | ||
1583 | |||
1584 | int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | ||
1585 | { | ||
1586 | return 0; | ||
1587 | } | ||
1588 | |||
1566 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 1589 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
1567 | struct kvm_memory_slot *memslot, | 1590 | struct kvm_memory_slot *memslot, |
1568 | struct kvm_memory_slot old, | 1591 | struct kvm_memory_slot old, |
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index f7727d91ac6b..b921c3f48928 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h | |||
@@ -265,12 +265,9 @@ struct kvm_debug_exit_arch { | |||
265 | struct kvm_guest_debug_arch { | 265 | struct kvm_guest_debug_arch { |
266 | }; | 266 | }; |
267 | 267 | ||
268 | #define KVM_REG_MASK 0x001f | 268 | /* definition of registers in kvm_run */ |
269 | #define KVM_REG_EXT_MASK 0xffe0 | 269 | struct kvm_sync_regs { |
270 | #define KVM_REG_GPR 0x0000 | 270 | }; |
271 | #define KVM_REG_FPR 0x0020 | ||
272 | #define KVM_REG_QPR 0x0040 | ||
273 | #define KVM_REG_FQPR 0x0060 | ||
274 | 271 | ||
275 | #define KVM_INTERRUPT_SET -1U | 272 | #define KVM_INTERRUPT_SET -1U |
276 | #define KVM_INTERRUPT_UNSET -2U | 273 | #define KVM_INTERRUPT_UNSET -2U |
@@ -292,4 +289,41 @@ struct kvm_allocate_rma { | |||
292 | __u64 rma_size; | 289 | __u64 rma_size; |
293 | }; | 290 | }; |
294 | 291 | ||
292 | struct kvm_book3e_206_tlb_entry { | ||
293 | __u32 mas8; | ||
294 | __u32 mas1; | ||
295 | __u64 mas2; | ||
296 | __u64 mas7_3; | ||
297 | }; | ||
298 | |||
299 | struct kvm_book3e_206_tlb_params { | ||
300 | /* | ||
301 | * For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV: | ||
302 | * | ||
303 | * - The number of ways of TLB0 must be a power of two between 2 and | ||
304 | * 16. | ||
305 | * - TLB1 must be fully associative. | ||
306 | * - The size of TLB0 must be a multiple of the number of ways, and | ||
307 | * the number of sets must be a power of two. | ||
308 | * - The size of TLB1 may not exceed 64 entries. | ||
309 | * - TLB0 supports 4 KiB pages. | ||
310 | * - The page sizes supported by TLB1 are as indicated by | ||
311 | * TLB1CFG (if MMUCFG[MAVN] = 0) or TLB1PS (if MMUCFG[MAVN] = 1) | ||
312 | * as returned by KVM_GET_SREGS. | ||
313 | * - TLB2 and TLB3 are reserved, and their entries in tlb_sizes[] | ||
314 | * and tlb_ways[] must be zero. | ||
315 | * | ||
316 | * tlb_ways[n] = tlb_sizes[n] means the array is fully associative. | ||
317 | * | ||
318 | * KVM will adjust TLBnCFG based on the sizes configured here, | ||
319 | * though arrays greater than 2048 entries will have TLBnCFG[NENTRY] | ||
320 | * set to zero. | ||
321 | */ | ||
322 | __u32 tlb_sizes[4]; | ||
323 | __u32 tlb_ways[4]; | ||
324 | __u32 reserved[8]; | ||
325 | }; | ||
326 | |||
327 | #define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1) | ||
328 | |||
295 | #endif /* __LINUX_KVM_POWERPC_H */ | 329 | #endif /* __LINUX_KVM_POWERPC_H */ |
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 69c7377d2071..aa795ccef294 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h | |||
@@ -90,6 +90,8 @@ struct kvmppc_vcpu_book3s { | |||
90 | #endif | 90 | #endif |
91 | int context_id[SID_CONTEXTS]; | 91 | int context_id[SID_CONTEXTS]; |
92 | 92 | ||
93 | bool hior_explicit; /* HIOR is set by ioctl, not PVR */ | ||
94 | |||
93 | struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; | 95 | struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; |
94 | struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG]; | 96 | struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG]; |
95 | struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; | 97 | struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; |
@@ -119,6 +121,11 @@ extern void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu); | |||
119 | extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); | 121 | extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); |
120 | extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); | 122 | extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); |
121 | extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); | 123 | extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); |
124 | extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run, | ||
125 | struct kvm_vcpu *vcpu, unsigned long addr, | ||
126 | unsigned long status); | ||
127 | extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, | ||
128 | unsigned long slb_v, unsigned long valid); | ||
122 | 129 | ||
123 | extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte); | 130 | extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte); |
124 | extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu); | 131 | extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu); |
@@ -138,6 +145,21 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, | |||
138 | extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); | 145 | extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); |
139 | extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); | 146 | extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); |
140 | extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); | 147 | extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); |
148 | extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, | ||
149 | unsigned long *rmap, long pte_index, int realmode); | ||
150 | extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, | ||
151 | unsigned long pte_index); | ||
152 | void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, | ||
153 | unsigned long pte_index); | ||
154 | extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, | ||
155 | unsigned long *nb_ret); | ||
156 | extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr); | ||
157 | extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, | ||
158 | long pte_index, unsigned long pteh, unsigned long ptel); | ||
159 | extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, | ||
160 | long pte_index, unsigned long pteh, unsigned long ptel); | ||
161 | extern long kvmppc_hv_get_dirty_log(struct kvm *kvm, | ||
162 | struct kvm_memory_slot *memslot); | ||
141 | 163 | ||
142 | extern void kvmppc_entry_trampoline(void); | 164 | extern void kvmppc_entry_trampoline(void); |
143 | extern void kvmppc_hv_entry_trampoline(void); | 165 | extern void kvmppc_hv_entry_trampoline(void); |
@@ -183,7 +205,9 @@ static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, | |||
183 | static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) | 205 | static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) |
184 | { | 206 | { |
185 | if ( num < 14 ) { | 207 | if ( num < 14 ) { |
186 | to_svcpu(vcpu)->gpr[num] = val; | 208 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
209 | svcpu->gpr[num] = val; | ||
210 | svcpu_put(svcpu); | ||
187 | to_book3s(vcpu)->shadow_vcpu->gpr[num] = val; | 211 | to_book3s(vcpu)->shadow_vcpu->gpr[num] = val; |
188 | } else | 212 | } else |
189 | vcpu->arch.gpr[num] = val; | 213 | vcpu->arch.gpr[num] = val; |
@@ -191,80 +215,120 @@ static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) | |||
191 | 215 | ||
192 | static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) | 216 | static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) |
193 | { | 217 | { |
194 | if ( num < 14 ) | 218 | if ( num < 14 ) { |
195 | return to_svcpu(vcpu)->gpr[num]; | 219 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
196 | else | 220 | ulong r = svcpu->gpr[num]; |
221 | svcpu_put(svcpu); | ||
222 | return r; | ||
223 | } else | ||
197 | return vcpu->arch.gpr[num]; | 224 | return vcpu->arch.gpr[num]; |
198 | } | 225 | } |
199 | 226 | ||
200 | static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) | 227 | static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) |
201 | { | 228 | { |
202 | to_svcpu(vcpu)->cr = val; | 229 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
230 | svcpu->cr = val; | ||
231 | svcpu_put(svcpu); | ||
203 | to_book3s(vcpu)->shadow_vcpu->cr = val; | 232 | to_book3s(vcpu)->shadow_vcpu->cr = val; |
204 | } | 233 | } |
205 | 234 | ||
206 | static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) | 235 | static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) |
207 | { | 236 | { |
208 | return to_svcpu(vcpu)->cr; | 237 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
238 | u32 r; | ||
239 | r = svcpu->cr; | ||
240 | svcpu_put(svcpu); | ||
241 | return r; | ||
209 | } | 242 | } |
210 | 243 | ||
211 | static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) | 244 | static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) |
212 | { | 245 | { |
213 | to_svcpu(vcpu)->xer = val; | 246 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
247 | svcpu->xer = val; | ||
214 | to_book3s(vcpu)->shadow_vcpu->xer = val; | 248 | to_book3s(vcpu)->shadow_vcpu->xer = val; |
249 | svcpu_put(svcpu); | ||
215 | } | 250 | } |
216 | 251 | ||
217 | static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) | 252 | static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) |
218 | { | 253 | { |
219 | return to_svcpu(vcpu)->xer; | 254 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
255 | u32 r; | ||
256 | r = svcpu->xer; | ||
257 | svcpu_put(svcpu); | ||
258 | return r; | ||
220 | } | 259 | } |
221 | 260 | ||
222 | static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) | 261 | static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) |
223 | { | 262 | { |
224 | to_svcpu(vcpu)->ctr = val; | 263 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
264 | svcpu->ctr = val; | ||
265 | svcpu_put(svcpu); | ||
225 | } | 266 | } |
226 | 267 | ||
227 | static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu) | 268 | static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu) |
228 | { | 269 | { |
229 | return to_svcpu(vcpu)->ctr; | 270 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
271 | ulong r; | ||
272 | r = svcpu->ctr; | ||
273 | svcpu_put(svcpu); | ||
274 | return r; | ||
230 | } | 275 | } |
231 | 276 | ||
232 | static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val) | 277 | static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val) |
233 | { | 278 | { |
234 | to_svcpu(vcpu)->lr = val; | 279 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
280 | svcpu->lr = val; | ||
281 | svcpu_put(svcpu); | ||
235 | } | 282 | } |
236 | 283 | ||
237 | static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu) | 284 | static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu) |
238 | { | 285 | { |
239 | return to_svcpu(vcpu)->lr; | 286 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
287 | ulong r; | ||
288 | r = svcpu->lr; | ||
289 | svcpu_put(svcpu); | ||
290 | return r; | ||
240 | } | 291 | } |
241 | 292 | ||
242 | static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val) | 293 | static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val) |
243 | { | 294 | { |
244 | to_svcpu(vcpu)->pc = val; | 295 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
296 | svcpu->pc = val; | ||
297 | svcpu_put(svcpu); | ||
245 | } | 298 | } |
246 | 299 | ||
247 | static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) | 300 | static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) |
248 | { | 301 | { |
249 | return to_svcpu(vcpu)->pc; | 302 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
303 | ulong r; | ||
304 | r = svcpu->pc; | ||
305 | svcpu_put(svcpu); | ||
306 | return r; | ||
250 | } | 307 | } |
251 | 308 | ||
252 | static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) | 309 | static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) |
253 | { | 310 | { |
254 | ulong pc = kvmppc_get_pc(vcpu); | 311 | ulong pc = kvmppc_get_pc(vcpu); |
255 | struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu); | 312 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
313 | u32 r; | ||
256 | 314 | ||
257 | /* Load the instruction manually if it failed to do so in the | 315 | /* Load the instruction manually if it failed to do so in the |
258 | * exit path */ | 316 | * exit path */ |
259 | if (svcpu->last_inst == KVM_INST_FETCH_FAILED) | 317 | if (svcpu->last_inst == KVM_INST_FETCH_FAILED) |
260 | kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false); | 318 | kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false); |
261 | 319 | ||
262 | return svcpu->last_inst; | 320 | r = svcpu->last_inst; |
321 | svcpu_put(svcpu); | ||
322 | return r; | ||
263 | } | 323 | } |
264 | 324 | ||
265 | static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) | 325 | static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) |
266 | { | 326 | { |
267 | return to_svcpu(vcpu)->fault_dar; | 327 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
328 | ulong r; | ||
329 | r = svcpu->fault_dar; | ||
330 | svcpu_put(svcpu); | ||
331 | return r; | ||
268 | } | 332 | } |
269 | 333 | ||
270 | static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) | 334 | static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) |
diff --git a/arch/powerpc/include/asm/kvm_book3s_32.h b/arch/powerpc/include/asm/kvm_book3s_32.h index de604db135f5..38040ff82063 100644 --- a/arch/powerpc/include/asm/kvm_book3s_32.h +++ b/arch/powerpc/include/asm/kvm_book3s_32.h | |||
@@ -20,11 +20,15 @@ | |||
20 | #ifndef __ASM_KVM_BOOK3S_32_H__ | 20 | #ifndef __ASM_KVM_BOOK3S_32_H__ |
21 | #define __ASM_KVM_BOOK3S_32_H__ | 21 | #define __ASM_KVM_BOOK3S_32_H__ |
22 | 22 | ||
23 | static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu) | 23 | static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu) |
24 | { | 24 | { |
25 | return to_book3s(vcpu)->shadow_vcpu; | 25 | return to_book3s(vcpu)->shadow_vcpu; |
26 | } | 26 | } |
27 | 27 | ||
28 | static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) | ||
29 | { | ||
30 | } | ||
31 | |||
28 | #define PTE_SIZE 12 | 32 | #define PTE_SIZE 12 |
29 | #define VSID_ALL 0 | 33 | #define VSID_ALL 0 |
30 | #define SR_INVALID 0x00000001 /* VSID 1 should always be unused */ | 34 | #define SR_INVALID 0x00000001 /* VSID 1 should always be unused */ |
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index d0ac94f98f9e..b0c08b142770 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h | |||
@@ -21,14 +21,56 @@ | |||
21 | #define __ASM_KVM_BOOK3S_64_H__ | 21 | #define __ASM_KVM_BOOK3S_64_H__ |
22 | 22 | ||
23 | #ifdef CONFIG_KVM_BOOK3S_PR | 23 | #ifdef CONFIG_KVM_BOOK3S_PR |
24 | static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu) | 24 | static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu) |
25 | { | 25 | { |
26 | preempt_disable(); | ||
26 | return &get_paca()->shadow_vcpu; | 27 | return &get_paca()->shadow_vcpu; |
27 | } | 28 | } |
29 | |||
30 | static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) | ||
31 | { | ||
32 | preempt_enable(); | ||
33 | } | ||
28 | #endif | 34 | #endif |
29 | 35 | ||
30 | #define SPAPR_TCE_SHIFT 12 | 36 | #define SPAPR_TCE_SHIFT 12 |
31 | 37 | ||
38 | #ifdef CONFIG_KVM_BOOK3S_64_HV | ||
39 | /* For now use fixed-size 16MB page table */ | ||
40 | #define HPT_ORDER 24 | ||
41 | #define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */ | ||
42 | #define HPT_NPTE (HPT_NPTEG << 3) /* 8 PTEs per PTEG */ | ||
43 | #define HPT_HASH_MASK (HPT_NPTEG - 1) | ||
44 | #endif | ||
45 | |||
46 | #define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ | ||
47 | |||
48 | /* | ||
49 | * We use a lock bit in HPTE dword 0 to synchronize updates and | ||
50 | * accesses to each HPTE, and another bit to indicate non-present | ||
51 | * HPTEs. | ||
52 | */ | ||
53 | #define HPTE_V_HVLOCK 0x40UL | ||
54 | #define HPTE_V_ABSENT 0x20UL | ||
55 | |||
56 | static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits) | ||
57 | { | ||
58 | unsigned long tmp, old; | ||
59 | |||
60 | asm volatile(" ldarx %0,0,%2\n" | ||
61 | " and. %1,%0,%3\n" | ||
62 | " bne 2f\n" | ||
63 | " ori %0,%0,%4\n" | ||
64 | " stdcx. %0,0,%2\n" | ||
65 | " beq+ 2f\n" | ||
66 | " li %1,%3\n" | ||
67 | "2: isync" | ||
68 | : "=&r" (tmp), "=&r" (old) | ||
69 | : "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK) | ||
70 | : "cc", "memory"); | ||
71 | return old == 0; | ||
72 | } | ||
73 | |||
32 | static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, | 74 | static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, |
33 | unsigned long pte_index) | 75 | unsigned long pte_index) |
34 | { | 76 | { |
@@ -62,4 +104,140 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, | |||
62 | return rb; | 104 | return rb; |
63 | } | 105 | } |
64 | 106 | ||
107 | static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) | ||
108 | { | ||
109 | /* only handle 4k, 64k and 16M pages for now */ | ||
110 | if (!(h & HPTE_V_LARGE)) | ||
111 | return 1ul << 12; /* 4k page */ | ||
112 | if ((l & 0xf000) == 0x1000 && cpu_has_feature(CPU_FTR_ARCH_206)) | ||
113 | return 1ul << 16; /* 64k page */ | ||
114 | if ((l & 0xff000) == 0) | ||
115 | return 1ul << 24; /* 16M page */ | ||
116 | return 0; /* error */ | ||
117 | } | ||
118 | |||
119 | static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize) | ||
120 | { | ||
121 | return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; | ||
122 | } | ||
123 | |||
124 | static inline int hpte_is_writable(unsigned long ptel) | ||
125 | { | ||
126 | unsigned long pp = ptel & (HPTE_R_PP0 | HPTE_R_PP); | ||
127 | |||
128 | return pp != PP_RXRX && pp != PP_RXXX; | ||
129 | } | ||
130 | |||
131 | static inline unsigned long hpte_make_readonly(unsigned long ptel) | ||
132 | { | ||
133 | if ((ptel & HPTE_R_PP0) || (ptel & HPTE_R_PP) == PP_RWXX) | ||
134 | ptel = (ptel & ~HPTE_R_PP) | PP_RXXX; | ||
135 | else | ||
136 | ptel |= PP_RXRX; | ||
137 | return ptel; | ||
138 | } | ||
139 | |||
140 | static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type) | ||
141 | { | ||
142 | unsigned int wimg = ptel & HPTE_R_WIMG; | ||
143 | |||
144 | /* Handle SAO */ | ||
145 | if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) && | ||
146 | cpu_has_feature(CPU_FTR_ARCH_206)) | ||
147 | wimg = HPTE_R_M; | ||
148 | |||
149 | if (!io_type) | ||
150 | return wimg == HPTE_R_M; | ||
151 | |||
152 | return (wimg & (HPTE_R_W | HPTE_R_I)) == io_type; | ||
153 | } | ||
154 | |||
155 | /* | ||
156 | * Lock and read a linux PTE. If it's present and writable, atomically | ||
157 | * set dirty and referenced bits and return the PTE, otherwise return 0. | ||
158 | */ | ||
159 | static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing) | ||
160 | { | ||
161 | pte_t pte, tmp; | ||
162 | |||
163 | /* wait until _PAGE_BUSY is clear then set it atomically */ | ||
164 | __asm__ __volatile__ ( | ||
165 | "1: ldarx %0,0,%3\n" | ||
166 | " andi. %1,%0,%4\n" | ||
167 | " bne- 1b\n" | ||
168 | " ori %1,%0,%4\n" | ||
169 | " stdcx. %1,0,%3\n" | ||
170 | " bne- 1b" | ||
171 | : "=&r" (pte), "=&r" (tmp), "=m" (*p) | ||
172 | : "r" (p), "i" (_PAGE_BUSY) | ||
173 | : "cc"); | ||
174 | |||
175 | if (pte_present(pte)) { | ||
176 | pte = pte_mkyoung(pte); | ||
177 | if (writing && pte_write(pte)) | ||
178 | pte = pte_mkdirty(pte); | ||
179 | } | ||
180 | |||
181 | *p = pte; /* clears _PAGE_BUSY */ | ||
182 | |||
183 | return pte; | ||
184 | } | ||
185 | |||
186 | /* Return HPTE cache control bits corresponding to Linux pte bits */ | ||
187 | static inline unsigned long hpte_cache_bits(unsigned long pte_val) | ||
188 | { | ||
189 | #if _PAGE_NO_CACHE == HPTE_R_I && _PAGE_WRITETHRU == HPTE_R_W | ||
190 | return pte_val & (HPTE_R_W | HPTE_R_I); | ||
191 | #else | ||
192 | return ((pte_val & _PAGE_NO_CACHE) ? HPTE_R_I : 0) + | ||
193 | ((pte_val & _PAGE_WRITETHRU) ? HPTE_R_W : 0); | ||
194 | #endif | ||
195 | } | ||
196 | |||
197 | static inline bool hpte_read_permission(unsigned long pp, unsigned long key) | ||
198 | { | ||
199 | if (key) | ||
200 | return PP_RWRX <= pp && pp <= PP_RXRX; | ||
201 | return 1; | ||
202 | } | ||
203 | |||
204 | static inline bool hpte_write_permission(unsigned long pp, unsigned long key) | ||
205 | { | ||
206 | if (key) | ||
207 | return pp == PP_RWRW; | ||
208 | return pp <= PP_RWRW; | ||
209 | } | ||
210 | |||
211 | static inline int hpte_get_skey_perm(unsigned long hpte_r, unsigned long amr) | ||
212 | { | ||
213 | unsigned long skey; | ||
214 | |||
215 | skey = ((hpte_r & HPTE_R_KEY_HI) >> 57) | | ||
216 | ((hpte_r & HPTE_R_KEY_LO) >> 9); | ||
217 | return (amr >> (62 - 2 * skey)) & 3; | ||
218 | } | ||
219 | |||
220 | static inline void lock_rmap(unsigned long *rmap) | ||
221 | { | ||
222 | do { | ||
223 | while (test_bit(KVMPPC_RMAP_LOCK_BIT, rmap)) | ||
224 | cpu_relax(); | ||
225 | } while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmap)); | ||
226 | } | ||
227 | |||
228 | static inline void unlock_rmap(unsigned long *rmap) | ||
229 | { | ||
230 | __clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmap); | ||
231 | } | ||
232 | |||
233 | static inline bool slot_is_aligned(struct kvm_memory_slot *memslot, | ||
234 | unsigned long pagesize) | ||
235 | { | ||
236 | unsigned long mask = (pagesize >> PAGE_SHIFT) - 1; | ||
237 | |||
238 | if (pagesize <= PAGE_SIZE) | ||
239 | return 1; | ||
240 | return !(memslot->base_gfn & mask) && !(memslot->npages & mask); | ||
241 | } | ||
242 | |||
65 | #endif /* __ASM_KVM_BOOK3S_64_H__ */ | 243 | #endif /* __ASM_KVM_BOOK3S_64_H__ */ |
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h index adbfca9dd100..8cd50a514271 100644 --- a/arch/powerpc/include/asm/kvm_e500.h +++ b/arch/powerpc/include/asm/kvm_e500.h | |||
@@ -22,46 +22,55 @@ | |||
22 | #define E500_PID_NUM 3 | 22 | #define E500_PID_NUM 3 |
23 | #define E500_TLB_NUM 2 | 23 | #define E500_TLB_NUM 2 |
24 | 24 | ||
25 | struct tlbe{ | ||
26 | u32 mas1; | ||
27 | u32 mas2; | ||
28 | u32 mas3; | ||
29 | u32 mas7; | ||
30 | }; | ||
31 | |||
32 | #define E500_TLB_VALID 1 | 25 | #define E500_TLB_VALID 1 |
33 | #define E500_TLB_DIRTY 2 | 26 | #define E500_TLB_DIRTY 2 |
34 | 27 | ||
35 | struct tlbe_priv { | 28 | struct tlbe_ref { |
36 | pfn_t pfn; | 29 | pfn_t pfn; |
37 | unsigned int flags; /* E500_TLB_* */ | 30 | unsigned int flags; /* E500_TLB_* */ |
38 | }; | 31 | }; |
39 | 32 | ||
33 | struct tlbe_priv { | ||
34 | struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */ | ||
35 | }; | ||
36 | |||
40 | struct vcpu_id_table; | 37 | struct vcpu_id_table; |
41 | 38 | ||
39 | struct kvmppc_e500_tlb_params { | ||
40 | int entries, ways, sets; | ||
41 | }; | ||
42 | |||
42 | struct kvmppc_vcpu_e500 { | 43 | struct kvmppc_vcpu_e500 { |
43 | /* Unmodified copy of the guest's TLB. */ | 44 | /* Unmodified copy of the guest's TLB -- shared with host userspace. */ |
44 | struct tlbe *gtlb_arch[E500_TLB_NUM]; | 45 | struct kvm_book3e_206_tlb_entry *gtlb_arch; |
46 | |||
47 | /* Starting entry number in gtlb_arch[] */ | ||
48 | int gtlb_offset[E500_TLB_NUM]; | ||
45 | 49 | ||
46 | /* KVM internal information associated with each guest TLB entry */ | 50 | /* KVM internal information associated with each guest TLB entry */ |
47 | struct tlbe_priv *gtlb_priv[E500_TLB_NUM]; | 51 | struct tlbe_priv *gtlb_priv[E500_TLB_NUM]; |
48 | 52 | ||
49 | unsigned int gtlb_size[E500_TLB_NUM]; | 53 | struct kvmppc_e500_tlb_params gtlb_params[E500_TLB_NUM]; |
54 | |||
50 | unsigned int gtlb_nv[E500_TLB_NUM]; | 55 | unsigned int gtlb_nv[E500_TLB_NUM]; |
51 | 56 | ||
57 | /* | ||
58 | * information associated with each host TLB entry -- | ||
59 | * TLB1 only for now. If/when guest TLB1 entries can be | ||
60 | * mapped with host TLB0, this will be used for that too. | ||
61 | * | ||
62 | * We don't want to use this for guest TLB0 because then we'd | ||
63 | * have the overhead of doing the translation again even if | ||
64 | * the entry is still in the guest TLB (e.g. we swapped out | ||
65 | * and back, and our host TLB entries got evicted). | ||
66 | */ | ||
67 | struct tlbe_ref *tlb_refs[E500_TLB_NUM]; | ||
68 | unsigned int host_tlb1_nv; | ||
69 | |||
52 | u32 host_pid[E500_PID_NUM]; | 70 | u32 host_pid[E500_PID_NUM]; |
53 | u32 pid[E500_PID_NUM]; | 71 | u32 pid[E500_PID_NUM]; |
54 | u32 svr; | 72 | u32 svr; |
55 | 73 | ||
56 | u32 mas0; | ||
57 | u32 mas1; | ||
58 | u32 mas2; | ||
59 | u32 mas3; | ||
60 | u32 mas4; | ||
61 | u32 mas5; | ||
62 | u32 mas6; | ||
63 | u32 mas7; | ||
64 | |||
65 | /* vcpu id table */ | 74 | /* vcpu id table */ |
66 | struct vcpu_id_table *idt; | 75 | struct vcpu_id_table *idt; |
67 | 76 | ||
@@ -73,6 +82,9 @@ struct kvmppc_vcpu_e500 { | |||
73 | u32 tlb1cfg; | 82 | u32 tlb1cfg; |
74 | u64 mcar; | 83 | u64 mcar; |
75 | 84 | ||
85 | struct page **shared_tlb_pages; | ||
86 | int num_shared_tlb_pages; | ||
87 | |||
76 | struct kvm_vcpu vcpu; | 88 | struct kvm_vcpu vcpu; |
77 | }; | 89 | }; |
78 | 90 | ||
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bf8af5d5d5dc..52eb9c1f4fe0 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
@@ -32,17 +32,32 @@ | |||
32 | #include <linux/atomic.h> | 32 | #include <linux/atomic.h> |
33 | #include <asm/kvm_asm.h> | 33 | #include <asm/kvm_asm.h> |
34 | #include <asm/processor.h> | 34 | #include <asm/processor.h> |
35 | #include <asm/page.h> | ||
35 | 36 | ||
36 | #define KVM_MAX_VCPUS NR_CPUS | 37 | #define KVM_MAX_VCPUS NR_CPUS |
37 | #define KVM_MAX_VCORES NR_CPUS | 38 | #define KVM_MAX_VCORES NR_CPUS |
38 | #define KVM_MEMORY_SLOTS 32 | 39 | #define KVM_MEMORY_SLOTS 32 |
39 | /* memory slots that are not exposed to userspace */ | 40 | /* memory slots that are not exposed to userspace */ |
40 | #define KVM_PRIVATE_MEM_SLOTS 4 | 41 | #define KVM_PRIVATE_MEM_SLOTS 4 |
42 | #define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) | ||
41 | 43 | ||
42 | #ifdef CONFIG_KVM_MMIO | 44 | #ifdef CONFIG_KVM_MMIO |
43 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 | 45 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 |
44 | #endif | 46 | #endif |
45 | 47 | ||
48 | #ifdef CONFIG_KVM_BOOK3S_64_HV | ||
49 | #include <linux/mmu_notifier.h> | ||
50 | |||
51 | #define KVM_ARCH_WANT_MMU_NOTIFIER | ||
52 | |||
53 | struct kvm; | ||
54 | extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); | ||
55 | extern int kvm_age_hva(struct kvm *kvm, unsigned long hva); | ||
56 | extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); | ||
57 | extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); | ||
58 | |||
59 | #endif | ||
60 | |||
46 | /* We don't currently support large pages. */ | 61 | /* We don't currently support large pages. */ |
47 | #define KVM_HPAGE_GFN_SHIFT(x) 0 | 62 | #define KVM_HPAGE_GFN_SHIFT(x) 0 |
48 | #define KVM_NR_PAGE_SIZES 1 | 63 | #define KVM_NR_PAGE_SIZES 1 |
@@ -158,34 +173,72 @@ struct kvmppc_spapr_tce_table { | |||
158 | struct page *pages[0]; | 173 | struct page *pages[0]; |
159 | }; | 174 | }; |
160 | 175 | ||
161 | struct kvmppc_rma_info { | 176 | struct kvmppc_linear_info { |
162 | void *base_virt; | 177 | void *base_virt; |
163 | unsigned long base_pfn; | 178 | unsigned long base_pfn; |
164 | unsigned long npages; | 179 | unsigned long npages; |
165 | struct list_head list; | 180 | struct list_head list; |
166 | atomic_t use_count; | 181 | atomic_t use_count; |
182 | int type; | ||
183 | }; | ||
184 | |||
185 | /* | ||
186 | * The reverse mapping array has one entry for each HPTE, | ||
187 | * which stores the guest's view of the second word of the HPTE | ||
188 | * (including the guest physical address of the mapping), | ||
189 | * plus forward and backward pointers in a doubly-linked ring | ||
190 | * of HPTEs that map the same host page. The pointers in this | ||
191 | * ring are 32-bit HPTE indexes, to save space. | ||
192 | */ | ||
193 | struct revmap_entry { | ||
194 | unsigned long guest_rpte; | ||
195 | unsigned int forw, back; | ||
196 | }; | ||
197 | |||
198 | /* | ||
199 | * We use the top bit of each memslot->rmap entry as a lock bit, | ||
200 | * and bit 32 as a present flag. The bottom 32 bits are the | ||
201 | * index in the guest HPT of a HPTE that points to the page. | ||
202 | */ | ||
203 | #define KVMPPC_RMAP_LOCK_BIT 63 | ||
204 | #define KVMPPC_RMAP_RC_SHIFT 32 | ||
205 | #define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT) | ||
206 | #define KVMPPC_RMAP_CHANGED (HPTE_R_C << KVMPPC_RMAP_RC_SHIFT) | ||
207 | #define KVMPPC_RMAP_PRESENT 0x100000000ul | ||
208 | #define KVMPPC_RMAP_INDEX 0xfffffffful | ||
209 | |||
210 | /* Low-order bits in kvm->arch.slot_phys[][] */ | ||
211 | #define KVMPPC_PAGE_ORDER_MASK 0x1f | ||
212 | #define KVMPPC_PAGE_NO_CACHE HPTE_R_I /* 0x20 */ | ||
213 | #define KVMPPC_PAGE_WRITETHRU HPTE_R_W /* 0x40 */ | ||
214 | #define KVMPPC_GOT_PAGE 0x80 | ||
215 | |||
216 | struct kvm_arch_memory_slot { | ||
167 | }; | 217 | }; |
168 | 218 | ||
169 | struct kvm_arch { | 219 | struct kvm_arch { |
170 | #ifdef CONFIG_KVM_BOOK3S_64_HV | 220 | #ifdef CONFIG_KVM_BOOK3S_64_HV |
171 | unsigned long hpt_virt; | 221 | unsigned long hpt_virt; |
172 | unsigned long ram_npages; | 222 | struct revmap_entry *revmap; |
173 | unsigned long ram_psize; | ||
174 | unsigned long ram_porder; | ||
175 | struct kvmppc_pginfo *ram_pginfo; | ||
176 | unsigned int lpid; | 223 | unsigned int lpid; |
177 | unsigned int host_lpid; | 224 | unsigned int host_lpid; |
178 | unsigned long host_lpcr; | 225 | unsigned long host_lpcr; |
179 | unsigned long sdr1; | 226 | unsigned long sdr1; |
180 | unsigned long host_sdr1; | 227 | unsigned long host_sdr1; |
181 | int tlbie_lock; | 228 | int tlbie_lock; |
182 | int n_rma_pages; | ||
183 | unsigned long lpcr; | 229 | unsigned long lpcr; |
184 | unsigned long rmor; | 230 | unsigned long rmor; |
185 | struct kvmppc_rma_info *rma; | 231 | struct kvmppc_linear_info *rma; |
232 | unsigned long vrma_slb_v; | ||
233 | int rma_setup_done; | ||
234 | int using_mmu_notifiers; | ||
186 | struct list_head spapr_tce_tables; | 235 | struct list_head spapr_tce_tables; |
236 | spinlock_t slot_phys_lock; | ||
237 | unsigned long *slot_phys[KVM_MEM_SLOTS_NUM]; | ||
238 | int slot_npages[KVM_MEM_SLOTS_NUM]; | ||
187 | unsigned short last_vcpu[NR_CPUS]; | 239 | unsigned short last_vcpu[NR_CPUS]; |
188 | struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; | 240 | struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; |
241 | struct kvmppc_linear_info *hpt_li; | ||
189 | #endif /* CONFIG_KVM_BOOK3S_64_HV */ | 242 | #endif /* CONFIG_KVM_BOOK3S_64_HV */ |
190 | }; | 243 | }; |
191 | 244 | ||
@@ -318,10 +371,6 @@ struct kvm_vcpu_arch { | |||
318 | u32 vrsave; /* also USPRG0 */ | 371 | u32 vrsave; /* also USPRG0 */ |
319 | u32 mmucr; | 372 | u32 mmucr; |
320 | ulong shadow_msr; | 373 | ulong shadow_msr; |
321 | ulong sprg4; | ||
322 | ulong sprg5; | ||
323 | ulong sprg6; | ||
324 | ulong sprg7; | ||
325 | ulong csrr0; | 374 | ulong csrr0; |
326 | ulong csrr1; | 375 | ulong csrr1; |
327 | ulong dsrr0; | 376 | ulong dsrr0; |
@@ -329,16 +378,14 @@ struct kvm_vcpu_arch { | |||
329 | ulong mcsrr0; | 378 | ulong mcsrr0; |
330 | ulong mcsrr1; | 379 | ulong mcsrr1; |
331 | ulong mcsr; | 380 | ulong mcsr; |
332 | ulong esr; | ||
333 | u32 dec; | 381 | u32 dec; |
334 | u32 decar; | 382 | u32 decar; |
335 | u32 tbl; | 383 | u32 tbl; |
336 | u32 tbu; | 384 | u32 tbu; |
337 | u32 tcr; | 385 | u32 tcr; |
338 | u32 tsr; | 386 | ulong tsr; /* we need to perform set/clr_bits() which requires ulong */ |
339 | u32 ivor[64]; | 387 | u32 ivor[64]; |
340 | ulong ivpr; | 388 | ulong ivpr; |
341 | u32 pir; | ||
342 | u32 pvr; | 389 | u32 pvr; |
343 | 390 | ||
344 | u32 shadow_pid; | 391 | u32 shadow_pid; |
@@ -427,9 +474,14 @@ struct kvm_vcpu_arch { | |||
427 | #ifdef CONFIG_KVM_BOOK3S_64_HV | 474 | #ifdef CONFIG_KVM_BOOK3S_64_HV |
428 | struct kvm_vcpu_arch_shared shregs; | 475 | struct kvm_vcpu_arch_shared shregs; |
429 | 476 | ||
477 | unsigned long pgfault_addr; | ||
478 | long pgfault_index; | ||
479 | unsigned long pgfault_hpte[2]; | ||
480 | |||
430 | struct list_head run_list; | 481 | struct list_head run_list; |
431 | struct task_struct *run_task; | 482 | struct task_struct *run_task; |
432 | struct kvm_run *kvm_run; | 483 | struct kvm_run *kvm_run; |
484 | pgd_t *pgdir; | ||
433 | #endif | 485 | #endif |
434 | }; | 486 | }; |
435 | 487 | ||
@@ -438,4 +490,12 @@ struct kvm_vcpu_arch { | |||
438 | #define KVMPPC_VCPU_BUSY_IN_HOST 1 | 490 | #define KVMPPC_VCPU_BUSY_IN_HOST 1 |
439 | #define KVMPPC_VCPU_RUNNABLE 2 | 491 | #define KVMPPC_VCPU_RUNNABLE 2 |
440 | 492 | ||
493 | /* Values for vcpu->arch.io_gpr */ | ||
494 | #define KVM_MMIO_REG_MASK 0x001f | ||
495 | #define KVM_MMIO_REG_EXT_MASK 0xffe0 | ||
496 | #define KVM_MMIO_REG_GPR 0x0000 | ||
497 | #define KVM_MMIO_REG_FPR 0x0020 | ||
498 | #define KVM_MMIO_REG_QPR 0x0040 | ||
499 | #define KVM_MMIO_REG_FQPR 0x0060 | ||
500 | |||
441 | #endif /* __POWERPC_KVM_HOST_H__ */ | 501 | #endif /* __POWERPC_KVM_HOST_H__ */ |
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 50533f9adf40..7b754e743003 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h | |||
@@ -22,6 +22,16 @@ | |||
22 | 22 | ||
23 | #include <linux/types.h> | 23 | #include <linux/types.h> |
24 | 24 | ||
25 | /* | ||
26 | * Additions to this struct must only occur at the end, and should be | ||
27 | * accompanied by a KVM_MAGIC_FEAT flag to advertise that they are present | ||
28 | * (albeit not necessarily relevant to the current target hardware platform). | ||
29 | * | ||
30 | * Struct fields are always 32 or 64 bit aligned, depending on them being 32 | ||
31 | * or 64 bit wide respectively. | ||
32 | * | ||
33 | * See Documentation/virtual/kvm/ppc-pv.txt | ||
34 | */ | ||
25 | struct kvm_vcpu_arch_shared { | 35 | struct kvm_vcpu_arch_shared { |
26 | __u64 scratch1; | 36 | __u64 scratch1; |
27 | __u64 scratch2; | 37 | __u64 scratch2; |
@@ -33,11 +43,35 @@ struct kvm_vcpu_arch_shared { | |||
33 | __u64 sprg3; | 43 | __u64 sprg3; |
34 | __u64 srr0; | 44 | __u64 srr0; |
35 | __u64 srr1; | 45 | __u64 srr1; |
36 | __u64 dar; | 46 | __u64 dar; /* dear on BookE */ |
37 | __u64 msr; | 47 | __u64 msr; |
38 | __u32 dsisr; | 48 | __u32 dsisr; |
39 | __u32 int_pending; /* Tells the guest if we have an interrupt */ | 49 | __u32 int_pending; /* Tells the guest if we have an interrupt */ |
40 | __u32 sr[16]; | 50 | __u32 sr[16]; |
51 | __u32 mas0; | ||
52 | __u32 mas1; | ||
53 | __u64 mas7_3; | ||
54 | __u64 mas2; | ||
55 | __u32 mas4; | ||
56 | __u32 mas6; | ||
57 | __u32 esr; | ||
58 | __u32 pir; | ||
59 | |||
60 | /* | ||
61 | * SPRG4-7 are user-readable, so we can only keep these consistent | ||
62 | * between the shared area and the real registers when there's an | ||
63 | * intervening exit to KVM. This also applies to SPRG3 on some | ||
64 | * chips. | ||
65 | * | ||
66 | * This suffices for access by guest userspace, since in PR-mode | ||
67 | * KVM, an exit must occur when changing the guest's MSR[PR]. | ||
68 | * If the guest kernel writes to SPRG3-7 via the shared area, it | ||
69 | * must also use the shared area for reading while in kernel space. | ||
70 | */ | ||
71 | __u64 sprg4; | ||
72 | __u64 sprg5; | ||
73 | __u64 sprg6; | ||
74 | __u64 sprg7; | ||
41 | }; | 75 | }; |
42 | 76 | ||
43 | #define KVM_SC_MAGIC_R0 0x4b564d21 /* "KVM!" */ | 77 | #define KVM_SC_MAGIC_R0 0x4b564d21 /* "KVM!" */ |
@@ -47,7 +81,10 @@ struct kvm_vcpu_arch_shared { | |||
47 | 81 | ||
48 | #define KVM_FEATURE_MAGIC_PAGE 1 | 82 | #define KVM_FEATURE_MAGIC_PAGE 1 |
49 | 83 | ||
50 | #define KVM_MAGIC_FEAT_SR (1 << 0) | 84 | #define KVM_MAGIC_FEAT_SR (1 << 0) |
85 | |||
86 | /* MASn, ESR, PIR, and high SPRGs */ | ||
87 | #define KVM_MAGIC_FEAT_MAS0_TO_SPRG7 (1 << 1) | ||
51 | 88 | ||
52 | #ifdef __KERNEL__ | 89 | #ifdef __KERNEL__ |
53 | 90 | ||
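
Since older hosts do not fill the fields past sr[16], a guest must gate on the new feature bit before trusting them. A minimal sketch of the guest side on BookE, assuming the existing kvm_arch_para_features() query; the helper name is illustrative:

	static u32 guest_read_esr(struct kvm_vcpu_arch_shared *shared)
	{
		/* Only trust fields beyond sr[16] when advertised. */
		if (kvm_arch_para_features() & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
			return shared->esr;

		return mfspr(SPRN_ESR);	/* fall back to the real SPR */
	}
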
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 46efd1a265c9..9d6dee0f7d48 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h | |||
@@ -66,6 +66,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run, | |||
66 | extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); | 66 | extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); |
67 | extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); | 67 | extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); |
68 | extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); | 68 | extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); |
69 | extern void kvmppc_decrementer_func(unsigned long data); | ||
69 | extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu); | 70 | extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu); |
70 | 71 | ||
71 | /* Core-specific hooks */ | 72 | /* Core-specific hooks */ |
@@ -94,7 +95,7 @@ extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, | |||
94 | extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu); | 95 | extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu); |
95 | extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); | 96 | extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); |
96 | 97 | ||
97 | extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu); | 98 | extern void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu); |
98 | extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); | 99 | extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); |
99 | extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags); | 100 | extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags); |
100 | extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); | 101 | extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); |
@@ -120,15 +121,17 @@ extern long kvmppc_alloc_hpt(struct kvm *kvm); | |||
120 | extern void kvmppc_free_hpt(struct kvm *kvm); | 121 | extern void kvmppc_free_hpt(struct kvm *kvm); |
121 | extern long kvmppc_prepare_vrma(struct kvm *kvm, | 122 | extern long kvmppc_prepare_vrma(struct kvm *kvm, |
122 | struct kvm_userspace_memory_region *mem); | 123 | struct kvm_userspace_memory_region *mem); |
123 | extern void kvmppc_map_vrma(struct kvm *kvm, | 124 | extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, |
124 | struct kvm_userspace_memory_region *mem); | 125 | struct kvm_memory_slot *memslot, unsigned long porder); |
125 | extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); | 126 | extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); |
126 | extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | 127 | extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, |
127 | struct kvm_create_spapr_tce *args); | 128 | struct kvm_create_spapr_tce *args); |
128 | extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, | 129 | extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, |
129 | struct kvm_allocate_rma *rma); | 130 | struct kvm_allocate_rma *rma); |
130 | extern struct kvmppc_rma_info *kvm_alloc_rma(void); | 131 | extern struct kvmppc_linear_info *kvm_alloc_rma(void); |
131 | extern void kvm_release_rma(struct kvmppc_rma_info *ri); | 132 | extern void kvm_release_rma(struct kvmppc_linear_info *ri); |
133 | extern struct kvmppc_linear_info *kvm_alloc_hpt(void); | ||
134 | extern void kvm_release_hpt(struct kvmppc_linear_info *li); | ||
132 | extern int kvmppc_core_init_vm(struct kvm *kvm); | 135 | extern int kvmppc_core_init_vm(struct kvm *kvm); |
133 | extern void kvmppc_core_destroy_vm(struct kvm *kvm); | 136 | extern void kvmppc_core_destroy_vm(struct kvm *kvm); |
134 | extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, | 137 | extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, |
@@ -175,6 +178,9 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); | |||
175 | void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); | 178 | void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); |
176 | int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); | 179 | int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); |
177 | 180 | ||
181 | int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg); | ||
182 | int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg); | ||
183 | |||
178 | void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); | 184 | void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); |
179 | 185 | ||
180 | #ifdef CONFIG_KVM_BOOK3S_64_HV | 186 | #ifdef CONFIG_KVM_BOOK3S_64_HV |
@@ -183,14 +189,19 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) | |||
183 | paca[cpu].kvm_hstate.xics_phys = addr; | 189 | paca[cpu].kvm_hstate.xics_phys = addr; |
184 | } | 190 | } |
185 | 191 | ||
186 | extern void kvm_rma_init(void); | 192 | extern void kvm_linear_init(void); |
187 | 193 | ||
188 | #else | 194 | #else |
189 | static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) | 195 | static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) |
190 | {} | 196 | {} |
191 | 197 | ||
192 | static inline void kvm_rma_init(void) | 198 | static inline void kvm_linear_init(void) |
193 | {} | 199 | {} |
194 | #endif | 200 | #endif |
195 | 201 | ||
202 | int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, | ||
203 | struct kvm_config_tlb *cfg); | ||
204 | int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, | ||
205 | struct kvm_dirty_tlb *cfg); | ||
206 | |||
196 | #endif /* __POWERPC_KVM_PPC_H__ */ | 207 | #endif /* __POWERPC_KVM_PPC_H__ */ |
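
The two kvm_vcpu_ioctl_{get,set}_one_reg() prototypes back the generic KVM_GET_ONE_REG/KVM_SET_ONE_REG vcpu ioctls. A sketch of the userspace side, assuming KVM_REG_PPC_HIOR as the register ID (the first one wired up on Book3S):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	/* Set the guest's HIOR through the one-reg interface. */
	static int set_hior(int vcpu_fd, __u64 hior)
	{
		struct kvm_one_reg reg = {
			.id   = KVM_REG_PPC_HIOR,
			.addr = (__u64)(unsigned long)&hior,
		};

		return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
	}
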
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index f5f89cafebd0..cdb5421877e2 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h | |||
@@ -41,9 +41,10 @@ | |||
41 | /* MAS registers bit definitions */ | 41 | /* MAS registers bit definitions */ |
42 | 42 | ||
43 | #define MAS0_TLBSEL(x) (((x) << 28) & 0x30000000) | 43 | #define MAS0_TLBSEL(x) (((x) << 28) & 0x30000000) |
44 | #define MAS0_ESEL(x) (((x) << 16) & 0x0FFF0000) | ||
45 | #define MAS0_NV(x) ((x) & 0x00000FFF) | ||
46 | #define MAS0_ESEL_MASK 0x0FFF0000 | 44 | #define MAS0_ESEL_MASK 0x0FFF0000 |
45 | #define MAS0_ESEL_SHIFT 16 | ||
46 | #define MAS0_ESEL(x) (((x) << MAS0_ESEL_SHIFT) & MAS0_ESEL_MASK) | ||
47 | #define MAS0_NV(x) ((x) & 0x00000FFF) | ||
47 | #define MAS0_HES 0x00004000 | 48 | #define MAS0_HES 0x00004000 |
48 | #define MAS0_WQ_ALLWAYS 0x00000000 | 49 | #define MAS0_WQ_ALLWAYS 0x00000000 |
49 | #define MAS0_WQ_COND 0x00001000 | 50 | #define MAS0_WQ_COND 0x00001000 |
@@ -167,6 +168,7 @@ | |||
167 | #define TLBnCFG_MAXSIZE 0x000f0000 /* Maximum Page Size (v1.0) */ | 168 | #define TLBnCFG_MAXSIZE 0x000f0000 /* Maximum Page Size (v1.0) */ |
168 | #define TLBnCFG_MAXSIZE_SHIFT 16 | 169 | #define TLBnCFG_MAXSIZE_SHIFT 16 |
169 | #define TLBnCFG_ASSOC 0xff000000 /* Associativity */ | 170 | #define TLBnCFG_ASSOC 0xff000000 /* Associativity */ |
171 | #define TLBnCFG_ASSOC_SHIFT 24 | ||
170 | 172 | ||
171 | /* TLBnPS encoding */ | 173 | /* TLBnPS encoding */ |
172 | #define TLBnPS_4K 0x00000004 | 174 | #define TLBnPS_4K 0x00000004 |
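
Splitting MAS0_ESEL into an explicit shift and mask lets callers parse MAS0 as well as build it, which the old macro-only form could not. For instance (illustrative helper):

	/* Extract the entry-select field that MAS0_ESEL() packed in. */
	static inline unsigned int mas0_to_esel(u32 mas0)
	{
		return (mas0 & MAS0_ESEL_MASK) >> MAS0_ESEL_SHIFT;
	}
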
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 412ba493cb98..0759dd8bf5aa 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h | |||
@@ -108,11 +108,11 @@ extern char initial_stab[]; | |||
108 | #define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000) | 108 | #define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000) |
109 | 109 | ||
110 | /* Values for PP (assumes Ks=0, Kp=1) */ | 110 | /* Values for PP (assumes Ks=0, Kp=1) */ |
111 | /* pp0 will always be 0 for linux */ | ||
112 | #define PP_RWXX 0 /* Supervisor read/write, User none */ | 111 | #define PP_RWXX 0 /* Supervisor read/write, User none */ |
113 | #define PP_RWRX 1 /* Supervisor read/write, User read */ | 112 | #define PP_RWRX 1 /* Supervisor read/write, User read */ |
114 | #define PP_RWRW 2 /* Supervisor read/write, User read/write */ | 113 | #define PP_RWRW 2 /* Supervisor read/write, User read/write */ |
115 | #define PP_RXRX 3 /* Supervisor read, User read */ | 114 | #define PP_RXRX 3 /* Supervisor read, User read */ |
115 | #define PP_RXXX (HPTE_R_PP0 | 2) /* Supervisor read, user none */ | ||
116 | 116 | ||
117 | #ifndef __ASSEMBLY__ | 117 | #ifndef __ASSEMBLY__ |
118 | 118 | ||
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index e980faae4225..d81f99430fe7 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h | |||
@@ -45,6 +45,7 @@ | |||
45 | #define PPC_INST_MFSPR_DSCR_MASK 0xfc1fffff | 45 | #define PPC_INST_MFSPR_DSCR_MASK 0xfc1fffff |
46 | #define PPC_INST_MTSPR_DSCR 0x7c1103a6 | 46 | #define PPC_INST_MTSPR_DSCR 0x7c1103a6 |
47 | #define PPC_INST_MTSPR_DSCR_MASK 0xfc1fffff | 47 | #define PPC_INST_MTSPR_DSCR_MASK 0xfc1fffff |
48 | #define PPC_INST_SLBFEE 0x7c0007a7 | ||
48 | 49 | ||
49 | #define PPC_INST_STRING 0x7c00042a | 50 | #define PPC_INST_STRING 0x7c00042a |
50 | #define PPC_INST_STRING_MASK 0xfc0007fe | 51 | #define PPC_INST_STRING_MASK 0xfc0007fe |
@@ -183,7 +184,8 @@ | |||
183 | __PPC_RS(t) | __PPC_RA(a) | __PPC_RB(b)) | 184 | __PPC_RS(t) | __PPC_RA(a) | __PPC_RB(b)) |
184 | #define PPC_ERATSX_DOT(t, a, w) stringify_in_c(.long PPC_INST_ERATSX_DOT | \ | 185 | #define PPC_ERATSX_DOT(t, a, w) stringify_in_c(.long PPC_INST_ERATSX_DOT | \ |
185 | __PPC_RS(t) | __PPC_RA(a) | __PPC_RB(b)) | 186 | __PPC_RS(t) | __PPC_RA(a) | __PPC_RB(b)) |
186 | | 187 | #define PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \ |

188 | __PPC_RT(t) | __PPC_RB(b)) | ||
187 | 189 | ||
188 | /* | 190 | /* |
189 | * Define what the VSX XX1 form instructions will look like, then add | 191 | * Define what the VSX XX1 form instructions will look like, then add |
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index b1a215eabef6..9d7f0fb69028 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h | |||
@@ -216,6 +216,7 @@ | |||
216 | #define DSISR_ISSTORE 0x02000000 /* access was a store */ | 216 | #define DSISR_ISSTORE 0x02000000 /* access was a store */ |
217 | #define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */ | 217 | #define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */ |
218 | #define DSISR_NOSEGMENT 0x00200000 /* STAB/SLB miss */ | 218 | #define DSISR_NOSEGMENT 0x00200000 /* STAB/SLB miss */ |
219 | #define DSISR_KEYFAULT 0x00200000 /* Key fault */ | ||
219 | #define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */ | 220 | #define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */ |
220 | #define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */ | 221 | #define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */ |
221 | #define SPRN_TBWL 0x11C /* Time Base Lower Register (super, R/W) */ | 222 | #define SPRN_TBWL 0x11C /* Time Base Lower Register (super, R/W) */ |
@@ -237,6 +238,7 @@ | |||
237 | #define LPCR_ISL (1ul << (63-2)) | 238 | #define LPCR_ISL (1ul << (63-2)) |
238 | #define LPCR_VC_SH (63-2) | 239 | #define LPCR_VC_SH (63-2) |
239 | #define LPCR_DPFD_SH (63-11) | 240 | #define LPCR_DPFD_SH (63-11) |
241 | #define LPCR_VRMASD (0x1ful << (63-16)) | ||
240 | #define LPCR_VRMA_L (1ul << (63-12)) | 242 | #define LPCR_VRMA_L (1ul << (63-12)) |
241 | #define LPCR_VRMA_LP0 (1ul << (63-15)) | 243 | #define LPCR_VRMA_LP0 (1ul << (63-15)) |
242 | #define LPCR_VRMA_LP1 (1ul << (63-16)) | 244 | #define LPCR_VRMA_LP1 (1ul << (63-16)) |
@@ -493,6 +495,9 @@ | |||
493 | #define SPRN_SPRG7 0x117 /* Special Purpose Register General 7 */ | 495 | #define SPRN_SPRG7 0x117 /* Special Purpose Register General 7 */ |
494 | #define SPRN_SRR0 0x01A /* Save/Restore Register 0 */ | 496 | #define SPRN_SRR0 0x01A /* Save/Restore Register 0 */ |
495 | #define SPRN_SRR1 0x01B /* Save/Restore Register 1 */ | 497 | #define SPRN_SRR1 0x01B /* Save/Restore Register 1 */ |
498 | #define SRR1_ISI_NOPT 0x40000000 /* ISI: Not found in hash */ | ||
499 | #define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */ | ||
500 | #define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */ | ||
496 | #define SRR1_WAKEMASK 0x00380000 /* reason for wakeup */ | 501 | #define SRR1_WAKEMASK 0x00380000 /* reason for wakeup */ |
497 | #define SRR1_WAKESYSERR 0x00300000 /* System error */ | 502 | #define SRR1_WAKESYSERR 0x00300000 /* System error */ |
498 | #define SRR1_WAKEEE 0x00200000 /* External interrupt */ | 503 | #define SRR1_WAKEEE 0x00200000 /* External interrupt */ |
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index cc492e48ddfa..34b8afe94a50 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c | |||
@@ -412,16 +412,23 @@ int main(void) | |||
412 | DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2)); | 412 | DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2)); |
413 | DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3)); | 413 | DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3)); |
414 | #endif | 414 | #endif |
415 | DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); | 415 | DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4)); |
416 | DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); | 416 | DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5)); |
417 | DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); | 417 | DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6)); |
418 | DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); | 418 | DEFINE(VCPU_SHARED_SPRG7, offsetof(struct kvm_vcpu_arch_shared, sprg7)); |
419 | DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); | 419 | DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); |
420 | DEFINE(VCPU_SHADOW_PID1, offsetof(struct kvm_vcpu, arch.shadow_pid1)); | 420 | DEFINE(VCPU_SHADOW_PID1, offsetof(struct kvm_vcpu, arch.shadow_pid1)); |
421 | DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared)); | 421 | DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared)); |
422 | DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); | 422 | DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); |
423 | DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); | 423 | DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); |
424 | 424 | ||
425 | DEFINE(VCPU_SHARED_MAS0, offsetof(struct kvm_vcpu_arch_shared, mas0)); | ||
426 | DEFINE(VCPU_SHARED_MAS1, offsetof(struct kvm_vcpu_arch_shared, mas1)); | ||
427 | DEFINE(VCPU_SHARED_MAS2, offsetof(struct kvm_vcpu_arch_shared, mas2)); | ||
428 | DEFINE(VCPU_SHARED_MAS7_3, offsetof(struct kvm_vcpu_arch_shared, mas7_3)); | ||
429 | DEFINE(VCPU_SHARED_MAS4, offsetof(struct kvm_vcpu_arch_shared, mas4)); | ||
430 | DEFINE(VCPU_SHARED_MAS6, offsetof(struct kvm_vcpu_arch_shared, mas6)); | ||
431 | |||
425 | /* book3s */ | 432 | /* book3s */ |
426 | #ifdef CONFIG_KVM_BOOK3S_64_HV | 433 | #ifdef CONFIG_KVM_BOOK3S_64_HV |
427 | DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid)); | 434 | DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid)); |
@@ -434,6 +441,7 @@ int main(void) | |||
434 | DEFINE(KVM_LAST_VCPU, offsetof(struct kvm, arch.last_vcpu)); | 441 | DEFINE(KVM_LAST_VCPU, offsetof(struct kvm, arch.last_vcpu)); |
435 | DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); | 442 | DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); |
436 | DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor)); | 443 | DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor)); |
444 | DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); | ||
437 | DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); | 445 | DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); |
438 | DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); | 446 | DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); |
439 | #endif | 447 | #endif |
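
For context: DEFINE() emits a marker line that the build turns into an assembler-visible constant, so the VCPU_SHARED_MAS* offsets above become usable from .S files (e.g. stw r5, VCPU_SHARED_MAS0(r11)). The macro itself, from include/linux/kbuild.h:

	#define DEFINE(sym, val) \
		asm volatile("\n->" #sym " %0 " #val : : "i" (val))
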
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 2d0868a4e2f0..cb705fdbb458 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S | |||
@@ -101,14 +101,14 @@ data_access_not_stab: | |||
101 | END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) | 101 | END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) |
102 | #endif | 102 | #endif |
103 | EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD, | 103 | EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD, |
104 | KVMTEST_PR, 0x300) | 104 | KVMTEST, 0x300) |
105 | 105 | ||
106 | . = 0x380 | 106 | . = 0x380 |
107 | .globl data_access_slb_pSeries | 107 | .globl data_access_slb_pSeries |
108 | data_access_slb_pSeries: | 108 | data_access_slb_pSeries: |
109 | HMT_MEDIUM | 109 | HMT_MEDIUM |
110 | SET_SCRATCH0(r13) | 110 | SET_SCRATCH0(r13) |
111 | EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380) | 111 | EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380) |
112 | std r3,PACA_EXSLB+EX_R3(r13) | 112 | std r3,PACA_EXSLB+EX_R3(r13) |
113 | mfspr r3,SPRN_DAR | 113 | mfspr r3,SPRN_DAR |
114 | #ifdef __DISABLED__ | 114 | #ifdef __DISABLED__ |
@@ -330,8 +330,8 @@ do_stab_bolted_pSeries: | |||
330 | EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD) | 330 | EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD) |
331 | #endif /* CONFIG_POWER4_ONLY */ | 331 | #endif /* CONFIG_POWER4_ONLY */ |
332 | 332 | ||
333 | KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x300) | 333 | KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300) |
334 | KVM_HANDLER_PR_SKIP(PACA_EXSLB, EXC_STD, 0x380) | 334 | KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380) |
335 | KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x400) | 335 | KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x400) |
336 | KVM_HANDLER_PR(PACA_EXSLB, EXC_STD, 0x480) | 336 | KVM_HANDLER_PR(PACA_EXSLB, EXC_STD, 0x480) |
337 | KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x900) | 337 | KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x900) |
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 2985338d0e10..62bdf2389669 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c | |||
@@ -1,5 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved. | 2 | * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved. |
3 | * Copyright 2010-2011 Freescale Semiconductor, Inc. | ||
3 | * | 4 | * |
4 | * Authors: | 5 | * Authors: |
5 | * Alexander Graf <agraf@suse.de> | 6 | * Alexander Graf <agraf@suse.de> |
@@ -29,6 +30,7 @@ | |||
29 | #include <asm/sections.h> | 30 | #include <asm/sections.h> |
30 | #include <asm/cacheflush.h> | 31 | #include <asm/cacheflush.h> |
31 | #include <asm/disassemble.h> | 32 | #include <asm/disassemble.h> |
33 | #include <asm/ppc-opcode.h> | ||
32 | 34 | ||
33 | #define KVM_MAGIC_PAGE (-4096L) | 35 | #define KVM_MAGIC_PAGE (-4096L) |
34 | #define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x) | 36 | #define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x) |
@@ -41,34 +43,30 @@ | |||
41 | #define KVM_INST_B 0x48000000 | 43 | #define KVM_INST_B 0x48000000 |
42 | #define KVM_INST_B_MASK 0x03ffffff | 44 | #define KVM_INST_B_MASK 0x03ffffff |
43 | #define KVM_INST_B_MAX 0x01ffffff | 45 | #define KVM_INST_B_MAX 0x01ffffff |
46 | #define KVM_INST_LI 0x38000000 | ||
44 | 47 | ||
45 | #define KVM_MASK_RT 0x03e00000 | 48 | #define KVM_MASK_RT 0x03e00000 |
46 | #define KVM_RT_30 0x03c00000 | 49 | #define KVM_RT_30 0x03c00000 |
47 | #define KVM_MASK_RB 0x0000f800 | 50 | #define KVM_MASK_RB 0x0000f800 |
48 | #define KVM_INST_MFMSR 0x7c0000a6 | 51 | #define KVM_INST_MFMSR 0x7c0000a6 |
49 | #define KVM_INST_MFSPR_SPRG0 0x7c1042a6 | 52 | |
50 | #define KVM_INST_MFSPR_SPRG1 0x7c1142a6 | 53 | #define SPR_FROM 0 |
51 | #define KVM_INST_MFSPR_SPRG2 0x7c1242a6 | 54 | #define SPR_TO 0x100 |
52 | #define KVM_INST_MFSPR_SPRG3 0x7c1342a6 | 55 | |
53 | #define KVM_INST_MFSPR_SRR0 0x7c1a02a6 | 56 | #define KVM_INST_SPR(sprn, moveto) (0x7c0002a6 | \ |
54 | #define KVM_INST_MFSPR_SRR1 0x7c1b02a6 | 57 | (((sprn) & 0x1f) << 16) | \ |
55 | #define KVM_INST_MFSPR_DAR 0x7c1302a6 | 58 | (((sprn) & 0x3e0) << 6) | \ |
56 | #define KVM_INST_MFSPR_DSISR 0x7c1202a6 | 59 | (moveto)) |
57 | 60 | ||
58 | #define KVM_INST_MTSPR_SPRG0 0x7c1043a6 | 61 | #define KVM_INST_MFSPR(sprn) KVM_INST_SPR(sprn, SPR_FROM) |
59 | #define KVM_INST_MTSPR_SPRG1 0x7c1143a6 | 62 | #define KVM_INST_MTSPR(sprn) KVM_INST_SPR(sprn, SPR_TO) |
60 | #define KVM_INST_MTSPR_SPRG2 0x7c1243a6 | ||
61 | #define KVM_INST_MTSPR_SPRG3 0x7c1343a6 | ||
62 | #define KVM_INST_MTSPR_SRR0 0x7c1a03a6 | ||
63 | #define KVM_INST_MTSPR_SRR1 0x7c1b03a6 | ||
64 | #define KVM_INST_MTSPR_DAR 0x7c1303a6 | ||
65 | #define KVM_INST_MTSPR_DSISR 0x7c1203a6 | ||
66 | 63 | ||
67 | #define KVM_INST_TLBSYNC 0x7c00046c | 64 | #define KVM_INST_TLBSYNC 0x7c00046c |
68 | #define KVM_INST_MTMSRD_L0 0x7c000164 | 65 | #define KVM_INST_MTMSRD_L0 0x7c000164 |
69 | #define KVM_INST_MTMSRD_L1 0x7c010164 | 66 | #define KVM_INST_MTMSRD_L1 0x7c010164 |
70 | #define KVM_INST_MTMSR 0x7c000124 | 67 | #define KVM_INST_MTMSR 0x7c000124 |
71 | 68 | ||
69 | #define KVM_INST_WRTEE 0x7c000106 | ||
72 | #define KVM_INST_WRTEEI_0 0x7c000146 | 70 | #define KVM_INST_WRTEEI_0 0x7c000146 |
73 | #define KVM_INST_WRTEEI_1 0x7c008146 | 71 | #define KVM_INST_WRTEEI_1 0x7c008146 |
74 | 72 | ||
@@ -270,26 +268,27 @@ static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt) | |||
270 | 268 | ||
271 | #ifdef CONFIG_BOOKE | 269 | #ifdef CONFIG_BOOKE |
272 | 270 | ||
273 | extern u32 kvm_emulate_wrteei_branch_offs; | 271 | extern u32 kvm_emulate_wrtee_branch_offs; |
274 | extern u32 kvm_emulate_wrteei_ee_offs; | 272 | extern u32 kvm_emulate_wrtee_reg_offs; |
275 | extern u32 kvm_emulate_wrteei_len; | 273 | extern u32 kvm_emulate_wrtee_orig_ins_offs; |
276 | extern u32 kvm_emulate_wrteei[]; | 274 | extern u32 kvm_emulate_wrtee_len; |
275 | extern u32 kvm_emulate_wrtee[]; | ||
277 | 276 | ||
278 | static void kvm_patch_ins_wrteei(u32 *inst) | 277 | static void kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one) |
279 | { | 278 | { |
280 | u32 *p; | 279 | u32 *p; |
281 | int distance_start; | 280 | int distance_start; |
282 | int distance_end; | 281 | int distance_end; |
283 | ulong next_inst; | 282 | ulong next_inst; |
284 | 283 | ||
285 | p = kvm_alloc(kvm_emulate_wrteei_len * 4); | 284 | p = kvm_alloc(kvm_emulate_wrtee_len * 4); |
286 | if (!p) | 285 | if (!p) |
287 | return; | 286 | return; |
288 | 287 | ||
289 | /* Find out where we are and put everything there */ | 288 | /* Find out where we are and put everything there */ |
290 | distance_start = (ulong)p - (ulong)inst; | 289 | distance_start = (ulong)p - (ulong)inst; |
291 | next_inst = ((ulong)inst + 4); | 290 | next_inst = ((ulong)inst + 4); |
292 | distance_end = next_inst - (ulong)&p[kvm_emulate_wrteei_branch_offs]; | 291 | distance_end = next_inst - (ulong)&p[kvm_emulate_wrtee_branch_offs]; |
293 | 292 | ||
294 | /* Make sure we only write valid b instructions */ | 293 | /* Make sure we only write valid b instructions */ |
295 | if (distance_start > KVM_INST_B_MAX) { | 294 | if (distance_start > KVM_INST_B_MAX) { |
@@ -298,10 +297,65 @@ static void kvm_patch_ins_wrteei(u32 *inst) | |||
298 | } | 297 | } |
299 | 298 | ||
300 | /* Modify the chunk to fit the invocation */ | 299 | /* Modify the chunk to fit the invocation */ |
301 | memcpy(p, kvm_emulate_wrteei, kvm_emulate_wrteei_len * 4); | 300 | memcpy(p, kvm_emulate_wrtee, kvm_emulate_wrtee_len * 4); |
302 | p[kvm_emulate_wrteei_branch_offs] |= distance_end & KVM_INST_B_MASK; | 301 | p[kvm_emulate_wrtee_branch_offs] |= distance_end & KVM_INST_B_MASK; |
303 | p[kvm_emulate_wrteei_ee_offs] |= (*inst & MSR_EE); | 302 | |
304 | flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrteei_len * 4); | 303 | if (imm_one) { |
304 | p[kvm_emulate_wrtee_reg_offs] = | ||
305 | KVM_INST_LI | __PPC_RT(30) | MSR_EE; | ||
306 | } else { | ||
307 | /* Make clobbered registers work too */ | ||
308 | switch (get_rt(rt)) { | ||
309 | case 30: | ||
310 | kvm_patch_ins_ll(&p[kvm_emulate_wrtee_reg_offs], | ||
311 | magic_var(scratch2), KVM_RT_30); | ||
312 | break; | ||
313 | case 31: | ||
314 | kvm_patch_ins_ll(&p[kvm_emulate_wrtee_reg_offs], | ||
315 | magic_var(scratch1), KVM_RT_30); | ||
316 | break; | ||
317 | default: | ||
318 | p[kvm_emulate_wrtee_reg_offs] |= rt; | ||
319 | break; | ||
320 | } | ||
321 | } | ||
322 | |||
323 | p[kvm_emulate_wrtee_orig_ins_offs] = *inst; | ||
324 | flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrtee_len * 4); | ||
325 | |||
326 | /* Patch the invocation */ | ||
327 | kvm_patch_ins_b(inst, distance_start); | ||
328 | } | ||
329 | |||
330 | extern u32 kvm_emulate_wrteei_0_branch_offs; | ||
331 | extern u32 kvm_emulate_wrteei_0_len; | ||
332 | extern u32 kvm_emulate_wrteei_0[]; | ||
333 | |||
334 | static void kvm_patch_ins_wrteei_0(u32 *inst) | ||
335 | { | ||
336 | u32 *p; | ||
337 | int distance_start; | ||
338 | int distance_end; | ||
339 | ulong next_inst; | ||
340 | |||
341 | p = kvm_alloc(kvm_emulate_wrteei_0_len * 4); | ||
342 | if (!p) | ||
343 | return; | ||
344 | |||
345 | /* Find out where we are and put everything there */ | ||
346 | distance_start = (ulong)p - (ulong)inst; | ||
347 | next_inst = ((ulong)inst + 4); | ||
348 | distance_end = next_inst - (ulong)&p[kvm_emulate_wrteei_0_branch_offs]; | ||
349 | |||
350 | /* Make sure we only write valid b instructions */ | ||
351 | if (distance_start > KVM_INST_B_MAX) { | ||
352 | kvm_patching_worked = false; | ||
353 | return; | ||
354 | } | ||
355 | |||
356 | memcpy(p, kvm_emulate_wrteei_0, kvm_emulate_wrteei_0_len * 4); | ||
357 | p[kvm_emulate_wrteei_0_branch_offs] |= distance_end & KVM_INST_B_MASK; | ||
358 | flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrteei_0_len * 4); | ||
305 | 359 | ||
306 | /* Patch the invocation */ | 360 | /* Patch the invocation */ |
307 | kvm_patch_ins_b(inst, distance_start); | 361 | kvm_patch_ins_b(inst, distance_start); |
@@ -380,56 +434,191 @@ static void kvm_check_ins(u32 *inst, u32 features) | |||
380 | case KVM_INST_MFMSR: | 434 | case KVM_INST_MFMSR: |
381 | kvm_patch_ins_ld(inst, magic_var(msr), inst_rt); | 435 | kvm_patch_ins_ld(inst, magic_var(msr), inst_rt); |
382 | break; | 436 | break; |
383 | case KVM_INST_MFSPR_SPRG0: | 437 | case KVM_INST_MFSPR(SPRN_SPRG0): |
384 | kvm_patch_ins_ld(inst, magic_var(sprg0), inst_rt); | 438 | kvm_patch_ins_ld(inst, magic_var(sprg0), inst_rt); |
385 | break; | 439 | break; |
386 | case KVM_INST_MFSPR_SPRG1: | 440 | case KVM_INST_MFSPR(SPRN_SPRG1): |
387 | kvm_patch_ins_ld(inst, magic_var(sprg1), inst_rt); | 441 | kvm_patch_ins_ld(inst, magic_var(sprg1), inst_rt); |
388 | break; | 442 | break; |
389 | case KVM_INST_MFSPR_SPRG2: | 443 | case KVM_INST_MFSPR(SPRN_SPRG2): |
390 | kvm_patch_ins_ld(inst, magic_var(sprg2), inst_rt); | 444 | kvm_patch_ins_ld(inst, magic_var(sprg2), inst_rt); |
391 | break; | 445 | break; |
392 | case KVM_INST_MFSPR_SPRG3: | 446 | case KVM_INST_MFSPR(SPRN_SPRG3): |
393 | kvm_patch_ins_ld(inst, magic_var(sprg3), inst_rt); | 447 | kvm_patch_ins_ld(inst, magic_var(sprg3), inst_rt); |
394 | break; | 448 | break; |
395 | case KVM_INST_MFSPR_SRR0: | 449 | case KVM_INST_MFSPR(SPRN_SRR0): |
396 | kvm_patch_ins_ld(inst, magic_var(srr0), inst_rt); | 450 | kvm_patch_ins_ld(inst, magic_var(srr0), inst_rt); |
397 | break; | 451 | break; |
398 | case KVM_INST_MFSPR_SRR1: | 452 | case KVM_INST_MFSPR(SPRN_SRR1): |
399 | kvm_patch_ins_ld(inst, magic_var(srr1), inst_rt); | 453 | kvm_patch_ins_ld(inst, magic_var(srr1), inst_rt); |
400 | break; | 454 | break; |
401 | case KVM_INST_MFSPR_DAR: | 455 | #ifdef CONFIG_BOOKE |
456 | case KVM_INST_MFSPR(SPRN_DEAR): | ||
457 | #else | ||
458 | case KVM_INST_MFSPR(SPRN_DAR): | ||
459 | #endif | ||
402 | kvm_patch_ins_ld(inst, magic_var(dar), inst_rt); | 460 | kvm_patch_ins_ld(inst, magic_var(dar), inst_rt); |
403 | break; | 461 | break; |
404 | case KVM_INST_MFSPR_DSISR: | 462 | case KVM_INST_MFSPR(SPRN_DSISR): |
405 | kvm_patch_ins_lwz(inst, magic_var(dsisr), inst_rt); | 463 | kvm_patch_ins_lwz(inst, magic_var(dsisr), inst_rt); |
406 | break; | 464 | break; |
407 | 465 | ||
466 | #ifdef CONFIG_PPC_BOOK3E_MMU | ||
467 | case KVM_INST_MFSPR(SPRN_MAS0): | ||
468 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
469 | kvm_patch_ins_lwz(inst, magic_var(mas0), inst_rt); | ||
470 | break; | ||
471 | case KVM_INST_MFSPR(SPRN_MAS1): | ||
472 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
473 | kvm_patch_ins_lwz(inst, magic_var(mas1), inst_rt); | ||
474 | break; | ||
475 | case KVM_INST_MFSPR(SPRN_MAS2): | ||
476 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
477 | kvm_patch_ins_ld(inst, magic_var(mas2), inst_rt); | ||
478 | break; | ||
479 | case KVM_INST_MFSPR(SPRN_MAS3): | ||
480 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
481 | kvm_patch_ins_lwz(inst, magic_var(mas7_3) + 4, inst_rt); | ||
482 | break; | ||
483 | case KVM_INST_MFSPR(SPRN_MAS4): | ||
484 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
485 | kvm_patch_ins_lwz(inst, magic_var(mas4), inst_rt); | ||
486 | break; | ||
487 | case KVM_INST_MFSPR(SPRN_MAS6): | ||
488 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
489 | kvm_patch_ins_lwz(inst, magic_var(mas6), inst_rt); | ||
490 | break; | ||
491 | case KVM_INST_MFSPR(SPRN_MAS7): | ||
492 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
493 | kvm_patch_ins_lwz(inst, magic_var(mas7_3), inst_rt); | ||
494 | break; | ||
495 | #endif /* CONFIG_PPC_BOOK3E_MMU */ | ||
496 | |||
497 | case KVM_INST_MFSPR(SPRN_SPRG4): | ||
498 | #ifdef CONFIG_BOOKE | ||
499 | case KVM_INST_MFSPR(SPRN_SPRG4R): | ||
500 | #endif | ||
501 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
502 | kvm_patch_ins_ld(inst, magic_var(sprg4), inst_rt); | ||
503 | break; | ||
504 | case KVM_INST_MFSPR(SPRN_SPRG5): | ||
505 | #ifdef CONFIG_BOOKE | ||
506 | case KVM_INST_MFSPR(SPRN_SPRG5R): | ||
507 | #endif | ||
508 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
509 | kvm_patch_ins_ld(inst, magic_var(sprg5), inst_rt); | ||
510 | break; | ||
511 | case KVM_INST_MFSPR(SPRN_SPRG6): | ||
512 | #ifdef CONFIG_BOOKE | ||
513 | case KVM_INST_MFSPR(SPRN_SPRG6R): | ||
514 | #endif | ||
515 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
516 | kvm_patch_ins_ld(inst, magic_var(sprg6), inst_rt); | ||
517 | break; | ||
518 | case KVM_INST_MFSPR(SPRN_SPRG7): | ||
519 | #ifdef CONFIG_BOOKE | ||
520 | case KVM_INST_MFSPR(SPRN_SPRG7R): | ||
521 | #endif | ||
522 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
523 | kvm_patch_ins_ld(inst, magic_var(sprg7), inst_rt); | ||
524 | break; | ||
525 | |||
526 | #ifdef CONFIG_BOOKE | ||
527 | case KVM_INST_MFSPR(SPRN_ESR): | ||
528 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
529 | kvm_patch_ins_lwz(inst, magic_var(esr), inst_rt); | ||
530 | break; | ||
531 | #endif | ||
532 | |||
533 | case KVM_INST_MFSPR(SPRN_PIR): | ||
534 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
535 | kvm_patch_ins_lwz(inst, magic_var(pir), inst_rt); | ||
536 | break; | ||
537 | |||
538 | |||
408 | /* Stores */ | 539 | /* Stores */ |
409 | case KVM_INST_MTSPR_SPRG0: | 540 | case KVM_INST_MTSPR(SPRN_SPRG0): |
410 | kvm_patch_ins_std(inst, magic_var(sprg0), inst_rt); | 541 | kvm_patch_ins_std(inst, magic_var(sprg0), inst_rt); |
411 | break; | 542 | break; |
412 | case KVM_INST_MTSPR_SPRG1: | 543 | case KVM_INST_MTSPR(SPRN_SPRG1): |
413 | kvm_patch_ins_std(inst, magic_var(sprg1), inst_rt); | 544 | kvm_patch_ins_std(inst, magic_var(sprg1), inst_rt); |
414 | break; | 545 | break; |
415 | case KVM_INST_MTSPR_SPRG2: | 546 | case KVM_INST_MTSPR(SPRN_SPRG2): |
416 | kvm_patch_ins_std(inst, magic_var(sprg2), inst_rt); | 547 | kvm_patch_ins_std(inst, magic_var(sprg2), inst_rt); |
417 | break; | 548 | break; |
418 | case KVM_INST_MTSPR_SPRG3: | 549 | case KVM_INST_MTSPR(SPRN_SPRG3): |
419 | kvm_patch_ins_std(inst, magic_var(sprg3), inst_rt); | 550 | kvm_patch_ins_std(inst, magic_var(sprg3), inst_rt); |
420 | break; | 551 | break; |
421 | case KVM_INST_MTSPR_SRR0: | 552 | case KVM_INST_MTSPR(SPRN_SRR0): |
422 | kvm_patch_ins_std(inst, magic_var(srr0), inst_rt); | 553 | kvm_patch_ins_std(inst, magic_var(srr0), inst_rt); |
423 | break; | 554 | break; |
424 | case KVM_INST_MTSPR_SRR1: | 555 | case KVM_INST_MTSPR(SPRN_SRR1): |
425 | kvm_patch_ins_std(inst, magic_var(srr1), inst_rt); | 556 | kvm_patch_ins_std(inst, magic_var(srr1), inst_rt); |
426 | break; | 557 | break; |
427 | case KVM_INST_MTSPR_DAR: | 558 | #ifdef CONFIG_BOOKE |
559 | case KVM_INST_MTSPR(SPRN_DEAR): | ||
560 | #else | ||
561 | case KVM_INST_MTSPR(SPRN_DAR): | ||
562 | #endif | ||
428 | kvm_patch_ins_std(inst, magic_var(dar), inst_rt); | 563 | kvm_patch_ins_std(inst, magic_var(dar), inst_rt); |
429 | break; | 564 | break; |
430 | case KVM_INST_MTSPR_DSISR: | 565 | case KVM_INST_MTSPR(SPRN_DSISR): |
431 | kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt); | 566 | kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt); |
432 | break; | 567 | break; |
568 | #ifdef CONFIG_PPC_BOOK3E_MMU | ||
569 | case KVM_INST_MTSPR(SPRN_MAS0): | ||
570 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
571 | kvm_patch_ins_stw(inst, magic_var(mas0), inst_rt); | ||
572 | break; | ||
573 | case KVM_INST_MTSPR(SPRN_MAS1): | ||
574 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
575 | kvm_patch_ins_stw(inst, magic_var(mas1), inst_rt); | ||
576 | break; | ||
577 | case KVM_INST_MTSPR(SPRN_MAS2): | ||
578 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
579 | kvm_patch_ins_std(inst, magic_var(mas2), inst_rt); | ||
580 | break; | ||
581 | case KVM_INST_MTSPR(SPRN_MAS3): | ||
582 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
583 | kvm_patch_ins_stw(inst, magic_var(mas7_3) + 4, inst_rt); | ||
584 | break; | ||
585 | case KVM_INST_MTSPR(SPRN_MAS4): | ||
586 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
587 | kvm_patch_ins_stw(inst, magic_var(mas4), inst_rt); | ||
588 | break; | ||
589 | case KVM_INST_MTSPR(SPRN_MAS6): | ||
590 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
591 | kvm_patch_ins_stw(inst, magic_var(mas6), inst_rt); | ||
592 | break; | ||
593 | case KVM_INST_MTSPR(SPRN_MAS7): | ||
594 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
595 | kvm_patch_ins_stw(inst, magic_var(mas7_3), inst_rt); | ||
596 | break; | ||
597 | #endif /* CONFIG_PPC_BOOK3E_MMU */ | ||
598 | |||
599 | case KVM_INST_MTSPR(SPRN_SPRG4): | ||
600 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
601 | kvm_patch_ins_std(inst, magic_var(sprg4), inst_rt); | ||
602 | break; | ||
603 | case KVM_INST_MTSPR(SPRN_SPRG5): | ||
604 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
605 | kvm_patch_ins_std(inst, magic_var(sprg5), inst_rt); | ||
606 | break; | ||
607 | case KVM_INST_MTSPR(SPRN_SPRG6): | ||
608 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
609 | kvm_patch_ins_std(inst, magic_var(sprg6), inst_rt); | ||
610 | break; | ||
611 | case KVM_INST_MTSPR(SPRN_SPRG7): | ||
612 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
613 | kvm_patch_ins_std(inst, magic_var(sprg7), inst_rt); | ||
614 | break; | ||
615 | |||
616 | #ifdef CONFIG_BOOKE | ||
617 | case KVM_INST_MTSPR(SPRN_ESR): | ||
618 | if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) | ||
619 | kvm_patch_ins_stw(inst, magic_var(esr), inst_rt); | ||
620 | break; | ||
621 | #endif | ||
433 | 622 | ||
434 | /* Nops */ | 623 | /* Nops */ |
435 | case KVM_INST_TLBSYNC: | 624 | case KVM_INST_TLBSYNC: |
@@ -444,6 +633,11 @@ static void kvm_check_ins(u32 *inst, u32 features) | |||
444 | case KVM_INST_MTMSRD_L0: | 633 | case KVM_INST_MTMSRD_L0: |
445 | kvm_patch_ins_mtmsr(inst, inst_rt); | 634 | kvm_patch_ins_mtmsr(inst, inst_rt); |
446 | break; | 635 | break; |
636 | #ifdef CONFIG_BOOKE | ||
637 | case KVM_INST_WRTEE: | ||
638 | kvm_patch_ins_wrtee(inst, inst_rt, 0); | ||
639 | break; | ||
640 | #endif | ||
447 | } | 641 | } |
448 | 642 | ||
449 | switch (inst_no_rt & ~KVM_MASK_RB) { | 643 | switch (inst_no_rt & ~KVM_MASK_RB) { |
@@ -461,13 +655,19 @@ static void kvm_check_ins(u32 *inst, u32 features) | |||
461 | switch (_inst) { | 655 | switch (_inst) { |
462 | #ifdef CONFIG_BOOKE | 656 | #ifdef CONFIG_BOOKE |
463 | case KVM_INST_WRTEEI_0: | 657 | case KVM_INST_WRTEEI_0: |
658 | kvm_patch_ins_wrteei_0(inst); | ||
659 | break; | ||
660 | |||
464 | case KVM_INST_WRTEEI_1: | 661 | case KVM_INST_WRTEEI_1: |
465 | kvm_patch_ins_wrteei(inst); | 662 | kvm_patch_ins_wrtee(inst, 0, 1); |
466 | break; | 663 | break; |
467 | #endif | 664 | #endif |
468 | } | 665 | } |
469 | } | 666 | } |
470 | 667 | ||
668 | extern u32 kvm_template_start[]; | ||
669 | extern u32 kvm_template_end[]; | ||
670 | |||
471 | static void kvm_use_magic_page(void) | 671 | static void kvm_use_magic_page(void) |
472 | { | 672 | { |
473 | u32 *p; | 673 | u32 *p; |
@@ -488,8 +688,23 @@ static void kvm_use_magic_page(void) | |||
488 | start = (void*)_stext; | 688 | start = (void*)_stext; |
489 | end = (void*)_etext; | 689 | end = (void*)_etext; |
490 | 690 | ||
491 | for (p = start; p < end; p++) | 691 | /* |
692 | * Being interrupted in the middle of patching would | ||
693 | * be bad for SPRG4-7, which KVM can't keep in sync | ||
694 | * with emulated accesses because reads don't trap. | ||
695 | */ | ||
696 | local_irq_disable(); | ||
697 | |||
698 | for (p = start; p < end; p++) { | ||
699 | /* Avoid patching the template code */ | ||
700 | if (p >= kvm_template_start && p < kvm_template_end) { | ||
701 | p = kvm_template_end - 1; | ||
702 | continue; | ||
703 | } | ||
492 | kvm_check_ins(p, features); | 704 | kvm_check_ins(p, features); |
705 | } | ||
706 | |||
707 | local_irq_enable(); | ||
493 | 708 | ||
494 | printk(KERN_INFO "KVM: Live patching for a fast VM %s\n", | 709 | printk(KERN_INFO "KVM: Live patching for a fast VM %s\n", |
495 | kvm_patching_worked ? "worked" : "failed"); | 710 | kvm_patching_worked ? "worked" : "failed"); |
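
The KVM_INST_SPR() macro works because mfspr/mtspr encode the 10-bit SPR number as two swapped 5-bit halves. A quick sanity check against one of the deleted hand-written constants (SPRN_SPRG0 is 0x110), usable as a BUILD_BUG_ON() inside any function:

	/*
	 *   (0x110 & 0x01f) << 16 = 0x00100000
	 *   (0x110 & 0x3e0) <<  6 = 0x00004000
	 *   0x7c0002a6 | both     = 0x7c1042a6
	 *
	 * i.e. exactly the old KVM_INST_MFSPR_SPRG0 literal.
	 */
	BUILD_BUG_ON(KVM_INST_MFSPR(SPRN_SPRG0) != 0x7c1042a6);
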
diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S index f2b1b2523e61..e291cf3cf954 100644 --- a/arch/powerpc/kernel/kvm_emul.S +++ b/arch/powerpc/kernel/kvm_emul.S | |||
@@ -13,6 +13,7 @@ | |||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | 13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
14 | * | 14 | * |
15 | * Copyright SUSE Linux Products GmbH 2010 | 15 | * Copyright SUSE Linux Products GmbH 2010 |
16 | * Copyright 2010-2011 Freescale Semiconductor, Inc. | ||
16 | * | 17 | * |
17 | * Authors: Alexander Graf <agraf@suse.de> | 18 | * Authors: Alexander Graf <agraf@suse.de> |
18 | */ | 19 | */ |
@@ -65,6 +66,9 @@ kvm_hypercall_start: | |||
65 | shared->critical == r1 and r2 is always != r1 */ \ | 66 | shared->critical == r1 and r2 is always != r1 */ \ |
66 | STL64(r2, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0); | 67 | STL64(r2, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0); |
67 | 68 | ||
69 | .global kvm_template_start | ||
70 | kvm_template_start: | ||
71 | |||
68 | .global kvm_emulate_mtmsrd | 72 | .global kvm_emulate_mtmsrd |
69 | kvm_emulate_mtmsrd: | 73 | kvm_emulate_mtmsrd: |
70 | 74 | ||
@@ -167,6 +171,9 @@ maybe_stay_in_guest: | |||
167 | kvm_emulate_mtmsr_reg2: | 171 | kvm_emulate_mtmsr_reg2: |
168 | ori r30, r0, 0 | 172 | ori r30, r0, 0 |
169 | 173 | ||
174 | /* Put MSR into magic page because we don't call mtmsr */ | ||
175 | STL64(r30, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) | ||
176 | |||
170 | /* Check if we have to fetch an interrupt */ | 177 | /* Check if we have to fetch an interrupt */ |
171 | lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0) | 178 | lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0) |
172 | cmpwi r31, 0 | 179 | cmpwi r31, 0 |
@@ -174,15 +181,10 @@ kvm_emulate_mtmsr_reg2: | |||
174 | 181 | ||
175 | /* Check if we may trigger an interrupt */ | 182 | /* Check if we may trigger an interrupt */ |
176 | andi. r31, r30, MSR_EE | 183 | andi. r31, r30, MSR_EE |
177 | beq no_mtmsr | 184 | bne do_mtmsr |
178 | |||
179 | b do_mtmsr | ||
180 | 185 | ||
181 | no_mtmsr: | 186 | no_mtmsr: |
182 | 187 | ||
183 | /* Put MSR into magic page because we don't call mtmsr */ | ||
184 | STL64(r30, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) | ||
185 | |||
186 | SCRATCH_RESTORE | 188 | SCRATCH_RESTORE |
187 | 189 | ||
188 | /* Go back to caller */ | 190 | /* Go back to caller */ |
@@ -210,24 +212,80 @@ kvm_emulate_mtmsr_orig_ins_offs: | |||
210 | kvm_emulate_mtmsr_len: | 212 | kvm_emulate_mtmsr_len: |
211 | .long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4 | 213 | .long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4 |
212 | 214 | ||
215 | /* also used for wrteei 1 */ | ||
216 | .global kvm_emulate_wrtee | ||
217 | kvm_emulate_wrtee: | ||
218 | |||
219 | SCRATCH_SAVE | ||
220 | |||
221 | /* Fetch old MSR in r31 */ | ||
222 | LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) | ||
223 | |||
224 | /* Insert new MSR[EE] */ | ||
225 | kvm_emulate_wrtee_reg: | ||
226 | ori r30, r0, 0 | ||
227 | rlwimi r31, r30, 0, MSR_EE | ||
228 | |||
229 | /* | ||
230 | * If MSR[EE] is now set, check for a pending interrupt. | ||
231 | * We could skip this if MSR[EE] was already on, but that | ||
232 | * should be rare, so don't bother. | ||
233 | */ | ||
234 | andi. r30, r30, MSR_EE | ||
235 | |||
236 | /* Put MSR into magic page because we don't call wrtee */ | ||
237 | STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) | ||
238 | |||
239 | beq no_wrtee | ||
240 | |||
241 | /* Check if we have to fetch an interrupt */ | ||
242 | lwz r30, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0) | ||
243 | cmpwi r30, 0 | ||
244 | bne do_wrtee | ||
245 | |||
246 | no_wrtee: | ||
247 | SCRATCH_RESTORE | ||
248 | |||
249 | /* Go back to caller */ | ||
250 | kvm_emulate_wrtee_branch: | ||
251 | b . | ||
252 | |||
253 | do_wrtee: | ||
254 | SCRATCH_RESTORE | ||
213 | 255 | ||
256 | /* Just fire off the wrtee if it's critical */ | ||
257 | kvm_emulate_wrtee_orig_ins: | ||
258 | wrtee r0 | ||
214 | 259 | ||
215 | .global kvm_emulate_wrteei | 260 | b kvm_emulate_wrtee_branch |
216 | kvm_emulate_wrteei: | ||
217 | 261 | ||
262 | kvm_emulate_wrtee_end: | ||
263 | |||
264 | .global kvm_emulate_wrtee_branch_offs | ||
265 | kvm_emulate_wrtee_branch_offs: | ||
266 | .long (kvm_emulate_wrtee_branch - kvm_emulate_wrtee) / 4 | ||
267 | |||
268 | .global kvm_emulate_wrtee_reg_offs | ||
269 | kvm_emulate_wrtee_reg_offs: | ||
270 | .long (kvm_emulate_wrtee_reg - kvm_emulate_wrtee) / 4 | ||
271 | |||
272 | .global kvm_emulate_wrtee_orig_ins_offs | ||
273 | kvm_emulate_wrtee_orig_ins_offs: | ||
274 | .long (kvm_emulate_wrtee_orig_ins - kvm_emulate_wrtee) / 4 | ||
275 | |||
276 | .global kvm_emulate_wrtee_len | ||
277 | kvm_emulate_wrtee_len: | ||
278 | .long (kvm_emulate_wrtee_end - kvm_emulate_wrtee) / 4 | ||
279 | |||
280 | .global kvm_emulate_wrteei_0 | ||
281 | kvm_emulate_wrteei_0: | ||
218 | SCRATCH_SAVE | 282 | SCRATCH_SAVE |
219 | 283 | ||
220 | /* Fetch old MSR in r31 */ | 284 | /* Fetch old MSR in r31 */ |
221 | LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) | 285 | LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) |
222 | 286 | ||
223 | /* Remove MSR_EE from old MSR */ | 287 | /* Remove MSR_EE from old MSR */ |
224 | li r30, 0 | 288 | rlwinm r31, r31, 0, ~MSR_EE |
225 | ori r30, r30, MSR_EE | ||
226 | andc r31, r31, r30 | ||
227 | |||
228 | /* OR new MSR_EE onto the old MSR */ | ||
229 | kvm_emulate_wrteei_ee: | ||
230 | ori r31, r31, 0 | ||
231 | 289 | ||
232 | /* Write new MSR value back */ | 290 | /* Write new MSR value back */ |
233 | STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) | 291 | STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) |
@@ -235,22 +293,17 @@ kvm_emulate_wrteei_ee: | |||
235 | SCRATCH_RESTORE | 293 | SCRATCH_RESTORE |
236 | 294 | ||
237 | /* Go back to caller */ | 295 | /* Go back to caller */ |
238 | kvm_emulate_wrteei_branch: | 296 | kvm_emulate_wrteei_0_branch: |
239 | b . | 297 | b . |
240 | kvm_emulate_wrteei_end: | 298 | kvm_emulate_wrteei_0_end: |
241 | |||
242 | .global kvm_emulate_wrteei_branch_offs | ||
243 | kvm_emulate_wrteei_branch_offs: | ||
244 | .long (kvm_emulate_wrteei_branch - kvm_emulate_wrteei) / 4 | ||
245 | 299 | ||
246 | .global kvm_emulate_wrteei_ee_offs | 300 | .global kvm_emulate_wrteei_0_branch_offs |
247 | kvm_emulate_wrteei_ee_offs: | 301 | kvm_emulate_wrteei_0_branch_offs: |
248 | .long (kvm_emulate_wrteei_ee - kvm_emulate_wrteei) / 4 | 302 | .long (kvm_emulate_wrteei_0_branch - kvm_emulate_wrteei_0) / 4 |
249 | |||
250 | .global kvm_emulate_wrteei_len | ||
251 | kvm_emulate_wrteei_len: | ||
252 | .long (kvm_emulate_wrteei_end - kvm_emulate_wrteei) / 4 | ||
253 | 303 | ||
304 | .global kvm_emulate_wrteei_0_len | ||
305 | kvm_emulate_wrteei_0_len: | ||
306 | .long (kvm_emulate_wrteei_0_end - kvm_emulate_wrteei_0) / 4 | ||
254 | 307 | ||
255 | .global kvm_emulate_mtsrin | 308 | .global kvm_emulate_mtsrin |
256 | kvm_emulate_mtsrin: | 309 | kvm_emulate_mtsrin: |
@@ -300,3 +353,6 @@ kvm_emulate_mtsrin_orig_ins_offs: | |||
300 | .global kvm_emulate_mtsrin_len | 353 | .global kvm_emulate_mtsrin_len |
301 | kvm_emulate_mtsrin_len: | 354 | kvm_emulate_mtsrin_len: |
302 | .long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4 | 355 | .long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4 |
356 | |||
357 | .global kvm_template_end | ||
358 | kvm_template_end: | ||
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 4cb8f1e9d044..4721b0c8d7b7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c | |||
@@ -598,7 +598,7 @@ void __init setup_arch(char **cmdline_p) | |||
598 | /* Initialize the MMU context management stuff */ | 598 | /* Initialize the MMU context management stuff */ |
599 | mmu_context_init(); | 599 | mmu_context_init(); |
600 | 600 | ||
601 | kvm_rma_init(); | 601 | kvm_linear_init(); |
602 | 602 | ||
603 | ppc64_boot_msg(0x15, "Setup Done"); | 603 | ppc64_boot_msg(0x15, "Setup Done"); |
604 | } | 604 | } |
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 78133deb4b64..8f64709ae331 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig | |||
@@ -69,6 +69,7 @@ config KVM_BOOK3S_64 | |||
69 | config KVM_BOOK3S_64_HV | 69 | config KVM_BOOK3S_64_HV |
70 | bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" | 70 | bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" |
71 | depends on KVM_BOOK3S_64 | 71 | depends on KVM_BOOK3S_64 |
72 | select MMU_NOTIFIER | ||
72 | ---help--- | 73 | ---help--- |
73 | Support running unmodified book3s_64 guest kernels in | 74 | Support running unmodified book3s_64 guest kernels in |
74 | virtual machines on POWER7 and PPC970 processors that have | 75 | virtual machines on POWER7 and PPC970 processors that have |
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index e41ac6f7dcf1..7d54f4ed6d96 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c | |||
@@ -258,7 +258,7 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority) | |||
258 | return true; | 258 | return true; |
259 | } | 259 | } |
260 | 260 | ||
261 | void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) | 261 | void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) |
262 | { | 262 | { |
263 | unsigned long *pending = &vcpu->arch.pending_exceptions; | 263 | unsigned long *pending = &vcpu->arch.pending_exceptions; |
264 | unsigned long old_pending = vcpu->arch.pending_exceptions; | 264 | unsigned long old_pending = vcpu->arch.pending_exceptions; |
@@ -423,10 +423,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
423 | regs->sprg1 = vcpu->arch.shared->sprg1; | 423 | regs->sprg1 = vcpu->arch.shared->sprg1; |
424 | regs->sprg2 = vcpu->arch.shared->sprg2; | 424 | regs->sprg2 = vcpu->arch.shared->sprg2; |
425 | regs->sprg3 = vcpu->arch.shared->sprg3; | 425 | regs->sprg3 = vcpu->arch.shared->sprg3; |
426 | regs->sprg4 = vcpu->arch.sprg4; | 426 | regs->sprg4 = vcpu->arch.shared->sprg4; |
427 | regs->sprg5 = vcpu->arch.sprg5; | 427 | regs->sprg5 = vcpu->arch.shared->sprg5; |
428 | regs->sprg6 = vcpu->arch.sprg6; | 428 | regs->sprg6 = vcpu->arch.shared->sprg6; |
429 | regs->sprg7 = vcpu->arch.sprg7; | 429 | regs->sprg7 = vcpu->arch.shared->sprg7; |
430 | 430 | ||
431 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 431 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
432 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); | 432 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); |
@@ -450,10 +450,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
450 | vcpu->arch.shared->sprg1 = regs->sprg1; | 450 | vcpu->arch.shared->sprg1 = regs->sprg1; |
451 | vcpu->arch.shared->sprg2 = regs->sprg2; | 451 | vcpu->arch.shared->sprg2 = regs->sprg2; |
452 | vcpu->arch.shared->sprg3 = regs->sprg3; | 452 | vcpu->arch.shared->sprg3 = regs->sprg3; |
453 | vcpu->arch.sprg4 = regs->sprg4; | 453 | vcpu->arch.shared->sprg4 = regs->sprg4; |
454 | vcpu->arch.sprg5 = regs->sprg5; | 454 | vcpu->arch.shared->sprg5 = regs->sprg5; |
455 | vcpu->arch.sprg6 = regs->sprg6; | 455 | vcpu->arch.shared->sprg6 = regs->sprg6; |
456 | vcpu->arch.sprg7 = regs->sprg7; | 456 | vcpu->arch.shared->sprg7 = regs->sprg7; |
457 | 457 | ||
458 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 458 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
459 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); | 459 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); |
@@ -477,41 +477,10 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
477 | return 0; | 477 | return 0; |
478 | } | 478 | } |
479 | 479 | ||
480 | /* | 480 | void kvmppc_decrementer_func(unsigned long data) |
481 | * Get (and clear) the dirty memory log for a memory slot. | ||
482 | */ | ||
483 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | ||
484 | struct kvm_dirty_log *log) | ||
485 | { | 481 | { |
486 | struct kvm_memory_slot *memslot; | 482 | struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; |
487 | struct kvm_vcpu *vcpu; | ||
488 | ulong ga, ga_end; | ||
489 | int is_dirty = 0; | ||
490 | int r; | ||
491 | unsigned long n; | ||
492 | |||
493 | mutex_lock(&kvm->slots_lock); | ||
494 | |||
495 | r = kvm_get_dirty_log(kvm, log, &is_dirty); | ||
496 | if (r) | ||
497 | goto out; | ||
498 | |||
499 | /* If nothing is dirty, don't bother messing with page tables. */ | ||
500 | if (is_dirty) { | ||
501 | memslot = id_to_memslot(kvm->memslots, log->slot); | ||
502 | 483 | ||
503 | ga = memslot->base_gfn << PAGE_SHIFT; | 484 | kvmppc_core_queue_dec(vcpu); |
504 | ga_end = ga + (memslot->npages << PAGE_SHIFT); | 485 | kvm_vcpu_kick(vcpu); |
505 | |||
506 | kvm_for_each_vcpu(n, vcpu, kvm) | ||
507 | kvmppc_mmu_pte_pflush(vcpu, ga, ga_end); | ||
508 | |||
509 | n = kvm_dirty_bitmap_bytes(memslot); | ||
510 | memset(memslot->dirty_bitmap, 0, n); | ||
511 | } | ||
512 | |||
513 | r = 0; | ||
514 | out: | ||
515 | mutex_unlock(&kvm->slots_lock); | ||
516 | return r; | ||
517 | } | 486 | } |
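
The (unsigned long data) signature exists because kvmppc_decrementer_func() backs the vcpu's decrementer tasklet, which the hrtimer schedules on expiry. The wiring is roughly of this form (following kvm_arch_vcpu_init() in powerpc.c):

	hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
	tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func,
		     (unsigned long)vcpu);
	vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
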
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 9fecbfbce773..f922c29bb234 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c | |||
@@ -151,13 +151,15 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) | |||
151 | bool primary = false; | 151 | bool primary = false; |
152 | bool evict = false; | 152 | bool evict = false; |
153 | struct hpte_cache *pte; | 153 | struct hpte_cache *pte; |
154 | int r = 0; | ||
154 | 155 | ||
155 | /* Get host physical address for gpa */ | 156 | /* Get host physical address for gpa */ |
156 | hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); | 157 | hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); |
157 | if (is_error_pfn(hpaddr)) { | 158 | if (is_error_pfn(hpaddr)) { |
158 | printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", | 159 | printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", |
159 | orig_pte->eaddr); | 160 | orig_pte->eaddr); |
160 | return -EINVAL; | 161 | r = -EINVAL; |
162 | goto out; | ||
161 | } | 163 | } |
162 | hpaddr <<= PAGE_SHIFT; | 164 | hpaddr <<= PAGE_SHIFT; |
163 | 165 | ||
@@ -249,7 +251,8 @@ next_pteg: | |||
249 | 251 | ||
250 | kvmppc_mmu_hpte_cache_map(vcpu, pte); | 252 | kvmppc_mmu_hpte_cache_map(vcpu, pte); |
251 | 253 | ||
252 | return 0; | 254 | out: |
255 | return r; | ||
253 | } | 256 | } |
254 | 257 | ||
255 | static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) | 258 | static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) |
@@ -297,12 +300,14 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) | |||
297 | u64 gvsid; | 300 | u64 gvsid; |
298 | u32 sr; | 301 | u32 sr; |
299 | struct kvmppc_sid_map *map; | 302 | struct kvmppc_sid_map *map; |
300 | struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu); | 303 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
304 | int r = 0; | ||
301 | 305 | ||
302 | if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) { | 306 | if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) { |
303 | /* Invalidate an entry */ | 307 | /* Invalidate an entry */ |
304 | svcpu->sr[esid] = SR_INVALID; | 308 | svcpu->sr[esid] = SR_INVALID; |
305 | return -ENOENT; | 309 | r = -ENOENT; |
310 | goto out; | ||
306 | } | 311 | } |
307 | 312 | ||
308 | map = find_sid_vsid(vcpu, gvsid); | 313 | map = find_sid_vsid(vcpu, gvsid); |
@@ -315,17 +320,21 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) | |||
315 | 320 | ||
316 | dprintk_sr("MMU: mtsr %d, 0x%x\n", esid, sr); | 321 | dprintk_sr("MMU: mtsr %d, 0x%x\n", esid, sr); |
317 | 322 | ||
318 | return 0; | 323 | out: |
324 | svcpu_put(svcpu); | ||
325 | return r; | ||
319 | } | 326 | } |
320 | 327 | ||
321 | void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) | 328 | void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) |
322 | { | 329 | { |
323 | int i; | 330 | int i; |
324 | struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu); | 331 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
325 | 332 | ||
326 | dprintk_sr("MMU: flushing all segments (%d)\n", ARRAY_SIZE(svcpu->sr)); | 333 | dprintk_sr("MMU: flushing all segments (%d)\n", ARRAY_SIZE(svcpu->sr)); |
327 | for (i = 0; i < ARRAY_SIZE(svcpu->sr); i++) | 334 | for (i = 0; i < ARRAY_SIZE(svcpu->sr); i++) |
328 | svcpu->sr[i] = SR_INVALID; | 335 | svcpu->sr[i] = SR_INVALID; |
336 | |||
337 | svcpu_put(svcpu); | ||
329 | } | 338 | } |
330 | 339 | ||
331 | void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) | 340 | void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) |
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index fa2f08434ba5..6f87f39a1ac2 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c | |||
@@ -88,12 +88,14 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) | |||
88 | int vflags = 0; | 88 | int vflags = 0; |
89 | int attempt = 0; | 89 | int attempt = 0; |
90 | struct kvmppc_sid_map *map; | 90 | struct kvmppc_sid_map *map; |
91 | int r = 0; | ||
91 | 92 | ||
92 | /* Get host physical address for gpa */ | 93 | /* Get host physical address for gpa */ |
93 | hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); | 94 | hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); |
94 | if (is_error_pfn(hpaddr)) { | 95 | if (is_error_pfn(hpaddr)) { |
95 | printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); | 96 | printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); |
96 | return -EINVAL; | 97 | r = -EINVAL; |
98 | goto out; | ||
97 | } | 99 | } |
98 | hpaddr <<= PAGE_SHIFT; | 100 | hpaddr <<= PAGE_SHIFT; |
99 | hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK); | 101 | hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK); |
@@ -110,7 +112,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) | |||
110 | printk(KERN_ERR "KVM: Segment map for 0x%llx (0x%lx) failed\n", | 112 | printk(KERN_ERR "KVM: Segment map for 0x%llx (0x%lx) failed\n", |
111 | vsid, orig_pte->eaddr); | 113 | vsid, orig_pte->eaddr); |
112 | WARN_ON(true); | 114 | WARN_ON(true); |
113 | return -EINVAL; | 115 | r = -EINVAL; |
116 | goto out; | ||
114 | } | 117 | } |
115 | 118 | ||
116 | vsid = map->host_vsid; | 119 | vsid = map->host_vsid; |
@@ -131,8 +134,10 @@ map_again: | |||
131 | 134 | ||
132 | /* In case we tried normal mapping already, let's nuke old entries */ | 135 | /* In case we tried normal mapping already, let's nuke old entries */ |
133 | if (attempt > 1) | 136 | if (attempt > 1) |
134 | if (ppc_md.hpte_remove(hpteg) < 0) | 137 | if (ppc_md.hpte_remove(hpteg) < 0) { |
135 | return -1; | 138 | r = -1; |
139 | goto out; | ||
140 | } | ||
136 | 141 | ||
137 | ret = ppc_md.hpte_insert(hpteg, va, hpaddr, rflags, vflags, MMU_PAGE_4K, MMU_SEGSIZE_256M); | 142 | ret = ppc_md.hpte_insert(hpteg, va, hpaddr, rflags, vflags, MMU_PAGE_4K, MMU_SEGSIZE_256M); |
138 | 143 | ||
@@ -162,7 +167,8 @@ map_again: | |||
162 | kvmppc_mmu_hpte_cache_map(vcpu, pte); | 167 | kvmppc_mmu_hpte_cache_map(vcpu, pte); |
163 | } | 168 | } |
164 | 169 | ||
165 | return 0; | 170 | out: |
171 | return r; | ||
166 | } | 172 | } |
167 | 173 | ||
168 | static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) | 174 | static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) |
@@ -207,25 +213,30 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) | |||
207 | 213 | ||
208 | static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid) | 214 | static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid) |
209 | { | 215 | { |
216 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); | ||
210 | int i; | 217 | int i; |
211 | int max_slb_size = 64; | 218 | int max_slb_size = 64; |
212 | int found_inval = -1; | 219 | int found_inval = -1; |
213 | int r; | 220 | int r; |
214 | 221 | ||
215 | if (!to_svcpu(vcpu)->slb_max) | 222 | if (!svcpu->slb_max) |
216 | to_svcpu(vcpu)->slb_max = 1; | 223 | svcpu->slb_max = 1; |
217 | 224 | ||
218 | /* Are we overwriting? */ | 225 | /* Are we overwriting? */ |
219 | for (i = 1; i < to_svcpu(vcpu)->slb_max; i++) { | 226 | for (i = 1; i < svcpu->slb_max; i++) { |
220 | if (!(to_svcpu(vcpu)->slb[i].esid & SLB_ESID_V)) | 227 | if (!(svcpu->slb[i].esid & SLB_ESID_V)) |
221 | found_inval = i; | 228 | found_inval = i; |
222 | else if ((to_svcpu(vcpu)->slb[i].esid & ESID_MASK) == esid) | 229 | else if ((svcpu->slb[i].esid & ESID_MASK) == esid) { |
223 | return i; | 230 | r = i; |
231 | goto out; | ||
232 | } | ||
224 | } | 233 | } |
225 | 234 | ||
226 | /* Found a spare entry that was invalidated before */ | 235 | /* Found a spare entry that was invalidated before */ |
227 | if (found_inval > 0) | 236 | if (found_inval > 0) { |
228 | return found_inval; | 237 | r = found_inval; |
238 | goto out; | ||
239 | } | ||
229 | 240 | ||
230 | /* No spare invalid entry, so create one */ | 241 | /* No spare invalid entry, so create one */ |
231 | 242 | ||
@@ -233,30 +244,35 @@ static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid) | |||
233 | max_slb_size = mmu_slb_size; | 244 | max_slb_size = mmu_slb_size; |
234 | 245 | ||
235 | /* Overflowing -> purge */ | 246 | /* Overflowing -> purge */ |
236 | if ((to_svcpu(vcpu)->slb_max) == max_slb_size) | 247 | if ((svcpu->slb_max) == max_slb_size) |
237 | kvmppc_mmu_flush_segments(vcpu); | 248 | kvmppc_mmu_flush_segments(vcpu); |
238 | 249 | ||
239 | r = to_svcpu(vcpu)->slb_max; | 250 | r = svcpu->slb_max; |
240 | to_svcpu(vcpu)->slb_max++; | 251 | svcpu->slb_max++; |
241 | 252 | ||
253 | out: | ||
254 | svcpu_put(svcpu); | ||
242 | return r; | 255 | return r; |
243 | } | 256 | } |
244 | 257 | ||
245 | int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) | 258 | int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) |
246 | { | 259 | { |
260 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); | ||
247 | u64 esid = eaddr >> SID_SHIFT; | 261 | u64 esid = eaddr >> SID_SHIFT; |
248 | u64 slb_esid = (eaddr & ESID_MASK) | SLB_ESID_V; | 262 | u64 slb_esid = (eaddr & ESID_MASK) | SLB_ESID_V; |
249 | u64 slb_vsid = SLB_VSID_USER; | 263 | u64 slb_vsid = SLB_VSID_USER; |
250 | u64 gvsid; | 264 | u64 gvsid; |
251 | int slb_index; | 265 | int slb_index; |
252 | struct kvmppc_sid_map *map; | 266 | struct kvmppc_sid_map *map; |
267 | int r = 0; | ||
253 | 268 | ||
254 | slb_index = kvmppc_mmu_next_segment(vcpu, eaddr & ESID_MASK); | 269 | slb_index = kvmppc_mmu_next_segment(vcpu, eaddr & ESID_MASK); |
255 | 270 | ||
256 | if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) { | 271 | if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) { |
257 | /* Invalidate an entry */ | 272 | /* Invalidate an entry */ |
258 | to_svcpu(vcpu)->slb[slb_index].esid = 0; | 273 | svcpu->slb[slb_index].esid = 0; |
259 | return -ENOENT; | 274 | r = -ENOENT; |
275 | goto out; | ||
260 | } | 276 | } |
261 | 277 | ||
262 | map = find_sid_vsid(vcpu, gvsid); | 278 | map = find_sid_vsid(vcpu, gvsid); |
@@ -269,18 +285,22 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) | |||
269 | slb_vsid &= ~SLB_VSID_KP; | 285 | slb_vsid &= ~SLB_VSID_KP; |
270 | slb_esid |= slb_index; | 286 | slb_esid |= slb_index; |
271 | 287 | ||
272 | to_svcpu(vcpu)->slb[slb_index].esid = slb_esid; | 288 | svcpu->slb[slb_index].esid = slb_esid; |
273 | to_svcpu(vcpu)->slb[slb_index].vsid = slb_vsid; | 289 | svcpu->slb[slb_index].vsid = slb_vsid; |
274 | 290 | ||
275 | trace_kvm_book3s_slbmte(slb_vsid, slb_esid); | 291 | trace_kvm_book3s_slbmte(slb_vsid, slb_esid); |
276 | 292 | ||
277 | return 0; | 293 | out: |
294 | svcpu_put(svcpu); | ||
295 | return r; | ||
278 | } | 296 | } |
279 | 297 | ||
280 | void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) | 298 | void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) |
281 | { | 299 | { |
282 | to_svcpu(vcpu)->slb_max = 1; | 300 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
283 | to_svcpu(vcpu)->slb[0].esid = 0; | 301 | svcpu->slb_max = 1; |
302 | svcpu->slb[0].esid = 0; | ||
303 | svcpu_put(svcpu); | ||
284 | } | 304 | } |
285 | 305 | ||
286 | void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) | 306 | void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) |
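The 64-bit host MMU file gets the same treatment; the recurring shape is a single-exit function so that svcpu_put() pairs with every svcpu_get(). Roughly, with illustrative names (not code from the patch):

	static int example_with_svcpu(struct kvm_vcpu *vcpu, bool fail)
	{
		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
		int r = 0;

		if (fail) {
			r = -ENOENT;
			goto out;	/* was "return -ENOENT", which would leak the get */
		}
		/* ... main work touching svcpu ... */
	out:
		svcpu_put(svcpu);
		return r;
	}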
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index bc3a2ea94217..ddc485a529f2 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/gfp.h> | 23 | #include <linux/gfp.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/hugetlb.h> | 25 | #include <linux/hugetlb.h> |
26 | #include <linux/vmalloc.h> | ||
26 | 27 | ||
27 | #include <asm/tlbflush.h> | 28 | #include <asm/tlbflush.h> |
28 | #include <asm/kvm_ppc.h> | 29 | #include <asm/kvm_ppc.h> |
@@ -33,15 +34,6 @@ | |||
33 | #include <asm/ppc-opcode.h> | 34 | #include <asm/ppc-opcode.h> |
34 | #include <asm/cputable.h> | 35 | #include <asm/cputable.h> |
35 | 36 | ||
36 | /* For now use fixed-size 16MB page table */ | ||
37 | #define HPT_ORDER 24 | ||
38 | #define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */ | ||
39 | #define HPT_HASH_MASK (HPT_NPTEG - 1) | ||
40 | |||
41 | /* Pages in the VRMA are 16MB pages */ | ||
42 | #define VRMA_PAGE_ORDER 24 | ||
43 | #define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ | ||
44 | |||
45 | /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ | 37 | /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ |
46 | #define MAX_LPID_970 63 | 38 | #define MAX_LPID_970 63 |
47 | #define NR_LPIDS (LPID_RSVD + 1) | 39 | #define NR_LPIDS (LPID_RSVD + 1) |
@@ -51,21 +43,41 @@ long kvmppc_alloc_hpt(struct kvm *kvm) | |||
51 | { | 43 | { |
52 | unsigned long hpt; | 44 | unsigned long hpt; |
53 | unsigned long lpid; | 45 | unsigned long lpid; |
46 | struct revmap_entry *rev; | ||
47 | struct kvmppc_linear_info *li; | ||
48 | |||
49 | /* Allocate guest's hashed page table */ | ||
50 | li = kvm_alloc_hpt(); | ||
51 | if (li) { | ||
52 | /* using preallocated memory */ | ||
53 | hpt = (ulong)li->base_virt; | ||
54 | kvm->arch.hpt_li = li; | ||
55 | } else { | ||
56 | /* using dynamic memory */ | ||
57 | hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT| | ||
58 | __GFP_NOWARN, HPT_ORDER - PAGE_SHIFT); | ||
59 | } | ||
54 | 60 | ||
55 | hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|__GFP_NOWARN, | ||
56 | HPT_ORDER - PAGE_SHIFT); | ||
57 | if (!hpt) { | 61 | if (!hpt) { |
58 | pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n"); | 62 | pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n"); |
59 | return -ENOMEM; | 63 | return -ENOMEM; |
60 | } | 64 | } |
61 | kvm->arch.hpt_virt = hpt; | 65 | kvm->arch.hpt_virt = hpt; |
62 | 66 | ||
67 | /* Allocate reverse map array */ | ||
68 | rev = vmalloc(sizeof(struct revmap_entry) * HPT_NPTE); | ||
69 | if (!rev) { | ||
70 | pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n"); | ||
71 | goto out_freehpt; | ||
72 | } | ||
73 | kvm->arch.revmap = rev; | ||
74 | |||
75 | /* Allocate the guest's logical partition ID */ | ||
63 | do { | 76 | do { |
64 | lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS); | 77 | lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS); |
65 | if (lpid >= NR_LPIDS) { | 78 | if (lpid >= NR_LPIDS) { |
66 | pr_err("kvm_alloc_hpt: No LPIDs free\n"); | 79 | pr_err("kvm_alloc_hpt: No LPIDs free\n"); |
67 | free_pages(hpt, HPT_ORDER - PAGE_SHIFT); | 80 | goto out_freeboth; |
68 | return -ENOMEM; | ||
69 | } | 81 | } |
70 | } while (test_and_set_bit(lpid, lpid_inuse)); | 82 | } while (test_and_set_bit(lpid, lpid_inuse)); |
71 | 83 | ||
@@ -74,37 +86,64 @@ long kvmppc_alloc_hpt(struct kvm *kvm) | |||
74 | 86 | ||
75 | pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid); | 87 | pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid); |
76 | return 0; | 88 | return 0; |
89 | |||
90 | out_freeboth: | ||
91 | vfree(rev); | ||
92 | out_freehpt: | ||
93 | free_pages(hpt, HPT_ORDER - PAGE_SHIFT); | ||
94 | return -ENOMEM; | ||
77 | } | 95 | } |
78 | 96 | ||
79 | void kvmppc_free_hpt(struct kvm *kvm) | 97 | void kvmppc_free_hpt(struct kvm *kvm) |
80 | { | 98 | { |
81 | clear_bit(kvm->arch.lpid, lpid_inuse); | 99 | clear_bit(kvm->arch.lpid, lpid_inuse); |
82 | free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT); | 100 | vfree(kvm->arch.revmap); |
101 | if (kvm->arch.hpt_li) | ||
102 | kvm_release_hpt(kvm->arch.hpt_li); | ||
103 | else | ||
104 | free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT); | ||
105 | } | ||
106 | |||
107 | /* Bits in first HPTE dword for pagesize 4k, 64k or 16M */ | ||
108 | static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize) | ||
109 | { | ||
110 | return (pgsize > 0x1000) ? HPTE_V_LARGE : 0; | ||
111 | } | ||
112 | |||
113 | /* Bits in second HPTE dword for pagesize 4k, 64k or 16M */ | ||
114 | static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize) | ||
115 | { | ||
116 | return (pgsize == 0x10000) ? 0x1000 : 0; | ||
83 | } | 117 | } |
84 | 118 | ||
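Taken together, the two helpers above encode the three supported page sizes as follows; the table reads directly off their bodies (the 0x1000 for 64k pages is a size-encoding bit carried in the second dword's RPN field):

	/* pgsize | first dword (hpte0_...) | second dword (hpte1_...)
	 *   4k   | 0                      | 0
	 *  64k   | HPTE_V_LARGE           | 0x1000
	 *  16M   | HPTE_V_LARGE           | 0
	 */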
85 | void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem) | 119 | void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, |
120 | unsigned long porder) | ||
86 | { | 121 | { |
87 | unsigned long i; | 122 | unsigned long i; |
88 | unsigned long npages = kvm->arch.ram_npages; | 123 | unsigned long npages; |
89 | unsigned long pfn; | 124 | unsigned long hp_v, hp_r; |
90 | unsigned long *hpte; | 125 | unsigned long addr, hash; |
91 | unsigned long hash; | 126 | unsigned long psize; |
92 | struct kvmppc_pginfo *pginfo = kvm->arch.ram_pginfo; | 127 | unsigned long hp0, hp1; |
128 | long ret; | ||
93 | 129 | ||
94 | if (!pginfo) | 130 | psize = 1ul << porder; |
95 | return; | 131 | npages = memslot->npages >> (porder - PAGE_SHIFT); |
96 | 132 | ||
97 | /* VRMA can't be > 1TB */ | 133 | /* VRMA can't be > 1TB */ |
98 | if (npages > 1ul << (40 - kvm->arch.ram_porder)) | 134 | if (npages > 1ul << (40 - porder)) |
99 | npages = 1ul << (40 - kvm->arch.ram_porder); | 135 | npages = 1ul << (40 - porder); |
100 | /* Can't use more than 1 HPTE per HPTEG */ | 136 | /* Can't use more than 1 HPTE per HPTEG */ |
101 | if (npages > HPT_NPTEG) | 137 | if (npages > HPT_NPTEG) |
102 | npages = HPT_NPTEG; | 138 | npages = HPT_NPTEG; |
103 | 139 | ||
140 | hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | | ||
141 | HPTE_V_BOLTED | hpte0_pgsize_encoding(psize); | ||
142 | hp1 = hpte1_pgsize_encoding(psize) | | ||
143 | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX; | ||
144 | |||
104 | for (i = 0; i < npages; ++i) { | 145 | for (i = 0; i < npages; ++i) { |
105 | pfn = pginfo[i].pfn; | 146 | addr = i << porder; |
106 | if (!pfn) | ||
107 | break; | ||
108 | /* can't use hpt_hash since va > 64 bits */ | 147 | /* can't use hpt_hash since va > 64 bits */ |
109 | hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK; | 148 | hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK; |
110 | /* | 149 | /* |
@@ -113,15 +152,15 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem) | |||
113 | * at most one HPTE per HPTEG, we just assume entry 7 | 152 | * at most one HPTE per HPTEG, we just assume entry 7 |
114 | * is available and use it. | 153 | * is available and use it. |
115 | */ | 154 | */ |
116 | hpte = (unsigned long *) (kvm->arch.hpt_virt + (hash << 7)); | 155 | hash = (hash << 3) + 7; |
117 | hpte += 7 * 2; | 156 | hp_v = hp0 | ((addr >> 16) & ~0x7fUL); |
118 | /* HPTE low word - RPN, protection, etc. */ | 157 | hp_r = hp1 | addr; |
119 | hpte[1] = (pfn << PAGE_SHIFT) | HPTE_R_R | HPTE_R_C | | 158 | ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r); |
120 | HPTE_R_M | PP_RWXX; | 159 | if (ret != H_SUCCESS) { |
121 | wmb(); | 160 | pr_err("KVM: map_vrma at %lx failed, ret=%ld\n", |
122 | hpte[0] = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | | 161 | addr, ret); |
123 | (i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED | | 162 | break; |
124 | HPTE_V_LARGE | HPTE_V_VALID; | 163 | } |
125 | } | 164 | } |
126 | } | 165 | } |
127 | 166 | ||
@@ -158,10 +197,814 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) | |||
158 | kvmppc_set_msr(vcpu, MSR_SF | MSR_ME); | 197 | kvmppc_set_msr(vcpu, MSR_SF | MSR_ME); |
159 | } | 198 | } |
160 | 199 | ||
200 | /* | ||
201 | * This is called to get a reference to a guest page if there isn't | ||
202 | * one already in the kvm->arch.slot_phys[][] arrays. | ||
203 | */ | ||
204 | static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, | ||
205 | struct kvm_memory_slot *memslot, | ||
206 | unsigned long psize) | ||
207 | { | ||
208 | unsigned long start; | ||
209 | long np, err; | ||
210 | struct page *page, *hpage, *pages[1]; | ||
211 | unsigned long s, pgsize; | ||
212 | unsigned long *physp; | ||
213 | unsigned int is_io, got, pgorder; | ||
214 | struct vm_area_struct *vma; | ||
215 | unsigned long pfn, i, npages; | ||
216 | |||
217 | physp = kvm->arch.slot_phys[memslot->id]; | ||
218 | if (!physp) | ||
219 | return -EINVAL; | ||
220 | if (physp[gfn - memslot->base_gfn]) | ||
221 | return 0; | ||
222 | |||
223 | is_io = 0; | ||
224 | got = 0; | ||
225 | page = NULL; | ||
226 | pgsize = psize; | ||
227 | err = -EINVAL; | ||
228 | start = gfn_to_hva_memslot(memslot, gfn); | ||
229 | |||
230 | /* Instantiate and get the page we want access to */ | ||
231 | np = get_user_pages_fast(start, 1, 1, pages); | ||
232 | if (np != 1) { | ||
233 | /* Look up the vma for the page */ | ||
234 | down_read(¤t->mm->mmap_sem); | ||
235 | vma = find_vma(current->mm, start); | ||
236 | if (!vma || vma->vm_start > start || | ||
237 | start + psize > vma->vm_end || | ||
238 | !(vma->vm_flags & VM_PFNMAP)) | ||
239 | goto up_err; | ||
240 | is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot)); | ||
241 | pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); | ||
242 | /* check alignment of pfn vs. requested page size */ | ||
243 | if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1))) | ||
244 | goto up_err; | ||
245 | up_read(¤t->mm->mmap_sem); | ||
246 | |||
247 | } else { | ||
248 | page = pages[0]; | ||
249 | got = KVMPPC_GOT_PAGE; | ||
250 | |||
251 | /* See if this is a large page */ | ||
252 | s = PAGE_SIZE; | ||
253 | if (PageHuge(page)) { | ||
254 | hpage = compound_head(page); | ||
255 | s <<= compound_order(hpage); | ||
256 | /* Get the whole large page if slot alignment is ok */ | ||
257 | if (s > psize && slot_is_aligned(memslot, s) && | ||
258 | !(memslot->userspace_addr & (s - 1))) { | ||
259 | start &= ~(s - 1); | ||
260 | pgsize = s; | ||
261 | page = hpage; | ||
262 | } | ||
263 | } | ||
264 | if (s < psize) | ||
265 | goto out; | ||
266 | pfn = page_to_pfn(page); | ||
267 | } | ||
268 | |||
269 | npages = pgsize >> PAGE_SHIFT; | ||
270 | pgorder = __ilog2(npages); | ||
271 | physp += (gfn - memslot->base_gfn) & ~(npages - 1); | ||
272 | spin_lock(&kvm->arch.slot_phys_lock); | ||
273 | for (i = 0; i < npages; ++i) { | ||
274 | if (!physp[i]) { | ||
275 | physp[i] = ((pfn + i) << PAGE_SHIFT) + | ||
276 | got + is_io + pgorder; | ||
277 | got = 0; | ||
278 | } | ||
279 | } | ||
280 | spin_unlock(&kvm->arch.slot_phys_lock); | ||
281 | err = 0; | ||
282 | |||
283 | out: | ||
284 | if (got) { | ||
285 | if (PageHuge(page)) | ||
286 | page = compound_head(page); | ||
287 | put_page(page); | ||
288 | } | ||
289 | return err; | ||
290 | |||
291 | up_err: | ||
292 | up_read(¤t->mm->mmap_sem); | ||
293 | return err; | ||
294 | } | ||
295 | |||
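Each slot_phys[] entry written above packs a real-page address plus bookkeeping into one word: the page-frame address sits above PAGE_SHIFT, and the low bits carry KVMPPC_GOT_PAGE (a get_user_pages reference is held, recorded only on the first subpage), the cache bits from hpte_cache_bits() for PFNMAP ranges, and the log2 of the subpage count. A decoding sketch; the order mask here is an assumption, not taken from this patch:

	static inline unsigned long physp_to_pfn(unsigned long entry)
	{
		return entry >> PAGE_SHIFT;	/* backing page frame */
	}

	static inline unsigned int physp_to_order(unsigned long entry)
	{
		return entry & 0x1f;		/* assumed low-bits order mask */
	}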
296 | /* | ||
297 | * We come here on an H_ENTER call from the guest when we are not | ||
298 | * using mmu notifiers and we don't have the requested page pinned | ||
299 | * already. | ||
300 | */ | ||
301 | long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, | ||
302 | long pte_index, unsigned long pteh, unsigned long ptel) | ||
303 | { | ||
304 | struct kvm *kvm = vcpu->kvm; | ||
305 | unsigned long psize, gpa, gfn; | ||
306 | struct kvm_memory_slot *memslot; | ||
307 | long ret; | ||
308 | |||
309 | if (kvm->arch.using_mmu_notifiers) | ||
310 | goto do_insert; | ||
311 | |||
312 | psize = hpte_page_size(pteh, ptel); | ||
313 | if (!psize) | ||
314 | return H_PARAMETER; | ||
315 | |||
316 | pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); | ||
317 | |||
318 | /* Find the memslot (if any) for this address */ | ||
319 | gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); | ||
320 | gfn = gpa >> PAGE_SHIFT; | ||
321 | memslot = gfn_to_memslot(kvm, gfn); | ||
322 | if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) { | ||
323 | if (!slot_is_aligned(memslot, psize)) | ||
324 | return H_PARAMETER; | ||
325 | if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0) | ||
326 | return H_PARAMETER; | ||
327 | } | ||
328 | |||
329 | do_insert: | ||
330 | /* Protect linux PTE lookup from page table destruction */ | ||
331 | rcu_read_lock_sched(); /* this disables preemption too */ | ||
332 | vcpu->arch.pgdir = current->mm->pgd; | ||
333 | ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel); | ||
334 | rcu_read_unlock_sched(); | ||
335 | if (ret == H_TOO_HARD) { | ||
336 | /* this can't happen */ | ||
337 | pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n"); | ||
338 | ret = H_RESOURCE; /* or something */ | ||
339 | } | ||
340 | return ret; | ||
341 | |||
342 | } | ||
343 | |||
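kvmppc_h_enter() walks the calling task's Linux page tables without taking the page-table locks; page-table pages on this platform are only freed after an RCU-sched grace period, so the rcu_read_lock_sched() above (which also disables preemption) is what keeps the walk safe. Distilled, the guarded pattern is:

	rcu_read_lock_sched();			/* protects the PTE walk; also disables preemption */
	vcpu->arch.pgdir = current->mm->pgd;	/* page-table root the walk starts from */
	ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);
	rcu_read_unlock_sched();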
344 | static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu, | ||
345 | gva_t eaddr) | ||
346 | { | ||
347 | u64 mask; | ||
348 | int i; | ||
349 | |||
350 | for (i = 0; i < vcpu->arch.slb_nr; i++) { | ||
351 | if (!(vcpu->arch.slb[i].orige & SLB_ESID_V)) | ||
352 | continue; | ||
353 | |||
354 | if (vcpu->arch.slb[i].origv & SLB_VSID_B_1T) | ||
355 | mask = ESID_MASK_1T; | ||
356 | else | ||
357 | mask = ESID_MASK; | ||
358 | |||
359 | if (((vcpu->arch.slb[i].orige ^ eaddr) & mask) == 0) | ||
360 | return &vcpu->arch.slb[i]; | ||
361 | } | ||
362 | return NULL; | ||
363 | } | ||
364 | |||
365 | static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r, | ||
366 | unsigned long ea) | ||
367 | { | ||
368 | unsigned long ra_mask; | ||
369 | |||
370 | ra_mask = hpte_page_size(v, r) - 1; | ||
371 | return (r & HPTE_R_RPN & ~ra_mask) | (ea & ra_mask); | ||
372 | } | ||
373 | |||
161 | static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, | 374 | static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, |
162 | struct kvmppc_pte *gpte, bool data) | 375 | struct kvmppc_pte *gpte, bool data) |
376 | { | ||
377 | struct kvm *kvm = vcpu->kvm; | ||
378 | struct kvmppc_slb *slbe; | ||
379 | unsigned long slb_v; | ||
380 | unsigned long pp, key; | ||
381 | unsigned long v, gr; | ||
382 | unsigned long *hptep; | ||
383 | int index; | ||
384 | int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); | ||
385 | |||
386 | /* Get SLB entry */ | ||
387 | if (virtmode) { | ||
388 | slbe = kvmppc_mmu_book3s_hv_find_slbe(vcpu, eaddr); | ||
389 | if (!slbe) | ||
390 | return -EINVAL; | ||
391 | slb_v = slbe->origv; | ||
392 | } else { | ||
393 | /* real mode access */ | ||
394 | slb_v = vcpu->kvm->arch.vrma_slb_v; | ||
395 | } | ||
396 | |||
397 | /* Find the HPTE in the hash table */ | ||
398 | index = kvmppc_hv_find_lock_hpte(kvm, eaddr, slb_v, | ||
399 | HPTE_V_VALID | HPTE_V_ABSENT); | ||
400 | if (index < 0) | ||
401 | return -ENOENT; | ||
402 | hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); | ||
403 | v = hptep[0] & ~HPTE_V_HVLOCK; | ||
404 | gr = kvm->arch.revmap[index].guest_rpte; | ||
405 | |||
406 | /* Unlock the HPTE */ | ||
407 | asm volatile("lwsync" : : : "memory"); | ||
408 | hptep[0] = v; | ||
409 | |||
410 | gpte->eaddr = eaddr; | ||
411 | gpte->vpage = ((v & HPTE_V_AVPN) << 4) | ((eaddr >> 12) & 0xfff); | ||
412 | |||
413 | /* Get PP bits and key for permission check */ | ||
414 | pp = gr & (HPTE_R_PP0 | HPTE_R_PP); | ||
415 | key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS; | ||
416 | key &= slb_v; | ||
417 | |||
418 | /* Calculate permissions */ | ||
419 | gpte->may_read = hpte_read_permission(pp, key); | ||
420 | gpte->may_write = hpte_write_permission(pp, key); | ||
421 | gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G)); | ||
422 | |||
423 | /* Storage key permission check for POWER7 */ | ||
424 | if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) { | ||
425 | int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr); | ||
426 | if (amrfield & 1) | ||
427 | gpte->may_read = 0; | ||
428 | if (amrfield & 2) | ||
429 | gpte->may_write = 0; | ||
430 | } | ||
431 | |||
432 | /* Get the guest physical address */ | ||
433 | gpte->raddr = kvmppc_mmu_get_real_addr(v, gr, eaddr); | ||
434 | return 0; | ||
435 | } | ||
436 | |||
437 | /* | ||
438 | * Quick test for whether an instruction is a load or a store. | ||
439 | * If the instruction is a load or a store, then this will indicate | ||
440 | * which it is, at least on server processors. (Embedded processors | ||
441 | * have some external PID instructions that don't follow the rule | ||
442 | * embodied here.) If the instruction isn't a load or store, then | ||
443 | * this doesn't return anything useful. | ||
444 | */ | ||
445 | static int instruction_is_store(unsigned int instr) | ||
446 | { | ||
447 | unsigned int mask; | ||
448 | |||
449 | mask = 0x10000000; | ||
450 | if ((instr & 0xfc000000) == 0x7c000000) | ||
451 | mask = 0x100; /* major opcode 31 */ | ||
452 | return (instr & mask) != 0; | ||
453 | } | ||
454 | |||
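The masks come straight off the PowerPC opcode map: for D-form loads/stores, bit 0x10000000 of the major opcode separates stores from loads, and for X-form instructions (major opcode 31) bit 0x100 of the extended opcode does. Hand-worked encodings as a sanity check (worth verifying against the ISA):

	instruction_is_store(0x90010000);	/* stw r0,0(r1), opcode 36 -> 1 */
	instruction_is_store(0x80610000);	/* lwz r3,0(r1), opcode 32 -> 0 */
	instruction_is_store(0x7c61212e);	/* stwx r3,r1,r4, opcode 31, XO 151 -> 1 */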
455 | static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
456 | unsigned long gpa, int is_store) | ||
457 | { | ||
458 | int ret; | ||
459 | u32 last_inst; | ||
460 | unsigned long srr0 = kvmppc_get_pc(vcpu); | ||
461 | |||
462 | /* We try to load the last instruction. We don't let | ||
463 | * emulate_instruction do it as it doesn't check what | ||
464 | * kvmppc_ld returns. | ||
465 | * If we fail, we just return to the guest and try executing it again. | ||
466 | */ | ||
467 | if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) { | ||
468 | ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); | ||
469 | if (ret != EMULATE_DONE || last_inst == KVM_INST_FETCH_FAILED) | ||
470 | return RESUME_GUEST; | ||
471 | vcpu->arch.last_inst = last_inst; | ||
472 | } | ||
473 | |||
474 | /* | ||
475 | * WARNING: We do not know for sure whether the instruction we just | ||
476 | * read from memory is the same one that caused the fault in the first | ||
477 | * place. If the instruction we read is neither a load nor a store, | ||
478 | * then it can't access memory, so we don't need to worry about | ||
479 | * enforcing access permissions. So, assuming it is a load or | ||
480 | * store, we just check that its direction (load or store) is | ||
481 | * consistent with the original fault, since that's what we | ||
482 | * checked the access permissions against. If there is a mismatch | ||
483 | * we just return and retry the instruction. | ||
484 | */ | ||
485 | |||
486 | if (instruction_is_store(vcpu->arch.last_inst) != !!is_store) | ||
487 | return RESUME_GUEST; | ||
488 | |||
489 | /* | ||
490 | * Emulated accesses are handled by looking at the hash for | ||
491 | * translation once, then performing the access later. The | ||
492 | * translation could be invalidated in the meantime, at which | ||
493 | * point performing the subsequent memory access on the old | ||
494 | * physical address could possibly be a security hole for the | ||
495 | * guest (but not the host). | ||
496 | * | ||
497 | * This is less of an issue for MMIO stores since they aren't | ||
498 | * globally visible. It could be an issue for MMIO loads to | ||
499 | * a certain extent but we'll ignore it for now. | ||
500 | */ | ||
501 | |||
502 | vcpu->arch.paddr_accessed = gpa; | ||
503 | return kvmppc_emulate_mmio(run, vcpu); | ||
504 | } | ||
505 | |||
506 | int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
507 | unsigned long ea, unsigned long dsisr) | ||
508 | { | ||
509 | struct kvm *kvm = vcpu->kvm; | ||
510 | unsigned long *hptep, hpte[3], r; | ||
511 | unsigned long mmu_seq, psize, pte_size; | ||
512 | unsigned long gfn, hva, pfn; | ||
513 | struct kvm_memory_slot *memslot; | ||
514 | unsigned long *rmap; | ||
515 | struct revmap_entry *rev; | ||
516 | struct page *page, *pages[1]; | ||
517 | long index, ret, npages; | ||
518 | unsigned long is_io; | ||
519 | unsigned int writing, write_ok; | ||
520 | struct vm_area_struct *vma; | ||
521 | unsigned long rcbits; | ||
522 | |||
523 | /* | ||
524 | * Real-mode code has already searched the HPT and found the | ||
525 | * entry we're interested in. Lock the entry and check that | ||
526 | * it hasn't changed. If it has, just return and re-execute the | ||
527 | * instruction. | ||
528 | */ | ||
529 | if (ea != vcpu->arch.pgfault_addr) | ||
530 | return RESUME_GUEST; | ||
531 | index = vcpu->arch.pgfault_index; | ||
532 | hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); | ||
533 | rev = &kvm->arch.revmap[index]; | ||
534 | preempt_disable(); | ||
535 | while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) | ||
536 | cpu_relax(); | ||
537 | hpte[0] = hptep[0] & ~HPTE_V_HVLOCK; | ||
538 | hpte[1] = hptep[1]; | ||
539 | hpte[2] = r = rev->guest_rpte; | ||
540 | asm volatile("lwsync" : : : "memory"); | ||
541 | hptep[0] = hpte[0]; | ||
542 | preempt_enable(); | ||
543 | |||
544 | if (hpte[0] != vcpu->arch.pgfault_hpte[0] || | ||
545 | hpte[1] != vcpu->arch.pgfault_hpte[1]) | ||
546 | return RESUME_GUEST; | ||
547 | |||
548 | /* Translate the logical address and get the page */ | ||
549 | psize = hpte_page_size(hpte[0], r); | ||
550 | gfn = hpte_rpn(r, psize); | ||
551 | memslot = gfn_to_memslot(kvm, gfn); | ||
552 | |||
553 | /* No memslot means it's an emulated MMIO region */ | ||
554 | if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) { | ||
555 | unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1)); | ||
556 | return kvmppc_hv_emulate_mmio(run, vcpu, gpa, | ||
557 | dsisr & DSISR_ISSTORE); | ||
558 | } | ||
559 | |||
560 | if (!kvm->arch.using_mmu_notifiers) | ||
561 | return -EFAULT; /* should never get here */ | ||
562 | |||
563 | /* used to check for invalidations in progress */ | ||
564 | mmu_seq = kvm->mmu_notifier_seq; | ||
565 | smp_rmb(); | ||
566 | |||
567 | is_io = 0; | ||
568 | pfn = 0; | ||
569 | page = NULL; | ||
570 | pte_size = PAGE_SIZE; | ||
571 | writing = (dsisr & DSISR_ISSTORE) != 0; | ||
572 | /* If we get here with writing != 0, the HPTE must allow writing */ | ||
573 | write_ok = writing; | ||
574 | hva = gfn_to_hva_memslot(memslot, gfn); | ||
575 | npages = get_user_pages_fast(hva, 1, writing, pages); | ||
576 | if (npages < 1) { | ||
577 | /* Check if it's an I/O mapping */ | ||
578 | down_read(¤t->mm->mmap_sem); | ||
579 | vma = find_vma(current->mm, hva); | ||
580 | if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end && | ||
581 | (vma->vm_flags & VM_PFNMAP)) { | ||
582 | pfn = vma->vm_pgoff + | ||
583 | ((hva - vma->vm_start) >> PAGE_SHIFT); | ||
584 | pte_size = psize; | ||
585 | is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot)); | ||
586 | write_ok = vma->vm_flags & VM_WRITE; | ||
587 | } | ||
588 | up_read(¤t->mm->mmap_sem); | ||
589 | if (!pfn) | ||
590 | return -EFAULT; | ||
591 | } else { | ||
592 | page = pages[0]; | ||
593 | if (PageHuge(page)) { | ||
594 | page = compound_head(page); | ||
595 | pte_size <<= compound_order(page); | ||
596 | } | ||
597 | /* if the guest wants write access, see if that is OK */ | ||
598 | if (!writing && hpte_is_writable(r)) { | ||
599 | pte_t *ptep, pte; | ||
600 | |||
601 | /* | ||
602 | * We need to protect against page table destruction | ||
603 | * while looking up and updating the pte. | ||
604 | */ | ||
605 | rcu_read_lock_sched(); | ||
606 | ptep = find_linux_pte_or_hugepte(current->mm->pgd, | ||
607 | hva, NULL); | ||
608 | if (ptep && pte_present(*ptep)) { | ||
609 | pte = kvmppc_read_update_linux_pte(ptep, 1); | ||
610 | if (pte_write(pte)) | ||
611 | write_ok = 1; | ||
612 | } | ||
613 | rcu_read_unlock_sched(); | ||
614 | } | ||
615 | pfn = page_to_pfn(page); | ||
616 | } | ||
617 | |||
618 | ret = -EFAULT; | ||
619 | if (psize > pte_size) | ||
620 | goto out_put; | ||
621 | |||
622 | /* Check WIMG vs. the actual page we're accessing */ | ||
623 | if (!hpte_cache_flags_ok(r, is_io)) { | ||
624 | if (is_io) | ||
625 | return -EFAULT; | ||
626 | /* | ||
627 | * Allow guest to map emulated device memory as | ||
628 | * uncacheable, but actually make it cacheable. | ||
629 | */ | ||
630 | r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M; | ||
631 | } | ||
632 | |||
633 | /* Set the HPTE to point to pfn */ | ||
634 | r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT); | ||
635 | if (hpte_is_writable(r) && !write_ok) | ||
636 | r = hpte_make_readonly(r); | ||
637 | ret = RESUME_GUEST; | ||
638 | preempt_disable(); | ||
639 | while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) | ||
640 | cpu_relax(); | ||
641 | if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] || | ||
642 | rev->guest_rpte != hpte[2]) | ||
643 | /* HPTE has been changed under us; let the guest retry */ | ||
644 | goto out_unlock; | ||
645 | hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; | ||
646 | |||
647 | rmap = &memslot->rmap[gfn - memslot->base_gfn]; | ||
648 | lock_rmap(rmap); | ||
649 | |||
650 | /* Check if we might have been invalidated; let the guest retry if so */ | ||
651 | ret = RESUME_GUEST; | ||
652 | if (mmu_notifier_retry(vcpu, mmu_seq)) { | ||
653 | unlock_rmap(rmap); | ||
654 | goto out_unlock; | ||
655 | } | ||
656 | |||
657 | /* Only set R/C in real HPTE if set in both *rmap and guest_rpte */ | ||
658 | rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; | ||
659 | r &= rcbits | ~(HPTE_R_R | HPTE_R_C); | ||
660 | |||
661 | if (hptep[0] & HPTE_V_VALID) { | ||
662 | /* HPTE was previously valid, so we need to invalidate it */ | ||
663 | unlock_rmap(rmap); | ||
664 | hptep[0] |= HPTE_V_ABSENT; | ||
665 | kvmppc_invalidate_hpte(kvm, hptep, index); | ||
666 | /* don't lose previous R and C bits */ | ||
667 | r |= hptep[1] & (HPTE_R_R | HPTE_R_C); | ||
668 | } else { | ||
669 | kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); | ||
670 | } | ||
671 | |||
672 | hptep[1] = r; | ||
673 | eieio(); | ||
674 | hptep[0] = hpte[0]; | ||
675 | asm volatile("ptesync" : : : "memory"); | ||
676 | preempt_enable(); | ||
677 | if (page && hpte_is_writable(r)) | ||
678 | SetPageDirty(page); | ||
679 | |||
680 | out_put: | ||
681 | if (page) | ||
682 | put_page(page); | ||
683 | return ret; | ||
684 | |||
685 | out_unlock: | ||
686 | hptep[0] &= ~HPTE_V_HVLOCK; | ||
687 | preempt_enable(); | ||
688 | goto out_put; | ||
689 | } | ||
690 | |||
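The fault path above (and the rmap walkers below) serialize per-HPTE through a software lock bit, HPTE_V_HVLOCK, stolen from the first HPTE dword. The distilled protocol, as used in kvmppc_book3s_hv_page_fault():

	preempt_disable();			/* never hold the lock across preemption */
	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
		cpu_relax();
	/* ... inspect and update hptep[0] / hptep[1] ... */
	eieio();				/* order the second-dword update ... */
	hptep[0] = new_v;			/* ... before republishing; storing a value
						 * without HPTE_V_HVLOCK also drops the lock */
	asm volatile("ptesync" : : : "memory");
	preempt_enable();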
691 | static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | ||
692 | int (*handler)(struct kvm *kvm, unsigned long *rmapp, | ||
693 | unsigned long gfn)) | ||
694 | { | ||
695 | int ret; | ||
696 | int retval = 0; | ||
697 | struct kvm_memslots *slots; | ||
698 | struct kvm_memory_slot *memslot; | ||
699 | |||
700 | slots = kvm_memslots(kvm); | ||
701 | kvm_for_each_memslot(memslot, slots) { | ||
702 | unsigned long start = memslot->userspace_addr; | ||
703 | unsigned long end; | ||
704 | |||
705 | end = start + (memslot->npages << PAGE_SHIFT); | ||
706 | if (hva >= start && hva < end) { | ||
707 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; | ||
708 | |||
709 | ret = handler(kvm, &memslot->rmap[gfn_offset], | ||
710 | memslot->base_gfn + gfn_offset); | ||
711 | retval |= ret; | ||
712 | } | ||
713 | } | ||
714 | |||
715 | return retval; | ||
716 | } | ||
717 | |||
718 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | ||
719 | unsigned long gfn) | ||
720 | { | ||
721 | struct revmap_entry *rev = kvm->arch.revmap; | ||
722 | unsigned long h, i, j; | ||
723 | unsigned long *hptep; | ||
724 | unsigned long ptel, psize, rcbits; | ||
725 | |||
726 | for (;;) { | ||
727 | lock_rmap(rmapp); | ||
728 | if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { | ||
729 | unlock_rmap(rmapp); | ||
730 | break; | ||
731 | } | ||
732 | |||
733 | /* | ||
734 | * To avoid an ABBA deadlock with the HPTE lock bit, | ||
735 | * we can't spin on the HPTE lock while holding the | ||
736 | * rmap chain lock. | ||
737 | */ | ||
738 | i = *rmapp & KVMPPC_RMAP_INDEX; | ||
739 | hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); | ||
740 | if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { | ||
741 | /* unlock rmap before spinning on the HPTE lock */ | ||
742 | unlock_rmap(rmapp); | ||
743 | while (hptep[0] & HPTE_V_HVLOCK) | ||
744 | cpu_relax(); | ||
745 | continue; | ||
746 | } | ||
747 | j = rev[i].forw; | ||
748 | if (j == i) { | ||
749 | /* chain is now empty */ | ||
750 | *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX); | ||
751 | } else { | ||
752 | /* remove i from chain */ | ||
753 | h = rev[i].back; | ||
754 | rev[h].forw = j; | ||
755 | rev[j].back = h; | ||
756 | rev[i].forw = rev[i].back = i; | ||
757 | *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j; | ||
758 | } | ||
759 | |||
760 | /* Now check and modify the HPTE */ | ||
761 | ptel = rev[i].guest_rpte; | ||
762 | psize = hpte_page_size(hptep[0], ptel); | ||
763 | if ((hptep[0] & HPTE_V_VALID) && | ||
764 | hpte_rpn(ptel, psize) == gfn) { | ||
765 | hptep[0] |= HPTE_V_ABSENT; | ||
766 | kvmppc_invalidate_hpte(kvm, hptep, i); | ||
767 | /* Harvest R and C */ | ||
768 | rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); | ||
769 | *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; | ||
770 | rev[i].guest_rpte = ptel | rcbits; | ||
771 | } | ||
772 | unlock_rmap(rmapp); | ||
773 | hptep[0] &= ~HPTE_V_HVLOCK; | ||
774 | } | ||
775 | return 0; | ||
776 | } | ||
777 | |||
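The comment in kvm_unmap_rmapp() above explains the ordering constraint; distilled, the back-off dance that avoids an ABBA deadlock between the rmap chain lock and the HPTE lock bit looks like:

	for (;;) {
		lock_rmap(rmapp);				/* lock A */
		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {	/* trylock B */
			unlock_rmap(rmapp);			/* never spin on B while holding A */
			while (hptep[0] & HPTE_V_HVLOCK)
				cpu_relax();
			continue;				/* retake A, try B again */
		}
		/* both locks held; do the work, release both, done */
		break;
	}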
778 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) | ||
779 | { | ||
780 | if (kvm->arch.using_mmu_notifiers) | ||
781 | kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); | ||
782 | return 0; | ||
783 | } | ||
784 | |||
785 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | ||
786 | unsigned long gfn) | ||
787 | { | ||
788 | struct revmap_entry *rev = kvm->arch.revmap; | ||
789 | unsigned long head, i, j; | ||
790 | unsigned long *hptep; | ||
791 | int ret = 0; | ||
792 | |||
793 | retry: | ||
794 | lock_rmap(rmapp); | ||
795 | if (*rmapp & KVMPPC_RMAP_REFERENCED) { | ||
796 | *rmapp &= ~KVMPPC_RMAP_REFERENCED; | ||
797 | ret = 1; | ||
798 | } | ||
799 | if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { | ||
800 | unlock_rmap(rmapp); | ||
801 | return ret; | ||
802 | } | ||
803 | |||
804 | i = head = *rmapp & KVMPPC_RMAP_INDEX; | ||
805 | do { | ||
806 | hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); | ||
807 | j = rev[i].forw; | ||
808 | |||
809 | /* If this HPTE isn't referenced, ignore it */ | ||
810 | if (!(hptep[1] & HPTE_R_R)) | ||
811 | continue; | ||
812 | |||
813 | if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { | ||
814 | /* unlock rmap before spinning on the HPTE lock */ | ||
815 | unlock_rmap(rmapp); | ||
816 | while (hptep[0] & HPTE_V_HVLOCK) | ||
817 | cpu_relax(); | ||
818 | goto retry; | ||
819 | } | ||
820 | |||
821 | /* Now check and modify the HPTE */ | ||
822 | if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { | ||
823 | kvmppc_clear_ref_hpte(kvm, hptep, i); | ||
824 | rev[i].guest_rpte |= HPTE_R_R; | ||
825 | ret = 1; | ||
826 | } | ||
827 | hptep[0] &= ~HPTE_V_HVLOCK; | ||
828 | } while ((i = j) != head); | ||
829 | |||
830 | unlock_rmap(rmapp); | ||
831 | return ret; | ||
832 | } | ||
833 | |||
834 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) | ||
835 | { | ||
836 | if (!kvm->arch.using_mmu_notifiers) | ||
837 | return 0; | ||
838 | return kvm_handle_hva(kvm, hva, kvm_age_rmapp); | ||
839 | } | ||
840 | |||
841 | static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | ||
842 | unsigned long gfn) | ||
843 | { | ||
844 | struct revmap_entry *rev = kvm->arch.revmap; | ||
845 | unsigned long head, i, j; | ||
846 | unsigned long *hp; | ||
847 | int ret = 1; | ||
848 | |||
849 | if (*rmapp & KVMPPC_RMAP_REFERENCED) | ||
850 | return 1; | ||
851 | |||
852 | lock_rmap(rmapp); | ||
853 | if (*rmapp & KVMPPC_RMAP_REFERENCED) | ||
854 | goto out; | ||
855 | |||
856 | if (*rmapp & KVMPPC_RMAP_PRESENT) { | ||
857 | i = head = *rmapp & KVMPPC_RMAP_INDEX; | ||
858 | do { | ||
859 | hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4)); | ||
860 | j = rev[i].forw; | ||
861 | if (hp[1] & HPTE_R_R) | ||
862 | goto out; | ||
863 | } while ((i = j) != head); | ||
864 | } | ||
865 | ret = 0; | ||
866 | |||
867 | out: | ||
868 | unlock_rmap(rmapp); | ||
869 | return ret; | ||
870 | } | ||
871 | |||
872 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) | ||
873 | { | ||
874 | if (!kvm->arch.using_mmu_notifiers) | ||
875 | return 0; | ||
876 | return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp); | ||
877 | } | ||
878 | |||
879 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) | ||
163 | { | 880 | { |
164 | return -ENOENT; | 881 | if (!kvm->arch.using_mmu_notifiers) |
882 | return; | ||
883 | kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); | ||
884 | } | ||
885 | |||
886 | static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) | ||
887 | { | ||
888 | struct revmap_entry *rev = kvm->arch.revmap; | ||
889 | unsigned long head, i, j; | ||
890 | unsigned long *hptep; | ||
891 | int ret = 0; | ||
892 | |||
893 | retry: | ||
894 | lock_rmap(rmapp); | ||
895 | if (*rmapp & KVMPPC_RMAP_CHANGED) { | ||
896 | *rmapp &= ~KVMPPC_RMAP_CHANGED; | ||
897 | ret = 1; | ||
898 | } | ||
899 | if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { | ||
900 | unlock_rmap(rmapp); | ||
901 | return ret; | ||
902 | } | ||
903 | |||
904 | i = head = *rmapp & KVMPPC_RMAP_INDEX; | ||
905 | do { | ||
906 | hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); | ||
907 | j = rev[i].forw; | ||
908 | |||
909 | if (!(hptep[1] & HPTE_R_C)) | ||
910 | continue; | ||
911 | |||
912 | if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { | ||
913 | /* unlock rmap before spinning on the HPTE lock */ | ||
914 | unlock_rmap(rmapp); | ||
915 | while (hptep[0] & HPTE_V_HVLOCK) | ||
916 | cpu_relax(); | ||
917 | goto retry; | ||
918 | } | ||
919 | |||
920 | /* Now check and modify the HPTE */ | ||
921 | if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_C)) { | ||
922 | /* need to make it temporarily absent to clear C */ | ||
923 | hptep[0] |= HPTE_V_ABSENT; | ||
924 | kvmppc_invalidate_hpte(kvm, hptep, i); | ||
925 | hptep[1] &= ~HPTE_R_C; | ||
926 | eieio(); | ||
927 | hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; | ||
928 | rev[i].guest_rpte |= HPTE_R_C; | ||
929 | ret = 1; | ||
930 | } | ||
931 | hptep[0] &= ~HPTE_V_HVLOCK; | ||
932 | } while ((i = j) != head); | ||
933 | |||
934 | unlock_rmap(rmapp); | ||
935 | return ret; | ||
936 | } | ||
937 | |||
938 | long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) | ||
939 | { | ||
940 | unsigned long i; | ||
941 | unsigned long *rmapp, *map; | ||
942 | |||
943 | preempt_disable(); | ||
944 | rmapp = memslot->rmap; | ||
945 | map = memslot->dirty_bitmap; | ||
946 | for (i = 0; i < memslot->npages; ++i) { | ||
947 | if (kvm_test_clear_dirty(kvm, rmapp)) | ||
948 | __set_bit_le(i, map); | ||
949 | ++rmapp; | ||
950 | } | ||
951 | preempt_enable(); | ||
952 | return 0; | ||
953 | } | ||
954 | |||
955 | void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, | ||
956 | unsigned long *nb_ret) | ||
957 | { | ||
958 | struct kvm_memory_slot *memslot; | ||
959 | unsigned long gfn = gpa >> PAGE_SHIFT; | ||
960 | struct page *page, *pages[1]; | ||
961 | int npages; | ||
962 | unsigned long hva, psize, offset; | ||
963 | unsigned long pa; | ||
964 | unsigned long *physp; | ||
965 | |||
966 | memslot = gfn_to_memslot(kvm, gfn); | ||
967 | if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) | ||
968 | return NULL; | ||
969 | if (!kvm->arch.using_mmu_notifiers) { | ||
970 | physp = kvm->arch.slot_phys[memslot->id]; | ||
971 | if (!physp) | ||
972 | return NULL; | ||
973 | physp += gfn - memslot->base_gfn; | ||
974 | pa = *physp; | ||
975 | if (!pa) { | ||
976 | if (kvmppc_get_guest_page(kvm, gfn, memslot, | ||
977 | PAGE_SIZE) < 0) | ||
978 | return NULL; | ||
979 | pa = *physp; | ||
980 | } | ||
981 | page = pfn_to_page(pa >> PAGE_SHIFT); | ||
982 | } else { | ||
983 | hva = gfn_to_hva_memslot(memslot, gfn); | ||
984 | npages = get_user_pages_fast(hva, 1, 1, pages); | ||
985 | if (npages < 1) | ||
986 | return NULL; | ||
987 | page = pages[0]; | ||
988 | } | ||
989 | psize = PAGE_SIZE; | ||
990 | if (PageHuge(page)) { | ||
991 | page = compound_head(page); | ||
992 | psize <<= compound_order(page); | ||
993 | } | ||
994 | if (!kvm->arch.using_mmu_notifiers) | ||
995 | get_page(page); | ||
996 | offset = gpa & (psize - 1); | ||
997 | if (nb_ret) | ||
998 | *nb_ret = psize - offset; | ||
999 | return page_address(page) + offset; | ||
1000 | } | ||
1001 | |||
1002 | void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) | ||
1003 | { | ||
1004 | struct page *page = virt_to_page(va); | ||
1005 | |||
1006 | page = compound_head(page); | ||
1007 | put_page(page); | ||
165 | } | 1008 | } |
166 | 1009 | ||
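Callers are expected to bracket any access to guest memory with this pair; nb_ret reports how many bytes from the returned pointer stay inside the pinned page. Roughly, following the do_h_register_vpa() usage later in this patch:

	unsigned long nb;
	void *va = kvmppc_pin_guest_page(kvm, gpa, &nb);

	if (!va)
		return H_PARAMETER;		/* gpa not backed by a usable memslot */
	if (len > nb) {				/* structure must fit in the pinned page */
		kvmppc_unpin_guest_page(kvm, va);
		return H_PARAMETER;
	}
	/* ... read or write the guest structure at va ... */
	kvmppc_unpin_guest_page(kvm, va);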
167 | void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) | 1010 | void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) |
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 0c9dc62532d0..f1950d131827 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c | |||
@@ -230,9 +230,12 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
230 | 230 | ||
231 | r = kvmppc_st(vcpu, &addr, 32, zeros, true); | 231 | r = kvmppc_st(vcpu, &addr, 32, zeros, true); |
232 | if ((r == -ENOENT) || (r == -EPERM)) { | 232 | if ((r == -ENOENT) || (r == -EPERM)) { |
233 | struct kvmppc_book3s_shadow_vcpu *svcpu; | ||
234 | |||
235 | svcpu = svcpu_get(vcpu); | ||
233 | *advance = 0; | 236 | *advance = 0; |
234 | vcpu->arch.shared->dar = vaddr; | 237 | vcpu->arch.shared->dar = vaddr; |
235 | to_svcpu(vcpu)->fault_dar = vaddr; | 238 | svcpu->fault_dar = vaddr; |
236 | 239 | ||
237 | dsisr = DSISR_ISSTORE; | 240 | dsisr = DSISR_ISSTORE; |
238 | if (r == -ENOENT) | 241 | if (r == -ENOENT) |
@@ -241,7 +244,8 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
241 | dsisr |= DSISR_PROTFAULT; | 244 | dsisr |= DSISR_PROTFAULT; |
242 | 245 | ||
243 | vcpu->arch.shared->dsisr = dsisr; | 246 | vcpu->arch.shared->dsisr = dsisr; |
244 | to_svcpu(vcpu)->fault_dsisr = dsisr; | 247 | svcpu->fault_dsisr = dsisr; |
248 | svcpu_put(svcpu); | ||
245 | 249 | ||
246 | kvmppc_book3s_queue_irqprio(vcpu, | 250 | kvmppc_book3s_queue_irqprio(vcpu, |
247 | BOOK3S_INTERRUPT_DATA_STORAGE); | 251 | BOOK3S_INTERRUPT_DATA_STORAGE); |
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index a7267167a550..d386b6198bc7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c | |||
@@ -48,22 +48,14 @@ | |||
48 | #include <linux/gfp.h> | 48 | #include <linux/gfp.h> |
49 | #include <linux/vmalloc.h> | 49 | #include <linux/vmalloc.h> |
50 | #include <linux/highmem.h> | 50 | #include <linux/highmem.h> |
51 | 51 | #include <linux/hugetlb.h> | |
52 | /* | ||
53 | * For now, limit memory to 64GB and require it to be large pages. | ||
54 | * This value is chosen because it makes the ram_pginfo array be | ||
55 | * 64kB in size, which is about as large as we want to be trying | ||
56 | * to allocate with kmalloc. | ||
57 | */ | ||
58 | #define MAX_MEM_ORDER 36 | ||
59 | |||
60 | #define LARGE_PAGE_ORDER 24 /* 16MB pages */ | ||
61 | 52 | ||
62 | /* #define EXIT_DEBUG */ | 53 | /* #define EXIT_DEBUG */ |
63 | /* #define EXIT_DEBUG_SIMPLE */ | 54 | /* #define EXIT_DEBUG_SIMPLE */ |
64 | /* #define EXIT_DEBUG_INT */ | 55 | /* #define EXIT_DEBUG_INT */ |
65 | 56 | ||
66 | static void kvmppc_end_cede(struct kvm_vcpu *vcpu); | 57 | static void kvmppc_end_cede(struct kvm_vcpu *vcpu); |
58 | static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu); | ||
67 | 59 | ||
68 | void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 60 | void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
69 | { | 61 | { |
@@ -146,10 +138,10 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, | |||
146 | unsigned long vcpuid, unsigned long vpa) | 138 | unsigned long vcpuid, unsigned long vpa) |
147 | { | 139 | { |
148 | struct kvm *kvm = vcpu->kvm; | 140 | struct kvm *kvm = vcpu->kvm; |
149 | unsigned long pg_index, ra, len; | 141 | unsigned long len, nb; |
150 | unsigned long pg_offset; | ||
151 | void *va; | 142 | void *va; |
152 | struct kvm_vcpu *tvcpu; | 143 | struct kvm_vcpu *tvcpu; |
144 | int err = H_PARAMETER; | ||
153 | 145 | ||
154 | tvcpu = kvmppc_find_vcpu(kvm, vcpuid); | 146 | tvcpu = kvmppc_find_vcpu(kvm, vcpuid); |
155 | if (!tvcpu) | 147 | if (!tvcpu) |
@@ -162,45 +154,41 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, | |||
162 | if (flags < 4) { | 154 | if (flags < 4) { |
163 | if (vpa & 0x7f) | 155 | if (vpa & 0x7f) |
164 | return H_PARAMETER; | 156 | return H_PARAMETER; |
157 | if (flags >= 2 && !tvcpu->arch.vpa) | ||
158 | return H_RESOURCE; | ||
165 | /* registering new area; convert logical addr to real */ | 159 | /* registering new area; convert logical addr to real */ |
166 | pg_index = vpa >> kvm->arch.ram_porder; | 160 | va = kvmppc_pin_guest_page(kvm, vpa, &nb); |
167 | pg_offset = vpa & (kvm->arch.ram_psize - 1); | 161 | if (va == NULL) |
168 | if (pg_index >= kvm->arch.ram_npages) | ||
169 | return H_PARAMETER; | 162 | return H_PARAMETER; |
170 | if (kvm->arch.ram_pginfo[pg_index].pfn == 0) | ||
171 | return H_PARAMETER; | ||
172 | ra = kvm->arch.ram_pginfo[pg_index].pfn << PAGE_SHIFT; | ||
173 | ra |= pg_offset; | ||
174 | va = __va(ra); | ||
175 | if (flags <= 1) | 163 | if (flags <= 1) |
176 | len = *(unsigned short *)(va + 4); | 164 | len = *(unsigned short *)(va + 4); |
177 | else | 165 | else |
178 | len = *(unsigned int *)(va + 4); | 166 | len = *(unsigned int *)(va + 4); |
179 | if (pg_offset + len > kvm->arch.ram_psize) | 167 | if (len > nb) |
180 | return H_PARAMETER; | 168 | goto out_unpin; |
181 | switch (flags) { | 169 | switch (flags) { |
182 | case 1: /* register VPA */ | 170 | case 1: /* register VPA */ |
183 | if (len < 640) | 171 | if (len < 640) |
184 | return H_PARAMETER; | 172 | goto out_unpin; |
173 | if (tvcpu->arch.vpa) | ||
174 | kvmppc_unpin_guest_page(kvm, vcpu->arch.vpa); | ||
185 | tvcpu->arch.vpa = va; | 175 | tvcpu->arch.vpa = va; |
186 | init_vpa(vcpu, va); | 176 | init_vpa(vcpu, va); |
187 | break; | 177 | break; |
188 | case 2: /* register DTL */ | 178 | case 2: /* register DTL */ |
189 | if (len < 48) | 179 | if (len < 48) |
190 | return H_PARAMETER; | 180 | goto out_unpin; |
191 | if (!tvcpu->arch.vpa) | ||
192 | return H_RESOURCE; | ||
193 | len -= len % 48; | 181 | len -= len % 48; |
182 | if (tvcpu->arch.dtl) | ||
183 | kvmppc_unpin_guest_page(kvm, vcpu->arch.dtl); | ||
194 | tvcpu->arch.dtl = va; | 184 | tvcpu->arch.dtl = va; |
195 | tvcpu->arch.dtl_end = va + len; | 185 | tvcpu->arch.dtl_end = va + len; |
196 | break; | 186 | break; |
197 | case 3: /* register SLB shadow buffer */ | 187 | case 3: /* register SLB shadow buffer */ |
198 | if (len < 8) | 188 | if (len < 16) |
199 | return H_PARAMETER; | 189 | goto out_unpin; |
200 | if (!tvcpu->arch.vpa) | 190 | if (tvcpu->arch.slb_shadow) |
201 | return H_RESOURCE; | 191 | kvmppc_unpin_guest_page(kvm, vcpu->arch.slb_shadow); |
202 | tvcpu->arch.slb_shadow = va; | ||
203 | len = (len - 16) / 16; | ||
204 | tvcpu->arch.slb_shadow = va; | 192 | tvcpu->arch.slb_shadow = va; |
205 | break; | 193 | break; |
206 | } | 194 | } |
@@ -209,17 +197,30 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, | |||
209 | case 5: /* unregister VPA */ | 197 | case 5: /* unregister VPA */ |
210 | if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl) | 198 | if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl) |
211 | return H_RESOURCE; | 199 | return H_RESOURCE; |
200 | if (!tvcpu->arch.vpa) | ||
201 | break; | ||
202 | kvmppc_unpin_guest_page(kvm, tvcpu->arch.vpa); | ||
212 | tvcpu->arch.vpa = NULL; | 203 | tvcpu->arch.vpa = NULL; |
213 | break; | 204 | break; |
214 | case 6: /* unregister DTL */ | 205 | case 6: /* unregister DTL */ |
206 | if (!tvcpu->arch.dtl) | ||
207 | break; | ||
208 | kvmppc_unpin_guest_page(kvm, tvcpu->arch.dtl); | ||
215 | tvcpu->arch.dtl = NULL; | 209 | tvcpu->arch.dtl = NULL; |
216 | break; | 210 | break; |
217 | case 7: /* unregister SLB shadow buffer */ | 211 | case 7: /* unregister SLB shadow buffer */ |
212 | if (!tvcpu->arch.slb_shadow) | ||
213 | break; | ||
214 | kvmppc_unpin_guest_page(kvm, tvcpu->arch.slb_shadow); | ||
218 | tvcpu->arch.slb_shadow = NULL; | 215 | tvcpu->arch.slb_shadow = NULL; |
219 | break; | 216 | break; |
220 | } | 217 | } |
221 | } | 218 | } |
222 | return H_SUCCESS; | 219 | return H_SUCCESS; |
220 | |||
221 | out_unpin: | ||
222 | kvmppc_unpin_guest_page(kvm, va); | ||
223 | return err; | ||
223 | } | 224 | } |
224 | 225 | ||
225 | int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | 226 | int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) |
@@ -229,6 +230,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | |||
229 | struct kvm_vcpu *tvcpu; | 230 | struct kvm_vcpu *tvcpu; |
230 | 231 | ||
231 | switch (req) { | 232 | switch (req) { |
233 | case H_ENTER: | ||
234 | ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4), | ||
235 | kvmppc_get_gpr(vcpu, 5), | ||
236 | kvmppc_get_gpr(vcpu, 6), | ||
237 | kvmppc_get_gpr(vcpu, 7)); | ||
238 | break; | ||
232 | case H_CEDE: | 239 | case H_CEDE: |
233 | break; | 240 | break; |
234 | case H_PROD: | 241 | case H_PROD: |
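PAPR hypercalls carry the function token in r3 and arguments in r4 upwards, which is why the handler reads GPRs 4-7 for H_ENTER. From a pseries guest the equivalent call goes out through the standard hcall wrapper, roughly (a sketch; see the guest's hvcall definitions):

	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
	long rc = plpar_hcall(H_ENTER, retbuf, flags, pte_index, pteh, ptel);
	/* on H_SUCCESS, retbuf[0] holds the index the entry actually went into */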
@@ -318,20 +325,19 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
318 | break; | 325 | break; |
319 | } | 326 | } |
320 | /* | 327 | /* |
321 | * We get these next two if the guest does a bad real-mode access, | 328 | * We get these next two if the guest accesses a page which it thinks |
322 | * as we have enabled VRMA (virtualized real mode area) mode in the | 329 | * it has mapped but which is not actually present, either because |
323 | * LPCR. We just generate an appropriate DSI/ISI to the guest. | 330 | * it is for an emulated I/O device or because the corresponding |
331 | * host page has been paged out. Any other HDSI/HISI interrupts | ||
332 | * have been handled already. | ||
324 | */ | 333 | */ |
325 | case BOOK3S_INTERRUPT_H_DATA_STORAGE: | 334 | case BOOK3S_INTERRUPT_H_DATA_STORAGE: |
326 | vcpu->arch.shregs.dsisr = vcpu->arch.fault_dsisr; | 335 | r = kvmppc_book3s_hv_page_fault(run, vcpu, |
327 | vcpu->arch.shregs.dar = vcpu->arch.fault_dar; | 336 | vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); |
328 | kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE, 0); | ||
329 | r = RESUME_GUEST; | ||
330 | break; | 337 | break; |
331 | case BOOK3S_INTERRUPT_H_INST_STORAGE: | 338 | case BOOK3S_INTERRUPT_H_INST_STORAGE: |
332 | kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE, | 339 | r = kvmppc_book3s_hv_page_fault(run, vcpu, |
333 | 0x08000000); | 340 | kvmppc_get_pc(vcpu), 0); |
334 | r = RESUME_GUEST; | ||
335 | break; | 341 | break; |
336 | /* | 342 | /* |
337 | * This occurs if the guest executes an illegal instruction. | 343 | * This occurs if the guest executes an illegal instruction. |
@@ -391,6 +397,42 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
391 | return 0; | 397 | return 0; |
392 | } | 398 | } |
393 | 399 | ||
400 | int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | ||
401 | { | ||
402 | int r = -EINVAL; | ||
403 | |||
404 | switch (reg->id) { | ||
405 | case KVM_REG_PPC_HIOR: | ||
406 | r = put_user(0, (u64 __user *)reg->addr); | ||
407 | break; | ||
408 | default: | ||
409 | break; | ||
410 | } | ||
411 | |||
412 | return r; | ||
413 | } | ||
414 | |||
415 | int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | ||
416 | { | ||
417 | int r = -EINVAL; | ||
418 | |||
419 | switch (reg->id) { | ||
420 | case KVM_REG_PPC_HIOR: | ||
421 | { | ||
422 | u64 hior; | ||
423 | /* Only allow this to be set to zero */ | ||
424 | r = get_user(hior, (u64 __user *)reg->addr); | ||
425 | if (!r && (hior != 0)) | ||
426 | r = -EINVAL; | ||
427 | break; | ||
428 | } | ||
429 | default: | ||
430 | break; | ||
431 | } | ||
432 | |||
433 | return r; | ||
434 | } | ||
435 | |||
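These hooks back the KVM_GET_ONE_REG/KVM_SET_ONE_REG vcpu ioctls. A userspace sketch of setting HIOR to the only value HV mode accepts (vcpu_fd is assumed to be an already-open vcpu descriptor):

	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	__u64 hior = 0;				/* anything non-zero fails with -EINVAL */
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_HIOR,
		.addr = (__u64)(unsigned long)&hior,
	};
	if (ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg) < 0)
		perror("KVM_SET_ONE_REG");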
394 | int kvmppc_core_check_processor_compat(void) | 436 | int kvmppc_core_check_processor_compat(void) |
395 | { | 437 | { |
396 | if (cpu_has_feature(CPU_FTR_HVMODE)) | 438 | if (cpu_has_feature(CPU_FTR_HVMODE)) |
@@ -410,7 +452,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | |||
410 | goto out; | 452 | goto out; |
411 | 453 | ||
412 | err = -ENOMEM; | 454 | err = -ENOMEM; |
413 | vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); | 455 | vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); |
414 | if (!vcpu) | 456 | if (!vcpu) |
415 | goto out; | 457 | goto out; |
416 | 458 | ||
@@ -462,15 +504,21 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | |||
462 | return vcpu; | 504 | return vcpu; |
463 | 505 | ||
464 | free_vcpu: | 506 | free_vcpu: |
465 | kfree(vcpu); | 507 | kmem_cache_free(kvm_vcpu_cache, vcpu); |
466 | out: | 508 | out: |
467 | return ERR_PTR(err); | 509 | return ERR_PTR(err); |
468 | } | 510 | } |
469 | 511 | ||
470 | void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) | 512 | void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) |
471 | { | 513 | { |
514 | if (vcpu->arch.dtl) | ||
515 | kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl); | ||
516 | if (vcpu->arch.slb_shadow) | ||
517 | kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow); | ||
518 | if (vcpu->arch.vpa) | ||
519 | kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa); | ||
472 | kvm_vcpu_uninit(vcpu); | 520 | kvm_vcpu_uninit(vcpu); |
473 | kfree(vcpu); | 521 | kmem_cache_free(kvm_vcpu_cache, vcpu); |
474 | } | 522 | } |
475 | 523 | ||
476 | static void kvmppc_set_timer(struct kvm_vcpu *vcpu) | 524 | static void kvmppc_set_timer(struct kvm_vcpu *vcpu) |
@@ -481,7 +529,7 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu) | |||
481 | if (now > vcpu->arch.dec_expires) { | 529 | if (now > vcpu->arch.dec_expires) { |
482 | /* decrementer has already gone negative */ | 530 | /* decrementer has already gone negative */ |
483 | kvmppc_core_queue_dec(vcpu); | 531 | kvmppc_core_queue_dec(vcpu); |
484 | kvmppc_core_deliver_interrupts(vcpu); | 532 | kvmppc_core_prepare_to_enter(vcpu); |
485 | return; | 533 | return; |
486 | } | 534 | } |
487 | dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC | 535 | dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC |
@@ -796,7 +844,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
796 | 844 | ||
797 | list_for_each_entry_safe(v, vn, &vc->runnable_threads, | 845 | list_for_each_entry_safe(v, vn, &vc->runnable_threads, |
798 | arch.run_list) { | 846 | arch.run_list) { |
799 | kvmppc_core_deliver_interrupts(v); | 847 | kvmppc_core_prepare_to_enter(v); |
800 | if (signal_pending(v->arch.run_task)) { | 848 | if (signal_pending(v->arch.run_task)) { |
801 | kvmppc_remove_runnable(vc, v); | 849 | kvmppc_remove_runnable(vc, v); |
802 | v->stat.signal_exits++; | 850 | v->stat.signal_exits++; |
@@ -835,20 +883,26 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
835 | return -EINVAL; | 883 | return -EINVAL; |
836 | } | 884 | } |
837 | 885 | ||
886 | kvmppc_core_prepare_to_enter(vcpu); | ||
887 | |||
838 | /* No need to go into the guest when all we'll do is come back out */ | 888 | /* No need to go into the guest when all we'll do is come back out */ |
839 | if (signal_pending(current)) { | 889 | if (signal_pending(current)) { |
840 | run->exit_reason = KVM_EXIT_INTR; | 890 | run->exit_reason = KVM_EXIT_INTR; |
841 | return -EINTR; | 891 | return -EINTR; |
842 | } | 892 | } |
843 | 893 | ||
844 | /* On PPC970, check that we have an RMA region */ | 894 | /* On the first time here, set up VRMA or RMA */ |
845 | if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201)) | 895 | if (!vcpu->kvm->arch.rma_setup_done) { |
846 | return -EPERM; | 896 | r = kvmppc_hv_setup_rma(vcpu); |
897 | if (r) | ||
898 | return r; | ||
899 | } | ||
847 | 900 | ||
848 | flush_fp_to_thread(current); | 901 | flush_fp_to_thread(current); |
849 | flush_altivec_to_thread(current); | 902 | flush_altivec_to_thread(current); |
850 | flush_vsx_to_thread(current); | 903 | flush_vsx_to_thread(current); |
851 | vcpu->arch.wqp = &vcpu->arch.vcore->wq; | 904 | vcpu->arch.wqp = &vcpu->arch.vcore->wq; |
905 | vcpu->arch.pgdir = current->mm->pgd; | ||
852 | 906 | ||
853 | do { | 907 | do { |
854 | r = kvmppc_run_vcpu(run, vcpu); | 908 | r = kvmppc_run_vcpu(run, vcpu); |
@@ -856,7 +910,7 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
856 | if (run->exit_reason == KVM_EXIT_PAPR_HCALL && | 910 | if (run->exit_reason == KVM_EXIT_PAPR_HCALL && |
857 | !(vcpu->arch.shregs.msr & MSR_PR)) { | 911 | !(vcpu->arch.shregs.msr & MSR_PR)) { |
858 | r = kvmppc_pseries_do_hcall(vcpu); | 912 | r = kvmppc_pseries_do_hcall(vcpu); |
859 | kvmppc_core_deliver_interrupts(vcpu); | 913 | kvmppc_core_prepare_to_enter(vcpu); |
860 | } | 914 | } |
861 | } while (r == RESUME_GUEST); | 915 | } while (r == RESUME_GUEST); |
862 | return r; | 916 | return r; |
@@ -1000,7 +1054,7 @@ static inline int lpcr_rmls(unsigned long rma_size) | |||
1000 | 1054 | ||
1001 | static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 1055 | static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
1002 | { | 1056 | { |
1003 | struct kvmppc_rma_info *ri = vma->vm_file->private_data; | 1057 | struct kvmppc_linear_info *ri = vma->vm_file->private_data; |
1004 | struct page *page; | 1058 | struct page *page; |
1005 | 1059 | ||
1006 | if (vmf->pgoff >= ri->npages) | 1060 | if (vmf->pgoff >= ri->npages) |
@@ -1025,7 +1079,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma) | |||
1025 | 1079 | ||
1026 | static int kvm_rma_release(struct inode *inode, struct file *filp) | 1080 | static int kvm_rma_release(struct inode *inode, struct file *filp) |
1027 | { | 1081 | { |
1028 | struct kvmppc_rma_info *ri = filp->private_data; | 1082 | struct kvmppc_linear_info *ri = filp->private_data; |
1029 | 1083 | ||
1030 | kvm_release_rma(ri); | 1084 | kvm_release_rma(ri); |
1031 | return 0; | 1085 | return 0; |
@@ -1038,7 +1092,7 @@ static struct file_operations kvm_rma_fops = { | |||
1038 | 1092 | ||
1039 | long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) | 1093 | long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) |
1040 | { | 1094 | { |
1041 | struct kvmppc_rma_info *ri; | 1095 | struct kvmppc_linear_info *ri; |
1042 | long fd; | 1096 | long fd; |
1043 | 1097 | ||
1044 | ri = kvm_alloc_rma(); | 1098 | ri = kvm_alloc_rma(); |
@@ -1053,89 +1107,189 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) | |||
1053 | return fd; | 1107 | return fd; |
1054 | } | 1108 | } |
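kvm_vm_ioctl_allocate_rma() above returns a fresh fd whose mmap (through kvm_rma_fops) exposes one preallocated linear area to userspace. The caller side looks roughly like this (a sketch; it assumes the kernel was booted with kvm_rma_size=/kvm_rma_count= so the free list is populated, and that vm_fd came from KVM_CREATE_VM):

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

/* Sketch: obtain a preallocated RMA and map it; returns MAP_FAILED
 * when no RMA is available (e.g. POWER7 guests using the VRMA). */
static void *map_rma(int vm_fd)
{
        struct kvm_allocate_rma rma;
        int rma_fd = ioctl(vm_fd, KVM_ALLOCATE_RMA, &rma); /* sets rma.rma_size */

        if (rma_fd < 0)
                return MAP_FAILED;
        return mmap(NULL, rma.rma_size, PROT_READ | PROT_WRITE,
                    MAP_SHARED, rma_fd, 0);
}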
1055 | 1109 | ||
1056 | static struct page *hva_to_page(unsigned long addr) | 1110 | /* |
1111 | * Get (and clear) the dirty memory log for a memory slot. | ||
1112 | */ | ||
1113 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) | ||
1057 | { | 1114 | { |
1058 | struct page *page[1]; | 1115 | struct kvm_memory_slot *memslot; |
1059 | int npages; | 1116 | int r; |
1117 | unsigned long n; | ||
1060 | 1118 | ||
1061 | might_sleep(); | 1119 | mutex_lock(&kvm->slots_lock); |
1062 | 1120 | ||
1063 | npages = get_user_pages_fast(addr, 1, 1, page); | 1121 | r = -EINVAL; |
1122 | if (log->slot >= KVM_MEMORY_SLOTS) | ||
1123 | goto out; | ||
1064 | 1124 | ||
1065 | if (unlikely(npages != 1)) | 1125 | memslot = id_to_memslot(kvm->memslots, log->slot); |
1066 | return 0; | 1126 | r = -ENOENT; |
1127 | if (!memslot->dirty_bitmap) | ||
1128 | goto out; | ||
1129 | |||
1130 | n = kvm_dirty_bitmap_bytes(memslot); | ||
1131 | memset(memslot->dirty_bitmap, 0, n); | ||
1132 | |||
1133 | r = kvmppc_hv_get_dirty_log(kvm, memslot); | ||
1134 | if (r) | ||
1135 | goto out; | ||
1067 | 1136 | ||
1068 | return page[0]; | 1137 | r = -EFAULT; |
1138 | if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) | ||
1139 | goto out; | ||
1140 | |||
1141 | r = 0; | ||
1142 | out: | ||
1143 | mutex_unlock(&kvm->slots_lock); | ||
1144 | return r; | ||
1145 | } | ||
1146 | |||
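Because the handler above zeroes the bitmap before kvmppc_hv_get_dirty_log() repopulates it (from the hardware changed bits this series tracks), each KVM_GET_DIRTY_LOG call reports pages dirtied since the previous call. A caller sketch (slot number and buffer sizing are illustrative; the bitmap is one bit per page, rounded up to 64-bit words as kvm_dirty_bitmap_bytes() does):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: fetch the dirty bitmap for one memslot. The caller tracks
 * the slot size in pages and sizes the buffer accordingly. */
static int get_dirty(int vm_fd, uint32_t slot, uint64_t *bitmap)
{
        struct kvm_dirty_log log;

        memset(&log, 0, sizeof(log));
        log.slot = slot;
        log.dirty_bitmap = bitmap;      /* (npages + 63) / 64 u64 words */
        return ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
}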
1147 | static unsigned long slb_pgsize_encoding(unsigned long psize) | ||
1148 | { | ||
1149 | unsigned long senc = 0; | ||
1150 | |||
1151 | if (psize > 0x1000) { | ||
1152 | senc = SLB_VSID_L; | ||
1153 | if (psize == 0x10000) | ||
1154 | senc |= SLB_VSID_LP_01; | ||
1155 | } | ||
1156 | return senc; | ||
1069 | } | 1157 | } |
1070 | 1158 | ||
1071 | int kvmppc_core_prepare_memory_region(struct kvm *kvm, | 1159 | int kvmppc_core_prepare_memory_region(struct kvm *kvm, |
1072 | struct kvm_userspace_memory_region *mem) | 1160 | struct kvm_userspace_memory_region *mem) |
1073 | { | 1161 | { |
1074 | unsigned long psize, porder; | 1162 | unsigned long npages; |
1075 | unsigned long i, npages, totalpages; | 1163 | unsigned long *phys; |
1076 | unsigned long pg_ix; | 1164 | |
1077 | struct kvmppc_pginfo *pginfo; | 1165 | /* Allocate a slot_phys array */ |
1078 | unsigned long hva; | 1166 | phys = kvm->arch.slot_phys[mem->slot]; |
1079 | struct kvmppc_rma_info *ri = NULL; | 1167 | if (!kvm->arch.using_mmu_notifiers && !phys) { |
1168 | npages = mem->memory_size >> PAGE_SHIFT; | ||
1169 | phys = vzalloc(npages * sizeof(unsigned long)); | ||
1170 | if (!phys) | ||
1171 | return -ENOMEM; | ||
1172 | kvm->arch.slot_phys[mem->slot] = phys; | ||
1173 | kvm->arch.slot_npages[mem->slot] = npages; | ||
1174 | } | ||
1175 | |||
1176 | return 0; | ||
1177 | } | ||
1178 | |||
1179 | static void unpin_slot(struct kvm *kvm, int slot_id) | ||
1180 | { | ||
1181 | unsigned long *physp; | ||
1182 | unsigned long j, npages, pfn; | ||
1080 | struct page *page; | 1183 | struct page *page; |
1081 | 1184 | ||
1082 | /* For now, only allow 16MB pages */ | 1185 | physp = kvm->arch.slot_phys[slot_id]; |
1083 | porder = LARGE_PAGE_ORDER; | 1186 | npages = kvm->arch.slot_npages[slot_id]; |
1084 | psize = 1ul << porder; | 1187 | if (physp) { |
1085 | if ((mem->memory_size & (psize - 1)) || | 1188 | spin_lock(&kvm->arch.slot_phys_lock); |
1086 | (mem->guest_phys_addr & (psize - 1))) { | 1189 | for (j = 0; j < npages; j++) { |
1087 | pr_err("bad memory_size=%llx @ %llx\n", | 1190 | if (!(physp[j] & KVMPPC_GOT_PAGE)) |
1088 | mem->memory_size, mem->guest_phys_addr); | 1191 | continue; |
1089 | return -EINVAL; | 1192 | pfn = physp[j] >> PAGE_SHIFT; |
1193 | page = pfn_to_page(pfn); | ||
1194 | if (PageHuge(page)) | ||
1195 | page = compound_head(page); | ||
1196 | SetPageDirty(page); | ||
1197 | put_page(page); | ||
1198 | } | ||
1199 | kvm->arch.slot_phys[slot_id] = NULL; | ||
1200 | spin_unlock(&kvm->arch.slot_phys_lock); | ||
1201 | vfree(physp); | ||
1090 | } | 1202 | } |
1203 | } | ||
1091 | 1204 | ||
1092 | npages = mem->memory_size >> porder; | 1205 | void kvmppc_core_commit_memory_region(struct kvm *kvm, |
1093 | totalpages = (mem->guest_phys_addr + mem->memory_size) >> porder; | 1206 | struct kvm_userspace_memory_region *mem) |
1207 | { | ||
1208 | } | ||
1094 | 1209 | ||
1095 | /* More memory than we have space to track? */ | 1210 | static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) |
1096 | if (totalpages > (1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER))) | 1211 | { |
1097 | return -EINVAL; | 1212 | int err = 0; |
1213 | struct kvm *kvm = vcpu->kvm; | ||
1214 | struct kvmppc_linear_info *ri = NULL; | ||
1215 | unsigned long hva; | ||
1216 | struct kvm_memory_slot *memslot; | ||
1217 | struct vm_area_struct *vma; | ||
1218 | unsigned long lpcr, senc; | ||
1219 | unsigned long psize, porder; | ||
1220 | unsigned long rma_size; | ||
1221 | unsigned long rmls; | ||
1222 | unsigned long *physp; | ||
1223 | unsigned long i, npages; | ||
1098 | 1224 | ||
1099 | /* Do we already have an RMA registered? */ | 1225 | mutex_lock(&kvm->lock); |
1100 | if (mem->guest_phys_addr == 0 && kvm->arch.rma) | 1226 | if (kvm->arch.rma_setup_done) |
1101 | return -EINVAL; | 1227 | goto out; /* another vcpu beat us to it */ |
1102 | 1228 | ||
1103 | if (totalpages > kvm->arch.ram_npages) | 1229 | /* Look up the memslot for guest physical address 0 */ |
1104 | kvm->arch.ram_npages = totalpages; | 1230 | memslot = gfn_to_memslot(kvm, 0); |
1231 | |||
1232 | /* We must have some memory at 0 by now */ | ||
1233 | err = -EINVAL; | ||
1234 | if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) | ||
1235 | goto out; | ||
1236 | |||
1237 | /* Look up the VMA for the start of this memory slot */ | ||
1238 | hva = memslot->userspace_addr; | ||
1239 | down_read(¤t->mm->mmap_sem); | ||
1240 | vma = find_vma(current->mm, hva); | ||
1241 | if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO)) | ||
1242 | goto up_out; | ||
1243 | |||
1244 | psize = vma_kernel_pagesize(vma); | ||
1245 | porder = __ilog2(psize); | ||
1105 | 1246 | ||
1106 | /* Is this one of our preallocated RMAs? */ | 1247 | /* Is this one of our preallocated RMAs? */ |
1107 | if (mem->guest_phys_addr == 0) { | 1248 | if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops && |
1108 | struct vm_area_struct *vma; | 1249 | hva == vma->vm_start) |
1109 | 1250 | ri = vma->vm_file->private_data; | |
1110 | down_read(¤t->mm->mmap_sem); | 1251 | |
1111 | vma = find_vma(current->mm, mem->userspace_addr); | 1252 | up_read(¤t->mm->mmap_sem); |
1112 | if (vma && vma->vm_file && | 1253 | |
1113 | vma->vm_file->f_op == &kvm_rma_fops && | 1254 | if (!ri) { |
1114 | mem->userspace_addr == vma->vm_start) | 1255 | /* On POWER7, use VRMA; on PPC970, give up */ |
1115 | ri = vma->vm_file->private_data; | 1256 | err = -EPERM; |
1116 | up_read(¤t->mm->mmap_sem); | 1257 | if (cpu_has_feature(CPU_FTR_ARCH_201)) { |
1117 | if (!ri && cpu_has_feature(CPU_FTR_ARCH_201)) { | 1258 | pr_err("KVM: CPU requires an RMO\n"); |
1118 | pr_err("CPU requires an RMO\n"); | 1259 | goto out; |
1119 | return -EINVAL; | ||
1120 | } | 1260 | } |
1121 | } | ||
1122 | 1261 | ||
1123 | if (ri) { | 1262 | /* We can handle 4k, 64k or 16M pages in the VRMA */ |
1124 | unsigned long rma_size; | 1263 | err = -EINVAL; |
1125 | unsigned long lpcr; | 1264 | if (!(psize == 0x1000 || psize == 0x10000 || |
1126 | long rmls; | 1265 | psize == 0x1000000)) |
1266 | goto out; | ||
1267 | |||
1268 | /* Update VRMASD field in the LPCR */ | ||
1269 | senc = slb_pgsize_encoding(psize); | ||
1270 | kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | | ||
1271 | (VRMA_VSID << SLB_VSID_SHIFT_1T); | ||
1272 | lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; | ||
1273 | lpcr |= senc << (LPCR_VRMASD_SH - 4); | ||
1274 | kvm->arch.lpcr = lpcr; | ||
1127 | 1275 | ||
1128 | rma_size = ri->npages << PAGE_SHIFT; | 1276 | /* Create HPTEs in the hash page table for the VRMA */ |
1129 | if (rma_size > mem->memory_size) | 1277 | kvmppc_map_vrma(vcpu, memslot, porder); |
1130 | rma_size = mem->memory_size; | 1278 | |
1279 | } else { | ||
1280 | /* Set up to use an RMO region */ | ||
1281 | rma_size = ri->npages; | ||
1282 | if (rma_size > memslot->npages) | ||
1283 | rma_size = memslot->npages; | ||
1284 | rma_size <<= PAGE_SHIFT; | ||
1131 | rmls = lpcr_rmls(rma_size); | 1285 | rmls = lpcr_rmls(rma_size); |
1286 | err = -EINVAL; | ||
1132 | if (rmls < 0) { | 1287 | if (rmls < 0) { |
1133 | pr_err("Can't use RMA of 0x%lx bytes\n", rma_size); | 1288 | pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size); |
1134 | return -EINVAL; | 1289 | goto out; |
1135 | } | 1290 | } |
1136 | atomic_inc(&ri->use_count); | 1291 | atomic_inc(&ri->use_count); |
1137 | kvm->arch.rma = ri; | 1292 | kvm->arch.rma = ri; |
1138 | kvm->arch.n_rma_pages = rma_size >> porder; | ||
1139 | 1293 | ||
1140 | /* Update LPCR and RMOR */ | 1294 | /* Update LPCR and RMOR */ |
1141 | lpcr = kvm->arch.lpcr; | 1295 | lpcr = kvm->arch.lpcr; |
@@ -1155,53 +1309,35 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, | |||
1155 | kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT; | 1309 | kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT; |
1156 | } | 1310 | } |
1157 | kvm->arch.lpcr = lpcr; | 1311 | kvm->arch.lpcr = lpcr; |
1158 | pr_info("Using RMO at %lx size %lx (LPCR = %lx)\n", | 1312 | pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n", |
1159 | ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); | 1313 | ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); |
1160 | } | ||
1161 | 1314 | ||
1162 | pg_ix = mem->guest_phys_addr >> porder; | 1315 | /* Initialize phys addrs of pages in RMO */ |
1163 | pginfo = kvm->arch.ram_pginfo + pg_ix; | 1316 | npages = ri->npages; |
1164 | for (i = 0; i < npages; ++i, ++pg_ix) { | 1317 | porder = __ilog2(npages); |
1165 | if (ri && pg_ix < kvm->arch.n_rma_pages) { | 1318 | physp = kvm->arch.slot_phys[memslot->id]; |
1166 | pginfo[i].pfn = ri->base_pfn + | 1319 | spin_lock(&kvm->arch.slot_phys_lock); |
1167 | (pg_ix << (porder - PAGE_SHIFT)); | 1320 | for (i = 0; i < npages; ++i) |
1168 | continue; | 1321 | physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder; |
1169 | } | 1322 | spin_unlock(&kvm->arch.slot_phys_lock); |
1170 | hva = mem->userspace_addr + (i << porder); | ||
1171 | page = hva_to_page(hva); | ||
1172 | if (!page) { | ||
1173 | pr_err("oops, no pfn for hva %lx\n", hva); | ||
1174 | goto err; | ||
1175 | } | ||
1176 | /* Check it's a 16MB page */ | ||
1177 | if (!PageHead(page) || | ||
1178 | compound_order(page) != (LARGE_PAGE_ORDER - PAGE_SHIFT)) { | ||
1179 | pr_err("page at %lx isn't 16MB (o=%d)\n", | ||
1180 | hva, compound_order(page)); | ||
1181 | goto err; | ||
1182 | } | ||
1183 | pginfo[i].pfn = page_to_pfn(page); | ||
1184 | } | 1323 | } |
1185 | 1324 | ||
1186 | return 0; | 1325 | /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */ |
1187 | 1326 | smp_wmb(); | |
1188 | err: | 1327 | kvm->arch.rma_setup_done = 1; |
1189 | return -EINVAL; | 1328 | err = 0; |
1190 | } | 1329 | out: |
1330 | mutex_unlock(&kvm->lock); | ||
1331 | return err; | ||
1191 | 1332 | ||
1192 | void kvmppc_core_commit_memory_region(struct kvm *kvm, | 1333 | up_out: |
1193 | struct kvm_userspace_memory_region *mem) | 1334 | up_read(¤t->mm->mmap_sem); |
1194 | { | 1335 | goto out; |
1195 | if (mem->guest_phys_addr == 0 && mem->memory_size != 0 && | ||
1196 | !kvm->arch.rma) | ||
1197 | kvmppc_map_vrma(kvm, mem); | ||
1198 | } | 1336 | } |
1199 | 1337 | ||
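Publishing rma_setup_done behind an smp_wmb() only works if readers order their accesses the mirror-image way: once the flag is observed set, the lpcr/vrma_slb_v updates must not be read speculatively earlier. A reader-side sketch of the required shape (use() is a placeholder, not a kernel function, and the real consumer is outside this hunk):

/* Sketch: consumer side of the rma_setup_done publication above. */
if (kvm->arch.rma_setup_done) {
        smp_rmb();      /* pairs with smp_wmb() in kvmppc_hv_setup_rma() */
        use(kvm->arch.lpcr, kvm->arch.vrma_slb_v);      /* placeholder */
}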
1200 | int kvmppc_core_init_vm(struct kvm *kvm) | 1338 | int kvmppc_core_init_vm(struct kvm *kvm) |
1201 | { | 1339 | { |
1202 | long r; | 1340 | long r; |
1203 | unsigned long npages = 1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER); | ||
1204 | long err = -ENOMEM; | ||
1205 | unsigned long lpcr; | 1341 | unsigned long lpcr; |
1206 | 1342 | ||
1207 | /* Allocate hashed page table */ | 1343 | /* Allocate hashed page table */ |
@@ -1211,19 +1347,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) | |||
1211 | 1347 | ||
1212 | INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); | 1348 | INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); |
1213 | 1349 | ||
1214 | kvm->arch.ram_pginfo = kzalloc(npages * sizeof(struct kvmppc_pginfo), | ||
1215 | GFP_KERNEL); | ||
1216 | if (!kvm->arch.ram_pginfo) { | ||
1217 | pr_err("kvmppc_core_init_vm: couldn't alloc %lu bytes\n", | ||
1218 | npages * sizeof(struct kvmppc_pginfo)); | ||
1219 | goto out_free; | ||
1220 | } | ||
1221 | |||
1222 | kvm->arch.ram_npages = 0; | ||
1223 | kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER; | ||
1224 | kvm->arch.ram_porder = LARGE_PAGE_ORDER; | ||
1225 | kvm->arch.rma = NULL; | 1350 | kvm->arch.rma = NULL; |
1226 | kvm->arch.n_rma_pages = 0; | ||
1227 | 1351 | ||
1228 | kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); | 1352 | kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); |
1229 | 1353 | ||
@@ -1241,30 +1365,25 @@ int kvmppc_core_init_vm(struct kvm *kvm) | |||
1241 | kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR); | 1365 | kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR); |
1242 | lpcr &= LPCR_PECE | LPCR_LPES; | 1366 | lpcr &= LPCR_PECE | LPCR_LPES; |
1243 | lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | | 1367 | lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | |
1244 | LPCR_VPM0 | LPCR_VRMA_L; | 1368 | LPCR_VPM0 | LPCR_VPM1; |
1369 | kvm->arch.vrma_slb_v = SLB_VSID_B_1T | | ||
1370 | (VRMA_VSID << SLB_VSID_SHIFT_1T); | ||
1245 | } | 1371 | } |
1246 | kvm->arch.lpcr = lpcr; | 1372 | kvm->arch.lpcr = lpcr; |
1247 | 1373 | ||
1374 | kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206); | ||
1375 | spin_lock_init(&kvm->arch.slot_phys_lock); | ||
1248 | return 0; | 1376 | return 0; |
1249 | |||
1250 | out_free: | ||
1251 | kvmppc_free_hpt(kvm); | ||
1252 | return err; | ||
1253 | } | 1377 | } |
1254 | 1378 | ||
1255 | void kvmppc_core_destroy_vm(struct kvm *kvm) | 1379 | void kvmppc_core_destroy_vm(struct kvm *kvm) |
1256 | { | 1380 | { |
1257 | struct kvmppc_pginfo *pginfo; | ||
1258 | unsigned long i; | 1381 | unsigned long i; |
1259 | 1382 | ||
1260 | if (kvm->arch.ram_pginfo) { | 1383 | if (!kvm->arch.using_mmu_notifiers) |
1261 | pginfo = kvm->arch.ram_pginfo; | 1384 | for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) |
1262 | kvm->arch.ram_pginfo = NULL; | 1385 | unpin_slot(kvm, i); |
1263 | for (i = kvm->arch.n_rma_pages; i < kvm->arch.ram_npages; ++i) | 1386 | |
1264 | if (pginfo[i].pfn) | ||
1265 | put_page(pfn_to_page(pginfo[i].pfn)); | ||
1266 | kfree(pginfo); | ||
1267 | } | ||
1268 | if (kvm->arch.rma) { | 1387 | if (kvm->arch.rma) { |
1269 | kvm_release_rma(kvm->arch.rma); | 1388 | kvm_release_rma(kvm->arch.rma); |
1270 | kvm->arch.rma = NULL; | 1389 | kvm->arch.rma = NULL; |
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index a795a13f4a70..bed1279aa6a8 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c | |||
@@ -18,6 +18,15 @@ | |||
18 | #include <asm/kvm_ppc.h> | 18 | #include <asm/kvm_ppc.h> |
19 | #include <asm/kvm_book3s.h> | 19 | #include <asm/kvm_book3s.h> |
20 | 20 | ||
21 | #define KVM_LINEAR_RMA 0 | ||
22 | #define KVM_LINEAR_HPT 1 | ||
23 | |||
24 | static void __init kvm_linear_init_one(ulong size, int count, int type); | ||
25 | static struct kvmppc_linear_info *kvm_alloc_linear(int type); | ||
26 | static void kvm_release_linear(struct kvmppc_linear_info *ri); | ||
27 | |||
28 | /*************** RMA *************/ | ||
29 | |||
21 | /* | 30 | /* |
22 | * This maintains a list of RMAs (real mode areas) for KVM guests to use. | 31 | * This maintains a list of RMAs (real mode areas) for KVM guests to use. |
23 | * Each RMA has to be physically contiguous and of a size that the | 32 | * Each RMA has to be physically contiguous and of a size that the |
@@ -29,32 +38,6 @@ | |||
29 | static unsigned long kvm_rma_size = 64 << 20; /* 64MB */ | 38 | static unsigned long kvm_rma_size = 64 << 20; /* 64MB */ |
30 | static unsigned long kvm_rma_count; | 39 | static unsigned long kvm_rma_count; |
31 | 40 | ||
32 | static int __init early_parse_rma_size(char *p) | ||
33 | { | ||
34 | if (!p) | ||
35 | return 1; | ||
36 | |||
37 | kvm_rma_size = memparse(p, &p); | ||
38 | |||
39 | return 0; | ||
40 | } | ||
41 | early_param("kvm_rma_size", early_parse_rma_size); | ||
42 | |||
43 | static int __init early_parse_rma_count(char *p) | ||
44 | { | ||
45 | if (!p) | ||
46 | return 1; | ||
47 | |||
48 | kvm_rma_count = simple_strtoul(p, NULL, 0); | ||
49 | |||
50 | return 0; | ||
51 | } | ||
52 | early_param("kvm_rma_count", early_parse_rma_count); | ||
53 | |||
54 | static struct kvmppc_rma_info *rma_info; | ||
55 | static LIST_HEAD(free_rmas); | ||
56 | static DEFINE_SPINLOCK(rma_lock); | ||
57 | |||
58 | /* Work out RMLS (real mode limit selector) field value for a given RMA size. | 41 | /* Work out RMLS (real mode limit selector) field value for a given RMA size. |
59 | Assumes POWER7 or PPC970. */ | 42 | Assumes POWER7 or PPC970. */ |
60 | static inline int lpcr_rmls(unsigned long rma_size) | 43 | static inline int lpcr_rmls(unsigned long rma_size) |
@@ -81,45 +64,106 @@ static inline int lpcr_rmls(unsigned long rma_size) | |||
81 | } | 64 | } |
82 | } | 65 | } |
83 | 66 | ||
67 | static int __init early_parse_rma_size(char *p) | ||
68 | { | ||
69 | if (!p) | ||
70 | return 1; | ||
71 | |||
72 | kvm_rma_size = memparse(p, &p); | ||
73 | |||
74 | return 0; | ||
75 | } | ||
76 | early_param("kvm_rma_size", early_parse_rma_size); | ||
77 | |||
78 | static int __init early_parse_rma_count(char *p) | ||
79 | { | ||
80 | if (!p) | ||
81 | return 1; | ||
82 | |||
83 | kvm_rma_count = simple_strtoul(p, NULL, 0); | ||
84 | |||
85 | return 0; | ||
86 | } | ||
87 | early_param("kvm_rma_count", early_parse_rma_count); | ||
88 | |||
89 | struct kvmppc_linear_info *kvm_alloc_rma(void) | ||
90 | { | ||
91 | return kvm_alloc_linear(KVM_LINEAR_RMA); | ||
92 | } | ||
93 | EXPORT_SYMBOL_GPL(kvm_alloc_rma); | ||
94 | |||
95 | void kvm_release_rma(struct kvmppc_linear_info *ri) | ||
96 | { | ||
97 | kvm_release_linear(ri); | ||
98 | } | ||
99 | EXPORT_SYMBOL_GPL(kvm_release_rma); | ||
100 | |||
101 | /*************** HPT *************/ | ||
102 | |||
84 | /* | 103 | /* |
85 | * Called at boot time while the bootmem allocator is active, | 104 | * This maintains a list of big linear HPT tables that contain the GVA->HPA |
86 | * to allocate contiguous physical memory for the real memory | 105 | * memory mappings. If we don't reserve those early on, we might not be able |
87 | * areas for guests. | 106 | * to get a big (usually 16MB) linear memory region from the kernel anymore. |
88 | */ | 107 | */ |
89 | void __init kvm_rma_init(void) | 108 | |
109 | static unsigned long kvm_hpt_count; | ||
110 | |||
111 | static int __init early_parse_hpt_count(char *p) | ||
112 | { | ||
113 | if (!p) | ||
114 | return 1; | ||
115 | |||
116 | kvm_hpt_count = simple_strtoul(p, NULL, 0); | ||
117 | |||
118 | return 0; | ||
119 | } | ||
120 | early_param("kvm_hpt_count", early_parse_hpt_count); | ||
121 | |||
122 | struct kvmppc_linear_info *kvm_alloc_hpt(void) | ||
123 | { | ||
124 | return kvm_alloc_linear(KVM_LINEAR_HPT); | ||
125 | } | ||
126 | EXPORT_SYMBOL_GPL(kvm_alloc_hpt); | ||
127 | |||
128 | void kvm_release_hpt(struct kvmppc_linear_info *li) | ||
129 | { | ||
130 | kvm_release_linear(li); | ||
131 | } | ||
132 | EXPORT_SYMBOL_GPL(kvm_release_hpt); | ||
133 | |||
134 | /*************** generic *************/ | ||
135 | |||
136 | static LIST_HEAD(free_linears); | ||
137 | static DEFINE_SPINLOCK(linear_lock); | ||
138 | |||
139 | static void __init kvm_linear_init_one(ulong size, int count, int type) | ||
90 | { | 140 | { |
91 | unsigned long i; | 141 | unsigned long i; |
92 | unsigned long j, npages; | 142 | unsigned long j, npages; |
93 | void *rma; | 143 | void *linear; |
94 | struct page *pg; | 144 | struct page *pg; |
145 | const char *typestr; | ||
146 | struct kvmppc_linear_info *linear_info; | ||
95 | 147 | ||
96 | /* Only do this on PPC970 in HV mode */ | 148 | if (!count) |
97 | if (!cpu_has_feature(CPU_FTR_HVMODE) || | ||
98 | !cpu_has_feature(CPU_FTR_ARCH_201)) | ||
99 | return; | ||
100 | |||
101 | if (!kvm_rma_size || !kvm_rma_count) | ||
102 | return; | 149 | return; |
103 | 150 | ||
104 | /* Check that the requested size is one supported in hardware */ | 151 | typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT"; |
105 | if (lpcr_rmls(kvm_rma_size) < 0) { | 152 | |
106 | pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); | 153 | npages = size >> PAGE_SHIFT; |
107 | return; | 154 | linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info)); |
108 | } | 155 | for (i = 0; i < count; ++i) { |
109 | 156 | linear = alloc_bootmem_align(size, size); | |
110 | npages = kvm_rma_size >> PAGE_SHIFT; | 157 | pr_info("Allocated KVM %s at %p (%ld MB)\n", typestr, linear, |
111 | rma_info = alloc_bootmem(kvm_rma_count * sizeof(struct kvmppc_rma_info)); | 158 | size >> 20); |
112 | for (i = 0; i < kvm_rma_count; ++i) { | 159 | linear_info[i].base_virt = linear; |
113 | rma = alloc_bootmem_align(kvm_rma_size, kvm_rma_size); | 160 | linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT; |
114 | pr_info("Allocated KVM RMA at %p (%ld MB)\n", rma, | 161 | linear_info[i].npages = npages; |
115 | kvm_rma_size >> 20); | 162 | linear_info[i].type = type; |
116 | rma_info[i].base_virt = rma; | 163 | list_add_tail(&linear_info[i].list, &free_linears); |
117 | rma_info[i].base_pfn = __pa(rma) >> PAGE_SHIFT; | 164 | atomic_set(&linear_info[i].use_count, 0); |
118 | rma_info[i].npages = npages; | 165 | |
119 | list_add_tail(&rma_info[i].list, &free_rmas); | 166 | pg = pfn_to_page(linear_info[i].base_pfn); |
120 | atomic_set(&rma_info[i].use_count, 0); | ||
121 | |||
122 | pg = pfn_to_page(rma_info[i].base_pfn); | ||
123 | for (j = 0; j < npages; ++j) { | 167 | for (j = 0; j < npages; ++j) { |
124 | atomic_inc(&pg->_count); | 168 | atomic_inc(&pg->_count); |
125 | ++pg; | 169 | ++pg; |
@@ -127,30 +171,59 @@ void __init kvm_rma_init(void) | |||
127 | } | 171 | } |
128 | } | 172 | } |
129 | 173 | ||
130 | struct kvmppc_rma_info *kvm_alloc_rma(void) | 174 | static struct kvmppc_linear_info *kvm_alloc_linear(int type) |
131 | { | 175 | { |
132 | struct kvmppc_rma_info *ri; | 176 | struct kvmppc_linear_info *ri; |
133 | 177 | ||
134 | ri = NULL; | 178 | ri = NULL; |
135 | spin_lock(&rma_lock); | 179 | spin_lock(&linear_lock); |
136 | if (!list_empty(&free_rmas)) { | 180 | list_for_each_entry(ri, &free_linears, list) { |
137 | ri = list_first_entry(&free_rmas, struct kvmppc_rma_info, list); | 181 | if (ri->type != type) |
182 | continue; | ||
183 | |||
138 | list_del(&ri->list); | 184 | list_del(&ri->list); |
139 | atomic_inc(&ri->use_count); | 185 | atomic_inc(&ri->use_count); |
186 | break; | ||
140 | } | 187 | } |
141 | spin_unlock(&rma_lock); | 188 | spin_unlock(&linear_lock); |
189 | memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT); | ||
142 | return ri; | 190 | return ri; |
143 | } | 191 | } |
144 | EXPORT_SYMBOL_GPL(kvm_alloc_rma); | ||
145 | 192 | ||
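One hazard in kvm_alloc_linear() as written: if the scan finds no entry of the requested type (or the list is empty), list_for_each_entry() leaves ri pointing at the list head's container rather than NULL, so both the unconditional memset() and the return value operate on garbage. A defensive sketch of the same function using a separate result pointer:

static struct kvmppc_linear_info *kvm_alloc_linear(int type)
{
        struct kvmppc_linear_info *ri, *found = NULL;

        spin_lock(&linear_lock);
        list_for_each_entry(ri, &free_linears, list) {
                if (ri->type != type)
                        continue;
                list_del(&ri->list);
                atomic_inc(&ri->use_count);
                found = ri;
                break;
        }
        spin_unlock(&linear_lock);

        if (found)      /* only zero a region we actually own */
                memset(found->base_virt, 0, found->npages << PAGE_SHIFT);
        return found;
}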
146 | void kvm_release_rma(struct kvmppc_rma_info *ri) | 193 | static void kvm_release_linear(struct kvmppc_linear_info *ri) |
147 | { | 194 | { |
148 | if (atomic_dec_and_test(&ri->use_count)) { | 195 | if (atomic_dec_and_test(&ri->use_count)) { |
149 | spin_lock(&rma_lock); | 196 | spin_lock(&linear_lock); |
150 | list_add_tail(&ri->list, &free_rmas); | 197 | list_add_tail(&ri->list, &free_linears); |
151 | spin_unlock(&rma_lock); | 198 | spin_unlock(&linear_lock); |
152 | 199 | ||
153 | } | 200 | } |
154 | } | 201 | } |
155 | EXPORT_SYMBOL_GPL(kvm_release_rma); | ||
156 | 202 | ||
203 | /* | ||
204 | * Called at boot time while the bootmem allocator is active, | ||
205 | * to allocate contiguous physical memory for the hash page | ||
206 | * tables for guests. | ||
207 | */ | ||
208 | void __init kvm_linear_init(void) | ||
209 | { | ||
210 | /* HPT */ | ||
211 | kvm_linear_init_one(1 << HPT_ORDER, kvm_hpt_count, KVM_LINEAR_HPT); | ||
212 | |||
213 | /* RMA */ | ||
214 | /* Only do this on PPC970 in HV mode */ | ||
215 | if (!cpu_has_feature(CPU_FTR_HVMODE) || | ||
216 | !cpu_has_feature(CPU_FTR_ARCH_201)) | ||
217 | return; | ||
218 | |||
219 | if (!kvm_rma_size || !kvm_rma_count) | ||
220 | return; | ||
221 | |||
222 | /* Check that the requested size is one supported in hardware */ | ||
223 | if (lpcr_rmls(kvm_rma_size) < 0) { | ||
224 | pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size); | ||
225 | return; | ||
226 | } | ||
227 | |||
228 | kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA); | ||
229 | } | ||
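kvm_linear_init() runs while the bootmem allocator is still live, so the pool sizes can only come from the kernel command line. For example, booting with "kvm_rma_size=64M kvm_rma_count=4 kvm_hpt_count=4" would reserve four 64MB RMAs (PPC970/HV only, and only if lpcr_rmls() accepts the size; kvm_rma_size is parsed with memparse(), so size suffixes work) plus four hash page tables of 1 << HPT_ORDER bytes each. The numbers here are illustrative, not mandated by the patch.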
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index bacb0cfa3602..def880aea63a 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/kvm.h> | 11 | #include <linux/kvm.h> |
12 | #include <linux/kvm_host.h> | 12 | #include <linux/kvm_host.h> |
13 | #include <linux/hugetlb.h> | 13 | #include <linux/hugetlb.h> |
14 | #include <linux/module.h> | ||
14 | 15 | ||
15 | #include <asm/tlbflush.h> | 16 | #include <asm/tlbflush.h> |
16 | #include <asm/kvm_ppc.h> | 17 | #include <asm/kvm_ppc.h> |
@@ -20,95 +21,307 @@ | |||
20 | #include <asm/synch.h> | 21 | #include <asm/synch.h> |
21 | #include <asm/ppc-opcode.h> | 22 | #include <asm/ppc-opcode.h> |
22 | 23 | ||
23 | /* For now use fixed-size 16MB page table */ | 24 | /* Translate address of a vmalloc'd thing to a linear map address */ |
24 | #define HPT_ORDER 24 | 25 | static void *real_vmalloc_addr(void *x) |
25 | #define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */ | 26 | { |
26 | #define HPT_HASH_MASK (HPT_NPTEG - 1) | 27 | unsigned long addr = (unsigned long) x; |
28 | pte_t *p; | ||
27 | 29 | ||
28 | #define HPTE_V_HVLOCK 0x40UL | 30 | p = find_linux_pte(swapper_pg_dir, addr); |
31 | if (!p || !pte_present(*p)) | ||
32 | return NULL; | ||
33 | /* assume we don't have huge pages in vmalloc space... */ | ||
34 | addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK); | ||
35 | return __va(addr); | ||
36 | } | ||
29 | 37 | ||
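real_vmalloc_addr() exists because these hcall handlers can run in real mode (MMU off), where a vmalloc address cannot be dereferenced directly; the helper walks the kernel page table once and hands back the linear-map alias of the same page, which is safe to access with translation disabled. That is why the revmap and rmap pointers below are passed through it whenever realmode is true.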
30 | static inline long lock_hpte(unsigned long *hpte, unsigned long bits) | 38 | /* |
39 | * Add this HPTE into the chain for the real page. | ||
40 | * Must be called with the chain locked; it unlocks the chain. | ||
41 | */ | ||
42 | void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, | ||
43 | unsigned long *rmap, long pte_index, int realmode) | ||
31 | { | 44 | { |
32 | unsigned long tmp, old; | 45 | struct revmap_entry *head, *tail; |
46 | unsigned long i; | ||
33 | 47 | ||
34 | asm volatile(" ldarx %0,0,%2\n" | 48 | if (*rmap & KVMPPC_RMAP_PRESENT) { |
35 | " and. %1,%0,%3\n" | 49 | i = *rmap & KVMPPC_RMAP_INDEX; |
36 | " bne 2f\n" | 50 | head = &kvm->arch.revmap[i]; |
37 | " ori %0,%0,%4\n" | 51 | if (realmode) |
38 | " stdcx. %0,0,%2\n" | 52 | head = real_vmalloc_addr(head); |
39 | " beq+ 2f\n" | 53 | tail = &kvm->arch.revmap[head->back]; |
40 | " li %1,%3\n" | 54 | if (realmode) |
41 | "2: isync" | 55 | tail = real_vmalloc_addr(tail); |
42 | : "=&r" (tmp), "=&r" (old) | 56 | rev->forw = i; |
43 | : "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK) | 57 | rev->back = head->back; |
44 | : "cc", "memory"); | 58 | tail->forw = pte_index; |
45 | return old == 0; | 59 | head->back = pte_index; |
60 | } else { | ||
61 | rev->forw = rev->back = pte_index; | ||
62 | i = pte_index; | ||
63 | } | ||
64 | smp_wmb(); | ||
65 | *rmap = i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */ | ||
66 | } | ||
67 | EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); | ||
68 | |||
69 | /* Remove this HPTE from the chain for a real page */ | ||
70 | static void remove_revmap_chain(struct kvm *kvm, long pte_index, | ||
71 | struct revmap_entry *rev, | ||
72 | unsigned long hpte_v, unsigned long hpte_r) | ||
73 | { | ||
74 | struct revmap_entry *next, *prev; | ||
75 | unsigned long gfn, ptel, head; | ||
76 | struct kvm_memory_slot *memslot; | ||
77 | unsigned long *rmap; | ||
78 | unsigned long rcbits; | ||
79 | |||
80 | rcbits = hpte_r & (HPTE_R_R | HPTE_R_C); | ||
81 | ptel = rev->guest_rpte |= rcbits; | ||
82 | gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel)); | ||
83 | memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); | ||
84 | if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) | ||
85 | return; | ||
86 | |||
87 | rmap = real_vmalloc_addr(&memslot->rmap[gfn - memslot->base_gfn]); | ||
88 | lock_rmap(rmap); | ||
89 | |||
90 | head = *rmap & KVMPPC_RMAP_INDEX; | ||
91 | next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]); | ||
92 | prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]); | ||
93 | next->back = rev->back; | ||
94 | prev->forw = rev->forw; | ||
95 | if (head == pte_index) { | ||
96 | head = rev->forw; | ||
97 | if (head == pte_index) | ||
98 | *rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX); | ||
99 | else | ||
100 | *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head; | ||
101 | } | ||
102 | *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT; | ||
103 | unlock_rmap(rmap); | ||
104 | } | ||
105 | |||
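Taken together, kvmppc_add_revmap_chain() and remove_revmap_chain() maintain a circular, doubly linked list of HPTE indices per guest real page, anchored in the memslot's rmap word. A traversal sketch under the same locking rules (visit_hpte() is a hypothetical callback, not a kernel function):

/* Sketch: visit every HPTE currently mapping one guest real page.
 * The caller must hold the rmap chain lock, as the helpers above do. */
static void for_each_revmap_entry(struct kvm *kvm, unsigned long *rmap)
{
        unsigned long head, i;

        if (!(*rmap & KVMPPC_RMAP_PRESENT))
                return;                         /* no HPTEs for this page */
        head = i = *rmap & KVMPPC_RMAP_INDEX;
        do {
                visit_hpte(kvm, i);             /* hypothetical callback */
                i = kvm->arch.revmap[i].forw;
        } while (i != head);
}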
106 | static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva, | ||
107 | int writing, unsigned long *pte_sizep) | ||
108 | { | ||
109 | pte_t *ptep; | ||
110 | unsigned long ps = *pte_sizep; | ||
111 | unsigned int shift; | ||
112 | |||
113 | ptep = find_linux_pte_or_hugepte(vcpu->arch.pgdir, hva, &shift); | ||
114 | if (!ptep) | ||
115 | return __pte(0); | ||
116 | if (shift) | ||
117 | *pte_sizep = 1ul << shift; | ||
118 | else | ||
119 | *pte_sizep = PAGE_SIZE; | ||
120 | if (ps > *pte_sizep) | ||
121 | return __pte(0); | ||
122 | if (!pte_present(*ptep)) | ||
123 | return __pte(0); | ||
124 | return kvmppc_read_update_linux_pte(ptep, writing); | ||
125 | } | ||
126 | |||
127 | static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v) | ||
128 | { | ||
129 | asm volatile(PPC_RELEASE_BARRIER "" : : : "memory"); | ||
130 | hpte[0] = hpte_v; | ||
46 | } | 131 | } |
47 | 132 | ||
48 | long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, | 133 | long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, |
49 | long pte_index, unsigned long pteh, unsigned long ptel) | 134 | long pte_index, unsigned long pteh, unsigned long ptel) |
50 | { | 135 | { |
51 | unsigned long porder; | ||
52 | struct kvm *kvm = vcpu->kvm; | 136 | struct kvm *kvm = vcpu->kvm; |
53 | unsigned long i, lpn, pa; | 137 | unsigned long i, pa, gpa, gfn, psize; |
138 | unsigned long slot_fn, hva; | ||
54 | unsigned long *hpte; | 139 | unsigned long *hpte; |
140 | struct revmap_entry *rev; | ||
141 | unsigned long g_ptel = ptel; | ||
142 | struct kvm_memory_slot *memslot; | ||
143 | unsigned long *physp, pte_size; | ||
144 | unsigned long is_io; | ||
145 | unsigned long *rmap; | ||
146 | pte_t pte; | ||
147 | unsigned int writing; | ||
148 | unsigned long mmu_seq; | ||
149 | unsigned long rcbits; | ||
150 | bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING; | ||
55 | 151 | ||
56 | /* only handle 4k, 64k and 16M pages for now */ | 152 | psize = hpte_page_size(pteh, ptel); |
57 | porder = 12; | 153 | if (!psize) |
58 | if (pteh & HPTE_V_LARGE) { | 154 | return H_PARAMETER; |
59 | if (cpu_has_feature(CPU_FTR_ARCH_206) && | 155 | writing = hpte_is_writable(ptel); |
60 | (ptel & 0xf000) == 0x1000) { | 156 | pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); |
61 | /* 64k page */ | 157 | |
62 | porder = 16; | 158 | /* used later to detect if we might have been invalidated */ |
63 | } else if ((ptel & 0xff000) == 0) { | 159 | mmu_seq = kvm->mmu_notifier_seq; |
64 | /* 16M page */ | 160 | smp_rmb(); |
65 | porder = 24; | 161 | |
66 | /* lowest AVA bit must be 0 for 16M pages */ | 162 | /* Find the memslot (if any) for this address */ |
67 | if (pteh & 0x80) | 163 | gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); |
68 | return H_PARAMETER; | 164 | gfn = gpa >> PAGE_SHIFT; |
69 | } else | 165 | memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); |
166 | pa = 0; | ||
167 | is_io = ~0ul; | ||
168 | rmap = NULL; | ||
169 | if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) { | ||
170 | /* PPC970 can't do emulated MMIO */ | ||
171 | if (!cpu_has_feature(CPU_FTR_ARCH_206)) | ||
70 | return H_PARAMETER; | 172 | return H_PARAMETER; |
173 | /* Emulated MMIO - mark this with key=31 */ | ||
174 | pteh |= HPTE_V_ABSENT; | ||
175 | ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO; | ||
176 | goto do_insert; | ||
71 | } | 177 | } |
72 | lpn = (ptel & HPTE_R_RPN) >> kvm->arch.ram_porder; | 178 | |
73 | if (lpn >= kvm->arch.ram_npages || porder > kvm->arch.ram_porder) | 179 | /* Check if the requested page fits entirely in the memslot. */ |
74 | return H_PARAMETER; | 180 | if (!slot_is_aligned(memslot, psize)) |
75 | pa = kvm->arch.ram_pginfo[lpn].pfn << PAGE_SHIFT; | ||
76 | if (!pa) | ||
77 | return H_PARAMETER; | 181 | return H_PARAMETER; |
78 | /* Check WIMG */ | 182 | slot_fn = gfn - memslot->base_gfn; |
79 | if ((ptel & HPTE_R_WIMG) != HPTE_R_M && | 183 | rmap = &memslot->rmap[slot_fn]; |
80 | (ptel & HPTE_R_WIMG) != (HPTE_R_W | HPTE_R_I | HPTE_R_M)) | 184 | |
185 | if (!kvm->arch.using_mmu_notifiers) { | ||
186 | physp = kvm->arch.slot_phys[memslot->id]; | ||
187 | if (!physp) | ||
188 | return H_PARAMETER; | ||
189 | physp += slot_fn; | ||
190 | if (realmode) | ||
191 | physp = real_vmalloc_addr(physp); | ||
192 | pa = *physp; | ||
193 | if (!pa) | ||
194 | return H_TOO_HARD; | ||
195 | is_io = pa & (HPTE_R_I | HPTE_R_W); | ||
196 | pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK); | ||
197 | pa &= PAGE_MASK; | ||
198 | } else { | ||
199 | /* Translate to host virtual address */ | ||
200 | hva = gfn_to_hva_memslot(memslot, gfn); | ||
201 | |||
202 | /* Look up the Linux PTE for the backing page */ | ||
203 | pte_size = psize; | ||
204 | pte = lookup_linux_pte(vcpu, hva, writing, &pte_size); | ||
205 | if (pte_present(pte)) { | ||
206 | if (writing && !pte_write(pte)) | ||
207 | /* make the actual HPTE be read-only */ | ||
208 | ptel = hpte_make_readonly(ptel); | ||
209 | is_io = hpte_cache_bits(pte_val(pte)); | ||
210 | pa = pte_pfn(pte) << PAGE_SHIFT; | ||
211 | } | ||
212 | } | ||
213 | if (pte_size < psize) | ||
81 | return H_PARAMETER; | 214 | return H_PARAMETER; |
82 | pteh &= ~0x60UL; | 215 | if (pa && pte_size > psize) |
83 | ptel &= ~(HPTE_R_PP0 - kvm->arch.ram_psize); | 216 | pa |= gpa & (pte_size - 1); |
217 | |||
218 | ptel &= ~(HPTE_R_PP0 - psize); | ||
84 | ptel |= pa; | 219 | ptel |= pa; |
85 | if (pte_index >= (HPT_NPTEG << 3)) | 220 | |
221 | if (pa) | ||
222 | pteh |= HPTE_V_VALID; | ||
223 | else | ||
224 | pteh |= HPTE_V_ABSENT; | ||
225 | |||
226 | /* Check WIMG */ | ||
227 | if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) { | ||
228 | if (is_io) | ||
229 | return H_PARAMETER; | ||
230 | /* | ||
231 | * Allow guest to map emulated device memory as | ||
232 | * uncacheable, but actually make it cacheable. | ||
233 | */ | ||
234 | ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G); | ||
235 | ptel |= HPTE_R_M; | ||
236 | } | ||
237 | |||
238 | /* Find and lock the HPTEG slot to use */ | ||
239 | do_insert: | ||
240 | if (pte_index >= HPT_NPTE) | ||
86 | return H_PARAMETER; | 241 | return H_PARAMETER; |
87 | if (likely((flags & H_EXACT) == 0)) { | 242 | if (likely((flags & H_EXACT) == 0)) { |
88 | pte_index &= ~7UL; | 243 | pte_index &= ~7UL; |
89 | hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); | 244 | hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); |
90 | for (i = 0; ; ++i) { | 245 | for (i = 0; i < 8; ++i) { |
91 | if (i == 8) | ||
92 | return H_PTEG_FULL; | ||
93 | if ((*hpte & HPTE_V_VALID) == 0 && | 246 | if ((*hpte & HPTE_V_VALID) == 0 && |
94 | lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) | 247 | try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | |
248 | HPTE_V_ABSENT)) | ||
95 | break; | 249 | break; |
96 | hpte += 2; | 250 | hpte += 2; |
97 | } | 251 | } |
252 | if (i == 8) { | ||
253 | /* | ||
254 | * Since try_lock_hpte doesn't retry (not even stdcx. | ||
255 | * failures), it could be that there is a free slot | ||
256 | * but we transiently failed to lock it. Try again, | ||
257 | * actually locking each slot and checking it. | ||
258 | */ | ||
259 | hpte -= 16; | ||
260 | for (i = 0; i < 8; ++i) { | ||
261 | while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) | ||
262 | cpu_relax(); | ||
263 | if (!(*hpte & (HPTE_V_VALID | HPTE_V_ABSENT))) | ||
264 | break; | ||
265 | *hpte &= ~HPTE_V_HVLOCK; | ||
266 | hpte += 2; | ||
267 | } | ||
268 | if (i == 8) | ||
269 | return H_PTEG_FULL; | ||
270 | } | ||
271 | pte_index += i; | ||
98 | } else { | 272 | } else { |
99 | i = 0; | ||
100 | hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); | 273 | hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); |
101 | if (!lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) | 274 | if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | |
102 | return H_PTEG_FULL; | 275 | HPTE_V_ABSENT)) { |
276 | /* Lock the slot and check again */ | ||
277 | while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) | ||
278 | cpu_relax(); | ||
279 | if (*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)) { | ||
280 | *hpte &= ~HPTE_V_HVLOCK; | ||
281 | return H_PTEG_FULL; | ||
282 | } | ||
283 | } | ||
103 | } | 284 | } |
285 | |||
286 | /* Save away the guest's idea of the second HPTE dword */ | ||
287 | rev = &kvm->arch.revmap[pte_index]; | ||
288 | if (realmode) | ||
289 | rev = real_vmalloc_addr(rev); | ||
290 | if (rev) | ||
291 | rev->guest_rpte = g_ptel; | ||
292 | |||
293 | /* Link HPTE into reverse-map chain */ | ||
294 | if (pteh & HPTE_V_VALID) { | ||
295 | if (realmode) | ||
296 | rmap = real_vmalloc_addr(rmap); | ||
297 | lock_rmap(rmap); | ||
298 | /* Check for pending invalidations under the rmap chain lock */ | ||
299 | if (kvm->arch.using_mmu_notifiers && | ||
300 | mmu_notifier_retry(vcpu, mmu_seq)) { | ||
301 | /* inval in progress, write a non-present HPTE */ | ||
302 | pteh |= HPTE_V_ABSENT; | ||
303 | pteh &= ~HPTE_V_VALID; | ||
304 | unlock_rmap(rmap); | ||
305 | } else { | ||
306 | kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, | ||
307 | realmode); | ||
308 | /* Only set R/C in real HPTE if already set in *rmap */ | ||
309 | rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; | ||
310 | ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C); | ||
311 | } | ||
312 | } | ||
313 | |||
104 | hpte[1] = ptel; | 314 | hpte[1] = ptel; |
315 | |||
316 | /* Write the first HPTE dword, unlocking the HPTE and making it valid */ | ||
105 | eieio(); | 317 | eieio(); |
106 | hpte[0] = pteh; | 318 | hpte[0] = pteh; |
107 | asm volatile("ptesync" : : : "memory"); | 319 | asm volatile("ptesync" : : : "memory"); |
108 | atomic_inc(&kvm->arch.ram_pginfo[lpn].refcnt); | 320 | |
109 | vcpu->arch.gpr[4] = pte_index + i; | 321 | vcpu->arch.gpr[4] = pte_index; |
110 | return H_SUCCESS; | 322 | return H_SUCCESS; |
111 | } | 323 | } |
324 | EXPORT_SYMBOL_GPL(kvmppc_h_enter); | ||
112 | 325 | ||
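After this rewrite an HPTE slot is "in use" if either HPTE_V_VALID (backed by a real page) or HPTE_V_ABSENT (guest-visible but not hardware-valid: emulated MMIO or paged out) is set, which is why every slot scan above tests both bits. A one-line predicate capturing the invariant (a sketch; HPTE_V_ABSENT is the KVM-private software bit this series introduces):

/* Sketch: a slot is free only when neither bit is set. */
static inline int hpte_slot_in_use(unsigned long hpte_v)
{
        return (hpte_v & (HPTE_V_VALID | HPTE_V_ABSENT)) != 0;
}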
113 | #define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) | 326 | #define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) |
114 | 327 | ||
@@ -137,37 +350,46 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, | |||
137 | struct kvm *kvm = vcpu->kvm; | 350 | struct kvm *kvm = vcpu->kvm; |
138 | unsigned long *hpte; | 351 | unsigned long *hpte; |
139 | unsigned long v, r, rb; | 352 | unsigned long v, r, rb; |
353 | struct revmap_entry *rev; | ||
140 | 354 | ||
141 | if (pte_index >= (HPT_NPTEG << 3)) | 355 | if (pte_index >= HPT_NPTE) |
142 | return H_PARAMETER; | 356 | return H_PARAMETER; |
143 | hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); | 357 | hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); |
144 | while (!lock_hpte(hpte, HPTE_V_HVLOCK)) | 358 | while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) |
145 | cpu_relax(); | 359 | cpu_relax(); |
146 | if ((hpte[0] & HPTE_V_VALID) == 0 || | 360 | if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || |
147 | ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) || | 361 | ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) || |
148 | ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) { | 362 | ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) { |
149 | hpte[0] &= ~HPTE_V_HVLOCK; | 363 | hpte[0] &= ~HPTE_V_HVLOCK; |
150 | return H_NOT_FOUND; | 364 | return H_NOT_FOUND; |
151 | } | 365 | } |
152 | if (atomic_read(&kvm->online_vcpus) == 1) | 366 | |
153 | flags |= H_LOCAL; | 367 | rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); |
154 | vcpu->arch.gpr[4] = v = hpte[0] & ~HPTE_V_HVLOCK; | 368 | v = hpte[0] & ~HPTE_V_HVLOCK; |
155 | vcpu->arch.gpr[5] = r = hpte[1]; | 369 | if (v & HPTE_V_VALID) { |
156 | rb = compute_tlbie_rb(v, r, pte_index); | 370 | hpte[0] &= ~HPTE_V_VALID; |
157 | hpte[0] = 0; | 371 | rb = compute_tlbie_rb(v, hpte[1], pte_index); |
158 | if (!(flags & H_LOCAL)) { | 372 | if (!(flags & H_LOCAL) && atomic_read(&kvm->online_vcpus) > 1) { |
159 | while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) | 373 | while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) |
160 | cpu_relax(); | 374 | cpu_relax(); |
161 | asm volatile("ptesync" : : : "memory"); | 375 | asm volatile("ptesync" : : : "memory"); |
162 | asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" | 376 | asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" |
163 | : : "r" (rb), "r" (kvm->arch.lpid)); | 377 | : : "r" (rb), "r" (kvm->arch.lpid)); |
164 | asm volatile("ptesync" : : : "memory"); | 378 | asm volatile("ptesync" : : : "memory"); |
165 | kvm->arch.tlbie_lock = 0; | 379 | kvm->arch.tlbie_lock = 0; |
166 | } else { | 380 | } else { |
167 | asm volatile("ptesync" : : : "memory"); | 381 | asm volatile("ptesync" : : : "memory"); |
168 | asm volatile("tlbiel %0" : : "r" (rb)); | 382 | asm volatile("tlbiel %0" : : "r" (rb)); |
169 | asm volatile("ptesync" : : : "memory"); | 383 | asm volatile("ptesync" : : : "memory"); |
384 | } | ||
385 | /* Read PTE low word after tlbie to get final R/C values */ | ||
386 | remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]); | ||
170 | } | 387 | } |
388 | r = rev->guest_rpte; | ||
389 | unlock_hpte(hpte, 0); | ||
390 | |||
391 | vcpu->arch.gpr[4] = v; | ||
392 | vcpu->arch.gpr[5] = r; | ||
171 | return H_SUCCESS; | 393 | return H_SUCCESS; |
172 | } | 394 | } |
173 | 395 | ||
@@ -175,78 +397,117 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) | |||
175 | { | 397 | { |
176 | struct kvm *kvm = vcpu->kvm; | 398 | struct kvm *kvm = vcpu->kvm; |
177 | unsigned long *args = &vcpu->arch.gpr[4]; | 399 | unsigned long *args = &vcpu->arch.gpr[4]; |
178 | unsigned long *hp, tlbrb[4]; | 400 | unsigned long *hp, *hptes[4], tlbrb[4]; |
179 | long int i, found; | 401 | long int i, j, k, n, found, indexes[4]; |
180 | long int n_inval = 0; | 402 | unsigned long flags, req, pte_index, rcbits; |
181 | unsigned long flags, req, pte_index; | ||
182 | long int local = 0; | 403 | long int local = 0; |
183 | long int ret = H_SUCCESS; | 404 | long int ret = H_SUCCESS; |
405 | struct revmap_entry *rev, *revs[4]; | ||
184 | 406 | ||
185 | if (atomic_read(&kvm->online_vcpus) == 1) | 407 | if (atomic_read(&kvm->online_vcpus) == 1) |
186 | local = 1; | 408 | local = 1; |
187 | for (i = 0; i < 4; ++i) { | 409 | for (i = 0; i < 4 && ret == H_SUCCESS; ) { |
188 | pte_index = args[i * 2]; | 410 | n = 0; |
189 | flags = pte_index >> 56; | 411 | for (; i < 4; ++i) { |
190 | pte_index &= ((1ul << 56) - 1); | 412 | j = i * 2; |
191 | req = flags >> 6; | 413 | pte_index = args[j]; |
192 | flags &= 3; | 414 | flags = pte_index >> 56; |
193 | if (req == 3) | 415 | pte_index &= ((1ul << 56) - 1); |
194 | break; | 416 | req = flags >> 6; |
195 | if (req != 1 || flags == 3 || | 417 | flags &= 3; |
196 | pte_index >= (HPT_NPTEG << 3)) { | 418 | if (req == 3) { /* no more requests */ |
197 | /* parameter error */ | 419 | i = 4; |
198 | args[i * 2] = ((0xa0 | flags) << 56) + pte_index; | ||
199 | ret = H_PARAMETER; | ||
200 | break; | ||
201 | } | ||
202 | hp = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); | ||
203 | while (!lock_hpte(hp, HPTE_V_HVLOCK)) | ||
204 | cpu_relax(); | ||
205 | found = 0; | ||
206 | if (hp[0] & HPTE_V_VALID) { | ||
207 | switch (flags & 3) { | ||
208 | case 0: /* absolute */ | ||
209 | found = 1; | ||
210 | break; | 420 | break; |
211 | case 1: /* andcond */ | 421 | } |
212 | if (!(hp[0] & args[i * 2 + 1])) | 422 | if (req != 1 || flags == 3 || pte_index >= HPT_NPTE) { |
213 | found = 1; | 423 | /* parameter error */ |
424 | args[j] = ((0xa0 | flags) << 56) + pte_index; | ||
425 | ret = H_PARAMETER; | ||
214 | break; | 426 | break; |
215 | case 2: /* AVPN */ | 427 | } |
216 | if ((hp[0] & ~0x7fUL) == args[i * 2 + 1]) | 428 | hp = (unsigned long *) |
429 | (kvm->arch.hpt_virt + (pte_index << 4)); | ||
430 | /* to avoid deadlock, don't spin except for first */ | ||
431 | if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) { | ||
432 | if (n) | ||
433 | break; | ||
434 | while (!try_lock_hpte(hp, HPTE_V_HVLOCK)) | ||
435 | cpu_relax(); | ||
436 | } | ||
437 | found = 0; | ||
438 | if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) { | ||
439 | switch (flags & 3) { | ||
440 | case 0: /* absolute */ | ||
217 | found = 1; | 441 | found = 1; |
218 | break; | 442 | break; |
443 | case 1: /* andcond */ | ||
444 | if (!(hp[0] & args[j + 1])) | ||
445 | found = 1; | ||
446 | break; | ||
447 | case 2: /* AVPN */ | ||
448 | if ((hp[0] & ~0x7fUL) == args[j + 1]) | ||
449 | found = 1; | ||
450 | break; | ||
451 | } | ||
452 | } | ||
453 | if (!found) { | ||
454 | hp[0] &= ~HPTE_V_HVLOCK; | ||
455 | args[j] = ((0x90 | flags) << 56) + pte_index; | ||
456 | continue; | ||
219 | } | 457 | } |
458 | |||
459 | args[j] = ((0x80 | flags) << 56) + pte_index; | ||
460 | rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); | ||
461 | |||
462 | if (!(hp[0] & HPTE_V_VALID)) { | ||
463 | /* insert R and C bits from PTE */ | ||
464 | rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); | ||
465 | args[j] |= rcbits << (56 - 5); | ||
466 | continue; | ||
467 | } | ||
468 | |||
469 | hp[0] &= ~HPTE_V_VALID; /* leave it locked */ | ||
470 | tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index); | ||
471 | indexes[n] = j; | ||
472 | hptes[n] = hp; | ||
473 | revs[n] = rev; | ||
474 | ++n; | ||
475 | } | ||
476 | |||
477 | if (!n) | ||
478 | break; | ||
479 | |||
480 | /* Now that we've collected a batch, do the tlbies */ | ||
481 | if (!local) { | ||
482 | while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) | ||
483 | cpu_relax(); | ||
484 | asm volatile("ptesync" : : : "memory"); | ||
485 | for (k = 0; k < n; ++k) | ||
486 | asm volatile(PPC_TLBIE(%1,%0) : : | ||
487 | "r" (tlbrb[k]), | ||
488 | "r" (kvm->arch.lpid)); | ||
489 | asm volatile("eieio; tlbsync; ptesync" : : : "memory"); | ||
490 | kvm->arch.tlbie_lock = 0; | ||
491 | } else { | ||
492 | asm volatile("ptesync" : : : "memory"); | ||
493 | for (k = 0; k < n; ++k) | ||
494 | asm volatile("tlbiel %0" : : "r" (tlbrb[k])); | ||
495 | asm volatile("ptesync" : : : "memory"); | ||
220 | } | 496 | } |
221 | if (!found) { | 497 | |
222 | hp[0] &= ~HPTE_V_HVLOCK; | 498 | /* Read PTE low words after tlbie to get final R/C values */ |
223 | args[i * 2] = ((0x90 | flags) << 56) + pte_index; | 499 | for (k = 0; k < n; ++k) { |
224 | continue; | 500 | j = indexes[k]; |
501 | pte_index = args[j] & ((1ul << 56) - 1); | ||
502 | hp = hptes[k]; | ||
503 | rev = revs[k]; | ||
504 | remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]); | ||
505 | rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); | ||
506 | args[j] |= rcbits << (56 - 5); | ||
507 | hp[0] = 0; | ||
225 | } | 508 | } |
226 | /* insert R and C bits from PTE */ | ||
227 | flags |= (hp[1] >> 5) & 0x0c; | ||
228 | args[i * 2] = ((0x80 | flags) << 56) + pte_index; | ||
229 | tlbrb[n_inval++] = compute_tlbie_rb(hp[0], hp[1], pte_index); | ||
230 | hp[0] = 0; | ||
231 | } | ||
232 | if (n_inval == 0) | ||
233 | return ret; | ||
234 | |||
235 | if (!local) { | ||
236 | while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) | ||
237 | cpu_relax(); | ||
238 | asm volatile("ptesync" : : : "memory"); | ||
239 | for (i = 0; i < n_inval; ++i) | ||
240 | asm volatile(PPC_TLBIE(%1,%0) | ||
241 | : : "r" (tlbrb[i]), "r" (kvm->arch.lpid)); | ||
242 | asm volatile("eieio; tlbsync; ptesync" : : : "memory"); | ||
243 | kvm->arch.tlbie_lock = 0; | ||
244 | } else { | ||
245 | asm volatile("ptesync" : : : "memory"); | ||
246 | for (i = 0; i < n_inval; ++i) | ||
247 | asm volatile("tlbiel %0" : : "r" (tlbrb[i])); | ||
248 | asm volatile("ptesync" : : : "memory"); | ||
249 | } | 509 | } |
510 | |||
250 | return ret; | 511 | return ret; |
251 | } | 512 | } |
252 | 513 | ||
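H_BULK_REMOVE packs the control information for each request into the top byte of the first doubleword of the pair, and the loop above peels it apart before batching up to four invalidations per tlbie sequence. A decode sketch matching that code (0x80 / 0x90 / 0xa0 are the success, not-found and parameter-error status patterns written back into the same byte):

/* Sketch: unpack one H_BULK_REMOVE request doubleword into its fields. */
static void decode_bulk_remove_arg(unsigned long arg, unsigned long *req,
                                   unsigned long *flags, unsigned long *index)
{
        *req   = (arg >> 62) & 3;       /* 1 = request, 3 = end of list */
        *flags = (arg >> 56) & 3;       /* 0 absolute, 1 andcond, 2 AVPN */
        *index = arg & ((1ul << 56) - 1);
}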
@@ -256,40 +517,55 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, | |||
256 | { | 517 | { |
257 | struct kvm *kvm = vcpu->kvm; | 518 | struct kvm *kvm = vcpu->kvm; |
258 | unsigned long *hpte; | 519 | unsigned long *hpte; |
259 | unsigned long v, r, rb; | 520 | struct revmap_entry *rev; |
521 | unsigned long v, r, rb, mask, bits; | ||
260 | 522 | ||
261 | if (pte_index >= (HPT_NPTEG << 3)) | 523 | if (pte_index >= HPT_NPTE) |
262 | return H_PARAMETER; | 524 | return H_PARAMETER; |
525 | |||
263 | hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); | 526 | hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); |
264 | while (!lock_hpte(hpte, HPTE_V_HVLOCK)) | 527 | while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) |
265 | cpu_relax(); | 528 | cpu_relax(); |
266 | if ((hpte[0] & HPTE_V_VALID) == 0 || | 529 | if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || |
267 | ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) { | 530 | ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) { |
268 | hpte[0] &= ~HPTE_V_HVLOCK; | 531 | hpte[0] &= ~HPTE_V_HVLOCK; |
269 | return H_NOT_FOUND; | 532 | return H_NOT_FOUND; |
270 | } | 533 | } |
534 | |||
271 | if (atomic_read(&kvm->online_vcpus) == 1) | 535 | if (atomic_read(&kvm->online_vcpus) == 1) |
272 | flags |= H_LOCAL; | 536 | flags |= H_LOCAL; |
273 | v = hpte[0]; | 537 | v = hpte[0]; |
274 | r = hpte[1] & ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | | 538 | bits = (flags << 55) & HPTE_R_PP0; |
275 | HPTE_R_KEY_HI | HPTE_R_KEY_LO); | 539 | bits |= (flags << 48) & HPTE_R_KEY_HI; |
276 | r |= (flags << 55) & HPTE_R_PP0; | 540 | bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); |
277 | r |= (flags << 48) & HPTE_R_KEY_HI; | 541 | |
278 | r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); | 542 | /* Update guest view of 2nd HPTE dword */ |
279 | rb = compute_tlbie_rb(v, r, pte_index); | 543 | mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | |
280 | hpte[0] = v & ~HPTE_V_VALID; | 544 | HPTE_R_KEY_HI | HPTE_R_KEY_LO; |
281 | if (!(flags & H_LOCAL)) { | 545 | rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); |
282 | while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) | 546 | if (rev) { |
283 | cpu_relax(); | 547 | r = (rev->guest_rpte & ~mask) | bits; |
284 | asm volatile("ptesync" : : : "memory"); | 548 | rev->guest_rpte = r; |
285 | asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" | 549 | } |
286 | : : "r" (rb), "r" (kvm->arch.lpid)); | 550 | r = (hpte[1] & ~mask) | bits; |
287 | asm volatile("ptesync" : : : "memory"); | 551 | |
288 | kvm->arch.tlbie_lock = 0; | 552 | /* Update HPTE */ |
289 | } else { | 553 | if (v & HPTE_V_VALID) { |
290 | asm volatile("ptesync" : : : "memory"); | 554 | rb = compute_tlbie_rb(v, r, pte_index); |
291 | asm volatile("tlbiel %0" : : "r" (rb)); | 555 | hpte[0] = v & ~HPTE_V_VALID; |
292 | asm volatile("ptesync" : : : "memory"); | 556 | if (!(flags & H_LOCAL)) { |
557 | while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) | ||
558 | cpu_relax(); | ||
559 | asm volatile("ptesync" : : : "memory"); | ||
560 | asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" | ||
561 | : : "r" (rb), "r" (kvm->arch.lpid)); | ||
562 | asm volatile("ptesync" : : : "memory"); | ||
563 | kvm->arch.tlbie_lock = 0; | ||
564 | } else { | ||
565 | asm volatile("ptesync" : : : "memory"); | ||
566 | asm volatile("tlbiel %0" : : "r" (rb)); | ||
567 | asm volatile("ptesync" : : : "memory"); | ||
568 | } | ||
293 | } | 569 | } |
294 | hpte[1] = r; | 570 | hpte[1] = r; |
295 | eieio(); | 571 | eieio(); |
@@ -298,40 +574,243 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, | |||
298 | return H_SUCCESS; | 574 | return H_SUCCESS; |
299 | } | 575 | } |
300 | 576 | ||
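
In the H_PROTECT hunk above, the hypervisor derives the new protection bits once ("bits"), then applies them both to the saved guest view (rev->guest_rpte) and to the hardware PTE, so a later H_READ reports what the guest asked for even if the real PTE is currently absent. A standalone demonstration of the flags-to-HPTE_R mapping; the constants follow the usual HPTE second-dword bit positions but should be treated as illustrative:

#include <assert.h>
#include <stdint.h>

/* illustrative HPTE second-dword bit positions */
#define HPTE_R_PP0	0x8000000000000000UL
#define HPTE_R_KEY_HI	0x3000000000000000UL
#define HPTE_R_KEY_LO	0x0000000000000e00UL
#define HPTE_R_N	0x0000000000000004UL
#define HPTE_R_PP	0x0000000000000003UL

static uint64_t protect_bits(uint64_t flags)
{
	uint64_t bits;

	bits  = (flags << 55) & HPTE_R_PP0;	/* flag bit 8 -> dword bit 63 */
	bits |= (flags << 48) & HPTE_R_KEY_HI;	/* flag bits 13:12 -> 61:60 */
	bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
	return bits;
}

int main(void)
{
	uint64_t mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
			HPTE_R_KEY_HI | HPTE_R_KEY_LO;
	uint64_t r = 0x123456789abcd086UL;	/* old dword, arbitrary */

	/* pp0 set, PP=2, N set: flags = 0x100 | 0x2 | 0x4 */
	uint64_t new_r = (r & ~mask) | protect_bits(0x106);
	assert(new_r & HPTE_R_PP0);
	assert((new_r & HPTE_R_PP) == 2);
	return 0;
}
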
301 | static unsigned long reverse_xlate(struct kvm *kvm, unsigned long realaddr) | ||
302 | { | ||
303 | long int i; | ||
304 | unsigned long offset, rpn; | ||
305 | |||
306 | offset = realaddr & (kvm->arch.ram_psize - 1); | ||
307 | rpn = (realaddr - offset) >> PAGE_SHIFT; | ||
308 | for (i = 0; i < kvm->arch.ram_npages; ++i) | ||
309 | if (rpn == kvm->arch.ram_pginfo[i].pfn) | ||
310 | return (i << PAGE_SHIFT) + offset; | ||
311 | return HPTE_R_RPN; /* all 1s in the RPN field */ | ||
312 | } | ||
313 | |||
314 | long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, | 577 | long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, |
315 | unsigned long pte_index) | 578 | unsigned long pte_index) |
316 | { | 579 | { |
317 | struct kvm *kvm = vcpu->kvm; | 580 | struct kvm *kvm = vcpu->kvm; |
318 | unsigned long *hpte, r; | 581 | unsigned long *hpte, v, r; |
319 | int i, n = 1; | 582 | int i, n = 1; |
583 | struct revmap_entry *rev = NULL; | ||
320 | 584 | ||
321 | if (pte_index >= (HPT_NPTEG << 3)) | 585 | if (pte_index >= HPT_NPTE) |
322 | return H_PARAMETER; | 586 | return H_PARAMETER; |
323 | if (flags & H_READ_4) { | 587 | if (flags & H_READ_4) { |
324 | pte_index &= ~3; | 588 | pte_index &= ~3; |
325 | n = 4; | 589 | n = 4; |
326 | } | 590 | } |
591 | rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); | ||
327 | for (i = 0; i < n; ++i, ++pte_index) { | 592 | for (i = 0; i < n; ++i, ++pte_index) { |
328 | hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); | 593 | hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); |
594 | v = hpte[0] & ~HPTE_V_HVLOCK; | ||
329 | r = hpte[1]; | 595 | r = hpte[1]; |
330 | if ((flags & H_R_XLATE) && (hpte[0] & HPTE_V_VALID)) | 596 | if (v & HPTE_V_ABSENT) { |
331 | r = reverse_xlate(kvm, r & HPTE_R_RPN) | | 597 | v &= ~HPTE_V_ABSENT; |
332 | (r & ~HPTE_R_RPN); | 598 | v |= HPTE_V_VALID; |
333 | vcpu->arch.gpr[4 + i * 2] = hpte[0]; | 599 | } |
600 | if (v & HPTE_V_VALID) | ||
601 | r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C)); | ||
602 | vcpu->arch.gpr[4 + i * 2] = v; | ||
334 | vcpu->arch.gpr[5 + i * 2] = r; | 603 | vcpu->arch.gpr[5 + i * 2] = r; |
335 | } | 604 | } |
336 | return H_SUCCESS; | 605 | return H_SUCCESS; |
337 | } | 606 | } |
607 | |||
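
kvmppc_h_read above hides the paging machinery from the guest: an entry the host marked HPTE_V_ABSENT (software-invalid because the page was paged out) is reported back as valid, and the returned second dword is the guest's own view from the revmap plus the live hardware R/C update bits. A hedged sketch of that fix-up step; the bit values here are illustrative, not the kernel's definitions:

#include <stdint.h>

#define HPTE_V_VALID	(1UL << 0)	/* illustrative positions */
#define HPTE_V_ABSENT	(1UL << 1)
#define HPTE_R_R	0x100UL
#define HPTE_R_C	0x080UL

struct hpte_view { uint64_t v, r; };

static struct hpte_view guest_view(uint64_t v, uint64_t r,
				   uint64_t guest_rpte)
{
	struct hpte_view out;

	if (v & HPTE_V_ABSENT) {	/* paged out: present it as valid */
		v &= ~HPTE_V_ABSENT;
		v |= HPTE_V_VALID;
	}
	out.v = v;
	/* the guest's dword, plus the current referenced/changed bits */
	out.r = (v & HPTE_V_VALID)
		? (guest_rpte | (r & (HPTE_R_R | HPTE_R_C)))
		: r;
	return out;
}
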
608 | void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, | ||
609 | unsigned long pte_index) | ||
610 | { | ||
611 | unsigned long rb; | ||
612 | |||
613 | hptep[0] &= ~HPTE_V_VALID; | ||
614 | rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); | ||
615 | while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) | ||
616 | cpu_relax(); | ||
617 | asm volatile("ptesync" : : : "memory"); | ||
618 | asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" | ||
619 | : : "r" (rb), "r" (kvm->arch.lpid)); | ||
620 | asm volatile("ptesync" : : : "memory"); | ||
621 | kvm->arch.tlbie_lock = 0; | ||
622 | } | ||
623 | EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); | ||
624 | |||
625 | void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, | ||
626 | unsigned long pte_index) | ||
627 | { | ||
628 | unsigned long rb; | ||
629 | unsigned char rbyte; | ||
630 | |||
631 | rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); | ||
632 | rbyte = (hptep[1] & ~HPTE_R_R) >> 8; | ||
633 | /* modify only the second-last byte, which contains the ref bit */ | ||
634 | *((char *)hptep + 14) = rbyte; | ||
635 | while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) | ||
636 | cpu_relax(); | ||
637 | asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync" | ||
638 | : : "r" (rb), "r" (kvm->arch.lpid)); | ||
639 | asm volatile("ptesync" : : : "memory"); | ||
640 | kvm->arch.tlbie_lock = 0; | ||
641 | } | ||
642 | EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte); | ||
643 | |||
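
kvmppc_clear_ref_hpte above avoids a locked read-modify-write of the whole second dword: the referenced bit (HPTE_R_R, bit 8) lives entirely inside byte 14 of the big-endian 16-byte HPTE, so storing that one byte with R cleared cannot race with hardware setting the changed bit (HPTE_R_C, bit 7), which lives in byte 15. A pure-arithmetic check of that byte math, with no big-endian memory layout needed:

#include <assert.h>
#include <stdint.h>

#define HPTE_R_R 0x100UL
#define HPTE_R_C 0x080UL

int main(void)
{
	uint64_t r = 0xdeadbeef00000180UL;	/* R and C both set */
	uint8_t rbyte = (uint8_t)((r & ~HPTE_R_R) >> 8); /* bits 15:8, R off */

	/* writing rbyte into bits 15:8 == the single byte store above */
	uint64_t r_new = (r & ~0xff00UL) | ((uint64_t)rbyte << 8);

	assert(!(r_new & HPTE_R_R));	/* referenced bit cleared */
	assert(r_new & HPTE_R_C);	/* changed bit untouched */
	return 0;
}
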
644 | static int slb_base_page_shift[4] = { | ||
645 | 24, /* 16M */ | ||
646 | 16, /* 64k */ | ||
647 | 34, /* 16G */ | ||
648 | 20, /* 1M, unsupported */ | ||
649 | }; | ||
650 | |||
651 | long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, | ||
652 | unsigned long valid) | ||
653 | { | ||
654 | unsigned int i; | ||
655 | unsigned int pshift; | ||
656 | unsigned long somask; | ||
657 | unsigned long vsid, hash; | ||
658 | unsigned long avpn; | ||
659 | unsigned long *hpte; | ||
660 | unsigned long mask, val; | ||
661 | unsigned long v, r; | ||
662 | |||
663 | /* Get page shift, work out hash and AVPN etc. */ | ||
664 | mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY; | ||
665 | val = 0; | ||
666 | pshift = 12; | ||
667 | if (slb_v & SLB_VSID_L) { | ||
668 | mask |= HPTE_V_LARGE; | ||
669 | val |= HPTE_V_LARGE; | ||
670 | pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4]; | ||
671 | } | ||
672 | if (slb_v & SLB_VSID_B_1T) { | ||
673 | somask = (1UL << 40) - 1; | ||
674 | vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T; | ||
675 | vsid ^= vsid << 25; | ||
676 | } else { | ||
677 | somask = (1UL << 28) - 1; | ||
678 | vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT; | ||
679 | } | ||
680 | hash = (vsid ^ ((eaddr & somask) >> pshift)) & HPT_HASH_MASK; | ||
681 | avpn = slb_v & ~(somask >> 16); /* also includes B */ | ||
682 | avpn |= (eaddr & somask) >> 16; | ||
683 | |||
684 | if (pshift >= 24) | ||
685 | avpn &= ~((1UL << (pshift - 16)) - 1); | ||
686 | else | ||
687 | avpn &= ~0x7fUL; | ||
688 | val |= avpn; | ||
689 | |||
690 | for (;;) { | ||
691 | hpte = (unsigned long *)(kvm->arch.hpt_virt + (hash << 7)); | ||
692 | |||
693 | for (i = 0; i < 16; i += 2) { | ||
694 | /* Read the PTE racily */ | ||
695 | v = hpte[i] & ~HPTE_V_HVLOCK; | ||
696 | |||
697 | /* Check valid/absent, hash, segment size and AVPN */ | ||
698 | if (!(v & valid) || (v & mask) != val) | ||
699 | continue; | ||
700 | |||
701 | /* Lock the PTE and read it under the lock */ | ||
702 | while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK)) | ||
703 | cpu_relax(); | ||
704 | v = hpte[i] & ~HPTE_V_HVLOCK; | ||
705 | r = hpte[i+1]; | ||
706 | |||
707 | /* | ||
708 | * Check the HPTE again, including large page size | ||
709 | * Since we don't currently allow any MPSS (mixed | ||
710 | * page-size segment) page sizes, it is sufficient | ||
711 | * to check against the actual page size. | ||
712 | */ | ||
713 | if ((v & valid) && (v & mask) == val && | ||
714 | hpte_page_size(v, r) == (1ul << pshift)) | ||
715 | /* Return with the HPTE still locked */ | ||
716 | return (hash << 3) + (i >> 1); | ||
717 | |||
718 | /* Unlock and move on */ | ||
719 | hpte[i] = v; | ||
720 | } | ||
721 | |||
722 | if (val & HPTE_V_SECONDARY) | ||
723 | break; | ||
724 | val |= HPTE_V_SECONDARY; | ||
725 | hash = hash ^ HPT_HASH_MASK; | ||
726 | } | ||
727 | return -1; | ||
728 | } | ||
729 | EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte); | ||
730 | |||
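
kvmppc_hv_find_lock_hpte above probes at most two PTE groups: the primary group at the computed hash, then the secondary group at the complemented hash with HPTE_V_SECONDARY added to the match value. A hedged standalone sketch of the indexing (the 128-byte group stride matches the `hash << 7` above; the HPT sizing is illustrative):

#include <stdint.h>

#define HPT_ORDER	24			/* illustrative HPT size */
#define HPT_HASH_MASK	((1UL << (HPT_ORDER - 7)) - 1)	/* groups - 1 */

struct probe { uint64_t primary, secondary; };	/* byte offsets in the HPT */

static struct probe hpt_probe(uint64_t vsid, uint64_t ea,
			      unsigned pshift, uint64_t somask)
{
	/* fold the VSID with the page index within the segment */
	uint64_t hash = (vsid ^ ((ea & somask) >> pshift)) & HPT_HASH_MASK;
	struct probe p;

	p.primary   = hash << 7;		/* 8 HPTEs * 16 bytes */
	p.secondary = (hash ^ HPT_HASH_MASK) << 7;
	return p;
}
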
731 | /* | ||
732 | * Called in real mode to check whether an HPTE not found fault | ||
733 | * is due to accessing a paged-out page or an emulated MMIO page, | ||
734 | * or if a protection fault is due to accessing a page that the | ||
735 | * guest wanted read/write access to but which we made read-only. | ||
736 | * Returns a possibly modified status (DSISR) value if none of | ||
737 | * these cases applies (i.e. pass the interrupt to the guest), | ||
738 | * -1 to pass the fault up to host kernel mode code, -2 to do that | ||
739 | * and also load the instruction word (for MMIO emulation), | ||
740 | * or 0 if we should make the guest retry the access. | ||
741 | */ | ||
742 | long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, | ||
743 | unsigned long slb_v, unsigned int status, bool data) | ||
744 | { | ||
745 | struct kvm *kvm = vcpu->kvm; | ||
746 | long int index; | ||
747 | unsigned long v, r, gr; | ||
748 | unsigned long *hpte; | ||
749 | unsigned long valid; | ||
750 | struct revmap_entry *rev; | ||
751 | unsigned long pp, key; | ||
752 | |||
753 | /* For protection fault, expect to find a valid HPTE */ | ||
754 | valid = HPTE_V_VALID; | ||
755 | if (status & DSISR_NOHPTE) | ||
756 | valid |= HPTE_V_ABSENT; | ||
757 | |||
758 | index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid); | ||
759 | if (index < 0) { | ||
760 | if (status & DSISR_NOHPTE) | ||
761 | return status; /* there really was no HPTE */ | ||
762 | return 0; /* for prot fault, HPTE disappeared */ | ||
763 | } | ||
764 | hpte = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); | ||
765 | v = hpte[0] & ~HPTE_V_HVLOCK; | ||
766 | r = hpte[1]; | ||
767 | rev = real_vmalloc_addr(&kvm->arch.revmap[index]); | ||
768 | gr = rev->guest_rpte; | ||
769 | |||
770 | unlock_hpte(hpte, v); | ||
771 | |||
772 | /* For not found, if the HPTE is valid by now, retry the instruction */ | ||
773 | if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID)) | ||
774 | return 0; | ||
775 | |||
776 | /* Check access permissions to the page */ | ||
777 | pp = gr & (HPTE_R_PP0 | HPTE_R_PP); | ||
778 | key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS; | ||
779 | status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */ | ||
780 | if (!data) { | ||
781 | if (gr & (HPTE_R_N | HPTE_R_G)) | ||
782 | return status | SRR1_ISI_N_OR_G; | ||
783 | if (!hpte_read_permission(pp, slb_v & key)) | ||
784 | return status | SRR1_ISI_PROT; | ||
785 | } else if (status & DSISR_ISSTORE) { | ||
786 | /* check write permission */ | ||
787 | if (!hpte_write_permission(pp, slb_v & key)) | ||
788 | return status | DSISR_PROTFAULT; | ||
789 | } else { | ||
790 | if (!hpte_read_permission(pp, slb_v & key)) | ||
791 | return status | DSISR_PROTFAULT; | ||
792 | } | ||
793 | |||
794 | /* Check storage key, if applicable */ | ||
795 | if (data && (vcpu->arch.shregs.msr & MSR_DR)) { | ||
796 | unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr); | ||
797 | if (status & DSISR_ISSTORE) | ||
798 | perm >>= 1; | ||
799 | if (perm & 1) | ||
800 | return status | DSISR_KEYFAULT; | ||
801 | } | ||
802 | |||
803 | /* Save HPTE info for virtual-mode handler */ | ||
804 | vcpu->arch.pgfault_addr = addr; | ||
805 | vcpu->arch.pgfault_index = index; | ||
806 | vcpu->arch.pgfault_hpte[0] = v; | ||
807 | vcpu->arch.pgfault_hpte[1] = r; | ||
808 | |||
809 | /* Check the storage key to see if it is possibly emulated MMIO */ | ||
810 | if (data && (vcpu->arch.shregs.msr & MSR_IR) && | ||
811 | (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == | ||
812 | (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) | ||
813 | return -2; /* MMIO emulation - load instr word */ | ||
814 | |||
815 | return -1; /* send fault up to host kernel mode */ | ||
816 | } | ||
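
A caller-side reading of the contract documented above kvmppc_hpte_hv_fault: 0 means retry the access, -1 means send the fault up to the host, -2 means send it up and also fetch the instruction word, and anything else is a DSISR/SRR1 image to reflect to the guest. A hedged sketch of that dispatch; only the return-code meanings come from the comment above, the handler names are invented:

enum fault_action { RETRY_GUEST, TO_HOST, TO_HOST_WITH_INSN, TO_GUEST };

static enum fault_action classify(long ret, unsigned long *dsisr_out)
{
	switch (ret) {
	case 0:
		return RETRY_GUEST;		/* HPTE reappeared */
	case -1:
		return TO_HOST;			/* paged out: host fixes it up */
	case -2:
		return TO_HOST_WITH_INSN;	/* MMIO: need the insn word */
	default:
		*dsisr_out = (unsigned long)ret; /* synthesize a DSI/ISI */
		return TO_GUEST;
	}
}
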
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 5c8b26183f50..b70bf22a3ff3 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -601,6 +601,30 @@ kvmppc_interrupt: | |||
601 | 601 | ||
602 | stw r12,VCPU_TRAP(r9) | 602 | stw r12,VCPU_TRAP(r9) |
603 | 603 | ||
604 | /* Save HEIR (HV emulation assist reg) in last_inst | ||
605 | if this is an HEI (HV emulation interrupt, e40) */ | ||
606 | li r3,KVM_INST_FETCH_FAILED | ||
607 | BEGIN_FTR_SECTION | ||
608 | cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST | ||
609 | bne 11f | ||
610 | mfspr r3,SPRN_HEIR | ||
611 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | ||
612 | 11: stw r3,VCPU_LAST_INST(r9) | ||
613 | |||
614 | /* these are volatile across C function calls */ | ||
615 | mfctr r3 | ||
616 | mfxer r4 | ||
617 | std r3, VCPU_CTR(r9) | ||
618 | stw r4, VCPU_XER(r9) | ||
619 | |||
620 | BEGIN_FTR_SECTION | ||
621 | /* If this is a page table miss then see if it's theirs or ours */ | ||
622 | cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE | ||
623 | beq kvmppc_hdsi | ||
624 | cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE | ||
625 | beq kvmppc_hisi | ||
626 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | ||
627 | |||
604 | /* See if this is a leftover HDEC interrupt */ | 628 | /* See if this is a leftover HDEC interrupt */ |
605 | cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER | 629 | cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER |
606 | bne 2f | 630 | bne 2f |
@@ -608,7 +632,7 @@ kvmppc_interrupt: | |||
608 | cmpwi r3,0 | 632 | cmpwi r3,0 |
609 | bge ignore_hdec | 633 | bge ignore_hdec |
610 | 2: | 634 | 2: |
611 | /* See if this is something we can handle in real mode */ | 635 | /* See if this is an hcall we can handle in real mode */ |
612 | cmpwi r12,BOOK3S_INTERRUPT_SYSCALL | 636 | cmpwi r12,BOOK3S_INTERRUPT_SYSCALL |
613 | beq hcall_try_real_mode | 637 | beq hcall_try_real_mode |
614 | 638 | ||
@@ -624,6 +648,7 @@ BEGIN_FTR_SECTION | |||
624 | 1: | 648 | 1: |
625 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | 649 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) |
626 | 650 | ||
651 | nohpte_cont: | ||
627 | hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ | 652 | hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ |
628 | /* Save DEC */ | 653 | /* Save DEC */ |
629 | mfspr r5,SPRN_DEC | 654 | mfspr r5,SPRN_DEC |
@@ -632,36 +657,21 @@ hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ | |||
632 | add r5,r5,r6 | 657 | add r5,r5,r6 |
633 | std r5,VCPU_DEC_EXPIRES(r9) | 658 | std r5,VCPU_DEC_EXPIRES(r9) |
634 | 659 | ||
635 | /* Save HEIR (HV emulation assist reg) in last_inst | ||
636 | if this is an HEI (HV emulation interrupt, e40) */ | ||
637 | li r3,-1 | ||
638 | BEGIN_FTR_SECTION | ||
639 | cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST | ||
640 | bne 11f | ||
641 | mfspr r3,SPRN_HEIR | ||
642 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | ||
643 | 11: stw r3,VCPU_LAST_INST(r9) | ||
644 | |||
645 | /* Save more register state */ | 660 | /* Save more register state */ |
646 | mfxer r5 | ||
647 | mfdar r6 | 661 | mfdar r6 |
648 | mfdsisr r7 | 662 | mfdsisr r7 |
649 | mfctr r8 | ||
650 | |||
651 | stw r5, VCPU_XER(r9) | ||
652 | std r6, VCPU_DAR(r9) | 663 | std r6, VCPU_DAR(r9) |
653 | stw r7, VCPU_DSISR(r9) | 664 | stw r7, VCPU_DSISR(r9) |
654 | std r8, VCPU_CTR(r9) | ||
655 | /* grab HDAR & HDSISR if HV data storage interrupt (HDSI) */ | ||
656 | BEGIN_FTR_SECTION | 665 | BEGIN_FTR_SECTION |
666 | /* don't overwrite fault_dar/fault_dsisr if HDSI */ | ||
657 | cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE | 667 | cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE |
658 | beq 6f | 668 | beq 6f |
659 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | 669 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) |
660 | 7: std r6, VCPU_FAULT_DAR(r9) | 670 | std r6, VCPU_FAULT_DAR(r9) |
661 | stw r7, VCPU_FAULT_DSISR(r9) | 671 | stw r7, VCPU_FAULT_DSISR(r9) |
662 | 672 | ||
663 | /* Save guest CTRL register, set runlatch to 1 */ | 673 | /* Save guest CTRL register, set runlatch to 1 */ |
664 | mfspr r6,SPRN_CTRLF | 674 | 6: mfspr r6,SPRN_CTRLF |
665 | stw r6,VCPU_CTRL(r9) | 675 | stw r6,VCPU_CTRL(r9) |
666 | andi. r0,r6,1 | 676 | andi. r0,r6,1 |
667 | bne 4f | 677 | bne 4f |
@@ -1094,9 +1104,131 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) | |||
1094 | mtspr SPRN_HSRR1, r7 | 1104 | mtspr SPRN_HSRR1, r7 |
1095 | ba 0x500 | 1105 | ba 0x500 |
1096 | 1106 | ||
1097 | 6: mfspr r6,SPRN_HDAR | 1107 | /* |
1098 | mfspr r7,SPRN_HDSISR | 1108 | * Check whether an HDSI is an HPTE not found fault or something else. |
1099 | b 7b | 1109 | * If it is an HPTE not found fault that is due to the guest accessing |
1110 | * a page that it has mapped but which we have paged out, then | ||
1111 | * we continue on with the guest exit path. In all other cases, | ||
1112 | * reflect the HDSI to the guest as a DSI. | ||
1113 | */ | ||
1114 | kvmppc_hdsi: | ||
1115 | mfspr r4, SPRN_HDAR | ||
1116 | mfspr r6, SPRN_HDSISR | ||
1117 | /* HPTE not found fault or protection fault? */ | ||
1118 | andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h | ||
1119 | beq 1f /* if not, send it to the guest */ | ||
1120 | andi. r0, r11, MSR_DR /* data relocation enabled? */ | ||
1121 | beq 3f | ||
1122 | clrrdi r0, r4, 28 | ||
1123 | PPC_SLBFEE_DOT(r5, r0) /* if so, look up SLB */ | ||
1124 | bne 1f /* if no SLB entry found */ | ||
1125 | 4: std r4, VCPU_FAULT_DAR(r9) | ||
1126 | stw r6, VCPU_FAULT_DSISR(r9) | ||
1127 | |||
1128 | /* Search the hash table. */ | ||
1129 | mr r3, r9 /* vcpu pointer */ | ||
1130 | li r7, 1 /* data fault */ | ||
1131 | bl .kvmppc_hpte_hv_fault | ||
1132 | ld r9, HSTATE_KVM_VCPU(r13) | ||
1133 | ld r10, VCPU_PC(r9) | ||
1134 | ld r11, VCPU_MSR(r9) | ||
1135 | li r12, BOOK3S_INTERRUPT_H_DATA_STORAGE | ||
1136 | cmpdi r3, 0 /* retry the instruction */ | ||
1137 | beq 6f | ||
1138 | cmpdi r3, -1 /* handle in kernel mode */ | ||
1139 | beq nohpte_cont | ||
1140 | cmpdi r3, -2 /* MMIO emulation; need instr word */ | ||
1141 | beq 2f | ||
1142 | |||
1143 | /* Synthesize a DSI for the guest */ | ||
1144 | ld r4, VCPU_FAULT_DAR(r9) | ||
1145 | mr r6, r3 | ||
1146 | 1: mtspr SPRN_DAR, r4 | ||
1147 | mtspr SPRN_DSISR, r6 | ||
1148 | mtspr SPRN_SRR0, r10 | ||
1149 | mtspr SPRN_SRR1, r11 | ||
1150 | li r10, BOOK3S_INTERRUPT_DATA_STORAGE | ||
1151 | li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ | ||
1152 | rotldi r11, r11, 63 | ||
1153 | 6: ld r7, VCPU_CTR(r9) | ||
1154 | lwz r8, VCPU_XER(r9) | ||
1155 | mtctr r7 | ||
1156 | mtxer r8 | ||
1157 | mr r4, r9 | ||
1158 | b fast_guest_return | ||
1159 | |||
1160 | 3: ld r5, VCPU_KVM(r9) /* not relocated, use VRMA */ | ||
1161 | ld r5, KVM_VRMA_SLB_V(r5) | ||
1162 | b 4b | ||
1163 | |||
1164 | /* If this is for emulated MMIO, load the instruction word */ | ||
1165 | 2: li r8, KVM_INST_FETCH_FAILED /* In case lwz faults */ | ||
1166 | |||
1167 | /* Set guest mode to 'jump over instruction' so if lwz faults | ||
1168 | * we'll just continue at the next IP. */ | ||
1169 | li r0, KVM_GUEST_MODE_SKIP | ||
1170 | stb r0, HSTATE_IN_GUEST(r13) | ||
1171 | |||
1172 | /* Do the access with MSR:DR enabled */ | ||
1173 | mfmsr r3 | ||
1174 | ori r4, r3, MSR_DR /* Enable paging for data */ | ||
1175 | mtmsrd r4 | ||
1176 | lwz r8, 0(r10) | ||
1177 | mtmsrd r3 | ||
1178 | |||
1179 | /* Store the result */ | ||
1180 | stw r8, VCPU_LAST_INST(r9) | ||
1181 | |||
1182 | /* Unset guest mode. */ | ||
1183 | li r0, KVM_GUEST_MODE_NONE | ||
1184 | stb r0, HSTATE_IN_GUEST(r13) | ||
1185 | b nohpte_cont | ||
1186 | |||
1187 | /* | ||
1188 | * Similarly for an HISI, reflect it to the guest as an ISI unless | ||
1189 | * it is an HPTE not found fault for a page that we have paged out. | ||
1190 | */ | ||
1191 | kvmppc_hisi: | ||
1192 | andis. r0, r11, SRR1_ISI_NOPT@h | ||
1193 | beq 1f | ||
1194 | andi. r0, r11, MSR_IR /* instruction relocation enabled? */ | ||
1195 | beq 3f | ||
1196 | clrrdi r0, r10, 28 | ||
1197 | PPC_SLBFEE_DOT(r5, r0) /* if so, look up SLB */ | ||
1198 | bne 1f /* if no SLB entry found */ | ||
1199 | 4: | ||
1200 | /* Search the hash table. */ | ||
1201 | mr r3, r9 /* vcpu pointer */ | ||
1202 | mr r4, r10 | ||
1203 | mr r6, r11 | ||
1204 | li r7, 0 /* instruction fault */ | ||
1205 | bl .kvmppc_hpte_hv_fault | ||
1206 | ld r9, HSTATE_KVM_VCPU(r13) | ||
1207 | ld r10, VCPU_PC(r9) | ||
1208 | ld r11, VCPU_MSR(r9) | ||
1209 | li r12, BOOK3S_INTERRUPT_H_INST_STORAGE | ||
1210 | cmpdi r3, 0 /* retry the instruction */ | ||
1211 | beq 6f | ||
1212 | cmpdi r3, -1 /* handle in kernel mode */ | ||
1213 | beq nohpte_cont | ||
1214 | |||
1215 | /* Synthesize an ISI for the guest */ | ||
1216 | mr r11, r3 | ||
1217 | 1: mtspr SPRN_SRR0, r10 | ||
1218 | mtspr SPRN_SRR1, r11 | ||
1219 | li r10, BOOK3S_INTERRUPT_INST_STORAGE | ||
1220 | li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ | ||
1221 | rotldi r11, r11, 63 | ||
1222 | 6: ld r7, VCPU_CTR(r9) | ||
1223 | lwz r8, VCPU_XER(r9) | ||
1224 | mtctr r7 | ||
1225 | mtxer r8 | ||
1226 | mr r4, r9 | ||
1227 | b fast_guest_return | ||
1228 | |||
1229 | 3: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */ | ||
1230 | ld r5, KVM_VRMA_SLB_V(r6) | ||
1231 | b 4b | ||
1100 | 1232 | ||
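
The `li r11,(MSR_ME << 1) | 1; rotldi r11,r11,63` pair used by both handlers above builds the synthesized guest MSR without needing a 64-bit immediate: rotating left by 63 moves bit 0 up to bit 63 (MSR_SF) and bit 13 back down to bit 12 (MSR_ME). A quick check of the arithmetic (bit positions per the 64-bit MSR layout, but verify against the headers):

#include <assert.h>
#include <stdint.h>

#define MSR_SF (1UL << 63)
#define MSR_ME (1UL << 12)

static uint64_t rotl64(uint64_t v, unsigned n)
{
	return (v << n) | (v >> (64 - n));	/* n in 1..63 */
}

int main(void)
{
	/* li r11,(MSR_ME << 1) | 1 ; rotldi r11,r11,63 */
	assert(rotl64((MSR_ME << 1) | 1, 63) == (MSR_SF | MSR_ME));
	return 0;
}
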
1101 | /* | 1233 | /* |
1102 | * Try to handle an hcall in real mode. | 1234 | * Try to handle an hcall in real mode. |
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index 7b0ee96c1bed..e70ef2d86431 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -196,7 +196,8 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
196 | kvmppc_inject_pf(vcpu, addr, false); | 196 | kvmppc_inject_pf(vcpu, addr, false); |
197 | goto done_load; | 197 | goto done_load; |
198 | } else if (r == EMULATE_DO_MMIO) { | 198 | } else if (r == EMULATE_DO_MMIO) { |
199 | emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, len, 1); | 199 | emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs, |
200 | len, 1); | ||
200 | goto done_load; | 201 | goto done_load; |
201 | } | 202 | } |
202 | 203 | ||
@@ -286,11 +287,13 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
286 | kvmppc_inject_pf(vcpu, addr, false); | 287 | kvmppc_inject_pf(vcpu, addr, false); |
287 | goto done_load; | 288 | goto done_load; |
288 | } else if ((r == EMULATE_DO_MMIO) && w) { | 289 | } else if ((r == EMULATE_DO_MMIO) && w) { |
289 | emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, 4, 1); | 290 | emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs, |
291 | 4, 1); | ||
290 | vcpu->arch.qpr[rs] = tmp[1]; | 292 | vcpu->arch.qpr[rs] = tmp[1]; |
291 | goto done_load; | 293 | goto done_load; |
292 | } else if (r == EMULATE_DO_MMIO) { | 294 | } else if (r == EMULATE_DO_MMIO) { |
293 | emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FQPR | rs, 8, 1); | 295 | emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FQPR | rs, |
296 | 8, 1); | ||
294 | goto done_load; | 297 | goto done_load; |
295 | } | 298 | } |
296 | 299 | ||
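
The rename from KVM_REG_FPR/FQPR to KVM_MMIO_REG_FPR/FQPR in this file keeps the MMIO-completion register identifiers distinct from the new KVM_REG_* ONE_REG namespace. A hedged sketch of how such an identifier can carry the register class in the high bits and the index in the low bits; the mask and class values here are illustrative, not the kernel's:

#include <stdint.h>

#define MMIO_REG_MASK	0x001fu		/* low bits: register number */
#define MMIO_REG_GPR	0x0000u		/* high bits: register class */
#define MMIO_REG_FPR	0x0020u
#define MMIO_REG_FQPR	0x0060u

struct regs { uint64_t gpr[32]; uint64_t fpr[32]; uint32_t qpr[32]; };

static void mmio_complete(struct regs *r, unsigned id, uint64_t val)
{
	unsigned n = id & MMIO_REG_MASK;

	switch (id & ~MMIO_REG_MASK) {
	case MMIO_REG_GPR:  r->gpr[n] = val; break;
	case MMIO_REG_FPR:  r->fpr[n] = val; break;
	case MMIO_REG_FQPR:			/* FPR plus QPR half */
		r->fpr[n] = val;
		r->qpr[n] = (uint32_t)val;
		break;
	}
}
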
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 220fcdf26978..7340e1090b77 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -51,15 +51,19 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, | |||
51 | #define MSR_USER32 MSR_USER | 51 | #define MSR_USER32 MSR_USER |
52 | #define MSR_USER64 MSR_USER | 52 | #define MSR_USER64 MSR_USER |
53 | #define HW_PAGE_SIZE PAGE_SIZE | 53 | #define HW_PAGE_SIZE PAGE_SIZE |
54 | #define __hard_irq_disable local_irq_disable | ||
55 | #define __hard_irq_enable local_irq_enable | ||
54 | #endif | 56 | #endif |
55 | 57 | ||
56 | void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 58 | void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
57 | { | 59 | { |
58 | #ifdef CONFIG_PPC_BOOK3S_64 | 60 | #ifdef CONFIG_PPC_BOOK3S_64 |
59 | memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb)); | 61 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
62 | memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb)); | ||
60 | memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu, | 63 | memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu, |
61 | sizeof(get_paca()->shadow_vcpu)); | 64 | sizeof(get_paca()->shadow_vcpu)); |
62 | to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max; | 65 | svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max; |
66 | svcpu_put(svcpu); | ||
63 | #endif | 67 | #endif |
64 | 68 | ||
65 | #ifdef CONFIG_PPC_BOOK3S_32 | 69 | #ifdef CONFIG_PPC_BOOK3S_32 |
@@ -70,10 +74,12 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
70 | void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) | 74 | void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) |
71 | { | 75 | { |
72 | #ifdef CONFIG_PPC_BOOK3S_64 | 76 | #ifdef CONFIG_PPC_BOOK3S_64 |
73 | memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb)); | 77 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); |
78 | memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb)); | ||
74 | memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, | 79 | memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, |
75 | sizeof(get_paca()->shadow_vcpu)); | 80 | sizeof(get_paca()->shadow_vcpu)); |
76 | to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max; | 81 | to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max; |
82 | svcpu_put(svcpu); | ||
77 | #endif | 83 | #endif |
78 | 84 | ||
79 | kvmppc_giveup_ext(vcpu, MSR_FP); | 85 | kvmppc_giveup_ext(vcpu, MSR_FP); |
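
The to_svcpu() -> svcpu_get()/svcpu_put() conversion running through this file turns shadow-vcpu access into an explicit critical section: on 64-bit the shadow vcpu lives in per-CPU data (the paca), so the pointer is only stable while preemption is off. A hedged sketch of the contract, with the kernel primitives stubbed out for a standalone build:

struct shadow_vcpu { unsigned long shadow_srr1, fault_dsisr; };

static struct shadow_vcpu this_cpu_svcpu;	/* really per-CPU (paca) */

static void preempt_disable(void) { }		/* kernel primitive, stubbed */
static void preempt_enable(void)  { }		/* kernel primitive, stubbed */

static struct shadow_vcpu *svcpu_get(void)
{
	preempt_disable();			/* pin to this CPU's paca */
	return &this_cpu_svcpu;
}

static void svcpu_put(struct shadow_vcpu *svcpu)
{
	(void)svcpu;
	preempt_enable();			/* pointer is dead after this */
}
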
@@ -151,14 +157,16 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) | |||
151 | #ifdef CONFIG_PPC_BOOK3S_64 | 157 | #ifdef CONFIG_PPC_BOOK3S_64 |
152 | if ((pvr >= 0x330000) && (pvr < 0x70330000)) { | 158 | if ((pvr >= 0x330000) && (pvr < 0x70330000)) { |
153 | kvmppc_mmu_book3s_64_init(vcpu); | 159 | kvmppc_mmu_book3s_64_init(vcpu); |
154 | to_book3s(vcpu)->hior = 0xfff00000; | 160 | if (!to_book3s(vcpu)->hior_explicit) |
161 | to_book3s(vcpu)->hior = 0xfff00000; | ||
155 | to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; | 162 | to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; |
156 | vcpu->arch.cpu_type = KVM_CPU_3S_64; | 163 | vcpu->arch.cpu_type = KVM_CPU_3S_64; |
157 | } else | 164 | } else |
158 | #endif | 165 | #endif |
159 | { | 166 | { |
160 | kvmppc_mmu_book3s_32_init(vcpu); | 167 | kvmppc_mmu_book3s_32_init(vcpu); |
161 | to_book3s(vcpu)->hior = 0; | 168 | if (!to_book3s(vcpu)->hior_explicit) |
169 | to_book3s(vcpu)->hior = 0; | ||
162 | to_book3s(vcpu)->msr_mask = 0xffffffffULL; | 170 | to_book3s(vcpu)->msr_mask = 0xffffffffULL; |
163 | vcpu->arch.cpu_type = KVM_CPU_3S_32; | 171 | vcpu->arch.cpu_type = KVM_CPU_3S_32; |
164 | } | 172 | } |
@@ -308,19 +316,22 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
308 | 316 | ||
309 | if (page_found == -ENOENT) { | 317 | if (page_found == -ENOENT) { |
310 | /* Page not found in guest PTE entries */ | 318 | /* Page not found in guest PTE entries */ |
319 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); | ||
311 | vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); | 320 | vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); |
312 | vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr; | 321 | vcpu->arch.shared->dsisr = svcpu->fault_dsisr; |
313 | vcpu->arch.shared->msr |= | 322 | vcpu->arch.shared->msr |= |
314 | (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); | 323 | (svcpu->shadow_srr1 & 0x00000000f8000000ULL); |
324 | svcpu_put(svcpu); | ||
315 | kvmppc_book3s_queue_irqprio(vcpu, vec); | 325 | kvmppc_book3s_queue_irqprio(vcpu, vec); |
316 | } else if (page_found == -EPERM) { | 326 | } else if (page_found == -EPERM) { |
317 | /* Storage protection */ | 327 | /* Storage protection */ |
328 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); | ||
318 | vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); | 329 | vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); |
319 | vcpu->arch.shared->dsisr = | 330 | vcpu->arch.shared->dsisr = svcpu->fault_dsisr & ~DSISR_NOHPTE; |
320 | to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE; | ||
321 | vcpu->arch.shared->dsisr |= DSISR_PROTFAULT; | 331 | vcpu->arch.shared->dsisr |= DSISR_PROTFAULT; |
322 | vcpu->arch.shared->msr |= | 332 | vcpu->arch.shared->msr |= |
323 | (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); | 333 | svcpu->shadow_srr1 & 0x00000000f8000000ULL; |
334 | svcpu_put(svcpu); | ||
324 | kvmppc_book3s_queue_irqprio(vcpu, vec); | 335 | kvmppc_book3s_queue_irqprio(vcpu, vec); |
325 | } else if (page_found == -EINVAL) { | 336 | } else if (page_found == -EINVAL) { |
326 | /* Page not found in guest SLB */ | 337 | /* Page not found in guest SLB */ |
@@ -517,24 +528,29 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
517 | run->ready_for_interrupt_injection = 1; | 528 | run->ready_for_interrupt_injection = 1; |
518 | 529 | ||
519 | trace_kvm_book3s_exit(exit_nr, vcpu); | 530 | trace_kvm_book3s_exit(exit_nr, vcpu); |
531 | preempt_enable(); | ||
520 | kvm_resched(vcpu); | 532 | kvm_resched(vcpu); |
521 | switch (exit_nr) { | 533 | switch (exit_nr) { |
522 | case BOOK3S_INTERRUPT_INST_STORAGE: | 534 | case BOOK3S_INTERRUPT_INST_STORAGE: |
535 | { | ||
536 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); | ||
537 | ulong shadow_srr1 = svcpu->shadow_srr1; | ||
523 | vcpu->stat.pf_instruc++; | 538 | vcpu->stat.pf_instruc++; |
524 | 539 | ||
525 | #ifdef CONFIG_PPC_BOOK3S_32 | 540 | #ifdef CONFIG_PPC_BOOK3S_32 |
526 | /* We set segments as unused segments when invalidating them. So | 541 | /* We set segments as unused segments when invalidating them. So |
527 | * treat the respective fault as segment fault. */ | 542 | * treat the respective fault as segment fault. */ |
528 | if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] | 543 | if (svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] == SR_INVALID) { |
529 | == SR_INVALID) { | ||
530 | kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); | 544 | kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); |
531 | r = RESUME_GUEST; | 545 | r = RESUME_GUEST; |
546 | svcpu_put(svcpu); | ||
532 | break; | 547 | break; |
533 | } | 548 | } |
534 | #endif | 549 | #endif |
550 | svcpu_put(svcpu); | ||
535 | 551 | ||
536 | /* only care about PTEG not found errors, but leave NX alone */ | 552 | /* only care about PTEG not found errors, but leave NX alone */ |
537 | if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) { | 553 | if (shadow_srr1 & 0x40000000) { |
538 | r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr); | 554 | r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr); |
539 | vcpu->stat.sp_instruc++; | 555 | vcpu->stat.sp_instruc++; |
540 | } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && | 556 | } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && |
@@ -547,33 +563,37 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
547 | kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); | 563 | kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); |
548 | r = RESUME_GUEST; | 564 | r = RESUME_GUEST; |
549 | } else { | 565 | } else { |
550 | vcpu->arch.shared->msr |= | 566 | vcpu->arch.shared->msr |= shadow_srr1 & 0x58000000; |
551 | to_svcpu(vcpu)->shadow_srr1 & 0x58000000; | ||
552 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | 567 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); |
553 | r = RESUME_GUEST; | 568 | r = RESUME_GUEST; |
554 | } | 569 | } |
555 | break; | 570 | break; |
571 | } | ||
556 | case BOOK3S_INTERRUPT_DATA_STORAGE: | 572 | case BOOK3S_INTERRUPT_DATA_STORAGE: |
557 | { | 573 | { |
558 | ulong dar = kvmppc_get_fault_dar(vcpu); | 574 | ulong dar = kvmppc_get_fault_dar(vcpu); |
575 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); | ||
576 | u32 fault_dsisr = svcpu->fault_dsisr; | ||
559 | vcpu->stat.pf_storage++; | 577 | vcpu->stat.pf_storage++; |
560 | 578 | ||
561 | #ifdef CONFIG_PPC_BOOK3S_32 | 579 | #ifdef CONFIG_PPC_BOOK3S_32 |
562 | /* We set segments as unused segments when invalidating them. So | 580 | /* We set segments as unused segments when invalidating them. So |
563 | * treat the respective fault as segment fault. */ | 581 | * treat the respective fault as segment fault. */ |
564 | if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) { | 582 | if ((svcpu->sr[dar >> SID_SHIFT]) == SR_INVALID) { |
565 | kvmppc_mmu_map_segment(vcpu, dar); | 583 | kvmppc_mmu_map_segment(vcpu, dar); |
566 | r = RESUME_GUEST; | 584 | r = RESUME_GUEST; |
585 | svcpu_put(svcpu); | ||
567 | break; | 586 | break; |
568 | } | 587 | } |
569 | #endif | 588 | #endif |
589 | svcpu_put(svcpu); | ||
570 | 590 | ||
571 | /* The only case we need to handle is missing shadow PTEs */ | 591 | /* The only case we need to handle is missing shadow PTEs */ |
572 | if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) { | 592 | if (fault_dsisr & DSISR_NOHPTE) { |
573 | r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); | 593 | r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); |
574 | } else { | 594 | } else { |
575 | vcpu->arch.shared->dar = dar; | 595 | vcpu->arch.shared->dar = dar; |
576 | vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr; | 596 | vcpu->arch.shared->dsisr = fault_dsisr; |
577 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | 597 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); |
578 | r = RESUME_GUEST; | 598 | r = RESUME_GUEST; |
579 | } | 599 | } |
@@ -609,10 +629,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
609 | case BOOK3S_INTERRUPT_PROGRAM: | 629 | case BOOK3S_INTERRUPT_PROGRAM: |
610 | { | 630 | { |
611 | enum emulation_result er; | 631 | enum emulation_result er; |
632 | struct kvmppc_book3s_shadow_vcpu *svcpu; | ||
612 | ulong flags; | 633 | ulong flags; |
613 | 634 | ||
614 | program_interrupt: | 635 | program_interrupt: |
615 | flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull; | 636 | svcpu = svcpu_get(vcpu); |
637 | flags = svcpu->shadow_srr1 & 0x1f0000ull; | ||
638 | svcpu_put(svcpu); | ||
616 | 639 | ||
617 | if (vcpu->arch.shared->msr & MSR_PR) { | 640 | if (vcpu->arch.shared->msr & MSR_PR) { |
618 | #ifdef EXIT_DEBUG | 641 | #ifdef EXIT_DEBUG |
@@ -740,20 +763,33 @@ program_interrupt: | |||
740 | r = RESUME_GUEST; | 763 | r = RESUME_GUEST; |
741 | break; | 764 | break; |
742 | default: | 765 | default: |
766 | { | ||
767 | struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); | ||
768 | ulong shadow_srr1 = svcpu->shadow_srr1; | ||
769 | svcpu_put(svcpu); | ||
743 | /* Ugh - bork here! What did we get? */ | 770 | /* Ugh - bork here! What did we get? */ |
744 | printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", | 771 | printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", |
745 | exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1); | 772 | exit_nr, kvmppc_get_pc(vcpu), shadow_srr1); |
746 | r = RESUME_HOST; | 773 | r = RESUME_HOST; |
747 | BUG(); | 774 | BUG(); |
748 | break; | 775 | break; |
749 | } | 776 | } |
750 | 777 | } | |
751 | 778 | ||
752 | if (!(r & RESUME_HOST)) { | 779 | if (!(r & RESUME_HOST)) { |
753 | /* To avoid clobbering exit_reason, only check for signals if | 780 | /* To avoid clobbering exit_reason, only check for signals if |
754 | * we aren't already exiting to userspace for some other | 781 | * we aren't already exiting to userspace for some other |
755 | * reason. */ | 782 | * reason. */ |
783 | |||
784 | /* | ||
785 | * Interrupts could be timers for the guest which we have to | ||
786 | * inject again, so let's postpone them until we're in the guest | ||
787 | * and if we really did time things so badly, then we just exit | ||
788 | * again due to a host external interrupt. | ||
789 | */ | ||
790 | __hard_irq_disable(); | ||
756 | if (signal_pending(current)) { | 791 | if (signal_pending(current)) { |
792 | __hard_irq_enable(); | ||
757 | #ifdef EXIT_DEBUG | 793 | #ifdef EXIT_DEBUG |
758 | printk(KERN_EMERG "KVM: Going back to host\n"); | 794 | printk(KERN_EMERG "KVM: Going back to host\n"); |
759 | #endif | 795 | #endif |
@@ -761,10 +797,12 @@ program_interrupt: | |||
761 | run->exit_reason = KVM_EXIT_INTR; | 797 | run->exit_reason = KVM_EXIT_INTR; |
762 | r = -EINTR; | 798 | r = -EINTR; |
763 | } else { | 799 | } else { |
800 | preempt_disable(); | ||
801 | |||
764 | /* In case an interrupt came in that was triggered | 802 | /* In case an interrupt came in that was triggered |
765 | * from userspace (like DEC), we need to check what | 803 | * from userspace (like DEC), we need to check what |
766 | * to inject now! */ | 804 | * to inject now! */ |
767 | kvmppc_core_deliver_interrupts(vcpu); | 805 | kvmppc_core_prepare_to_enter(vcpu); |
768 | } | 806 | } |
769 | } | 807 | } |
770 | 808 | ||
@@ -836,6 +874,38 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
836 | return 0; | 874 | return 0; |
837 | } | 875 | } |
838 | 876 | ||
877 | int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | ||
878 | { | ||
879 | int r = -EINVAL; | ||
880 | |||
881 | switch (reg->id) { | ||
882 | case KVM_REG_PPC_HIOR: | ||
883 | r = put_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr); | ||
884 | break; | ||
885 | default: | ||
886 | break; | ||
887 | } | ||
888 | |||
889 | return r; | ||
890 | } | ||
891 | |||
892 | int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | ||
893 | { | ||
894 | int r = -EINVAL; | ||
895 | |||
896 | switch (reg->id) { | ||
897 | case KVM_REG_PPC_HIOR: | ||
898 | r = get_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr); | ||
899 | if (!r) | ||
900 | to_book3s(vcpu)->hior_explicit = true; | ||
901 | break; | ||
902 | default: | ||
903 | break; | ||
904 | } | ||
905 | |||
906 | return r; | ||
907 | } | ||
908 | |||
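
The two handlers above give PR KVM its first ONE_REG register, KVM_REG_PPC_HIOR; setting it also flips hior_explicit, so kvmppc_set_pvr() stops overriding the value. A plausible userspace call sequence, assuming a kernel with these patches and an already-created vcpu fd:

#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

static int set_hior(int vcpu_fd, uint64_t hior)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_PPC_HIOR,	/* per this patch set */
		.addr = (uint64_t)(uintptr_t)&hior,
	};

	/* after this, kvmppc_set_pvr() leaves HIOR alone (hior_explicit) */
	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}
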
839 | int kvmppc_core_check_processor_compat(void) | 909 | int kvmppc_core_check_processor_compat(void) |
840 | { | 910 | { |
841 | return 0; | 911 | return 0; |
@@ -923,16 +993,31 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
923 | #endif | 993 | #endif |
924 | ulong ext_msr; | 994 | ulong ext_msr; |
925 | 995 | ||
996 | preempt_disable(); | ||
997 | |||
926 | /* Check if we can run the vcpu at all */ | 998 | /* Check if we can run the vcpu at all */ |
927 | if (!vcpu->arch.sane) { | 999 | if (!vcpu->arch.sane) { |
928 | kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 1000 | kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
929 | return -EINVAL; | 1001 | ret = -EINVAL; |
1002 | goto out; | ||
930 | } | 1003 | } |
931 | 1004 | ||
1005 | kvmppc_core_prepare_to_enter(vcpu); | ||
1006 | |||
1007 | /* | ||
1008 | * Interrupts could be timers for the guest which we have to inject | ||
1009 | * again, so let's postpone them until we're in the guest and if we | ||
1010 | * really did time things so badly, then we just exit again due to | ||
1011 | * a host external interrupt. | ||
1012 | */ | ||
1013 | __hard_irq_disable(); | ||
1014 | |||
932 | /* No need to go into the guest when all we do is going out */ | 1015 | /* No need to go into the guest when all we do is going out */ |
933 | if (signal_pending(current)) { | 1016 | if (signal_pending(current)) { |
1017 | __hard_irq_enable(); | ||
934 | kvm_run->exit_reason = KVM_EXIT_INTR; | 1018 | kvm_run->exit_reason = KVM_EXIT_INTR; |
935 | return -EINTR; | 1019 | ret = -EINTR; |
1020 | goto out; | ||
936 | } | 1021 | } |
937 | 1022 | ||
938 | /* Save FPU state in stack */ | 1023 | /* Save FPU state in stack */ |
@@ -974,8 +1059,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
974 | 1059 | ||
975 | kvm_guest_exit(); | 1060 | kvm_guest_exit(); |
976 | 1061 | ||
977 | local_irq_disable(); | ||
978 | |||
979 | current->thread.regs->msr = ext_msr; | 1062 | current->thread.regs->msr = ext_msr; |
980 | 1063 | ||
981 | /* Make sure we save the guest FPU/Altivec/VSX state */ | 1064 | /* Make sure we save the guest FPU/Altivec/VSX state */ |
@@ -1002,9 +1085,50 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
1002 | current->thread.used_vsr = used_vsr; | 1085 | current->thread.used_vsr = used_vsr; |
1003 | #endif | 1086 | #endif |
1004 | 1087 | ||
1088 | out: | ||
1089 | preempt_enable(); | ||
1005 | return ret; | 1090 | return ret; |
1006 | } | 1091 | } |
1007 | 1092 | ||
1093 | /* | ||
1094 | * Get (and clear) the dirty memory log for a memory slot. | ||
1095 | */ | ||
1096 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | ||
1097 | struct kvm_dirty_log *log) | ||
1098 | { | ||
1099 | struct kvm_memory_slot *memslot; | ||
1100 | struct kvm_vcpu *vcpu; | ||
1101 | ulong ga, ga_end; | ||
1102 | int is_dirty = 0; | ||
1103 | int r; | ||
1104 | unsigned long n; | ||
1105 | |||
1106 | mutex_lock(&kvm->slots_lock); | ||
1107 | |||
1108 | r = kvm_get_dirty_log(kvm, log, &is_dirty); | ||
1109 | if (r) | ||
1110 | goto out; | ||
1111 | |||
1112 | /* If nothing is dirty, don't bother messing with page tables. */ | ||
1113 | if (is_dirty) { | ||
1114 | memslot = id_to_memslot(kvm->memslots, log->slot); | ||
1115 | |||
1116 | ga = memslot->base_gfn << PAGE_SHIFT; | ||
1117 | ga_end = ga + (memslot->npages << PAGE_SHIFT); | ||
1118 | |||
1119 | kvm_for_each_vcpu(n, vcpu, kvm) | ||
1120 | kvmppc_mmu_pte_pflush(vcpu, ga, ga_end); | ||
1121 | |||
1122 | n = kvm_dirty_bitmap_bytes(memslot); | ||
1123 | memset(memslot->dirty_bitmap, 0, n); | ||
1124 | } | ||
1125 | |||
1126 | r = 0; | ||
1127 | out: | ||
1128 | mutex_unlock(&kvm->slots_lock); | ||
1129 | return r; | ||
1130 | } | ||
1131 | |||
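
kvm_vm_ioctl_get_dirty_log above is what services KVM_GET_DIRTY_LOG for PR guests: fetch and clear the bitmap, and if anything was dirty, flush the shadow PTEs covering the slot. A hedged sketch of the matching userspace side; the caller must size the bitmap at one bit per page of the slot, rounded up to whole bytes:

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int fetch_dirty_log(int vm_fd, unsigned int slot,
			   void *bitmap, size_t bitmap_bytes)
{
	struct kvm_dirty_log log;

	memset(bitmap, 0, bitmap_bytes);	/* npages bits, rounded up */
	memset(&log, 0, sizeof(log));
	log.slot = slot;
	log.dirty_bitmap = bitmap;

	/* kernel copies and clears its bitmap; set bits => page dirtied */
	return ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
}
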
1008 | int kvmppc_core_prepare_memory_region(struct kvm *kvm, | 1132 | int kvmppc_core_prepare_memory_region(struct kvm *kvm, |
1009 | struct kvm_userspace_memory_region *mem) | 1133 | struct kvm_userspace_memory_region *mem) |
1010 | { | 1134 | { |
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index bb6c988f010a..ee9e1ee9c858 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -124,12 +124,6 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) | |||
124 | vcpu->arch.shared->msr = new_msr; | 124 | vcpu->arch.shared->msr = new_msr; |
125 | 125 | ||
126 | kvmppc_mmu_msr_notify(vcpu, old_msr); | 126 | kvmppc_mmu_msr_notify(vcpu, old_msr); |
127 | |||
128 | if (vcpu->arch.shared->msr & MSR_WE) { | ||
129 | kvm_vcpu_block(vcpu); | ||
130 | kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); | ||
131 | }; | ||
132 | |||
133 | kvmppc_vcpu_sync_spe(vcpu); | 127 | kvmppc_vcpu_sync_spe(vcpu); |
134 | } | 128 | } |
135 | 129 | ||
@@ -258,9 +252,11 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, | |||
258 | allowed = vcpu->arch.shared->msr & MSR_ME; | 252 | allowed = vcpu->arch.shared->msr & MSR_ME; |
259 | msr_mask = 0; | 253 | msr_mask = 0; |
260 | break; | 254 | break; |
261 | case BOOKE_IRQPRIO_EXTERNAL: | ||
262 | case BOOKE_IRQPRIO_DECREMENTER: | 255 | case BOOKE_IRQPRIO_DECREMENTER: |
263 | case BOOKE_IRQPRIO_FIT: | 256 | case BOOKE_IRQPRIO_FIT: |
257 | keep_irq = true; | ||
258 | /* fall through */ | ||
259 | case BOOKE_IRQPRIO_EXTERNAL: | ||
264 | allowed = vcpu->arch.shared->msr & MSR_EE; | 260 | allowed = vcpu->arch.shared->msr & MSR_EE; |
265 | allowed = allowed && !crit; | 261 | allowed = allowed && !crit; |
266 | msr_mask = MSR_CE|MSR_ME|MSR_DE; | 262 | msr_mask = MSR_CE|MSR_ME|MSR_DE; |
@@ -276,7 +272,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, | |||
276 | vcpu->arch.shared->srr1 = vcpu->arch.shared->msr; | 272 | vcpu->arch.shared->srr1 = vcpu->arch.shared->msr; |
277 | vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; | 273 | vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; |
278 | if (update_esr == true) | 274 | if (update_esr == true) |
279 | vcpu->arch.esr = vcpu->arch.queued_esr; | 275 | vcpu->arch.shared->esr = vcpu->arch.queued_esr; |
280 | if (update_dear == true) | 276 | if (update_dear == true) |
281 | vcpu->arch.shared->dar = vcpu->arch.queued_dear; | 277 | vcpu->arch.shared->dar = vcpu->arch.queued_dear; |
282 | kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask); | 278 | kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask); |
@@ -288,13 +284,26 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, | |||
288 | return allowed; | 284 | return allowed; |
289 | } | 285 | } |
290 | 286 | ||
291 | /* Check pending exceptions and deliver one, if possible. */ | 287 | static void update_timer_ints(struct kvm_vcpu *vcpu) |
292 | void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) | 288 | { |
289 | if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS)) | ||
290 | kvmppc_core_queue_dec(vcpu); | ||
291 | else | ||
292 | kvmppc_core_dequeue_dec(vcpu); | ||
293 | } | ||
294 | |||
295 | static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu) | ||
293 | { | 296 | { |
294 | unsigned long *pending = &vcpu->arch.pending_exceptions; | 297 | unsigned long *pending = &vcpu->arch.pending_exceptions; |
295 | unsigned long old_pending = vcpu->arch.pending_exceptions; | ||
296 | unsigned int priority; | 298 | unsigned int priority; |
297 | 299 | ||
300 | if (vcpu->requests) { | ||
301 | if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) { | ||
302 | smp_mb(); | ||
303 | update_timer_ints(vcpu); | ||
304 | } | ||
305 | } | ||
306 | |||
298 | priority = __ffs(*pending); | 307 | priority = __ffs(*pending); |
299 | while (priority <= BOOKE_IRQPRIO_MAX) { | 308 | while (priority <= BOOKE_IRQPRIO_MAX) { |
300 | if (kvmppc_booke_irqprio_deliver(vcpu, priority)) | 309 | if (kvmppc_booke_irqprio_deliver(vcpu, priority)) |
@@ -306,10 +315,24 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) | |||
306 | } | 315 | } |
307 | 316 | ||
308 | /* Tell the guest about our interrupt status */ | 317 | /* Tell the guest about our interrupt status */ |
309 | if (*pending) | 318 | vcpu->arch.shared->int_pending = !!*pending; |
310 | vcpu->arch.shared->int_pending = 1; | 319 | } |
311 | else if (old_pending) | 320 | |
312 | vcpu->arch.shared->int_pending = 0; | 321 | /* Check pending exceptions and deliver one, if possible. */ |
322 | void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) | ||
323 | { | ||
324 | WARN_ON_ONCE(!irqs_disabled()); | ||
325 | |||
326 | kvmppc_core_check_exceptions(vcpu); | ||
327 | |||
328 | if (vcpu->arch.shared->msr & MSR_WE) { | ||
329 | local_irq_enable(); | ||
330 | kvm_vcpu_block(vcpu); | ||
331 | local_irq_disable(); | ||
332 | |||
333 | kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); | ||
334 | kvmppc_core_check_exceptions(vcpu); | ||
335 | } | ||
313 | } | 336 | } |
314 | 337 | ||
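
update_timer_ints() above encodes the booke decrementer contract: the interrupt is queued exactly while both TCR[DIE] (enable) and TSR[DIS] (status) are set, and dequeued otherwise, so clearing TSR[DIS] through the new kvmppc_clr_tsr_bits() path retracts a pending but not-yet-delivered interrupt. The predicate in isolation, with illustrative bit positions:

#include <stdint.h>

#define TCR_DIE (1u << 26)	/* decrementer intr enable (illustrative) */
#define TSR_DIS (1u << 27)	/* decrementer intr status (illustrative) */

static int dec_interrupt_pending(uint32_t tcr, uint32_t tsr)
{
	return (tcr & TCR_DIE) && (tsr & TSR_DIS);
}
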
315 | int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | 338 | int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) |
@@ -322,11 +345,21 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
322 | } | 345 | } |
323 | 346 | ||
324 | local_irq_disable(); | 347 | local_irq_disable(); |
348 | |||
349 | kvmppc_core_prepare_to_enter(vcpu); | ||
350 | |||
351 | if (signal_pending(current)) { | ||
352 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
353 | ret = -EINTR; | ||
354 | goto out; | ||
355 | } | ||
356 | |||
325 | kvm_guest_enter(); | 357 | kvm_guest_enter(); |
326 | ret = __kvmppc_vcpu_run(kvm_run, vcpu); | 358 | ret = __kvmppc_vcpu_run(kvm_run, vcpu); |
327 | kvm_guest_exit(); | 359 | kvm_guest_exit(); |
328 | local_irq_enable(); | ||
329 | 360 | ||
361 | out: | ||
362 | local_irq_enable(); | ||
330 | return ret; | 363 | return ret; |
331 | } | 364 | } |
332 | 365 | ||
@@ -603,7 +636,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
603 | 636 | ||
604 | local_irq_disable(); | 637 | local_irq_disable(); |
605 | 638 | ||
606 | kvmppc_core_deliver_interrupts(vcpu); | 639 | kvmppc_core_prepare_to_enter(vcpu); |
607 | 640 | ||
608 | if (!(r & RESUME_HOST)) { | 641 | if (!(r & RESUME_HOST)) { |
609 | /* To avoid clobbering exit_reason, only check for signals if | 642 | /* To avoid clobbering exit_reason, only check for signals if |
@@ -628,6 +661,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
628 | vcpu->arch.pc = 0; | 661 | vcpu->arch.pc = 0; |
629 | vcpu->arch.shared->msr = 0; | 662 | vcpu->arch.shared->msr = 0; |
630 | vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS; | 663 | vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS; |
664 | vcpu->arch.shared->pir = vcpu->vcpu_id; | ||
631 | kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ | 665 | kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ |
632 | 666 | ||
633 | vcpu->arch.shadow_pid = 1; | 667 | vcpu->arch.shadow_pid = 1; |
@@ -662,10 +696,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
662 | regs->sprg1 = vcpu->arch.shared->sprg1; | 696 | regs->sprg1 = vcpu->arch.shared->sprg1; |
663 | regs->sprg2 = vcpu->arch.shared->sprg2; | 697 | regs->sprg2 = vcpu->arch.shared->sprg2; |
664 | regs->sprg3 = vcpu->arch.shared->sprg3; | 698 | regs->sprg3 = vcpu->arch.shared->sprg3; |
665 | regs->sprg4 = vcpu->arch.sprg4; | 699 | regs->sprg4 = vcpu->arch.shared->sprg4; |
666 | regs->sprg5 = vcpu->arch.sprg5; | 700 | regs->sprg5 = vcpu->arch.shared->sprg5; |
667 | regs->sprg6 = vcpu->arch.sprg6; | 701 | regs->sprg6 = vcpu->arch.shared->sprg6; |
668 | regs->sprg7 = vcpu->arch.sprg7; | 702 | regs->sprg7 = vcpu->arch.shared->sprg7; |
669 | 703 | ||
670 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 704 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
671 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); | 705 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); |
@@ -690,10 +724,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
690 | vcpu->arch.shared->sprg1 = regs->sprg1; | 724 | vcpu->arch.shared->sprg1 = regs->sprg1; |
691 | vcpu->arch.shared->sprg2 = regs->sprg2; | 725 | vcpu->arch.shared->sprg2 = regs->sprg2; |
692 | vcpu->arch.shared->sprg3 = regs->sprg3; | 726 | vcpu->arch.shared->sprg3 = regs->sprg3; |
693 | vcpu->arch.sprg4 = regs->sprg4; | 727 | vcpu->arch.shared->sprg4 = regs->sprg4; |
694 | vcpu->arch.sprg5 = regs->sprg5; | 728 | vcpu->arch.shared->sprg5 = regs->sprg5; |
695 | vcpu->arch.sprg6 = regs->sprg6; | 729 | vcpu->arch.shared->sprg6 = regs->sprg6; |
696 | vcpu->arch.sprg7 = regs->sprg7; | 730 | vcpu->arch.shared->sprg7 = regs->sprg7; |
697 | 731 | ||
698 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 732 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
699 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); | 733 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); |
@@ -711,7 +745,7 @@ static void get_sregs_base(struct kvm_vcpu *vcpu, | |||
711 | sregs->u.e.csrr0 = vcpu->arch.csrr0; | 745 | sregs->u.e.csrr0 = vcpu->arch.csrr0; |
712 | sregs->u.e.csrr1 = vcpu->arch.csrr1; | 746 | sregs->u.e.csrr1 = vcpu->arch.csrr1; |
713 | sregs->u.e.mcsr = vcpu->arch.mcsr; | 747 | sregs->u.e.mcsr = vcpu->arch.mcsr; |
714 | sregs->u.e.esr = vcpu->arch.esr; | 748 | sregs->u.e.esr = vcpu->arch.shared->esr; |
715 | sregs->u.e.dear = vcpu->arch.shared->dar; | 749 | sregs->u.e.dear = vcpu->arch.shared->dar; |
716 | sregs->u.e.tsr = vcpu->arch.tsr; | 750 | sregs->u.e.tsr = vcpu->arch.tsr; |
717 | sregs->u.e.tcr = vcpu->arch.tcr; | 751 | sregs->u.e.tcr = vcpu->arch.tcr; |
@@ -729,28 +763,19 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, | |||
729 | vcpu->arch.csrr0 = sregs->u.e.csrr0; | 763 | vcpu->arch.csrr0 = sregs->u.e.csrr0; |
730 | vcpu->arch.csrr1 = sregs->u.e.csrr1; | 764 | vcpu->arch.csrr1 = sregs->u.e.csrr1; |
731 | vcpu->arch.mcsr = sregs->u.e.mcsr; | 765 | vcpu->arch.mcsr = sregs->u.e.mcsr; |
732 | vcpu->arch.esr = sregs->u.e.esr; | 766 | vcpu->arch.shared->esr = sregs->u.e.esr; |
733 | vcpu->arch.shared->dar = sregs->u.e.dear; | 767 | vcpu->arch.shared->dar = sregs->u.e.dear; |
734 | vcpu->arch.vrsave = sregs->u.e.vrsave; | 768 | vcpu->arch.vrsave = sregs->u.e.vrsave; |
735 | vcpu->arch.tcr = sregs->u.e.tcr; | 769 | kvmppc_set_tcr(vcpu, sregs->u.e.tcr); |
736 | 770 | ||
737 | if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) | 771 | if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) { |
738 | vcpu->arch.dec = sregs->u.e.dec; | 772 | vcpu->arch.dec = sregs->u.e.dec; |
739 | 773 | kvmppc_emulate_dec(vcpu); | |
740 | kvmppc_emulate_dec(vcpu); | 774 | } |
741 | 775 | ||
742 | if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { | 776 | if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { |
743 | /* | 777 | vcpu->arch.tsr = sregs->u.e.tsr; |
744 | * FIXME: existing KVM timer handling is incomplete. | 778 | update_timer_ints(vcpu); |
745 | * TSR cannot be read by the guest, and its value in | ||
746 | * vcpu->arch is always zero. For now, just handle | ||
747 | * the case where the caller is trying to inject a | ||
748 | * decrementer interrupt. | ||
749 | */ | ||
750 | |||
751 | if ((sregs->u.e.tsr & TSR_DIS) && | ||
752 | (vcpu->arch.tcr & TCR_DIE)) | ||
753 | kvmppc_core_queue_dec(vcpu); | ||
754 | } | 779 | } |
755 | 780 | ||
756 | return 0; | 781 | return 0; |
@@ -761,7 +786,7 @@ static void get_sregs_arch206(struct kvm_vcpu *vcpu, | |||
761 | { | 786 | { |
762 | sregs->u.e.features |= KVM_SREGS_E_ARCH206; | 787 | sregs->u.e.features |= KVM_SREGS_E_ARCH206; |
763 | 788 | ||
764 | sregs->u.e.pir = 0; | 789 | sregs->u.e.pir = vcpu->vcpu_id; |
765 | sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0; | 790 | sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0; |
766 | sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1; | 791 | sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1; |
767 | sregs->u.e.decar = vcpu->arch.decar; | 792 | sregs->u.e.decar = vcpu->arch.decar; |
@@ -774,7 +799,7 @@ static int set_sregs_arch206(struct kvm_vcpu *vcpu, | |||
774 | if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206)) | 799 | if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206)) |
775 | return 0; | 800 | return 0; |
776 | 801 | ||
777 | if (sregs->u.e.pir != 0) | 802 | if (sregs->u.e.pir != vcpu->vcpu_id) |
778 | return -EINVAL; | 803 | return -EINVAL; |
779 | 804 | ||
780 | vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0; | 805 | vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0; |
@@ -862,6 +887,16 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
862 | return kvmppc_core_set_sregs(vcpu, sregs); | 887 | return kvmppc_core_set_sregs(vcpu, sregs); |
863 | } | 888 | } |
864 | 889 | ||
890 | int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | ||
891 | { | ||
892 | return -EINVAL; | ||
893 | } | ||
894 | |||
895 | int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | ||
896 | { | ||
897 | return -EINVAL; | ||
898 | } | ||
899 | |||
865 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 900 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
866 | { | 901 | { |
867 | return -ENOTSUPP; | 902 | return -ENOTSUPP; |
@@ -906,6 +941,33 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) | |||
906 | { | 941 | { |
907 | } | 942 | } |
908 | 943 | ||
944 | void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr) | ||
945 | { | ||
946 | vcpu->arch.tcr = new_tcr; | ||
947 | update_timer_ints(vcpu); | ||
948 | } | ||
949 | |||
950 | void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) | ||
951 | { | ||
952 | set_bits(tsr_bits, &vcpu->arch.tsr); | ||
953 | smp_wmb(); | ||
954 | kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); | ||
955 | kvm_vcpu_kick(vcpu); | ||
956 | } | ||
957 | |||
958 | void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) | ||
959 | { | ||
960 | clear_bits(tsr_bits, &vcpu->arch.tsr); | ||
961 | update_timer_ints(vcpu); | ||
962 | } | ||
963 | |||
964 | void kvmppc_decrementer_func(unsigned long data) | ||
965 | { | ||
966 | struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; | ||
967 | |||
968 | kvmppc_set_tsr_bits(vcpu, TSR_DIS); | ||
969 | } | ||
970 | |||
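
kvmppc_set_tsr_bits() above runs from the decrementer timer callback, so it cannot rework vcpu state directly; it publishes the TSR update, raises KVM_REQ_PENDING_TIMER behind a write barrier, and kicks the vcpu, which re-evaluates in kvmppc_core_check_exceptions() on its way into the guest. A lock-free sketch of that producer/consumer ordering using C11 atomics; all names here are invented:

#include <stdatomic.h>

#define REQ_PENDING_TIMER 1u

struct timer_state {
	atomic_uint tsr;	/* timer status bits */
	atomic_uint requests;	/* vcpu request bitmap */
};

static void timer_fires(struct timer_state *t, unsigned int tsr_bits)
{
	atomic_fetch_or_explicit(&t->tsr, tsr_bits, memory_order_release);
	atomic_fetch_or(&t->requests, REQ_PENDING_TIMER);
	/* then kick: an IPI/wakeup makes the vcpu run its exception
	 * check before re-entering the guest */
}

static int vcpu_sees_timer(struct timer_state *t)
{
	if (atomic_fetch_and(&t->requests, ~REQ_PENDING_TIMER)
	    & REQ_PENDING_TIMER)
		return atomic_load_explicit(&t->tsr,
					    memory_order_acquire) != 0;
	return 0;
}
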
909 | int __init kvmppc_booke_init(void) | 971 | int __init kvmppc_booke_init(void) |
910 | { | 972 | { |
911 | unsigned long ivor[16]; | 973 | unsigned long ivor[16]; |
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index 8e1fe33d64e5..2fe202705a3f 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -55,6 +55,10 @@ extern unsigned long kvmppc_booke_handlers; | |||
55 | void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr); | 55 | void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr); |
56 | void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr); | 56 | void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr); |
57 | 57 | ||
58 | void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr); | ||
59 | void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits); | ||
60 | void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits); | ||
61 | |||
58 | int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | 62 | int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, |
59 | unsigned int inst, int *advance); | 63 | unsigned int inst, int *advance); |
60 | int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); | 64 | int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); |
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 1260f5f24c0c..3e652da36534 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -13,6 +13,7 @@ | |||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | 13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
14 | * | 14 | * |
15 | * Copyright IBM Corp. 2008 | 15 | * Copyright IBM Corp. 2008 |
16 | * Copyright 2011 Freescale Semiconductor, Inc. | ||
16 | * | 17 | * |
17 | * Authors: Hollis Blanchard <hollisb@us.ibm.com> | 18 | * Authors: Hollis Blanchard <hollisb@us.ibm.com> |
18 | */ | 19 | */ |
@@ -107,7 +108,7 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | |||
107 | case SPRN_DEAR: | 108 | case SPRN_DEAR: |
108 | vcpu->arch.shared->dar = spr_val; break; | 109 | vcpu->arch.shared->dar = spr_val; break; |
109 | case SPRN_ESR: | 110 | case SPRN_ESR: |
110 | vcpu->arch.esr = spr_val; break; | 111 | vcpu->arch.shared->esr = spr_val; break; |
111 | case SPRN_DBCR0: | 112 | case SPRN_DBCR0: |
112 | vcpu->arch.dbcr0 = spr_val; break; | 113 | vcpu->arch.dbcr0 = spr_val; break; |
113 | case SPRN_DBCR1: | 114 | case SPRN_DBCR1: |
@@ -115,23 +116,23 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | |||
115 | case SPRN_DBSR: | 116 | case SPRN_DBSR: |
116 | vcpu->arch.dbsr &= ~spr_val; break; | 117 | vcpu->arch.dbsr &= ~spr_val; break; |
117 | case SPRN_TSR: | 118 | case SPRN_TSR: |
118 | vcpu->arch.tsr &= ~spr_val; break; | 119 | kvmppc_clr_tsr_bits(vcpu, spr_val); |
120 | break; | ||
119 | case SPRN_TCR: | 121 | case SPRN_TCR: |
120 | vcpu->arch.tcr = spr_val; | 122 | kvmppc_set_tcr(vcpu, spr_val); |
121 | kvmppc_emulate_dec(vcpu); | ||
122 | break; | 123 | break; |
123 | 124 | ||
124 | /* Note: SPRG4-7 are user-readable. These values are | 125 | /* Note: SPRG4-7 are user-readable. These values are |
125 | * loaded into the real SPRGs when resuming the | 126 | * loaded into the real SPRGs when resuming the |
126 | * guest. */ | 127 | * guest. */ |
127 | case SPRN_SPRG4: | 128 | case SPRN_SPRG4: |
128 | vcpu->arch.sprg4 = spr_val; break; | 129 | vcpu->arch.shared->sprg4 = spr_val; break; |
129 | case SPRN_SPRG5: | 130 | case SPRN_SPRG5: |
130 | vcpu->arch.sprg5 = spr_val; break; | 131 | vcpu->arch.shared->sprg5 = spr_val; break; |
131 | case SPRN_SPRG6: | 132 | case SPRN_SPRG6: |
132 | vcpu->arch.sprg6 = spr_val; break; | 133 | vcpu->arch.shared->sprg6 = spr_val; break; |
133 | case SPRN_SPRG7: | 134 | case SPRN_SPRG7: |
134 | vcpu->arch.sprg7 = spr_val; break; | 135 | vcpu->arch.shared->sprg7 = spr_val; break; |
135 | 136 | ||
136 | case SPRN_IVPR: | 137 | case SPRN_IVPR: |
137 | vcpu->arch.ivpr = spr_val; | 138 | vcpu->arch.ivpr = spr_val; |
@@ -202,13 +203,17 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) | |||
202 | case SPRN_DEAR: | 203 | case SPRN_DEAR: |
203 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break; | 204 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break; |
204 | case SPRN_ESR: | 205 | case SPRN_ESR: |
205 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.esr); break; | 206 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->esr); break; |
206 | case SPRN_DBCR0: | 207 | case SPRN_DBCR0: |
207 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break; | 208 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break; |
208 | case SPRN_DBCR1: | 209 | case SPRN_DBCR1: |
209 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break; | 210 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break; |
210 | case SPRN_DBSR: | 211 | case SPRN_DBSR: |
211 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break; | 212 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break; |
213 | case SPRN_TSR: | ||
214 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.tsr); break; | ||
215 | case SPRN_TCR: | ||
216 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.tcr); break; | ||
212 | 217 | ||
213 | case SPRN_IVOR0: | 218 | case SPRN_IVOR0: |
214 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]); | 219 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]); |
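These hunks move ESR and SPRG4-7 into the page shared with the guest while TSR and TCR stay kernel-private, so emulated reads must come from the copy the guest can also touch directly through the magic page. A reduced standalone stand-in (struct layouts are illustrative, not the real kvm_vcpu_arch_shared):

    #include <stdint.h>
    #include <stdio.h>

    struct shared_page { uint64_t esr, sprg4, sprg5, sprg6, sprg7; };
    struct vcpu_arch   { struct shared_page *shared; uint32_t tsr, tcr; };

    static uint64_t emulate_mfspr_esr(const struct vcpu_arch *arch)
    {
            /* The guest may have updated this directly through the magic
             * page, which is why mfspr emulation must read the shared
             * copy rather than a stale kernel-private field. */
            return arch->shared->esr;
    }

    int main(void)
    {
            struct shared_page sp = { .esr = 0x800000 };
            struct vcpu_arch arch = { .shared = &sp };

            printf("ESR = %#llx\n", (unsigned long long)emulate_mfspr_esr(&arch));
            return 0;
    }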
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 42f2fb1f66e9..10d8ef602e5c 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S | |||
@@ -402,19 +402,25 @@ lightweight_exit: | |||
402 | /* Save vcpu pointer for the exception handlers. */ | 402 | /* Save vcpu pointer for the exception handlers. */ |
403 | mtspr SPRN_SPRG_WVCPU, r4 | 403 | mtspr SPRN_SPRG_WVCPU, r4 |
404 | 404 | ||
405 | lwz r5, VCPU_SHARED(r4) | ||
406 | |||
405 | /* Can't switch the stack pointer until after IVPR is switched, | 407 | /* Can't switch the stack pointer until after IVPR is switched, |
406 | * because host interrupt handlers would get confused. */ | 408 | * because host interrupt handlers would get confused. */ |
407 | lwz r1, VCPU_GPR(r1)(r4) | 409 | lwz r1, VCPU_GPR(r1)(r4) |
408 | 410 | ||
409 | /* Host interrupt handlers may have clobbered these guest-readable | 411 | /* |
410 | * SPRGs, so we need to reload them here with the guest's values. */ | 412 | * Host interrupt handlers may have clobbered these |
411 | lwz r3, VCPU_SPRG4(r4) | 413 | * guest-readable SPRGs, or the guest kernel may have |
414 | * written directly to the shared area, so we | ||
415 | * need to reload them here with the guest's values. | ||
416 | */ | ||
417 | lwz r3, VCPU_SHARED_SPRG4(r5) | ||
412 | mtspr SPRN_SPRG4W, r3 | 418 | mtspr SPRN_SPRG4W, r3 |
413 | lwz r3, VCPU_SPRG5(r4) | 419 | lwz r3, VCPU_SHARED_SPRG5(r5) |
414 | mtspr SPRN_SPRG5W, r3 | 420 | mtspr SPRN_SPRG5W, r3 |
415 | lwz r3, VCPU_SPRG6(r4) | 421 | lwz r3, VCPU_SHARED_SPRG6(r5) |
416 | mtspr SPRN_SPRG6W, r3 | 422 | mtspr SPRN_SPRG6W, r3 |
417 | lwz r3, VCPU_SPRG7(r4) | 423 | lwz r3, VCPU_SHARED_SPRG7(r5) |
418 | mtspr SPRN_SPRG7W, r3 | 424 | mtspr SPRN_SPRG7W, r3 |
419 | 425 | ||
420 | #ifdef CONFIG_KVM_EXIT_TIMING | 426 | #ifdef CONFIG_KVM_EXIT_TIMING |
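The assembly change above re-reads SPRG4-7 from the shared page on every lightweight exit, since either the host handlers or the guest itself may have changed them. A C rendering of what the four load/mtspr pairs do (mtspr_stub and the SPR numbering are stand-ins, not real definitions):

    #include <stdint.h>

    struct shared_page { uint32_t sprg4, sprg5, sprg6, sprg7; };

    enum { SPRN_SPRG4W, SPRN_SPRG5W, SPRN_SPRG6W, SPRN_SPRG7W };

    /* Stand-in for the privileged mtspr instruction. */
    static void mtspr_stub(int spr, uint32_t val) { (void)spr; (void)val; }

    static void reload_guest_sprgs(const struct shared_page *shared)
    {
            /* "lwz r5, VCPU_SHARED(r4)" fetched this pointer in the asm. */
            mtspr_stub(SPRN_SPRG4W, shared->sprg4);
            mtspr_stub(SPRN_SPRG5W, shared->sprg5);
            mtspr_stub(SPRN_SPRG6W, shared->sprg6);
            mtspr_stub(SPRN_SPRG7W, shared->sprg7);
    }

    int main(void)
    {
            struct shared_page sp = { 1, 2, 3, 4 };

            reload_guest_sprgs(&sp);
            return 0;
    }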
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 8c0d45a6faf7..ddcd896fa2ff 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c | |||
@@ -71,9 +71,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) | |||
71 | vcpu->arch.pvr = mfspr(SPRN_PVR); | 71 | vcpu->arch.pvr = mfspr(SPRN_PVR); |
72 | vcpu_e500->svr = mfspr(SPRN_SVR); | 72 | vcpu_e500->svr = mfspr(SPRN_SVR); |
73 | 73 | ||
74 | /* Since booke kvm only support one core, update all vcpus' PIR to 0 */ | ||
75 | vcpu->vcpu_id = 0; | ||
76 | |||
77 | vcpu->arch.cpu_type = KVM_CPU_E500V2; | 74 | vcpu->arch.cpu_type = KVM_CPU_E500V2; |
78 | 75 | ||
79 | return 0; | 76 | return 0; |
@@ -118,12 +115,12 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) | |||
118 | sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; | 115 | sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; |
119 | sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; | 116 | sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; |
120 | 117 | ||
121 | sregs->u.e.mas0 = vcpu_e500->mas0; | 118 | sregs->u.e.mas0 = vcpu->arch.shared->mas0; |
122 | sregs->u.e.mas1 = vcpu_e500->mas1; | 119 | sregs->u.e.mas1 = vcpu->arch.shared->mas1; |
123 | sregs->u.e.mas2 = vcpu_e500->mas2; | 120 | sregs->u.e.mas2 = vcpu->arch.shared->mas2; |
124 | sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3; | 121 | sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3; |
125 | sregs->u.e.mas4 = vcpu_e500->mas4; | 122 | sregs->u.e.mas4 = vcpu->arch.shared->mas4; |
126 | sregs->u.e.mas6 = vcpu_e500->mas6; | 123 | sregs->u.e.mas6 = vcpu->arch.shared->mas6; |
127 | 124 | ||
128 | sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG); | 125 | sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG); |
129 | sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg; | 126 | sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg; |
@@ -151,13 +148,12 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) | |||
151 | } | 148 | } |
152 | 149 | ||
153 | if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { | 150 | if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { |
154 | vcpu_e500->mas0 = sregs->u.e.mas0; | 151 | vcpu->arch.shared->mas0 = sregs->u.e.mas0; |
155 | vcpu_e500->mas1 = sregs->u.e.mas1; | 152 | vcpu->arch.shared->mas1 = sregs->u.e.mas1; |
156 | vcpu_e500->mas2 = sregs->u.e.mas2; | 153 | vcpu->arch.shared->mas2 = sregs->u.e.mas2; |
157 | vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32; | 154 | vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3; |
158 | vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3; | 155 | vcpu->arch.shared->mas4 = sregs->u.e.mas4; |
159 | vcpu_e500->mas4 = sregs->u.e.mas4; | 156 | vcpu->arch.shared->mas6 = sregs->u.e.mas6; |
160 | vcpu_e500->mas6 = sregs->u.e.mas6; | ||
161 | } | 157 | } |
162 | 158 | ||
163 | if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) | 159 | if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) |
@@ -233,6 +229,10 @@ static int __init kvmppc_e500_init(void) | |||
233 | unsigned long ivor[3]; | 229 | unsigned long ivor[3]; |
234 | unsigned long max_ivor = 0; | 230 | unsigned long max_ivor = 0; |
235 | 231 | ||
232 | r = kvmppc_core_check_processor_compat(); | ||
233 | if (r) | ||
234 | return r; | ||
235 | |||
236 | r = kvmppc_booke_init(); | 236 | r = kvmppc_booke_init(); |
237 | if (r) | 237 | if (r) |
238 | return r; | 238 | return r; |
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index d48ae396f41e..6d0b2bd54fb0 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c | |||
@@ -89,19 +89,23 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | |||
89 | return EMULATE_FAIL; | 89 | return EMULATE_FAIL; |
90 | vcpu_e500->pid[2] = spr_val; break; | 90 | vcpu_e500->pid[2] = spr_val; break; |
91 | case SPRN_MAS0: | 91 | case SPRN_MAS0: |
92 | vcpu_e500->mas0 = spr_val; break; | 92 | vcpu->arch.shared->mas0 = spr_val; break; |
93 | case SPRN_MAS1: | 93 | case SPRN_MAS1: |
94 | vcpu_e500->mas1 = spr_val; break; | 94 | vcpu->arch.shared->mas1 = spr_val; break; |
95 | case SPRN_MAS2: | 95 | case SPRN_MAS2: |
96 | vcpu_e500->mas2 = spr_val; break; | 96 | vcpu->arch.shared->mas2 = spr_val; break; |
97 | case SPRN_MAS3: | 97 | case SPRN_MAS3: |
98 | vcpu_e500->mas3 = spr_val; break; | 98 | vcpu->arch.shared->mas7_3 &= ~(u64)0xffffffff; |
99 | vcpu->arch.shared->mas7_3 |= spr_val; | ||
100 | break; | ||
99 | case SPRN_MAS4: | 101 | case SPRN_MAS4: |
100 | vcpu_e500->mas4 = spr_val; break; | 102 | vcpu->arch.shared->mas4 = spr_val; break; |
101 | case SPRN_MAS6: | 103 | case SPRN_MAS6: |
102 | vcpu_e500->mas6 = spr_val; break; | 104 | vcpu->arch.shared->mas6 = spr_val; break; |
103 | case SPRN_MAS7: | 105 | case SPRN_MAS7: |
104 | vcpu_e500->mas7 = spr_val; break; | 106 | vcpu->arch.shared->mas7_3 &= (u64)0xffffffff; |
107 | vcpu->arch.shared->mas7_3 |= (u64)spr_val << 32; | ||
108 | break; | ||
105 | case SPRN_L1CSR0: | 109 | case SPRN_L1CSR0: |
106 | vcpu_e500->l1csr0 = spr_val; | 110 | vcpu_e500->l1csr0 = spr_val; |
107 | vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC); | 111 | vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC); |
@@ -143,6 +147,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) | |||
143 | { | 147 | { |
144 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | 148 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); |
145 | int emulated = EMULATE_DONE; | 149 | int emulated = EMULATE_DONE; |
150 | unsigned long val; | ||
146 | 151 | ||
147 | switch (sprn) { | 152 | switch (sprn) { |
148 | case SPRN_PID: | 153 | case SPRN_PID: |
@@ -152,20 +157,23 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) | |||
152 | case SPRN_PID2: | 157 | case SPRN_PID2: |
153 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break; | 158 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break; |
154 | case SPRN_MAS0: | 159 | case SPRN_MAS0: |
155 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas0); break; | 160 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas0); break; |
156 | case SPRN_MAS1: | 161 | case SPRN_MAS1: |
157 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas1); break; | 162 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas1); break; |
158 | case SPRN_MAS2: | 163 | case SPRN_MAS2: |
159 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break; | 164 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas2); break; |
160 | case SPRN_MAS3: | 165 | case SPRN_MAS3: |
161 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break; | 166 | val = (u32)vcpu->arch.shared->mas7_3; |
167 | kvmppc_set_gpr(vcpu, rt, val); | ||
168 | break; | ||
162 | case SPRN_MAS4: | 169 | case SPRN_MAS4: |
163 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break; | 170 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas4); break; |
164 | case SPRN_MAS6: | 171 | case SPRN_MAS6: |
165 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break; | 172 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas6); break; |
166 | case SPRN_MAS7: | 173 | case SPRN_MAS7: |
167 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break; | 174 | val = vcpu->arch.shared->mas7_3 >> 32; |
168 | 175 | kvmppc_set_gpr(vcpu, rt, val); | |
176 | break; | ||
169 | case SPRN_TLB0CFG: | 177 | case SPRN_TLB0CFG: |
170 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break; | 178 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break; |
171 | case SPRN_TLB1CFG: | 179 | case SPRN_TLB1CFG: |
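MAS3 and MAS7 are two 32-bit guest-visible windows onto one 64-bit physical-address field, so the emulation above turns each write into a masked read-modify-write of the combined mas7_3. The arithmetic, standalone:

    #include <stdint.h>
    #include <assert.h>

    /* MAS7 holds the upper physical-address bits, MAS3 the lower 32;
     * storing them as one u64 lets the RPN be manipulated whole. */
    static void set_mas3(uint64_t *mas7_3, uint32_t v)
    {
            *mas7_3 = (*mas7_3 & ~(uint64_t)0xffffffff) | v;
    }

    static void set_mas7(uint64_t *mas7_3, uint32_t v)
    {
            *mas7_3 = (*mas7_3 & (uint64_t)0xffffffff) | ((uint64_t)v << 32);
    }

    int main(void)
    {
            uint64_t mas7_3 = 0;

            set_mas3(&mas7_3, 0x12345000);
            set_mas7(&mas7_3, 0xf);                   /* 36-bit phys address */
            assert(mas7_3 == 0xf12345000ULL);
            assert((uint32_t)mas7_3 == 0x12345000);   /* MAS3 read */
            assert((uint32_t)(mas7_3 >> 32) == 0xf);  /* MAS7 read */
            return 0;
    }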
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 13c432ea2fa8..6e53e4164de1 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c | |||
@@ -12,12 +12,19 @@ | |||
12 | * published by the Free Software Foundation. | 12 | * published by the Free Software Foundation. |
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/kernel.h> | ||
15 | #include <linux/types.h> | 16 | #include <linux/types.h> |
16 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
17 | #include <linux/string.h> | 18 | #include <linux/string.h> |
18 | #include <linux/kvm.h> | 19 | #include <linux/kvm.h> |
19 | #include <linux/kvm_host.h> | 20 | #include <linux/kvm_host.h> |
20 | #include <linux/highmem.h> | 21 | #include <linux/highmem.h> |
22 | #include <linux/log2.h> | ||
23 | #include <linux/uaccess.h> | ||
24 | #include <linux/sched.h> | ||
25 | #include <linux/rwsem.h> | ||
26 | #include <linux/vmalloc.h> | ||
27 | #include <linux/hugetlb.h> | ||
21 | #include <asm/kvm_ppc.h> | 28 | #include <asm/kvm_ppc.h> |
22 | #include <asm/kvm_e500.h> | 29 | #include <asm/kvm_e500.h> |
23 | 30 | ||
@@ -26,7 +33,7 @@ | |||
26 | #include "trace.h" | 33 | #include "trace.h" |
27 | #include "timing.h" | 34 | #include "timing.h" |
28 | 35 | ||
29 | #define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1) | 36 | #define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1) |
30 | 37 | ||
31 | struct id { | 38 | struct id { |
32 | unsigned long val; | 39 | unsigned long val; |
@@ -63,7 +70,14 @@ static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids); | |||
63 | * The valid range of shadow ID is [1..255] */ | 70 | * The valid range of shadow ID is [1..255] */ |
64 | static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid); | 71 | static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid); |
65 | 72 | ||
66 | static unsigned int tlb1_entry_num; | 73 | static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM]; |
74 | |||
75 | static struct kvm_book3e_206_tlb_entry *get_entry( | ||
76 | struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry) | ||
77 | { | ||
78 | int offset = vcpu_e500->gtlb_offset[tlbsel]; | ||
79 | return &vcpu_e500->gtlb_arch[offset + entry]; | ||
80 | } | ||
67 | 81 | ||
68 | /* | 82 | /* |
69 | * Allocate a free shadow id and setup a valid sid mapping in given entry. | 83 | * Allocate a free shadow id and setup a valid sid mapping in given entry. |
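get_entry() above replaces the old two-dimensional gtlb_arch[tlbsel][esel] with one flat array plus per-TLB offsets; a single contiguous array is what later allows the whole guest TLB to be mapped from userspace. A minimal sketch of the indexing (sizes are illustrative, not the patch's defaults):

    #include <stdio.h>

    /* Flat layout assumed by get_entry(): TLB0's entries first, then
     * TLB1's, with gtlb_offset[] recording where each TLB starts. */
    #define TLB0_ENTRIES 256
    #define TLB1_ENTRIES 16

    struct entry { unsigned int mas1; };

    int main(void)
    {
            struct entry gtlb_arch[TLB0_ENTRIES + TLB1_ENTRIES];
            int gtlb_offset[2] = { 0, TLB0_ENTRIES };

            /* get_entry(vcpu_e500, 1, 3) boils down to: */
            struct entry *e = &gtlb_arch[gtlb_offset[1] + 3];

            printf("TLB1[3] sits at flat index %d\n", (int)(e - gtlb_arch));
            return 0;
    }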
@@ -116,13 +130,11 @@ static inline int local_sid_lookup(struct id *entry) | |||
116 | return -1; | 130 | return -1; |
117 | } | 131 | } |
118 | 132 | ||
119 | /* Invalidate all id mappings on local core */ | 133 | /* Invalidate all id mappings on local core -- call with preempt disabled */ |
120 | static inline void local_sid_destroy_all(void) | 134 | static inline void local_sid_destroy_all(void) |
121 | { | 135 | { |
122 | preempt_disable(); | ||
123 | __get_cpu_var(pcpu_last_used_sid) = 0; | 136 | __get_cpu_var(pcpu_last_used_sid) = 0; |
124 | memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids))); | 137 | memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids))); |
125 | preempt_enable(); | ||
126 | } | 138 | } |
127 | 139 | ||
128 | static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500) | 140 | static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500) |
@@ -218,34 +230,13 @@ void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500) | |||
218 | preempt_enable(); | 230 | preempt_enable(); |
219 | } | 231 | } |
220 | 232 | ||
221 | void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) | 233 | static inline unsigned int gtlb0_get_next_victim( |
222 | { | ||
223 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | ||
224 | struct tlbe *tlbe; | ||
225 | int i, tlbsel; | ||
226 | |||
227 | printk("| %8s | %8s | %8s | %8s | %8s |\n", | ||
228 | "nr", "mas1", "mas2", "mas3", "mas7"); | ||
229 | |||
230 | for (tlbsel = 0; tlbsel < 2; tlbsel++) { | ||
231 | printk("Guest TLB%d:\n", tlbsel); | ||
232 | for (i = 0; i < vcpu_e500->gtlb_size[tlbsel]; i++) { | ||
233 | tlbe = &vcpu_e500->gtlb_arch[tlbsel][i]; | ||
234 | if (tlbe->mas1 & MAS1_VALID) | ||
235 | printk(" G[%d][%3d] | %08X | %08X | %08X | %08X |\n", | ||
236 | tlbsel, i, tlbe->mas1, tlbe->mas2, | ||
237 | tlbe->mas3, tlbe->mas7); | ||
238 | } | ||
239 | } | ||
240 | } | ||
241 | |||
242 | static inline unsigned int tlb0_get_next_victim( | ||
243 | struct kvmppc_vcpu_e500 *vcpu_e500) | 234 | struct kvmppc_vcpu_e500 *vcpu_e500) |
244 | { | 235 | { |
245 | unsigned int victim; | 236 | unsigned int victim; |
246 | 237 | ||
247 | victim = vcpu_e500->gtlb_nv[0]++; | 238 | victim = vcpu_e500->gtlb_nv[0]++; |
248 | if (unlikely(vcpu_e500->gtlb_nv[0] >= KVM_E500_TLB0_WAY_NUM)) | 239 | if (unlikely(vcpu_e500->gtlb_nv[0] >= vcpu_e500->gtlb_params[0].ways)) |
249 | vcpu_e500->gtlb_nv[0] = 0; | 240 | vcpu_e500->gtlb_nv[0] = 0; |
250 | 241 | ||
251 | return victim; | 242 | return victim; |
@@ -254,12 +245,12 @@ static inline unsigned int tlb0_get_next_victim( | |||
254 | static inline unsigned int tlb1_max_shadow_size(void) | 245 | static inline unsigned int tlb1_max_shadow_size(void) |
255 | { | 246 | { |
256 | /* reserve one entry for magic page */ | 247 | /* reserve one entry for magic page */ |
257 | return tlb1_entry_num - tlbcam_index - 1; | 248 | return host_tlb_params[1].entries - tlbcam_index - 1; |
258 | } | 249 | } |
259 | 250 | ||
260 | static inline int tlbe_is_writable(struct tlbe *tlbe) | 251 | static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) |
261 | { | 252 | { |
262 | return tlbe->mas3 & (MAS3_SW|MAS3_UW); | 253 | return tlbe->mas7_3 & (MAS3_SW|MAS3_UW); |
263 | } | 254 | } |
264 | 255 | ||
265 | static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) | 256 | static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) |
@@ -290,40 +281,66 @@ static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) | |||
290 | /* | 281 | /* |
291 | * writing shadow tlb entry to host TLB | 282 | * writing shadow tlb entry to host TLB |
292 | */ | 283 | */ |
293 | static inline void __write_host_tlbe(struct tlbe *stlbe, uint32_t mas0) | 284 | static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, |
285 | uint32_t mas0) | ||
294 | { | 286 | { |
295 | unsigned long flags; | 287 | unsigned long flags; |
296 | 288 | ||
297 | local_irq_save(flags); | 289 | local_irq_save(flags); |
298 | mtspr(SPRN_MAS0, mas0); | 290 | mtspr(SPRN_MAS0, mas0); |
299 | mtspr(SPRN_MAS1, stlbe->mas1); | 291 | mtspr(SPRN_MAS1, stlbe->mas1); |
300 | mtspr(SPRN_MAS2, stlbe->mas2); | 292 | mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2); |
301 | mtspr(SPRN_MAS3, stlbe->mas3); | 293 | mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); |
302 | mtspr(SPRN_MAS7, stlbe->mas7); | 294 | mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); |
303 | asm volatile("isync; tlbwe" : : : "memory"); | 295 | asm volatile("isync; tlbwe" : : : "memory"); |
304 | local_irq_restore(flags); | 296 | local_irq_restore(flags); |
297 | |||
298 | trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1, | ||
299 | stlbe->mas2, stlbe->mas7_3); | ||
300 | } | ||
301 | |||
302 | /* | ||
303 | * Acquire a mas0 with victim hint, as if we just took a TLB miss. | ||
304 | * | ||
305 | * We don't care about the address we're searching for, other than that it's | ||
306 | * in the right set and is not present in the TLB. Using a zero PID and a | ||
307 | * userspace address means we don't have to set and then restore MAS5, or | ||
308 | * calculate a proper MAS6 value. | ||
309 | */ | ||
310 | static u32 get_host_mas0(unsigned long eaddr) | ||
311 | { | ||
312 | unsigned long flags; | ||
313 | u32 mas0; | ||
314 | |||
315 | local_irq_save(flags); | ||
316 | mtspr(SPRN_MAS6, 0); | ||
317 | asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET)); | ||
318 | mas0 = mfspr(SPRN_MAS0); | ||
319 | local_irq_restore(flags); | ||
320 | |||
321 | return mas0; | ||
305 | } | 322 | } |
306 | 323 | ||
324 | /* sesel is for tlb1 only */ | ||
307 | static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, | 325 | static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, |
308 | int tlbsel, int esel, struct tlbe *stlbe) | 326 | int tlbsel, int sesel, struct kvm_book3e_206_tlb_entry *stlbe) |
309 | { | 327 | { |
328 | u32 mas0; | ||
329 | |||
310 | if (tlbsel == 0) { | 330 | if (tlbsel == 0) { |
311 | __write_host_tlbe(stlbe, | 331 | mas0 = get_host_mas0(stlbe->mas2); |
312 | MAS0_TLBSEL(0) | | 332 | __write_host_tlbe(stlbe, mas0); |
313 | MAS0_ESEL(esel & (KVM_E500_TLB0_WAY_NUM - 1))); | ||
314 | } else { | 333 | } else { |
315 | __write_host_tlbe(stlbe, | 334 | __write_host_tlbe(stlbe, |
316 | MAS0_TLBSEL(1) | | 335 | MAS0_TLBSEL(1) | |
317 | MAS0_ESEL(to_htlb1_esel(esel))); | 336 | MAS0_ESEL(to_htlb1_esel(sesel))); |
318 | } | 337 | } |
319 | trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2, | ||
320 | stlbe->mas3, stlbe->mas7); | ||
321 | } | 338 | } |
322 | 339 | ||
323 | void kvmppc_map_magic(struct kvm_vcpu *vcpu) | 340 | void kvmppc_map_magic(struct kvm_vcpu *vcpu) |
324 | { | 341 | { |
325 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | 342 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); |
326 | struct tlbe magic; | 343 | struct kvm_book3e_206_tlb_entry magic; |
327 | ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; | 344 | ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; |
328 | unsigned int stid; | 345 | unsigned int stid; |
329 | pfn_t pfn; | 346 | pfn_t pfn; |
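get_host_mas0() above replaces software tracking of host TLB0 victims: a tlbsx that is guaranteed to miss makes the hardware itself report the target set and a next-victim way in MAS0, and a zero PID plus a user address (below PAGE_OFFSET) can never match a host kernel mapping. A toy model of the idea (the MAS0 encoding here is invented):

    #include <stdint.h>
    #include <stdio.h>

    #define SETS 128
    #define WAYS 4

    static unsigned int next_victim[SETS];

    static uint32_t toy_tlbsx_miss(uint32_t eaddr)
    {
            uint32_t set = (eaddr >> 12) & (SETS - 1);  /* set from EPN */
            uint32_t way = next_victim[set]++ % WAYS;   /* NV victim hint */

            return (set << 16) | way;                   /* toy MAS0 encoding */
    }

    int main(void)
    {
            printf("mas0 = %#x\n", (unsigned)toy_tlbsx_miss(0x10002000));
            return 0;
    }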
@@ -337,9 +354,9 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu) | |||
337 | magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) | | 354 | magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) | |
338 | MAS1_TSIZE(BOOK3E_PAGESZ_4K); | 355 | MAS1_TSIZE(BOOK3E_PAGESZ_4K); |
339 | magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M; | 356 | magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M; |
340 | magic.mas3 = (pfn << PAGE_SHIFT) | | 357 | magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) | |
341 | MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; | 358 | MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; |
342 | magic.mas7 = pfn >> (32 - PAGE_SHIFT); | 359 | magic.mas8 = 0; |
343 | 360 | ||
344 | __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); | 361 | __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); |
345 | preempt_enable(); | 362 | preempt_enable(); |
@@ -357,10 +374,11 @@ void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu) | |||
357 | { | 374 | { |
358 | } | 375 | } |
359 | 376 | ||
360 | static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, | 377 | static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, |
361 | int tlbsel, int esel) | 378 | int tlbsel, int esel) |
362 | { | 379 | { |
363 | struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; | 380 | struct kvm_book3e_206_tlb_entry *gtlbe = |
381 | get_entry(vcpu_e500, tlbsel, esel); | ||
364 | struct vcpu_id_table *idt = vcpu_e500->idt; | 382 | struct vcpu_id_table *idt = vcpu_e500->idt; |
365 | unsigned int pr, tid, ts, pid; | 383 | unsigned int pr, tid, ts, pid; |
366 | u32 val, eaddr; | 384 | u32 val, eaddr; |
@@ -414,25 +432,57 @@ static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, | |||
414 | preempt_enable(); | 432 | preempt_enable(); |
415 | } | 433 | } |
416 | 434 | ||
435 | static int tlb0_set_base(gva_t addr, int sets, int ways) | ||
436 | { | ||
437 | int set_base; | ||
438 | |||
439 | set_base = (addr >> PAGE_SHIFT) & (sets - 1); | ||
440 | set_base *= ways; | ||
441 | |||
442 | return set_base; | ||
443 | } | ||
444 | |||
445 | static int gtlb0_set_base(struct kvmppc_vcpu_e500 *vcpu_e500, gva_t addr) | ||
446 | { | ||
447 | return tlb0_set_base(addr, vcpu_e500->gtlb_params[0].sets, | ||
448 | vcpu_e500->gtlb_params[0].ways); | ||
449 | } | ||
450 | |||
451 | static unsigned int get_tlb_esel(struct kvm_vcpu *vcpu, int tlbsel) | ||
452 | { | ||
453 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | ||
454 | int esel = get_tlb_esel_bit(vcpu); | ||
455 | |||
456 | if (tlbsel == 0) { | ||
457 | esel &= vcpu_e500->gtlb_params[0].ways - 1; | ||
458 | esel += gtlb0_set_base(vcpu_e500, vcpu->arch.shared->mas2); | ||
459 | } else { | ||
460 | esel &= vcpu_e500->gtlb_params[tlbsel].entries - 1; | ||
461 | } | ||
462 | |||
463 | return esel; | ||
464 | } | ||
465 | |||
417 | /* Search the guest TLB for a matching entry. */ | 466 | /* Search the guest TLB for a matching entry. */ |
418 | static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, | 467 | static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, |
419 | gva_t eaddr, int tlbsel, unsigned int pid, int as) | 468 | gva_t eaddr, int tlbsel, unsigned int pid, int as) |
420 | { | 469 | { |
421 | int size = vcpu_e500->gtlb_size[tlbsel]; | 470 | int size = vcpu_e500->gtlb_params[tlbsel].entries; |
422 | int set_base; | 471 | unsigned int set_base, offset; |
423 | int i; | 472 | int i; |
424 | 473 | ||
425 | if (tlbsel == 0) { | 474 | if (tlbsel == 0) { |
426 | int mask = size / KVM_E500_TLB0_WAY_NUM - 1; | 475 | set_base = gtlb0_set_base(vcpu_e500, eaddr); |
427 | set_base = (eaddr >> PAGE_SHIFT) & mask; | 476 | size = vcpu_e500->gtlb_params[0].ways; |
428 | set_base *= KVM_E500_TLB0_WAY_NUM; | ||
429 | size = KVM_E500_TLB0_WAY_NUM; | ||
430 | } else { | 477 | } else { |
431 | set_base = 0; | 478 | set_base = 0; |
432 | } | 479 | } |
433 | 480 | ||
481 | offset = vcpu_e500->gtlb_offset[tlbsel]; | ||
482 | |||
434 | for (i = 0; i < size; i++) { | 483 | for (i = 0; i < size; i++) { |
435 | struct tlbe *tlbe = &vcpu_e500->gtlb_arch[tlbsel][set_base + i]; | 484 | struct kvm_book3e_206_tlb_entry *tlbe = |
485 | &vcpu_e500->gtlb_arch[offset + set_base + i]; | ||
436 | unsigned int tid; | 486 | unsigned int tid; |
437 | 487 | ||
438 | if (eaddr < get_tlb_eaddr(tlbe)) | 488 | if (eaddr < get_tlb_eaddr(tlbe)) |
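tlb0_set_base() confines every TLB0 lookup and victim choice to one set, exactly as a set-associative hardware TLB would. The worked arithmetic, assuming the usual 4 KiB page shift:

    #include <assert.h>

    /* Pick the set from the page number, then scale by ways to get
     * the set's base index in the flat entry array. */
    static int tlb0_set_base(unsigned long addr, int sets, int ways)
    {
            return ((addr >> 12) & (sets - 1)) * ways;  /* PAGE_SHIFT == 12 */
    }

    int main(void)
    {
            /* Worked example: 128 sets, 4 ways, eaddr 0x1005000.
             * page = 0x1005, set = 0x1005 & 127 = 5, base = 5 * 4 = 20;
             * get_tlb_esel() then searches entries 20..23 only. */
            assert(tlb0_set_base(0x1005000, 128, 4) == 20);
            return 0;
    }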
@@ -457,27 +507,55 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, | |||
457 | return -1; | 507 | return -1; |
458 | } | 508 | } |
459 | 509 | ||
460 | static inline void kvmppc_e500_priv_setup(struct tlbe_priv *priv, | 510 | static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, |
461 | struct tlbe *gtlbe, | 511 | struct kvm_book3e_206_tlb_entry *gtlbe, |
462 | pfn_t pfn) | 512 | pfn_t pfn) |
463 | { | 513 | { |
464 | priv->pfn = pfn; | 514 | ref->pfn = pfn; |
465 | priv->flags = E500_TLB_VALID; | 515 | ref->flags = E500_TLB_VALID; |
466 | 516 | ||
467 | if (tlbe_is_writable(gtlbe)) | 517 | if (tlbe_is_writable(gtlbe)) |
468 | priv->flags |= E500_TLB_DIRTY; | 518 | ref->flags |= E500_TLB_DIRTY; |
469 | } | 519 | } |
470 | 520 | ||
471 | static inline void kvmppc_e500_priv_release(struct tlbe_priv *priv) | 521 | static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) |
472 | { | 522 | { |
473 | if (priv->flags & E500_TLB_VALID) { | 523 | if (ref->flags & E500_TLB_VALID) { |
474 | if (priv->flags & E500_TLB_DIRTY) | 524 | if (ref->flags & E500_TLB_DIRTY) |
475 | kvm_release_pfn_dirty(priv->pfn); | 525 | kvm_release_pfn_dirty(ref->pfn); |
476 | else | 526 | else |
477 | kvm_release_pfn_clean(priv->pfn); | 527 | kvm_release_pfn_clean(ref->pfn); |
528 | |||
529 | ref->flags = 0; | ||
530 | } | ||
531 | } | ||
532 | |||
533 | static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) | ||
534 | { | ||
535 | int tlbsel = 0; | ||
536 | int i; | ||
537 | |||
538 | for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) { | ||
539 | struct tlbe_ref *ref = | ||
540 | &vcpu_e500->gtlb_priv[tlbsel][i].ref; | ||
541 | kvmppc_e500_ref_release(ref); | ||
542 | } | ||
543 | } | ||
544 | |||
545 | static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) | ||
546 | { | ||
547 | int stlbsel = 1; | ||
548 | int i; | ||
549 | |||
550 | kvmppc_e500_id_table_reset_all(vcpu_e500); | ||
478 | 551 | ||
479 | priv->flags = 0; | 552 | for (i = 0; i < host_tlb_params[stlbsel].entries; i++) { |
553 | struct tlbe_ref *ref = | ||
554 | &vcpu_e500->tlb_refs[stlbsel][i]; | ||
555 | kvmppc_e500_ref_release(ref); | ||
480 | } | 556 | } |
557 | |||
558 | clear_tlb_privs(vcpu_e500); | ||
481 | } | 559 | } |
482 | 560 | ||
483 | static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, | 561 | static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, |
@@ -488,59 +566,54 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, | |||
488 | int tlbsel; | 566 | int tlbsel; |
489 | 567 | ||
490 | /* since we only have two TLBs, only lower bit is used. */ | 568 | /* since we only have two TLBs, only lower bit is used. */ |
491 | tlbsel = (vcpu_e500->mas4 >> 28) & 0x1; | 569 | tlbsel = (vcpu->arch.shared->mas4 >> 28) & 0x1; |
492 | victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0; | 570 | victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0; |
493 | pidsel = (vcpu_e500->mas4 >> 16) & 0xf; | 571 | pidsel = (vcpu->arch.shared->mas4 >> 16) & 0xf; |
494 | tsized = (vcpu_e500->mas4 >> 7) & 0x1f; | 572 | tsized = (vcpu->arch.shared->mas4 >> 7) & 0x1f; |
495 | 573 | ||
496 | vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) | 574 | vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) |
497 | | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); | 575 | | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); |
498 | vcpu_e500->mas1 = MAS1_VALID | (as ? MAS1_TS : 0) | 576 | vcpu->arch.shared->mas1 = MAS1_VALID | (as ? MAS1_TS : 0) |
499 | | MAS1_TID(vcpu_e500->pid[pidsel]) | 577 | | MAS1_TID(vcpu_e500->pid[pidsel]) |
500 | | MAS1_TSIZE(tsized); | 578 | | MAS1_TSIZE(tsized); |
501 | vcpu_e500->mas2 = (eaddr & MAS2_EPN) | 579 | vcpu->arch.shared->mas2 = (eaddr & MAS2_EPN) |
502 | | (vcpu_e500->mas4 & MAS2_ATTRIB_MASK); | 580 | | (vcpu->arch.shared->mas4 & MAS2_ATTRIB_MASK); |
503 | vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; | 581 | vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; |
504 | vcpu_e500->mas6 = (vcpu_e500->mas6 & MAS6_SPID1) | 582 | vcpu->arch.shared->mas6 = (vcpu->arch.shared->mas6 & MAS6_SPID1) |
505 | | (get_cur_pid(vcpu) << 16) | 583 | | (get_cur_pid(vcpu) << 16) |
506 | | (as ? MAS6_SAS : 0); | 584 | | (as ? MAS6_SAS : 0); |
507 | vcpu_e500->mas7 = 0; | ||
508 | } | 585 | } |
509 | 586 | ||
510 | static inline void kvmppc_e500_setup_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, | 587 | /* TID must be supplied by the caller */ |
511 | struct tlbe *gtlbe, int tsize, | 588 | static inline void kvmppc_e500_setup_stlbe( |
512 | struct tlbe_priv *priv, | 589 | struct kvmppc_vcpu_e500 *vcpu_e500, |
513 | u64 gvaddr, struct tlbe *stlbe) | 590 | struct kvm_book3e_206_tlb_entry *gtlbe, |
591 | int tsize, struct tlbe_ref *ref, u64 gvaddr, | ||
592 | struct kvm_book3e_206_tlb_entry *stlbe) | ||
514 | { | 593 | { |
515 | pfn_t pfn = priv->pfn; | 594 | pfn_t pfn = ref->pfn; |
516 | unsigned int stid; | ||
517 | 595 | ||
518 | stid = kvmppc_e500_get_sid(vcpu_e500, get_tlb_ts(gtlbe), | 596 | BUG_ON(!(ref->flags & E500_TLB_VALID)); |
519 | get_tlb_tid(gtlbe), | ||
520 | get_cur_pr(&vcpu_e500->vcpu), 0); | ||
521 | 597 | ||
522 | /* Force TS=1 IPROT=0 for all guest mappings. */ | 598 | /* Force TS=1 IPROT=0 for all guest mappings. */ |
523 | stlbe->mas1 = MAS1_TSIZE(tsize) | 599 | stlbe->mas1 = MAS1_TSIZE(tsize) | MAS1_TS | MAS1_VALID; |
524 | | MAS1_TID(stid) | MAS1_TS | MAS1_VALID; | ||
525 | stlbe->mas2 = (gvaddr & MAS2_EPN) | 600 | stlbe->mas2 = (gvaddr & MAS2_EPN) |
526 | | e500_shadow_mas2_attrib(gtlbe->mas2, | 601 | | e500_shadow_mas2_attrib(gtlbe->mas2, |
527 | vcpu_e500->vcpu.arch.shared->msr & MSR_PR); | 602 | vcpu_e500->vcpu.arch.shared->msr & MSR_PR); |
528 | stlbe->mas3 = ((pfn << PAGE_SHIFT) & MAS3_RPN) | 603 | stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
529 | | e500_shadow_mas3_attrib(gtlbe->mas3, | 604 | | e500_shadow_mas3_attrib(gtlbe->mas7_3, |
530 | vcpu_e500->vcpu.arch.shared->msr & MSR_PR); | 605 | vcpu_e500->vcpu.arch.shared->msr & MSR_PR); |
531 | stlbe->mas7 = (pfn >> (32 - PAGE_SHIFT)) & MAS7_RPN; | ||
532 | } | 606 | } |
533 | 607 | ||
534 | |||
535 | static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, | 608 | static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, |
536 | u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel, | 609 | u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, |
537 | struct tlbe *stlbe) | 610 | int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe, |
611 | struct tlbe_ref *ref) | ||
538 | { | 612 | { |
539 | struct kvm_memory_slot *slot; | 613 | struct kvm_memory_slot *slot; |
540 | unsigned long pfn, hva; | 614 | unsigned long pfn, hva; |
541 | int pfnmap = 0; | 615 | int pfnmap = 0; |
542 | int tsize = BOOK3E_PAGESZ_4K; | 616 | int tsize = BOOK3E_PAGESZ_4K; |
543 | struct tlbe_priv *priv; | ||
544 | 617 | ||
545 | /* | 618 | /* |
546 | * Translate guest physical to true physical, acquiring | 619 | * Translate guest physical to true physical, acquiring |
@@ -621,12 +694,31 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, | |||
621 | pfn &= ~(tsize_pages - 1); | 694 | pfn &= ~(tsize_pages - 1); |
622 | break; | 695 | break; |
623 | } | 696 | } |
697 | } else if (vma && hva >= vma->vm_start && | ||
698 | (vma->vm_flags & VM_HUGETLB)) { | ||
699 | unsigned long psize = vma_kernel_pagesize(vma); | ||
700 | |||
701 | tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> | ||
702 | MAS1_TSIZE_SHIFT; | ||
703 | |||
704 | /* | ||
705 | * Take the largest page size that satisfies both host | ||
706 | * and guest mapping | ||
707 | */ | ||
708 | tsize = min(__ilog2(psize) - 10, tsize); | ||
709 | |||
710 | /* | ||
711 | * e500 doesn't implement the lowest tsize bit, | ||
712 | * or 1K pages. | ||
713 | */ | ||
714 | tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); | ||
624 | } | 715 | } |
625 | 716 | ||
626 | up_read(¤t->mm->mmap_sem); | 717 | up_read(¤t->mm->mmap_sem); |
627 | } | 718 | } |
628 | 719 | ||
629 | if (likely(!pfnmap)) { | 720 | if (likely(!pfnmap)) { |
721 | unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); | ||
630 | pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn); | 722 | pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn); |
631 | if (is_error_pfn(pfn)) { | 723 | if (is_error_pfn(pfn)) { |
632 | printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", | 724 | printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", |
@@ -634,45 +726,52 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, | |||
634 | kvm_release_pfn_clean(pfn); | 726 | kvm_release_pfn_clean(pfn); |
635 | return; | 727 | return; |
636 | } | 728 | } |
729 | |||
730 | /* Align guest and physical address to page map boundaries */ | ||
731 | pfn &= ~(tsize_pages - 1); | ||
732 | gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); | ||
637 | } | 733 | } |
638 | 734 | ||
639 | /* Drop old priv and setup new one. */ | 735 | /* Drop old ref and setup new one. */ |
640 | priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; | 736 | kvmppc_e500_ref_release(ref); |
641 | kvmppc_e500_priv_release(priv); | 737 | kvmppc_e500_ref_setup(ref, gtlbe, pfn); |
642 | kvmppc_e500_priv_setup(priv, gtlbe, pfn); | ||
643 | 738 | ||
644 | kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, tsize, priv, gvaddr, stlbe); | 739 | kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, tsize, ref, gvaddr, stlbe); |
645 | } | 740 | } |
646 | 741 | ||
647 | /* XXX only map the one-to-one case; for now use TLB0 */ | 742 | /* XXX only map the one-to-one case; for now use TLB0 */ |
648 | static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, | 743 | static void kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, |
649 | int esel, struct tlbe *stlbe) | 744 | int esel, |
745 | struct kvm_book3e_206_tlb_entry *stlbe) | ||
650 | { | 746 | { |
651 | struct tlbe *gtlbe; | 747 | struct kvm_book3e_206_tlb_entry *gtlbe; |
748 | struct tlbe_ref *ref; | ||
652 | 749 | ||
653 | gtlbe = &vcpu_e500->gtlb_arch[0][esel]; | 750 | gtlbe = get_entry(vcpu_e500, 0, esel); |
751 | ref = &vcpu_e500->gtlb_priv[0][esel].ref; | ||
654 | 752 | ||
655 | kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), | 753 | kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), |
656 | get_tlb_raddr(gtlbe) >> PAGE_SHIFT, | 754 | get_tlb_raddr(gtlbe) >> PAGE_SHIFT, |
657 | gtlbe, 0, esel, stlbe); | 755 | gtlbe, 0, stlbe, ref); |
658 | |||
659 | return esel; | ||
660 | } | 756 | } |
661 | 757 | ||
662 | /* Caller must ensure that the specified guest TLB entry is safe to insert into | 758 | /* Caller must ensure that the specified guest TLB entry is safe to insert into |
663 | * the shadow TLB. */ | 759 | * the shadow TLB. */ |
664 | /* XXX for both one-to-one and one-to-many; for now use TLB1 */ | 760 | /* XXX for both one-to-one and one-to-many; for now use TLB1 */ |
665 | static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, | 761 | static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, |
666 | u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, struct tlbe *stlbe) | 762 | u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, |
763 | struct kvm_book3e_206_tlb_entry *stlbe) | ||
667 | { | 764 | { |
765 | struct tlbe_ref *ref; | ||
668 | unsigned int victim; | 766 | unsigned int victim; |
669 | 767 | ||
670 | victim = vcpu_e500->gtlb_nv[1]++; | 768 | victim = vcpu_e500->host_tlb1_nv++; |
671 | 769 | ||
672 | if (unlikely(vcpu_e500->gtlb_nv[1] >= tlb1_max_shadow_size())) | 770 | if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size())) |
673 | vcpu_e500->gtlb_nv[1] = 0; | 771 | vcpu_e500->host_tlb1_nv = 0; |
674 | 772 | ||
675 | kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, victim, stlbe); | 773 | ref = &vcpu_e500->tlb_refs[1][victim]; |
774 | kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, ref); | ||
676 | 775 | ||
677 | return victim; | 776 | return victim; |
678 | } | 777 | } |
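The new hugetlb branch in kvmppc_e500_shadow_map() picks the largest page size both host and guest can honor: BookE tsize encodes 1 KiB << tsize, and e500v2 implements only even tsizes (4K, 16K, ...). The clamp, with two worked cases:

    #include <assert.h>

    static int min_i(int a, int b) { return a < b ? a : b; }
    static int max_i(int a, int b) { return a > b ? a : b; }

    /* A host page of 2^N bytes corresponds to tsize N - 10; the low
     * bit is cleared because odd tsizes are unimplemented, and the
     * floor is BOOK3E_PAGESZ_4K (tsize 2). */
    static int clamp_tsize(int host_log2_psize, int guest_tsize)
    {
            int tsize = min_i(host_log2_psize - 10, guest_tsize);

            return max_i(2 /* BOOK3E_PAGESZ_4K */, tsize & ~1);
    }

    int main(void)
    {
            /* 4 MiB hugepage host side (2^22 -> tsize 12), guest asks
             * for 256 KiB (tsize 8): the guest request wins. */
            assert(clamp_tsize(22, 8) == 8);

            /* Guest asks for 1 GiB (tsize 20) but the host backs it
             * with 4 MiB pages: clamp to the host's tsize 12. */
            assert(clamp_tsize(22, 20) == 12);
            return 0;
    }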
@@ -689,7 +788,8 @@ static inline int kvmppc_e500_gtlbe_invalidate( | |||
689 | struct kvmppc_vcpu_e500 *vcpu_e500, | 788 | struct kvmppc_vcpu_e500 *vcpu_e500, |
690 | int tlbsel, int esel) | 789 | int tlbsel, int esel) |
691 | { | 790 | { |
692 | struct tlbe *gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; | 791 | struct kvm_book3e_206_tlb_entry *gtlbe = |
792 | get_entry(vcpu_e500, tlbsel, esel); | ||
693 | 793 | ||
694 | if (unlikely(get_tlb_iprot(gtlbe))) | 794 | if (unlikely(get_tlb_iprot(gtlbe))) |
695 | return -1; | 795 | return -1; |
@@ -704,10 +804,10 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value) | |||
704 | int esel; | 804 | int esel; |
705 | 805 | ||
706 | if (value & MMUCSR0_TLB0FI) | 806 | if (value & MMUCSR0_TLB0FI) |
707 | for (esel = 0; esel < vcpu_e500->gtlb_size[0]; esel++) | 807 | for (esel = 0; esel < vcpu_e500->gtlb_params[0].entries; esel++) |
708 | kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel); | 808 | kvmppc_e500_gtlbe_invalidate(vcpu_e500, 0, esel); |
709 | if (value & MMUCSR0_TLB1FI) | 809 | if (value & MMUCSR0_TLB1FI) |
710 | for (esel = 0; esel < vcpu_e500->gtlb_size[1]; esel++) | 810 | for (esel = 0; esel < vcpu_e500->gtlb_params[1].entries; esel++) |
711 | kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); | 811 | kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); |
712 | 812 | ||
713 | /* Invalidate all vcpu id mappings */ | 813 | /* Invalidate all vcpu id mappings */ |
@@ -732,7 +832,8 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb) | |||
732 | 832 | ||
733 | if (ia) { | 833 | if (ia) { |
734 | /* invalidate all entries */ | 834 | /* invalidate all entries */ |
735 | for (esel = 0; esel < vcpu_e500->gtlb_size[tlbsel]; esel++) | 835 | for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; |
836 | esel++) | ||
736 | kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); | 837 | kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); |
737 | } else { | 838 | } else { |
738 | ea &= 0xfffff000; | 839 | ea &= 0xfffff000; |
@@ -752,18 +853,17 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu) | |||
752 | { | 853 | { |
753 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | 854 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); |
754 | int tlbsel, esel; | 855 | int tlbsel, esel; |
755 | struct tlbe *gtlbe; | 856 | struct kvm_book3e_206_tlb_entry *gtlbe; |
756 | 857 | ||
757 | tlbsel = get_tlb_tlbsel(vcpu_e500); | 858 | tlbsel = get_tlb_tlbsel(vcpu); |
758 | esel = get_tlb_esel(vcpu_e500, tlbsel); | 859 | esel = get_tlb_esel(vcpu, tlbsel); |
759 | 860 | ||
760 | gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; | 861 | gtlbe = get_entry(vcpu_e500, tlbsel, esel); |
761 | vcpu_e500->mas0 &= ~MAS0_NV(~0); | 862 | vcpu->arch.shared->mas0 &= ~MAS0_NV(~0); |
762 | vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); | 863 | vcpu->arch.shared->mas0 |= MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); |
763 | vcpu_e500->mas1 = gtlbe->mas1; | 864 | vcpu->arch.shared->mas1 = gtlbe->mas1; |
764 | vcpu_e500->mas2 = gtlbe->mas2; | 865 | vcpu->arch.shared->mas2 = gtlbe->mas2; |
765 | vcpu_e500->mas3 = gtlbe->mas3; | 866 | vcpu->arch.shared->mas7_3 = gtlbe->mas7_3; |
766 | vcpu_e500->mas7 = gtlbe->mas7; | ||
767 | 867 | ||
768 | return EMULATE_DONE; | 868 | return EMULATE_DONE; |
769 | } | 869 | } |
@@ -771,10 +871,10 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu) | |||
771 | int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) | 871 | int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) |
772 | { | 872 | { |
773 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | 873 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); |
774 | int as = !!get_cur_sas(vcpu_e500); | 874 | int as = !!get_cur_sas(vcpu); |
775 | unsigned int pid = get_cur_spid(vcpu_e500); | 875 | unsigned int pid = get_cur_spid(vcpu); |
776 | int esel, tlbsel; | 876 | int esel, tlbsel; |
777 | struct tlbe *gtlbe = NULL; | 877 | struct kvm_book3e_206_tlb_entry *gtlbe = NULL; |
778 | gva_t ea; | 878 | gva_t ea; |
779 | 879 | ||
780 | ea = kvmppc_get_gpr(vcpu, rb); | 880 | ea = kvmppc_get_gpr(vcpu, rb); |
@@ -782,70 +882,90 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) | |||
782 | for (tlbsel = 0; tlbsel < 2; tlbsel++) { | 882 | for (tlbsel = 0; tlbsel < 2; tlbsel++) { |
783 | esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); | 883 | esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); |
784 | if (esel >= 0) { | 884 | if (esel >= 0) { |
785 | gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; | 885 | gtlbe = get_entry(vcpu_e500, tlbsel, esel); |
786 | break; | 886 | break; |
787 | } | 887 | } |
788 | } | 888 | } |
789 | 889 | ||
790 | if (gtlbe) { | 890 | if (gtlbe) { |
791 | vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel) | 891 | esel &= vcpu_e500->gtlb_params[tlbsel].ways - 1; |
892 | |||
893 | vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel) | ||
792 | | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); | 894 | | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); |
793 | vcpu_e500->mas1 = gtlbe->mas1; | 895 | vcpu->arch.shared->mas1 = gtlbe->mas1; |
794 | vcpu_e500->mas2 = gtlbe->mas2; | 896 | vcpu->arch.shared->mas2 = gtlbe->mas2; |
795 | vcpu_e500->mas3 = gtlbe->mas3; | 897 | vcpu->arch.shared->mas7_3 = gtlbe->mas7_3; |
796 | vcpu_e500->mas7 = gtlbe->mas7; | ||
797 | } else { | 898 | } else { |
798 | int victim; | 899 | int victim; |
799 | 900 | ||
800 | /* since we only have two TLBs, only lower bit is used. */ | 901 | /* since we only have two TLBs, only lower bit is used. */ |
801 | tlbsel = vcpu_e500->mas4 >> 28 & 0x1; | 902 | tlbsel = vcpu->arch.shared->mas4 >> 28 & 0x1; |
802 | victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0; | 903 | victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0; |
803 | 904 | ||
804 | vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) | 905 | vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) |
906 | | MAS0_ESEL(victim) | ||
805 | | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); | 907 | | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); |
806 | vcpu_e500->mas1 = (vcpu_e500->mas6 & MAS6_SPID0) | 908 | vcpu->arch.shared->mas1 = |
807 | | (vcpu_e500->mas6 & (MAS6_SAS ? MAS1_TS : 0)) | 909 | (vcpu->arch.shared->mas6 & MAS6_SPID0) |
808 | | (vcpu_e500->mas4 & MAS4_TSIZED(~0)); | 910 | | (vcpu->arch.shared->mas6 & (MAS6_SAS ? MAS1_TS : 0)) |
809 | vcpu_e500->mas2 &= MAS2_EPN; | 911 | | (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0)); |
810 | vcpu_e500->mas2 |= vcpu_e500->mas4 & MAS2_ATTRIB_MASK; | 912 | vcpu->arch.shared->mas2 &= MAS2_EPN; |
811 | vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3; | 913 | vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 & |
812 | vcpu_e500->mas7 = 0; | 914 | MAS2_ATTRIB_MASK; |
915 | vcpu->arch.shared->mas7_3 &= MAS3_U0 | MAS3_U1 | | ||
916 | MAS3_U2 | MAS3_U3; | ||
813 | } | 917 | } |
814 | 918 | ||
815 | kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); | 919 | kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); |
816 | return EMULATE_DONE; | 920 | return EMULATE_DONE; |
817 | } | 921 | } |
818 | 922 | ||
923 | /* sesel is for tlb1 only */ | ||
924 | static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, | ||
925 | struct kvm_book3e_206_tlb_entry *gtlbe, | ||
926 | struct kvm_book3e_206_tlb_entry *stlbe, | ||
927 | int stlbsel, int sesel) | ||
928 | { | ||
929 | int stid; | ||
930 | |||
931 | preempt_disable(); | ||
932 | stid = kvmppc_e500_get_sid(vcpu_e500, get_tlb_ts(gtlbe), | ||
933 | get_tlb_tid(gtlbe), | ||
934 | get_cur_pr(&vcpu_e500->vcpu), 0); | ||
935 | |||
936 | stlbe->mas1 |= MAS1_TID(stid); | ||
937 | write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe); | ||
938 | preempt_enable(); | ||
939 | } | ||
940 | |||
819 | int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) | 941 | int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) |
820 | { | 942 | { |
821 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | 943 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); |
822 | struct tlbe *gtlbe; | 944 | struct kvm_book3e_206_tlb_entry *gtlbe; |
823 | int tlbsel, esel; | 945 | int tlbsel, esel; |
824 | 946 | ||
825 | tlbsel = get_tlb_tlbsel(vcpu_e500); | 947 | tlbsel = get_tlb_tlbsel(vcpu); |
826 | esel = get_tlb_esel(vcpu_e500, tlbsel); | 948 | esel = get_tlb_esel(vcpu, tlbsel); |
827 | 949 | ||
828 | gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; | 950 | gtlbe = get_entry(vcpu_e500, tlbsel, esel); |
829 | 951 | ||
830 | if (get_tlb_v(gtlbe)) | 952 | if (get_tlb_v(gtlbe)) |
831 | kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel); | 953 | inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); |
832 | 954 | ||
833 | gtlbe->mas1 = vcpu_e500->mas1; | 955 | gtlbe->mas1 = vcpu->arch.shared->mas1; |
834 | gtlbe->mas2 = vcpu_e500->mas2; | 956 | gtlbe->mas2 = vcpu->arch.shared->mas2; |
835 | gtlbe->mas3 = vcpu_e500->mas3; | 957 | gtlbe->mas7_3 = vcpu->arch.shared->mas7_3; |
836 | gtlbe->mas7 = vcpu_e500->mas7; | ||
837 | 958 | ||
838 | trace_kvm_gtlb_write(vcpu_e500->mas0, gtlbe->mas1, gtlbe->mas2, | 959 | trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1, |
839 | gtlbe->mas3, gtlbe->mas7); | 960 | gtlbe->mas2, gtlbe->mas7_3); |
840 | 961 | ||
841 | /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ | 962 | /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ |
842 | if (tlbe_is_host_safe(vcpu, gtlbe)) { | 963 | if (tlbe_is_host_safe(vcpu, gtlbe)) { |
843 | struct tlbe stlbe; | 964 | struct kvm_book3e_206_tlb_entry stlbe; |
844 | int stlbsel, sesel; | 965 | int stlbsel, sesel; |
845 | u64 eaddr; | 966 | u64 eaddr; |
846 | u64 raddr; | 967 | u64 raddr; |
847 | 968 | ||
848 | preempt_disable(); | ||
849 | switch (tlbsel) { | 969 | switch (tlbsel) { |
850 | case 0: | 970 | case 0: |
851 | /* TLB0 */ | 971 | /* TLB0 */ |
@@ -853,7 +973,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) | |||
853 | gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K); | 973 | gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K); |
854 | 974 | ||
855 | stlbsel = 0; | 975 | stlbsel = 0; |
856 | sesel = kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); | 976 | kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); |
977 | sesel = 0; /* unused */ | ||
857 | 978 | ||
858 | break; | 979 | break; |
859 | 980 | ||
@@ -874,8 +995,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) | |||
874 | default: | 995 | default: |
875 | BUG(); | 996 | BUG(); |
876 | } | 997 | } |
877 | write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe); | 998 | |
878 | preempt_enable(); | 999 | write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel); |
879 | } | 1000 | } |
880 | 1001 | ||
881 | kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); | 1002 | kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); |
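write_stlbe() above is why kvmppc_e500_setup_stlbe() now leaves MAS1's TID to the caller: shadow ids are per-CPU, so the id lookup and the tlbwe must happen without a migration in between. A stand-in model of that ordering (all helpers are stubs, and the MAS1 field layout is illustrative):

    #include <stdint.h>

    struct stlbe { uint32_t mas1; };

    #define MAS1_TID(x) (((uint32_t)(x) & 0xff) << 16)

    static void preempt_disable_stub(void) {}
    static void preempt_enable_stub(void) {}
    static int  get_sid_stub(void) { return 42; }   /* per-CPU shadow id */
    static void tlbwe_stub(struct stlbe *e) { (void)e; }

    static void write_stlbe_model(struct stlbe *stlbe)
    {
            preempt_disable_stub();
            stlbe->mas1 |= MAS1_TID(get_sid_stub());  /* stamp the TID late */
            tlbwe_stub(stlbe);                        /* same CPU, same sid */
            preempt_enable_stub();
    }

    int main(void)
    {
            struct stlbe e = { 0 };

            write_stlbe_model(&e);
            return 0;
    }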
@@ -914,9 +1035,11 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index, | |||
914 | gva_t eaddr) | 1035 | gva_t eaddr) |
915 | { | 1036 | { |
916 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | 1037 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); |
917 | struct tlbe *gtlbe = | 1038 | struct kvm_book3e_206_tlb_entry *gtlbe; |
918 | &vcpu_e500->gtlb_arch[tlbsel_of(index)][esel_of(index)]; | 1039 | u64 pgmask; |
919 | u64 pgmask = get_tlb_bytes(gtlbe) - 1; | 1040 | |
1041 | gtlbe = get_entry(vcpu_e500, tlbsel_of(index), esel_of(index)); | ||
1042 | pgmask = get_tlb_bytes(gtlbe) - 1; | ||
920 | 1043 | ||
921 | return get_tlb_raddr(gtlbe) | (eaddr & pgmask); | 1044 | return get_tlb_raddr(gtlbe) | (eaddr & pgmask); |
922 | } | 1045 | } |
@@ -930,22 +1053,21 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, | |||
930 | { | 1053 | { |
931 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | 1054 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); |
932 | struct tlbe_priv *priv; | 1055 | struct tlbe_priv *priv; |
933 | struct tlbe *gtlbe, stlbe; | 1056 | struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; |
934 | int tlbsel = tlbsel_of(index); | 1057 | int tlbsel = tlbsel_of(index); |
935 | int esel = esel_of(index); | 1058 | int esel = esel_of(index); |
936 | int stlbsel, sesel; | 1059 | int stlbsel, sesel; |
937 | 1060 | ||
938 | gtlbe = &vcpu_e500->gtlb_arch[tlbsel][esel]; | 1061 | gtlbe = get_entry(vcpu_e500, tlbsel, esel); |
939 | 1062 | ||
940 | preempt_disable(); | ||
941 | switch (tlbsel) { | 1063 | switch (tlbsel) { |
942 | case 0: | 1064 | case 0: |
943 | stlbsel = 0; | 1065 | stlbsel = 0; |
944 | sesel = esel; | 1066 | sesel = 0; /* unused */ |
945 | priv = &vcpu_e500->gtlb_priv[stlbsel][sesel]; | 1067 | priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; |
946 | 1068 | ||
947 | kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, BOOK3E_PAGESZ_4K, | 1069 | kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, BOOK3E_PAGESZ_4K, |
948 | priv, eaddr, &stlbe); | 1070 | &priv->ref, eaddr, &stlbe); |
949 | break; | 1071 | break; |
950 | 1072 | ||
951 | case 1: { | 1073 | case 1: { |
@@ -962,8 +1084,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, | |||
962 | break; | 1084 | break; |
963 | } | 1085 | } |
964 | 1086 | ||
965 | write_host_tlbe(vcpu_e500, stlbsel, sesel, &stlbe); | 1087 | write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel); |
966 | preempt_enable(); | ||
967 | } | 1088 | } |
968 | 1089 | ||
969 | int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, | 1090 | int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, |
@@ -993,85 +1114,279 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) | |||
993 | 1114 | ||
994 | void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) | 1115 | void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) |
995 | { | 1116 | { |
996 | struct tlbe *tlbe; | 1117 | struct kvm_book3e_206_tlb_entry *tlbe; |
997 | 1118 | ||
998 | /* Insert large initial mapping for guest. */ | 1119 | /* Insert large initial mapping for guest. */ |
999 | tlbe = &vcpu_e500->gtlb_arch[1][0]; | 1120 | tlbe = get_entry(vcpu_e500, 1, 0); |
1000 | tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M); | 1121 | tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M); |
1001 | tlbe->mas2 = 0; | 1122 | tlbe->mas2 = 0; |
1002 | tlbe->mas3 = E500_TLB_SUPER_PERM_MASK; | 1123 | tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK; |
1003 | tlbe->mas7 = 0; | ||
1004 | 1124 | ||
1005 | /* 4K map for serial output. Used by kernel wrapper. */ | 1125 | /* 4K map for serial output. Used by kernel wrapper. */ |
1006 | tlbe = &vcpu_e500->gtlb_arch[1][1]; | 1126 | tlbe = get_entry(vcpu_e500, 1, 1); |
1007 | tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K); | 1127 | tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K); |
1008 | tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G; | 1128 | tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G; |
1009 | tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK; | 1129 | tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK; |
1010 | tlbe->mas7 = 0; | 1130 | } |
1131 | |||
1132 | static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) | ||
1133 | { | ||
1134 | int i; | ||
1135 | |||
1136 | clear_tlb_refs(vcpu_e500); | ||
1137 | kfree(vcpu_e500->gtlb_priv[0]); | ||
1138 | kfree(vcpu_e500->gtlb_priv[1]); | ||
1139 | |||
1140 | if (vcpu_e500->shared_tlb_pages) { | ||
1141 | vfree((void *)(round_down((uintptr_t)vcpu_e500->gtlb_arch, | ||
1142 | PAGE_SIZE))); | ||
1143 | |||
1144 | for (i = 0; i < vcpu_e500->num_shared_tlb_pages; i++) { | ||
1145 | set_page_dirty_lock(vcpu_e500->shared_tlb_pages[i]); | ||
1146 | put_page(vcpu_e500->shared_tlb_pages[i]); | ||
1147 | } | ||
1148 | |||
1149 | vcpu_e500->num_shared_tlb_pages = 0; | ||
1150 | vcpu_e500->shared_tlb_pages = NULL; | ||
1151 | } else { | ||
1152 | kfree(vcpu_e500->gtlb_arch); | ||
1153 | } | ||
1154 | |||
1155 | vcpu_e500->gtlb_arch = NULL; | ||
1156 | } | ||
1157 | |||
1158 | int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, | ||
1159 | struct kvm_config_tlb *cfg) | ||
1160 | { | ||
1161 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | ||
1162 | struct kvm_book3e_206_tlb_params params; | ||
1163 | char *virt; | ||
1164 | struct page **pages; | ||
1165 | struct tlbe_priv *privs[2] = {}; | ||
1166 | size_t array_len; | ||
1167 | u32 sets; | ||
1168 | int num_pages, ret, i; | ||
1169 | |||
1170 | if (cfg->mmu_type != KVM_MMU_FSL_BOOKE_NOHV) | ||
1171 | return -EINVAL; | ||
1172 | |||
1173 | if (copy_from_user(¶ms, (void __user *)(uintptr_t)cfg->params, | ||
1174 | sizeof(params))) | ||
1175 | return -EFAULT; | ||
1176 | |||
1177 | if (params.tlb_sizes[1] > 64) | ||
1178 | return -EINVAL; | ||
1179 | if (params.tlb_ways[1] != params.tlb_sizes[1]) | ||
1180 | return -EINVAL; | ||
1181 | if (params.tlb_sizes[2] != 0 || params.tlb_sizes[3] != 0) | ||
1182 | return -EINVAL; | ||
1183 | if (params.tlb_ways[2] != 0 || params.tlb_ways[3] != 0) | ||
1184 | return -EINVAL; | ||
1185 | |||
1186 | if (!is_power_of_2(params.tlb_ways[0])) | ||
1187 | return -EINVAL; | ||
1188 | |||
1189 | sets = params.tlb_sizes[0] >> ilog2(params.tlb_ways[0]); | ||
1190 | if (!is_power_of_2(sets)) | ||
1191 | return -EINVAL; | ||
1192 | |||
1193 | array_len = params.tlb_sizes[0] + params.tlb_sizes[1]; | ||
1194 | array_len *= sizeof(struct kvm_book3e_206_tlb_entry); | ||
1195 | |||
1196 | if (cfg->array_len < array_len) | ||
1197 | return -EINVAL; | ||
1198 | |||
1199 | num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) - | ||
1200 | cfg->array / PAGE_SIZE; | ||
1201 | pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL); | ||
1202 | if (!pages) | ||
1203 | return -ENOMEM; | ||
1204 | |||
1205 | ret = get_user_pages_fast(cfg->array, num_pages, 1, pages); | ||
1206 | if (ret < 0) | ||
1207 | goto err_pages; | ||
1208 | |||
1209 | if (ret != num_pages) { | ||
1210 | num_pages = ret; | ||
1211 | ret = -EFAULT; | ||
1212 | goto err_put_page; | ||
1213 | } | ||
1214 | |||
1215 | virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL); | ||
1216 | if (!virt) | ||
1217 | goto err_put_page; | ||
1218 | |||
1219 | privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0], | ||
1220 | GFP_KERNEL); | ||
1221 | privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1], | ||
1222 | GFP_KERNEL); | ||
1223 | |||
1224 | if (!privs[0] || !privs[1]) | ||
1225 | goto err_put_page; | ||
1226 | |||
1227 | free_gtlb(vcpu_e500); | ||
1228 | |||
1229 | vcpu_e500->gtlb_priv[0] = privs[0]; | ||
1230 | vcpu_e500->gtlb_priv[1] = privs[1]; | ||
1231 | |||
1232 | vcpu_e500->gtlb_arch = (struct kvm_book3e_206_tlb_entry *) | ||
1233 | (virt + (cfg->array & (PAGE_SIZE - 1))); | ||
1234 | |||
1235 | vcpu_e500->gtlb_params[0].entries = params.tlb_sizes[0]; | ||
1236 | vcpu_e500->gtlb_params[1].entries = params.tlb_sizes[1]; | ||
1237 | |||
1238 | vcpu_e500->gtlb_offset[0] = 0; | ||
1239 | vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0]; | ||
1240 | |||
1241 | vcpu_e500->tlb0cfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); | ||
1242 | if (params.tlb_sizes[0] <= 2048) | ||
1243 | vcpu_e500->tlb0cfg |= params.tlb_sizes[0]; | ||
1244 | vcpu_e500->tlb0cfg |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT; | ||
1245 | |||
1246 | vcpu_e500->tlb1cfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); | ||
1247 | vcpu_e500->tlb1cfg |= params.tlb_sizes[1]; | ||
1248 | vcpu_e500->tlb1cfg |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT; | ||
1249 | |||
1250 | vcpu_e500->shared_tlb_pages = pages; | ||
1251 | vcpu_e500->num_shared_tlb_pages = num_pages; | ||
1252 | |||
1253 | vcpu_e500->gtlb_params[0].ways = params.tlb_ways[0]; | ||
1254 | vcpu_e500->gtlb_params[0].sets = sets; | ||
1255 | |||
1256 | vcpu_e500->gtlb_params[1].ways = params.tlb_sizes[1]; | ||
1257 | vcpu_e500->gtlb_params[1].sets = 1; | ||
1258 | |||
1259 | return 0; | ||
1260 | |||
1261 | err_put_page: | ||
1262 | kfree(privs[0]); | ||
1263 | kfree(privs[1]); | ||
1264 | |||
1265 | for (i = 0; i < num_pages; i++) | ||
1266 | put_page(pages[i]); | ||
1267 | |||
1268 | err_pages: | ||
1269 | kfree(pages); | ||
1270 | return ret; | ||
1271 | } | ||
1272 | |||
1273 | int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, | ||
1274 | struct kvm_dirty_tlb *dirty) | ||
1275 | { | ||
1276 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | ||
1277 | |||
1278 | clear_tlb_refs(vcpu_e500); | ||
1279 | return 0; | ||
1011 | } | 1280 | } |
1012 | 1281 | ||
1013 | int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) | 1282 | int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) |
1014 | { | 1283 | { |
1015 | tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF; | 1284 | int entry_size = sizeof(struct kvm_book3e_206_tlb_entry); |
1016 | 1285 | int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE; | |
1017 | vcpu_e500->gtlb_size[0] = KVM_E500_TLB0_SIZE; | 1286 | |
1018 | vcpu_e500->gtlb_arch[0] = | 1287 | host_tlb_params[0].entries = mfspr(SPRN_TLB0CFG) & TLBnCFG_N_ENTRY; |
1019 | kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL); | 1288 | host_tlb_params[1].entries = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; |
1020 | if (vcpu_e500->gtlb_arch[0] == NULL) | 1289 | |
1021 | goto err_out; | 1290 | /* |
1022 | 1291 | * This should never happen on real e500 hardware, but is | |
1023 | vcpu_e500->gtlb_size[1] = KVM_E500_TLB1_SIZE; | 1292 | * architecturally possible -- e.g. in some weird nested |
1024 | vcpu_e500->gtlb_arch[1] = | 1293 | * virtualization case. |
1025 | kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL); | 1294 | */ |
1026 | if (vcpu_e500->gtlb_arch[1] == NULL) | 1295 | if (host_tlb_params[0].entries == 0 || |
1027 | goto err_out_guest0; | 1296 | host_tlb_params[1].entries == 0) { |
1028 | 1297 | pr_err("%s: need to know host tlb size\n", __func__); | |
1029 | vcpu_e500->gtlb_priv[0] = (struct tlbe_priv *) | 1298 | return -ENODEV; |
1030 | kzalloc(sizeof(struct tlbe_priv) * KVM_E500_TLB0_SIZE, GFP_KERNEL); | 1299 | } |
1031 | if (vcpu_e500->gtlb_priv[0] == NULL) | 1300 | |
1032 | goto err_out_guest1; | 1301 | host_tlb_params[0].ways = (mfspr(SPRN_TLB0CFG) & TLBnCFG_ASSOC) >> |
1033 | vcpu_e500->gtlb_priv[1] = (struct tlbe_priv *) | 1302 | TLBnCFG_ASSOC_SHIFT; |
1034 | kzalloc(sizeof(struct tlbe_priv) * KVM_E500_TLB1_SIZE, GFP_KERNEL); | 1303 | host_tlb_params[1].ways = host_tlb_params[1].entries; |
1035 | 1304 | ||
1036 | if (vcpu_e500->gtlb_priv[1] == NULL) | 1305 | if (!is_power_of_2(host_tlb_params[0].entries) || |
1037 | goto err_out_priv0; | 1306 | !is_power_of_2(host_tlb_params[0].ways) || |
1307 | host_tlb_params[0].entries < host_tlb_params[0].ways || | ||
1308 | host_tlb_params[0].ways == 0) { | ||
1309 | pr_err("%s: bad tlb0 host config: %u entries %u ways\n", | ||
1310 | __func__, host_tlb_params[0].entries, | ||
1311 | host_tlb_params[0].ways); | ||
1312 | return -ENODEV; | ||
1313 | } | ||
1314 | |||
1315 | host_tlb_params[0].sets = | ||
1316 | host_tlb_params[0].entries / host_tlb_params[0].ways; | ||
1317 | host_tlb_params[1].sets = 1; | ||
1318 | |||
1319 | vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE; | ||
1320 | vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE; | ||
1321 | |||
1322 | vcpu_e500->gtlb_params[0].ways = KVM_E500_TLB0_WAY_NUM; | ||
1323 | vcpu_e500->gtlb_params[0].sets = | ||
1324 | KVM_E500_TLB0_SIZE / KVM_E500_TLB0_WAY_NUM; | ||
1325 | |||
1326 | vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE; | ||
1327 | vcpu_e500->gtlb_params[1].sets = 1; | ||
1328 | |||
1329 | vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL); | ||
1330 | if (!vcpu_e500->gtlb_arch) | ||
1331 | return -ENOMEM; | ||
1332 | |||
1333 | vcpu_e500->gtlb_offset[0] = 0; | ||
1334 | vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE; | ||
1335 | |||
1336 | vcpu_e500->tlb_refs[0] = | ||
1337 | kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries, | ||
1338 | GFP_KERNEL); | ||
1339 | if (!vcpu_e500->tlb_refs[0]) | ||
1340 | goto err; | ||
1341 | |||
1342 | vcpu_e500->tlb_refs[1] = | ||
1343 | kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[1].entries, | ||
1344 | GFP_KERNEL); | ||
1345 | if (!vcpu_e500->tlb_refs[1]) | ||
1346 | goto err; | ||
1347 | |||
1348 | vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) * | ||
1349 | vcpu_e500->gtlb_params[0].entries, | ||
1350 | GFP_KERNEL); | ||
1351 | if (!vcpu_e500->gtlb_priv[0]) | ||
1352 | goto err; | ||
1353 | |||
1354 | vcpu_e500->gtlb_priv[1] = kzalloc(sizeof(struct tlbe_ref) * | ||
1355 | vcpu_e500->gtlb_params[1].entries, | ||
1356 | GFP_KERNEL); | ||
1357 | if (!vcpu_e500->gtlb_priv[1]) | ||
1358 | goto err; | ||
1038 | 1359 | ||
1039 | if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) | 1360 | if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) |
1040 | goto err_out_priv1; | 1361 | goto err; |
1041 | 1362 | ||
1042 | /* Init TLB configuration register */ | 1363 | /* Init TLB configuration register */ |
1043 | vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL; | 1364 | vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & |
1044 | vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_size[0]; | 1365 | ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); |
1045 | vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL; | 1366 | vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[0].entries; |
1046 | vcpu_e500->tlb1cfg |= vcpu_e500->gtlb_size[1]; | 1367 | vcpu_e500->tlb0cfg |= |
1368 | vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT; | ||
1369 | |||
1370 | vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & | ||
1371 | ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); | ||
1372 | vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[1].entries; | ||
1373 | vcpu_e500->tlb0cfg |= | ||
1374 | vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT; | ||
1047 | 1375 | ||
1048 | return 0; | 1376 | return 0; |
1049 | 1377 | ||
1050 | err_out_priv1: | 1378 | err: |
1051 | kfree(vcpu_e500->gtlb_priv[1]); | 1379 | free_gtlb(vcpu_e500); |
1052 | err_out_priv0: | 1380 | kfree(vcpu_e500->tlb_refs[0]); |
1053 | kfree(vcpu_e500->gtlb_priv[0]); | 1381 | kfree(vcpu_e500->tlb_refs[1]); |
1054 | err_out_guest1: | ||
1055 | kfree(vcpu_e500->gtlb_arch[1]); | ||
1056 | err_out_guest0: | ||
1057 | kfree(vcpu_e500->gtlb_arch[0]); | ||
1058 | err_out: | ||
1059 | return -1; | 1382 | return -1; |
1060 | } | 1383 | } |
1061 | 1384 | ||
1062 | void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) | 1385 | void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) |
1063 | { | 1386 | { |
1064 | int stlbsel, i; | 1387 | free_gtlb(vcpu_e500); |
1065 | |||
1066 | /* release all privs */ | ||
1067 | for (stlbsel = 0; stlbsel < 2; stlbsel++) | ||
1068 | for (i = 0; i < vcpu_e500->gtlb_size[stlbsel]; i++) { | ||
1069 | struct tlbe_priv *priv = | ||
1070 | &vcpu_e500->gtlb_priv[stlbsel][i]; | ||
1071 | kvmppc_e500_priv_release(priv); | ||
1072 | } | ||
1073 | |||
1074 | kvmppc_e500_id_table_free(vcpu_e500); | 1388 | kvmppc_e500_id_table_free(vcpu_e500); |
1075 | kfree(vcpu_e500->gtlb_arch[1]); | 1389 | |
1076 | kfree(vcpu_e500->gtlb_arch[0]); | 1390 | kfree(vcpu_e500->tlb_refs[0]); |
1391 | kfree(vcpu_e500->tlb_refs[1]); | ||
1077 | } | 1392 | } |
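The kvm_vcpu_ioctl_config_tlb() path above lets userspace supply the guest TLB as a shared array of kvm_book3e_206_tlb_entry structures: the kernel validates the geometry, pins the backing pages with get_user_pages_fast() and vmap()s them, so both sides see the same entries. A minimal sketch of the userspace half, assuming the KVM_CAP_SW_TLB enable-cap interface and an illustrative vcpu_fd (geometry values invented, error handling trimmed):

    #include <stdint.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Geometry must pass the checks in kvm_vcpu_ioctl_config_tlb():
     * TLB1 at most 64 entries and fully associative, TLB2/TLB3 empty,
     * TLB0 ways and resulting sets powers of two. */
    static int config_guest_tlb(int vcpu_fd)
    {
        struct kvm_book3e_206_tlb_params params = {
            .tlb_sizes = { 512, 16, 0, 0 },
            .tlb_ways  = { 4, 16, 0, 0 },
        };
        size_t entries = 512 + 16;
        /* The kernel pins these pages, so the array must stay
         * allocated for the life of the vcpu. */
        struct kvm_book3e_206_tlb_entry *array =
            calloc(entries, sizeof(*array));
        struct kvm_config_tlb cfg;
        struct kvm_enable_cap cap = { .cap = KVM_CAP_SW_TLB };

        if (!array)
            return -1;

        cfg.params    = (uintptr_t)&params;
        cfg.array     = (uintptr_t)array;
        cfg.mmu_type  = KVM_MMU_FSL_BOOKE_NOHV;
        cfg.array_len = entries * sizeof(*array);
        cap.args[0]   = (uintptr_t)&cfg;

        return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
    }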
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h index 59b88e99a235..5c6d2d7bf058 100644 --- a/arch/powerpc/kvm/e500_tlb.h +++ b/arch/powerpc/kvm/e500_tlb.h | |||
@@ -20,13 +20,9 @@ | |||
20 | #include <asm/tlb.h> | 20 | #include <asm/tlb.h> |
21 | #include <asm/kvm_e500.h> | 21 | #include <asm/kvm_e500.h> |
22 | 22 | ||
23 | #define KVM_E500_TLB0_WAY_SIZE_BIT 7 /* Fixed */ | 23 | /* This geometry is the legacy default -- can be overridden by userspace */ |
24 | #define KVM_E500_TLB0_WAY_SIZE (1UL << KVM_E500_TLB0_WAY_SIZE_BIT) | 24 | #define KVM_E500_TLB0_WAY_SIZE 128 |
25 | #define KVM_E500_TLB0_WAY_SIZE_MASK (KVM_E500_TLB0_WAY_SIZE - 1) | 25 | #define KVM_E500_TLB0_WAY_NUM 2 |
26 | |||
27 | #define KVM_E500_TLB0_WAY_NUM_BIT 1 /* No greater than 7 */ | ||
28 | #define KVM_E500_TLB0_WAY_NUM (1UL << KVM_E500_TLB0_WAY_NUM_BIT) | ||
29 | #define KVM_E500_TLB0_WAY_NUM_MASK (KVM_E500_TLB0_WAY_NUM - 1) | ||
30 | 26 | ||
31 | #define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM) | 27 | #define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM) |
32 | #define KVM_E500_TLB1_SIZE 16 | 28 | #define KVM_E500_TLB1_SIZE 16 |
@@ -58,50 +54,54 @@ extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *); | |||
58 | extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *); | 54 | extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *); |
59 | 55 | ||
60 | /* TLB helper functions */ | 56 | /* TLB helper functions */ |
61 | static inline unsigned int get_tlb_size(const struct tlbe *tlbe) | 57 | static inline unsigned int |
58 | get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe) | ||
62 | { | 59 | { |
63 | return (tlbe->mas1 >> 7) & 0x1f; | 60 | return (tlbe->mas1 >> 7) & 0x1f; |
64 | } | 61 | } |
65 | 62 | ||
66 | static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe) | 63 | static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe) |
67 | { | 64 | { |
68 | return tlbe->mas2 & 0xfffff000; | 65 | return tlbe->mas2 & 0xfffff000; |
69 | } | 66 | } |
70 | 67 | ||
71 | static inline u64 get_tlb_bytes(const struct tlbe *tlbe) | 68 | static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe) |
72 | { | 69 | { |
73 | unsigned int pgsize = get_tlb_size(tlbe); | 70 | unsigned int pgsize = get_tlb_size(tlbe); |
74 | return 1ULL << 10 << pgsize; | 71 | return 1ULL << 10 << pgsize; |
75 | } | 72 | } |
76 | 73 | ||
77 | static inline gva_t get_tlb_end(const struct tlbe *tlbe) | 74 | static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe) |
78 | { | 75 | { |
79 | u64 bytes = get_tlb_bytes(tlbe); | 76 | u64 bytes = get_tlb_bytes(tlbe); |
80 | return get_tlb_eaddr(tlbe) + bytes - 1; | 77 | return get_tlb_eaddr(tlbe) + bytes - 1; |
81 | } | 78 | } |
82 | 79 | ||
83 | static inline u64 get_tlb_raddr(const struct tlbe *tlbe) | 80 | static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe) |
84 | { | 81 | { |
85 | u64 rpn = tlbe->mas7; | 82 | return tlbe->mas7_3 & ~0xfffULL; |
86 | return (rpn << 32) | (tlbe->mas3 & 0xfffff000); | ||
87 | } | 83 | } |
88 | 84 | ||
89 | static inline unsigned int get_tlb_tid(const struct tlbe *tlbe) | 85 | static inline unsigned int |
86 | get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe) | ||
90 | { | 87 | { |
91 | return (tlbe->mas1 >> 16) & 0xff; | 88 | return (tlbe->mas1 >> 16) & 0xff; |
92 | } | 89 | } |
93 | 90 | ||
94 | static inline unsigned int get_tlb_ts(const struct tlbe *tlbe) | 91 | static inline unsigned int |
92 | get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe) | ||
95 | { | 93 | { |
96 | return (tlbe->mas1 >> 12) & 0x1; | 94 | return (tlbe->mas1 >> 12) & 0x1; |
97 | } | 95 | } |
98 | 96 | ||
99 | static inline unsigned int get_tlb_v(const struct tlbe *tlbe) | 97 | static inline unsigned int |
98 | get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe) | ||
100 | { | 99 | { |
101 | return (tlbe->mas1 >> 31) & 0x1; | 100 | return (tlbe->mas1 >> 31) & 0x1; |
102 | } | 101 | } |
103 | 102 | ||
104 | static inline unsigned int get_tlb_iprot(const struct tlbe *tlbe) | 103 | static inline unsigned int |
104 | get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe) | ||
105 | { | 105 | { |
106 | return (tlbe->mas1 >> 30) & 0x1; | 106 | return (tlbe->mas1 >> 30) & 0x1; |
107 | } | 107 | } |
@@ -121,59 +121,37 @@ static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu) | |||
121 | return !!(vcpu->arch.shared->msr & MSR_PR); | 121 | return !!(vcpu->arch.shared->msr & MSR_PR); |
122 | } | 122 | } |
123 | 123 | ||
124 | static inline unsigned int get_cur_spid( | 124 | static inline unsigned int get_cur_spid(const struct kvm_vcpu *vcpu) |
125 | const struct kvmppc_vcpu_e500 *vcpu_e500) | ||
126 | { | 125 | { |
127 | return (vcpu_e500->mas6 >> 16) & 0xff; | 126 | return (vcpu->arch.shared->mas6 >> 16) & 0xff; |
128 | } | 127 | } |
129 | 128 | ||
130 | static inline unsigned int get_cur_sas( | 129 | static inline unsigned int get_cur_sas(const struct kvm_vcpu *vcpu) |
131 | const struct kvmppc_vcpu_e500 *vcpu_e500) | ||
132 | { | 130 | { |
133 | return vcpu_e500->mas6 & 0x1; | 131 | return vcpu->arch.shared->mas6 & 0x1; |
134 | } | 132 | } |
135 | 133 | ||
136 | static inline unsigned int get_tlb_tlbsel( | 134 | static inline unsigned int get_tlb_tlbsel(const struct kvm_vcpu *vcpu) |
137 | const struct kvmppc_vcpu_e500 *vcpu_e500) | ||
138 | { | 135 | { |
139 | /* | 136 | /* |
140 | * The manual says that tlbsel is 2 bits wide. | 137 | * The manual says that tlbsel is 2 bits wide. |
141 | * Since we only have two TLBs, only the lower bit is used. | 138 | * Since we only have two TLBs, only the lower bit is used. |
142 | */ | 139 | */ |
143 | return (vcpu_e500->mas0 >> 28) & 0x1; | 140 | return (vcpu->arch.shared->mas0 >> 28) & 0x1; |
144 | } | ||
145 | |||
146 | static inline unsigned int get_tlb_nv_bit( | ||
147 | const struct kvmppc_vcpu_e500 *vcpu_e500) | ||
148 | { | ||
149 | return vcpu_e500->mas0 & 0xfff; | ||
150 | } | 141 | } |
151 | 142 | ||
152 | static inline unsigned int get_tlb_esel_bit( | 143 | static inline unsigned int get_tlb_nv_bit(const struct kvm_vcpu *vcpu) |
153 | const struct kvmppc_vcpu_e500 *vcpu_e500) | ||
154 | { | 144 | { |
155 | return (vcpu_e500->mas0 >> 16) & 0xfff; | 145 | return vcpu->arch.shared->mas0 & 0xfff; |
156 | } | 146 | } |
157 | 147 | ||
158 | static inline unsigned int get_tlb_esel( | 148 | static inline unsigned int get_tlb_esel_bit(const struct kvm_vcpu *vcpu) |
159 | const struct kvmppc_vcpu_e500 *vcpu_e500, | ||
160 | int tlbsel) | ||
161 | { | 149 | { |
162 | unsigned int esel = get_tlb_esel_bit(vcpu_e500); | 150 | return (vcpu->arch.shared->mas0 >> 16) & 0xfff; |
163 | |||
164 | if (tlbsel == 0) { | ||
165 | esel &= KVM_E500_TLB0_WAY_NUM_MASK; | ||
166 | esel |= ((vcpu_e500->mas2 >> 12) & KVM_E500_TLB0_WAY_SIZE_MASK) | ||
167 | << KVM_E500_TLB0_WAY_NUM_BIT; | ||
168 | } else { | ||
169 | esel &= KVM_E500_TLB1_SIZE - 1; | ||
170 | } | ||
171 | |||
172 | return esel; | ||
173 | } | 151 | } |
174 | 152 | ||
175 | static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, | 153 | static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, |
176 | const struct tlbe *tlbe) | 154 | const struct kvm_book3e_206_tlb_entry *tlbe) |
177 | { | 155 | { |
178 | gpa_t gpa; | 156 | gpa_t gpa; |
179 | 157 | ||
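With mas3 and mas7 folded into a single 64-bit mas7_3, get_tlb_raddr() can return physical addresses above 4 GiB with one mask. A worked decode of the helpers above, using invented MAS values:

    /* Illustrative decode, all MAS values invented:
     *
     *   struct kvm_book3e_206_tlb_entry e = {
     *       .mas1   = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K),
     *       .mas2   = 0xe0000000 | MAS2_I | MAS2_G,
     *       .mas7_3 = 0x1e0000000ULL | E500_TLB_SUPER_PERM_MASK,
     *   };
     *
     *   get_tlb_size(&e)  == BOOK3E_PAGESZ_4K      (TSIZE field = 2)
     *   get_tlb_bytes(&e) == 1ULL << 10 << 2       == 4096
     *   get_tlb_eaddr(&e) == 0xe0000000
     *   get_tlb_end(&e)   == 0xe0000fff
     *   get_tlb_raddr(&e) == 0x1e0000000, a 36-bit physical address the
     *                        old split carried as mas7 = 0x1 plus
     *                        mas3 = 0xe0000000.
     */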
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 141dce3c6810..968f40101883 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c | |||
@@ -13,6 +13,7 @@ | |||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | 13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
14 | * | 14 | * |
15 | * Copyright IBM Corp. 2007 | 15 | * Copyright IBM Corp. 2007 |
16 | * Copyright 2011 Freescale Semiconductor, Inc. | ||
16 | * | 17 | * |
17 | * Authors: Hollis Blanchard <hollisb@us.ibm.com> | 18 | * Authors: Hollis Blanchard <hollisb@us.ibm.com> |
18 | */ | 19 | */ |
@@ -69,54 +70,55 @@ | |||
69 | #define OP_STH 44 | 70 | #define OP_STH 44 |
70 | #define OP_STHU 45 | 71 | #define OP_STHU 45 |
71 | 72 | ||
72 | #ifdef CONFIG_PPC_BOOK3S | ||
73 | static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu) | ||
74 | { | ||
75 | return 1; | ||
76 | } | ||
77 | #else | ||
78 | static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu) | ||
79 | { | ||
80 | return vcpu->arch.tcr & TCR_DIE; | ||
81 | } | ||
82 | #endif | ||
83 | |||
84 | void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) | 73 | void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) |
85 | { | 74 | { |
86 | unsigned long dec_nsec; | 75 | unsigned long dec_nsec; |
76 | unsigned long long dec_time; | ||
87 | 77 | ||
88 | pr_debug("mtDEC: %x\n", vcpu->arch.dec); | 78 | pr_debug("mtDEC: %x\n", vcpu->arch.dec); |
79 | hrtimer_try_to_cancel(&vcpu->arch.dec_timer); | ||
80 | |||
89 | #ifdef CONFIG_PPC_BOOK3S | 81 | #ifdef CONFIG_PPC_BOOK3S |
90 | /* mtdec lowers the interrupt line when positive. */ | 82 | /* mtdec lowers the interrupt line when positive. */ |
91 | kvmppc_core_dequeue_dec(vcpu); | 83 | kvmppc_core_dequeue_dec(vcpu); |
92 | 84 | ||
93 | /* POWER4+ triggers a dec interrupt if the value is < 0 */ | 85 | /* POWER4+ triggers a dec interrupt if the value is < 0 */ |
94 | if (vcpu->arch.dec & 0x80000000) { | 86 | if (vcpu->arch.dec & 0x80000000) { |
95 | hrtimer_try_to_cancel(&vcpu->arch.dec_timer); | ||
96 | kvmppc_core_queue_dec(vcpu); | 87 | kvmppc_core_queue_dec(vcpu); |
97 | return; | 88 | return; |
98 | } | 89 | } |
99 | #endif | 90 | #endif |
100 | if (kvmppc_dec_enabled(vcpu)) { | 91 | |
101 | /* The decrementer ticks at the same rate as the timebase, so | 92 | #ifdef CONFIG_BOOKE |
102 | * that's how we convert the guest DEC value to the number of | 93 | /* On BOOKE, DEC = 0 is as good as decrementer not enabled */ |
103 | * host ticks. */ | 94 | if (vcpu->arch.dec == 0) |
104 | 95 | return; | |
105 | hrtimer_try_to_cancel(&vcpu->arch.dec_timer); | 96 | #endif |
106 | dec_nsec = vcpu->arch.dec; | 97 | |
107 | dec_nsec *= 1000; | 98 | /* |
108 | dec_nsec /= tb_ticks_per_usec; | 99 | * The decrementer ticks at the same rate as the timebase, so |
109 | hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec), | 100 | * that's how we convert the guest DEC value to the number of |
110 | HRTIMER_MODE_REL); | 101 | * host ticks. |
111 | vcpu->arch.dec_jiffies = get_tb(); | 102 | */ |
112 | } else { | 103 | |
113 | hrtimer_try_to_cancel(&vcpu->arch.dec_timer); | 104 | dec_time = vcpu->arch.dec; |
114 | } | 105 | dec_time *= 1000; |
106 | do_div(dec_time, tb_ticks_per_usec); | ||
107 | dec_nsec = do_div(dec_time, NSEC_PER_SEC); | ||
108 | hrtimer_start(&vcpu->arch.dec_timer, | ||
109 | ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL); | ||
110 | vcpu->arch.dec_jiffies = get_tb(); | ||
115 | } | 111 | } |
116 | 112 | ||
117 | u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb) | 113 | u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb) |
118 | { | 114 | { |
119 | u64 jd = tb - vcpu->arch.dec_jiffies; | 115 | u64 jd = tb - vcpu->arch.dec_jiffies; |
116 | |||
117 | #ifdef CONFIG_BOOKE | ||
118 | if (vcpu->arch.dec < jd) | ||
119 | return 0; | ||
120 | #endif | ||
121 | |||
120 | return vcpu->arch.dec - jd; | 122 | return vcpu->arch.dec - jd; |
121 | } | 123 | } |
122 | 124 | ||
@@ -159,7 +161,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
159 | case OP_TRAP_64: | 161 | case OP_TRAP_64: |
160 | kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP); | 162 | kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP); |
161 | #else | 163 | #else |
162 | kvmppc_core_queue_program(vcpu, vcpu->arch.esr | ESR_PTR); | 164 | kvmppc_core_queue_program(vcpu, |
165 | vcpu->arch.shared->esr | ESR_PTR); | ||
163 | #endif | 166 | #endif |
164 | advance = 0; | 167 | advance = 0; |
165 | break; | 168 | break; |
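The reworked kvmppc_emulate_dec() splits the interval into whole seconds plus a nanosecond remainder before arming the hrtimer. A worked example, assuming a 500 MHz timebase (tb_ticks_per_usec = 500), shows why the old single unsigned long of nanoseconds was not enough:

    /*
     * Assume tb_ticks_per_usec = 500 and a guest DEC of 0xffffffff:
     *
     *   dec_time = 0xffffffff * 1000              =  4294967295000
     *   do_div(dec_time, 500)                     -> 8589934590 ns
     *   dec_nsec = do_div(dec_time, NSEC_PER_SEC) -> dec_time = 8 s,
     *                                                dec_nsec = 589934590
     *   ktime_set(8, 589934590)
     *
     * 8589934590 overflows a 32-bit unsigned long, so the old
     * nanosecond-only hrtimer_start() path could truncate long
     * timeouts on 32-bit hosts.
     */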
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 607fbdf24b84..00d7e345b3fe 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -39,7 +39,8 @@ | |||
39 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) | 39 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) |
40 | { | 40 | { |
41 | return !(v->arch.shared->msr & MSR_WE) || | 41 | return !(v->arch.shared->msr & MSR_WE) || |
42 | !!(v->arch.pending_exceptions); | 42 | !!(v->arch.pending_exceptions) || |
43 | v->requests; | ||
43 | } | 44 | } |
44 | 45 | ||
45 | int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) | 46 | int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) |
@@ -66,7 +67,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) | |||
66 | vcpu->arch.magic_page_pa = param1; | 67 | vcpu->arch.magic_page_pa = param1; |
67 | vcpu->arch.magic_page_ea = param2; | 68 | vcpu->arch.magic_page_ea = param2; |
68 | 69 | ||
69 | r2 = KVM_MAGIC_FEAT_SR; | 70 | r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7; |
70 | 71 | ||
71 | r = HC_EV_SUCCESS; | 72 | r = HC_EV_SUCCESS; |
72 | break; | 73 | break; |
@@ -171,8 +172,11 @@ void kvm_arch_check_processor_compat(void *rtn) | |||
171 | *(int *)rtn = kvmppc_core_check_processor_compat(); | 172 | *(int *)rtn = kvmppc_core_check_processor_compat(); |
172 | } | 173 | } |
173 | 174 | ||
174 | int kvm_arch_init_vm(struct kvm *kvm) | 175 | int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) |
175 | { | 176 | { |
177 | if (type) | ||
178 | return -EINVAL; | ||
179 | |||
176 | return kvmppc_core_init_vm(kvm); | 180 | return kvmppc_core_init_vm(kvm); |
177 | } | 181 | } |
178 | 182 | ||
@@ -208,17 +212,22 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
208 | case KVM_CAP_PPC_BOOKE_SREGS: | 212 | case KVM_CAP_PPC_BOOKE_SREGS: |
209 | #else | 213 | #else |
210 | case KVM_CAP_PPC_SEGSTATE: | 214 | case KVM_CAP_PPC_SEGSTATE: |
215 | case KVM_CAP_PPC_HIOR: | ||
211 | case KVM_CAP_PPC_PAPR: | 216 | case KVM_CAP_PPC_PAPR: |
212 | #endif | 217 | #endif |
213 | case KVM_CAP_PPC_UNSET_IRQ: | 218 | case KVM_CAP_PPC_UNSET_IRQ: |
214 | case KVM_CAP_PPC_IRQ_LEVEL: | 219 | case KVM_CAP_PPC_IRQ_LEVEL: |
215 | case KVM_CAP_ENABLE_CAP: | 220 | case KVM_CAP_ENABLE_CAP: |
221 | case KVM_CAP_ONE_REG: | ||
216 | r = 1; | 222 | r = 1; |
217 | break; | 223 | break; |
218 | #ifndef CONFIG_KVM_BOOK3S_64_HV | 224 | #ifndef CONFIG_KVM_BOOK3S_64_HV |
219 | case KVM_CAP_PPC_PAIRED_SINGLES: | 225 | case KVM_CAP_PPC_PAIRED_SINGLES: |
220 | case KVM_CAP_PPC_OSI: | 226 | case KVM_CAP_PPC_OSI: |
221 | case KVM_CAP_PPC_GET_PVINFO: | 227 | case KVM_CAP_PPC_GET_PVINFO: |
228 | #ifdef CONFIG_KVM_E500 | ||
229 | case KVM_CAP_SW_TLB: | ||
230 | #endif | ||
222 | r = 1; | 231 | r = 1; |
223 | break; | 232 | break; |
224 | case KVM_CAP_COALESCED_MMIO: | 233 | case KVM_CAP_COALESCED_MMIO: |
@@ -238,7 +247,26 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
238 | if (cpu_has_feature(CPU_FTR_ARCH_201)) | 247 | if (cpu_has_feature(CPU_FTR_ARCH_201)) |
239 | r = 2; | 248 | r = 2; |
240 | break; | 249 | break; |
250 | case KVM_CAP_SYNC_MMU: | ||
251 | r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0; | ||
252 | break; | ||
241 | #endif | 253 | #endif |
254 | case KVM_CAP_NR_VCPUS: | ||
255 | /* | ||
256 | * Recommending a number of CPUs is somewhat arbitrary; we | ||
257 | * return the number of present CPUs for -HV (since a host | ||
258 | * will have secondary threads "offline"), and for other KVM | ||
259 | * implementations just count online CPUs. | ||
260 | */ | ||
261 | #ifdef CONFIG_KVM_BOOK3S_64_HV | ||
262 | r = num_present_cpus(); | ||
263 | #else | ||
264 | r = num_online_cpus(); | ||
265 | #endif | ||
266 | break; | ||
267 | case KVM_CAP_MAX_VCPUS: | ||
268 | r = KVM_MAX_VCPUS; | ||
269 | break; | ||
242 | default: | 270 | default: |
243 | r = 0; | 271 | r = 0; |
244 | break; | 272 | break; |
@@ -253,6 +281,16 @@ long kvm_arch_dev_ioctl(struct file *filp, | |||
253 | return -EINVAL; | 281 | return -EINVAL; |
254 | } | 282 | } |
255 | 283 | ||
284 | void kvm_arch_free_memslot(struct kvm_memory_slot *free, | ||
285 | struct kvm_memory_slot *dont) | ||
286 | { | ||
287 | } | ||
288 | |||
289 | int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | ||
290 | { | ||
291 | return 0; | ||
292 | } | ||
293 | |||
256 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 294 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
257 | struct kvm_memory_slot *memslot, | 295 | struct kvm_memory_slot *memslot, |
258 | struct kvm_memory_slot old, | 296 | struct kvm_memory_slot old, |
@@ -279,9 +317,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) | |||
279 | { | 317 | { |
280 | struct kvm_vcpu *vcpu; | 318 | struct kvm_vcpu *vcpu; |
281 | vcpu = kvmppc_core_vcpu_create(kvm, id); | 319 | vcpu = kvmppc_core_vcpu_create(kvm, id); |
282 | vcpu->arch.wqp = &vcpu->wq; | 320 | if (!IS_ERR(vcpu)) { |
283 | if (!IS_ERR(vcpu)) | 321 | vcpu->arch.wqp = &vcpu->wq; |
284 | kvmppc_create_vcpu_debugfs(vcpu, id); | 322 | kvmppc_create_vcpu_debugfs(vcpu, id); |
323 | } | ||
285 | return vcpu; | 324 | return vcpu; |
286 | } | 325 | } |
287 | 326 | ||
@@ -305,18 +344,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | |||
305 | return kvmppc_core_pending_dec(vcpu); | 344 | return kvmppc_core_pending_dec(vcpu); |
306 | } | 345 | } |
307 | 346 | ||
308 | static void kvmppc_decrementer_func(unsigned long data) | ||
309 | { | ||
310 | struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; | ||
311 | |||
312 | kvmppc_core_queue_dec(vcpu); | ||
313 | |||
314 | if (waitqueue_active(vcpu->arch.wqp)) { | ||
315 | wake_up_interruptible(vcpu->arch.wqp); | ||
316 | vcpu->stat.halt_wakeup++; | ||
317 | } | ||
318 | } | ||
319 | |||
320 | /* | 347 | /* |
321 | * low level hrtimer wake routine. Because this runs in hardirq context | 348 | * low level hrtimer wake routine. Because this runs in hardirq context |
322 | * we schedule a tasklet to do the real work. | 349 | * we schedule a tasklet to do the real work. |
@@ -431,20 +458,20 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, | |||
431 | 458 | ||
432 | kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); | 459 | kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); |
433 | 460 | ||
434 | switch (vcpu->arch.io_gpr & KVM_REG_EXT_MASK) { | 461 | switch (vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) { |
435 | case KVM_REG_GPR: | 462 | case KVM_MMIO_REG_GPR: |
436 | kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); | 463 | kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); |
437 | break; | 464 | break; |
438 | case KVM_REG_FPR: | 465 | case KVM_MMIO_REG_FPR: |
439 | vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; | 466 | vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; |
440 | break; | 467 | break; |
441 | #ifdef CONFIG_PPC_BOOK3S | 468 | #ifdef CONFIG_PPC_BOOK3S |
442 | case KVM_REG_QPR: | 469 | case KVM_MMIO_REG_QPR: |
443 | vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; | 470 | vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; |
444 | break; | 471 | break; |
445 | case KVM_REG_FQPR: | 472 | case KVM_MMIO_REG_FQPR: |
446 | vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; | 473 | vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; |
447 | vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; | 474 | vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; |
448 | break; | 475 | break; |
449 | #endif | 476 | #endif |
450 | default: | 477 | default: |
@@ -553,8 +580,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
553 | vcpu->arch.hcall_needed = 0; | 580 | vcpu->arch.hcall_needed = 0; |
554 | } | 581 | } |
555 | 582 | ||
556 | kvmppc_core_deliver_interrupts(vcpu); | ||
557 | |||
558 | r = kvmppc_vcpu_run(run, vcpu); | 583 | r = kvmppc_vcpu_run(run, vcpu); |
559 | 584 | ||
560 | if (vcpu->sigset_active) | 585 | if (vcpu->sigset_active) |
@@ -563,6 +588,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
563 | return r; | 588 | return r; |
564 | } | 589 | } |
565 | 590 | ||
591 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu) | ||
592 | { | ||
593 | int me; | ||
594 | int cpu = vcpu->cpu; | ||
595 | |||
596 | me = get_cpu(); | ||
597 | if (waitqueue_active(vcpu->arch.wqp)) { | ||
598 | wake_up_interruptible(vcpu->arch.wqp); | ||
599 | vcpu->stat.halt_wakeup++; | ||
600 | } else if (cpu != me && cpu != -1) { | ||
601 | smp_send_reschedule(vcpu->cpu); | ||
602 | } | ||
603 | put_cpu(); | ||
604 | } | ||
605 | |||
566 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) | 606 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) |
567 | { | 607 | { |
568 | if (irq->irq == KVM_INTERRUPT_UNSET) { | 608 | if (irq->irq == KVM_INTERRUPT_UNSET) { |
@@ -571,13 +611,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) | |||
571 | } | 611 | } |
572 | 612 | ||
573 | kvmppc_core_queue_external(vcpu, irq); | 613 | kvmppc_core_queue_external(vcpu, irq); |
574 | 614 | kvm_vcpu_kick(vcpu); | |
575 | if (waitqueue_active(vcpu->arch.wqp)) { | ||
576 | wake_up_interruptible(vcpu->arch.wqp); | ||
577 | vcpu->stat.halt_wakeup++; | ||
578 | } else if (vcpu->cpu != -1) { | ||
579 | smp_send_reschedule(vcpu->cpu); | ||
580 | } | ||
581 | 615 | ||
582 | return 0; | 616 | return 0; |
583 | } | 617 | } |
@@ -599,6 +633,19 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, | |||
599 | r = 0; | 633 | r = 0; |
600 | vcpu->arch.papr_enabled = true; | 634 | vcpu->arch.papr_enabled = true; |
601 | break; | 635 | break; |
636 | #ifdef CONFIG_KVM_E500 | ||
637 | case KVM_CAP_SW_TLB: { | ||
638 | struct kvm_config_tlb cfg; | ||
639 | void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0]; | ||
640 | |||
641 | r = -EFAULT; | ||
642 | if (copy_from_user(&cfg, user_ptr, sizeof(cfg))) | ||
643 | break; | ||
644 | |||
645 | r = kvm_vcpu_ioctl_config_tlb(vcpu, &cfg); | ||
646 | break; | ||
647 | } | ||
648 | #endif | ||
602 | default: | 649 | default: |
603 | r = -EINVAL; | 650 | r = -EINVAL; |
604 | break; | 651 | break; |
@@ -648,6 +695,32 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
648 | r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); | 695 | r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); |
649 | break; | 696 | break; |
650 | } | 697 | } |
698 | |||
699 | case KVM_SET_ONE_REG: | ||
700 | case KVM_GET_ONE_REG: | ||
701 | { | ||
702 | struct kvm_one_reg reg; | ||
703 | r = -EFAULT; | ||
704 | if (copy_from_user(®, argp, sizeof(reg))) | ||
705 | goto out; | ||
706 | if (ioctl == KVM_SET_ONE_REG) | ||
707 | r = kvm_vcpu_ioctl_set_one_reg(vcpu, ®); | ||
708 | else | ||
709 | r = kvm_vcpu_ioctl_get_one_reg(vcpu, ®); | ||
710 | break; | ||
711 | } | ||
712 | |||
713 | #ifdef CONFIG_KVM_E500 | ||
714 | case KVM_DIRTY_TLB: { | ||
715 | struct kvm_dirty_tlb dirty; | ||
716 | r = -EFAULT; | ||
717 | if (copy_from_user(&dirty, argp, sizeof(dirty))) | ||
718 | goto out; | ||
719 | r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty); | ||
720 | break; | ||
721 | } | ||
722 | #endif | ||
723 | |||
651 | default: | 724 | default: |
652 | r = -EINVAL; | 725 | r = -EINVAL; |
653 | } | 726 | } |
@@ -656,6 +729,11 @@ out: | |||
656 | return r; | 729 | return r; |
657 | } | 730 | } |
658 | 731 | ||
732 | int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) | ||
733 | { | ||
734 | return VM_FAULT_SIGBUS; | ||
735 | } | ||
736 | |||
659 | static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) | 737 | static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) |
660 | { | 738 | { |
661 | u32 inst_lis = 0x3c000000; | 739 | u32 inst_lis = 0x3c000000; |
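The matching userspace side of the KVM_DIRTY_TLB dispatch above might look like the sketch below. The bit-per-entry bitmap mirroring the shared array's layout is an assumption from the uapi structure, and this implementation flushes all shadow state via clear_tlb_refs() regardless of which bits are set:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* One bit per guest TLB entry, in the same order as the shared
     * array configured through KVM_CAP_SW_TLB (assumed layout). */
    static int flush_dirty_gtlb(int vcpu_fd, uint64_t *bitmap,
                                uint32_t num_dirty)
    {
        struct kvm_dirty_tlb dirty = {
            .bitmap    = (uintptr_t)bitmap,
            .num_dirty = num_dirty,
        };

        return ioctl(vcpu_fd, KVM_DIRTY_TLB, &dirty);
    }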
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index b135d3d397db..877186b7b1c3 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h | |||
@@ -118,11 +118,14 @@ TRACE_EVENT(kvm_book3s_exit, | |||
118 | ), | 118 | ), |
119 | 119 | ||
120 | TP_fast_assign( | 120 | TP_fast_assign( |
121 | struct kvmppc_book3s_shadow_vcpu *svcpu; | ||
121 | __entry->exit_nr = exit_nr; | 122 | __entry->exit_nr = exit_nr; |
122 | __entry->pc = kvmppc_get_pc(vcpu); | 123 | __entry->pc = kvmppc_get_pc(vcpu); |
123 | __entry->dar = kvmppc_get_fault_dar(vcpu); | 124 | __entry->dar = kvmppc_get_fault_dar(vcpu); |
124 | __entry->msr = vcpu->arch.shared->msr; | 125 | __entry->msr = vcpu->arch.shared->msr; |
125 | __entry->srr1 = to_svcpu(vcpu)->shadow_srr1; | 126 | svcpu = svcpu_get(vcpu); |
127 | __entry->srr1 = svcpu->shadow_srr1; | ||
128 | svcpu_put(svcpu); | ||
126 | ), | 129 | ), |
127 | 130 | ||
128 | TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx", | 131 | TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx", |
@@ -337,6 +340,63 @@ TRACE_EVENT(kvm_book3s_slbmte, | |||
337 | 340 | ||
338 | #endif /* CONFIG_PPC_BOOK3S */ | 341 | #endif /* CONFIG_PPC_BOOK3S */ |
339 | 342 | ||
343 | |||
344 | /************************************************************************* | ||
345 | *                          Book3E trace points                          * | ||
346 | *************************************************************************/ | ||
347 | |||
348 | #ifdef CONFIG_BOOKE | ||
349 | |||
350 | TRACE_EVENT(kvm_booke206_stlb_write, | ||
351 | TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3), | ||
352 | TP_ARGS(mas0, mas8, mas1, mas2, mas7_3), | ||
353 | |||
354 | TP_STRUCT__entry( | ||
355 | __field( __u32, mas0 ) | ||
356 | __field( __u32, mas8 ) | ||
357 | __field( __u32, mas1 ) | ||
358 | __field( __u64, mas2 ) | ||
359 | __field( __u64, mas7_3 ) | ||
360 | ), | ||
361 | |||
362 | TP_fast_assign( | ||
363 | __entry->mas0 = mas0; | ||
364 | __entry->mas8 = mas8; | ||
365 | __entry->mas1 = mas1; | ||
366 | __entry->mas2 = mas2; | ||
367 | __entry->mas7_3 = mas7_3; | ||
368 | ), | ||
369 | |||
370 | TP_printk("mas0=%x mas8=%x mas1=%x mas2=%llx mas7_3=%llx", | ||
371 | __entry->mas0, __entry->mas8, __entry->mas1, | ||
372 | __entry->mas2, __entry->mas7_3) | ||
373 | ); | ||
374 | |||
375 | TRACE_EVENT(kvm_booke206_gtlb_write, | ||
376 | TP_PROTO(__u32 mas0, __u32 mas1, __u64 mas2, __u64 mas7_3), | ||
377 | TP_ARGS(mas0, mas1, mas2, mas7_3), | ||
378 | |||
379 | TP_STRUCT__entry( | ||
380 | __field( __u32, mas0 ) | ||
381 | __field( __u32, mas1 ) | ||
382 | __field( __u64, mas2 ) | ||
383 | __field( __u64, mas7_3 ) | ||
384 | ), | ||
385 | |||
386 | TP_fast_assign( | ||
387 | __entry->mas0 = mas0; | ||
388 | __entry->mas1 = mas1; | ||
389 | __entry->mas2 = mas2; | ||
390 | __entry->mas7_3 = mas7_3; | ||
391 | ), | ||
392 | |||
393 | TP_printk("mas0=%x mas1=%x mas2=%llx mas7_3=%llx", | ||
394 | __entry->mas0, __entry->mas1, | ||
395 | __entry->mas2, __entry->mas7_3) | ||
396 | ); | ||
397 | |||
398 | #endif | ||
399 | |||
340 | #endif /* _TRACE_KVM_H */ | 400 | #endif /* _TRACE_KVM_H */ |
341 | 401 | ||
342 | /* This part must be outside protection */ | 402 | /* This part must be outside protection */ |
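Each TRACE_EVENT() above generates a trace_kvm_booke206_*_write() function for the TLB emulation code to call; the call sites live in e500_tlb.c rather than in this hunk. Roughly, with gtlbe standing in for whatever guest entry the caller holds:

    /* After a guest tlbwe has been written back to the guest TLB: */
    trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1,
                                  gtlbe->mas2, gtlbe->mas7_3);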
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index a3e628727697..fb05b123218f 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/io.h> | 12 | #include <linux/io.h> |
13 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
14 | #include <linux/hugetlb.h> | 14 | #include <linux/hugetlb.h> |
15 | #include <linux/export.h> | ||
15 | #include <linux/of_fdt.h> | 16 | #include <linux/of_fdt.h> |
16 | #include <linux/memblock.h> | 17 | #include <linux/memblock.h> |
17 | #include <linux/bootmem.h> | 18 | #include <linux/bootmem.h> |
@@ -103,6 +104,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift | |||
103 | *shift = hugepd_shift(*hpdp); | 104 | *shift = hugepd_shift(*hpdp); |
104 | return hugepte_offset(hpdp, ea, pdshift); | 105 | return hugepte_offset(hpdp, ea, pdshift); |
105 | } | 106 | } |
107 | EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte); | ||
106 | 108 | ||
107 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | 109 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) |
108 | { | 110 | { |
diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h index 82b32a100c7d..96076676e224 100644 --- a/arch/s390/include/asm/kvm.h +++ b/arch/s390/include/asm/kvm.h | |||
@@ -41,4 +41,15 @@ struct kvm_debug_exit_arch { | |||
41 | struct kvm_guest_debug_arch { | 41 | struct kvm_guest_debug_arch { |
42 | }; | 42 | }; |
43 | 43 | ||
44 | #define KVM_SYNC_PREFIX (1UL << 0) | ||
45 | #define KVM_SYNC_GPRS (1UL << 1) | ||
46 | #define KVM_SYNC_ACRS (1UL << 2) | ||
47 | #define KVM_SYNC_CRS (1UL << 3) | ||
48 | /* definition of registers in kvm_run */ | ||
49 | struct kvm_sync_regs { | ||
50 | __u64 prefix; /* prefix register */ | ||
51 | __u64 gprs[16]; /* general purpose registers */ | ||
52 | __u32 acrs[16]; /* access registers */ | ||
53 | __u64 crs[16]; /* control registers */ | ||
54 | }; | ||
44 | #endif | 55 | #endif |
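These flags describe which parts of the new kvm_sync_regs block in the mmap()ed kvm_run page are valid, letting userspace read and write hot registers without KVM_GET_REGS/KVM_SET_REGS round trips. A sketch of the consumer side, with mmap_size assumed to come from KVM_GET_VCPU_MMAP_SIZE and error handling elided:

    #include <stddef.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <linux/kvm.h>

    static void run_and_patch_gpr2(int vcpu_fd, size_t mmap_size)
    {
        struct kvm_run *run = mmap(NULL, mmap_size,
                                   PROT_READ | PROT_WRITE,
                                   MAP_SHARED, vcpu_fd, 0);

        ioctl(vcpu_fd, KVM_RUN, 0);

        if (run->kvm_valid_regs & KVM_SYNC_GPRS)
            printf("gpr 2 on exit: %llx\n",
                   (unsigned long long)run->s.regs.gprs[2]);

        /* Push a change back without a KVM_SET_REGS round trip: */
        run->s.regs.gprs[2] = 0;
        run->kvm_dirty_regs |= KVM_SYNC_GPRS;
    }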
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index b0c235cb6ad5..7343872890a2 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h | |||
@@ -220,18 +220,17 @@ struct kvm_s390_float_interrupt { | |||
220 | struct list_head list; | 220 | struct list_head list; |
221 | atomic_t active; | 221 | atomic_t active; |
222 | int next_rr_cpu; | 222 | int next_rr_cpu; |
223 | unsigned long idle_mask [(64 + sizeof(long) - 1) / sizeof(long)]; | 223 | unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1) |
224 | struct kvm_s390_local_interrupt *local_int[64]; | 224 | / sizeof(long)]; |
225 | struct kvm_s390_local_interrupt *local_int[KVM_MAX_VCPUS]; | ||
225 | }; | 226 | }; |
226 | 227 | ||
227 | 228 | ||
228 | struct kvm_vcpu_arch { | 229 | struct kvm_vcpu_arch { |
229 | struct kvm_s390_sie_block *sie_block; | 230 | struct kvm_s390_sie_block *sie_block; |
230 | unsigned long guest_gprs[16]; | ||
231 | s390_fp_regs host_fpregs; | 231 | s390_fp_regs host_fpregs; |
232 | unsigned int host_acrs[NUM_ACRS]; | 232 | unsigned int host_acrs[NUM_ACRS]; |
233 | s390_fp_regs guest_fpregs; | 233 | s390_fp_regs guest_fpregs; |
234 | unsigned int guest_acrs[NUM_ACRS]; | ||
235 | struct kvm_s390_local_interrupt local_int; | 234 | struct kvm_s390_local_interrupt local_int; |
236 | struct hrtimer ckc_timer; | 235 | struct hrtimer ckc_timer; |
237 | struct tasklet_struct tasklet; | 236 | struct tasklet_struct tasklet; |
@@ -246,6 +245,9 @@ struct kvm_vm_stat { | |||
246 | u32 remote_tlb_flush; | 245 | u32 remote_tlb_flush; |
247 | }; | 246 | }; |
248 | 247 | ||
248 | struct kvm_arch_memory_slot { | ||
249 | }; | ||
250 | |||
249 | struct kvm_arch{ | 251 | struct kvm_arch{ |
250 | struct sca_block *sca; | 252 | struct sca_block *sca; |
251 | debug_info_t *dbf; | 253 | debug_info_t *dbf; |
@@ -253,5 +255,5 @@ struct kvm_arch{ | |||
253 | struct gmap *gmap; | 255 | struct gmap *gmap; |
254 | }; | 256 | }; |
255 | 257 | ||
256 | extern int sie64a(struct kvm_s390_sie_block *, unsigned long *); | 258 | extern int sie64a(struct kvm_s390_sie_block *, u64 *); |
257 | #endif | 259 | #endif |
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index a21634173a66..78eb9847008f 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig | |||
@@ -34,6 +34,15 @@ config KVM | |||
34 | 34 | ||
35 | If unsure, say N. | 35 | If unsure, say N. |
36 | 36 | ||
37 | config KVM_S390_UCONTROL | ||
38 | bool "Userspace controlled virtual machines" | ||
39 | depends on KVM | ||
40 | ---help--- | ||
41 | Allow CAP_SYS_ADMIN users to create KVM virtual machines that are | ||
42 | controlled by userspace. | ||
43 | |||
44 | If unsure, say N. | ||
45 | |||
37 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under | 46 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under |
38 | # the virtualization menu. | 47 | # the virtualization menu. |
39 | source drivers/vhost/Kconfig | 48 | source drivers/vhost/Kconfig |
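A user controlled VM is requested at creation time by passing a machine type to KVM_CREATE_VM; the CAP_SYS_ADMIN check lives in kvm_arch_init_vm() further down. A sketch, assuming the usual /dev/kvm handle:

    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int create_ucontrol_vm(void)
    {
        int sys_fd = open("/dev/kvm", O_RDWR);

        if (ioctl(sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_UCONTROL) != 1)
            return -1;

        return ioctl(sys_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
    }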
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 8943e82cd4d9..a353f0ea45c2 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c | |||
@@ -20,8 +20,8 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) | |||
20 | unsigned long start, end; | 20 | unsigned long start, end; |
21 | unsigned long prefix = vcpu->arch.sie_block->prefix; | 21 | unsigned long prefix = vcpu->arch.sie_block->prefix; |
22 | 22 | ||
23 | start = vcpu->arch.guest_gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; | 23 | start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; |
24 | end = vcpu->arch.guest_gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; | 24 | end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; |
25 | 25 | ||
26 | if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end | 26 | if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end |
27 | || start < 2 * PAGE_SIZE) | 27 | || start < 2 * PAGE_SIZE) |
@@ -56,7 +56,7 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu) | |||
56 | static int __diag_ipl_functions(struct kvm_vcpu *vcpu) | 56 | static int __diag_ipl_functions(struct kvm_vcpu *vcpu) |
57 | { | 57 | { |
58 | unsigned int reg = vcpu->arch.sie_block->ipa & 0xf; | 58 | unsigned int reg = vcpu->arch.sie_block->ipa & 0xf; |
59 | unsigned long subcode = vcpu->arch.guest_gprs[reg] & 0xffff; | 59 | unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff; |
60 | 60 | ||
61 | VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode); | 61 | VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode); |
62 | switch (subcode) { | 62 | switch (subcode) { |
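The register indices come straight out of the intercepted instruction text: DIAGNOSE is encoded as '0x83 R1 R3 D2(B2)', and sie_block->ipa holds its first two bytes. A worked decode for diag_release_pages() above, with an invented ipa:

    /* ipa == 0x8324 (invented):
     *   (ipa & 0xf0) >> 4 == 2  -> R1, so start = gprs[2]
     *    ipa & 0xf        == 4  -> R3, so end   = gprs[4] + 4096
     */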
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 02434543eabb..361456577c6f 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c | |||
@@ -36,7 +36,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) | |||
36 | 36 | ||
37 | useraddr = disp2; | 37 | useraddr = disp2; |
38 | if (base2) | 38 | if (base2) |
39 | useraddr += vcpu->arch.guest_gprs[base2]; | 39 | useraddr += vcpu->run->s.regs.gprs[base2]; |
40 | 40 | ||
41 | if (useraddr & 7) | 41 | if (useraddr & 7) |
42 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 42 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
@@ -75,7 +75,7 @@ static int handle_lctl(struct kvm_vcpu *vcpu) | |||
75 | 75 | ||
76 | useraddr = disp2; | 76 | useraddr = disp2; |
77 | if (base2) | 77 | if (base2) |
78 | useraddr += vcpu->arch.guest_gprs[base2]; | 78 | useraddr += vcpu->run->s.regs.gprs[base2]; |
79 | 79 | ||
80 | if (useraddr & 3) | 80 | if (useraddr & 3) |
81 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 81 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
@@ -133,13 +133,6 @@ static int handle_stop(struct kvm_vcpu *vcpu) | |||
133 | 133 | ||
134 | vcpu->stat.exit_stop_request++; | 134 | vcpu->stat.exit_stop_request++; |
135 | spin_lock_bh(&vcpu->arch.local_int.lock); | 135 | spin_lock_bh(&vcpu->arch.local_int.lock); |
136 | if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { | ||
137 | vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; | ||
138 | rc = kvm_s390_vcpu_store_status(vcpu, | ||
139 | KVM_S390_STORE_STATUS_NOADDR); | ||
140 | if (rc >= 0) | ||
141 | rc = -EOPNOTSUPP; | ||
142 | } | ||
143 | 136 | ||
144 | if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) { | 137 | if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) { |
145 | vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP; | 138 | vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP; |
@@ -155,7 +148,18 @@ static int handle_stop(struct kvm_vcpu *vcpu) | |||
155 | rc = -EOPNOTSUPP; | 148 | rc = -EOPNOTSUPP; |
156 | } | 149 | } |
157 | 150 | ||
158 | spin_unlock_bh(&vcpu->arch.local_int.lock); | 151 | if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { |
152 | vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; | ||
153 | /* Store status must be called unlocked. Since local_int.lock | ||
154 | * only protects local_int.* and not guest memory, we can give | ||
155 | * up the lock here. */ | ||
156 | spin_unlock_bh(&vcpu->arch.local_int.lock); | ||
157 | rc = kvm_s390_vcpu_store_status(vcpu, | ||
158 | KVM_S390_STORE_STATUS_NOADDR); | ||
159 | if (rc >= 0) | ||
160 | rc = -EOPNOTSUPP; | ||
161 | } else | ||
162 | spin_unlock_bh(&vcpu->arch.local_int.lock); | ||
159 | return rc; | 163 | return rc; |
160 | } | 164 | } |
161 | 165 | ||
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index f0647ce6da21..2d9f9a72bb81 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c | |||
@@ -236,8 +236,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
236 | VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", | 236 | VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", |
237 | inti->prefix.address); | 237 | inti->prefix.address); |
238 | vcpu->stat.deliver_prefix_signal++; | 238 | vcpu->stat.deliver_prefix_signal++; |
239 | vcpu->arch.sie_block->prefix = inti->prefix.address; | 239 | kvm_s390_set_prefix(vcpu, inti->prefix.address); |
240 | vcpu->arch.sie_block->ihcpu = 0xffff; | ||
241 | break; | 240 | break; |
242 | 241 | ||
243 | case KVM_S390_RESTART: | 242 | case KVM_S390_RESTART: |
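Delivering a prefix interrupt now goes through kvm_s390_set_prefix() instead of the open-coded pair. The helper itself is not part of this hunk; judging from the removed lines it presumably bundles the prefix write with the ihcpu invalidation, roughly:

    /* Presumed shape of the helper -- an assumption, not part of
     * this hunk: */
    static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
    {
        vcpu->arch.sie_block->prefix = prefix;
        /* Force the SIE to pick up the new prefix on next entry. */
        vcpu->arch.sie_block->ihcpu  = 0xffff;
    }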
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d1c445732451..17ad69d596fd 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
@@ -129,6 +129,10 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
129 | case KVM_CAP_S390_PSW: | 129 | case KVM_CAP_S390_PSW: |
130 | case KVM_CAP_S390_GMAP: | 130 | case KVM_CAP_S390_GMAP: |
131 | case KVM_CAP_SYNC_MMU: | 131 | case KVM_CAP_SYNC_MMU: |
132 | #ifdef CONFIG_KVM_S390_UCONTROL | ||
133 | case KVM_CAP_S390_UCONTROL: | ||
134 | #endif | ||
135 | case KVM_CAP_SYNC_REGS: | ||
132 | r = 1; | 136 | r = 1; |
133 | break; | 137 | break; |
134 | default: | 138 | default: |
@@ -171,11 +175,22 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
171 | return r; | 175 | return r; |
172 | } | 176 | } |
173 | 177 | ||
174 | int kvm_arch_init_vm(struct kvm *kvm) | 178 | int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) |
175 | { | 179 | { |
176 | int rc; | 180 | int rc; |
177 | char debug_name[16]; | 181 | char debug_name[16]; |
178 | 182 | ||
183 | rc = -EINVAL; | ||
184 | #ifdef CONFIG_KVM_S390_UCONTROL | ||
185 | if (type & ~KVM_VM_S390_UCONTROL) | ||
186 | goto out_err; | ||
187 | if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN))) | ||
188 | goto out_err; | ||
189 | #else | ||
190 | if (type) | ||
191 | goto out_err; | ||
192 | #endif | ||
193 | |||
179 | rc = s390_enable_sie(); | 194 | rc = s390_enable_sie(); |
180 | if (rc) | 195 | if (rc) |
181 | goto out_err; | 196 | goto out_err; |
@@ -198,10 +213,13 @@ int kvm_arch_init_vm(struct kvm *kvm) | |||
198 | debug_register_view(kvm->arch.dbf, &debug_sprintf_view); | 213 | debug_register_view(kvm->arch.dbf, &debug_sprintf_view); |
199 | VM_EVENT(kvm, 3, "%s", "vm created"); | 214 | VM_EVENT(kvm, 3, "%s", "vm created"); |
200 | 215 | ||
201 | kvm->arch.gmap = gmap_alloc(current->mm); | 216 | if (type & KVM_VM_S390_UCONTROL) { |
202 | if (!kvm->arch.gmap) | 217 | kvm->arch.gmap = NULL; |
203 | goto out_nogmap; | 218 | } else { |
204 | 219 | kvm->arch.gmap = gmap_alloc(current->mm); | |
220 | if (!kvm->arch.gmap) | ||
221 | goto out_nogmap; | ||
222 | } | ||
205 | return 0; | 223 | return 0; |
206 | out_nogmap: | 224 | out_nogmap: |
207 | debug_unregister(kvm->arch.dbf); | 225 | debug_unregister(kvm->arch.dbf); |
@@ -214,11 +232,18 @@ out_err: | |||
214 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | 232 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) |
215 | { | 233 | { |
216 | VCPU_EVENT(vcpu, 3, "%s", "free cpu"); | 234 | VCPU_EVENT(vcpu, 3, "%s", "free cpu"); |
217 | clear_bit(63 - vcpu->vcpu_id, (unsigned long *) &vcpu->kvm->arch.sca->mcn); | 235 | if (!kvm_is_ucontrol(vcpu->kvm)) { |
218 | if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == | 236 | clear_bit(63 - vcpu->vcpu_id, |
219 | (__u64) vcpu->arch.sie_block) | 237 | (unsigned long *) &vcpu->kvm->arch.sca->mcn); |
220 | vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; | 238 | if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == |
239 | (__u64) vcpu->arch.sie_block) | ||
240 | vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; | ||
241 | } | ||
221 | smp_mb(); | 242 | smp_mb(); |
243 | |||
244 | if (kvm_is_ucontrol(vcpu->kvm)) | ||
245 | gmap_free(vcpu->arch.gmap); | ||
246 | |||
222 | free_page((unsigned long)(vcpu->arch.sie_block)); | 247 | free_page((unsigned long)(vcpu->arch.sie_block)); |
223 | kvm_vcpu_uninit(vcpu); | 248 | kvm_vcpu_uninit(vcpu); |
224 | kfree(vcpu); | 249 | kfree(vcpu); |
@@ -249,13 +274,25 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
249 | kvm_free_vcpus(kvm); | 274 | kvm_free_vcpus(kvm); |
250 | free_page((unsigned long)(kvm->arch.sca)); | 275 | free_page((unsigned long)(kvm->arch.sca)); |
251 | debug_unregister(kvm->arch.dbf); | 276 | debug_unregister(kvm->arch.dbf); |
252 | gmap_free(kvm->arch.gmap); | 277 | if (!kvm_is_ucontrol(kvm)) |
278 | gmap_free(kvm->arch.gmap); | ||
253 | } | 279 | } |
254 | 280 | ||
255 | /* Section: vcpu related */ | 281 | /* Section: vcpu related */ |
256 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | 282 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) |
257 | { | 283 | { |
284 | if (kvm_is_ucontrol(vcpu->kvm)) { | ||
285 | vcpu->arch.gmap = gmap_alloc(current->mm); | ||
286 | if (!vcpu->arch.gmap) | ||
287 | return -ENOMEM; | ||
288 | return 0; | ||
289 | } | ||
290 | |||
258 | vcpu->arch.gmap = vcpu->kvm->arch.gmap; | 291 | vcpu->arch.gmap = vcpu->kvm->arch.gmap; |
292 | vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | | ||
293 | KVM_SYNC_GPRS | | ||
294 | KVM_SYNC_ACRS | | ||
295 | KVM_SYNC_CRS; | ||
259 | return 0; | 296 | return 0; |
260 | } | 297 | } |
261 | 298 | ||
@@ -270,7 +307,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
270 | save_access_regs(vcpu->arch.host_acrs); | 307 | save_access_regs(vcpu->arch.host_acrs); |
271 | vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK; | 308 | vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK; |
272 | restore_fp_regs(&vcpu->arch.guest_fpregs); | 309 | restore_fp_regs(&vcpu->arch.guest_fpregs); |
273 | restore_access_regs(vcpu->arch.guest_acrs); | 310 | restore_access_regs(vcpu->run->s.regs.acrs); |
274 | gmap_enable(vcpu->arch.gmap); | 311 | gmap_enable(vcpu->arch.gmap); |
275 | atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); | 312 | atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); |
276 | } | 313 | } |
@@ -280,7 +317,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |||
280 | atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); | 317 | atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); |
281 | gmap_disable(vcpu->arch.gmap); | 318 | gmap_disable(vcpu->arch.gmap); |
282 | save_fp_regs(&vcpu->arch.guest_fpregs); | 319 | save_fp_regs(&vcpu->arch.guest_fpregs); |
283 | save_access_regs(vcpu->arch.guest_acrs); | 320 | save_access_regs(vcpu->run->s.regs.acrs); |
284 | restore_fp_regs(&vcpu->arch.host_fpregs); | 321 | restore_fp_regs(&vcpu->arch.host_fpregs); |
285 | restore_access_regs(vcpu->arch.host_acrs); | 322 | restore_access_regs(vcpu->arch.host_acrs); |
286 | } | 323 | } |
@@ -290,8 +327,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) | |||
290 | /* this equals initial cpu reset in pop, but we don't switch to ESA */ | 327 | /* this equals initial cpu reset in pop, but we don't switch to ESA */ |
291 | vcpu->arch.sie_block->gpsw.mask = 0UL; | 328 | vcpu->arch.sie_block->gpsw.mask = 0UL; |
292 | vcpu->arch.sie_block->gpsw.addr = 0UL; | 329 | vcpu->arch.sie_block->gpsw.addr = 0UL; |
293 | vcpu->arch.sie_block->prefix = 0UL; | 330 | kvm_s390_set_prefix(vcpu, 0); |
294 | vcpu->arch.sie_block->ihcpu = 0xffff; | ||
295 | vcpu->arch.sie_block->cputm = 0UL; | 331 | vcpu->arch.sie_block->cputm = 0UL; |
296 | vcpu->arch.sie_block->ckc = 0UL; | 332 | vcpu->arch.sie_block->ckc = 0UL; |
297 | vcpu->arch.sie_block->todpr = 0; | 333 | vcpu->arch.sie_block->todpr = 0; |
@@ -342,12 +378,19 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, | |||
342 | goto out_free_cpu; | 378 | goto out_free_cpu; |
343 | 379 | ||
344 | vcpu->arch.sie_block->icpua = id; | 380 | vcpu->arch.sie_block->icpua = id; |
345 | BUG_ON(!kvm->arch.sca); | 381 | if (!kvm_is_ucontrol(kvm)) { |
346 | if (!kvm->arch.sca->cpu[id].sda) | 382 | if (!kvm->arch.sca) { |
347 | kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; | 383 | WARN_ON_ONCE(1); |
348 | vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32); | 384 | goto out_free_cpu; |
349 | vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; | 385 | } |
350 | set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); | 386 | if (!kvm->arch.sca->cpu[id].sda) |
387 | kvm->arch.sca->cpu[id].sda = | ||
388 | (__u64) vcpu->arch.sie_block; | ||
389 | vcpu->arch.sie_block->scaoh = | ||
390 | (__u32)(((__u64)kvm->arch.sca) >> 32); | ||
391 | vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; | ||
392 | set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); | ||
393 | } | ||
351 | 394 | ||
352 | spin_lock_init(&vcpu->arch.local_int.lock); | 395 | spin_lock_init(&vcpu->arch.local_int.lock); |
353 | INIT_LIST_HEAD(&vcpu->arch.local_int.list); | 396 | INIT_LIST_HEAD(&vcpu->arch.local_int.list); |
@@ -388,29 +431,29 @@ static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) | |||
388 | 431 | ||
389 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 432 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
390 | { | 433 | { |
391 | memcpy(&vcpu->arch.guest_gprs, ®s->gprs, sizeof(regs->gprs)); | 434 | memcpy(&vcpu->run->s.regs.gprs, ®s->gprs, sizeof(regs->gprs)); |
392 | return 0; | 435 | return 0; |
393 | } | 436 | } |
394 | 437 | ||
395 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 438 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
396 | { | 439 | { |
397 | memcpy(®s->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs)); | 440 | memcpy(®s->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs)); |
398 | return 0; | 441 | return 0; |
399 | } | 442 | } |
400 | 443 | ||
401 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | 444 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, |
402 | struct kvm_sregs *sregs) | 445 | struct kvm_sregs *sregs) |
403 | { | 446 | { |
404 | memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); | 447 | memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs)); |
405 | memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); | 448 | memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); |
406 | restore_access_regs(vcpu->arch.guest_acrs); | 449 | restore_access_regs(vcpu->run->s.regs.acrs); |
407 | return 0; | 450 | return 0; |
408 | } | 451 | } |
409 | 452 | ||
410 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | 453 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, |
411 | struct kvm_sregs *sregs) | 454 | struct kvm_sregs *sregs) |
412 | { | 455 | { |
413 | memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs)); | 456 | memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs)); |
414 | memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); | 457 | memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); |
415 | return 0; | 458 | return 0; |
416 | } | 459 | } |
@@ -418,7 +461,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
418 | int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 461 | int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
419 | { | 462 | { |
420 | memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); | 463 | memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); |
421 | vcpu->arch.guest_fpregs.fpc = fpu->fpc; | 464 | vcpu->arch.guest_fpregs.fpc = fpu->fpc & FPC_VALID_MASK; |
422 | restore_fp_regs(&vcpu->arch.guest_fpregs); | 465 | restore_fp_regs(&vcpu->arch.guest_fpregs); |
423 | return 0; | 466 | return 0; |
424 | } | 467 | } |
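Note the new masking with FPC_VALID_MASK: KVM_SET_FPU can no longer plant reserved bits in the guest's floating point control register, which restore_fp_regs() would otherwise load straight into the host fpc.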
@@ -467,9 +510,11 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | |||
467 | return -EINVAL; /* not implemented yet */ | 510 | return -EINVAL; /* not implemented yet */ |
468 | } | 511 | } |
469 | 512 | ||
470 | static void __vcpu_run(struct kvm_vcpu *vcpu) | 513 | static int __vcpu_run(struct kvm_vcpu *vcpu) |
471 | { | 514 | { |
472 | memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16); | 515 | int rc; |
516 | |||
517 | memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); | ||
473 | 518 | ||
474 | if (need_resched()) | 519 | if (need_resched()) |
475 | schedule(); | 520 | schedule(); |
@@ -477,7 +522,8 @@ static void __vcpu_run(struct kvm_vcpu *vcpu) | |||
477 | if (test_thread_flag(TIF_MCCK_PENDING)) | 522 | if (test_thread_flag(TIF_MCCK_PENDING)) |
478 | s390_handle_mcck(); | 523 | s390_handle_mcck(); |
479 | 524 | ||
480 | kvm_s390_deliver_pending_interrupts(vcpu); | 525 | if (!kvm_is_ucontrol(vcpu->kvm)) |
526 | kvm_s390_deliver_pending_interrupts(vcpu); | ||
481 | 527 | ||
482 | vcpu->arch.sie_block->icptcode = 0; | 528 | vcpu->arch.sie_block->icptcode = 0; |
483 | local_irq_disable(); | 529 | local_irq_disable(); |
@@ -485,9 +531,15 @@ static void __vcpu_run(struct kvm_vcpu *vcpu) | |||
485 | local_irq_enable(); | 531 | local_irq_enable(); |
486 | VCPU_EVENT(vcpu, 6, "entering sie flags %x", | 532 | VCPU_EVENT(vcpu, 6, "entering sie flags %x", |
487 | atomic_read(&vcpu->arch.sie_block->cpuflags)); | 533 | atomic_read(&vcpu->arch.sie_block->cpuflags)); |
488 | if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) { | 534 | rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); |
489 | VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); | 535 | if (rc) { |
490 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 536 | if (kvm_is_ucontrol(vcpu->kvm)) { |
537 | rc = SIE_INTERCEPT_UCONTROL; | ||
538 | } else { | ||
539 | VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); | ||
540 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
541 | rc = 0; | ||
542 | } | ||
491 | } | 543 | } |
492 | VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", | 544 | VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", |
493 | vcpu->arch.sie_block->icptcode); | 545 | vcpu->arch.sie_block->icptcode); |
@@ -495,7 +547,8 @@ static void __vcpu_run(struct kvm_vcpu *vcpu) | |||
495 | kvm_guest_exit(); | 547 | kvm_guest_exit(); |
496 | local_irq_enable(); | 548 | local_irq_enable(); |
497 | 549 | ||
498 | memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16); | 550 | memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); |
551 | return rc; | ||
499 | } | 552 | } |
500 | 553 | ||
501 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 554 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
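__vcpu_run() changes from void to int so a SIE entry fault can be reported instead of acted on in place. For a conventional guest a fault in the sie instruction still becomes an addressing exception injected into the guest; for a ucontrol guest there is no kernel gmap to resolve anything against, so the fault is propagated as the new SIE_INTERCEPT_UCONTROL condition and handled further up. Interrupt delivery is likewise skipped for ucontrol guests, since userspace owns that machinery.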
@@ -516,6 +569,7 @@ rerun_vcpu: | |||
516 | case KVM_EXIT_UNKNOWN: | 569 | case KVM_EXIT_UNKNOWN: |
517 | case KVM_EXIT_INTR: | 570 | case KVM_EXIT_INTR: |
518 | case KVM_EXIT_S390_RESET: | 571 | case KVM_EXIT_S390_RESET: |
572 | case KVM_EXIT_S390_UCONTROL: | ||
519 | break; | 573 | break; |
520 | default: | 574 | default: |
521 | BUG(); | 575 | BUG(); |
@@ -523,12 +577,26 @@ rerun_vcpu: | |||
523 | 577 | ||
524 | vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; | 578 | vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; |
525 | vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; | 579 | vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; |
580 | if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) { | ||
581 | kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX; | ||
582 | kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); | ||
583 | } | ||
584 | if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { | ||
585 | kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS; | ||
586 | memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); | ||
587 | kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); | ||
588 | } | ||
526 | 589 | ||
527 | might_fault(); | 590 | might_fault(); |
528 | 591 | ||
529 | do { | 592 | do { |
530 | __vcpu_run(vcpu); | 593 | rc = __vcpu_run(vcpu); |
531 | rc = kvm_handle_sie_intercept(vcpu); | 594 | if (rc) |
595 | break; | ||
596 | if (kvm_is_ucontrol(vcpu->kvm)) | ||
597 | rc = -EOPNOTSUPP; | ||
598 | else | ||
599 | rc = kvm_handle_sie_intercept(vcpu); | ||
532 | } while (!signal_pending(current) && !rc); | 600 | } while (!signal_pending(current) && !rc); |
533 | 601 | ||
534 | if (rc == SIE_INTERCEPT_RERUNVCPU) | 602 | if (rc == SIE_INTERCEPT_RERUNVCPU) |
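Two things happen in the run loop above. First, KVM_RUN now consumes kvm_dirty_regs before entering the guest: KVM_SYNC_PREFIX and KVM_SYNC_CRS announce that userspace changed the prefix or the control registers in the sync area, and kvm_s390_set_prefix() pushes the value into the SIE block while resetting ihcpu, apparently to invalidate any cached translation of the old prefix. Second, ucontrol guests bypass the in-kernel intercept handlers entirely (rc = -EOPNOTSUPP), so every intercept is reflected to userspace. The userspace side of a prefix update might look like this, reusing the mmapped kvm_run from the earlier sketch:

    /* Relocate the guest prefix area; new_prefix is assumed to be
     * 8k aligned, matching the 0x7fffe000 mask applied in-kernel. */
    run->s.regs.prefix = new_prefix;
    run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
    ioctl(vcpu_fd, KVM_RUN, 0);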
@@ -539,6 +607,16 @@ rerun_vcpu: | |||
539 | rc = -EINTR; | 607 | rc = -EINTR; |
540 | } | 608 | } |
541 | 609 | ||
610 | #ifdef CONFIG_KVM_S390_UCONTROL | ||
611 | if (rc == SIE_INTERCEPT_UCONTROL) { | ||
612 | kvm_run->exit_reason = KVM_EXIT_S390_UCONTROL; | ||
613 | kvm_run->s390_ucontrol.trans_exc_code = | ||
614 | current->thread.gmap_addr; | ||
615 | kvm_run->s390_ucontrol.pgm_code = 0x10; | ||
616 | rc = 0; | ||
617 | } | ||
618 | #endif | ||
619 | |||
542 | if (rc == -EOPNOTSUPP) { | 620 | if (rc == -EOPNOTSUPP) { |
543 | /* intercept cannot be handled in-kernel, prepare kvm-run */ | 621 | /* intercept cannot be handled in-kernel, prepare kvm-run */ |
544 | kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; | 622 | kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; |
@@ -556,6 +634,8 @@ rerun_vcpu: | |||
556 | 634 | ||
557 | kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; | 635 | kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; |
558 | kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; | 636 | kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; |
637 | kvm_run->s.regs.prefix = vcpu->arch.sie_block->prefix; | ||
638 | memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); | ||
559 | 639 | ||
560 | if (vcpu->sigset_active) | 640 | if (vcpu->sigset_active) |
561 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 641 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
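On the way out of KVM_RUN the prefix and control registers are copied back into the sync area, so userspace always reads current values, and a SIE fault on a ucontrol guest is surfaced as the new KVM_EXIT_S390_UCONTROL exit: trans_exc_code carries the faulting guest address from the thread's gmap_addr, and pgm_code is fixed at 0x10 (segment translation exception). Userspace is expected to establish a mapping for that address and re-enter. A hedged sketch of the exit handling, where map_segment() is a hypothetical helper built on KVM_S390_UCAS_MAP:

    switch (run->exit_reason) {
    case KVM_EXIT_S390_UCONTROL:
            /* back the faulting segment with user memory,
             * then simply run the vcpu again */
            map_segment(vcpu_fd, run->s390_ucontrol.trans_exc_code);
            break;
    }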
@@ -602,7 +682,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) | |||
602 | return -EFAULT; | 682 | return -EFAULT; |
603 | 683 | ||
604 | if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs), | 684 | if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs), |
605 | vcpu->arch.guest_gprs, 128, prefix)) | 685 | vcpu->run->s.regs.gprs, 128, prefix)) |
606 | return -EFAULT; | 686 | return -EFAULT; |
607 | 687 | ||
608 | if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw), | 688 | if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw), |
@@ -631,7 +711,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) | |||
631 | return -EFAULT; | 711 | return -EFAULT; |
632 | 712 | ||
633 | if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs), | 713 | if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs), |
634 | &vcpu->arch.guest_acrs, 64, prefix)) | 714 | &vcpu->run->s.regs.acrs, 64, prefix)) |
635 | return -EFAULT; | 715 | return -EFAULT; |
636 | 716 | ||
637 | if (__guestcopy(vcpu, | 717 | if (__guestcopy(vcpu, |
@@ -673,12 +753,77 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
673 | case KVM_S390_INITIAL_RESET: | 753 | case KVM_S390_INITIAL_RESET: |
674 | r = kvm_arch_vcpu_ioctl_initial_reset(vcpu); | 754 | r = kvm_arch_vcpu_ioctl_initial_reset(vcpu); |
675 | break; | 755 | break; |
756 | #ifdef CONFIG_KVM_S390_UCONTROL | ||
757 | case KVM_S390_UCAS_MAP: { | ||
758 | struct kvm_s390_ucas_mapping ucasmap; | ||
759 | |||
760 | if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { | ||
761 | r = -EFAULT; | ||
762 | break; | ||
763 | } | ||
764 | |||
765 | if (!kvm_is_ucontrol(vcpu->kvm)) { | ||
766 | r = -EINVAL; | ||
767 | break; | ||
768 | } | ||
769 | |||
770 | r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr, | ||
771 | ucasmap.vcpu_addr, ucasmap.length); | ||
772 | break; | ||
773 | } | ||
774 | case KVM_S390_UCAS_UNMAP: { | ||
775 | struct kvm_s390_ucas_mapping ucasmap; | ||
776 | |||
777 | if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) { | ||
778 | r = -EFAULT; | ||
779 | break; | ||
780 | } | ||
781 | |||
782 | if (!kvm_is_ucontrol(vcpu->kvm)) { | ||
783 | r = -EINVAL; | ||
784 | break; | ||
785 | } | ||
786 | |||
787 | r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr, | ||
788 | ucasmap.length); | ||
789 | break; | ||
790 | } | ||
791 | #endif | ||
792 | case KVM_S390_VCPU_FAULT: { | ||
793 | r = gmap_fault(arg, vcpu->arch.gmap); | ||
794 | if (!IS_ERR_VALUE(r)) | ||
795 | r = 0; | ||
796 | break; | ||
797 | } | ||
676 | default: | 798 | default: |
677 | r = -EINVAL; | 799 | r = -ENOTTY; |
678 | } | 800 | } |
679 | return r; | 801 | return r; |
680 | } | 802 | } |
681 | 803 | ||
804 | int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) | ||
805 | { | ||
806 | #ifdef CONFIG_KVM_S390_UCONTROL | ||
807 | if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET) | ||
808 | && (kvm_is_ucontrol(vcpu->kvm))) { | ||
809 | vmf->page = virt_to_page(vcpu->arch.sie_block); | ||
810 | get_page(vmf->page); | ||
811 | return 0; | ||
812 | } | ||
813 | #endif | ||
814 | return VM_FAULT_SIGBUS; | ||
815 | } | ||
816 | |||
817 | void kvm_arch_free_memslot(struct kvm_memory_slot *free, | ||
818 | struct kvm_memory_slot *dont) | ||
819 | { | ||
820 | } | ||
821 | |||
822 | int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | ||
823 | { | ||
824 | return 0; | ||
825 | } | ||
826 | |||
682 | /* Section: memory related */ | 827 | /* Section: memory related */ |
683 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 828 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
684 | struct kvm_memory_slot *memslot, | 829 | struct kvm_memory_slot *memslot, |
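The new vcpu ioctls above complete the ucontrol interface: KVM_S390_UCAS_MAP and KVM_S390_UCAS_UNMAP maintain the guest-to-user address translation in the vcpu's gmap, KVM_S390_VCPU_FAULT resolves a single guest address up front, and unknown ioctls now return the conventional -ENOTTY instead of -EINVAL. kvm_arch_vcpu_fault() additionally exposes the SIE control block by mmapping the vcpu fd at KVM_S390_SIE_PAGE_OFFSET (ucontrol VMs only; every other offset still raises SIGBUS), and the two empty memslot hooks satisfy the new arch interface that the x86 hunks below actually use. A sketch of the map call, assuming segment (1 MB) granularity as gmap_map_segment() suggests:

    struct kvm_s390_ucas_mapping map = {
            .user_addr = (__u64)(unsigned long)host_buf,
            .vcpu_addr = guest_addr,        /* segment aligned */
            .length    = 0x100000,          /* one segment */
    };

    if (ioctl(vcpu_fd, KVM_S390_UCAS_MAP, &map) < 0)
            perror("KVM_S390_UCAS_MAP");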
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 99b0b7597115..ff28f9d1c9eb 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
@@ -26,6 +26,7 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); | |||
26 | 26 | ||
27 | /* negativ values are error codes, positive values for internal conditions */ | 27 | /* negativ values are error codes, positive values for internal conditions */ |
28 | #define SIE_INTERCEPT_RERUNVCPU (1<<0) | 28 | #define SIE_INTERCEPT_RERUNVCPU (1<<0) |
29 | #define SIE_INTERCEPT_UCONTROL (1<<1) | ||
29 | int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); | 30 | int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); |
30 | 31 | ||
31 | #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ | 32 | #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ |
@@ -47,6 +48,23 @@ static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu) | |||
47 | return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT; | 48 | return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT; |
48 | } | 49 | } |
49 | 50 | ||
51 | static inline int kvm_is_ucontrol(struct kvm *kvm) | ||
52 | { | ||
53 | #ifdef CONFIG_KVM_S390_UCONTROL | ||
54 | if (kvm->arch.gmap) | ||
55 | return 0; | ||
56 | return 1; | ||
57 | #else | ||
58 | return 0; | ||
59 | #endif | ||
60 | } | ||
61 | |||
62 | static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) | ||
63 | { | ||
64 | vcpu->arch.sie_block->prefix = prefix & 0x7fffe000u; | ||
65 | vcpu->arch.sie_block->ihcpu = 0xffff; | ||
66 | } | ||
67 | |||
50 | int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); | 68 | int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); |
51 | enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); | 69 | enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); |
52 | void kvm_s390_tasklet(unsigned long parm); | 70 | void kvm_s390_tasklet(unsigned long parm); |
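kvm_is_ucontrol() keys off the absence of a kernel-managed gmap, which only happens when CONFIG_KVM_S390_UCONTROL is built in and the VM was created with the ucontrol machine type; without the config option it constant-folds to 0 and all the new branches compile away. kvm_s390_set_prefix() centralizes what handle_set_prefix() used to do inline: mask the prefix to its architected bits and reset ihcpu so SIE picks the new value up. Creating such a VM is a one-liner (a sketch; CAP_SYS_ADMIN is required):

    int kvm_fd = open("/dev/kvm", O_RDWR);
    int vm_fd  = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);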
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index d02638959922..e5a45dbd26ac 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c | |||
@@ -33,7 +33,7 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) | |||
33 | 33 | ||
34 | operand2 = disp2; | 34 | operand2 = disp2; |
35 | if (base2) | 35 | if (base2) |
36 | operand2 += vcpu->arch.guest_gprs[base2]; | 36 | operand2 += vcpu->run->s.regs.gprs[base2]; |
37 | 37 | ||
38 | /* must be word boundary */ | 38 | /* must be word boundary */ |
39 | if (operand2 & 3) { | 39 | if (operand2 & 3) { |
@@ -56,8 +56,7 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) | |||
56 | goto out; | 56 | goto out; |
57 | } | 57 | } |
58 | 58 | ||
59 | vcpu->arch.sie_block->prefix = address; | 59 | kvm_s390_set_prefix(vcpu, address); |
60 | vcpu->arch.sie_block->ihcpu = 0xffff; | ||
61 | 60 | ||
62 | VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); | 61 | VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); |
63 | out: | 62 | out: |
@@ -74,7 +73,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) | |||
74 | vcpu->stat.instruction_stpx++; | 73 | vcpu->stat.instruction_stpx++; |
75 | operand2 = disp2; | 74 | operand2 = disp2; |
76 | if (base2) | 75 | if (base2) |
77 | operand2 += vcpu->arch.guest_gprs[base2]; | 76 | operand2 += vcpu->run->s.regs.gprs[base2]; |
78 | 77 | ||
79 | /* must be word boundary */ | 78 | /* must be word boundary */ |
80 | if (operand2 & 3) { | 79 | if (operand2 & 3) { |
@@ -106,7 +105,7 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) | |||
106 | vcpu->stat.instruction_stap++; | 105 | vcpu->stat.instruction_stap++; |
107 | useraddr = disp2; | 106 | useraddr = disp2; |
108 | if (base2) | 107 | if (base2) |
109 | useraddr += vcpu->arch.guest_gprs[base2]; | 108 | useraddr += vcpu->run->s.regs.gprs[base2]; |
110 | 109 | ||
111 | if (useraddr & 1) { | 110 | if (useraddr & 1) { |
112 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 111 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
@@ -181,7 +180,7 @@ static int handle_stidp(struct kvm_vcpu *vcpu) | |||
181 | vcpu->stat.instruction_stidp++; | 180 | vcpu->stat.instruction_stidp++; |
182 | operand2 = disp2; | 181 | operand2 = disp2; |
183 | if (base2) | 182 | if (base2) |
184 | operand2 += vcpu->arch.guest_gprs[base2]; | 183 | operand2 += vcpu->run->s.regs.gprs[base2]; |
185 | 184 | ||
186 | if (operand2 & 7) { | 185 | if (operand2 & 7) { |
187 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 186 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
@@ -232,9 +231,9 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) | |||
232 | 231 | ||
233 | static int handle_stsi(struct kvm_vcpu *vcpu) | 232 | static int handle_stsi(struct kvm_vcpu *vcpu) |
234 | { | 233 | { |
235 | int fc = (vcpu->arch.guest_gprs[0] & 0xf0000000) >> 28; | 234 | int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28; |
236 | int sel1 = vcpu->arch.guest_gprs[0] & 0xff; | 235 | int sel1 = vcpu->run->s.regs.gprs[0] & 0xff; |
237 | int sel2 = vcpu->arch.guest_gprs[1] & 0xffff; | 236 | int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff; |
238 | int base2 = vcpu->arch.sie_block->ipb >> 28; | 237 | int base2 = vcpu->arch.sie_block->ipb >> 28; |
239 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); | 238 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); |
240 | u64 operand2; | 239 | u64 operand2; |
@@ -245,14 +244,14 @@ static int handle_stsi(struct kvm_vcpu *vcpu) | |||
245 | 244 | ||
246 | operand2 = disp2; | 245 | operand2 = disp2; |
247 | if (base2) | 246 | if (base2) |
248 | operand2 += vcpu->arch.guest_gprs[base2]; | 247 | operand2 += vcpu->run->s.regs.gprs[base2]; |
249 | 248 | ||
250 | if (operand2 & 0xfff && fc > 0) | 249 | if (operand2 & 0xfff && fc > 0) |
251 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 250 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
252 | 251 | ||
253 | switch (fc) { | 252 | switch (fc) { |
254 | case 0: | 253 | case 0: |
255 | vcpu->arch.guest_gprs[0] = 3 << 28; | 254 | vcpu->run->s.regs.gprs[0] = 3 << 28; |
256 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); | 255 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); |
257 | return 0; | 256 | return 0; |
258 | case 1: /* same handling for 1 and 2 */ | 257 | case 1: /* same handling for 1 and 2 */ |
@@ -281,7 +280,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) | |||
281 | } | 280 | } |
282 | free_page(mem); | 281 | free_page(mem); |
283 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); | 282 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); |
284 | vcpu->arch.guest_gprs[0] = 0; | 283 | vcpu->run->s.regs.gprs[0] = 0; |
285 | return 0; | 284 | return 0; |
286 | out_mem: | 285 | out_mem: |
287 | free_page(mem); | 286 | free_page(mem); |
@@ -333,8 +332,8 @@ static int handle_tprot(struct kvm_vcpu *vcpu) | |||
333 | int disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16; | 332 | int disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16; |
334 | int base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12; | 333 | int base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12; |
335 | int disp2 = vcpu->arch.sie_block->ipb & 0x0fff; | 334 | int disp2 = vcpu->arch.sie_block->ipb & 0x0fff; |
336 | u64 address1 = disp1 + base1 ? vcpu->arch.guest_gprs[base1] : 0; | 335 | u64 address1 = disp1 + base1 ? vcpu->run->s.regs.gprs[base1] : 0; |
337 | u64 address2 = disp2 + base2 ? vcpu->arch.guest_gprs[base2] : 0; | 336 | u64 address2 = disp2 + base2 ? vcpu->run->s.regs.gprs[base2] : 0; |
338 | struct vm_area_struct *vma; | 337 | struct vm_area_struct *vma; |
339 | unsigned long user_address; | 338 | unsigned long user_address; |
340 | 339 | ||
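The priv.c hunks are almost entirely the mechanical guest_gprs to run->s.regs.gprs rename that follows from the sync-regs move; the one functional tweak is handle_set_prefix() going through the new kvm_s390_set_prefix() helper so the ihcpu invalidation lives in a single place.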
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 0a7941d74bc6..0ad4cf238391 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c | |||
@@ -48,7 +48,7 @@ | |||
48 | 48 | ||
49 | 49 | ||
50 | static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, | 50 | static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, |
51 | unsigned long *reg) | 51 | u64 *reg) |
52 | { | 52 | { |
53 | struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; | 53 | struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; |
54 | int rc; | 54 | int rc; |
@@ -160,12 +160,15 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) | |||
160 | inti->type = KVM_S390_SIGP_STOP; | 160 | inti->type = KVM_S390_SIGP_STOP; |
161 | 161 | ||
162 | spin_lock_bh(&li->lock); | 162 | spin_lock_bh(&li->lock); |
163 | if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) | ||
164 | goto out; | ||
163 | list_add_tail(&inti->list, &li->list); | 165 | list_add_tail(&inti->list, &li->list); |
164 | atomic_set(&li->active, 1); | 166 | atomic_set(&li->active, 1); |
165 | atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); | 167 | atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); |
166 | li->action_bits |= action; | 168 | li->action_bits |= action; |
167 | if (waitqueue_active(&li->wq)) | 169 | if (waitqueue_active(&li->wq)) |
168 | wake_up_interruptible(&li->wq); | 170 | wake_up_interruptible(&li->wq); |
171 | out: | ||
169 | spin_unlock_bh(&li->lock); | 172 | spin_unlock_bh(&li->lock); |
170 | 173 | ||
171 | return 0; /* order accepted */ | 174 | return 0; /* order accepted */ |
@@ -220,7 +223,7 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) | |||
220 | } | 223 | } |
221 | 224 | ||
222 | static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, | 225 | static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, |
223 | unsigned long *reg) | 226 | u64 *reg) |
224 | { | 227 | { |
225 | struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; | 228 | struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; |
226 | struct kvm_s390_local_interrupt *li = NULL; | 229 | struct kvm_s390_local_interrupt *li = NULL; |
@@ -278,7 +281,7 @@ out_fi: | |||
278 | } | 281 | } |
279 | 282 | ||
280 | static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, | 283 | static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, |
281 | unsigned long *reg) | 284 | u64 *reg) |
282 | { | 285 | { |
283 | int rc; | 286 | int rc; |
284 | struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; | 287 | struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; |
@@ -309,6 +312,34 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, | |||
309 | return rc; | 312 | return rc; |
310 | } | 313 | } |
311 | 314 | ||
315 | static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr) | ||
316 | { | ||
317 | int rc = 0; | ||
318 | struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; | ||
319 | struct kvm_s390_local_interrupt *li; | ||
320 | |||
321 | if (cpu_addr >= KVM_MAX_VCPUS) | ||
322 | return 3; /* not operational */ | ||
323 | |||
324 | spin_lock(&fi->lock); | ||
325 | li = fi->local_int[cpu_addr]; | ||
326 | if (li == NULL) { | ||
327 | rc = 3; /* not operational */ | ||
328 | goto out; | ||
329 | } | ||
330 | |||
331 | spin_lock_bh(&li->lock); | ||
332 | if (li->action_bits & ACTION_STOP_ON_STOP) | ||
333 | rc = 2; /* busy */ | ||
334 | else | ||
335 | VCPU_EVENT(vcpu, 4, "sigp restart %x to handle userspace", | ||
336 | cpu_addr); | ||
337 | spin_unlock_bh(&li->lock); | ||
338 | out: | ||
339 | spin_unlock(&fi->lock); | ||
340 | return rc; | ||
341 | } | ||
342 | |||
312 | int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) | 343 | int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) |
313 | { | 344 | { |
314 | int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; | 345 | int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; |
@@ -316,7 +347,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) | |||
316 | int base2 = vcpu->arch.sie_block->ipb >> 28; | 347 | int base2 = vcpu->arch.sie_block->ipb >> 28; |
317 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); | 348 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); |
318 | u32 parameter; | 349 | u32 parameter; |
319 | u16 cpu_addr = vcpu->arch.guest_gprs[r3]; | 350 | u16 cpu_addr = vcpu->run->s.regs.gprs[r3]; |
320 | u8 order_code; | 351 | u8 order_code; |
321 | int rc; | 352 | int rc; |
322 | 353 | ||
@@ -327,18 +358,18 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) | |||
327 | 358 | ||
328 | order_code = disp2; | 359 | order_code = disp2; |
329 | if (base2) | 360 | if (base2) |
330 | order_code += vcpu->arch.guest_gprs[base2]; | 361 | order_code += vcpu->run->s.regs.gprs[base2]; |
331 | 362 | ||
332 | if (r1 % 2) | 363 | if (r1 % 2) |
333 | parameter = vcpu->arch.guest_gprs[r1]; | 364 | parameter = vcpu->run->s.regs.gprs[r1]; |
334 | else | 365 | else |
335 | parameter = vcpu->arch.guest_gprs[r1 + 1]; | 366 | parameter = vcpu->run->s.regs.gprs[r1 + 1]; |
336 | 367 | ||
337 | switch (order_code) { | 368 | switch (order_code) { |
338 | case SIGP_SENSE: | 369 | case SIGP_SENSE: |
339 | vcpu->stat.instruction_sigp_sense++; | 370 | vcpu->stat.instruction_sigp_sense++; |
340 | rc = __sigp_sense(vcpu, cpu_addr, | 371 | rc = __sigp_sense(vcpu, cpu_addr, |
341 | &vcpu->arch.guest_gprs[r1]); | 372 | &vcpu->run->s.regs.gprs[r1]); |
342 | break; | 373 | break; |
343 | case SIGP_EXTERNAL_CALL: | 374 | case SIGP_EXTERNAL_CALL: |
344 | vcpu->stat.instruction_sigp_external_call++; | 375 | vcpu->stat.instruction_sigp_external_call++; |
@@ -354,7 +385,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) | |||
354 | break; | 385 | break; |
355 | case SIGP_STOP_STORE_STATUS: | 386 | case SIGP_STOP_STORE_STATUS: |
356 | vcpu->stat.instruction_sigp_stop++; | 387 | vcpu->stat.instruction_sigp_stop++; |
357 | rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP); | 388 | rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP | |
389 | ACTION_STOP_ON_STOP); | ||
358 | break; | 390 | break; |
359 | case SIGP_SET_ARCH: | 391 | case SIGP_SET_ARCH: |
360 | vcpu->stat.instruction_sigp_arch++; | 392 | vcpu->stat.instruction_sigp_arch++; |
@@ -363,15 +395,18 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) | |||
363 | case SIGP_SET_PREFIX: | 395 | case SIGP_SET_PREFIX: |
364 | vcpu->stat.instruction_sigp_prefix++; | 396 | vcpu->stat.instruction_sigp_prefix++; |
365 | rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, | 397 | rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, |
366 | &vcpu->arch.guest_gprs[r1]); | 398 | &vcpu->run->s.regs.gprs[r1]); |
367 | break; | 399 | break; |
368 | case SIGP_SENSE_RUNNING: | 400 | case SIGP_SENSE_RUNNING: |
369 | vcpu->stat.instruction_sigp_sense_running++; | 401 | vcpu->stat.instruction_sigp_sense_running++; |
370 | rc = __sigp_sense_running(vcpu, cpu_addr, | 402 | rc = __sigp_sense_running(vcpu, cpu_addr, |
371 | &vcpu->arch.guest_gprs[r1]); | 403 | &vcpu->run->s.regs.gprs[r1]); |
372 | break; | 404 | break; |
373 | case SIGP_RESTART: | 405 | case SIGP_RESTART: |
374 | vcpu->stat.instruction_sigp_restart++; | 406 | vcpu->stat.instruction_sigp_restart++; |
407 | rc = __sigp_restart(vcpu, cpu_addr); | ||
408 | if (rc == 2) /* busy */ | ||
409 | break; | ||
375 | /* user space must know about restart */ | 410 | /* user space must know about restart */ |
376 | default: | 411 | default: |
377 | return -EOPNOTSUPP; | 412 | return -EOPNOTSUPP; |
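In sigp.c the reg parameters widen from unsigned long to u64 to match the gprs array in the sync area. The behavioural changes are confined to stop and restart handling: a stop order for an already stopped cpu is now dropped instead of queued, SIGP stop-and-store-status sets ACTION_STOP_ON_STOP in addition to ACTION_STORE_ON_STOP, and the new __sigp_restart() answers busy (rc 2) while such a stop is still pending; otherwise restart still falls through to userspace via -EOPNOTSUPP, which is what the "user space must know about restart" comment refers to.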
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 4d8dcbdfc120..e7d1c194d272 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h | |||
@@ -321,4 +321,8 @@ struct kvm_xcrs { | |||
321 | __u64 padding[16]; | 321 | __u64 padding[16]; |
322 | }; | 322 | }; |
323 | 323 | ||
324 | /* definition of registers in kvm_run */ | ||
325 | struct kvm_sync_regs { | ||
326 | }; | ||
327 | |||
324 | #endif /* _ASM_X86_KVM_H */ | 328 | #endif /* _ASM_X86_KVM_H */ |
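x86 only grows an empty kvm_sync_regs here, so the arch-independent sync-regs plumbing in kvm_run compiles on every architecture; no registers are actually synced on x86 yet.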
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 7b9cfc4878af..c222e1a1b12a 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -176,6 +176,7 @@ struct x86_emulate_ops { | |||
176 | void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); | 176 | void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); |
177 | ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); | 177 | ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); |
178 | int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); | 178 | int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); |
179 | void (*set_rflags)(struct x86_emulate_ctxt *ctxt, ulong val); | ||
179 | int (*cpl)(struct x86_emulate_ctxt *ctxt); | 180 | int (*cpl)(struct x86_emulate_ctxt *ctxt); |
180 | int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); | 181 | int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); |
181 | int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); | 182 | int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); |
@@ -388,7 +389,7 @@ bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt); | |||
388 | #define EMULATION_INTERCEPTED 2 | 389 | #define EMULATION_INTERCEPTED 2 |
389 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); | 390 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); |
390 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | 391 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, |
391 | u16 tss_selector, int reason, | 392 | u16 tss_selector, int idt_index, int reason, |
392 | bool has_error_code, u32 error_code); | 393 | bool has_error_code, u32 error_code); |
393 | int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); | 394 | int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); |
394 | #endif /* _ASM_X86_KVM_X86_EMULATE_H */ | 395 | #endif /* _ASM_X86_KVM_X86_EMULATE_H */ |
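Two emulator interface extensions feed the task-switch rework further down: a set_rflags callback, so the emulator can publish eflags to the vcpu before CPL-sensitive checks run, and an idt_index argument to emulator_task_switch(), so gate-initiated switches can be privilege-checked against the IDT entry (the caller apparently passes -1 when the switch was not triggered through the IDT).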
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 52d6640a5ca1..e216ba066e79 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -29,7 +29,7 @@ | |||
29 | #include <asm/msr-index.h> | 29 | #include <asm/msr-index.h> |
30 | 30 | ||
31 | #define KVM_MAX_VCPUS 254 | 31 | #define KVM_MAX_VCPUS 254 |
32 | #define KVM_SOFT_MAX_VCPUS 64 | 32 | #define KVM_SOFT_MAX_VCPUS 160 |
33 | #define KVM_MEMORY_SLOTS 32 | 33 | #define KVM_MEMORY_SLOTS 32 |
34 | /* memory slots that does not exposed to userspace */ | 34 | /* memory slots that does not exposed to userspace */ |
35 | #define KVM_PRIVATE_MEM_SLOTS 4 | 35 | #define KVM_PRIVATE_MEM_SLOTS 4 |
@@ -181,13 +181,6 @@ struct kvm_mmu_memory_cache { | |||
181 | void *objects[KVM_NR_MEM_OBJS]; | 181 | void *objects[KVM_NR_MEM_OBJS]; |
182 | }; | 182 | }; |
183 | 183 | ||
184 | #define NR_PTE_CHAIN_ENTRIES 5 | ||
185 | |||
186 | struct kvm_pte_chain { | ||
187 | u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES]; | ||
188 | struct hlist_node link; | ||
189 | }; | ||
190 | |||
191 | /* | 184 | /* |
192 | * kvm_mmu_page_role, below, is defined as: | 185 | * kvm_mmu_page_role, below, is defined as: |
193 | * | 186 | * |
@@ -427,12 +420,16 @@ struct kvm_vcpu_arch { | |||
427 | 420 | ||
428 | u64 last_guest_tsc; | 421 | u64 last_guest_tsc; |
429 | u64 last_kernel_ns; | 422 | u64 last_kernel_ns; |
430 | u64 last_tsc_nsec; | 423 | u64 last_host_tsc; |
431 | u64 last_tsc_write; | 424 | u64 tsc_offset_adjustment; |
432 | u32 virtual_tsc_khz; | 425 | u64 this_tsc_nsec; |
426 | u64 this_tsc_write; | ||
427 | u8 this_tsc_generation; | ||
433 | bool tsc_catchup; | 428 | bool tsc_catchup; |
434 | u32 tsc_catchup_mult; | 429 | bool tsc_always_catchup; |
435 | s8 tsc_catchup_shift; | 430 | s8 virtual_tsc_shift; |
431 | u32 virtual_tsc_mult; | ||
432 | u32 virtual_tsc_khz; | ||
436 | 433 | ||
437 | atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ | 434 | atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ |
438 | unsigned nmi_pending; /* NMI queued after currently running handler */ | 435 | unsigned nmi_pending; /* NMI queued after currently running handler */ |
@@ -478,6 +475,21 @@ struct kvm_vcpu_arch { | |||
478 | u32 id; | 475 | u32 id; |
479 | bool send_user_only; | 476 | bool send_user_only; |
480 | } apf; | 477 | } apf; |
478 | |||
479 | /* OSVW MSRs (AMD only) */ | ||
480 | struct { | ||
481 | u64 length; | ||
482 | u64 status; | ||
483 | } osvw; | ||
484 | }; | ||
485 | |||
486 | struct kvm_lpage_info { | ||
487 | unsigned long rmap_pde; | ||
488 | int write_count; | ||
489 | }; | ||
490 | |||
491 | struct kvm_arch_memory_slot { | ||
492 | struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; | ||
481 | }; | 493 | }; |
482 | 494 | ||
483 | struct kvm_arch { | 495 | struct kvm_arch { |
@@ -511,8 +523,12 @@ struct kvm_arch { | |||
511 | s64 kvmclock_offset; | 523 | s64 kvmclock_offset; |
512 | raw_spinlock_t tsc_write_lock; | 524 | raw_spinlock_t tsc_write_lock; |
513 | u64 last_tsc_nsec; | 525 | u64 last_tsc_nsec; |
514 | u64 last_tsc_offset; | ||
515 | u64 last_tsc_write; | 526 | u64 last_tsc_write; |
527 | u32 last_tsc_khz; | ||
528 | u64 cur_tsc_nsec; | ||
529 | u64 cur_tsc_write; | ||
530 | u64 cur_tsc_offset; | ||
531 | u8 cur_tsc_generation; | ||
516 | 532 | ||
517 | struct kvm_xen_hvm_config xen_hvm_config; | 533 | struct kvm_xen_hvm_config xen_hvm_config; |
518 | 534 | ||
@@ -644,7 +660,7 @@ struct kvm_x86_ops { | |||
644 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); | 660 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); |
645 | int (*get_lpage_level)(void); | 661 | int (*get_lpage_level)(void); |
646 | bool (*rdtscp_supported)(void); | 662 | bool (*rdtscp_supported)(void); |
647 | void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment); | 663 | void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host); |
648 | 664 | ||
649 | void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); | 665 | void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); |
650 | 666 | ||
@@ -652,7 +668,7 @@ struct kvm_x86_ops { | |||
652 | 668 | ||
653 | bool (*has_wbinvd_exit)(void); | 669 | bool (*has_wbinvd_exit)(void); |
654 | 670 | ||
655 | void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz); | 671 | void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale); |
656 | void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); | 672 | void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); |
657 | 673 | ||
658 | u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); | 674 | u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); |
@@ -674,6 +690,17 @@ struct kvm_arch_async_pf { | |||
674 | 690 | ||
675 | extern struct kvm_x86_ops *kvm_x86_ops; | 691 | extern struct kvm_x86_ops *kvm_x86_ops; |
676 | 692 | ||
693 | static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, | ||
694 | s64 adjustment) | ||
695 | { | ||
696 | kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, false); | ||
697 | } | ||
698 | |||
699 | static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) | ||
700 | { | ||
701 | kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, true); | ||
702 | } | ||
703 | |||
677 | int kvm_mmu_module_init(void); | 704 | int kvm_mmu_module_init(void); |
678 | void kvm_mmu_module_exit(void); | 705 | void kvm_mmu_module_exit(void); |
679 | 706 | ||
@@ -741,8 +768,8 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); | |||
741 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); | 768 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
742 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); | 769 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); |
743 | 770 | ||
744 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | 771 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, |
745 | bool has_error_code, u32 error_code); | 772 | int reason, bool has_error_code, u32 error_code); |
746 | 773 | ||
747 | int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); | 774 | int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); |
748 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); | 775 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); |
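The kvm_host.h hunk batches several independent changes: KVM_SOFT_MAX_VCPUS rises from 64 to 160; the per-vcpu TSC write tracking becomes this_tsc_nsec/this_tsc_write plus a generation counter, mirrored by the cur_tsc_* fields in kvm_arch, so vcpus whose TSCs are written close together can be matched to one generation and kept in sync, while the catchup fields are reshuffled into virtual_tsc_mult/shift; adjust_tsc_offset() grows a host/guest flag wrapped by the two new inlines; per-vcpu OSVW (OS visible workaround) MSR state appears for AMD guests; and large-page metadata moves into the new kvm_arch_memory_slot, allocated through the kvm_arch_{create,free}_memslot hooks seen on the s390 side earlier.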
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index e8fb2c7a5f4f..2291895b1836 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h | |||
@@ -23,6 +23,7 @@ | |||
23 | #define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16) | 23 | #define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16) |
24 | #define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) | 24 | #define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) |
25 | #define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18) | 25 | #define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18) |
26 | #define ARCH_PERFMON_EVENTSEL_PIN_CONTROL (1ULL << 19) | ||
26 | #define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20) | 27 | #define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20) |
27 | #define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21) | 28 | #define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21) |
28 | #define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) | 29 | #define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) |
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 15d99153a96d..c91e8b9d588b 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h | |||
@@ -61,7 +61,7 @@ extern void check_tsc_sync_source(int cpu); | |||
61 | extern void check_tsc_sync_target(void); | 61 | extern void check_tsc_sync_target(void); |
62 | 62 | ||
63 | extern int notsc_setup(char *); | 63 | extern int notsc_setup(char *); |
64 | extern void save_sched_clock_state(void); | 64 | extern void tsc_save_sched_clock_state(void); |
65 | extern void restore_sched_clock_state(void); | 65 | extern void tsc_restore_sched_clock_state(void); |
66 | 66 | ||
67 | #endif /* _ASM_X86_TSC_H */ | 67 | #endif /* _ASM_X86_TSC_H */ |
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 517d4767ffdd..baaca8defec8 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -145,9 +145,11 @@ struct x86_init_ops { | |||
145 | /** | 145 | /** |
146 | * struct x86_cpuinit_ops - platform specific cpu hotplug setups | 146 | * struct x86_cpuinit_ops - platform specific cpu hotplug setups |
147 | * @setup_percpu_clockev: set up the per cpu clock event device | 147 | * @setup_percpu_clockev: set up the per cpu clock event device |
148 | * @early_percpu_clock_init: early init of the per cpu clock event device | ||
148 | */ | 149 | */ |
149 | struct x86_cpuinit_ops { | 150 | struct x86_cpuinit_ops { |
150 | void (*setup_percpu_clockev)(void); | 151 | void (*setup_percpu_clockev)(void); |
152 | void (*early_percpu_clock_init)(void); | ||
151 | void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); | 153 | void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); |
152 | }; | 154 | }; |
153 | 155 | ||
@@ -160,6 +162,8 @@ struct x86_cpuinit_ops { | |||
160 | * @is_untracked_pat_range exclude from PAT logic | 162 | * @is_untracked_pat_range exclude from PAT logic |
161 | * @nmi_init enable NMI on cpus | 163 | * @nmi_init enable NMI on cpus |
162 | * @i8042_detect pre-detect if i8042 controller exists | 164 | * @i8042_detect pre-detect if i8042 controller exists |
165 | * @save_sched_clock_state: save state for sched_clock() on suspend | ||
166 | * @restore_sched_clock_state: restore state for sched_clock() on resume | ||
163 | */ | 167 | */ |
164 | struct x86_platform_ops { | 168 | struct x86_platform_ops { |
165 | unsigned long (*calibrate_tsc)(void); | 169 | unsigned long (*calibrate_tsc)(void); |
@@ -171,6 +175,8 @@ struct x86_platform_ops { | |||
171 | void (*nmi_init)(void); | 175 | void (*nmi_init)(void); |
172 | unsigned char (*get_nmi_reason)(void); | 176 | unsigned char (*get_nmi_reason)(void); |
173 | int (*i8042_detect)(void); | 177 | int (*i8042_detect)(void); |
178 | void (*save_sched_clock_state)(void); | ||
179 | void (*restore_sched_clock_state)(void); | ||
174 | }; | 180 | }; |
175 | 181 | ||
176 | struct pci_dev; | 182 | struct pci_dev; |
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 44842d756b29..f8492da65bfc 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -136,6 +136,15 @@ int kvm_register_clock(char *txt) | |||
136 | return ret; | 136 | return ret; |
137 | } | 137 | } |
138 | 138 | ||
139 | static void kvm_save_sched_clock_state(void) | ||
140 | { | ||
141 | } | ||
142 | |||
143 | static void kvm_restore_sched_clock_state(void) | ||
144 | { | ||
145 | kvm_register_clock("primary cpu clock, resume"); | ||
146 | } | ||
147 | |||
139 | #ifdef CONFIG_X86_LOCAL_APIC | 148 | #ifdef CONFIG_X86_LOCAL_APIC |
140 | static void __cpuinit kvm_setup_secondary_clock(void) | 149 | static void __cpuinit kvm_setup_secondary_clock(void) |
141 | { | 150 | { |
@@ -144,8 +153,6 @@ static void __cpuinit kvm_setup_secondary_clock(void) | |||
144 | * we shouldn't fail. | 153 | * we shouldn't fail. |
145 | */ | 154 | */ |
146 | WARN_ON(kvm_register_clock("secondary cpu clock")); | 155 | WARN_ON(kvm_register_clock("secondary cpu clock")); |
147 | /* ok, done with our trickery, call native */ | ||
148 | setup_secondary_APIC_clock(); | ||
149 | } | 156 | } |
150 | #endif | 157 | #endif |
151 | 158 | ||
@@ -194,9 +201,11 @@ void __init kvmclock_init(void) | |||
194 | x86_platform.get_wallclock = kvm_get_wallclock; | 201 | x86_platform.get_wallclock = kvm_get_wallclock; |
195 | x86_platform.set_wallclock = kvm_set_wallclock; | 202 | x86_platform.set_wallclock = kvm_set_wallclock; |
196 | #ifdef CONFIG_X86_LOCAL_APIC | 203 | #ifdef CONFIG_X86_LOCAL_APIC |
197 | x86_cpuinit.setup_percpu_clockev = | 204 | x86_cpuinit.early_percpu_clock_init = |
198 | kvm_setup_secondary_clock; | 205 | kvm_setup_secondary_clock; |
199 | #endif | 206 | #endif |
207 | x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; | ||
208 | x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; | ||
200 | machine_ops.shutdown = kvm_shutdown; | 209 | machine_ops.shutdown = kvm_shutdown; |
201 | #ifdef CONFIG_KEXEC | 210 | #ifdef CONFIG_KEXEC |
202 | machine_ops.crash_shutdown = kvm_crash_shutdown; | 211 | machine_ops.crash_shutdown = kvm_crash_shutdown; |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e578a79a3093..5104a2b685cf 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -255,6 +255,7 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
255 | * most necessary things. | 255 | * most necessary things. |
256 | */ | 256 | */ |
257 | cpu_init(); | 257 | cpu_init(); |
258 | x86_cpuinit.early_percpu_clock_init(); | ||
258 | preempt_disable(); | 259 | preempt_disable(); |
259 | smp_callin(); | 260 | smp_callin(); |
260 | 261 | ||
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 183c5925a9fe..899a03f2d181 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -630,7 +630,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) | |||
630 | 630 | ||
631 | static unsigned long long cyc2ns_suspend; | 631 | static unsigned long long cyc2ns_suspend; |
632 | 632 | ||
633 | void save_sched_clock_state(void) | 633 | void tsc_save_sched_clock_state(void) |
634 | { | 634 | { |
635 | if (!sched_clock_stable) | 635 | if (!sched_clock_stable) |
636 | return; | 636 | return; |
@@ -646,7 +646,7 @@ void save_sched_clock_state(void) | |||
646 | * that sched_clock() continues from the point where it was left off during | 646 | * that sched_clock() continues from the point where it was left off during |
647 | * suspend. | 647 | * suspend. |
648 | */ | 648 | */ |
649 | void restore_sched_clock_state(void) | 649 | void tsc_restore_sched_clock_state(void) |
650 | { | 650 | { |
651 | unsigned long long offset; | 651 | unsigned long long offset; |
652 | unsigned long flags; | 652 | unsigned long flags; |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 947a06ccc673..e9f265fd79ae 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -91,6 +91,7 @@ struct x86_init_ops x86_init __initdata = { | |||
91 | }; | 91 | }; |
92 | 92 | ||
93 | struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { | 93 | struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { |
94 | .early_percpu_clock_init = x86_init_noop, | ||
94 | .setup_percpu_clockev = setup_secondary_APIC_clock, | 95 | .setup_percpu_clockev = setup_secondary_APIC_clock, |
95 | .fixup_cpu_id = x86_default_fixup_cpu_id, | 96 | .fixup_cpu_id = x86_default_fixup_cpu_id, |
96 | }; | 97 | }; |
@@ -107,7 +108,9 @@ struct x86_platform_ops x86_platform = { | |||
107 | .is_untracked_pat_range = is_ISA_range, | 108 | .is_untracked_pat_range = is_ISA_range, |
108 | .nmi_init = default_nmi_init, | 109 | .nmi_init = default_nmi_init, |
109 | .get_nmi_reason = default_get_nmi_reason, | 110 | .get_nmi_reason = default_get_nmi_reason, |
110 | .i8042_detect = default_i8042_detect | 111 | .i8042_detect = default_i8042_detect, |
112 | .save_sched_clock_state = tsc_save_sched_clock_state, | ||
113 | .restore_sched_clock_state = tsc_restore_sched_clock_state, | ||
111 | }; | 114 | }; |
112 | 115 | ||
113 | EXPORT_SYMBOL_GPL(x86_platform); | 116 | EXPORT_SYMBOL_GPL(x86_platform); |
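The tsc.h, x86_init.h, kvmclock.c, smpboot.c, tsc.c and x86_init.c hunks belong to one refactoring: the suspend/resume sched_clock hooks stop being hard-wired to the TSC code and become x86_platform ops. The tsc.c implementations are renamed tsc_{save,restore}_sched_clock_state and installed as the defaults, while kvmclock overrides them so a resumed guest re-registers its clock page instead of running on a stale one. Likewise, the new x86_cpuinit.early_percpu_clock_init hook (a no-op by default, called early in start_secondary()) replaces kvmclock's old trick of hijacking setup_percpu_clockev and then chaining to setup_secondary_APIC_clock().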
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 89b02bfaaca5..9fed5bedaad6 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
@@ -236,7 +236,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
236 | const u32 kvm_supported_word6_x86_features = | 236 | const u32 kvm_supported_word6_x86_features = |
237 | F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | | 237 | F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | |
238 | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | | 238 | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | |
239 | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | | 239 | F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | |
240 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); | 240 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); |
241 | 241 | ||
242 | /* cpuid 0xC0000001.edx */ | 242 | /* cpuid 0xC0000001.edx */ |
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 5b97e1797a6d..26d1fb437eb5 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h | |||
@@ -43,4 +43,12 @@ static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu) | |||
43 | return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); | 43 | return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); |
44 | } | 44 | } |
45 | 45 | ||
46 | static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu) | ||
47 | { | ||
48 | struct kvm_cpuid_entry2 *best; | ||
49 | |||
50 | best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | ||
51 | return best && (best->ecx & bit(X86_FEATURE_OSVW)); | ||
52 | } | ||
53 | |||
46 | #endif | 54 | #endif |
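With F(OSVW) now exposed in cpuid leaf 0x80000001, guest_cpuid_has_osvw() lets the MSR code gate access to the OSVW register pair on whether userspace actually granted the feature; the backing state is the osvw length/status pair added to kvm_vcpu_arch above.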
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0982507b962a..83756223f8aa 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -57,6 +57,7 @@ | |||
57 | #define OpDS 23ull /* DS */ | 57 | #define OpDS 23ull /* DS */ |
58 | #define OpFS 24ull /* FS */ | 58 | #define OpFS 24ull /* FS */ |
59 | #define OpGS 25ull /* GS */ | 59 | #define OpGS 25ull /* GS */ |
60 | #define OpMem8 26ull /* 8-bit zero extended memory operand */ | ||
60 | 61 | ||
61 | #define OpBits 5 /* Width of operand field */ | 62 | #define OpBits 5 /* Width of operand field */ |
62 | #define OpMask ((1ull << OpBits) - 1) | 63 | #define OpMask ((1ull << OpBits) - 1) |
@@ -101,6 +102,7 @@ | |||
101 | #define SrcAcc (OpAcc << SrcShift) | 102 | #define SrcAcc (OpAcc << SrcShift) |
102 | #define SrcImmU16 (OpImmU16 << SrcShift) | 103 | #define SrcImmU16 (OpImmU16 << SrcShift) |
103 | #define SrcDX (OpDX << SrcShift) | 104 | #define SrcDX (OpDX << SrcShift) |
105 | #define SrcMem8 (OpMem8 << SrcShift) | ||
104 | #define SrcMask (OpMask << SrcShift) | 106 | #define SrcMask (OpMask << SrcShift) |
105 | #define BitOp (1<<11) | 107 | #define BitOp (1<<11) |
106 | #define MemAbs (1<<12) /* Memory operand is absolute displacement */ | 108 | #define MemAbs (1<<12) /* Memory operand is absolute displacement */ |
@@ -858,8 +860,7 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, | |||
858 | } | 860 | } |
859 | 861 | ||
860 | static void decode_register_operand(struct x86_emulate_ctxt *ctxt, | 862 | static void decode_register_operand(struct x86_emulate_ctxt *ctxt, |
861 | struct operand *op, | 863 | struct operand *op) |
862 | int inhibit_bytereg) | ||
863 | { | 864 | { |
864 | unsigned reg = ctxt->modrm_reg; | 865 | unsigned reg = ctxt->modrm_reg; |
865 | int highbyte_regs = ctxt->rex_prefix == 0; | 866 | int highbyte_regs = ctxt->rex_prefix == 0; |
@@ -876,7 +877,7 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt, | |||
876 | } | 877 | } |
877 | 878 | ||
878 | op->type = OP_REG; | 879 | op->type = OP_REG; |
879 | if ((ctxt->d & ByteOp) && !inhibit_bytereg) { | 880 | if (ctxt->d & ByteOp) { |
880 | op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs); | 881 | op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs); |
881 | op->bytes = 1; | 882 | op->bytes = 1; |
882 | } else { | 883 | } else { |
@@ -1151,6 +1152,22 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | |||
1151 | return 1; | 1152 | return 1; |
1152 | } | 1153 | } |
1153 | 1154 | ||
1155 | static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt, | ||
1156 | u16 index, struct desc_struct *desc) | ||
1157 | { | ||
1158 | struct desc_ptr dt; | ||
1159 | ulong addr; | ||
1160 | |||
1161 | ctxt->ops->get_idt(ctxt, &dt); | ||
1162 | |||
1163 | if (dt.size < index * 8 + 7) | ||
1164 | return emulate_gp(ctxt, index << 3 | 0x2); | ||
1165 | |||
1166 | addr = dt.address + index * 8; | ||
1167 | return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc, | ||
1168 | &ctxt->exception); | ||
1169 | } | ||
1170 | |||
1154 | static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, | 1171 | static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, |
1155 | u16 selector, struct desc_ptr *dt) | 1172 | u16 selector, struct desc_ptr *dt) |
1156 | { | 1173 | { |
@@ -1227,6 +1244,8 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1227 | seg_desc.type = 3; | 1244 | seg_desc.type = 3; |
1228 | seg_desc.p = 1; | 1245 | seg_desc.p = 1; |
1229 | seg_desc.s = 1; | 1246 | seg_desc.s = 1; |
1247 | if (ctxt->mode == X86EMUL_MODE_VM86) | ||
1248 | seg_desc.dpl = 3; | ||
1230 | goto load; | 1249 | goto load; |
1231 | } | 1250 | } |
1232 | 1251 | ||
@@ -1891,6 +1910,17 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, | |||
1891 | ss->p = 1; | 1910 | ss->p = 1; |
1892 | } | 1911 | } |
1893 | 1912 | ||
1913 | static bool vendor_intel(struct x86_emulate_ctxt *ctxt) | ||
1914 | { | ||
1915 | u32 eax, ebx, ecx, edx; | ||
1916 | |||
1917 | eax = ecx = 0; | ||
1918 | return ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx) | ||
1919 | && ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx | ||
1920 | && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx | ||
1921 | && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx; | ||
1922 | } | ||
1923 | |||
1894 | static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) | 1924 | static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) |
1895 | { | 1925 | { |
1896 | struct x86_emulate_ops *ops = ctxt->ops; | 1926 | struct x86_emulate_ops *ops = ctxt->ops; |
@@ -2007,6 +2037,14 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) | |||
2007 | if (ctxt->mode == X86EMUL_MODE_REAL) | 2037 | if (ctxt->mode == X86EMUL_MODE_REAL) |
2008 | return emulate_gp(ctxt, 0); | 2038 | return emulate_gp(ctxt, 0); |
2009 | 2039 | ||
2040 | /* | ||
2041 | * Not recognized on AMD in compat mode (but is recognized in legacy | ||
2042 | * mode). | ||
2043 | */ | ||
2044 | if ((ctxt->mode == X86EMUL_MODE_PROT32) && (efer & EFER_LMA) | ||
2045 | && !vendor_intel(ctxt)) | ||
2046 | return emulate_ud(ctxt); | ||
2047 | |||
2010 | /* XXX sysenter/sysexit have not been tested in 64bit mode. | 2048 | /* XXX sysenter/sysexit have not been tested in 64bit mode. |
2011 | * Therefore, we inject an #UD. | 2049 | * Therefore, we inject an #UD. |
2012 | */ | 2050 | */ |
@@ -2306,6 +2344,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | |||
2306 | return emulate_gp(ctxt, 0); | 2344 | return emulate_gp(ctxt, 0); |
2307 | ctxt->_eip = tss->eip; | 2345 | ctxt->_eip = tss->eip; |
2308 | ctxt->eflags = tss->eflags | 2; | 2346 | ctxt->eflags = tss->eflags | 2; |
2347 | |||
2348 | /* General purpose registers */ | ||
2309 | ctxt->regs[VCPU_REGS_RAX] = tss->eax; | 2349 | ctxt->regs[VCPU_REGS_RAX] = tss->eax; |
2310 | ctxt->regs[VCPU_REGS_RCX] = tss->ecx; | 2350 | ctxt->regs[VCPU_REGS_RCX] = tss->ecx; |
2311 | ctxt->regs[VCPU_REGS_RDX] = tss->edx; | 2351 | ctxt->regs[VCPU_REGS_RDX] = tss->edx; |
@@ -2328,6 +2368,24 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | |||
2328 | set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS); | 2368 | set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS); |
2329 | 2369 | ||
2330 | /* | 2370 | /* |
2371 | * If we're switching between Protected Mode and VM86, we need to make | ||
2372 | * sure to update the mode before loading the segment descriptors so | ||
2373 | * that the selectors are interpreted correctly. | ||
2374 | * | ||
2375 | * Need to get rflags to the vcpu struct immediately because it | ||
2376 | * influences the CPL which is checked at least when loading the segment | ||
2377 | * descriptors and when pushing an error code to the new kernel stack. | ||
2378 | * | ||
2379 | * TODO Introduce a separate ctxt->ops->set_cpl callback | ||
2380 | */ | ||
2381 | if (ctxt->eflags & X86_EFLAGS_VM) | ||
2382 | ctxt->mode = X86EMUL_MODE_VM86; | ||
2383 | else | ||
2384 | ctxt->mode = X86EMUL_MODE_PROT32; | ||
2385 | |||
2386 | ctxt->ops->set_rflags(ctxt, ctxt->eflags); | ||
2387 | |||
2388 | /* | ||
2331 | * Now load segment descriptors. If fault happenes at this stage | 2389 | * Now load segment descriptors. If fault happenes at this stage |
2332 | * it is handled in a context of new task | 2390 | * it is handled in a context of new task |
2333 | */ | 2391 | */ |
@@ -2401,7 +2459,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2401 | } | 2459 | } |
2402 | 2460 | ||
2403 | static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | 2461 | static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, |
2404 | u16 tss_selector, int reason, | 2462 | u16 tss_selector, int idt_index, int reason, |
2405 | bool has_error_code, u32 error_code) | 2463 | bool has_error_code, u32 error_code) |
2406 | { | 2464 | { |
2407 | struct x86_emulate_ops *ops = ctxt->ops; | 2465 | struct x86_emulate_ops *ops = ctxt->ops; |
@@ -2423,12 +2481,35 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2423 | 2481 | ||
2424 | /* FIXME: check that next_tss_desc is tss */ | 2482 | /* FIXME: check that next_tss_desc is tss */ |
2425 | 2483 | ||
2426 | if (reason != TASK_SWITCH_IRET) { | 2484 | /* |
2427 | if ((tss_selector & 3) > next_tss_desc.dpl || | 2485 | * Check privileges. The three cases are task switch caused by... |
2428 | ops->cpl(ctxt) > next_tss_desc.dpl) | 2486 | * |
2429 | return emulate_gp(ctxt, 0); | 2487 | * 1. jmp/call/int to task gate: Check against DPL of the task gate |
2488 | * 2. Exception/IRQ/iret: No check is performed | ||
2489 | * 3. jmp/call to TSS: Check agains DPL of the TSS | ||
2490 | */ | ||
2491 | if (reason == TASK_SWITCH_GATE) { | ||
2492 | if (idt_index != -1) { | ||
2493 | /* Software interrupts */ | ||
2494 | struct desc_struct task_gate_desc; | ||
2495 | int dpl; | ||
2496 | |||
2497 | ret = read_interrupt_descriptor(ctxt, idt_index, | ||
2498 | &task_gate_desc); | ||
2499 | if (ret != X86EMUL_CONTINUE) | ||
2500 | return ret; | ||
2501 | |||
2502 | dpl = task_gate_desc.dpl; | ||
2503 | if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl) | ||
2504 | return emulate_gp(ctxt, (idt_index << 3) | 0x2); | ||
2505 | } | ||
2506 | } else if (reason != TASK_SWITCH_IRET) { | ||
2507 | int dpl = next_tss_desc.dpl; | ||
2508 | if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl) | ||
2509 | return emulate_gp(ctxt, tss_selector); | ||
2430 | } | 2510 | } |
2431 | 2511 | ||
2512 | |||
2432 | desc_limit = desc_limit_scaled(&next_tss_desc); | 2513 | desc_limit = desc_limit_scaled(&next_tss_desc); |
2433 | if (!next_tss_desc.p || | 2514 | if (!next_tss_desc.p || |
2434 | ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || | 2515 | ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || |
@@ -2481,7 +2562,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2481 | } | 2562 | } |
2482 | 2563 | ||
2483 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | 2564 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, |
2484 | u16 tss_selector, int reason, | 2565 | u16 tss_selector, int idt_index, int reason, |
2485 | bool has_error_code, u32 error_code) | 2566 | bool has_error_code, u32 error_code) |
2486 | { | 2567 | { |
2487 | int rc; | 2568 | int rc; |
@@ -2489,7 +2570,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2489 | ctxt->_eip = ctxt->eip; | 2570 | ctxt->_eip = ctxt->eip; |
2490 | ctxt->dst.type = OP_NONE; | 2571 | ctxt->dst.type = OP_NONE; |
2491 | 2572 | ||
2492 | rc = emulator_do_task_switch(ctxt, tss_selector, reason, | 2573 | rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason, |
2493 | has_error_code, error_code); | 2574 | has_error_code, error_code); |
2494 | 2575 | ||
2495 | if (rc == X86EMUL_CONTINUE) | 2576 | if (rc == X86EMUL_CONTINUE) |
@@ -3514,13 +3595,13 @@ static struct opcode twobyte_table[256] = { | |||
3514 | I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), | 3595 | I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), |
3515 | I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), | 3596 | I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), |
3516 | I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), | 3597 | I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), |
3517 | D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), | 3598 | D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), |
3518 | /* 0xB8 - 0xBF */ | 3599 | /* 0xB8 - 0xBF */ |
3519 | N, N, | 3600 | N, N, |
3520 | G(BitOp, group8), | 3601 | G(BitOp, group8), |
3521 | I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), | 3602 | I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), |
3522 | I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr), | 3603 | I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr), |
3523 | D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), | 3604 | D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), |
3524 | /* 0xC0 - 0xCF */ | 3605 | /* 0xC0 - 0xCF */ |
3525 | D2bv(DstMem | SrcReg | ModRM | Lock), | 3606 | D2bv(DstMem | SrcReg | ModRM | Lock), |
3526 | N, D(DstMem | SrcReg | ModRM | Mov), | 3607 | N, D(DstMem | SrcReg | ModRM | Mov), |
@@ -3602,9 +3683,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
3602 | 3683 | ||
3603 | switch (d) { | 3684 | switch (d) { |
3604 | case OpReg: | 3685 | case OpReg: |
3605 | decode_register_operand(ctxt, op, | 3686 | decode_register_operand(ctxt, op); |
3606 | op == &ctxt->dst && | ||
3607 | ctxt->twobyte && (ctxt->b == 0xb6 || ctxt->b == 0xb7)); | ||
3608 | break; | 3687 | break; |
3609 | case OpImmUByte: | 3688 | case OpImmUByte: |
3610 | rc = decode_imm(ctxt, op, 1, false); | 3689 | rc = decode_imm(ctxt, op, 1, false); |
@@ -3656,6 +3735,9 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
3656 | case OpImm: | 3735 | case OpImm: |
3657 | rc = decode_imm(ctxt, op, imm_size(ctxt), true); | 3736 | rc = decode_imm(ctxt, op, imm_size(ctxt), true); |
3658 | break; | 3737 | break; |
3738 | case OpMem8: | ||
3739 | ctxt->memop.bytes = 1; | ||
3740 | goto mem_common; | ||
3659 | case OpMem16: | 3741 | case OpMem16: |
3660 | ctxt->memop.bytes = 2; | 3742 | ctxt->memop.bytes = 2; |
3661 | goto mem_common; | 3743 | goto mem_common; |
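The SrcMem8/OpMem8 operand type introduced above replaces the old ad-hoc ByteOp handling for 0F B6/BE (movzx/movsx with a byte source): the decoder now forces a one-byte memory read and the handler widens the result. A hedged userspace illustration of the widening:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint8_t src = 0x80;             /* the single byte read (memop.bytes == 1) */
            uint32_t zx = (uint32_t)src;    /* movzx: zero-extend */
            int32_t  sx = (int8_t)src;      /* movsx: sign-extend */

            printf("movzx -> 0x%08x, movsx -> 0x%08x\n", zx, (uint32_t)sx);
            return 0;                       /* 0x00000080, 0xffffff80 */
    }
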
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index b6a73537e1ef..81cf4fa4a2be 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -307,6 +307,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
307 | if (val & 0x10) { | 307 | if (val & 0x10) { |
308 | s->init4 = val & 1; | 308 | s->init4 = val & 1; |
309 | s->last_irr = 0; | 309 | s->last_irr = 0; |
310 | s->irr &= s->elcr; | ||
310 | s->imr = 0; | 311 | s->imr = 0; |
311 | s->priority_add = 0; | 312 | s->priority_add = 0; |
312 | s->special_mask = 0; | 313 | s->special_mask = 0; |
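The added `s->irr &= s->elcr;` keeps level-triggered interrupts (those flagged in the ELCR register) pending across a PIC re-initialization while dropping latched edge-triggered ones, since a still-asserted level IRQ would otherwise be lost. A small sketch of the masking, with made-up register values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint8_t irr  = 0x09;    /* IRQ0 (edge) and IRQ3 (level) pending */
            uint8_t elcr = 0x08;    /* ELCR: only IRQ3 is level-triggered */

            irr &= elcr;            /* ICW1 re-init: keep level, drop edge */
            printf("irr after init: 0x%02x\n", irr);        /* 0x08 */
            return 0;
    }
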
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 31bfc6927bc0..858432287ab6 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -433,7 +433,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
433 | break; | 433 | break; |
434 | 434 | ||
435 | case APIC_DM_INIT: | 435 | case APIC_DM_INIT: |
436 | if (level) { | 436 | if (!trig_mode || level) { |
437 | result = 1; | 437 | result = 1; |
438 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; | 438 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; |
439 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 439 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
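The new `!trig_mode || level` condition means an INIT IPI is acted on unless it is the level-triggered de-assert half of the legacy INIT sequence; previously an edge-triggered INIT with level == 0 was silently dropped. A truth-table sketch (illustrative names):

    #include <stdbool.h>
    #include <stdio.h>

    static bool init_accepted(bool trig_mode, bool level)
    {
            return !trig_mode || level;     /* only a level de-assert is ignored */
    }

    int main(void)
    {
            printf("edge/0:%d edge/1:%d level/0:%d level/1:%d\n",
                   init_accepted(false, false), init_accepted(false, true),
                   init_accepted(true, false), init_accepted(true, true));
            return 0;                       /* edge/0:1 edge/1:1 level/0:0 level/1:1 */
    }
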
@@ -731,7 +731,7 @@ static void start_apic_timer(struct kvm_lapic *apic) | |||
731 | u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline; | 731 | u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline; |
732 | u64 ns = 0; | 732 | u64 ns = 0; |
733 | struct kvm_vcpu *vcpu = apic->vcpu; | 733 | struct kvm_vcpu *vcpu = apic->vcpu; |
734 | unsigned long this_tsc_khz = vcpu_tsc_khz(vcpu); | 734 | unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz; |
735 | unsigned long flags; | 735 | unsigned long flags; |
736 | 736 | ||
737 | if (unlikely(!tscdeadline || !this_tsc_khz)) | 737 | if (unlikely(!tscdeadline || !this_tsc_khz)) |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 224b02c3cda9..4cb164268846 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -688,9 +688,8 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn, | |||
688 | { | 688 | { |
689 | unsigned long idx; | 689 | unsigned long idx; |
690 | 690 | ||
691 | idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - | 691 | idx = gfn_to_index(gfn, slot->base_gfn, level); |
692 | (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); | 692 | return &slot->arch.lpage_info[level - 2][idx]; |
693 | return &slot->lpage_info[level - 2][idx]; | ||
694 | } | 693 | } |
695 | 694 | ||
696 | static void account_shadowed(struct kvm *kvm, gfn_t gfn) | 695 | static void account_shadowed(struct kvm *kvm, gfn_t gfn) |
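The open-coded index arithmetic is replaced by gfn_to_index(); assuming it keeps the same computation, it yields the offset of a gfn within the memslot in units of level-sized pages. A standalone sketch of that arithmetic:

    #include <stdio.h>

    /* KVM_HPAGE_GFN_SHIFT(level) is 0/9/18 for 4K/2M/1G pages on x86. */
    static unsigned long gfn_to_index(unsigned long gfn, unsigned long base_gfn,
                                      int shift)
    {
            return (gfn >> shift) - (base_gfn >> shift);
    }

    int main(void)
    {
            /* gfn 0x1234 in a slot starting at gfn 0x1000, 2M level (shift 9) */
            printf("idx = %lu\n", gfn_to_index(0x1234, 0x1000, 9));  /* 1 */
            return 0;
    }
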
@@ -946,7 +945,7 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) | |||
946 | } | 945 | } |
947 | } | 946 | } |
948 | 947 | ||
949 | static unsigned long *__gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level, | 948 | static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, |
950 | struct kvm_memory_slot *slot) | 949 | struct kvm_memory_slot *slot) |
951 | { | 950 | { |
952 | struct kvm_lpage_info *linfo; | 951 | struct kvm_lpage_info *linfo; |
@@ -966,7 +965,7 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) | |||
966 | struct kvm_memory_slot *slot; | 965 | struct kvm_memory_slot *slot; |
967 | 966 | ||
968 | slot = gfn_to_memslot(kvm, gfn); | 967 | slot = gfn_to_memslot(kvm, gfn); |
969 | return __gfn_to_rmap(kvm, gfn, level, slot); | 968 | return __gfn_to_rmap(gfn, level, slot); |
970 | } | 969 | } |
971 | 970 | ||
972 | static bool rmap_can_add(struct kvm_vcpu *vcpu) | 971 | static bool rmap_can_add(struct kvm_vcpu *vcpu) |
@@ -988,7 +987,7 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
988 | return pte_list_add(vcpu, spte, rmapp); | 987 | return pte_list_add(vcpu, spte, rmapp); |
989 | } | 988 | } |
990 | 989 | ||
991 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) | 990 | static u64 *rmap_next(unsigned long *rmapp, u64 *spte) |
992 | { | 991 | { |
993 | return pte_list_next(rmapp, spte); | 992 | return pte_list_next(rmapp, spte); |
994 | } | 993 | } |
@@ -1018,8 +1017,8 @@ int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, | |||
1018 | u64 *spte; | 1017 | u64 *spte; |
1019 | int i, write_protected = 0; | 1018 | int i, write_protected = 0; |
1020 | 1019 | ||
1021 | rmapp = __gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL, slot); | 1020 | rmapp = __gfn_to_rmap(gfn, PT_PAGE_TABLE_LEVEL, slot); |
1022 | spte = rmap_next(kvm, rmapp, NULL); | 1021 | spte = rmap_next(rmapp, NULL); |
1023 | while (spte) { | 1022 | while (spte) { |
1024 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 1023 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
1025 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); | 1024 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); |
@@ -1027,14 +1026,14 @@ int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, | |||
1027 | mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK); | 1026 | mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK); |
1028 | write_protected = 1; | 1027 | write_protected = 1; |
1029 | } | 1028 | } |
1030 | spte = rmap_next(kvm, rmapp, spte); | 1029 | spte = rmap_next(rmapp, spte); |
1031 | } | 1030 | } |
1032 | 1031 | ||
1033 | /* check for huge page mappings */ | 1032 | /* check for huge page mappings */ |
1034 | for (i = PT_DIRECTORY_LEVEL; | 1033 | for (i = PT_DIRECTORY_LEVEL; |
1035 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 1034 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
1036 | rmapp = __gfn_to_rmap(kvm, gfn, i, slot); | 1035 | rmapp = __gfn_to_rmap(gfn, i, slot); |
1037 | spte = rmap_next(kvm, rmapp, NULL); | 1036 | spte = rmap_next(rmapp, NULL); |
1038 | while (spte) { | 1037 | while (spte) { |
1039 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 1038 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
1040 | BUG_ON(!is_large_pte(*spte)); | 1039 | BUG_ON(!is_large_pte(*spte)); |
@@ -1045,7 +1044,7 @@ int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, | |||
1045 | spte = NULL; | 1044 | spte = NULL; |
1046 | write_protected = 1; | 1045 | write_protected = 1; |
1047 | } | 1046 | } |
1048 | spte = rmap_next(kvm, rmapp, spte); | 1047 | spte = rmap_next(rmapp, spte); |
1049 | } | 1048 | } |
1050 | } | 1049 | } |
1051 | 1050 | ||
@@ -1066,7 +1065,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1066 | u64 *spte; | 1065 | u64 *spte; |
1067 | int need_tlb_flush = 0; | 1066 | int need_tlb_flush = 0; |
1068 | 1067 | ||
1069 | while ((spte = rmap_next(kvm, rmapp, NULL))) { | 1068 | while ((spte = rmap_next(rmapp, NULL))) { |
1070 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 1069 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
1071 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); | 1070 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); |
1072 | drop_spte(kvm, spte); | 1071 | drop_spte(kvm, spte); |
@@ -1085,14 +1084,14 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1085 | 1084 | ||
1086 | WARN_ON(pte_huge(*ptep)); | 1085 | WARN_ON(pte_huge(*ptep)); |
1087 | new_pfn = pte_pfn(*ptep); | 1086 | new_pfn = pte_pfn(*ptep); |
1088 | spte = rmap_next(kvm, rmapp, NULL); | 1087 | spte = rmap_next(rmapp, NULL); |
1089 | while (spte) { | 1088 | while (spte) { |
1090 | BUG_ON(!is_shadow_present_pte(*spte)); | 1089 | BUG_ON(!is_shadow_present_pte(*spte)); |
1091 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); | 1090 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); |
1092 | need_flush = 1; | 1091 | need_flush = 1; |
1093 | if (pte_write(*ptep)) { | 1092 | if (pte_write(*ptep)) { |
1094 | drop_spte(kvm, spte); | 1093 | drop_spte(kvm, spte); |
1095 | spte = rmap_next(kvm, rmapp, NULL); | 1094 | spte = rmap_next(rmapp, NULL); |
1096 | } else { | 1095 | } else { |
1097 | new_spte = *spte &~ (PT64_BASE_ADDR_MASK); | 1096 | new_spte = *spte &~ (PT64_BASE_ADDR_MASK); |
1098 | new_spte |= (u64)new_pfn << PAGE_SHIFT; | 1097 | new_spte |= (u64)new_pfn << PAGE_SHIFT; |
@@ -1102,7 +1101,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1102 | new_spte &= ~shadow_accessed_mask; | 1101 | new_spte &= ~shadow_accessed_mask; |
1103 | mmu_spte_clear_track_bits(spte); | 1102 | mmu_spte_clear_track_bits(spte); |
1104 | mmu_spte_set(spte, new_spte); | 1103 | mmu_spte_set(spte, new_spte); |
1105 | spte = rmap_next(kvm, rmapp, spte); | 1104 | spte = rmap_next(rmapp, spte); |
1106 | } | 1105 | } |
1107 | } | 1106 | } |
1108 | if (need_flush) | 1107 | if (need_flush) |
@@ -1176,7 +1175,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1176 | if (!shadow_accessed_mask) | 1175 | if (!shadow_accessed_mask) |
1177 | return kvm_unmap_rmapp(kvm, rmapp, data); | 1176 | return kvm_unmap_rmapp(kvm, rmapp, data); |
1178 | 1177 | ||
1179 | spte = rmap_next(kvm, rmapp, NULL); | 1178 | spte = rmap_next(rmapp, NULL); |
1180 | while (spte) { | 1179 | while (spte) { |
1181 | int _young; | 1180 | int _young; |
1182 | u64 _spte = *spte; | 1181 | u64 _spte = *spte; |
@@ -1186,7 +1185,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1186 | young = 1; | 1185 | young = 1; |
1187 | clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); | 1186 | clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); |
1188 | } | 1187 | } |
1189 | spte = rmap_next(kvm, rmapp, spte); | 1188 | spte = rmap_next(rmapp, spte); |
1190 | } | 1189 | } |
1191 | return young; | 1190 | return young; |
1192 | } | 1191 | } |
@@ -1205,7 +1204,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1205 | if (!shadow_accessed_mask) | 1204 | if (!shadow_accessed_mask) |
1206 | goto out; | 1205 | goto out; |
1207 | 1206 | ||
1208 | spte = rmap_next(kvm, rmapp, NULL); | 1207 | spte = rmap_next(rmapp, NULL); |
1209 | while (spte) { | 1208 | while (spte) { |
1210 | u64 _spte = *spte; | 1209 | u64 _spte = *spte; |
1211 | BUG_ON(!(_spte & PT_PRESENT_MASK)); | 1210 | BUG_ON(!(_spte & PT_PRESENT_MASK)); |
@@ -1214,7 +1213,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
1214 | young = 1; | 1213 | young = 1; |
1215 | break; | 1214 | break; |
1216 | } | 1215 | } |
1217 | spte = rmap_next(kvm, rmapp, spte); | 1216 | spte = rmap_next(rmapp, spte); |
1218 | } | 1217 | } |
1219 | out: | 1218 | out: |
1220 | return young; | 1219 | return young; |
@@ -1391,11 +1390,6 @@ struct kvm_mmu_pages { | |||
1391 | unsigned int nr; | 1390 | unsigned int nr; |
1392 | }; | 1391 | }; |
1393 | 1392 | ||
1394 | #define for_each_unsync_children(bitmap, idx) \ | ||
1395 | for (idx = find_first_bit(bitmap, 512); \ | ||
1396 | idx < 512; \ | ||
1397 | idx = find_next_bit(bitmap, 512, idx+1)) | ||
1398 | |||
1399 | static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, | 1393 | static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, |
1400 | int idx) | 1394 | int idx) |
1401 | { | 1395 | { |
@@ -1417,7 +1411,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, | |||
1417 | { | 1411 | { |
1418 | int i, ret, nr_unsync_leaf = 0; | 1412 | int i, ret, nr_unsync_leaf = 0; |
1419 | 1413 | ||
1420 | for_each_unsync_children(sp->unsync_child_bitmap, i) { | 1414 | for_each_set_bit(i, sp->unsync_child_bitmap, 512) { |
1421 | struct kvm_mmu_page *child; | 1415 | struct kvm_mmu_page *child; |
1422 | u64 ent = sp->spt[i]; | 1416 | u64 ent = sp->spt[i]; |
1423 | 1417 | ||
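The removed for_each_unsync_children() macro was an open-coded copy of the generic for_each_set_bit() iterator over the 512-entry unsync_child_bitmap, which the hunk above now uses directly. A userspace analogue of the iteration:

    #include <stdio.h>

    int main(void)
    {
            unsigned long bitmap = 0x112UL; /* bits 1, 4 and 8 set */
            int i;

            for (i = 0; i < 64; i++)        /* find_first_bit/find_next_bit in-kernel */
                    if (bitmap & (1UL << i))
                            printf("unsync child at index %d\n", i);
            return 0;
    }
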
@@ -1803,6 +1797,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) | |||
1803 | { | 1797 | { |
1804 | if (is_large_pte(*sptep)) { | 1798 | if (is_large_pte(*sptep)) { |
1805 | drop_spte(vcpu->kvm, sptep); | 1799 | drop_spte(vcpu->kvm, sptep); |
1800 | --vcpu->kvm->stat.lpages; | ||
1806 | kvm_flush_remote_tlbs(vcpu->kvm); | 1801 | kvm_flush_remote_tlbs(vcpu->kvm); |
1807 | } | 1802 | } |
1808 | } | 1803 | } |
@@ -3190,15 +3185,14 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access, | |||
3190 | #undef PTTYPE | 3185 | #undef PTTYPE |
3191 | 3186 | ||
3192 | static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, | 3187 | static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, |
3193 | struct kvm_mmu *context, | 3188 | struct kvm_mmu *context) |
3194 | int level) | ||
3195 | { | 3189 | { |
3196 | int maxphyaddr = cpuid_maxphyaddr(vcpu); | 3190 | int maxphyaddr = cpuid_maxphyaddr(vcpu); |
3197 | u64 exb_bit_rsvd = 0; | 3191 | u64 exb_bit_rsvd = 0; |
3198 | 3192 | ||
3199 | if (!context->nx) | 3193 | if (!context->nx) |
3200 | exb_bit_rsvd = rsvd_bits(63, 63); | 3194 | exb_bit_rsvd = rsvd_bits(63, 63); |
3201 | switch (level) { | 3195 | switch (context->root_level) { |
3202 | case PT32_ROOT_LEVEL: | 3196 | case PT32_ROOT_LEVEL: |
3203 | /* no rsvd bits for 2 level 4K page table entries */ | 3197 | /* no rsvd bits for 2 level 4K page table entries */ |
3204 | context->rsvd_bits_mask[0][1] = 0; | 3198 | context->rsvd_bits_mask[0][1] = 0; |
@@ -3256,8 +3250,9 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, | |||
3256 | int level) | 3250 | int level) |
3257 | { | 3251 | { |
3258 | context->nx = is_nx(vcpu); | 3252 | context->nx = is_nx(vcpu); |
3253 | context->root_level = level; | ||
3259 | 3254 | ||
3260 | reset_rsvds_bits_mask(vcpu, context, level); | 3255 | reset_rsvds_bits_mask(vcpu, context); |
3261 | 3256 | ||
3262 | ASSERT(is_pae(vcpu)); | 3257 | ASSERT(is_pae(vcpu)); |
3263 | context->new_cr3 = paging_new_cr3; | 3258 | context->new_cr3 = paging_new_cr3; |
@@ -3267,7 +3262,6 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, | |||
3267 | context->invlpg = paging64_invlpg; | 3262 | context->invlpg = paging64_invlpg; |
3268 | context->update_pte = paging64_update_pte; | 3263 | context->update_pte = paging64_update_pte; |
3269 | context->free = paging_free; | 3264 | context->free = paging_free; |
3270 | context->root_level = level; | ||
3271 | context->shadow_root_level = level; | 3265 | context->shadow_root_level = level; |
3272 | context->root_hpa = INVALID_PAGE; | 3266 | context->root_hpa = INVALID_PAGE; |
3273 | context->direct_map = false; | 3267 | context->direct_map = false; |
@@ -3284,8 +3278,9 @@ static int paging32_init_context(struct kvm_vcpu *vcpu, | |||
3284 | struct kvm_mmu *context) | 3278 | struct kvm_mmu *context) |
3285 | { | 3279 | { |
3286 | context->nx = false; | 3280 | context->nx = false; |
3281 | context->root_level = PT32_ROOT_LEVEL; | ||
3287 | 3282 | ||
3288 | reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL); | 3283 | reset_rsvds_bits_mask(vcpu, context); |
3289 | 3284 | ||
3290 | context->new_cr3 = paging_new_cr3; | 3285 | context->new_cr3 = paging_new_cr3; |
3291 | context->page_fault = paging32_page_fault; | 3286 | context->page_fault = paging32_page_fault; |
@@ -3294,7 +3289,6 @@ static int paging32_init_context(struct kvm_vcpu *vcpu, | |||
3294 | context->sync_page = paging32_sync_page; | 3289 | context->sync_page = paging32_sync_page; |
3295 | context->invlpg = paging32_invlpg; | 3290 | context->invlpg = paging32_invlpg; |
3296 | context->update_pte = paging32_update_pte; | 3291 | context->update_pte = paging32_update_pte; |
3297 | context->root_level = PT32_ROOT_LEVEL; | ||
3298 | context->shadow_root_level = PT32E_ROOT_LEVEL; | 3292 | context->shadow_root_level = PT32E_ROOT_LEVEL; |
3299 | context->root_hpa = INVALID_PAGE; | 3293 | context->root_hpa = INVALID_PAGE; |
3300 | context->direct_map = false; | 3294 | context->direct_map = false; |
@@ -3325,7 +3319,6 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
3325 | context->get_cr3 = get_cr3; | 3319 | context->get_cr3 = get_cr3; |
3326 | context->get_pdptr = kvm_pdptr_read; | 3320 | context->get_pdptr = kvm_pdptr_read; |
3327 | context->inject_page_fault = kvm_inject_page_fault; | 3321 | context->inject_page_fault = kvm_inject_page_fault; |
3328 | context->nx = is_nx(vcpu); | ||
3329 | 3322 | ||
3330 | if (!is_paging(vcpu)) { | 3323 | if (!is_paging(vcpu)) { |
3331 | context->nx = false; | 3324 | context->nx = false; |
@@ -3333,19 +3326,19 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
3333 | context->root_level = 0; | 3326 | context->root_level = 0; |
3334 | } else if (is_long_mode(vcpu)) { | 3327 | } else if (is_long_mode(vcpu)) { |
3335 | context->nx = is_nx(vcpu); | 3328 | context->nx = is_nx(vcpu); |
3336 | reset_rsvds_bits_mask(vcpu, context, PT64_ROOT_LEVEL); | ||
3337 | context->gva_to_gpa = paging64_gva_to_gpa; | ||
3338 | context->root_level = PT64_ROOT_LEVEL; | 3329 | context->root_level = PT64_ROOT_LEVEL; |
3330 | reset_rsvds_bits_mask(vcpu, context); | ||
3331 | context->gva_to_gpa = paging64_gva_to_gpa; | ||
3339 | } else if (is_pae(vcpu)) { | 3332 | } else if (is_pae(vcpu)) { |
3340 | context->nx = is_nx(vcpu); | 3333 | context->nx = is_nx(vcpu); |
3341 | reset_rsvds_bits_mask(vcpu, context, PT32E_ROOT_LEVEL); | ||
3342 | context->gva_to_gpa = paging64_gva_to_gpa; | ||
3343 | context->root_level = PT32E_ROOT_LEVEL; | 3334 | context->root_level = PT32E_ROOT_LEVEL; |
3335 | reset_rsvds_bits_mask(vcpu, context); | ||
3336 | context->gva_to_gpa = paging64_gva_to_gpa; | ||
3344 | } else { | 3337 | } else { |
3345 | context->nx = false; | 3338 | context->nx = false; |
3346 | reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL); | ||
3347 | context->gva_to_gpa = paging32_gva_to_gpa; | ||
3348 | context->root_level = PT32_ROOT_LEVEL; | 3339 | context->root_level = PT32_ROOT_LEVEL; |
3340 | reset_rsvds_bits_mask(vcpu, context); | ||
3341 | context->gva_to_gpa = paging32_gva_to_gpa; | ||
3349 | } | 3342 | } |
3350 | 3343 | ||
3351 | return 0; | 3344 | return 0; |
@@ -3408,18 +3401,18 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) | |||
3408 | g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested; | 3401 | g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested; |
3409 | } else if (is_long_mode(vcpu)) { | 3402 | } else if (is_long_mode(vcpu)) { |
3410 | g_context->nx = is_nx(vcpu); | 3403 | g_context->nx = is_nx(vcpu); |
3411 | reset_rsvds_bits_mask(vcpu, g_context, PT64_ROOT_LEVEL); | ||
3412 | g_context->root_level = PT64_ROOT_LEVEL; | 3404 | g_context->root_level = PT64_ROOT_LEVEL; |
3405 | reset_rsvds_bits_mask(vcpu, g_context); | ||
3413 | g_context->gva_to_gpa = paging64_gva_to_gpa_nested; | 3406 | g_context->gva_to_gpa = paging64_gva_to_gpa_nested; |
3414 | } else if (is_pae(vcpu)) { | 3407 | } else if (is_pae(vcpu)) { |
3415 | g_context->nx = is_nx(vcpu); | 3408 | g_context->nx = is_nx(vcpu); |
3416 | reset_rsvds_bits_mask(vcpu, g_context, PT32E_ROOT_LEVEL); | ||
3417 | g_context->root_level = PT32E_ROOT_LEVEL; | 3409 | g_context->root_level = PT32E_ROOT_LEVEL; |
3410 | reset_rsvds_bits_mask(vcpu, g_context); | ||
3418 | g_context->gva_to_gpa = paging64_gva_to_gpa_nested; | 3411 | g_context->gva_to_gpa = paging64_gva_to_gpa_nested; |
3419 | } else { | 3412 | } else { |
3420 | g_context->nx = false; | 3413 | g_context->nx = false; |
3421 | reset_rsvds_bits_mask(vcpu, g_context, PT32_ROOT_LEVEL); | ||
3422 | g_context->root_level = PT32_ROOT_LEVEL; | 3414 | g_context->root_level = PT32_ROOT_LEVEL; |
3415 | reset_rsvds_bits_mask(vcpu, g_context); | ||
3423 | g_context->gva_to_gpa = paging32_gva_to_gpa_nested; | 3416 | g_context->gva_to_gpa = paging32_gva_to_gpa_nested; |
3424 | } | 3417 | } |
3425 | 3418 | ||
@@ -3555,7 +3548,7 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, | |||
3555 | * If we're seeing too many writes to a page, it may no longer be a page table, | 3548 | * If we're seeing too many writes to a page, it may no longer be a page table, |
3556 | * or we may be forking, in which case it is better to unmap the page. | 3549 | * or we may be forking, in which case it is better to unmap the page. |
3557 | */ | 3550 | */ |
3558 | static bool detect_write_flooding(struct kvm_mmu_page *sp, u64 *spte) | 3551 | static bool detect_write_flooding(struct kvm_mmu_page *sp) |
3559 | { | 3552 | { |
3560 | /* | 3553 | /* |
3561 | * Skip write-flooding detected for the sp whose level is 1, because | 3554 | * Skip write-flooding detected for the sp whose level is 1, because |
@@ -3664,10 +3657,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3664 | 3657 | ||
3665 | mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; | 3658 | mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; |
3666 | for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { | 3659 | for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { |
3667 | spte = get_written_sptes(sp, gpa, &npte); | ||
3668 | |||
3669 | if (detect_write_misaligned(sp, gpa, bytes) || | 3660 | if (detect_write_misaligned(sp, gpa, bytes) || |
3670 | detect_write_flooding(sp, spte)) { | 3661 | detect_write_flooding(sp)) { |
3671 | zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, | 3662 | zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, |
3672 | &invalid_list); | 3663 | &invalid_list); |
3673 | ++vcpu->kvm->stat.mmu_flooded; | 3664 | ++vcpu->kvm->stat.mmu_flooded; |
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index ea7b4fd34676..715da5a19a5b 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c | |||
@@ -200,13 +200,13 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
200 | slot = gfn_to_memslot(kvm, sp->gfn); | 200 | slot = gfn_to_memslot(kvm, sp->gfn); |
201 | rmapp = &slot->rmap[sp->gfn - slot->base_gfn]; | 201 | rmapp = &slot->rmap[sp->gfn - slot->base_gfn]; |
202 | 202 | ||
203 | spte = rmap_next(kvm, rmapp, NULL); | 203 | spte = rmap_next(rmapp, NULL); |
204 | while (spte) { | 204 | while (spte) { |
205 | if (is_writable_pte(*spte)) | 205 | if (is_writable_pte(*spte)) |
206 | audit_printk(kvm, "shadow page has writable " | 206 | audit_printk(kvm, "shadow page has writable " |
207 | "mappings: gfn %llx role %x\n", | 207 | "mappings: gfn %llx role %x\n", |
208 | sp->gfn, sp->role.word); | 208 | sp->gfn, sp->role.word); |
209 | spte = rmap_next(kvm, rmapp, spte); | 209 | spte = rmap_next(rmapp, spte); |
210 | } | 210 | } |
211 | } | 211 | } |
212 | 212 | ||
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 7aad5446f393..a73f0c104813 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c | |||
@@ -33,10 +33,11 @@ static struct kvm_arch_event_perf_mapping { | |||
33 | [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES }, | 33 | [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES }, |
34 | [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, | 34 | [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, |
35 | [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, | 35 | [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, |
36 | [7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES }, | ||
36 | }; | 37 | }; |
37 | 38 | ||
38 | /* mapping between fixed pmc index and arch_events array */ | 39 | /* mapping between fixed pmc index and arch_events array */ |
39 | int fixed_pmc_events[] = {1, 0, 2}; | 40 | int fixed_pmc_events[] = {1, 0, 7}; |
40 | 41 | ||
41 | static bool pmc_is_gp(struct kvm_pmc *pmc) | 42 | static bool pmc_is_gp(struct kvm_pmc *pmc) |
42 | { | 43 | { |
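fixed_pmc_events[] maps a fixed counter index to a row of arch_events[]; with the new row [7], fixed counter 2 now reports PERF_COUNT_HW_REF_CPU_CYCLES instead of being conflated with ordinary CPU cycles. A sketch of the lookup:

    #include <stdio.h>

    static const int fixed_pmc_events[] = { 1, 0, 7 };

    int main(void)
    {
            int idx;

            for (idx = 0; idx < 3; idx++)
                    printf("fixed counter %d -> arch_events[%d]\n",
                           idx, fixed_pmc_events[idx]);
            return 0;
    }
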
@@ -210,6 +211,9 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | |||
210 | unsigned config, type = PERF_TYPE_RAW; | 211 | unsigned config, type = PERF_TYPE_RAW; |
211 | u8 event_select, unit_mask; | 212 | u8 event_select, unit_mask; |
212 | 213 | ||
214 | if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL) | ||
215 | printk_once("kvm pmu: pin control bit is ignored\n"); | ||
216 | |||
213 | pmc->eventsel = eventsel; | 217 | pmc->eventsel = eventsel; |
214 | 218 | ||
215 | stop_counter(pmc); | 219 | stop_counter(pmc); |
@@ -220,7 +224,7 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) | |||
220 | event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT; | 224 | event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT; |
221 | unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; | 225 | unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; |
222 | 226 | ||
223 | if (!(event_select & (ARCH_PERFMON_EVENTSEL_EDGE | | 227 | if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE | |
224 | ARCH_PERFMON_EVENTSEL_INV | | 228 | ARCH_PERFMON_EVENTSEL_INV | |
225 | ARCH_PERFMON_EVENTSEL_CMASK))) { | 229 | ARCH_PERFMON_EVENTSEL_CMASK))) { |
226 | config = find_arch_event(&pmc->vcpu->arch.pmu, event_select, | 230 | config = find_arch_event(&pmc->vcpu->arch.pmu, event_select, |
@@ -413,7 +417,7 @@ int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data) | |||
413 | struct kvm_pmc *counters; | 417 | struct kvm_pmc *counters; |
414 | u64 ctr; | 418 | u64 ctr; |
415 | 419 | ||
416 | pmc &= (3u << 30) - 1; | 420 | pmc &= ~(3u << 30); |
417 | if (!fixed && pmc >= pmu->nr_arch_gp_counters) | 421 | if (!fixed && pmc >= pmu->nr_arch_gp_counters) |
418 | return 1; | 422 | return 1; |
419 | if (fixed && pmc >= pmu->nr_arch_fixed_counters) | 423 | if (fixed && pmc >= pmu->nr_arch_fixed_counters) |
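The mask fix matters once bit 31 (the RDPMC "fast read" flag) is set: the old expression `(3u << 30) - 1` is 0xbfffffff and clears only bit 30, while `~(3u << 30)` clears both flag bits 30 and 31 before the index comparison. Demonstrated in isolation:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t pmc = (1u << 31) | 2;  /* index 2 with flag bit 31 set */

            printf("old mask: 0x%08x\n", pmc & ((3u << 30) - 1));   /* 0x80000002 */
            printf("new mask: 0x%08x\n", pmc & ~(3u << 30));        /* 0x00000002 */
            return 0;
    }
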
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e385214711cb..e334389e1c75 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -111,6 +111,12 @@ struct nested_state { | |||
111 | #define MSRPM_OFFSETS 16 | 111 | #define MSRPM_OFFSETS 16 |
112 | static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; | 112 | static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; |
113 | 113 | ||
114 | /* | ||
115 | * Set osvw_len to higher value when updated Revision Guides | ||
116 | * are published and we know what the new status bits are | ||
117 | */ | ||
118 | static uint64_t osvw_len = 4, osvw_status; | ||
119 | |||
114 | struct vcpu_svm { | 120 | struct vcpu_svm { |
115 | struct kvm_vcpu vcpu; | 121 | struct kvm_vcpu vcpu; |
116 | struct vmcb *vmcb; | 122 | struct vmcb *vmcb; |
@@ -177,11 +183,13 @@ static bool npt_enabled = true; | |||
177 | #else | 183 | #else |
178 | static bool npt_enabled; | 184 | static bool npt_enabled; |
179 | #endif | 185 | #endif |
180 | static int npt = 1; | ||
181 | 186 | ||
187 | /* allow nested paging (virtualized MMU) for all guests */ | ||
188 | static int npt = true; | ||
182 | module_param(npt, int, S_IRUGO); | 189 | module_param(npt, int, S_IRUGO); |
183 | 190 | ||
184 | static int nested = 1; | 191 | /* allow nested virtualization in KVM/SVM */ |
192 | static int nested = true; | ||
185 | module_param(nested, int, S_IRUGO); | 193 | module_param(nested, int, S_IRUGO); |
186 | 194 | ||
187 | static void svm_flush_tlb(struct kvm_vcpu *vcpu); | 195 | static void svm_flush_tlb(struct kvm_vcpu *vcpu); |
@@ -557,6 +565,27 @@ static void svm_init_erratum_383(void) | |||
557 | erratum_383_found = true; | 565 | erratum_383_found = true; |
558 | } | 566 | } |
559 | 567 | ||
568 | static void svm_init_osvw(struct kvm_vcpu *vcpu) | ||
569 | { | ||
570 | /* | ||
571 | * Guests should see errata 400 and 415 as fixed (assuming that | ||
572 | * HLT and IO instructions are intercepted). | ||
573 | */ | ||
574 | vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3; | ||
575 | vcpu->arch.osvw.status = osvw_status & ~(6ULL); | ||
576 | |||
577 | /* | ||
578 | * By increasing VCPU's osvw.length to 3 we are telling the guest that | ||
579 | * all osvw.status bits inside that length, including bit 0 (which is | ||
580 | * reserved for erratum 298), are valid. However, if host processor's | ||
581 | * osvw_len is 0 then osvw_status[0] carries no information. We need to | ||
582 | * be conservative here and therefore we tell the guest that erratum 298 | ||
583 | * is present (because we really don't know). | ||
584 | */ | ||
585 | if (osvw_len == 0 && boot_cpu_data.x86 == 0x10) | ||
586 | vcpu->arch.osvw.status |= 1; | ||
587 | } | ||
588 | |||
560 | static int has_svm(void) | 589 | static int has_svm(void) |
561 | { | 590 | { |
562 | const char *msg; | 591 | const char *msg; |
@@ -623,6 +652,36 @@ static int svm_hardware_enable(void *garbage) | |||
623 | __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT; | 652 | __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT; |
624 | } | 653 | } |
625 | 654 | ||
655 | |||
656 | /* | ||
657 | * Get OSVW bits. | ||
658 | * | ||
659 | * Note that it is possible to have a system with mixed processor | ||
660 | * revisions and therefore different OSVW bits. If bits are not the same | ||
661 | * on different processors then choose the worst case (i.e. if erratum | ||
662 | * is present on one processor and not on another then assume that the | ||
663 | * erratum is present everywhere). | ||
664 | */ | ||
665 | if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) { | ||
666 | uint64_t len, status = 0; | ||
667 | int err; | ||
668 | |||
669 | len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err); | ||
670 | if (!err) | ||
671 | status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS, | ||
672 | &err); | ||
673 | |||
674 | if (err) | ||
675 | osvw_status = osvw_len = 0; | ||
676 | else { | ||
677 | if (len < osvw_len) | ||
678 | osvw_len = len; | ||
679 | osvw_status |= status; | ||
680 | osvw_status &= (1ULL << osvw_len) - 1; | ||
681 | } | ||
682 | } else | ||
683 | osvw_status = osvw_len = 0; | ||
684 | |||
626 | svm_init_erratum_383(); | 685 | svm_init_erratum_383(); |
627 | 686 | ||
628 | amd_pmu_enable_virt(); | 687 | amd_pmu_enable_virt(); |
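Because svm_hardware_enable() runs once per CPU, the merge above converges on the worst case across mixed revisions: the shortest valid OSVW length and the union of the status bits within it. A userspace sketch of two CPUs being merged, with made-up MSR values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t osvw_len = 4, osvw_status = 0; /* initial values, as above */
            uint64_t cpu_len[2]    = { 4, 3 };      /* hypothetical per-CPU reads */
            uint64_t cpu_status[2] = { 0x2, 0x5 };
            int i;

            for (i = 0; i < 2; i++) {
                    if (cpu_len[i] < osvw_len)
                            osvw_len = cpu_len[i];
                    osvw_status |= cpu_status[i];
                    osvw_status &= (1ULL << osvw_len) - 1;
            }
            printf("len=%llu status=0x%llx\n",      /* len=3 status=0x7 */
                   (unsigned long long)osvw_len, (unsigned long long)osvw_status);
            return 0;
    }
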
@@ -910,20 +969,25 @@ static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) | |||
910 | return _tsc; | 969 | return _tsc; |
911 | } | 970 | } |
912 | 971 | ||
913 | static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) | 972 | static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) |
914 | { | 973 | { |
915 | struct vcpu_svm *svm = to_svm(vcpu); | 974 | struct vcpu_svm *svm = to_svm(vcpu); |
916 | u64 ratio; | 975 | u64 ratio; |
917 | u64 khz; | 976 | u64 khz; |
918 | 977 | ||
919 | /* TSC scaling supported? */ | 978 | /* Guest TSC same frequency as host TSC? */ |
920 | if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) | 979 | if (!scale) { |
980 | svm->tsc_ratio = TSC_RATIO_DEFAULT; | ||
921 | return; | 981 | return; |
982 | } | ||
922 | 983 | ||
923 | /* TSC-Scaling disabled or guest TSC same frequency as host TSC? */ | 984 | /* TSC scaling supported? */ |
924 | if (user_tsc_khz == 0) { | 985 | if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { |
925 | vcpu->arch.virtual_tsc_khz = 0; | 986 | if (user_tsc_khz > tsc_khz) { |
926 | svm->tsc_ratio = TSC_RATIO_DEFAULT; | 987 | vcpu->arch.tsc_catchup = 1; |
988 | vcpu->arch.tsc_always_catchup = 1; | ||
989 | } else | ||
990 | WARN(1, "user requested TSC rate below hardware speed\n"); | ||
927 | return; | 991 | return; |
928 | } | 992 | } |
929 | 993 | ||
@@ -938,7 +1002,6 @@ static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) | |||
938 | user_tsc_khz); | 1002 | user_tsc_khz); |
939 | return; | 1003 | return; |
940 | } | 1004 | } |
941 | vcpu->arch.virtual_tsc_khz = user_tsc_khz; | ||
942 | svm->tsc_ratio = ratio; | 1005 | svm->tsc_ratio = ratio; |
943 | } | 1006 | } |
944 | 1007 | ||
@@ -958,10 +1021,14 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | |||
958 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); | 1021 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); |
959 | } | 1022 | } |
960 | 1023 | ||
961 | static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) | 1024 | static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host) |
962 | { | 1025 | { |
963 | struct vcpu_svm *svm = to_svm(vcpu); | 1026 | struct vcpu_svm *svm = to_svm(vcpu); |
964 | 1027 | ||
1028 | WARN_ON(adjustment < 0); | ||
1029 | if (host) | ||
1030 | adjustment = svm_scale_tsc(vcpu, adjustment); | ||
1031 | |||
965 | svm->vmcb->control.tsc_offset += adjustment; | 1032 | svm->vmcb->control.tsc_offset += adjustment; |
966 | if (is_guest_mode(vcpu)) | 1033 | if (is_guest_mode(vcpu)) |
967 | svm->nested.hsave->control.tsc_offset += adjustment; | 1034 | svm->nested.hsave->control.tsc_offset += adjustment; |
@@ -1191,6 +1258,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
1191 | if (kvm_vcpu_is_bsp(&svm->vcpu)) | 1258 | if (kvm_vcpu_is_bsp(&svm->vcpu)) |
1192 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; | 1259 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; |
1193 | 1260 | ||
1261 | svm_init_osvw(&svm->vcpu); | ||
1262 | |||
1194 | return &svm->vcpu; | 1263 | return &svm->vcpu; |
1195 | 1264 | ||
1196 | free_page4: | 1265 | free_page4: |
@@ -1268,6 +1337,21 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) | |||
1268 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); | 1337 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); |
1269 | } | 1338 | } |
1270 | 1339 | ||
1340 | static void svm_update_cpl(struct kvm_vcpu *vcpu) | ||
1341 | { | ||
1342 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1343 | int cpl; | ||
1344 | |||
1345 | if (!is_protmode(vcpu)) | ||
1346 | cpl = 0; | ||
1347 | else if (svm->vmcb->save.rflags & X86_EFLAGS_VM) | ||
1348 | cpl = 3; | ||
1349 | else | ||
1350 | cpl = svm->vmcb->save.cs.selector & 0x3; | ||
1351 | |||
1352 | svm->vmcb->save.cpl = cpl; | ||
1353 | } | ||
1354 | |||
1271 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) | 1355 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) |
1272 | { | 1356 | { |
1273 | return to_svm(vcpu)->vmcb->save.rflags; | 1357 | return to_svm(vcpu)->vmcb->save.rflags; |
@@ -1275,7 +1359,11 @@ static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) | |||
1275 | 1359 | ||
1276 | static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 1360 | static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
1277 | { | 1361 | { |
1362 | unsigned long old_rflags = to_svm(vcpu)->vmcb->save.rflags; | ||
1363 | |||
1278 | to_svm(vcpu)->vmcb->save.rflags = rflags; | 1364 | to_svm(vcpu)->vmcb->save.rflags = rflags; |
1365 | if ((old_rflags ^ rflags) & X86_EFLAGS_VM) | ||
1366 | svm_update_cpl(vcpu); | ||
1279 | } | 1367 | } |
1280 | 1368 | ||
1281 | static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) | 1369 | static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) |
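svm_update_cpl() centralizes the CPL derivation that was previously open-coded in svm_set_segment(), and is now also triggered when EFLAGS.VM toggles. The rule itself, as a standalone sketch:

    #include <stdbool.h>
    #include <stdio.h>

    #define X86_EFLAGS_VM (1ul << 17)

    static int compute_cpl(bool protmode, unsigned long rflags,
                           unsigned short cs_selector)
    {
            if (!protmode)
                    return 0;               /* real mode runs at CPL 0 */
            if (rflags & X86_EFLAGS_VM)
                    return 3;               /* virtual-8086 mode is CPL 3 */
            return cs_selector & 3;         /* otherwise the RPL of CS */
    }

    int main(void)
    {
            printf("%d\n", compute_cpl(true, 0, 0x1b));     /* 3 */
            return 0;
    }
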
@@ -1543,9 +1631,7 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, | |||
1543 | s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; | 1631 | s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; |
1544 | } | 1632 | } |
1545 | if (seg == VCPU_SREG_CS) | 1633 | if (seg == VCPU_SREG_CS) |
1546 | svm->vmcb->save.cpl | 1634 | svm_update_cpl(vcpu); |
1547 | = (svm->vmcb->save.cs.attrib | ||
1548 | >> SVM_SELECTOR_DPL_SHIFT) & 3; | ||
1549 | 1635 | ||
1550 | mark_dirty(svm->vmcb, VMCB_SEG); | 1636 | mark_dirty(svm->vmcb, VMCB_SEG); |
1551 | } | 1637 | } |
@@ -2735,7 +2821,10 @@ static int task_switch_interception(struct vcpu_svm *svm) | |||
2735 | (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) | 2821 | (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) |
2736 | skip_emulated_instruction(&svm->vcpu); | 2822 | skip_emulated_instruction(&svm->vcpu); |
2737 | 2823 | ||
2738 | if (kvm_task_switch(&svm->vcpu, tss_selector, reason, | 2824 | if (int_type != SVM_EXITINTINFO_TYPE_SOFT) |
2825 | int_vec = -1; | ||
2826 | |||
2827 | if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason, | ||
2739 | has_error_code, error_code) == EMULATE_FAIL) { | 2828 | has_error_code, error_code) == EMULATE_FAIL) { |
2740 | svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 2829 | svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
2741 | svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | 2830 | svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 246490f643b6..280751c84724 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -70,9 +70,6 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO); | |||
70 | static bool __read_mostly vmm_exclusive = 1; | 70 | static bool __read_mostly vmm_exclusive = 1; |
71 | module_param(vmm_exclusive, bool, S_IRUGO); | 71 | module_param(vmm_exclusive, bool, S_IRUGO); |
72 | 72 | ||
73 | static bool __read_mostly yield_on_hlt = 1; | ||
74 | module_param(yield_on_hlt, bool, S_IRUGO); | ||
75 | |||
76 | static bool __read_mostly fasteoi = 1; | 73 | static bool __read_mostly fasteoi = 1; |
77 | module_param(fasteoi, bool, S_IRUGO); | 74 | module_param(fasteoi, bool, S_IRUGO); |
78 | 75 | ||
@@ -1655,17 +1652,6 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
1655 | vmx_set_interrupt_shadow(vcpu, 0); | 1652 | vmx_set_interrupt_shadow(vcpu, 0); |
1656 | } | 1653 | } |
1657 | 1654 | ||
1658 | static void vmx_clear_hlt(struct kvm_vcpu *vcpu) | ||
1659 | { | ||
1660 | /* Ensure that we clear the HLT state in the VMCS. We don't need to | ||
1661 | * explicitly skip the instruction because if the HLT state is set, then | ||
1662 | * the instruction is already executing and RIP has already been | ||
1663 | * advanced. */ | ||
1664 | if (!yield_on_hlt && | ||
1665 | vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT) | ||
1666 | vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); | ||
1667 | } | ||
1668 | |||
1669 | /* | 1655 | /* |
1670 | * KVM wants to inject page-faults which it got to the guest. This function | 1656 | * KVM wants to inject page-faults which it got to the guest. This function |
1671 | * checks whether in a nested guest, we need to inject them to L1 or L2. | 1657 | * checks whether in a nested guest, we need to inject them to L1 or L2. |
@@ -1678,7 +1664,7 @@ static int nested_pf_handled(struct kvm_vcpu *vcpu) | |||
1678 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 1664 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
1679 | 1665 | ||
1680 | /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ | 1666 | /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ |
1681 | if (!(vmcs12->exception_bitmap & PF_VECTOR)) | 1667 | if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR))) |
1682 | return 0; | 1668 | return 0; |
1683 | 1669 | ||
1684 | nested_vmx_vmexit(vcpu); | 1670 | nested_vmx_vmexit(vcpu); |
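The one-character fix above is the classic bit-number-versus-bit-mask bug: PF_VECTOR is 14, so the old test ANDed the bitmap with 0xe instead of with bit 14. Demonstrated in isolation:

    #include <stdio.h>

    #define PF_VECTOR 14

    int main(void)
    {
            unsigned int exception_bitmap = 1u << PF_VECTOR; /* L1 intercepts #PF */

            printf("old test: %d\n", !!(exception_bitmap & PF_VECTOR));         /* 0 */
            printf("new test: %d\n", !!(exception_bitmap & (1u << PF_VECTOR))); /* 1 */
            return 0;
    }
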
@@ -1718,7 +1704,6 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
1718 | intr_info |= INTR_TYPE_HARD_EXCEPTION; | 1704 | intr_info |= INTR_TYPE_HARD_EXCEPTION; |
1719 | 1705 | ||
1720 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); | 1706 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); |
1721 | vmx_clear_hlt(vcpu); | ||
1722 | } | 1707 | } |
1723 | 1708 | ||
1724 | static bool vmx_rdtscp_supported(void) | 1709 | static bool vmx_rdtscp_supported(void) |
@@ -1817,13 +1802,19 @@ u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu) | |||
1817 | } | 1802 | } |
1818 | 1803 | ||
1819 | /* | 1804 | /* |
1820 | * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ | 1805 | * Engage any workarounds for mis-matched TSC rates. Currently limited to |
1821 | * ioctl. In this case the call-back should update internal vmx state to make | 1806 | * software catchup for faster rates on slower CPUs. |
1822 | * the changes effective. | ||
1823 | */ | 1807 | */ |
1824 | static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) | 1808 | static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) |
1825 | { | 1809 | { |
1826 | /* Nothing to do here */ | 1810 | if (!scale) |
1811 | return; | ||
1812 | |||
1813 | if (user_tsc_khz > tsc_khz) { | ||
1814 | vcpu->arch.tsc_catchup = 1; | ||
1815 | vcpu->arch.tsc_always_catchup = 1; | ||
1816 | } else | ||
1817 | WARN(1, "user requested TSC rate below hardware speed\n"); | ||
1827 | } | 1818 | } |
1828 | 1819 | ||
1829 | /* | 1820 | /* |
@@ -1850,7 +1841,7 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | |||
1850 | } | 1841 | } |
1851 | } | 1842 | } |
1852 | 1843 | ||
1853 | static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment) | 1844 | static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host) |
1854 | { | 1845 | { |
1855 | u64 offset = vmcs_read64(TSC_OFFSET); | 1846 | u64 offset = vmcs_read64(TSC_OFFSET); |
1856 | vmcs_write64(TSC_OFFSET, offset + adjustment); | 1847 | vmcs_write64(TSC_OFFSET, offset + adjustment); |
@@ -2219,6 +2210,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
2219 | msr = find_msr_entry(vmx, msr_index); | 2210 | msr = find_msr_entry(vmx, msr_index); |
2220 | if (msr) { | 2211 | if (msr) { |
2221 | msr->data = data; | 2212 | msr->data = data; |
2213 | if (msr - vmx->guest_msrs < vmx->save_nmsrs) | ||
2214 | kvm_set_shared_msr(msr->index, msr->data, | ||
2215 | msr->mask); | ||
2222 | break; | 2216 | break; |
2223 | } | 2217 | } |
2224 | ret = kvm_set_msr_common(vcpu, msr_index, data); | 2218 | ret = kvm_set_msr_common(vcpu, msr_index, data); |
@@ -2399,7 +2393,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2399 | &_pin_based_exec_control) < 0) | 2393 | &_pin_based_exec_control) < 0) |
2400 | return -EIO; | 2394 | return -EIO; |
2401 | 2395 | ||
2402 | min = | 2396 | min = CPU_BASED_HLT_EXITING | |
2403 | #ifdef CONFIG_X86_64 | 2397 | #ifdef CONFIG_X86_64 |
2404 | CPU_BASED_CR8_LOAD_EXITING | | 2398 | CPU_BASED_CR8_LOAD_EXITING | |
2405 | CPU_BASED_CR8_STORE_EXITING | | 2399 | CPU_BASED_CR8_STORE_EXITING | |
@@ -2414,9 +2408,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2414 | CPU_BASED_INVLPG_EXITING | | 2408 | CPU_BASED_INVLPG_EXITING | |
2415 | CPU_BASED_RDPMC_EXITING; | 2409 | CPU_BASED_RDPMC_EXITING; |
2416 | 2410 | ||
2417 | if (yield_on_hlt) | ||
2418 | min |= CPU_BASED_HLT_EXITING; | ||
2419 | |||
2420 | opt = CPU_BASED_TPR_SHADOW | | 2411 | opt = CPU_BASED_TPR_SHADOW | |
2421 | CPU_BASED_USE_MSR_BITMAPS | | 2412 | CPU_BASED_USE_MSR_BITMAPS | |
2422 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 2413 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
@@ -4003,7 +3994,6 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) | |||
4003 | } else | 3994 | } else |
4004 | intr |= INTR_TYPE_EXT_INTR; | 3995 | intr |= INTR_TYPE_EXT_INTR; |
4005 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); | 3996 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); |
4006 | vmx_clear_hlt(vcpu); | ||
4007 | } | 3997 | } |
4008 | 3998 | ||
4009 | static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | 3999 | static void vmx_inject_nmi(struct kvm_vcpu *vcpu) |
@@ -4035,7 +4025,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | |||
4035 | } | 4025 | } |
4036 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 4026 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, |
4037 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); | 4027 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); |
4038 | vmx_clear_hlt(vcpu); | ||
4039 | } | 4028 | } |
4040 | 4029 | ||
4041 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | 4030 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) |
@@ -4672,9 +4661,10 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
4672 | bool has_error_code = false; | 4661 | bool has_error_code = false; |
4673 | u32 error_code = 0; | 4662 | u32 error_code = 0; |
4674 | u16 tss_selector; | 4663 | u16 tss_selector; |
4675 | int reason, type, idt_v; | 4664 | int reason, type, idt_v, idt_index; |
4676 | 4665 | ||
4677 | idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); | 4666 | idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); |
4667 | idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK); | ||
4678 | type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); | 4668 | type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); |
4679 | 4669 | ||
4680 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 4670 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
@@ -4712,8 +4702,9 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
4712 | type != INTR_TYPE_NMI_INTR)) | 4702 | type != INTR_TYPE_NMI_INTR)) |
4713 | skip_emulated_instruction(vcpu); | 4703 | skip_emulated_instruction(vcpu); |
4714 | 4704 | ||
4715 | if (kvm_task_switch(vcpu, tss_selector, reason, | 4705 | if (kvm_task_switch(vcpu, tss_selector, |
4716 | has_error_code, error_code) == EMULATE_FAIL) { | 4706 | type == INTR_TYPE_SOFT_INTR ? idt_index : -1, reason, |
4707 | has_error_code, error_code) == EMULATE_FAIL) { | ||
4717 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 4708 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
4718 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | 4709 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; |
4719 | vcpu->run->internal.ndata = 0; | 4710 | vcpu->run->internal.ndata = 0; |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 54696b5f8443..4044ce0bf7c1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -97,6 +97,10 @@ EXPORT_SYMBOL_GPL(kvm_has_tsc_control); | |||
97 | u32 kvm_max_guest_tsc_khz; | 97 | u32 kvm_max_guest_tsc_khz; |
98 | EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); | 98 | EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); |
99 | 99 | ||
100 | /* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */ | ||
101 | static u32 tsc_tolerance_ppm = 250; | ||
102 | module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); | ||
103 | |||
100 | #define KVM_NR_SHARED_MSRS 16 | 104 | #define KVM_NR_SHARED_MSRS 16 |
101 | 105 | ||
102 | struct kvm_shared_msrs_global { | 106 | struct kvm_shared_msrs_global { |
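A tolerance of 250 ppm around the host rate decides whether a requested guest TSC frequency needs scaling or catchup at all; the window is computed as khz * (1000000 +/- ppm) / 1000000, mirroring adjust_tsc_khz() later in this patch. Worked numbers for a hypothetical 2.4 GHz host:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t adjust_tsc_khz(uint32_t khz, int32_t ppm)
    {
            uint64_t v = (uint64_t)khz * (1000000 + ppm);

            return (uint32_t)(v / 1000000);
    }

    int main(void)
    {
            uint32_t tsc_khz = 2400000;     /* 2.4 GHz host TSC */

            printf("tolerated: [%u, %u] kHz\n",
                   adjust_tsc_khz(tsc_khz, -250), adjust_tsc_khz(tsc_khz, 250));
            return 0;                       /* [2399400, 2400600] */
    }
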
@@ -969,50 +973,51 @@ static inline u64 get_kernel_ns(void) | |||
969 | static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); | 973 | static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); |
970 | unsigned long max_tsc_khz; | 974 | unsigned long max_tsc_khz; |
971 | 975 | ||
972 | static inline int kvm_tsc_changes_freq(void) | 976 | static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) |
973 | { | 977 | { |
974 | int cpu = get_cpu(); | 978 | return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult, |
975 | int ret = !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && | 979 | vcpu->arch.virtual_tsc_shift); |
976 | cpufreq_quick_get(cpu) != 0; | ||
977 | put_cpu(); | ||
978 | return ret; | ||
979 | } | 980 | } |
980 | 981 | ||
981 | u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) | 982 | static u32 adjust_tsc_khz(u32 khz, s32 ppm) |
982 | { | 983 | { |
983 | if (vcpu->arch.virtual_tsc_khz) | 984 | u64 v = (u64)khz * (1000000 + ppm); |
984 | return vcpu->arch.virtual_tsc_khz; | 985 | do_div(v, 1000000); |
985 | else | 986 | return v; |
986 | return __this_cpu_read(cpu_tsc_khz); | ||
987 | } | 987 | } |
988 | 988 | ||
989 | static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) | 989 | static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) |
990 | { | 990 | { |
991 | u64 ret; | 991 | u32 thresh_lo, thresh_hi; |
992 | 992 | int use_scaling = 0; | |
993 | WARN_ON(preemptible()); | ||
994 | if (kvm_tsc_changes_freq()) | ||
995 | printk_once(KERN_WARNING | ||
996 | "kvm: unreliable cycle conversion on adjustable rate TSC\n"); | ||
997 | ret = nsec * vcpu_tsc_khz(vcpu); | ||
998 | do_div(ret, USEC_PER_SEC); | ||
999 | return ret; | ||
1000 | } | ||
1001 | 993 | ||
1002 | static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz) | ||
1003 | { | ||
1004 | /* Compute a scale to convert nanoseconds in TSC cycles */ | 994 | /* Compute a scale to convert nanoseconds in TSC cycles */ |
1005 | kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, | 995 | kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, |
1006 | &vcpu->arch.tsc_catchup_shift, | 996 | &vcpu->arch.virtual_tsc_shift, |
1007 | &vcpu->arch.tsc_catchup_mult); | 997 | &vcpu->arch.virtual_tsc_mult); |
998 | vcpu->arch.virtual_tsc_khz = this_tsc_khz; | ||
999 | |||
1000 | /* | ||
1001 | * Compute the variation in TSC rate which is acceptable | ||
1002 | * within the range of tolerance and decide if the | ||
1003 | * rate being applied is within the bounds of the hardware | ||
1004 | * rate. If so, no scaling or compensation need be done. | ||
1005 | */ | ||
1006 | thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm); | ||
1007 | thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm); | ||
1008 | if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) { | ||
1009 | pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi); | ||
1010 | use_scaling = 1; | ||
1011 | } | ||
1012 | kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling); | ||
1008 | } | 1013 | } |
1009 | 1014 | ||
1010 | static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) | 1015 | static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) |
1011 | { | 1016 | { |
1012 | u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec, | 1017 | u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec, |
1013 | vcpu->arch.tsc_catchup_mult, | 1018 | vcpu->arch.virtual_tsc_mult, |
1014 | vcpu->arch.tsc_catchup_shift); | 1019 | vcpu->arch.virtual_tsc_shift); |
1015 | tsc += vcpu->arch.last_tsc_write; | 1020 | tsc += vcpu->arch.this_tsc_write; |
1016 | return tsc; | 1021 | return tsc; |
1017 | } | 1022 | } |
1018 | 1023 | ||
@@ -1021,48 +1026,88 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) | |||
1021 | struct kvm *kvm = vcpu->kvm; | 1026 | struct kvm *kvm = vcpu->kvm; |
1022 | u64 offset, ns, elapsed; | 1027 | u64 offset, ns, elapsed; |
1023 | unsigned long flags; | 1028 | unsigned long flags; |
1024 | s64 sdiff; | 1029 | s64 usdiff; |
1025 | 1030 | ||
1026 | raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); | 1031 | raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); |
1027 | offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); | 1032 | offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); |
1028 | ns = get_kernel_ns(); | 1033 | ns = get_kernel_ns(); |
1029 | elapsed = ns - kvm->arch.last_tsc_nsec; | 1034 | elapsed = ns - kvm->arch.last_tsc_nsec; |
1030 | sdiff = data - kvm->arch.last_tsc_write; | 1035 | |
1031 | if (sdiff < 0) | 1036 | /* n.b - signed multiplication and division required */ |
1032 | sdiff = -sdiff; | 1037 | usdiff = data - kvm->arch.last_tsc_write; |
1038 | #ifdef CONFIG_X86_64 | ||
1039 | usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; | ||
1040 | #else | ||
1041 | /* do_div() only does unsigned */ | ||
1042 | asm("idivl %2; xor %%edx, %%edx" | ||
1043 | : "=A"(usdiff) | ||
1044 | : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz)); | ||
1045 | #endif | ||
1046 | do_div(elapsed, 1000); | ||
1047 | usdiff -= elapsed; | ||
1048 | if (usdiff < 0) | ||
1049 | usdiff = -usdiff; | ||
1033 | 1050 | ||
1034 | /* | 1051 | /* |
1035 | * Special case: close write to TSC within 5 seconds of | 1052 | * Special case: TSC write with a small delta (under 1 second) of virtual |
1036 | * another CPU is interpreted as an attempt to synchronize | 1053 | * cycle time against real time is interpreted as an attempt to |
1037 | * The 5 seconds is to accommodate host load / swapping as | 1054 | * synchronize the CPU. |
1038 | * well as any reset of TSC during the boot process. | 1055 | * |
1039 | * | 1056 | * For a reliable TSC, we can match TSC offsets, and for an unstable |
1040 | * In that case, for a reliable TSC, we can match TSC offsets, | 1057 | * TSC, we add elapsed time in this computation. We could let the |
1041 | * or make a best guest using elapsed value. | 1058 | * compensation code attempt to catch up if we fall behind, but |
1042 | */ | 1059 | * it's better to try to match offsets from the beginning. |
1043 | if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) && | 1060 | */ |
1044 | elapsed < 5ULL * NSEC_PER_SEC) { | 1061 | if (usdiff < USEC_PER_SEC && |
1062 | vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) { | ||
1045 | if (!check_tsc_unstable()) { | 1063 | if (!check_tsc_unstable()) { |
1046 | offset = kvm->arch.last_tsc_offset; | 1064 | offset = kvm->arch.cur_tsc_offset; |
1047 | pr_debug("kvm: matched tsc offset for %llu\n", data); | 1065 | pr_debug("kvm: matched tsc offset for %llu\n", data); |
1048 | } else { | 1066 | } else { |
1049 | u64 delta = nsec_to_cycles(vcpu, elapsed); | 1067 | u64 delta = nsec_to_cycles(vcpu, elapsed); |
1050 | offset += delta; | 1068 | data += delta; |
1069 | offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); | ||
1051 | pr_debug("kvm: adjusted tsc offset by %llu\n", delta); | 1070 | pr_debug("kvm: adjusted tsc offset by %llu\n", delta); |
1052 | } | 1071 | } |
1053 | ns = kvm->arch.last_tsc_nsec; | 1072 | } else { |
1073 | /* | ||
1074 | * We split periods of matched TSC writes into generations. | ||
1075 | * For each generation, we track the original measured | ||
1076 | * nanosecond time, offset, and write, so if TSCs are in | ||
1077 | * sync, we can match exact offset, and if not, we can match | ||
1078 | * exact software computation in compute_guest_tsc() | ||
1079 | * | ||
1080 | * These values are tracked in kvm->arch.cur_xxx variables. | ||
1081 | */ | ||
1082 | kvm->arch.cur_tsc_generation++; | ||
1083 | kvm->arch.cur_tsc_nsec = ns; | ||
1084 | kvm->arch.cur_tsc_write = data; | ||
1085 | kvm->arch.cur_tsc_offset = offset; | ||
1086 | pr_debug("kvm: new tsc generation %u, clock %llu\n", | ||
1087 | kvm->arch.cur_tsc_generation, data); | ||
1054 | } | 1088 | } |
1089 | |||
1090 | /* | ||
1091 | * We also track the most recent recorded KHZ, write and time to | ||
1092 | * allow the matching interval to be extended at each write. | ||
1093 | */ | ||
1055 | kvm->arch.last_tsc_nsec = ns; | 1094 | kvm->arch.last_tsc_nsec = ns; |
1056 | kvm->arch.last_tsc_write = data; | 1095 | kvm->arch.last_tsc_write = data; |
1057 | kvm->arch.last_tsc_offset = offset; | 1096 | kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; |
1058 | kvm_x86_ops->write_tsc_offset(vcpu, offset); | ||
1059 | raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); | ||
1060 | 1097 | ||
1061 | /* Reset of TSC must disable overshoot protection below */ | 1098 | /* Reset of TSC must disable overshoot protection below */ |
1062 | vcpu->arch.hv_clock.tsc_timestamp = 0; | 1099 | vcpu->arch.hv_clock.tsc_timestamp = 0; |
1063 | vcpu->arch.last_tsc_write = data; | 1100 | vcpu->arch.last_guest_tsc = data; |
1064 | vcpu->arch.last_tsc_nsec = ns; | 1101 | |
1102 | /* Keep track of which generation this VCPU has synchronized to */ | ||
1103 | vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation; | ||
1104 | vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec; | ||
1105 | vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write; | ||
1106 | |||
1107 | kvm_x86_ops->write_tsc_offset(vcpu, offset); | ||
1108 | raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); | ||
1065 | } | 1109 | } |
1110 | |||
1066 | EXPORT_SYMBOL_GPL(kvm_write_tsc); | 1111 | EXPORT_SYMBOL_GPL(kvm_write_tsc); |
1067 | 1112 | ||
1068 | static int kvm_guest_time_update(struct kvm_vcpu *v) | 1113 | static int kvm_guest_time_update(struct kvm_vcpu *v) |
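The usdiff logic above converts the cycle delta between two TSC writes into microseconds of virtual time (cycles * 1000 / kHz), subtracts the elapsed real time, and treats anything under one second as a synchronization attempt rather than a new generation. Worked through with hypothetical numbers:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t data = 5000000000ULL, last_tsc_write = 4997600000ULL;
            uint32_t virtual_tsc_khz = 2400000;     /* 2.4 GHz guest TSC */
            uint64_t elapsed_ns = 1000000;          /* 1 ms of real time elapsed */
            int64_t usdiff;

            usdiff = (int64_t)(data - last_tsc_write) * 1000 / virtual_tsc_khz;
            usdiff -= (int64_t)(elapsed_ns / 1000);
            if (usdiff < 0)
                    usdiff = -usdiff;
            printf("usdiff=%lld us -> %s\n", (long long)usdiff,
                   usdiff < 1000000 ? "sync attempt" : "new TSC generation");
            return 0;                               /* usdiff=0 -> sync attempt */
    }
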
@@ -1078,7 +1123,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1078 | local_irq_save(flags); | 1123 | local_irq_save(flags); |
1079 | tsc_timestamp = kvm_x86_ops->read_l1_tsc(v); | 1124 | tsc_timestamp = kvm_x86_ops->read_l1_tsc(v); |
1080 | kernel_ns = get_kernel_ns(); | 1125 | kernel_ns = get_kernel_ns(); |
1081 | this_tsc_khz = vcpu_tsc_khz(v); | 1126 | this_tsc_khz = __get_cpu_var(cpu_tsc_khz); |
1082 | if (unlikely(this_tsc_khz == 0)) { | 1127 | if (unlikely(this_tsc_khz == 0)) { |
1083 | local_irq_restore(flags); | 1128 | local_irq_restore(flags); |
1084 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); | 1129 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); |
@@ -1098,7 +1143,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1098 | if (vcpu->tsc_catchup) { | 1143 | if (vcpu->tsc_catchup) { |
1099 | u64 tsc = compute_guest_tsc(v, kernel_ns); | 1144 | u64 tsc = compute_guest_tsc(v, kernel_ns); |
1100 | if (tsc > tsc_timestamp) { | 1145 | if (tsc > tsc_timestamp) { |
1101 | kvm_x86_ops->adjust_tsc_offset(v, tsc - tsc_timestamp); | 1146 | adjust_tsc_offset_guest(v, tsc - tsc_timestamp); |
1102 | tsc_timestamp = tsc; | 1147 | tsc_timestamp = tsc; |
1103 | } | 1148 | } |
1104 | } | 1149 | } |
@@ -1130,7 +1175,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1130 | * observed by the guest and ensure the new system time is greater. | 1175 | * observed by the guest and ensure the new system time is greater. |
1131 | */ | 1176 | */ |
1132 | max_kernel_ns = 0; | 1177 | max_kernel_ns = 0; |
1133 | if (vcpu->hv_clock.tsc_timestamp && vcpu->last_guest_tsc) { | 1178 | if (vcpu->hv_clock.tsc_timestamp) { |
1134 | max_kernel_ns = vcpu->last_guest_tsc - | 1179 | max_kernel_ns = vcpu->last_guest_tsc - |
1135 | vcpu->hv_clock.tsc_timestamp; | 1180 | vcpu->hv_clock.tsc_timestamp; |
1136 | max_kernel_ns = pvclock_scale_delta(max_kernel_ns, | 1181 | max_kernel_ns = pvclock_scale_delta(max_kernel_ns, |
@@ -1504,6 +1549,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1504 | case MSR_K7_HWCR: | 1549 | case MSR_K7_HWCR: |
1505 | data &= ~(u64)0x40; /* ignore flush filter disable */ | 1550 | data &= ~(u64)0x40; /* ignore flush filter disable */ |
1506 | data &= ~(u64)0x100; /* ignore ignne emulation enable */ | 1551 | data &= ~(u64)0x100; /* ignore ignne emulation enable */ |
1552 | data &= ~(u64)0x8; /* ignore TLB cache disable */ | ||
1507 | if (data != 0) { | 1553 | if (data != 0) { |
1508 | pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", | 1554 | pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", |
1509 | data); | 1555 | data); |
@@ -1676,6 +1722,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1676 | */ | 1722 | */ |
1677 | pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data); | 1723 | pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data); |
1678 | break; | 1724 | break; |
1725 | case MSR_AMD64_OSVW_ID_LENGTH: | ||
1726 | if (!guest_cpuid_has_osvw(vcpu)) | ||
1727 | return 1; | ||
1728 | vcpu->arch.osvw.length = data; | ||
1729 | break; | ||
1730 | case MSR_AMD64_OSVW_STATUS: | ||
1731 | if (!guest_cpuid_has_osvw(vcpu)) | ||
1732 | return 1; | ||
1733 | vcpu->arch.osvw.status = data; | ||
1734 | break; | ||
1679 | default: | 1735 | default: |
1680 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) | 1736 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) |
1681 | return xen_hvm_config(vcpu, data); | 1737 | return xen_hvm_config(vcpu, data); |
@@ -1960,6 +2016,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1960 | */ | 2016 | */ |
1961 | data = 0xbe702111; | 2017 | data = 0xbe702111; |
1962 | break; | 2018 | break; |
2019 | case MSR_AMD64_OSVW_ID_LENGTH: | ||
2020 | if (!guest_cpuid_has_osvw(vcpu)) | ||
2021 | return 1; | ||
2022 | data = vcpu->arch.osvw.length; | ||
2023 | break; | ||
2024 | case MSR_AMD64_OSVW_STATUS: | ||
2025 | if (!guest_cpuid_has_osvw(vcpu)) | ||
2026 | return 1; | ||
2027 | data = vcpu->arch.osvw.status; | ||
2028 | break; | ||
1963 | default: | 2029 | default: |
1964 | if (kvm_pmu_msr(vcpu, msr)) | 2030 | if (kvm_pmu_msr(vcpu, msr)) |
1965 | return kvm_pmu_get_msr(vcpu, msr, pdata); | 2031 | return kvm_pmu_get_msr(vcpu, msr, pdata); |
@@ -2080,6 +2146,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
2080 | case KVM_CAP_XSAVE: | 2146 | case KVM_CAP_XSAVE: |
2081 | case KVM_CAP_ASYNC_PF: | 2147 | case KVM_CAP_ASYNC_PF: |
2082 | case KVM_CAP_GET_TSC_KHZ: | 2148 | case KVM_CAP_GET_TSC_KHZ: |
2149 | case KVM_CAP_PCI_2_3: | ||
2083 | r = 1; | 2150 | r = 1; |
2084 | break; | 2151 | break; |
2085 | case KVM_CAP_COALESCED_MMIO: | 2152 | case KVM_CAP_COALESCED_MMIO: |
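Userspace is expected to probe the new capability before requesting the PCI 2.3 behavior at device-assignment time. A minimal sketch, assuming kvm_fd is an open /dev/kvm fd:

    /* Returns 1 once this patch is applied, 0 on older kernels. */
    if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PCI_2_3) > 0) {
            /* Safe to pass KVM_DEV_ASSIGN_PCI_2_3 in the assignment flags. */
    }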
@@ -2214,19 +2281,23 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
2214 | } | 2281 | } |
2215 | 2282 | ||
2216 | kvm_x86_ops->vcpu_load(vcpu, cpu); | 2283 | kvm_x86_ops->vcpu_load(vcpu, cpu); |
2217 | if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { | ||
2218 | /* Make sure TSC doesn't go backwards */ | ||
2219 | s64 tsc_delta; | ||
2220 | u64 tsc; | ||
2221 | 2284 | ||
2222 | tsc = kvm_x86_ops->read_l1_tsc(vcpu); | 2285 | /* Apply any externally detected TSC adjustments (due to suspend) */ |
2223 | tsc_delta = !vcpu->arch.last_guest_tsc ? 0 : | 2286 | if (unlikely(vcpu->arch.tsc_offset_adjustment)) { |
2224 | tsc - vcpu->arch.last_guest_tsc; | 2287 | adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); |
2288 | vcpu->arch.tsc_offset_adjustment = 0; | ||
2289 | set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); | ||
2290 | } | ||
2225 | 2291 | ||
2292 | if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { | ||
2293 | s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : | ||
2294 | native_read_tsc() - vcpu->arch.last_host_tsc; | ||
2226 | if (tsc_delta < 0) | 2295 | if (tsc_delta < 0) |
2227 | mark_tsc_unstable("KVM discovered backwards TSC"); | 2296 | mark_tsc_unstable("KVM discovered backwards TSC"); |
2228 | if (check_tsc_unstable()) { | 2297 | if (check_tsc_unstable()) { |
2229 | kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta); | 2298 | u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu, |
2299 | vcpu->arch.last_guest_tsc); | ||
2300 | kvm_x86_ops->write_tsc_offset(vcpu, offset); | ||
2230 | vcpu->arch.tsc_catchup = 1; | 2301 | vcpu->arch.tsc_catchup = 1; |
2231 | } | 2302 | } |
2232 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | 2303 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); |
@@ -2243,7 +2314,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |||
2243 | { | 2314 | { |
2244 | kvm_x86_ops->vcpu_put(vcpu); | 2315 | kvm_x86_ops->vcpu_put(vcpu); |
2245 | kvm_put_guest_fpu(vcpu); | 2316 | kvm_put_guest_fpu(vcpu); |
2246 | vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); | 2317 | vcpu->arch.last_host_tsc = native_read_tsc(); |
2247 | } | 2318 | } |
2248 | 2319 | ||
2249 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | 2320 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, |
@@ -2785,26 +2856,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
2785 | u32 user_tsc_khz; | 2856 | u32 user_tsc_khz; |
2786 | 2857 | ||
2787 | r = -EINVAL; | 2858 | r = -EINVAL; |
2788 | if (!kvm_has_tsc_control) | ||
2789 | break; | ||
2790 | |||
2791 | user_tsc_khz = (u32)arg; | 2859 | user_tsc_khz = (u32)arg; |
2792 | 2860 | ||
2793 | if (user_tsc_khz >= kvm_max_guest_tsc_khz) | 2861 | if (user_tsc_khz >= kvm_max_guest_tsc_khz) |
2794 | goto out; | 2862 | goto out; |
2795 | 2863 | ||
2796 | kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz); | 2864 | if (user_tsc_khz == 0) |
2865 | user_tsc_khz = tsc_khz; | ||
2866 | |||
2867 | kvm_set_tsc_khz(vcpu, user_tsc_khz); | ||
2797 | 2868 | ||
2798 | r = 0; | 2869 | r = 0; |
2799 | goto out; | 2870 | goto out; |
2800 | } | 2871 | } |
2801 | case KVM_GET_TSC_KHZ: { | 2872 | case KVM_GET_TSC_KHZ: { |
2802 | r = -EIO; | 2873 | r = vcpu->arch.virtual_tsc_khz; |
2803 | if (check_tsc_unstable()) | ||
2804 | goto out; | ||
2805 | |||
2806 | r = vcpu_tsc_khz(vcpu); | ||
2807 | |||
2808 | goto out; | 2874 | goto out; |
2809 | } | 2875 | } |
2810 | default: | 2876 | default: |
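For userspace, the net effect of this hunk is that KVM_SET_TSC_KHZ no longer depends on hardware TSC scaling, that a value of 0 selects the host rate, and that KVM_GET_TSC_KHZ always succeeds. A hedged usage sketch, assuming vcpu_fd is an open VCPU fd:

    /* Pin the guest TSC to 1.5 GHz; passing 0 would select the host tsc_khz. */
    if (ioctl(vcpu_fd, KVM_SET_TSC_KHZ, 1500000UL) < 0)
            perror("KVM_SET_TSC_KHZ");

    /* Now simply reports virtual_tsc_khz instead of failing with -EIO. */
    long khz = ioctl(vcpu_fd, KVM_GET_TSC_KHZ, 0);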
@@ -2815,6 +2881,11 @@ out: | |||
2815 | return r; | 2881 | return r; |
2816 | } | 2882 | } |
2817 | 2883 | ||
2884 | int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) | ||
2885 | { | ||
2886 | return VM_FAULT_SIGBUS; | ||
2887 | } | ||
2888 | |||
2818 | static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr) | 2889 | static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr) |
2819 | { | 2890 | { |
2820 | int ret; | 2891 | int ret; |
@@ -2998,6 +3069,8 @@ static void write_protect_slot(struct kvm *kvm, | |||
2998 | unsigned long *dirty_bitmap, | 3069 | unsigned long *dirty_bitmap, |
2999 | unsigned long nr_dirty_pages) | 3070 | unsigned long nr_dirty_pages) |
3000 | { | 3071 | { |
3072 | spin_lock(&kvm->mmu_lock); | ||
3073 | |||
3001 | /* Not many dirty pages compared to # of shadow pages. */ | 3074 | /* Not many dirty pages compared to # of shadow pages. */ |
3002 | if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) { | 3075 | if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) { |
3003 | unsigned long gfn_offset; | 3076 | unsigned long gfn_offset; |
@@ -3005,16 +3078,13 @@ static void write_protect_slot(struct kvm *kvm, | |||
3005 | for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) { | 3078 | for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) { |
3006 | unsigned long gfn = memslot->base_gfn + gfn_offset; | 3079 | unsigned long gfn = memslot->base_gfn + gfn_offset; |
3007 | 3080 | ||
3008 | spin_lock(&kvm->mmu_lock); | ||
3009 | kvm_mmu_rmap_write_protect(kvm, gfn, memslot); | 3081 | kvm_mmu_rmap_write_protect(kvm, gfn, memslot); |
3010 | spin_unlock(&kvm->mmu_lock); | ||
3011 | } | 3082 | } |
3012 | kvm_flush_remote_tlbs(kvm); | 3083 | kvm_flush_remote_tlbs(kvm); |
3013 | } else { | 3084 | } else |
3014 | spin_lock(&kvm->mmu_lock); | ||
3015 | kvm_mmu_slot_remove_write_access(kvm, memslot->id); | 3085 | kvm_mmu_slot_remove_write_access(kvm, memslot->id); |
3016 | spin_unlock(&kvm->mmu_lock); | 3086 | |
3017 | } | 3087 | spin_unlock(&kvm->mmu_lock); |
3018 | } | 3088 | } |
3019 | 3089 | ||
3020 | /* | 3090 | /* |
@@ -3133,6 +3203,9 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3133 | r = -EEXIST; | 3203 | r = -EEXIST; |
3134 | if (kvm->arch.vpic) | 3204 | if (kvm->arch.vpic) |
3135 | goto create_irqchip_unlock; | 3205 | goto create_irqchip_unlock; |
3206 | r = -EINVAL; | ||
3207 | if (atomic_read(&kvm->online_vcpus)) | ||
3208 | goto create_irqchip_unlock; | ||
3136 | r = -ENOMEM; | 3209 | r = -ENOMEM; |
3137 | vpic = kvm_create_pic(kvm); | 3210 | vpic = kvm_create_pic(kvm); |
3138 | if (vpic) { | 3211 | if (vpic) { |
@@ -4063,6 +4136,11 @@ static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val) | |||
4063 | return res; | 4136 | return res; |
4064 | } | 4137 | } |
4065 | 4138 | ||
4139 | static void emulator_set_rflags(struct x86_emulate_ctxt *ctxt, ulong val) | ||
4140 | { | ||
4141 | kvm_set_rflags(emul_to_vcpu(ctxt), val); | ||
4142 | } | ||
4143 | |||
4066 | static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt) | 4144 | static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt) |
4067 | { | 4145 | { |
4068 | return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt)); | 4146 | return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt)); |
@@ -4244,6 +4322,7 @@ static struct x86_emulate_ops emulate_ops = { | |||
4244 | .set_idt = emulator_set_idt, | 4322 | .set_idt = emulator_set_idt, |
4245 | .get_cr = emulator_get_cr, | 4323 | .get_cr = emulator_get_cr, |
4246 | .set_cr = emulator_set_cr, | 4324 | .set_cr = emulator_set_cr, |
4325 | .set_rflags = emulator_set_rflags, | ||
4247 | .cpl = emulator_get_cpl, | 4326 | .cpl = emulator_get_cpl, |
4248 | .get_dr = emulator_get_dr, | 4327 | .get_dr = emulator_get_dr, |
4249 | .set_dr = emulator_set_dr, | 4328 | .set_dr = emulator_set_dr, |
@@ -5288,6 +5367,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5288 | profile_hit(KVM_PROFILING, (void *)rip); | 5367 | profile_hit(KVM_PROFILING, (void *)rip); |
5289 | } | 5368 | } |
5290 | 5369 | ||
5370 | if (unlikely(vcpu->arch.tsc_always_catchup)) | ||
5371 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | ||
5291 | 5372 | ||
5292 | kvm_lapic_sync_from_vapic(vcpu); | 5373 | kvm_lapic_sync_from_vapic(vcpu); |
5293 | 5374 | ||
@@ -5587,15 +5668,15 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | |||
5587 | return 0; | 5668 | return 0; |
5588 | } | 5669 | } |
5589 | 5670 | ||
5590 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | 5671 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, |
5591 | bool has_error_code, u32 error_code) | 5672 | int reason, bool has_error_code, u32 error_code) |
5592 | { | 5673 | { |
5593 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; | 5674 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; |
5594 | int ret; | 5675 | int ret; |
5595 | 5676 | ||
5596 | init_emulate_ctxt(vcpu); | 5677 | init_emulate_ctxt(vcpu); |
5597 | 5678 | ||
5598 | ret = emulator_task_switch(ctxt, tss_selector, reason, | 5679 | ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason, |
5599 | has_error_code, error_code); | 5680 | has_error_code, error_code); |
5600 | 5681 | ||
5601 | if (ret) | 5682 | if (ret) |
@@ -5928,13 +6009,88 @@ int kvm_arch_hardware_enable(void *garbage) | |||
5928 | struct kvm *kvm; | 6009 | struct kvm *kvm; |
5929 | struct kvm_vcpu *vcpu; | 6010 | struct kvm_vcpu *vcpu; |
5930 | int i; | 6011 | int i; |
6012 | int ret; | ||
6013 | u64 local_tsc; | ||
6014 | u64 max_tsc = 0; | ||
6015 | bool stable, backwards_tsc = false; | ||
5931 | 6016 | ||
5932 | kvm_shared_msr_cpu_online(); | 6017 | kvm_shared_msr_cpu_online(); |
5933 | list_for_each_entry(kvm, &vm_list, vm_list) | 6018 | ret = kvm_x86_ops->hardware_enable(garbage); |
5934 | kvm_for_each_vcpu(i, vcpu, kvm) | 6019 | if (ret != 0) |
5935 | if (vcpu->cpu == smp_processor_id()) | 6020 | return ret; |
5936 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | 6021 | |
5937 | return kvm_x86_ops->hardware_enable(garbage); | 6022 | local_tsc = native_read_tsc(); |
6023 | stable = !check_tsc_unstable(); | ||
6024 | list_for_each_entry(kvm, &vm_list, vm_list) { | ||
6025 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
6026 | if (!stable && vcpu->cpu == smp_processor_id()) | ||
6027 | set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); | ||
6028 | if (stable && vcpu->arch.last_host_tsc > local_tsc) { | ||
6029 | backwards_tsc = true; | ||
6030 | if (vcpu->arch.last_host_tsc > max_tsc) | ||
6031 | max_tsc = vcpu->arch.last_host_tsc; | ||
6032 | } | ||
6033 | } | ||
6034 | } | ||
6035 | |||
6036 | /* | ||
6037 | * Sometimes, even reliable TSCs go backwards. This happens on | ||
6038 | * platforms that reset TSC during suspend or hibernate actions, but | ||
6039 | * maintain synchronization. We must compensate. Fortunately, we can | ||
6040 | * detect that condition here, which happens early in CPU bringup, | ||
6041 | * before any KVM threads can be running. Unfortunately, we can't | ||
6042 | * bring the TSCs fully up to date with real time, as we aren't yet far | ||
6043 | * enough into CPU bringup that we know how much real time has actually | ||
6044 | * elapsed; our helper function, get_kernel_ns(), will be using boot | ||
6045 | * variables that haven't been updated yet. | ||
6046 | * | ||
6047 | * So we simply find the maximum observed TSC above, then record the | ||
6048 | * adjustment to TSC in each VCPU. When the VCPU later gets loaded, | ||
6049 | * the adjustment will be applied. Note that we accumulate | ||
6050 | * adjustments, in case multiple suspend cycles happen before some VCPU | ||
6051 | * gets a chance to run again. In the event that no KVM threads get a | ||
6052 | * chance to run, we will miss the entire elapsed period, as we'll have | ||
6053 | * reset last_host_tsc, so VCPUs will not have the TSC adjusted and may | ||
6054 | * lose cycle time. This isn't too big a deal, since the loss will be | ||
6055 | * uniform across all VCPUs (not to mention the scenario is extremely | ||
6056 | * unlikely). It is possible that a second hibernate recovery happens | ||
6057 | * much faster than a first, causing the observed TSC here to be | ||
6058 | * smaller; this would require additional padding adjustment, which is | ||
6059 | * why we set last_host_tsc to the local tsc observed here. | ||
6060 | * | ||
6061 | * N.B. - this code below runs only on platforms with reliable TSC, | ||
6062 | * as that is the only way backwards_tsc is set above. Also note | ||
6063 | * that this runs for ALL vcpus, which is not a bug; all VCPUs should | ||
6064 | * have the same delta_cyc adjustment applied if backwards_tsc | ||
6065 | * is detected. Note further, this adjustment is only done once, | ||
6066 | * as we reset last_host_tsc on all VCPUs to stop this from being | ||
6067 | * called multiple times (one for each physical CPU bringup). | ||
6068 | * | ||
6069 | * Platforms with unreliable TSCs don't have to deal with this; they | ||
6070 | * will be compensated by the logic in vcpu_load, which sets the TSC to | ||
6071 | * catchup mode. This will catchup all VCPUs to real time, but cannot | ||
6072 | * guarantee that they stay in perfect synchronization. | ||
6073 | */ | ||
6074 | if (backwards_tsc) { | ||
6075 | u64 delta_cyc = max_tsc - local_tsc; | ||
6076 | list_for_each_entry(kvm, &vm_list, vm_list) { | ||
6077 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
6078 | vcpu->arch.tsc_offset_adjustment += delta_cyc; | ||
6079 | vcpu->arch.last_host_tsc = local_tsc; | ||
6080 | } | ||
6081 | |||
6082 | /* | ||
6083 | * We have to disable TSC offset matching: if you were | ||
6084 | * booting a VM while issuing an S4 host suspend, | ||
6085 | * you may have some problems. Solving this issue is | ||
6086 | * left as an exercise to the reader. | ||
6087 | */ | ||
6088 | kvm->arch.last_tsc_nsec = 0; | ||
6089 | kvm->arch.last_tsc_write = 0; | ||
6090 | } | ||
6091 | |||
6092 | } | ||
6093 | return 0; | ||
5938 | } | 6094 | } |
5939 | 6095 | ||
5940 | void kvm_arch_hardware_disable(void *garbage) | 6096 | void kvm_arch_hardware_disable(void *garbage) |
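A worked example of the compensation above: suppose two VCPUs last recorded host TSCs of 8.0e9 and 8.2e9 cycles before an S4 suspend, and the first post-resume read returns 1.0e6. Then max_tsc = 8.2e9, delta_cyc = 8.2e9 - 1.0e6, and every VCPU's tsc_offset_adjustment grows by that same delta, to be applied on its next vcpu_load. The 0.2e9-cycle spread between the two VCPUs is deliberately not corrected per VCPU: applying one uniform delta preserves whatever relative synchronization the VCPUs already had.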
@@ -5958,6 +6114,11 @@ void kvm_arch_check_processor_compat(void *rtn) | |||
5958 | kvm_x86_ops->check_processor_compatibility(rtn); | 6114 | kvm_x86_ops->check_processor_compatibility(rtn); |
5959 | } | 6115 | } |
5960 | 6116 | ||
6117 | bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) | ||
6118 | { | ||
6119 | return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); | ||
6120 | } | ||
6121 | |||
5961 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | 6122 | int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) |
5962 | { | 6123 | { |
5963 | struct page *page; | 6124 | struct page *page; |
@@ -5980,7 +6141,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
5980 | } | 6141 | } |
5981 | vcpu->arch.pio_data = page_address(page); | 6142 | vcpu->arch.pio_data = page_address(page); |
5982 | 6143 | ||
5983 | kvm_init_tsc_catchup(vcpu, max_tsc_khz); | 6144 | kvm_set_tsc_khz(vcpu, max_tsc_khz); |
5984 | 6145 | ||
5985 | r = kvm_mmu_create(vcpu); | 6146 | r = kvm_mmu_create(vcpu); |
5986 | if (r < 0) | 6147 | if (r < 0) |
@@ -6032,8 +6193,11 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | |||
6032 | free_page((unsigned long)vcpu->arch.pio_data); | 6193 | free_page((unsigned long)vcpu->arch.pio_data); |
6033 | } | 6194 | } |
6034 | 6195 | ||
6035 | int kvm_arch_init_vm(struct kvm *kvm) | 6196 | int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) |
6036 | { | 6197 | { |
6198 | if (type) | ||
6199 | return -EINVAL; | ||
6200 | |||
6037 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 6201 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
6038 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 6202 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
6039 | 6203 | ||
@@ -6093,6 +6257,65 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
6093 | put_page(kvm->arch.ept_identity_pagetable); | 6257 | put_page(kvm->arch.ept_identity_pagetable); |
6094 | } | 6258 | } |
6095 | 6259 | ||
6260 | void kvm_arch_free_memslot(struct kvm_memory_slot *free, | ||
6261 | struct kvm_memory_slot *dont) | ||
6262 | { | ||
6263 | int i; | ||
6264 | |||
6265 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | ||
6266 | if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) { | ||
6267 | vfree(free->arch.lpage_info[i]); | ||
6268 | free->arch.lpage_info[i] = NULL; | ||
6269 | } | ||
6270 | } | ||
6271 | } | ||
6272 | |||
6273 | int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | ||
6274 | { | ||
6275 | int i; | ||
6276 | |||
6277 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | ||
6278 | unsigned long ugfn; | ||
6279 | int lpages; | ||
6280 | int level = i + 2; | ||
6281 | |||
6282 | lpages = gfn_to_index(slot->base_gfn + npages - 1, | ||
6283 | slot->base_gfn, level) + 1; | ||
6284 | |||
6285 | slot->arch.lpage_info[i] = | ||
6286 | vzalloc(lpages * sizeof(*slot->arch.lpage_info[i])); | ||
6287 | if (!slot->arch.lpage_info[i]) | ||
6288 | goto out_free; | ||
6289 | |||
6290 | if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) | ||
6291 | slot->arch.lpage_info[i][0].write_count = 1; | ||
6292 | if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) | ||
6293 | slot->arch.lpage_info[i][lpages - 1].write_count = 1; | ||
6294 | ugfn = slot->userspace_addr >> PAGE_SHIFT; | ||
6295 | /* | ||
6296 | * If the gfn and userspace address are not aligned wrt each | ||
6297 | * other, or if explicitly asked to, disable large page | ||
6298 | * support for this slot | ||
6299 | */ | ||
6300 | if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) || | ||
6301 | !kvm_largepages_enabled()) { | ||
6302 | unsigned long j; | ||
6303 | |||
6304 | for (j = 0; j < lpages; ++j) | ||
6305 | slot->arch.lpage_info[i][j].write_count = 1; | ||
6306 | } | ||
6307 | } | ||
6308 | |||
6309 | return 0; | ||
6310 | |||
6311 | out_free: | ||
6312 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | ||
6313 | vfree(slot->arch.lpage_info[i]); | ||
6314 | slot->arch.lpage_info[i] = NULL; | ||
6315 | } | ||
6316 | return -ENOMEM; | ||
6317 | } | ||
6318 | |||
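To make the sizing above concrete: on x86, level 2 means 2 MB pages with KVM_HPAGE_GFN_SHIFT(2) = 9, so a slot with base_gfn = 0x100 and npages = 1024 gets lpages = ((0x100 + 1023) >> 9) - (0x100 >> 9) + 1 = 2 - 0 + 1 = 3 entries. Since 0x100 is not 512-aligned, both the head and tail checks fire, setting write_count to 1 for the first and last entries and thereby preventing huge mappings from spilling past the slot's misaligned edges.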
6096 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 6319 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
6097 | struct kvm_memory_slot *memslot, | 6320 | struct kvm_memory_slot *memslot, |
6098 | struct kvm_memory_slot old, | 6321 | struct kvm_memory_slot old, |
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 4889655ba784..47936830968c 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c | |||
@@ -115,7 +115,7 @@ static void __save_processor_state(struct saved_context *ctxt) | |||
115 | void save_processor_state(void) | 115 | void save_processor_state(void) |
116 | { | 116 | { |
117 | __save_processor_state(&saved_context); | 117 | __save_processor_state(&saved_context); |
118 | save_sched_clock_state(); | 118 | x86_platform.save_sched_clock_state(); |
119 | } | 119 | } |
120 | #ifdef CONFIG_X86_32 | 120 | #ifdef CONFIG_X86_32 |
121 | EXPORT_SYMBOL(save_processor_state); | 121 | EXPORT_SYMBOL(save_processor_state); |
@@ -231,8 +231,8 @@ static void __restore_processor_state(struct saved_context *ctxt) | |||
231 | /* Needed by apm.c */ | 231 | /* Needed by apm.c */ |
232 | void restore_processor_state(void) | 232 | void restore_processor_state(void) |
233 | { | 233 | { |
234 | x86_platform.restore_sched_clock_state(); | ||
234 | __restore_processor_state(&saved_context); | 235 | __restore_processor_state(&saved_context); |
235 | restore_sched_clock_state(); | ||
236 | } | 236 | } |
237 | #ifdef CONFIG_X86_32 | 237 | #ifdef CONFIG_X86_32 |
238 | EXPORT_SYMBOL(restore_processor_state); | 238 | EXPORT_SYMBOL(restore_processor_state); |
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 68e67e50d028..6c322a90b92f 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h | |||
@@ -162,6 +162,7 @@ struct kvm_pit_config { | |||
162 | #define KVM_EXIT_INTERNAL_ERROR 17 | 162 | #define KVM_EXIT_INTERNAL_ERROR 17 |
163 | #define KVM_EXIT_OSI 18 | 163 | #define KVM_EXIT_OSI 18 |
164 | #define KVM_EXIT_PAPR_HCALL 19 | 164 | #define KVM_EXIT_PAPR_HCALL 19 |
165 | #define KVM_EXIT_S390_UCONTROL 20 | ||
165 | 166 | ||
166 | /* For KVM_EXIT_INTERNAL_ERROR */ | 167 | /* For KVM_EXIT_INTERNAL_ERROR */ |
167 | #define KVM_INTERNAL_ERROR_EMULATION 1 | 168 | #define KVM_INTERNAL_ERROR_EMULATION 1 |
@@ -249,6 +250,11 @@ struct kvm_run { | |||
249 | #define KVM_S390_RESET_CPU_INIT 8 | 250 | #define KVM_S390_RESET_CPU_INIT 8 |
250 | #define KVM_S390_RESET_IPL 16 | 251 | #define KVM_S390_RESET_IPL 16 |
251 | __u64 s390_reset_flags; | 252 | __u64 s390_reset_flags; |
253 | /* KVM_EXIT_S390_UCONTROL */ | ||
254 | struct { | ||
255 | __u64 trans_exc_code; | ||
256 | __u32 pgm_code; | ||
257 | } s390_ucontrol; | ||
252 | /* KVM_EXIT_DCR */ | 258 | /* KVM_EXIT_DCR */ |
253 | struct { | 259 | struct { |
254 | __u32 dcrn; | 260 | __u32 dcrn; |
@@ -273,6 +279,20 @@ struct kvm_run { | |||
273 | /* Fix the size of the union. */ | 279 | /* Fix the size of the union. */ |
274 | char padding[256]; | 280 | char padding[256]; |
275 | }; | 281 | }; |
282 | |||
283 | /* | ||
284 | * Shared registers between kvm and userspace. | ||
285 | * kvm_valid_regs specifies the register classes set by the host | ||
286 | * kvm_dirty_regs specifies the register classes dirtied by userspace | ||
287 | * struct kvm_sync_regs is architecture specific, as well as the | ||
288 | * bits for kvm_valid_regs and kvm_dirty_regs | ||
289 | */ | ||
290 | __u64 kvm_valid_regs; | ||
291 | __u64 kvm_dirty_regs; | ||
292 | union { | ||
293 | struct kvm_sync_regs regs; | ||
294 | char padding[1024]; | ||
295 | } s; | ||
276 | }; | 296 | }; |
277 | 297 | ||
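A hedged sketch of the sync-regs protocol from the userspace side; the register-class bit and the modify_regs() helper are illustrative only, since the real bits and the struct kvm_sync_regs layout are architecture specific:

    #define MY_REG_CLASS (1ULL << 0)        /* illustrative; arch-defined in reality */

    if (run->kvm_valid_regs & MY_REG_CLASS) {
            /* The host populated run->s.regs before returning to us. */
            modify_regs(&run->s.regs);      /* hypothetical helper */
            /* Tell the host which classes we dirtied before the next KVM_RUN. */
            run->kvm_dirty_regs |= MY_REG_CLASS;
    }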
278 | /* for KVM_REGISTER_COALESCED_MMIO / KVM_UNREGISTER_COALESCED_MMIO */ | 298 | /* for KVM_REGISTER_COALESCED_MMIO / KVM_UNREGISTER_COALESCED_MMIO */ |
@@ -431,6 +451,11 @@ struct kvm_ppc_pvinfo { | |||
431 | 451 | ||
432 | #define KVMIO 0xAE | 452 | #define KVMIO 0xAE |
433 | 453 | ||
454 | /* machine type bits, to be used as argument to KVM_CREATE_VM */ | ||
455 | #define KVM_VM_S390_UCONTROL 1 | ||
456 | |||
457 | #define KVM_S390_SIE_PAGE_OFFSET 1 | ||
458 | |||
434 | /* | 459 | /* |
435 | * ioctls for /dev/kvm fds: | 460 | * ioctls for /dev/kvm fds: |
436 | */ | 461 | */ |
@@ -555,9 +580,15 @@ struct kvm_ppc_pvinfo { | |||
555 | #define KVM_CAP_PPC_SMT 64 | 580 | #define KVM_CAP_PPC_SMT 64 |
556 | #define KVM_CAP_PPC_RMA 65 | 581 | #define KVM_CAP_PPC_RMA 65 |
557 | #define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */ | 582 | #define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */ |
583 | #define KVM_CAP_PPC_HIOR 67 | ||
558 | #define KVM_CAP_PPC_PAPR 68 | 584 | #define KVM_CAP_PPC_PAPR 68 |
585 | #define KVM_CAP_SW_TLB 69 | ||
586 | #define KVM_CAP_ONE_REG 70 | ||
559 | #define KVM_CAP_S390_GMAP 71 | 587 | #define KVM_CAP_S390_GMAP 71 |
560 | #define KVM_CAP_TSC_DEADLINE_TIMER 72 | 588 | #define KVM_CAP_TSC_DEADLINE_TIMER 72 |
589 | #define KVM_CAP_S390_UCONTROL 73 | ||
590 | #define KVM_CAP_SYNC_REGS 74 | ||
591 | #define KVM_CAP_PCI_2_3 75 | ||
561 | 592 | ||
562 | #ifdef KVM_CAP_IRQ_ROUTING | 593 | #ifdef KVM_CAP_IRQ_ROUTING |
563 | 594 | ||
@@ -637,6 +668,52 @@ struct kvm_clock_data { | |||
637 | __u32 pad[9]; | 668 | __u32 pad[9]; |
638 | }; | 669 | }; |
639 | 670 | ||
671 | #define KVM_MMU_FSL_BOOKE_NOHV 0 | ||
672 | #define KVM_MMU_FSL_BOOKE_HV 1 | ||
673 | |||
674 | struct kvm_config_tlb { | ||
675 | __u64 params; | ||
676 | __u64 array; | ||
677 | __u32 mmu_type; | ||
678 | __u32 array_len; | ||
679 | }; | ||
680 | |||
681 | struct kvm_dirty_tlb { | ||
682 | __u64 bitmap; | ||
683 | __u32 num_dirty; | ||
684 | }; | ||
685 | |||
686 | /* Available with KVM_CAP_ONE_REG */ | ||
687 | |||
688 | #define KVM_REG_ARCH_MASK 0xff00000000000000ULL | ||
689 | #define KVM_REG_GENERIC 0x0000000000000000ULL | ||
690 | |||
691 | /* | ||
692 | * Architecture specific registers are to be defined in arch headers and | ||
693 | * ORed with the arch identifier. | ||
694 | */ | ||
695 | #define KVM_REG_PPC 0x1000000000000000ULL | ||
696 | #define KVM_REG_X86 0x2000000000000000ULL | ||
697 | #define KVM_REG_IA64 0x3000000000000000ULL | ||
698 | #define KVM_REG_ARM 0x4000000000000000ULL | ||
699 | #define KVM_REG_S390 0x5000000000000000ULL | ||
700 | |||
701 | #define KVM_REG_SIZE_SHIFT 52 | ||
702 | #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL | ||
703 | #define KVM_REG_SIZE_U8 0x0000000000000000ULL | ||
704 | #define KVM_REG_SIZE_U16 0x0010000000000000ULL | ||
705 | #define KVM_REG_SIZE_U32 0x0020000000000000ULL | ||
706 | #define KVM_REG_SIZE_U64 0x0030000000000000ULL | ||
707 | #define KVM_REG_SIZE_U128 0x0040000000000000ULL | ||
708 | #define KVM_REG_SIZE_U256 0x0050000000000000ULL | ||
709 | #define KVM_REG_SIZE_U512 0x0060000000000000ULL | ||
710 | #define KVM_REG_SIZE_U1024 0x0070000000000000ULL | ||
711 | |||
712 | struct kvm_one_reg { | ||
713 | __u64 id; | ||
714 | __u64 addr; | ||
715 | }; | ||
716 | |||
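The ONE_REG calls transfer a single register through an (id, addr) pair. A hedged userspace sketch; the register number below is illustrative, since the per-arch id encodings live in arch headers rather than here:

    __u64 val;
    struct kvm_one_reg reg = {
            /* arch class | 64-bit size | arch-defined register number */
            .id   = KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1,
            .addr = (__u64)(unsigned long)&val,
    };

    if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
            perror("KVM_GET_ONE_REG");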
640 | /* | 717 | /* |
641 | * ioctls for VM fds | 718 | * ioctls for VM fds |
642 | */ | 719 | */ |
@@ -655,6 +732,17 @@ struct kvm_clock_data { | |||
655 | struct kvm_userspace_memory_region) | 732 | struct kvm_userspace_memory_region) |
656 | #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) | 733 | #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) |
657 | #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) | 734 | #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) |
735 | |||
736 | /* enable ucontrol for s390 */ | ||
737 | struct kvm_s390_ucas_mapping { | ||
738 | __u64 user_addr; | ||
739 | __u64 vcpu_addr; | ||
740 | __u64 length; | ||
741 | }; | ||
742 | #define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping) | ||
743 | #define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping) | ||
744 | #define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long) | ||
745 | |||
658 | /* Device model IOC */ | 746 | /* Device model IOC */ |
659 | #define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) | 747 | #define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) |
660 | #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) | 748 | #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) |
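Taken together with the KVM_EXIT_S390_UCONTROL exit defined above, the intended flow is that userspace backs guest storage on demand. A heavily hedged sketch; the page-granular use of trans_exc_code is inferred from the structures in this patch, not specified by it:

    /* On KVM_EXIT_S390_UCONTROL: map backing store at the faulting address. */
    struct kvm_s390_ucas_mapping map = {
            .user_addr = (__u64)(unsigned long)backing, /* our own mapping */
            .vcpu_addr = run->s390_ucontrol.trans_exc_code & ~0xfffULL,
            .length    = 0x1000,
    };

    if (ioctl(vcpu_fd, KVM_S390_UCAS_MAP, &map) < 0)
            perror("KVM_S390_UCAS_MAP");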
@@ -697,6 +785,9 @@ struct kvm_clock_data { | |||
697 | /* Available with KVM_CAP_TSC_CONTROL */ | 785 | /* Available with KVM_CAP_TSC_CONTROL */ |
698 | #define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) | 786 | #define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) |
699 | #define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) | 787 | #define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) |
788 | /* Available with KVM_CAP_PCI_2_3 */ | ||
789 | #define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \ | ||
790 | struct kvm_assigned_pci_dev) | ||
700 | 791 | ||
701 | /* | 792 | /* |
702 | * ioctls for vcpu fds | 793 | * ioctls for vcpu fds |
@@ -763,8 +854,15 @@ struct kvm_clock_data { | |||
763 | #define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) | 854 | #define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) |
764 | /* Available with KVM_CAP_RMA */ | 855 | /* Available with KVM_CAP_RMA */ |
765 | #define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) | 856 | #define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) |
857 | /* Available with KVM_CAP_SW_TLB */ | ||
858 | #define KVM_DIRTY_TLB _IOW(KVMIO, 0xaa, struct kvm_dirty_tlb) | ||
859 | /* Available with KVM_CAP_ONE_REG */ | ||
860 | #define KVM_GET_ONE_REG _IOW(KVMIO, 0xab, struct kvm_one_reg) | ||
861 | #define KVM_SET_ONE_REG _IOW(KVMIO, 0xac, struct kvm_one_reg) | ||
766 | 862 | ||
767 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) | 863 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) |
864 | #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) | ||
865 | #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) | ||
768 | 866 | ||
769 | struct kvm_assigned_pci_dev { | 867 | struct kvm_assigned_pci_dev { |
770 | __u32 assigned_dev_id; | 868 | __u32 assigned_dev_id; |
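A hedged sketch of driving KVM_ASSIGN_SET_INTX_MASK from userspace: the ioctl reuses struct kvm_assigned_pci_dev, with KVM_DEV_ASSIGN_MASK_INTX in flags expressing the guest's desired mask state (remaining fields zeroed here for brevity):

    struct kvm_assigned_pci_dev adev = {
            .assigned_dev_id = dev_id,              /* id used at assignment time */
            .flags = KVM_DEV_ASSIGN_MASK_INTX,      /* omit the flag to unmask */
    };

    if (ioctl(vm_fd, KVM_ASSIGN_SET_INTX_MASK, &adev) < 0)
            perror("KVM_ASSIGN_SET_INTX_MASK");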
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ca1b153585d3..665a260c7e09 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -172,11 +172,6 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) | |||
172 | */ | 172 | */ |
173 | #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1) | 173 | #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1) |
174 | 174 | ||
175 | struct kvm_lpage_info { | ||
176 | unsigned long rmap_pde; | ||
177 | int write_count; | ||
178 | }; | ||
179 | |||
180 | struct kvm_memory_slot { | 175 | struct kvm_memory_slot { |
181 | gfn_t base_gfn; | 176 | gfn_t base_gfn; |
182 | unsigned long npages; | 177 | unsigned long npages; |
@@ -185,7 +180,7 @@ struct kvm_memory_slot { | |||
185 | unsigned long *dirty_bitmap; | 180 | unsigned long *dirty_bitmap; |
186 | unsigned long *dirty_bitmap_head; | 181 | unsigned long *dirty_bitmap_head; |
187 | unsigned long nr_dirty_pages; | 182 | unsigned long nr_dirty_pages; |
188 | struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; | 183 | struct kvm_arch_memory_slot arch; |
189 | unsigned long userspace_addr; | 184 | unsigned long userspace_addr; |
190 | int user_alloc; | 185 | int user_alloc; |
191 | int id; | 186 | int id; |
@@ -377,6 +372,9 @@ int kvm_set_memory_region(struct kvm *kvm, | |||
377 | int __kvm_set_memory_region(struct kvm *kvm, | 372 | int __kvm_set_memory_region(struct kvm *kvm, |
378 | struct kvm_userspace_memory_region *mem, | 373 | struct kvm_userspace_memory_region *mem, |
379 | int user_alloc); | 374 | int user_alloc); |
375 | void kvm_arch_free_memslot(struct kvm_memory_slot *free, | ||
376 | struct kvm_memory_slot *dont); | ||
377 | int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); | ||
380 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 378 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
381 | struct kvm_memory_slot *memslot, | 379 | struct kvm_memory_slot *memslot, |
382 | struct kvm_memory_slot old, | 380 | struct kvm_memory_slot old, |
@@ -386,6 +384,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
386 | struct kvm_userspace_memory_region *mem, | 384 | struct kvm_userspace_memory_region *mem, |
387 | struct kvm_memory_slot old, | 385 | struct kvm_memory_slot old, |
388 | int user_alloc); | 386 | int user_alloc); |
387 | bool kvm_largepages_enabled(void); | ||
389 | void kvm_disable_largepages(void); | 388 | void kvm_disable_largepages(void); |
390 | void kvm_arch_flush_shadow(struct kvm *kvm); | 389 | void kvm_arch_flush_shadow(struct kvm *kvm); |
391 | 390 | ||
@@ -451,6 +450,7 @@ long kvm_arch_dev_ioctl(struct file *filp, | |||
451 | unsigned int ioctl, unsigned long arg); | 450 | unsigned int ioctl, unsigned long arg); |
452 | long kvm_arch_vcpu_ioctl(struct file *filp, | 451 | long kvm_arch_vcpu_ioctl(struct file *filp, |
453 | unsigned int ioctl, unsigned long arg); | 452 | unsigned int ioctl, unsigned long arg); |
453 | int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf); | ||
454 | 454 | ||
455 | int kvm_dev_ioctl_check_extension(long ext); | 455 | int kvm_dev_ioctl_check_extension(long ext); |
456 | 456 | ||
@@ -521,7 +521,7 @@ static inline void kvm_arch_free_vm(struct kvm *kvm) | |||
521 | } | 521 | } |
522 | #endif | 522 | #endif |
523 | 523 | ||
524 | int kvm_arch_init_vm(struct kvm *kvm); | 524 | int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); |
525 | void kvm_arch_destroy_vm(struct kvm *kvm); | 525 | void kvm_arch_destroy_vm(struct kvm *kvm); |
526 | void kvm_free_all_assigned_devices(struct kvm *kvm); | 526 | void kvm_free_all_assigned_devices(struct kvm *kvm); |
527 | void kvm_arch_sync_events(struct kvm *kvm); | 527 | void kvm_arch_sync_events(struct kvm *kvm); |
@@ -547,6 +547,7 @@ struct kvm_assigned_dev_kernel { | |||
547 | unsigned int entries_nr; | 547 | unsigned int entries_nr; |
548 | int host_irq; | 548 | int host_irq; |
549 | bool host_irq_disabled; | 549 | bool host_irq_disabled; |
550 | bool pci_2_3; | ||
550 | struct msix_entry *host_msix_entries; | 551 | struct msix_entry *host_msix_entries; |
551 | int guest_irq; | 552 | int guest_irq; |
552 | struct msix_entry *guest_msix_entries; | 553 | struct msix_entry *guest_msix_entries; |
@@ -556,6 +557,7 @@ struct kvm_assigned_dev_kernel { | |||
556 | struct pci_dev *dev; | 557 | struct pci_dev *dev; |
557 | struct kvm *kvm; | 558 | struct kvm *kvm; |
558 | spinlock_t intx_lock; | 559 | spinlock_t intx_lock; |
560 | spinlock_t intx_mask_lock; | ||
559 | char irq_name[32]; | 561 | char irq_name[32]; |
560 | struct pci_saved_state *pci_saved_state; | 562 | struct pci_saved_state *pci_saved_state; |
561 | }; | 563 | }; |
@@ -651,11 +653,43 @@ static inline void kvm_guest_exit(void) | |||
651 | current->flags &= ~PF_VCPU; | 653 | current->flags &= ~PF_VCPU; |
652 | } | 654 | } |
653 | 655 | ||
656 | /* | ||
657 | * search_memslots() and __gfn_to_memslot() are here because they are | ||
658 | * used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c. | ||
659 | * gfn_to_memslot() itself isn't here as an inline because that would | ||
660 | * bloat other code too much. | ||
661 | */ | ||
662 | static inline struct kvm_memory_slot * | ||
663 | search_memslots(struct kvm_memslots *slots, gfn_t gfn) | ||
664 | { | ||
665 | struct kvm_memory_slot *memslot; | ||
666 | |||
667 | kvm_for_each_memslot(memslot, slots) | ||
668 | if (gfn >= memslot->base_gfn && | ||
669 | gfn < memslot->base_gfn + memslot->npages) | ||
670 | return memslot; | ||
671 | |||
672 | return NULL; | ||
673 | } | ||
674 | |||
675 | static inline struct kvm_memory_slot * | ||
676 | __gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn) | ||
677 | { | ||
678 | return search_memslots(slots, gfn); | ||
679 | } | ||
680 | |||
654 | static inline int memslot_id(struct kvm *kvm, gfn_t gfn) | 681 | static inline int memslot_id(struct kvm *kvm, gfn_t gfn) |
655 | { | 682 | { |
656 | return gfn_to_memslot(kvm, gfn)->id; | 683 | return gfn_to_memslot(kvm, gfn)->id; |
657 | } | 684 | } |
658 | 685 | ||
686 | static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) | ||
687 | { | ||
688 | /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ | ||
689 | return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - | ||
690 | (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); | ||
691 | } | ||
692 | |||
659 | static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, | 693 | static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, |
660 | gfn_t gfn) | 694 | gfn_t gfn) |
661 | { | 695 | { |
@@ -702,12 +736,16 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se | |||
702 | if (unlikely(vcpu->kvm->mmu_notifier_count)) | 736 | if (unlikely(vcpu->kvm->mmu_notifier_count)) |
703 | return 1; | 737 | return 1; |
704 | /* | 738 | /* |
705 | * Both reads happen under the mmu_lock and both values are | 739 | * Ensure the read of mmu_notifier_count happens before the read |
706 | * modified under mmu_lock, so there's no need of smb_rmb() | 740 | * of mmu_notifier_seq. This interacts with the smp_wmb() in |
707 | * here in between, otherwise mmu_notifier_count should be | 741 | * mmu_notifier_invalidate_range_end to make sure that the caller |
708 | * read before mmu_notifier_seq, see | 742 | * either sees the old (non-zero) value of mmu_notifier_count or |
709 | * mmu_notifier_invalidate_range_end write side. | 743 | * the new (incremented) value of mmu_notifier_seq. |
744 | * PowerPC Book3s HV KVM calls this under a per-page lock | ||
745 | * rather than under kvm->mmu_lock, for scalability, so | ||
746 | * can't rely on kvm->mmu_lock to keep things ordered. | ||
710 | */ | 747 | */ |
748 | smp_rmb(); | ||
711 | if (vcpu->kvm->mmu_notifier_seq != mmu_seq) | 749 | if (vcpu->kvm->mmu_notifier_seq != mmu_seq) |
712 | return 1; | 750 | return 1; |
713 | return 0; | 751 | return 0; |
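The barrier pairing the rewritten comment describes condenses to a classic seqcount-like pattern. A schematic sketch, not the kernel code itself:

    /* Writer (invalidate_range_end), under mmu_lock: */
    kvm->mmu_notifier_seq++;
    smp_wmb();                              /* seq store visible before count store */
    kvm->mmu_notifier_count--;

    /* Reader (mmu_notifier_retry), possibly without mmu_lock: */
    if (unlikely(kvm->mmu_notifier_count))  /* sees the old, non-zero count: retry */
            return 1;
    smp_rmb();                              /* count load ordered before seq load */
    if (kvm->mmu_notifier_seq != mmu_seq)   /* or sees the new seq value: retry */
            return 1;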
@@ -770,6 +808,13 @@ static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) | |||
770 | { | 808 | { |
771 | return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id; | 809 | return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id; |
772 | } | 810 | } |
811 | |||
812 | bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu); | ||
813 | |||
814 | #else | ||
815 | |||
816 | static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; } | ||
817 | |||
773 | #endif | 818 | #endif |
774 | 819 | ||
775 | #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT | 820 | #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT |
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 758e3b36d4cf..01f572c10c71 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c | |||
@@ -49,31 +49,73 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel | |||
49 | index = i; | 49 | index = i; |
50 | break; | 50 | break; |
51 | } | 51 | } |
52 | if (index < 0) { | 52 | if (index < 0) |
53 | printk(KERN_WARNING "Failed to find correlated MSI-X entry!\n"); | 53 | printk(KERN_WARNING "Failed to find correlated MSI-X entry!\n"); |
54 | return 0; | ||
55 | } | ||
56 | 54 | ||
57 | return index; | 55 | return index; |
58 | } | 56 | } |
59 | 57 | ||
60 | static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id) | 58 | static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id) |
61 | { | 59 | { |
62 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | 60 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; |
61 | int ret; | ||
62 | |||
63 | spin_lock(&assigned_dev->intx_lock); | ||
64 | if (pci_check_and_mask_intx(assigned_dev->dev)) { | ||
65 | assigned_dev->host_irq_disabled = true; | ||
66 | ret = IRQ_WAKE_THREAD; | ||
67 | } else | ||
68 | ret = IRQ_NONE; | ||
69 | spin_unlock(&assigned_dev->intx_lock); | ||
70 | |||
71 | return ret; | ||
72 | } | ||
63 | 73 | ||
64 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX) { | 74 | static void |
65 | spin_lock(&assigned_dev->intx_lock); | 75 | kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev, |
76 | int vector) | ||
77 | { | ||
78 | if (unlikely(assigned_dev->irq_requested_type & | ||
79 | KVM_DEV_IRQ_GUEST_INTX)) { | ||
80 | spin_lock(&assigned_dev->intx_mask_lock); | ||
81 | if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) | ||
82 | kvm_set_irq(assigned_dev->kvm, | ||
83 | assigned_dev->irq_source_id, vector, 1); | ||
84 | spin_unlock(&assigned_dev->intx_mask_lock); | ||
85 | } else | ||
86 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | ||
87 | vector, 1); | ||
88 | } | ||
89 | |||
90 | static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id) | ||
91 | { | ||
92 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | ||
93 | |||
94 | if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { | ||
95 | spin_lock_irq(&assigned_dev->intx_lock); | ||
66 | disable_irq_nosync(irq); | 96 | disable_irq_nosync(irq); |
67 | assigned_dev->host_irq_disabled = true; | 97 | assigned_dev->host_irq_disabled = true; |
68 | spin_unlock(&assigned_dev->intx_lock); | 98 | spin_unlock_irq(&assigned_dev->intx_lock); |
69 | } | 99 | } |
70 | 100 | ||
71 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | 101 | kvm_assigned_dev_raise_guest_irq(assigned_dev, |
72 | assigned_dev->guest_irq, 1); | 102 | assigned_dev->guest_irq); |
73 | 103 | ||
74 | return IRQ_HANDLED; | 104 | return IRQ_HANDLED; |
75 | } | 105 | } |
76 | 106 | ||
107 | #ifdef __KVM_HAVE_MSI | ||
108 | static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id) | ||
109 | { | ||
110 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | ||
111 | |||
112 | kvm_assigned_dev_raise_guest_irq(assigned_dev, | ||
113 | assigned_dev->guest_irq); | ||
114 | |||
115 | return IRQ_HANDLED; | ||
116 | } | ||
117 | #endif | ||
118 | |||
77 | #ifdef __KVM_HAVE_MSIX | 119 | #ifdef __KVM_HAVE_MSIX |
78 | static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) | 120 | static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) |
79 | { | 121 | { |
@@ -83,8 +125,7 @@ static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) | |||
83 | 125 | ||
84 | if (index >= 0) { | 126 | if (index >= 0) { |
85 | vector = assigned_dev->guest_msix_entries[index].vector; | 127 | vector = assigned_dev->guest_msix_entries[index].vector; |
86 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | 128 | kvm_assigned_dev_raise_guest_irq(assigned_dev, vector); |
87 | vector, 1); | ||
88 | } | 129 | } |
89 | 130 | ||
90 | return IRQ_HANDLED; | 131 | return IRQ_HANDLED; |
@@ -100,15 +141,31 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | |||
100 | 141 | ||
101 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); | 142 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); |
102 | 143 | ||
103 | /* The guest irq may be shared so this ack may be | 144 | spin_lock(&dev->intx_mask_lock); |
104 | * from another device. | 145 | |
105 | */ | 146 | if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) { |
106 | spin_lock(&dev->intx_lock); | 147 | bool reassert = false; |
107 | if (dev->host_irq_disabled) { | 148 | |
108 | enable_irq(dev->host_irq); | 149 | spin_lock_irq(&dev->intx_lock); |
109 | dev->host_irq_disabled = false; | 150 | /* |
151 | * The guest IRQ may be shared so this ack can come from an | ||
152 | * IRQ for another guest device. | ||
153 | */ | ||
154 | if (dev->host_irq_disabled) { | ||
155 | if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) | ||
156 | enable_irq(dev->host_irq); | ||
157 | else if (!pci_check_and_unmask_intx(dev->dev)) | ||
158 | reassert = true; | ||
159 | dev->host_irq_disabled = reassert; | ||
160 | } | ||
161 | spin_unlock_irq(&dev->intx_lock); | ||
162 | |||
163 | if (reassert) | ||
164 | kvm_set_irq(dev->kvm, dev->irq_source_id, | ||
165 | dev->guest_irq, 1); | ||
110 | } | 166 | } |
111 | spin_unlock(&dev->intx_lock); | 167 | |
168 | spin_unlock(&dev->intx_mask_lock); | ||
112 | } | 169 | } |
113 | 170 | ||
114 | static void deassign_guest_irq(struct kvm *kvm, | 171 | static void deassign_guest_irq(struct kvm *kvm, |
@@ -156,7 +213,15 @@ static void deassign_host_irq(struct kvm *kvm, | |||
156 | pci_disable_msix(assigned_dev->dev); | 213 | pci_disable_msix(assigned_dev->dev); |
157 | } else { | 214 | } else { |
158 | /* Deal with MSI and INTx */ | 215 | /* Deal with MSI and INTx */ |
159 | disable_irq(assigned_dev->host_irq); | 216 | if ((assigned_dev->irq_requested_type & |
217 | KVM_DEV_IRQ_HOST_INTX) && | ||
218 | (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { | ||
219 | spin_lock_irq(&assigned_dev->intx_lock); | ||
220 | pci_intx(assigned_dev->dev, false); | ||
221 | spin_unlock_irq(&assigned_dev->intx_lock); | ||
222 | synchronize_irq(assigned_dev->host_irq); | ||
223 | } else | ||
224 | disable_irq(assigned_dev->host_irq); | ||
160 | 225 | ||
161 | free_irq(assigned_dev->host_irq, assigned_dev); | 226 | free_irq(assigned_dev->host_irq, assigned_dev); |
162 | 227 | ||
@@ -237,15 +302,34 @@ void kvm_free_all_assigned_devices(struct kvm *kvm) | |||
237 | static int assigned_device_enable_host_intx(struct kvm *kvm, | 302 | static int assigned_device_enable_host_intx(struct kvm *kvm, |
238 | struct kvm_assigned_dev_kernel *dev) | 303 | struct kvm_assigned_dev_kernel *dev) |
239 | { | 304 | { |
305 | irq_handler_t irq_handler; | ||
306 | unsigned long flags; | ||
307 | |||
240 | dev->host_irq = dev->dev->irq; | 308 | dev->host_irq = dev->dev->irq; |
241 | /* Even though this is PCI, we don't want to use shared | 309 | |
242 | * interrupts. Sharing host devices with guest-assigned devices | 310 | /* |
243 | * on the same interrupt line is not a happy situation: there | 311 | * We can only share the IRQ line with other host devices if we are |
244 | * are going to be long delays in accepting, acking, etc. | 312 | * able to disable the IRQ source at device level, independently of |
313 | * the guest driver. Otherwise host devices may suffer from unbounded | ||
314 | * IRQ latencies when the guest keeps the line asserted. | ||
245 | */ | 315 | */ |
246 | if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, | 316 | if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) { |
247 | IRQF_ONESHOT, dev->irq_name, dev)) | 317 | irq_handler = kvm_assigned_dev_intx; |
318 | flags = IRQF_SHARED; | ||
319 | } else { | ||
320 | irq_handler = NULL; | ||
321 | flags = IRQF_ONESHOT; | ||
322 | } | ||
323 | if (request_threaded_irq(dev->host_irq, irq_handler, | ||
324 | kvm_assigned_dev_thread_intx, flags, | ||
325 | dev->irq_name, dev)) | ||
248 | return -EIO; | 326 | return -EIO; |
327 | |||
328 | if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) { | ||
329 | spin_lock_irq(&dev->intx_lock); | ||
330 | pci_intx(dev->dev, true); | ||
331 | spin_unlock_irq(&dev->intx_lock); | ||
332 | } | ||
249 | return 0; | 333 | return 0; |
250 | } | 334 | } |
251 | 335 | ||
@@ -262,8 +346,9 @@ static int assigned_device_enable_host_msi(struct kvm *kvm, | |||
262 | } | 346 | } |
263 | 347 | ||
264 | dev->host_irq = dev->dev->irq; | 348 | dev->host_irq = dev->dev->irq; |
265 | if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, | 349 | if (request_threaded_irq(dev->host_irq, NULL, |
266 | 0, dev->irq_name, dev)) { | 350 | kvm_assigned_dev_thread_msi, 0, |
351 | dev->irq_name, dev)) { | ||
267 | pci_disable_msi(dev->dev); | 352 | pci_disable_msi(dev->dev); |
268 | return -EIO; | 353 | return -EIO; |
269 | } | 354 | } |
@@ -321,7 +406,6 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm, | |||
321 | { | 406 | { |
322 | dev->guest_irq = irq->guest_irq; | 407 | dev->guest_irq = irq->guest_irq; |
323 | dev->ack_notifier.gsi = -1; | 408 | dev->ack_notifier.gsi = -1; |
324 | dev->host_irq_disabled = false; | ||
325 | return 0; | 409 | return 0; |
326 | } | 410 | } |
327 | #endif | 411 | #endif |
@@ -333,7 +417,6 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm, | |||
333 | { | 417 | { |
334 | dev->guest_irq = irq->guest_irq; | 418 | dev->guest_irq = irq->guest_irq; |
335 | dev->ack_notifier.gsi = -1; | 419 | dev->ack_notifier.gsi = -1; |
336 | dev->host_irq_disabled = false; | ||
337 | return 0; | 420 | return 0; |
338 | } | 421 | } |
339 | #endif | 422 | #endif |
@@ -367,6 +450,7 @@ static int assign_host_irq(struct kvm *kvm, | |||
367 | default: | 450 | default: |
368 | r = -EINVAL; | 451 | r = -EINVAL; |
369 | } | 452 | } |
453 | dev->host_irq_disabled = false; | ||
370 | 454 | ||
371 | if (!r) | 455 | if (!r) |
372 | dev->irq_requested_type |= host_irq_type; | 456 | dev->irq_requested_type |= host_irq_type; |
@@ -468,6 +552,7 @@ static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, | |||
468 | { | 552 | { |
469 | int r = -ENODEV; | 553 | int r = -ENODEV; |
470 | struct kvm_assigned_dev_kernel *match; | 554 | struct kvm_assigned_dev_kernel *match; |
555 | unsigned long irq_type; | ||
471 | 556 | ||
472 | mutex_lock(&kvm->lock); | 557 | mutex_lock(&kvm->lock); |
473 | 558 | ||
@@ -476,7 +561,9 @@ static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, | |||
476 | if (!match) | 561 | if (!match) |
477 | goto out; | 562 | goto out; |
478 | 563 | ||
479 | r = kvm_deassign_irq(kvm, match, assigned_irq->flags); | 564 | irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK | |
565 | KVM_DEV_IRQ_GUEST_MASK); | ||
566 | r = kvm_deassign_irq(kvm, match, irq_type); | ||
480 | out: | 567 | out: |
481 | mutex_unlock(&kvm->lock); | 568 | mutex_unlock(&kvm->lock); |
482 | return r; | 569 | return r; |
@@ -609,6 +696,10 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | |||
609 | if (!match->pci_saved_state) | 696 | if (!match->pci_saved_state) |
610 | printk(KERN_DEBUG "%s: Couldn't store %s saved state\n", | 697 | printk(KERN_DEBUG "%s: Couldn't store %s saved state\n", |
611 | __func__, dev_name(&dev->dev)); | 698 | __func__, dev_name(&dev->dev)); |
699 | |||
700 | if (!pci_intx_mask_supported(dev)) | ||
701 | assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3; | ||
702 | |||
612 | match->assigned_dev_id = assigned_dev->assigned_dev_id; | 703 | match->assigned_dev_id = assigned_dev->assigned_dev_id; |
613 | match->host_segnr = assigned_dev->segnr; | 704 | match->host_segnr = assigned_dev->segnr; |
614 | match->host_busnr = assigned_dev->busnr; | 705 | match->host_busnr = assigned_dev->busnr; |
@@ -616,6 +707,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | |||
616 | match->flags = assigned_dev->flags; | 707 | match->flags = assigned_dev->flags; |
617 | match->dev = dev; | 708 | match->dev = dev; |
618 | spin_lock_init(&match->intx_lock); | 709 | spin_lock_init(&match->intx_lock); |
710 | spin_lock_init(&match->intx_mask_lock); | ||
619 | match->irq_source_id = -1; | 711 | match->irq_source_id = -1; |
620 | match->kvm = kvm; | 712 | match->kvm = kvm; |
621 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; | 713 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; |
@@ -761,6 +853,55 @@ msix_entry_out: | |||
761 | } | 853 | } |
762 | #endif | 854 | #endif |
763 | 855 | ||
856 | static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm, | ||
857 | struct kvm_assigned_pci_dev *assigned_dev) | ||
858 | { | ||
859 | int r = 0; | ||
860 | struct kvm_assigned_dev_kernel *match; | ||
861 | |||
862 | mutex_lock(&kvm->lock); | ||
863 | |||
864 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
865 | assigned_dev->assigned_dev_id); | ||
866 | if (!match) { | ||
867 | r = -ENODEV; | ||
868 | goto out; | ||
869 | } | ||
870 | |||
871 | spin_lock(&match->intx_mask_lock); | ||
872 | |||
873 | match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX; | ||
874 | match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX; | ||
875 | |||
876 | if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { | ||
877 | if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) { | ||
878 | kvm_set_irq(match->kvm, match->irq_source_id, | ||
879 | match->guest_irq, 0); | ||
880 | /* | ||
881 | * Masking at hardware-level is performed on demand, | ||
882 | * i.e. when an IRQ actually arrives at the host. | ||
883 | */ | ||
884 | } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { | ||
885 | /* | ||
886 | * Unmask the IRQ line if required. Unmasking at | ||
887 | * device level will be performed by user space. | ||
888 | */ | ||
889 | spin_lock_irq(&match->intx_lock); | ||
890 | if (match->host_irq_disabled) { | ||
891 | enable_irq(match->host_irq); | ||
892 | match->host_irq_disabled = false; | ||
893 | } | ||
894 | spin_unlock_irq(&match->intx_lock); | ||
895 | } | ||
896 | } | ||
897 | |||
898 | spin_unlock(&match->intx_mask_lock); | ||
899 | |||
900 | out: | ||
901 | mutex_unlock(&kvm->lock); | ||
902 | return r; | ||
903 | } | ||
904 | |||
764 | long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | 905 | long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, |
765 | unsigned long arg) | 906 | unsigned long arg) |
766 | { | 907 | { |
@@ -868,6 +1009,15 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | |||
868 | break; | 1009 | break; |
869 | } | 1010 | } |
870 | #endif | 1011 | #endif |
1012 | case KVM_ASSIGN_SET_INTX_MASK: { | ||
1013 | struct kvm_assigned_pci_dev assigned_dev; | ||
1014 | |||
1015 | r = -EFAULT; | ||
1016 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
1017 | goto out; | ||
1018 | r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev); | ||
1019 | break; | ||
1020 | } | ||
871 | default: | 1021 | default: |
872 | r = -ENOTTY; | 1022 | r = -ENOTTY; |
873 | break; | 1023 | break; |
@@ -875,4 +1025,3 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | |||
875 | out: | 1025 | out: |
876 | return r; | 1026 | return r; |
877 | } | 1027 | } |
878 | |||
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a91f980077d8..42b73930a6de 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -203,7 +203,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) | |||
203 | 203 | ||
204 | void kvm_flush_remote_tlbs(struct kvm *kvm) | 204 | void kvm_flush_remote_tlbs(struct kvm *kvm) |
205 | { | 205 | { |
206 | int dirty_count = kvm->tlbs_dirty; | 206 | long dirty_count = kvm->tlbs_dirty; |
207 | 207 | ||
208 | smp_mb(); | 208 | smp_mb(); |
209 | if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) | 209 | if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) |
@@ -289,15 +289,15 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, | |||
289 | */ | 289 | */ |
290 | idx = srcu_read_lock(&kvm->srcu); | 290 | idx = srcu_read_lock(&kvm->srcu); |
291 | spin_lock(&kvm->mmu_lock); | 291 | spin_lock(&kvm->mmu_lock); |
292 | |||
292 | kvm->mmu_notifier_seq++; | 293 | kvm->mmu_notifier_seq++; |
293 | need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty; | 294 | need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty; |
294 | spin_unlock(&kvm->mmu_lock); | ||
295 | srcu_read_unlock(&kvm->srcu, idx); | ||
296 | |||
297 | /* we have to flush the tlb before the pages can be freed */ | 295 | /* we have to flush the tlb before the pages can be freed */ |
298 | if (need_tlb_flush) | 296 | if (need_tlb_flush) |
299 | kvm_flush_remote_tlbs(kvm); | 297 | kvm_flush_remote_tlbs(kvm); |
300 | 298 | ||
299 | spin_unlock(&kvm->mmu_lock); | ||
300 | srcu_read_unlock(&kvm->srcu, idx); | ||
301 | } | 301 | } |
302 | 302 | ||
303 | static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, | 303 | static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, |
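This hunk, and the two notifier hunks below, move the remote TLB flush back inside mmu_lock and the SRCU read side. The ordering matters because once the notifier returns, the MM is free to reuse the page, so any stale remote TLB entry must already be gone by then; doing the flush under mmu_lock also lets the page-fault path reason about mmu_notifier_seq without taking extra locks. The shape all three callbacks now share (declarations elided):

    idx = srcu_read_lock(&kvm->srcu);
    spin_lock(&kvm->mmu_lock);

    kvm->mmu_notifier_seq++;
    need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty;
    if (need_tlb_flush)
        kvm_flush_remote_tlbs(kvm);  /* before the MM can free the page */

    spin_unlock(&kvm->mmu_lock);
    srcu_read_unlock(&kvm->srcu, idx);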
@@ -335,12 +335,12 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, | |||
335 | for (; start < end; start += PAGE_SIZE) | 335 | for (; start < end; start += PAGE_SIZE) |
336 | need_tlb_flush |= kvm_unmap_hva(kvm, start); | 336 | need_tlb_flush |= kvm_unmap_hva(kvm, start); |
337 | need_tlb_flush |= kvm->tlbs_dirty; | 337 | need_tlb_flush |= kvm->tlbs_dirty; |
338 | spin_unlock(&kvm->mmu_lock); | ||
339 | srcu_read_unlock(&kvm->srcu, idx); | ||
340 | |||
341 | /* we've to flush the tlb before the pages can be freed */ | 338 | /* we've to flush the tlb before the pages can be freed */ |
342 | if (need_tlb_flush) | 339 | if (need_tlb_flush) |
343 | kvm_flush_remote_tlbs(kvm); | 340 | kvm_flush_remote_tlbs(kvm); |
341 | |||
342 | spin_unlock(&kvm->mmu_lock); | ||
343 | srcu_read_unlock(&kvm->srcu, idx); | ||
344 | } | 344 | } |
345 | 345 | ||
346 | static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, | 346 | static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, |
@@ -357,11 +357,11 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, | |||
357 | * been freed. | 357 | * been freed. |
358 | */ | 358 | */ |
359 | kvm->mmu_notifier_seq++; | 359 | kvm->mmu_notifier_seq++; |
360 | smp_wmb(); | ||
360 | /* | 361 | /* |
361 | * The above sequence increase must be visible before the | 362 | * The above sequence increase must be visible before the |
362 | * below count decrease but both values are read by the kvm | 363 | * below count decrease, which is ensured by the smp_wmb above |
363 | * page fault under mmu_lock spinlock so we don't need to add | 364 | * in conjunction with the smp_rmb in mmu_notifier_retry(). |
364 | * a smb_wmb() here in between the two. | ||
365 | */ | 365 | */ |
366 | kvm->mmu_notifier_count--; | 366 | kvm->mmu_notifier_count--; |
367 | spin_unlock(&kvm->mmu_lock); | 367 | spin_unlock(&kvm->mmu_lock); |
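The removed comment justified the lack of a barrier by noting that readers held mmu_lock; since this series wants mmu_notifier_retry() usable locklessly, the sequence increment now needs an explicit smp_wmb() that pairs with an smp_rmb() on the reader side. A simplified sketch of that reader, assuming the 3.x-era signature taking a vcpu:

    /* Reader-side sketch: the page-fault path records mmu_seq before
     * translating, then re-checks here after pinning the page. */
    static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu,
                                         unsigned long mmu_seq)
    {
        if (unlikely(vcpu->kvm->mmu_notifier_count))
            return 1;   /* invalidation in flight: retry */
        /*
         * Pairs with the smp_wmb() above: a reader that sees the
         * decremented count must also see the incremented sequence.
         */
        smp_rmb();
        if (vcpu->kvm->mmu_notifier_seq != mmu_seq)
            return 1;   /* something was invalidated meanwhile */
        return 0;
    }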
@@ -378,13 +378,14 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, | |||
378 | 378 | ||
379 | idx = srcu_read_lock(&kvm->srcu); | 379 | idx = srcu_read_lock(&kvm->srcu); |
380 | spin_lock(&kvm->mmu_lock); | 380 | spin_lock(&kvm->mmu_lock); |
381 | young = kvm_age_hva(kvm, address); | ||
382 | spin_unlock(&kvm->mmu_lock); | ||
383 | srcu_read_unlock(&kvm->srcu, idx); | ||
384 | 381 | ||
382 | young = kvm_age_hva(kvm, address); | ||
385 | if (young) | 383 | if (young) |
386 | kvm_flush_remote_tlbs(kvm); | 384 | kvm_flush_remote_tlbs(kvm); |
387 | 385 | ||
386 | spin_unlock(&kvm->mmu_lock); | ||
387 | srcu_read_unlock(&kvm->srcu, idx); | ||
388 | |||
388 | return young; | 389 | return young; |
389 | } | 390 | } |
390 | 391 | ||
@@ -449,7 +450,7 @@ static void kvm_init_memslots_id(struct kvm *kvm) | |||
449 | slots->id_to_index[i] = slots->memslots[i].id = i; | 450 | slots->id_to_index[i] = slots->memslots[i].id = i; |
450 | } | 451 | } |
451 | 452 | ||
452 | static struct kvm *kvm_create_vm(void) | 453 | static struct kvm *kvm_create_vm(unsigned long type) |
453 | { | 454 | { |
454 | int r, i; | 455 | int r, i; |
455 | struct kvm *kvm = kvm_arch_alloc_vm(); | 456 | struct kvm *kvm = kvm_arch_alloc_vm(); |
@@ -457,7 +458,7 @@ static struct kvm *kvm_create_vm(void) | |||
457 | if (!kvm) | 458 | if (!kvm) |
458 | return ERR_PTR(-ENOMEM); | 459 | return ERR_PTR(-ENOMEM); |
459 | 460 | ||
460 | r = kvm_arch_init_vm(kvm); | 461 | r = kvm_arch_init_vm(kvm, type); |
461 | if (r) | 462 | if (r) |
462 | goto out_err_nodisable; | 463 | goto out_err_nodisable; |
463 | 464 | ||
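kvm_create_vm() now just forwards the machine type to kvm_arch_init_vm(), so type validation becomes an architecture decision. A sketch of the guard an architecture with no machine types would add, modeled on the non-s390 behavior in this series:

    int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
    {
        if (type)
            return -EINVAL;  /* only the default type 0 is valid here */

        /* ...usual per-arch VM initialization continues... */
        return 0;
    }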
@@ -535,21 +536,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) | |||
535 | static void kvm_free_physmem_slot(struct kvm_memory_slot *free, | 536 | static void kvm_free_physmem_slot(struct kvm_memory_slot *free, |
536 | struct kvm_memory_slot *dont) | 537 | struct kvm_memory_slot *dont) |
537 | { | 538 | { |
538 | int i; | ||
539 | |||
540 | if (!dont || free->rmap != dont->rmap) | 539 | if (!dont || free->rmap != dont->rmap) |
541 | vfree(free->rmap); | 540 | vfree(free->rmap); |
542 | 541 | ||
543 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) | 542 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) |
544 | kvm_destroy_dirty_bitmap(free); | 543 | kvm_destroy_dirty_bitmap(free); |
545 | 544 | ||
546 | 545 | kvm_arch_free_memslot(free, dont); | |
547 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | ||
548 | if (!dont || free->lpage_info[i] != dont->lpage_info[i]) { | ||
549 | vfree(free->lpage_info[i]); | ||
550 | free->lpage_info[i] = NULL; | ||
551 | } | ||
552 | } | ||
553 | 546 | ||
554 | free->npages = 0; | 547 | free->npages = 0; |
555 | free->rmap = NULL; | 548 | free->rmap = NULL; |
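The open-coded lpage_info teardown is replaced by the kvm_arch_free_memslot() hook, matching the new per-slot arch state (see the memset of new.arch further down). A sketch of an x86-style implementation, assuming the lpage_info arrays moved into slot->arch:

    void kvm_arch_free_memslot(struct kvm_memory_slot *free,
                               struct kvm_memory_slot *dont)
    {
        int i;

        for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
            if (!dont ||
                free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
                vfree(free->arch.lpage_info[i]);
                free->arch.lpage_info[i] = NULL;
            }
        }
    }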
@@ -616,7 +609,6 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) | |||
616 | return 0; | 609 | return 0; |
617 | } | 610 | } |
618 | 611 | ||
619 | #ifndef CONFIG_S390 | ||
620 | /* | 612 | /* |
621 | * Allocation size is twice as large as the actual dirty bitmap size. | 613 | * Allocation size is twice as large as the actual dirty bitmap size. |
622 | * This makes it possible to do double buffering: see x86's | 614 | * This makes it possible to do double buffering: see x86's |
@@ -624,6 +616,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) | |||
624 | */ | 616 | */ |
625 | static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) | 617 | static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) |
626 | { | 618 | { |
619 | #ifndef CONFIG_S390 | ||
627 | unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); | 620 | unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); |
628 | 621 | ||
629 | if (dirty_bytes > PAGE_SIZE) | 622 | if (dirty_bytes > PAGE_SIZE) |
@@ -636,21 +629,8 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) | |||
636 | 629 | ||
637 | memslot->dirty_bitmap_head = memslot->dirty_bitmap; | 630 | memslot->dirty_bitmap_head = memslot->dirty_bitmap; |
638 | memslot->nr_dirty_pages = 0; | 631 | memslot->nr_dirty_pages = 0; |
639 | return 0; | ||
640 | } | ||
641 | #endif /* !CONFIG_S390 */ | 632 | #endif /* !CONFIG_S390 */ |
642 | 633 | return 0; | |
643 | static struct kvm_memory_slot * | ||
644 | search_memslots(struct kvm_memslots *slots, gfn_t gfn) | ||
645 | { | ||
646 | struct kvm_memory_slot *memslot; | ||
647 | |||
648 | kvm_for_each_memslot(memslot, slots) | ||
649 | if (gfn >= memslot->base_gfn && | ||
650 | gfn < memslot->base_gfn + memslot->npages) | ||
651 | return memslot; | ||
652 | |||
653 | return NULL; | ||
654 | } | 634 | } |
655 | 635 | ||
656 | static int cmp_memslot(const void *slot1, const void *slot2) | 636 | static int cmp_memslot(const void *slot1, const void *slot2) |
@@ -778,69 +758,24 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
778 | r = -ENOMEM; | 758 | r = -ENOMEM; |
779 | 759 | ||
780 | /* Allocate if a slot is being created */ | 760 | /* Allocate if a slot is being created */ |
761 | if (npages && !old.npages) { | ||
762 | new.user_alloc = user_alloc; | ||
763 | new.userspace_addr = mem->userspace_addr; | ||
781 | #ifndef CONFIG_S390 | 764 | #ifndef CONFIG_S390 |
782 | if (npages && !new.rmap) { | ||
783 | new.rmap = vzalloc(npages * sizeof(*new.rmap)); | 765 | new.rmap = vzalloc(npages * sizeof(*new.rmap)); |
784 | |||
785 | if (!new.rmap) | 766 | if (!new.rmap) |
786 | goto out_free; | 767 | goto out_free; |
787 | 768 | #endif /* not defined CONFIG_S390 */ | |
788 | new.user_alloc = user_alloc; | 769 | if (kvm_arch_create_memslot(&new, npages)) |
789 | new.userspace_addr = mem->userspace_addr; | ||
790 | } | ||
791 | if (!npages) | ||
792 | goto skip_lpage; | ||
793 | |||
794 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | ||
795 | unsigned long ugfn; | ||
796 | unsigned long j; | ||
797 | int lpages; | ||
798 | int level = i + 2; | ||
799 | |||
800 | /* Avoid unused variable warning if no large pages */ | ||
801 | (void)level; | ||
802 | |||
803 | if (new.lpage_info[i]) | ||
804 | continue; | ||
805 | |||
806 | lpages = 1 + ((base_gfn + npages - 1) | ||
807 | >> KVM_HPAGE_GFN_SHIFT(level)); | ||
808 | lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level); | ||
809 | |||
810 | new.lpage_info[i] = vzalloc(lpages * sizeof(*new.lpage_info[i])); | ||
811 | |||
812 | if (!new.lpage_info[i]) | ||
813 | goto out_free; | 770 | goto out_free; |
814 | |||
815 | if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) | ||
816 | new.lpage_info[i][0].write_count = 1; | ||
817 | if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) | ||
818 | new.lpage_info[i][lpages - 1].write_count = 1; | ||
819 | ugfn = new.userspace_addr >> PAGE_SHIFT; | ||
820 | /* | ||
821 | * If the gfn and userspace address are not aligned wrt each | ||
822 | * other, or if explicitly asked to, disable large page | ||
823 | * support for this slot | ||
824 | */ | ||
825 | if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) || | ||
826 | !largepages_enabled) | ||
827 | for (j = 0; j < lpages; ++j) | ||
828 | new.lpage_info[i][j].write_count = 1; | ||
829 | } | 771 | } |
830 | 772 | ||
831 | skip_lpage: | ||
832 | |||
833 | /* Allocate page dirty bitmap if needed */ | 773 | /* Allocate page dirty bitmap if needed */ |
834 | if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { | 774 | if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { |
835 | if (kvm_create_dirty_bitmap(&new) < 0) | 775 | if (kvm_create_dirty_bitmap(&new) < 0) |
836 | goto out_free; | 776 | goto out_free; |
837 | /* destroy any largepage mappings for dirty tracking */ | 777 | /* destroy any largepage mappings for dirty tracking */ |
838 | } | 778 | } |
839 | #else /* not defined CONFIG_S390 */ | ||
840 | new.user_alloc = user_alloc; | ||
841 | if (user_alloc) | ||
842 | new.userspace_addr = mem->userspace_addr; | ||
843 | #endif /* not defined CONFIG_S390 */ | ||
844 | 779 | ||
845 | if (!npages) { | 780 | if (!npages) { |
846 | struct kvm_memory_slot *slot; | 781 | struct kvm_memory_slot *slot; |
@@ -890,8 +825,7 @@ skip_lpage: | |||
890 | if (!npages) { | 825 | if (!npages) { |
891 | new.rmap = NULL; | 826 | new.rmap = NULL; |
892 | new.dirty_bitmap = NULL; | 827 | new.dirty_bitmap = NULL; |
893 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) | 828 | memset(&new.arch, 0, sizeof(new.arch)); |
894 | new.lpage_info[i] = NULL; | ||
895 | } | 829 | } |
896 | 830 | ||
897 | update_memslots(slots, &new); | 831 | update_memslots(slots, &new); |
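The allocation side is symmetric: sizing the per-level lpage_info arrays and pre-marking unaligned head and tail blocks, all removed from the generic path above, now belong in kvm_arch_create_memslot(). Condensed from the removed code into a sketch of an arch implementation; unwinding of already-allocated levels on failure is omitted for brevity:

    int kvm_arch_create_memslot(struct kvm_memory_slot *slot,
                                unsigned long npages)
    {
        int i;

        for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
            unsigned long lpages;
            int level = i + 2;

            lpages = 1 + ((slot->base_gfn + npages - 1)
                          >> KVM_HPAGE_GFN_SHIFT(level));
            lpages -= slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level);

            slot->arch.lpage_info[i] =
                vzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
            if (!slot->arch.lpage_info[i])
                return -ENOMEM;

            /* partial head/tail blocks can never be large-mapped */
            if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
                slot->arch.lpage_info[i][0].write_count = 1;
            if ((slot->base_gfn + npages) &
                (KVM_PAGES_PER_HPAGE(level) - 1))
                slot->arch.lpage_info[i][lpages - 1].write_count = 1;
        }
        return 0;
    }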
@@ -978,6 +912,11 @@ out: | |||
978 | return r; | 912 | return r; |
979 | } | 913 | } |
980 | 914 | ||
915 | bool kvm_largepages_enabled(void) | ||
916 | { | ||
917 | return largepages_enabled; | ||
918 | } | ||
919 | |||
981 | void kvm_disable_largepages(void) | 920 | void kvm_disable_largepages(void) |
982 | { | 921 | { |
983 | largepages_enabled = false; | 922 | largepages_enabled = false; |
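largepages_enabled is static to this file, so the new kvm_largepages_enabled() accessor is what lets the arch memslot hooks above honor the global switch. An illustrative caller; the helper name and shape are hypothetical, but the alignment test mirrors the logic removed from __kvm_set_memory_region():

    static bool slot_largepage_ok(struct kvm_memory_slot *slot, int level)
    {
        unsigned long ugfn = slot->userspace_addr >> PAGE_SHIFT;

        /* gfn and hva misaligned w.r.t. each other: no large pages */
        if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1))
            return false;
        return kvm_largepages_enabled();
    }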
@@ -1031,12 +970,6 @@ int kvm_is_error_hva(unsigned long addr) | |||
1031 | } | 970 | } |
1032 | EXPORT_SYMBOL_GPL(kvm_is_error_hva); | 971 | EXPORT_SYMBOL_GPL(kvm_is_error_hva); |
1033 | 972 | ||
1034 | static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots, | ||
1035 | gfn_t gfn) | ||
1036 | { | ||
1037 | return search_memslots(slots, gfn); | ||
1038 | } | ||
1039 | |||
1040 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | 973 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) |
1041 | { | 974 | { |
1042 | return __gfn_to_memslot(kvm_memslots(kvm), gfn); | 975 | return __gfn_to_memslot(kvm_memslots(kvm), gfn); |
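__gfn_to_memslot() is not gone: together with search_memslots(), removed further up, it moves to include/linux/kvm_host.h as an inline so arch code can do slot lookups without a function call. The moved helpers, as they would read in the header (bodies unchanged from what was removed):

    static inline struct kvm_memory_slot *
    search_memslots(struct kvm_memslots *slots, gfn_t gfn)
    {
        struct kvm_memory_slot *memslot;

        kvm_for_each_memslot(memslot, slots)
            if (gfn >= memslot->base_gfn &&
                gfn < memslot->base_gfn + memslot->npages)
                return memslot;

        return NULL;
    }

    static inline struct kvm_memory_slot *
    __gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
    {
        return search_memslots(slots, gfn);
    }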
@@ -1459,7 +1392,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | |||
1459 | 1392 | ||
1460 | ghc->gpa = gpa; | 1393 | ghc->gpa = gpa; |
1461 | ghc->generation = slots->generation; | 1394 | ghc->generation = slots->generation; |
1462 | ghc->memslot = __gfn_to_memslot(slots, gfn); | 1395 | ghc->memslot = gfn_to_memslot(kvm, gfn); |
1463 | ghc->hva = gfn_to_hva_many(ghc->memslot, gfn, NULL); | 1396 | ghc->hva = gfn_to_hva_many(ghc->memslot, gfn, NULL); |
1464 | if (!kvm_is_error_hva(ghc->hva)) | 1397 | if (!kvm_is_error_hva(ghc->hva)) |
1465 | ghc->hva += offset; | 1398 | ghc->hva += offset; |
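This call site switches to the public gfn_to_memslot() rather than reaching for the now-header-local __gfn_to_memslot(). For context, the cache initialized here backs the kvm_*_guest_cached() helpers; a usage sketch for an in-kernel caller, with kvm and gpa assumed in scope and error handling elided:

    struct gfn_to_hva_cache ghc;
    u32 val = 1;

    /* translate the guest physical address once, reuse the cached hva */
    kvm_gfn_to_hva_cache_init(kvm, &ghc, gpa);
    kvm_write_guest_cached(kvm, &ghc, &val, sizeof(val));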
@@ -1657,7 +1590,7 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1657 | page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); | 1590 | page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); |
1658 | #endif | 1591 | #endif |
1659 | else | 1592 | else |
1660 | return VM_FAULT_SIGBUS; | 1593 | return kvm_arch_vcpu_fault(vcpu, vmf); |
1661 | get_page(page); | 1594 | get_page(page); |
1662 | vmf->page = page; | 1595 | vmf->page = page; |
1663 | return 0; | 1596 | return 0; |
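Instead of unconditionally raising SIGBUS for unrecognized vcpu mmap offsets, the generic fault handler now defers to the architecture. The s390 ucontrol support in this series uses the hook to expose the sie control block at KVM_S390_SIE_PAGE_OFFSET; roughly, as a simplified sketch of the s390 side:

    int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
    {
    #ifdef CONFIG_KVM_S390_UCONTROL
        if (vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET
            && kvm_is_ucontrol(vcpu->kvm)) {
            vmf->page = virt_to_page(vcpu->arch.sie_block);
            get_page(vmf->page);
            return 0;
        }
    #endif
        return VM_FAULT_SIGBUS;  /* every other arch keeps old behavior */
    }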
@@ -1718,6 +1651,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) | |||
1718 | goto vcpu_destroy; | 1651 | goto vcpu_destroy; |
1719 | 1652 | ||
1720 | mutex_lock(&kvm->lock); | 1653 | mutex_lock(&kvm->lock); |
1654 | if (!kvm_vcpu_compatible(vcpu)) { | ||
1655 | r = -EINVAL; | ||
1656 | goto unlock_vcpu_destroy; | ||
1657 | } | ||
1721 | if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) { | 1658 | if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) { |
1722 | r = -EINVAL; | 1659 | r = -EINVAL; |
1723 | goto unlock_vcpu_destroy; | 1660 | goto unlock_vcpu_destroy; |
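kvm_vcpu_compatible() vetoes a vcpu whose setup conflicts with VM-wide state before it becomes visible. On x86 in this series the check stops userspace from mixing vcpus with and without an in-kernel local APIC once the irqchip mode is chosen; other architectures can return true unconditionally. The x86 flavor, as a sketch:

    /* compatible iff APIC presence matches the VM's irqchip mode */
    static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
    {
        return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
    }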
@@ -2198,12 +2135,12 @@ static struct file_operations kvm_vm_fops = { | |||
2198 | .llseek = noop_llseek, | 2135 | .llseek = noop_llseek, |
2199 | }; | 2136 | }; |
2200 | 2137 | ||
2201 | static int kvm_dev_ioctl_create_vm(void) | 2138 | static int kvm_dev_ioctl_create_vm(unsigned long type) |
2202 | { | 2139 | { |
2203 | int r; | 2140 | int r; |
2204 | struct kvm *kvm; | 2141 | struct kvm *kvm; |
2205 | 2142 | ||
2206 | kvm = kvm_create_vm(); | 2143 | kvm = kvm_create_vm(type); |
2207 | if (IS_ERR(kvm)) | 2144 | if (IS_ERR(kvm)) |
2208 | return PTR_ERR(kvm); | 2145 | return PTR_ERR(kvm); |
2209 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 2146 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
@@ -2254,10 +2191,7 @@ static long kvm_dev_ioctl(struct file *filp, | |||
2254 | r = KVM_API_VERSION; | 2191 | r = KVM_API_VERSION; |
2255 | break; | 2192 | break; |
2256 | case KVM_CREATE_VM: | 2193 | case KVM_CREATE_VM: |
2257 | r = -EINVAL; | 2194 | r = kvm_dev_ioctl_create_vm(arg); |
2258 | if (arg) | ||
2259 | goto out; | ||
2260 | r = kvm_dev_ioctl_create_vm(); | ||
2261 | break; | 2195 | break; |
2262 | case KVM_CHECK_EXTENSION: | 2196 | case KVM_CHECK_EXTENSION: |
2263 | r = kvm_dev_ioctl_check_extension_generic(arg); | 2197 | r = kvm_dev_ioctl_check_extension_generic(arg); |
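With the -EINVAL check dropped, the KVM_CREATE_VM argument travels all the way down as the machine type (see kvm_dev_ioctl_create_vm() and kvm_create_vm() above). A minimal userspace sketch; type 0 is correct on almost every architecture, and the s390 ucontrol type additionally requires CAP_SYS_ADMIN:

    #include <fcntl.h>
    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    int main(void)
    {
        int sys_fd = open("/dev/kvm", O_RDWR);
        int vm_fd = ioctl(sys_fd, KVM_CREATE_VM, 0UL);  /* type 0 */

        return vm_fd < 0;
    }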